From 4b66af2d6356a00e94bcdea3e7fea324e8b5c6f4 Mon Sep 17 00:00:00 2001 From: Kevin Easton Date: Sat, 7 Apr 2018 11:40:33 -0400 Subject: [PATCH 001/347] af_key: Always verify length of provided sadb_key Key extensions (struct sadb_key) include a user-specified number of key bits. The kernel uses that number to determine how much key data to copy out of the message in pfkey_msg2xfrm_state(). The length of the sadb_key message must be verified to be long enough, even in the case of SADB_X_AALG_NULL. Furthermore, the sadb_key_len value must be long enough to include both the key data and the struct sadb_key itself. Introduce a helper function verify_key_len(), and call it from parse_exthdrs() where other exthdr types are similarly checked for correctness. Signed-off-by: Kevin Easton Reported-by: syzbot+5022a34ca5a3d49b84223653fab632dfb7b4cf37@syzkaller.appspotmail.com Signed-off-by: Steffen Klassert --- net/key/af_key.c | 45 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 35 insertions(+), 10 deletions(-) diff --git a/net/key/af_key.c b/net/key/af_key.c index 7e2e7188e7f4..e62e52e8f141 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -437,6 +437,24 @@ static int verify_address_len(const void *p) return 0; } +static inline int sadb_key_len(const struct sadb_key *key) +{ + int key_bytes = DIV_ROUND_UP(key->sadb_key_bits, 8); + + return DIV_ROUND_UP(sizeof(struct sadb_key) + key_bytes, + sizeof(uint64_t)); +} + +static int verify_key_len(const void *p) +{ + const struct sadb_key *key = p; + + if (sadb_key_len(key) > key->sadb_key_len) + return -EINVAL; + + return 0; +} + static inline int pfkey_sec_ctx_len(const struct sadb_x_sec_ctx *sec_ctx) { return DIV_ROUND_UP(sizeof(struct sadb_x_sec_ctx) + @@ -533,16 +551,25 @@ static int parse_exthdrs(struct sk_buff *skb, const struct sadb_msg *hdr, void * return -EINVAL; if (ext_hdrs[ext_type-1] != NULL) return -EINVAL; - if (ext_type == SADB_EXT_ADDRESS_SRC || - ext_type == SADB_EXT_ADDRESS_DST || - ext_type == SADB_EXT_ADDRESS_PROXY || - ext_type == SADB_X_EXT_NAT_T_OA) { + switch (ext_type) { + case SADB_EXT_ADDRESS_SRC: + case SADB_EXT_ADDRESS_DST: + case SADB_EXT_ADDRESS_PROXY: + case SADB_X_EXT_NAT_T_OA: if (verify_address_len(p)) return -EINVAL; - } - if (ext_type == SADB_X_EXT_SEC_CTX) { + break; + case SADB_X_EXT_SEC_CTX: if (verify_sec_ctx_len(p)) return -EINVAL; + break; + case SADB_EXT_KEY_AUTH: + case SADB_EXT_KEY_ENCRYPT: + if (verify_key_len(p)) + return -EINVAL; + break; + default: + break; } ext_hdrs[ext_type-1] = (void *) p; } @@ -1104,14 +1131,12 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, key = ext_hdrs[SADB_EXT_KEY_AUTH - 1]; if (key != NULL && sa->sadb_sa_auth != SADB_X_AALG_NULL && - ((key->sadb_key_bits+7) / 8 == 0 || - (key->sadb_key_bits+7) / 8 > key->sadb_key_len * sizeof(uint64_t))) + key->sadb_key_bits == 0) return ERR_PTR(-EINVAL); key = ext_hdrs[SADB_EXT_KEY_ENCRYPT-1]; if (key != NULL && sa->sadb_sa_encrypt != SADB_EALG_NULL && - ((key->sadb_key_bits+7) / 8 == 0 || - (key->sadb_key_bits+7) / 8 > key->sadb_key_len * sizeof(uint64_t))) + key->sadb_key_bits == 0) return ERR_PTR(-EINVAL); x = xfrm_state_alloc(net); From fd17ed684b6e33312cdcd9270b1fece9df266103 Mon Sep 17 00:00:00 2001 From: Kunihiko Hayashi Date: Mon, 9 Apr 2018 10:32:34 +0900 Subject: [PATCH 002/347] Revert "ata: ahci-platform: add reset control support" This reverts commit f0f56716fc3e5d547fd7811eb218a30ed0695605. According to Thierry's view, https://www.spinics.net/lists/linux-ide/msg55357.html some hardware-specific drivers already use their own resets, and the common reset might make a path to occur double controls of resets. For now, revert the commit that adds reset control support to ahci-platform, and hold until the solution is confirmed not be affect all hardware-specific drivers. Fixes: f0f56716fc3e ("ata: ahci-platform: add reset control support") Reported-by: Thierry Reding Suggested-by: Hans de Goede Acked-by: Hans de Goede Signed-off-by: Kunihiko Hayashi Signed-off-by: Tejun Heo --- .../devicetree/bindings/ata/ahci-platform.txt | 1 - drivers/ata/ahci.h | 1 - drivers/ata/libahci_platform.c | 24 +++---------------- 3 files changed, 3 insertions(+), 23 deletions(-) diff --git a/Documentation/devicetree/bindings/ata/ahci-platform.txt b/Documentation/devicetree/bindings/ata/ahci-platform.txt index f4006d3c9fdf..c760ecb81381 100644 --- a/Documentation/devicetree/bindings/ata/ahci-platform.txt +++ b/Documentation/devicetree/bindings/ata/ahci-platform.txt @@ -30,7 +30,6 @@ compatible: Optional properties: - dma-coherent : Present if dma operations are coherent - clocks : a list of phandle + clock specifier pairs -- resets : a list of phandle + reset specifier pairs - target-supply : regulator for SATA target power - phys : reference to the SATA PHY node - phy-names : must be "sata-phy" diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h index 4356ef1d28a8..a9d996e17d75 100644 --- a/drivers/ata/ahci.h +++ b/drivers/ata/ahci.h @@ -350,7 +350,6 @@ struct ahci_host_priv { u32 em_msg_type; /* EM message type */ bool got_runtime_pm; /* Did we do pm_runtime_get? */ struct clk *clks[AHCI_MAX_CLKS]; /* Optional */ - struct reset_control *rsts; /* Optional */ struct regulator **target_pwrs; /* Optional */ /* * If platform uses PHYs. There is a 1:1 relation between the port number and diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c index 46a762442dc5..30cc8f1a31e1 100644 --- a/drivers/ata/libahci_platform.c +++ b/drivers/ata/libahci_platform.c @@ -25,7 +25,6 @@ #include #include #include -#include #include "ahci.h" static void ahci_host_stop(struct ata_host *host); @@ -196,8 +195,7 @@ EXPORT_SYMBOL_GPL(ahci_platform_disable_regulators); * following order: * 1) Regulator * 2) Clocks (through ahci_platform_enable_clks) - * 3) Resets - * 4) Phys + * 3) Phys * * If resource enabling fails at any point the previous enabled resources * are disabled in reverse order. @@ -217,19 +215,12 @@ int ahci_platform_enable_resources(struct ahci_host_priv *hpriv) if (rc) goto disable_regulator; - rc = reset_control_deassert(hpriv->rsts); + rc = ahci_platform_enable_phys(hpriv); if (rc) goto disable_clks; - rc = ahci_platform_enable_phys(hpriv); - if (rc) - goto disable_resets; - return 0; -disable_resets: - reset_control_assert(hpriv->rsts); - disable_clks: ahci_platform_disable_clks(hpriv); @@ -248,15 +239,12 @@ EXPORT_SYMBOL_GPL(ahci_platform_enable_resources); * following order: * 1) Phys * 2) Clocks (through ahci_platform_disable_clks) - * 3) Resets - * 4) Regulator + * 3) Regulator */ void ahci_platform_disable_resources(struct ahci_host_priv *hpriv) { ahci_platform_disable_phys(hpriv); - reset_control_assert(hpriv->rsts); - ahci_platform_disable_clks(hpriv); ahci_platform_disable_regulators(hpriv); @@ -405,12 +393,6 @@ struct ahci_host_priv *ahci_platform_get_resources(struct platform_device *pdev) hpriv->clks[i] = clk; } - hpriv->rsts = devm_reset_control_array_get_optional_shared(dev); - if (IS_ERR(hpriv->rsts)) { - rc = PTR_ERR(hpriv->rsts); - goto err_out; - } - hpriv->nports = child_nodes = of_get_child_count(dev->of_node); /* From af82800cd2d9cfcae2de1b47a35c26c147b9cdd1 Mon Sep 17 00:00:00 2001 From: wangbo Date: Thu, 12 Apr 2018 16:58:08 +0800 Subject: [PATCH 003/347] spi: imx: Update MODULE_DESCRIPTION to "SPI Controller driver" Now i.MX SPI controller can work in Slave mode. Update MODULE_DESCRIPTION to "SPI Controller driver". Signed-off-by: wangbo Signed-off-by: Mark Brown --- drivers/spi/spi-imx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index 6f57592a7f95..a056ee88a960 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -1701,7 +1701,7 @@ static struct platform_driver spi_imx_driver = { }; module_platform_driver(spi_imx_driver); -MODULE_DESCRIPTION("SPI Master Controller driver"); +MODULE_DESCRIPTION("SPI Controller driver"); MODULE_AUTHOR("Sascha Hauer, Pengutronix"); MODULE_LICENSE("GPL"); MODULE_ALIAS("platform:" DRIVER_NAME); From b48c05ab5d32af2af4bc63851c153782d1c6ba42 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 16 Apr 2018 07:50:09 +0200 Subject: [PATCH 004/347] xfrm: Fix warning in xfrm6_tunnel_net_exit. We need to make sure that all states are really deleted before we check that the state lists are empty. Otherwise we trigger a warning. Fixes: baeb0dbbb5659 ("xfrm6_tunnel: exit_net cleanup check added") Reported-and-tested-by:syzbot+777bf170a89e7b326405@syzkaller.appspotmail.com Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 1 + net/ipv6/xfrm6_tunnel.c | 3 +++ net/xfrm/xfrm_state.c | 6 ++++++ 3 files changed, 10 insertions(+) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index a872379b69da..45e75c36b738 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -375,6 +375,7 @@ struct xfrm_input_afinfo { int xfrm_input_register_afinfo(const struct xfrm_input_afinfo *afinfo); int xfrm_input_unregister_afinfo(const struct xfrm_input_afinfo *afinfo); +void xfrm_flush_gc(void); void xfrm_state_delete_tunnel(struct xfrm_state *x); struct xfrm_type { diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index f85f0d7480ac..4a46df8441c9 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -341,6 +341,9 @@ static void __net_exit xfrm6_tunnel_net_exit(struct net *net) struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); unsigned int i; + xfrm_state_flush(net, IPSEC_PROTO_ANY, false); + xfrm_flush_gc(); + for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++) WARN_ON_ONCE(!hlist_empty(&xfrm6_tn->spi_byaddr[i])); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index f9d2f2233f09..6c177ae7a6d9 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -2175,6 +2175,12 @@ struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family) return afinfo; } +void xfrm_flush_gc(void) +{ + flush_work(&xfrm_state_gc_work); +} +EXPORT_SYMBOL(xfrm_flush_gc); + /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */ void xfrm_state_delete_tunnel(struct xfrm_state *x) { From 10b4640833e95eeacaef8060bc1b35e636df3218 Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Fri, 13 Apr 2018 15:44:16 +0300 Subject: [PATCH 005/347] spi: sh-msiof: Fix bit field overflow writes to TSCR/RSCR The change fixes a bit field overflow which allows to write to higher bits while calculating SPI transfer clock and setting BRPS and BRDV bit fields, the problem is reproduced if 'parent_rate' to 'spi_hz' ratio is greater than 1024, for instance p->min_div = 2, MSO rate = 33333333, SPI device rate = 10000 results in k = 5, i.e. BRDV = 0b100 or 1/32 prescaler output, BRPS = 105, TSCR value = 0x6804, thus MSSEL and MSIMM bit fields are non-zero. Fixes: 65d5665bb260 ("spi: sh-msiof: Update calculation of frequency dividing") Signed-off-by: Vladimir Zapolskiy Signed-off-by: Mark Brown --- drivers/spi/spi-sh-msiof.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c index ae086aab57d5..8171eedbfc90 100644 --- a/drivers/spi/spi-sh-msiof.c +++ b/drivers/spi/spi-sh-msiof.c @@ -283,6 +283,7 @@ static void sh_msiof_spi_set_clk_regs(struct sh_msiof_spi_priv *p, } k = min_t(int, k, ARRAY_SIZE(sh_msiof_spi_div_table) - 1); + brps = min_t(int, brps, 32); scr = sh_msiof_spi_div_table[k].brdv | SCR_BRPS(brps); sh_msiof_write(p, TSCR, scr); From 99bf8f27f3f94d2a37291354b8dc83f13728f75f Mon Sep 17 00:00:00 2001 From: Lukasz Majewski Date: Wed, 4 Apr 2018 09:52:04 +0200 Subject: [PATCH 006/347] doc: Add vendor prefix for Kieback & Peter GmbH The 'kiebackpeter' entry has been added to vendor-prefixes.txt to indicate products from Kieback & Peter GmbH. Signed-off-by: Lukasz Majewski Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/vendor-prefixes.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index b5f978a4cac6..a38d8bfae19c 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt @@ -182,6 +182,7 @@ karo Ka-Ro electronics GmbH keithkoep Keith & Koep GmbH keymile Keymile GmbH khadas Khadas +kiebackpeter Kieback & Peter GmbH kinetic Kinetic Technologies kingnovel Kingnovel Technology Co., Ltd. kosagi Sutajio Ko-Usagi PTE Ltd. From b614e905a0bc8fc5d4fa72665ac26ae00c874a4e Mon Sep 17 00:00:00 2001 From: Matheus Castello Date: Wed, 11 Apr 2018 01:17:03 -0400 Subject: [PATCH 007/347] dt-bindings: pinctrl: sunxi: Fix reference to driver Bindings describe hardware, not drivers. Use reference to hardware Allwinner A1X Pin Controller instead driver. Signed-off-by: Matheus Castello Signed-off-by: Rob Herring --- .../devicetree/bindings/pinctrl/allwinner,sunxi-pinctrl.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/pinctrl/allwinner,sunxi-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/allwinner,sunxi-pinctrl.txt index ed5eb547afc8..64bc5c2a76da 100644 --- a/Documentation/devicetree/bindings/pinctrl/allwinner,sunxi-pinctrl.txt +++ b/Documentation/devicetree/bindings/pinctrl/allwinner,sunxi-pinctrl.txt @@ -56,9 +56,9 @@ pins it needs, and how they should be configured, with regard to muxer configuration, drive strength and pullups. If one of these options is not set, its actual value will be unspecified. -This driver supports the generic pin multiplexing and configuration -bindings. For details on each properties, you can refer to -./pinctrl-bindings.txt. +Allwinner A1X Pin Controller supports the generic pin multiplexing and +configuration bindings. For details on each properties, you can refer to + ./pinctrl-bindings.txt. Required sub-node properties: - pins From 1a862488729a6ea9cfd285d2c90f8738949ae7d2 Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Mon, 16 Apr 2018 15:55:17 +0200 Subject: [PATCH 008/347] dt-bindings: net: ravb: Add support for r8a77965 SoC Add documentation for r8a77965 compatible string to renesas ravb device tree bindings documentation. Signed-off-by: Jacopo Mondi Reviewed-by: Geert Uytterhoeven Reviewed-by: Simon Horman Acked-by: Sergei Shtylyov Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/net/renesas,ravb.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/net/renesas,ravb.txt b/Documentation/devicetree/bindings/net/renesas,ravb.txt index c306f55d335b..890526dbfc26 100644 --- a/Documentation/devicetree/bindings/net/renesas,ravb.txt +++ b/Documentation/devicetree/bindings/net/renesas,ravb.txt @@ -18,6 +18,7 @@ Required properties: - "renesas,etheravb-r8a7795" for the R8A7795 SoC. - "renesas,etheravb-r8a7796" for the R8A7796 SoC. + - "renesas,etheravb-r8a77965" for the R8A77965 SoC. - "renesas,etheravb-r8a77970" for the R8A77970 SoC. - "renesas,etheravb-r8a77980" for the R8A77980 SoC. - "renesas,etheravb-r8a77995" for the R8A77995 SoC. From 7de5b7e5f6a67c285b86d1478e8e150929c93482 Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Mon, 16 Apr 2018 15:55:28 +0200 Subject: [PATCH 009/347] dt-bindings: serial: sh-sci: Add support for r8a77965 (H)SCIF Add documentation for r8a77965 compatible string to Renesas sci-serial device tree bindings documentation. Signed-off-by: Jacopo Mondi Reviewed-by: Geert Uytterhoeven Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/serial/renesas,sci-serial.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/serial/renesas,sci-serial.txt b/Documentation/devicetree/bindings/serial/renesas,sci-serial.txt index ad962f4ec3aa..0cc5417904dd 100644 --- a/Documentation/devicetree/bindings/serial/renesas,sci-serial.txt +++ b/Documentation/devicetree/bindings/serial/renesas,sci-serial.txt @@ -41,6 +41,8 @@ Required properties: - "renesas,hscif-r8a7795" for R8A7795 (R-Car H3) HSCIF compatible UART. - "renesas,scif-r8a7796" for R8A7796 (R-Car M3-W) SCIF compatible UART. - "renesas,hscif-r8a7796" for R8A7796 (R-Car M3-W) HSCIF compatible UART. + - "renesas,scif-r8a77965" for R8A77965 (R-Car M3-N) SCIF compatible UART. + - "renesas,hscif-r8a77965" for R8A77965 (R-Car M3-N) HSCIF compatible UART. - "renesas,scif-r8a77970" for R8A77970 (R-Car V3M) SCIF compatible UART. - "renesas,hscif-r8a77970" for R8A77970 (R-Car V3M) HSCIF compatible UART. - "renesas,scif-r8a77980" for R8A77980 (R-Car V3H) SCIF compatible UART. From b89bc283286b105e50aab9ab35992c0237ac77d8 Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Mon, 16 Apr 2018 15:56:08 +0200 Subject: [PATCH 010/347] dt-bindings: dmaengine: rcar-dmac: document R8A77965 support Add documentation for r8a77965 compatible string to rcar-dmac device tree bindings documentation. Signed-off-by: Jacopo Mondi Reviewed-by: Geert Uytterhoeven Reviewed-by: Simon Horman Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/dma/renesas,rcar-dmac.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.txt b/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.txt index aadfb236d53a..61315eaa7660 100644 --- a/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.txt +++ b/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.txt @@ -26,6 +26,7 @@ Required Properties: - "renesas,dmac-r8a7794" (R-Car E2) - "renesas,dmac-r8a7795" (R-Car H3) - "renesas,dmac-r8a7796" (R-Car M3-W) + - "renesas,dmac-r8a77965" (R-Car M3-N) - "renesas,dmac-r8a77970" (R-Car V3M) - "renesas,dmac-r8a77980" (R-Car V3H) From 49530e6411789c1b9ea3ebc58e520c19d1c3752f Mon Sep 17 00:00:00 2001 From: sxauwsk Date: Tue, 17 Apr 2018 04:01:27 +0800 Subject: [PATCH 011/347] spi: cadence: Add usleep_range() for cdns_spi_fill_tx_fifo() In case of xspi work in busy condition, may send bytes failed. once something wrong, spi controller did't work any more My test found this situation appear in both of read/write process. so when TX FIFO is full, add one byte delay before send data; Signed-off-by: sxauwsk Signed-off-by: Mark Brown --- drivers/spi/spi-cadence.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/spi/spi-cadence.c b/drivers/spi/spi-cadence.c index 5c9516ae4942..4a001634023e 100644 --- a/drivers/spi/spi-cadence.c +++ b/drivers/spi/spi-cadence.c @@ -313,6 +313,14 @@ static void cdns_spi_fill_tx_fifo(struct cdns_spi *xspi) while ((trans_cnt < CDNS_SPI_FIFO_DEPTH) && (xspi->tx_bytes > 0)) { + + /* When xspi in busy condition, bytes may send failed, + * then spi control did't work thoroughly, add one byte delay + */ + if (cdns_spi_read(xspi, CDNS_SPI_ISR) & + CDNS_SPI_IXR_TXFULL) + usleep_range(10, 20); + if (xspi->txbuf) cdns_spi_write(xspi, CDNS_SPI_TXD, *xspi->txbuf++); else From 911a26484c33e10de6237228ca1d7293548e9f49 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Tue, 3 Apr 2018 11:35:22 +0300 Subject: [PATCH 012/347] mac80211: Fix condition validating WMM IE Commit c470bdc1aaf3 ("mac80211: don't WARN on bad WMM parameters from buggy APs") handled cases where an AP reports a zeroed WMM IE. However, the condition that checks the validity accessed the wrong index in the ieee80211_tx_queue_params array, thus wrongly deducing that the parameters are invalid. Fix it. Fixes: c470bdc1aaf3 ("mac80211: don't WARN on bad WMM parameters from buggy APs") Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 69449db7e283..6fe72ef182a1 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1787,7 +1787,7 @@ static bool ieee80211_sta_wmm_params(struct ieee80211_local *local, params[ac].acm = acm; params[ac].uapsd = uapsd; - if (params->cw_min == 0 || + if (params[ac].cw_min == 0 || params[ac].cw_min > params[ac].cw_max) { sdata_info(sdata, "AP has invalid WMM params (CWmin/max=%d/%d for ACI %d), using defaults\n", From a7cfebcb7594a24609268f91299ab85ba064bf82 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 3 Apr 2018 14:33:49 +0200 Subject: [PATCH 013/347] cfg80211: limit wiphy names to 128 bytes There's currently no limit on wiphy names, other than netlink message size and memory limitations, but that causes issues when, for example, the wiphy name is used in a uevent, e.g. in rfkill where we use the same name for the rfkill instance, and then the buffer there is "only" 2k for the environment variables. This was reported by syzkaller, which used a 4k name. Limit the name to something reasonable, I randomly picked 128. Reported-by: syzbot+230d9e642a85d3fec29c@syzkaller.appspotmail.com Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 2 ++ net/wireless/core.c | 3 +++ 2 files changed, 5 insertions(+) diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 15daf5e2638d..9c3630146cec 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2698,6 +2698,8 @@ enum nl80211_attrs { #define NL80211_ATTR_KEYS NL80211_ATTR_KEYS #define NL80211_ATTR_FEATURE_FLAGS NL80211_ATTR_FEATURE_FLAGS +#define NL80211_WIPHY_NAME_MAXLEN 128 + #define NL80211_MAX_SUPP_RATES 32 #define NL80211_MAX_SUPP_HT_RATES 77 #define NL80211_MAX_SUPP_REG_RULES 64 diff --git a/net/wireless/core.c b/net/wireless/core.c index a6f3cac8c640..c0fd8a85e7f7 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -95,6 +95,9 @@ static int cfg80211_dev_check_name(struct cfg80211_registered_device *rdev, ASSERT_RTNL(); + if (strlen(newname) > NL80211_WIPHY_NAME_MAXLEN) + return -EINVAL; + /* prohibit calling the thing phy%d when %d is not its number */ sscanf(newname, PHY_NAME "%d%n", &wiphy_idx, &taken); if (taken == strlen(newname) && wiphy_idx != rdev->wiphy_idx) { From 83826469e36b9c8219f88c16713a755b2cea6ff5 Mon Sep 17 00:00:00 2001 From: "weiyongjun (A)" Date: Fri, 30 Mar 2018 02:07:05 +0000 Subject: [PATCH 014/347] cfg80211: fix possible memory leak in regdb_query_country() 'wmm_ptrs' is malloced in regdb_query_country() and should be freed before leaving from the error handling cases, otherwise it will cause memory leak. Fixes: 230ebaa189af ("cfg80211: read wmm rules from regulatory database") Signed-off-by: Wei Yongjun [johannes: add Fixes tag] Signed-off-by: Johannes Berg --- net/wireless/reg.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 16c7e4ef5820..ac3e12c32aa3 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1026,6 +1026,7 @@ static int regdb_query_country(const struct fwdb_header *db, if (!tmp_rd) { kfree(regdom); + kfree(wmm_ptrs); return -ENOMEM; } regdom = tmp_rd; From 2f0605a697f4b9f5b1c1571c7ec6a16df4dc2616 Mon Sep 17 00:00:00 2001 From: Srinivas Dasari Date: Fri, 20 Apr 2018 11:41:14 +0530 Subject: [PATCH 015/347] nl80211: Free connkeys on external authentication failure The failure scenario while processing NL80211_ATTR_EXTERNAL_AUTH_SUPPORT does not free the connkeys. This commit addresses the same. Signed-off-by: Srinivas Dasari Signed-off-by: Johannes Berg --- net/wireless/nl80211.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index ff28f8feeb09..a052693c2e85 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -9214,6 +9214,7 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) if (nla_get_flag(info->attrs[NL80211_ATTR_EXTERNAL_AUTH_SUPPORT])) { if (!info->attrs[NL80211_ATTR_SOCKET_OWNER]) { + kzfree(connkeys); GENL_SET_ERR_MSG(info, "external auth requires connection ownership"); return -EINVAL; From 94912e8df4e5005cb5fa3f3603741eb5e8bf3334 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 5 Apr 2018 11:20:06 -0500 Subject: [PATCH 016/347] ieee802154: mcr20a: Fix memory leak in mcr20a_probe Free allocated memory for pdata before return. Addresses-Coverity-ID: 1466096 ("Resource leak") Fixes: 8c6ad9cc5157 ("ieee802154: Add NXP MCR20A IEEE 802.15.4 transceiver driver") Signed-off-by: Gustavo A. R. Silva Acked-by: Xue Liu Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/mcr20a.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ieee802154/mcr20a.c b/drivers/net/ieee802154/mcr20a.c index 55a22c761808..944470d69ba9 100644 --- a/drivers/net/ieee802154/mcr20a.c +++ b/drivers/net/ieee802154/mcr20a.c @@ -1267,7 +1267,7 @@ mcr20a_probe(struct spi_device *spi) ret = mcr20a_get_platform_data(spi, pdata); if (ret < 0) { dev_crit(&spi->dev, "mcr20a_get_platform_data failed.\n"); - return ret; + goto free_pdata; } /* init reset gpio */ @@ -1275,7 +1275,7 @@ mcr20a_probe(struct spi_device *spi) ret = devm_gpio_request_one(&spi->dev, pdata->rst_gpio, GPIOF_OUT_INIT_HIGH, "reset"); if (ret) - return ret; + goto free_pdata; } /* reset mcr20a */ @@ -1291,7 +1291,8 @@ mcr20a_probe(struct spi_device *spi) hw = ieee802154_alloc_hw(sizeof(*lp), &mcr20a_hw_ops); if (!hw) { dev_crit(&spi->dev, "ieee802154_alloc_hw failed\n"); - return -ENOMEM; + ret = -ENOMEM; + goto free_pdata; } /* init mcr20a local data */ @@ -1366,6 +1367,8 @@ mcr20a_probe(struct spi_device *spi) free_dev: ieee802154_free_hw(lp->hw); +free_pdata: + kfree(pdata); return ret; } From f9e628a6645ed4f42a15dfa3c5af45a916a228e4 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Wed, 11 Apr 2018 10:14:10 +0800 Subject: [PATCH 017/347] net: ieee802154: atusb: Replace GFP_ATOMIC with GFP_KERNEL in atusb_probe atusb_probe() is never called in atomic context. This function is only set as ".probe" in struct usb_driver. Despite never getting called from atomic context, atusb_probe() calls usb_alloc_urb() with GFP_ATOMIC, which does not sleep for allocation. GFP_ATOMIC is not necessary and can be replaced with GFP_KERNEL, which can sleep and improve the possibility of sucessful allocation. This is found by a static analysis tool named DCNS written by myself. And I also manually check it. Signed-off-by: Jia-Ju Bai Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/atusb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ieee802154/atusb.c b/drivers/net/ieee802154/atusb.c index 9fb9b565a002..4f684cbcdc57 100644 --- a/drivers/net/ieee802154/atusb.c +++ b/drivers/net/ieee802154/atusb.c @@ -1045,7 +1045,7 @@ static int atusb_probe(struct usb_interface *interface, atusb->tx_dr.bRequest = ATUSB_TX; atusb->tx_dr.wValue = cpu_to_le16(0); - atusb->tx_urb = usb_alloc_urb(0, GFP_ATOMIC); + atusb->tx_urb = usb_alloc_urb(0, GFP_KERNEL); if (!atusb->tx_urb) goto fail; From 7e0ffee1b2e7d26cc2147be0d9d5186823e923f3 Mon Sep 17 00:00:00 2001 From: Stefan Schmidt Date: Thu, 12 Apr 2018 14:28:49 +0200 Subject: [PATCH 018/347] net: ieee802154: mcr20a: do not leak resources on error path We already allocated the device and platform data at this point. Instead of simply return from the probe function we need to cleanup the resources first. Signed-off-by: Stefan Schmidt Acked-by: Xue Liu --- drivers/net/ieee802154/mcr20a.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ieee802154/mcr20a.c b/drivers/net/ieee802154/mcr20a.c index 944470d69ba9..de0d7f28a181 100644 --- a/drivers/net/ieee802154/mcr20a.c +++ b/drivers/net/ieee802154/mcr20a.c @@ -1309,8 +1309,10 @@ mcr20a_probe(struct spi_device *spi) /* init buf */ lp->buf = devm_kzalloc(&spi->dev, SPI_COMMAND_BUFFER, GFP_KERNEL); - if (!lp->buf) - return -ENOMEM; + if (!lp->buf) { + ret = -ENOMEM; + goto free_dev; + } mcr20a_setup_tx_spi_messages(lp); mcr20a_setup_rx_spi_messages(lp); From f18fa5de5ba7f1d6650951502bb96a6e4715a948 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Fri, 20 Apr 2018 14:54:13 -0400 Subject: [PATCH 019/347] net: ieee802154: 6lowpan: fix frag reassembly This patch initialize stack variables which are used in frag_lowpan_compare_key to zero. In my case there are padding bytes in the structures ieee802154_addr as well in frag_lowpan_compare_key. Otherwise the key variable contains random bytes. The result is that a compare of two keys by memcmp works incorrect. Fixes: 648700f76b03 ("inet: frags: use rhashtables for reassembly units") Signed-off-by: Alexander Aring Reported-by: Stefan Schmidt Signed-off-by: Stefan Schmidt --- net/ieee802154/6lowpan/6lowpan_i.h | 4 ++-- net/ieee802154/6lowpan/reassembly.c | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/net/ieee802154/6lowpan/6lowpan_i.h b/net/ieee802154/6lowpan/6lowpan_i.h index b8d95cb71c25..44a7e16bf3b5 100644 --- a/net/ieee802154/6lowpan/6lowpan_i.h +++ b/net/ieee802154/6lowpan/6lowpan_i.h @@ -20,8 +20,8 @@ typedef unsigned __bitwise lowpan_rx_result; struct frag_lowpan_compare_key { u16 tag; u16 d_size; - const struct ieee802154_addr src; - const struct ieee802154_addr dst; + struct ieee802154_addr src; + struct ieee802154_addr dst; }; /* Equivalent of ipv4 struct ipq diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index 1790b65944b3..2cc224106b69 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -75,14 +75,14 @@ fq_find(struct net *net, const struct lowpan_802154_cb *cb, { struct netns_ieee802154_lowpan *ieee802154_lowpan = net_ieee802154_lowpan(net); - struct frag_lowpan_compare_key key = { - .tag = cb->d_tag, - .d_size = cb->d_size, - .src = *src, - .dst = *dst, - }; + struct frag_lowpan_compare_key key = {}; struct inet_frag_queue *q; + key.tag = cb->d_tag; + key.d_size = cb->d_size; + key.src = *src; + key.dst = *dst; + q = inet_frag_find(&ieee802154_lowpan->frags, &key); if (!q) return NULL; @@ -372,7 +372,7 @@ int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type) struct lowpan_frag_queue *fq; struct net *net = dev_net(skb->dev); struct lowpan_802154_cb *cb = lowpan_802154_cb(skb); - struct ieee802154_hdr hdr; + struct ieee802154_hdr hdr = {}; int err; if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0) From 970f04c840f6004a1a956c1a836792a341d63eae Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 20 Apr 2018 08:08:23 -0500 Subject: [PATCH 020/347] dtc: checks: drop warning for missing PCI bridge bus-range Cherry-picked from dtc upstream commit e1f139ea4900fd0324c646822b4061fec6e08321. Having a 'bus-range' property for PCI bridges should not be required, so remove the warning when missing. There was some confusion with the Linux kernel printing a message that no property is present and the OS assigned the bus number. This message was intended to be informational rather than a warning. When the firmware doesn't enumerate the PCI bus and leaves it up to the OS to do, then it is perfectly fine for the OS to assign bus numbers and bus-range is not necessary. There are a few cases where bus-range is needed or useful as Arnd Bergmann summarized: - Traditionally Linux avoided using multiple PCI domains, but instead configured separate PCI host bridges to have non-overlapping bus ranges so we can present them to user space as a single domain, and run the kernel without CONFIG_PCI_DOMAINS. Specifying the bus ranges this way would and give stable bus numbers across boots when the probe order is not fixed. - On certain ARM64 systems, we must only use the first 128 bus numbers based on the way the IOMMU identifies the device with truncated bus/dev/fn number. There are probably others like this, with various limitations. - To leave some room for hotplugged devices, each slot on a host bridge can in theory get a range of bus numbers that are available when assigning bus numbers at boot time Cc: Arnd Bergmann Signed-off-by: Rob Herring Signed-off-by: David Gibson --- scripts/dtc/checks.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/scripts/dtc/checks.c b/scripts/dtc/checks.c index c07ba4da9e36..815eaf140ab5 100644 --- a/scripts/dtc/checks.c +++ b/scripts/dtc/checks.c @@ -787,10 +787,9 @@ static void check_pci_bridge(struct check *c, struct dt_info *dti, struct node * FAIL(c, dti, node, "incorrect #size-cells for PCI bridge"); prop = get_property(node, "bus-range"); - if (!prop) { - FAIL(c, dti, node, "missing bus-range for PCI bridge"); + if (!prop) return; - } + if (prop->val.len != (sizeof(cell_t) * 2)) { FAIL_PROP(c, dti, node, prop, "value must be 2 cells"); return; From 13b86f50eaaddaea4bdd2fe476fd12e6a0951add Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 22 Apr 2018 19:56:17 +0200 Subject: [PATCH 021/347] thermal: int3403_thermal: Fix NULL pointer deref on module load / probe Starting with kernel 4.17 thermal_cooling_device_register() will call the get_max_state() op during register. Since we deref priv->priv in int3403_get_max_state() this means we must set priv->priv before calling thermal_cooling_device_register(). Signed-off-by: Hans de Goede Signed-off-by: Zhang Rui --- drivers/thermal/int340x_thermal/int3403_thermal.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/thermal/int340x_thermal/int3403_thermal.c b/drivers/thermal/int340x_thermal/int3403_thermal.c index 8a7f24dd9315..0c19fcd56a0d 100644 --- a/drivers/thermal/int340x_thermal/int3403_thermal.c +++ b/drivers/thermal/int340x_thermal/int3403_thermal.c @@ -194,6 +194,7 @@ static int int3403_cdev_add(struct int3403_priv *priv) return -EFAULT; } + priv->priv = obj; obj->max_state = p->package.count - 1; obj->cdev = thermal_cooling_device_register(acpi_device_bid(priv->adev), @@ -201,8 +202,6 @@ static int int3403_cdev_add(struct int3403_priv *priv) if (IS_ERR(obj->cdev)) result = PTR_ERR(obj->cdev); - priv->priv = obj; - kfree(buf.pointer); /* TODO: add ACPI notification support */ From efc4a13724b852ddaa3358402a8dec024ffbcb17 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 19 Apr 2018 19:53:32 +0300 Subject: [PATCH 022/347] spi: pxa2xx: Allow 64-bit DMA Currently the 32-bit device address only is supported for DMA. However, starting from Intel Sunrisepoint PCH the DMA address of the device FIFO can be 64-bit. Change the respective variable to be compatible with DMA engine expectations, i.e. to phys_addr_t. Fixes: 34cadd9c1bcb ("spi: pxa2xx: Add support for Intel Sunrisepoint") Signed-off-by: Andy Shevchenko Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- drivers/spi/spi-pxa2xx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-pxa2xx.h b/drivers/spi/spi-pxa2xx.h index 513ec6c6e25b..0ae7defd3492 100644 --- a/drivers/spi/spi-pxa2xx.h +++ b/drivers/spi/spi-pxa2xx.h @@ -38,7 +38,7 @@ struct driver_data { /* SSP register addresses */ void __iomem *ioaddr; - u32 ssdr_physical; + phys_addr_t ssdr_physical; /* SSP masks*/ u32 dma_cr1; From a230cd52b8a2be39cd6e9a13b3e62af57f21372a Mon Sep 17 00:00:00 2001 From: pgzh Date: Thu, 12 Apr 2018 19:36:47 +0200 Subject: [PATCH 023/347] HID: lenovo: Add support for IBM/Lenovo Scrollpoint mice The IBM/Lenovo Scrollpoint mice feature a trackpoint-like stick instead of a scrolling wheel capable of 2-D (vertical+horizontal) scrolling. hid-generic does only expose 1-D (vertical) scrolling functionality for these mice. This patch adds support for horizontal scrolling for the IBM/Lenovo Scrollpoint mice to hid-lenovo. [jkosina@suse.cz: remove change versioning from git changelog] Signed-off-by: Peter Ganzhorn Reviewed-by: Benjamin Tissoires Signed-off-by: Peter De Wachter Signed-off-by: Jiri Kosina --- drivers/hid/Kconfig | 7 ++++--- drivers/hid/hid-ids.h | 8 ++++++++ drivers/hid/hid-lenovo.c | 36 ++++++++++++++++++++++++++++++++++++ 3 files changed, 48 insertions(+), 3 deletions(-) diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 60252fd796f6..0000434a1fbd 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -462,10 +462,11 @@ config HID_LENOVO select NEW_LEDS select LEDS_CLASS ---help--- - Support for Lenovo devices that are not fully compliant with HID standard. + Support for IBM/Lenovo devices that are not fully compliant with HID standard. - Say Y if you want support for the non-compliant features of the Lenovo - Thinkpad standalone keyboards, e.g: + Say Y if you want support for horizontal scrolling of the IBM/Lenovo + Scrollpoint mice or the non-compliant features of the Lenovo Thinkpad + standalone keyboards, e.g: - ThinkPad USB Keyboard with TrackPoint (supports extra LEDs and trackpoint configuration) - ThinkPad Compact Bluetooth Keyboard with TrackPoint (supports Fn keys) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 0b5cc910f62e..ec73aa486315 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -552,6 +552,13 @@ #define USB_VENDOR_ID_HUION 0x256c #define USB_DEVICE_ID_HUION_TABLET 0x006e +#define USB_VENDOR_ID_IBM 0x04b3 +#define USB_DEVICE_ID_IBM_SCROLLPOINT_III 0x3100 +#define USB_DEVICE_ID_IBM_SCROLLPOINT_PRO 0x3103 +#define USB_DEVICE_ID_IBM_SCROLLPOINT_OPTICAL 0x3105 +#define USB_DEVICE_ID_IBM_SCROLLPOINT_800DPI_OPTICAL 0x3108 +#define USB_DEVICE_ID_IBM_SCROLLPOINT_800DPI_OPTICAL_PRO 0x3109 + #define USB_VENDOR_ID_IDEACOM 0x1cb6 #define USB_DEVICE_ID_IDEACOM_IDC6650 0x6650 #define USB_DEVICE_ID_IDEACOM_IDC6651 0x6651 @@ -684,6 +691,7 @@ #define USB_DEVICE_ID_LENOVO_TPKBD 0x6009 #define USB_DEVICE_ID_LENOVO_CUSBKBD 0x6047 #define USB_DEVICE_ID_LENOVO_CBTKBD 0x6048 +#define USB_DEVICE_ID_LENOVO_SCROLLPOINT_OPTICAL 0x6049 #define USB_DEVICE_ID_LENOVO_TPPRODOCK 0x6067 #define USB_DEVICE_ID_LENOVO_X1_COVER 0x6085 #define USB_DEVICE_ID_LENOVO_X1_TAB 0x60a3 diff --git a/drivers/hid/hid-lenovo.c b/drivers/hid/hid-lenovo.c index 1ac4ff4d57a6..643b6eb54442 100644 --- a/drivers/hid/hid-lenovo.c +++ b/drivers/hid/hid-lenovo.c @@ -6,6 +6,17 @@ * * Copyright (c) 2012 Bernhard Seibold * Copyright (c) 2014 Jamie Lentin + * + * Linux IBM/Lenovo Scrollpoint mouse driver: + * - IBM Scrollpoint III + * - IBM Scrollpoint Pro + * - IBM Scrollpoint Optical + * - IBM Scrollpoint Optical 800dpi + * - IBM Scrollpoint Optical 800dpi Pro + * - Lenovo Scrollpoint Optical + * + * Copyright (c) 2012 Peter De Wachter + * Copyright (c) 2018 Peter Ganzhorn */ /* @@ -160,6 +171,17 @@ static int lenovo_input_mapping_cptkbd(struct hid_device *hdev, return 0; } +static int lenovo_input_mapping_scrollpoint(struct hid_device *hdev, + struct hid_input *hi, struct hid_field *field, + struct hid_usage *usage, unsigned long **bit, int *max) +{ + if (usage->hid == HID_GD_Z) { + hid_map_usage(hi, usage, bit, max, EV_REL, REL_HWHEEL); + return 1; + } + return 0; +} + static int lenovo_input_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) @@ -172,6 +194,14 @@ static int lenovo_input_mapping(struct hid_device *hdev, case USB_DEVICE_ID_LENOVO_CBTKBD: return lenovo_input_mapping_cptkbd(hdev, hi, field, usage, bit, max); + case USB_DEVICE_ID_IBM_SCROLLPOINT_III: + case USB_DEVICE_ID_IBM_SCROLLPOINT_PRO: + case USB_DEVICE_ID_IBM_SCROLLPOINT_OPTICAL: + case USB_DEVICE_ID_IBM_SCROLLPOINT_800DPI_OPTICAL: + case USB_DEVICE_ID_IBM_SCROLLPOINT_800DPI_OPTICAL_PRO: + case USB_DEVICE_ID_LENOVO_SCROLLPOINT_OPTICAL: + return lenovo_input_mapping_scrollpoint(hdev, hi, field, + usage, bit, max); default: return 0; } @@ -883,6 +913,12 @@ static const struct hid_device_id lenovo_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_CUSBKBD) }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_CBTKBD) }, { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_TPPRODOCK) }, + { HID_USB_DEVICE(USB_VENDOR_ID_IBM, USB_DEVICE_ID_IBM_SCROLLPOINT_III) }, + { HID_USB_DEVICE(USB_VENDOR_ID_IBM, USB_DEVICE_ID_IBM_SCROLLPOINT_PRO) }, + { HID_USB_DEVICE(USB_VENDOR_ID_IBM, USB_DEVICE_ID_IBM_SCROLLPOINT_OPTICAL) }, + { HID_USB_DEVICE(USB_VENDOR_ID_IBM, USB_DEVICE_ID_IBM_SCROLLPOINT_800DPI_OPTICAL) }, + { HID_USB_DEVICE(USB_VENDOR_ID_IBM, USB_DEVICE_ID_IBM_SCROLLPOINT_800DPI_OPTICAL_PRO) }, + { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_SCROLLPOINT_OPTICAL) }, { } }; From 097b8f62dd793e08f1732fc74dbb64596c7fbff9 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Tue, 24 Apr 2018 13:33:03 +0530 Subject: [PATCH 024/347] HID: wacom: Release device resource data obtained by devres_alloc() Free device resource data, if __wacom_devm_sysfs_create_group is not successful. Signed-off-by: Arvind Yadav Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/wacom_sys.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index b54ef1ffcbec..ee7a37eb159a 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -1213,8 +1213,10 @@ static int __wacom_devm_sysfs_create_group(struct wacom *wacom, devres->root = root; error = sysfs_create_group(devres->root, group); - if (error) + if (error) { + devres_free(devres); return error; + } devres_add(&wacom->hdev->dev, devres); From 37ba3c350e64adcbdd483e81bf194c05d9573515 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 14 Apr 2018 17:06:44 +0200 Subject: [PATCH 025/347] HID: intel_ish-hid: Move header size check to inside the loop With the headersize check outside of the loop, the second time through the loop the: "payload_len = recv_msg->hdr.size;" statement may deref recv_msg while it is pointing outside of our input buffer. Move the headersize check to inside the loop to fix this. Signed-off-by: Hans de Goede Reviewed-by: Benjamin Tissoires Acked-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina --- drivers/hid/intel-ish-hid/ishtp-hid-client.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/hid/intel-ish-hid/ishtp-hid-client.c b/drivers/hid/intel-ish-hid/ishtp-hid-client.c index 157b44aacdff..6ce1856bb368 100644 --- a/drivers/hid/intel-ish-hid/ishtp-hid-client.c +++ b/drivers/hid/intel-ish-hid/ishtp-hid-client.c @@ -77,21 +77,21 @@ static void process_recv(struct ishtp_cl *hid_ishtp_cl, void *recv_buf, struct ishtp_cl_data *client_data = hid_ishtp_cl->client_data; int curr_hid_dev = client_data->cur_hid_dev; - if (data_len < sizeof(struct hostif_msg_hdr)) { - dev_err(&client_data->cl_device->dev, - "[hid-ish]: error, received %u which is less than data header %u\n", - (unsigned int)data_len, - (unsigned int)sizeof(struct hostif_msg_hdr)); - ++client_data->bad_recv_cnt; - ish_hw_reset(hid_ishtp_cl->dev); - return; - } - payload = recv_buf + sizeof(struct hostif_msg_hdr); total_len = data_len; cur_pos = 0; do { + if (cur_pos + sizeof(struct hostif_msg) > total_len) { + dev_err(&client_data->cl_device->dev, + "[hid-ish]: error, received %u which is less than data header %u\n", + (unsigned int)data_len, + (unsigned int)sizeof(struct hostif_msg_hdr)); + ++client_data->bad_recv_cnt; + ish_hw_reset(hid_ishtp_cl->dev); + break; + } + recv_msg = (struct hostif_msg *)(recv_buf + cur_pos); payload_len = recv_msg->hdr.size; From 749ab300d41bcdbb8edf776d20e9a9891e79eba4 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 14 Apr 2018 17:06:45 +0200 Subject: [PATCH 026/347] HID: intel_ish-hid: Stop using a static local buffer in get_report() hid_ishtp_get_report() may be called by multiple callers at the same time, causing trouble with the static local buffer used. Also there is no reason to use a non stack buffer, the buffer is tiny and ishtp_cl_send() copies its contents so the lifetime is not an issue either. Signed-off-by: Hans de Goede Reviewed-by: Benjamin Tissoires Acked-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina --- drivers/hid/intel-ish-hid/ishtp-hid-client.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/hid/intel-ish-hid/ishtp-hid-client.c b/drivers/hid/intel-ish-hid/ishtp-hid-client.c index 6ce1856bb368..acc2536c8094 100644 --- a/drivers/hid/intel-ish-hid/ishtp-hid-client.c +++ b/drivers/hid/intel-ish-hid/ishtp-hid-client.c @@ -412,9 +412,7 @@ void hid_ishtp_get_report(struct hid_device *hid, int report_id, { struct ishtp_hid_data *hid_data = hid->driver_data; struct ishtp_cl_data *client_data = hid_data->client_data; - static unsigned char buf[10]; - unsigned int len; - struct hostif_msg_to_sensor *msg = (struct hostif_msg_to_sensor *)buf; + struct hostif_msg_to_sensor msg = {}; int rv; int i; @@ -426,14 +424,11 @@ void hid_ishtp_get_report(struct hid_device *hid, int report_id, return; } - len = sizeof(struct hostif_msg_to_sensor); - - memset(msg, 0, sizeof(struct hostif_msg_to_sensor)); - msg->hdr.command = (report_type == HID_FEATURE_REPORT) ? + msg.hdr.command = (report_type == HID_FEATURE_REPORT) ? HOSTIF_GET_FEATURE_REPORT : HOSTIF_GET_INPUT_REPORT; for (i = 0; i < client_data->num_hid_devices; ++i) { if (hid == client_data->hid_sensor_hubs[i]) { - msg->hdr.device_id = + msg.hdr.device_id = client_data->hid_devices[i].dev_id; break; } @@ -442,8 +437,9 @@ void hid_ishtp_get_report(struct hid_device *hid, int report_id, if (i == client_data->num_hid_devices) return; - msg->report_id = report_id; - rv = ishtp_cl_send(client_data->hid_ishtp_cl, buf, len); + msg.report_id = report_id; + rv = ishtp_cl_send(client_data->hid_ishtp_cl, (uint8_t *)&msg, + sizeof(msg)); if (rv) hid_ishtp_trace(client_data, "%s hid %p send failed\n", __func__, hid); From f241632fd087d3d9fbd5450f4d8c8604badd8348 Mon Sep 17 00:00:00 2001 From: Govert Overgaauw Date: Fri, 6 Apr 2018 14:41:35 +0200 Subject: [PATCH 027/347] gpio: fix aspeed_gpio unmask irq The unmask function disables all interrupts in a bank when unmasking an interrupt. Only disable the given interrupt. Cc: stable@vger.kernel.org Signed-off-by: Govert Overgaauw Signed-off-by: Linus Walleij --- drivers/gpio/gpio-aspeed.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-aspeed.c b/drivers/gpio/gpio-aspeed.c index 77e485557498..6f693b7d5220 100644 --- a/drivers/gpio/gpio-aspeed.c +++ b/drivers/gpio/gpio-aspeed.c @@ -384,7 +384,7 @@ static void aspeed_gpio_irq_set_mask(struct irq_data *d, bool set) if (set) reg |= bit; else - reg &= bit; + reg &= ~bit; iowrite32(reg, addr); spin_unlock_irqrestore(&gpio->lock, flags); From ab3dbcf78f60f46d6a0ad63b1f4b690b7a427140 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Thu, 29 Mar 2018 13:29:12 -0500 Subject: [PATCH 028/347] gpioib: do not free unrequested descriptors If the main loop in linehandle_create() encounters an error, it unwinds completely by freeing all previously requested GPIO descriptors. However, if the error occurs in the beginning of the loop before that GPIO is requested, then the exit code attempts to free a null descriptor. If extrachecks is enabled, gpiod_free() triggers a WARN_ON. Instead, keep a separate count of legitimate GPIOs so that only those are freed. Cc: stable@vger.kernel.org Fixes: d7c51b47ac11 ("gpio: userspace ABI for reading/writing GPIO lines") Reviewed-by: Bjorn Andersson Signed-off-by: Timur Tabi Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 43aeb07343ec..d07771797707 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -497,7 +497,7 @@ static int linehandle_create(struct gpio_device *gdev, void __user *ip) struct gpiohandle_request handlereq; struct linehandle_state *lh; struct file *file; - int fd, i, ret; + int fd, i, count = 0, ret; u32 lflags; if (copy_from_user(&handlereq, ip, sizeof(handlereq))) @@ -558,6 +558,7 @@ static int linehandle_create(struct gpio_device *gdev, void __user *ip) if (ret) goto out_free_descs; lh->descs[i] = desc; + count = i; if (lflags & GPIOHANDLE_REQUEST_ACTIVE_LOW) set_bit(FLAG_ACTIVE_LOW, &desc->flags); @@ -628,7 +629,7 @@ static int linehandle_create(struct gpio_device *gdev, void __user *ip) out_put_unused_fd: put_unused_fd(fd); out_free_descs: - for (; i >= 0; i--) + for (i = 0; i < count; i++) gpiod_free(lh->descs[i]); kfree(lh->label); out_free_lh: From a4eb490a41a0da3b1275fc7427084cf9ae2c3c1c Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Fri, 30 Mar 2018 16:56:10 +0530 Subject: [PATCH 029/347] HID: intel-ish-hid: use put_device() instead of kfree() Never directly free @dev after calling device_register(), even if it returned an error. Always use put_device() to give up the reference initialized. Signed-off-by: Arvind Yadav Signed-off-by: Jiri Kosina --- drivers/hid/intel-ish-hid/ishtp/bus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/intel-ish-hid/ishtp/bus.c b/drivers/hid/intel-ish-hid/ishtp/bus.c index f272cdd9bd55..2623a567ffba 100644 --- a/drivers/hid/intel-ish-hid/ishtp/bus.c +++ b/drivers/hid/intel-ish-hid/ishtp/bus.c @@ -418,7 +418,7 @@ static struct ishtp_cl_device *ishtp_bus_add_device(struct ishtp_device *dev, list_del(&device->device_link); spin_unlock_irqrestore(&dev->device_list_lock, flags); dev_err(dev->devc, "Failed to register ISHTP client device\n"); - kfree(device); + put_device(&device->dev); return NULL; } From fa89f53bd7288d6aa7a982841119e7123faf5a53 Mon Sep 17 00:00:00 2001 From: Evan Wang Date: Fri, 13 Apr 2018 12:32:30 +0800 Subject: [PATCH 030/347] libahci: Allow drivers to override stop_engine Marvell armada37xx, armada7k and armada8k share the same AHCI sata controller IP, and currently there is an issue (Errata Ref#226)that the SATA can not be detected via SATA Port-MultiPlayer(PMP). After debugging, the reason is found that the value of Port-x FIS-based Switching Control (PxFBS@0x40) became wrong. According to design, the bits[11:8, 0] of register PxFBS are cleared when Port Command and Status (0x18) bit[0] changes its value from 1 to 0, i.e. falling edge of Port Command and Status bit[0] sends PULSE that resets PxFBS bits[11:8; 0]. So it needs save the port PxFBS register before PxCMD ST write and restore the port PxFBS register afterwards in ahci_stop_engine(). This commit allows drivers to override ahci_stop_engine behavior for use by the Marvell AHCI driver(and potentially other drivers in the future). Signed-off-by: Evan Wang Cc: Ofer Heifetz Cc: Tejun Heo Cc: Thomas Petazzoni Signed-off-by: Tejun Heo --- drivers/ata/ahci.c | 6 +++--- drivers/ata/ahci.h | 7 +++++++ drivers/ata/ahci_qoriq.c | 2 +- drivers/ata/ahci_xgene.c | 4 ++-- drivers/ata/libahci.c | 20 ++++++++++++-------- drivers/ata/sata_highbank.c | 2 +- 6 files changed, 26 insertions(+), 15 deletions(-) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 1ff17799769d..6389c88b3500 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -698,7 +698,7 @@ static int ahci_vt8251_hardreset(struct ata_link *link, unsigned int *class, DPRINTK("ENTER\n"); - ahci_stop_engine(ap); + hpriv->stop_engine(ap); rc = sata_link_hardreset(link, sata_ehc_deb_timing(&link->eh_context), deadline, &online, NULL); @@ -724,7 +724,7 @@ static int ahci_p5wdh_hardreset(struct ata_link *link, unsigned int *class, bool online; int rc; - ahci_stop_engine(ap); + hpriv->stop_engine(ap); /* clear D2H reception area to properly wait for D2H FIS */ ata_tf_init(link->device, &tf); @@ -788,7 +788,7 @@ static int ahci_avn_hardreset(struct ata_link *link, unsigned int *class, DPRINTK("ENTER\n"); - ahci_stop_engine(ap); + hpriv->stop_engine(ap); for (i = 0; i < 2; i++) { u16 val; diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h index a9d996e17d75..824bd399f02e 100644 --- a/drivers/ata/ahci.h +++ b/drivers/ata/ahci.h @@ -365,6 +365,13 @@ struct ahci_host_priv { * be overridden anytime before the host is activated. */ void (*start_engine)(struct ata_port *ap); + /* + * Optional ahci_stop_engine override, if not set this gets set to the + * default ahci_stop_engine during ahci_save_initial_config, this can + * be overridden anytime before the host is activated. + */ + int (*stop_engine)(struct ata_port *ap); + irqreturn_t (*irq_handler)(int irq, void *dev_instance); /* only required for per-port MSI(-X) support */ diff --git a/drivers/ata/ahci_qoriq.c b/drivers/ata/ahci_qoriq.c index 2685f28160f7..cfdef4d44ae9 100644 --- a/drivers/ata/ahci_qoriq.c +++ b/drivers/ata/ahci_qoriq.c @@ -96,7 +96,7 @@ static int ahci_qoriq_hardreset(struct ata_link *link, unsigned int *class, DPRINTK("ENTER\n"); - ahci_stop_engine(ap); + hpriv->stop_engine(ap); /* * There is a errata on ls1021a Rev1.0 and Rev2.0 which is: diff --git a/drivers/ata/ahci_xgene.c b/drivers/ata/ahci_xgene.c index c2b5941d9184..ad58da7c9aff 100644 --- a/drivers/ata/ahci_xgene.c +++ b/drivers/ata/ahci_xgene.c @@ -165,7 +165,7 @@ static int xgene_ahci_restart_engine(struct ata_port *ap) PORT_CMD_ISSUE, 0x0, 1, 100)) return -EBUSY; - ahci_stop_engine(ap); + hpriv->stop_engine(ap); ahci_start_fis_rx(ap); /* @@ -421,7 +421,7 @@ static int xgene_ahci_hardreset(struct ata_link *link, unsigned int *class, portrxfis_saved = readl(port_mmio + PORT_FIS_ADDR); portrxfishi_saved = readl(port_mmio + PORT_FIS_ADDR_HI); - ahci_stop_engine(ap); + hpriv->stop_engine(ap); rc = xgene_ahci_do_hardreset(link, deadline, &online); diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index 7adcf3caabd0..e5d90977caec 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -560,6 +560,9 @@ void ahci_save_initial_config(struct device *dev, struct ahci_host_priv *hpriv) if (!hpriv->start_engine) hpriv->start_engine = ahci_start_engine; + if (!hpriv->stop_engine) + hpriv->stop_engine = ahci_stop_engine; + if (!hpriv->irq_handler) hpriv->irq_handler = ahci_single_level_irq_intr; } @@ -897,9 +900,10 @@ static void ahci_start_port(struct ata_port *ap) static int ahci_deinit_port(struct ata_port *ap, const char **emsg) { int rc; + struct ahci_host_priv *hpriv = ap->host->private_data; /* disable DMA */ - rc = ahci_stop_engine(ap); + rc = hpriv->stop_engine(ap); if (rc) { *emsg = "failed to stop engine"; return rc; @@ -1310,7 +1314,7 @@ int ahci_kick_engine(struct ata_port *ap) int busy, rc; /* stop engine */ - rc = ahci_stop_engine(ap); + rc = hpriv->stop_engine(ap); if (rc) goto out_restart; @@ -1549,7 +1553,7 @@ int ahci_do_hardreset(struct ata_link *link, unsigned int *class, DPRINTK("ENTER\n"); - ahci_stop_engine(ap); + hpriv->stop_engine(ap); /* clear D2H reception area to properly wait for D2H FIS */ ata_tf_init(link->device, &tf); @@ -2075,14 +2079,14 @@ void ahci_error_handler(struct ata_port *ap) if (!(ap->pflags & ATA_PFLAG_FROZEN)) { /* restart engine */ - ahci_stop_engine(ap); + hpriv->stop_engine(ap); hpriv->start_engine(ap); } sata_pmp_error_handler(ap); if (!ata_dev_enabled(ap->link.device)) - ahci_stop_engine(ap); + hpriv->stop_engine(ap); } EXPORT_SYMBOL_GPL(ahci_error_handler); @@ -2129,7 +2133,7 @@ static void ahci_set_aggressive_devslp(struct ata_port *ap, bool sleep) return; /* set DITO, MDAT, DETO and enable DevSlp, need to stop engine first */ - rc = ahci_stop_engine(ap); + rc = hpriv->stop_engine(ap); if (rc) return; @@ -2189,7 +2193,7 @@ static void ahci_enable_fbs(struct ata_port *ap) return; } - rc = ahci_stop_engine(ap); + rc = hpriv->stop_engine(ap); if (rc) return; @@ -2222,7 +2226,7 @@ static void ahci_disable_fbs(struct ata_port *ap) return; } - rc = ahci_stop_engine(ap); + rc = hpriv->stop_engine(ap); if (rc) return; diff --git a/drivers/ata/sata_highbank.c b/drivers/ata/sata_highbank.c index aafb8cc03523..e67815b896fc 100644 --- a/drivers/ata/sata_highbank.c +++ b/drivers/ata/sata_highbank.c @@ -410,7 +410,7 @@ static int ahci_highbank_hardreset(struct ata_link *link, unsigned int *class, int rc; int retry = 100; - ahci_stop_engine(ap); + hpriv->stop_engine(ap); /* clear D2H reception area to properly wait for D2H FIS */ ata_tf_init(link->device, &tf); From daa2e3bdbb0b3e691cf20a042350817310cb8cb5 Mon Sep 17 00:00:00 2001 From: Evan Wang Date: Fri, 13 Apr 2018 12:32:31 +0800 Subject: [PATCH 031/347] ata: ahci: mvebu: override ahci_stop_engine for mvebu AHCI There is an issue(Errata Ref#226) that the SATA can not be detected via SATA Port-MultiPlayer(PMP) with following error log: ata1.15: PMP product ID mismatch ata1.15: SATA link up 6.0 Gbps (SStatus 133 SControl 300) ata1.15: Port Multiplier vendor mismatch '0x1b4b'!='0x0' ata1.15: PMP revalidation failed (errno=-19) After debugging, the reason is found that the value Port-x FIS-based Switching Control(PxFBS@0x40) become wrong. According to design, the bits[11:8, 0] of register PxFBS are cleared when Port Command and Status (0x18) bit[0] changes its value from 1 to 0, i.e. falling edge of Port Command and Status bit[0] sends PULSE that resets PxFBS bits[11:8; 0]. So it needs a mvebu SATA WA to save the port PxFBS register before PxCMD ST write and restore it afterwards. This patch implements the WA in a separate function of ahci_mvebu_stop_engine to override ahci_stop_gngine. Signed-off-by: Evan Wang Cc: Ofer Heifetz Cc: Tejun Heo Cc: Thomas Petazzoni Signed-off-by: Tejun Heo --- drivers/ata/ahci_mvebu.c | 56 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/drivers/ata/ahci_mvebu.c b/drivers/ata/ahci_mvebu.c index de7128d81e9c..0045dacd814b 100644 --- a/drivers/ata/ahci_mvebu.c +++ b/drivers/ata/ahci_mvebu.c @@ -62,6 +62,60 @@ static void ahci_mvebu_regret_option(struct ahci_host_priv *hpriv) writel(0x80, hpriv->mmio + AHCI_VENDOR_SPECIFIC_0_DATA); } +/** + * ahci_mvebu_stop_engine + * + * @ap: Target ata port + * + * Errata Ref#226 - SATA Disk HOT swap issue when connected through + * Port Multiplier in FIS-based Switching mode. + * + * To avoid the issue, according to design, the bits[11:8, 0] of + * register PxFBS are cleared when Port Command and Status (0x18) bit[0] + * changes its value from 1 to 0, i.e. falling edge of Port + * Command and Status bit[0] sends PULSE that resets PxFBS + * bits[11:8; 0]. + * + * This function is used to override function of "ahci_stop_engine" + * from libahci.c by adding the mvebu work around(WA) to save PxFBS + * value before the PxCMD ST write of 0, then restore PxFBS value. + * + * Return: 0 on success; Error code otherwise. + */ +int ahci_mvebu_stop_engine(struct ata_port *ap) +{ + void __iomem *port_mmio = ahci_port_base(ap); + u32 tmp, port_fbs; + + tmp = readl(port_mmio + PORT_CMD); + + /* check if the HBA is idle */ + if ((tmp & (PORT_CMD_START | PORT_CMD_LIST_ON)) == 0) + return 0; + + /* save the port PxFBS register for later restore */ + port_fbs = readl(port_mmio + PORT_FBS); + + /* setting HBA to idle */ + tmp &= ~PORT_CMD_START; + writel(tmp, port_mmio + PORT_CMD); + + /* + * bit #15 PxCMD signal doesn't clear PxFBS, + * restore the PxFBS register right after clearing the PxCMD ST, + * no need to wait for the PxCMD bit #15. + */ + writel(port_fbs, port_mmio + PORT_FBS); + + /* wait for engine to stop. This could be as long as 500 msec */ + tmp = ata_wait_register(ap, port_mmio + PORT_CMD, + PORT_CMD_LIST_ON, PORT_CMD_LIST_ON, 1, 500); + if (tmp & PORT_CMD_LIST_ON) + return -EIO; + + return 0; +} + #ifdef CONFIG_PM_SLEEP static int ahci_mvebu_suspend(struct platform_device *pdev, pm_message_t state) { @@ -112,6 +166,8 @@ static int ahci_mvebu_probe(struct platform_device *pdev) if (rc) return rc; + hpriv->stop_engine = ahci_mvebu_stop_engine; + if (of_device_is_compatible(pdev->dev.of_node, "marvell,armada-380-ahci")) { dram = mv_mbus_dram_info(); From b5b4d3a52c8fd6e3fc6469c5a64ca0139c07229e Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 24 Apr 2018 11:19:07 +0200 Subject: [PATCH 032/347] libata: Apply NOLPM quirk for SAMSUNG MZMPC128HBFU-000MV SSD Kevin Shanahan reports the following repeating errors when using LPM, causing long delays accessing the disk: Apr 23 10:21:43 link kernel: ata1.00: exception Emask 0x0 SAct 0x0 SErr 0x50000 action 0x6 frozen Apr 23 10:21:43 link kernel: ata1: SError: { PHYRdyChg CommWake } Apr 23 10:21:43 link kernel: ata1.00: failed command: WRITE DMA Apr 23 10:21:43 link kernel: ata1.00: cmd ca/00:08:60:5d:cd/00:00:00:00:00/e1 tag 9 dma 4096 out res 50/01:01:01:00:00/00:00:00:00:00/00 Emask 0x4 (timeout) Apr 23 10:21:43 link kernel: ata1.00: status: { DRDY } Apr 23 10:21:43 link kernel: ata1.00: error: { AMNF } Apr 23 10:21:43 link kernel: ata1: hard resetting link Apr 23 10:21:43 link kernel: ata1: SATA link up 6.0 Gbps (SStatus 133 SControl 300) Apr 23 10:21:43 link kernel: ata1.00: configured for UDMA/133 Apr 23 10:21:43 link kernel: ata1: EH complete These go away when switching from med_power_with_dipm to medium_power. This is somewhat weird as the PM830 datasheet explicitly mentions DIPM being supported and the idle power-consumption is specified with DIPM enabled. There are many OEM customized firmware versions for the PM830, so for now lets assume this is firmware version specific and blacklist LPM based on the firmware version. Cc: Kevin Shanahan Reported-by: Kevin Shanahan Signed-off-by: Hans de Goede Signed-off-by: Tejun Heo --- drivers/ata/libata-core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 8bc71ca61e7f..6e400ff2b5db 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4549,6 +4549,9 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { ATA_HORKAGE_ZERO_AFTER_TRIM | ATA_HORKAGE_NOLPM, }, + /* This specific Samsung model/firmware-rev does not handle LPM well */ + { "SAMSUNG MZMPC128HBFU-000MV", "CXM14M1Q", ATA_HORKAGE_NOLPM, }, + /* devices that don't properly handle queued TRIM commands */ { "Micron_M500_*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM, }, From 184add2ca23ce5edcac0ab9c3b9be13f91e7b567 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 26 Apr 2018 22:32:21 +0200 Subject: [PATCH 033/347] libata: Apply NOLPM quirk for SanDisk SD7UB3Q*G1001 SSDs Richard Jones has reported that using med_power_with_dipm on a T450s with a Sandisk SD7UB3Q256G1001 SSD (firmware version X2180501) is causing the machine to hang. Switching the LPM to max_performance fixes this, so it seems that this Sandisk SSD does not handle LPM well. Note in the past there have been bug-reports about the following Sandisk models not working with min_power, so we may need to extend the quirk list in the future: name - firmware Sandisk SD6SB2M512G1022I - X210400 Sandisk SD6PP4M-256G-1006 - A200906 Cc: stable@vger.kernel.org Cc: Richard W.M. Jones Reported-and-tested-by: Richard W.M. Jones Signed-off-by: Hans de Goede Signed-off-by: Tejun Heo --- drivers/ata/libata-core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 6e400ff2b5db..68596bd4cf06 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4552,6 +4552,9 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { /* This specific Samsung model/firmware-rev does not handle LPM well */ { "SAMSUNG MZMPC128HBFU-000MV", "CXM14M1Q", ATA_HORKAGE_NOLPM, }, + /* Sandisk devices which are known to not handle LPM well */ + { "SanDisk SD7UB3Q*G1001", NULL, ATA_HORKAGE_NOLPM, }, + /* devices that don't properly handle queued TRIM commands */ { "Micron_M500_*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM, }, From f001cc351ad3309ec8736c374e90e5a4bc472d41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 16 Apr 2018 13:17:53 +0200 Subject: [PATCH 034/347] gpio: fix error path in lineevent_create MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If gpiod_request() fails the cleanup must not call gpiod_free(). Cc: stable@vger.kernel.org Fixes: 61f922db7221 ("gpio: userspace ABI for reading GPIO line events") Signed-off-by: Uwe Kleine-König Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index d07771797707..d8ccb500872f 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -903,7 +903,7 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip) desc = &gdev->descs[offset]; ret = gpiod_request(desc, le->label); if (ret) - goto out_free_desc; + goto out_free_label; le->desc = desc; le->eflags = eflags; From aaf96e51de117cdfa2dc04735639895b46a3da3f Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Wed, 18 Apr 2018 08:53:10 -0400 Subject: [PATCH 035/347] gpio: pci-idio-16: Fix port memory offset for get_multiple callback The ioread8 function expects a memory offset argument. This patch fixes the ports array to provide the memory addresses of the respective device I/O registers. Fixes: 810ebfc5efca ("gpio: pci-idio-16: Implement get_multiple callback") Signed-off-by: William Breathitt Gray Signed-off-by: Linus Walleij --- drivers/gpio/gpio-pci-idio-16.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpio/gpio-pci-idio-16.c b/drivers/gpio/gpio-pci-idio-16.c index 1948724d8c36..25d16b2af1c3 100644 --- a/drivers/gpio/gpio-pci-idio-16.c +++ b/drivers/gpio/gpio-pci-idio-16.c @@ -116,9 +116,9 @@ static int idio_16_gpio_get_multiple(struct gpio_chip *chip, unsigned long word_mask; const unsigned long port_mask = GENMASK(gpio_reg_size - 1, 0); unsigned long port_state; - u8 __iomem ports[] = { - idio16gpio->reg->out0_7, idio16gpio->reg->out8_15, - idio16gpio->reg->in0_7, idio16gpio->reg->in8_15, + void __iomem *ports[] = { + &idio16gpio->reg->out0_7, &idio16gpio->reg->out8_15, + &idio16gpio->reg->in0_7, &idio16gpio->reg->in8_15, }; /* clear bits array to a clean slate */ @@ -143,7 +143,7 @@ static int idio_16_gpio_get_multiple(struct gpio_chip *chip, } /* read bits from current gpio port */ - port_state = ioread8(ports + i); + port_state = ioread8(ports[i]); /* store acquired bits at respective bits array offset */ bits[word_index] |= port_state << word_offset; From 304440aa96c6e5cc37eaa7a46ff4dd862e4c21be Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Wed, 18 Apr 2018 08:53:21 -0400 Subject: [PATCH 036/347] gpio: pcie-idio-24: Fix port memory offset for get_multiple/set_multiple callbacks The ioread8/iowrite8 functions expect a memory offset argument. This patch fixes the ports array to provide the memory addresses of the respective device I/O registers. Fixes: ca37081595a2 ("gpio: pcie-idio-24: Implement get_multiple/set_multiple callbacks") Signed-off-by: William Breathitt Gray Signed-off-by: Linus Walleij --- drivers/gpio/gpio-pcie-idio-24.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/gpio/gpio-pcie-idio-24.c b/drivers/gpio/gpio-pcie-idio-24.c index 835607ecf658..3e77c2a9a9fd 100644 --- a/drivers/gpio/gpio-pcie-idio-24.c +++ b/drivers/gpio/gpio-pcie-idio-24.c @@ -206,10 +206,10 @@ static int idio_24_gpio_get_multiple(struct gpio_chip *chip, unsigned long word_mask; const unsigned long port_mask = GENMASK(gpio_reg_size - 1, 0); unsigned long port_state; - u8 __iomem ports[] = { - idio24gpio->reg->out0_7, idio24gpio->reg->out8_15, - idio24gpio->reg->out16_23, idio24gpio->reg->in0_7, - idio24gpio->reg->in8_15, idio24gpio->reg->in16_23, + void __iomem *ports[] = { + &idio24gpio->reg->out0_7, &idio24gpio->reg->out8_15, + &idio24gpio->reg->out16_23, &idio24gpio->reg->in0_7, + &idio24gpio->reg->in8_15, &idio24gpio->reg->in16_23, }; const unsigned long out_mode_mask = BIT(1); @@ -236,7 +236,7 @@ static int idio_24_gpio_get_multiple(struct gpio_chip *chip, /* read bits from current gpio port (port 6 is TTL GPIO) */ if (i < 6) - port_state = ioread8(ports + i); + port_state = ioread8(ports[i]); else if (ioread8(&idio24gpio->reg->ctl) & out_mode_mask) port_state = ioread8(&idio24gpio->reg->ttl_out0_7); else @@ -301,9 +301,9 @@ static void idio_24_gpio_set_multiple(struct gpio_chip *chip, const unsigned long port_mask = GENMASK(gpio_reg_size, 0); unsigned long flags; unsigned int out_state; - u8 __iomem ports[] = { - idio24gpio->reg->out0_7, idio24gpio->reg->out8_15, - idio24gpio->reg->out16_23 + void __iomem *ports[] = { + &idio24gpio->reg->out0_7, &idio24gpio->reg->out8_15, + &idio24gpio->reg->out16_23 }; const unsigned long out_mode_mask = BIT(1); const unsigned int ttl_offset = 48; @@ -327,9 +327,9 @@ static void idio_24_gpio_set_multiple(struct gpio_chip *chip, raw_spin_lock_irqsave(&idio24gpio->lock, flags); /* process output lines */ - out_state = ioread8(ports + i) & ~gpio_mask; + out_state = ioread8(ports[i]) & ~gpio_mask; out_state |= (*bits >> bits_offset) & gpio_mask; - iowrite8(out_state, ports + i); + iowrite8(out_state, ports[i]); raw_spin_unlock_irqrestore(&idio24gpio->lock, flags); } From 83ef4777f5ff3689e6e52d3913a13d79aa25f1b5 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Thu, 26 Apr 2018 13:00:30 +0200 Subject: [PATCH 037/347] of: overlay: Stop leaking resources on overlay removal Only the overlay notifier callbacks have a chance to potentially get hold of references to those two resources, but they are not supposed to store them beyond OF_OVERLAY_POST_REMOVE. Document the overlay notifier API, its constraint regarding pointer lifetime, and then remove intentional leaks of ovcs->overlay_tree and ovcs->fdt from free_overlay_changeset. See also https://lkml.org/lkml/2018/4/23/1063 and following. Signed-off-by: Jan Kiszka Reviewed-by: Frank Rowand Signed-off-by: Rob Herring --- Documentation/devicetree/overlay-notes.txt | 8 ++++++ drivers/of/overlay.c | 30 +++++++++++++++------- 2 files changed, 29 insertions(+), 9 deletions(-) diff --git a/Documentation/devicetree/overlay-notes.txt b/Documentation/devicetree/overlay-notes.txt index a4feb6dde8cd..725fb8d255c1 100644 --- a/Documentation/devicetree/overlay-notes.txt +++ b/Documentation/devicetree/overlay-notes.txt @@ -98,6 +98,14 @@ Finally, if you need to remove all overlays in one-go, just call of_overlay_remove_all() which will remove every single one in the correct order. +In addition, there is the option to register notifiers that get called on +overlay operations. See of_overlay_notifier_register/unregister and +enum of_overlay_notify_action for details. + +Note that a notifier callback is not supposed to store pointers to a device +tree node or its content beyond OF_OVERLAY_POST_REMOVE corresponding to the +respective node it received. + Overlay DTS Format ------------------ diff --git a/drivers/of/overlay.c b/drivers/of/overlay.c index b35fe88f1851..7baa53e5b1d7 100644 --- a/drivers/of/overlay.c +++ b/drivers/of/overlay.c @@ -102,12 +102,28 @@ static DEFINE_IDR(ovcs_idr); static BLOCKING_NOTIFIER_HEAD(overlay_notify_chain); +/** + * of_overlay_notifier_register() - Register notifier for overlay operations + * @nb: Notifier block to register + * + * Register for notification on overlay operations on device tree nodes. The + * reported actions definied by @of_reconfig_change. The notifier callback + * furthermore receives a pointer to the affected device tree node. + * + * Note that a notifier callback is not supposed to store pointers to a device + * tree node or its content beyond @OF_OVERLAY_POST_REMOVE corresponding to the + * respective node it received. + */ int of_overlay_notifier_register(struct notifier_block *nb) { return blocking_notifier_chain_register(&overlay_notify_chain, nb); } EXPORT_SYMBOL_GPL(of_overlay_notifier_register); +/** + * of_overlay_notifier_register() - Unregister notifier for overlay operations + * @nb: Notifier block to unregister + */ int of_overlay_notifier_unregister(struct notifier_block *nb) { return blocking_notifier_chain_unregister(&overlay_notify_chain, nb); @@ -671,17 +687,13 @@ static void free_overlay_changeset(struct overlay_changeset *ovcs) of_node_put(ovcs->fragments[i].overlay); } kfree(ovcs->fragments); - /* - * TODO - * - * would like to: kfree(ovcs->overlay_tree); - * but can not since drivers may have pointers into this data - * - * would like to: kfree(ovcs->fdt); - * but can not since drivers may have pointers into this data + * There should be no live pointers into ovcs->overlay_tree and + * ovcs->fdt due to the policy that overlay notifiers are not allowed + * to retain pointers into the overlay devicetree. */ - + kfree(ovcs->overlay_tree); + kfree(ovcs->fdt); kfree(ovcs); } From 84edb315cf766e7c8e09d473d63aaccd1637bbda Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Mon, 23 Apr 2018 11:14:28 +0200 Subject: [PATCH 038/347] MAINTAINERS: update s390 zcrypt maintainers email address Signed-off-by: Harald Freudenberger Signed-off-by: Martin Schwidefsky --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index dd66ae9a847e..2ad33062c9e3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12239,7 +12239,7 @@ F: Documentation/s390/vfio-ccw.txt F: include/uapi/linux/vfio_ccw.h S390 ZCRYPT DRIVER -M: Harald Freudenberger +M: Harald Freudenberger L: linux-s390@vger.kernel.org W: http://www.ibm.com/developerworks/linux/linux390/ S: Supported From d14be68fd1be2b33e362594b831eef6f007febe5 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 26 Apr 2018 14:31:52 +0200 Subject: [PATCH 039/347] s390: update defconfigs Change the following to y: arch/s390/configs/performance_defconfig:262:warning: symbol value 'm' invalid for NF_TABLES_IPV4 arch/s390/configs/performance_defconfig:264:warning: symbol value 'm' invalid for NF_TABLES_ARP arch/s390/configs/performance_defconfig:285:warning: symbol value 'm' invalid for NF_TABLES_IPV6 arch/s390/configs/performance_defconfig:306:warning: symbol value 'm' invalid for NF_TABLES_BRIDGE Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/configs/debug_defconfig | 9 ++++----- arch/s390/configs/performance_defconfig | 8 ++++---- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 6176fe9795ca..941d8cc6c9f5 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -261,9 +261,9 @@ CONFIG_IP_VS_NQ=m CONFIG_IP_VS_FTP=m CONFIG_IP_VS_PE_SIP=m CONFIG_NF_CONNTRACK_IPV4=m -CONFIG_NF_TABLES_IPV4=m +CONFIG_NF_TABLES_IPV4=y CONFIG_NFT_CHAIN_ROUTE_IPV4=m -CONFIG_NF_TABLES_ARP=m +CONFIG_NF_TABLES_ARP=y CONFIG_NFT_CHAIN_NAT_IPV4=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_AH=m @@ -284,7 +284,7 @@ CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m -CONFIG_NF_TABLES_IPV6=m +CONFIG_NF_TABLES_IPV6=y CONFIG_NFT_CHAIN_ROUTE_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_IP6_NF_IPTABLES=m @@ -305,7 +305,7 @@ CONFIG_IP6_NF_RAW=m CONFIG_IP6_NF_SECURITY=m CONFIG_IP6_NF_NAT=m CONFIG_IP6_NF_TARGET_MASQUERADE=m -CONFIG_NF_TABLES_BRIDGE=m +CONFIG_NF_TABLES_BRIDGE=y CONFIG_RDS=m CONFIG_RDS_RDMA=m CONFIG_RDS_TCP=m @@ -604,7 +604,6 @@ CONFIG_DETECT_HUNG_TASK=y CONFIG_WQ_WATCHDOG=y CONFIG_PANIC_ON_OOPS=y CONFIG_DEBUG_TIMEKEEPING=y -CONFIG_DEBUG_WW_MUTEX_SLOWPATH=y CONFIG_PROVE_LOCKING=y CONFIG_LOCK_STAT=y CONFIG_DEBUG_LOCKDEP=y diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig index c105bcc6d7a6..eb6f75f24208 100644 --- a/arch/s390/configs/performance_defconfig +++ b/arch/s390/configs/performance_defconfig @@ -259,9 +259,9 @@ CONFIG_IP_VS_NQ=m CONFIG_IP_VS_FTP=m CONFIG_IP_VS_PE_SIP=m CONFIG_NF_CONNTRACK_IPV4=m -CONFIG_NF_TABLES_IPV4=m +CONFIG_NF_TABLES_IPV4=y CONFIG_NFT_CHAIN_ROUTE_IPV4=m -CONFIG_NF_TABLES_ARP=m +CONFIG_NF_TABLES_ARP=y CONFIG_NFT_CHAIN_NAT_IPV4=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_AH=m @@ -282,7 +282,7 @@ CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m -CONFIG_NF_TABLES_IPV6=m +CONFIG_NF_TABLES_IPV6=y CONFIG_NFT_CHAIN_ROUTE_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_IP6_NF_IPTABLES=m @@ -303,7 +303,7 @@ CONFIG_IP6_NF_RAW=m CONFIG_IP6_NF_SECURITY=m CONFIG_IP6_NF_NAT=m CONFIG_IP6_NF_TARGET_MASQUERADE=m -CONFIG_NF_TABLES_BRIDGE=m +CONFIG_NF_TABLES_BRIDGE=y CONFIG_RDS=m CONFIG_RDS_RDMA=m CONFIG_RDS_TCP=m From 598d76562cc2329ef2fdd95475e287b60ba9463f Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Wed, 25 Apr 2018 12:39:06 +0200 Subject: [PATCH 040/347] s390/kexec_file: add declaration of purgatory related globals Fix the following sparse complaints: arch/s390/purgatory/purgatory.c:18:5: warning: symbol 'kernel_entry' was not declared. Should it be static? arch/s390/purgatory/purgatory.c:19:5: warning: symbol 'kernel_type' was not declared. Should it be static? arch/s390/purgatory/purgatory.c:21:5: warning: symbol 'crash_start' was not declared. Should it be static? arch/s390/purgatory/purgatory.c:22:5: warning: symbol 'crash_size' was not declared. Should it be static? Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/purgatory.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/s390/include/asm/purgatory.h b/arch/s390/include/asm/purgatory.h index e297bcfc476f..6090670df51f 100644 --- a/arch/s390/include/asm/purgatory.h +++ b/arch/s390/include/asm/purgatory.h @@ -13,5 +13,11 @@ int verify_sha256_digest(void); +extern u64 kernel_entry; +extern u64 kernel_type; + +extern u64 crash_start; +extern u64 crash_size; + #endif /* __ASSEMBLY__ */ #endif /* _S390_PURGATORY_H_ */ From d66a7355717ec903d455277a550d930ba13df4a8 Mon Sep 17 00:00:00 2001 From: Halil Pasic Date: Tue, 24 Apr 2018 13:26:56 +0200 Subject: [PATCH 041/347] vfio: ccw: fix cleanup if cp_prefetch fails If the translation of a channel program fails, we may end up attempting to clean up (free, unpin) stuff that never got translated (and allocated, pinned) in the first place. By adjusting the lengths of the chains accordingly (so the element that failed, and all subsequent elements are excluded) cleanup activities based on false assumptions can be avoided. Let's make sure cp_free works properly after cp_prefetch returns with an error by setting ch_len of a ccw chain to the number of the translated CCWs on that chain. Cc: stable@vger.kernel.org #v4.12+ Acked-by: Pierre Morel Reviewed-by: Dong Jia Shi Signed-off-by: Halil Pasic Signed-off-by: Dong Jia Shi Message-Id: <20180423110113.59385-2-bjsdjshi@linux.vnet.ibm.com> [CH: fixed typos] Signed-off-by: Cornelia Huck Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/vfio_ccw_cp.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c index 2c7550797ec2..dce92b2a895d 100644 --- a/drivers/s390/cio/vfio_ccw_cp.c +++ b/drivers/s390/cio/vfio_ccw_cp.c @@ -715,6 +715,10 @@ void cp_free(struct channel_program *cp) * and stores the result to ccwchain list. @cp must have been * initialized by a previous call with cp_init(). Otherwise, undefined * behavior occurs. + * For each chain composing the channel program: + * - On entry ch_len holds the count of CCWs to be translated. + * - On exit ch_len is adjusted to the count of successfully translated CCWs. + * This allows cp_free to find in ch_len the count of CCWs to free in a chain. * * The S/390 CCW Translation APIS (prefixed by 'cp_') are introduced * as helpers to do ccw chain translation inside the kernel. Basically @@ -749,11 +753,18 @@ int cp_prefetch(struct channel_program *cp) for (idx = 0; idx < len; idx++) { ret = ccwchain_fetch_one(chain, idx, cp); if (ret) - return ret; + goto out_err; } } return 0; +out_err: + /* Only cleanup the chain elements that were actually translated. */ + chain->ch_len = idx; + list_for_each_entry_continue(chain, &cp->ccwchain_list, next) { + chain->ch_len = 0; + } + return ret; } /** From b4331a681822b420511b3258f1c3db35001fde48 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Thu, 26 Apr 2018 19:39:09 +0200 Subject: [PATCH 042/347] vti6: Change minimum MTU to IPV4_MIN_MTU, vti6 can carry IPv4 too A vti6 interface can carry IPv4 as well, so it makes no sense to enforce a minimum MTU of IPV6_MIN_MTU. If the user sets an MTU below IPV6_MIN_MTU, IPv6 will be disabled on the interface, courtesy of addrconf_notify(). Reported-by: Xin Long Fixes: b96f9afee4eb ("ipv4/6: use core net MTU range checking") Fixes: c6741fbed6dc ("vti6: Properly adjust vti6 MTU from MTU of lower device") Fixes: 53c81e95df17 ("ip6_vti: adjust vti mtu according to mtu of lower device") Signed-off-by: Stefano Brivio Signed-off-by: Steffen Klassert --- net/ipv6/ip6_vti.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index c214ffec02f0..ca957dd93a29 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -669,7 +669,7 @@ static void vti6_link_config(struct ip6_tnl *t, bool keep_mtu) else mtu = ETH_DATA_LEN - LL_MAX_HEADER - sizeof(struct ipv6hdr); - dev->mtu = max_t(int, mtu, IPV6_MIN_MTU); + dev->mtu = max_t(int, mtu, IPV4_MIN_MTU); } /** @@ -881,7 +881,7 @@ static void vti6_dev_setup(struct net_device *dev) dev->priv_destructor = vti6_dev_free; dev->type = ARPHRD_TUNNEL6; - dev->min_mtu = IPV6_MIN_MTU; + dev->min_mtu = IPV4_MIN_MTU; dev->max_mtu = IP_MAX_MTU - sizeof(struct ipv6hdr); dev->flags |= IFF_NOARP; dev->addr_len = sizeof(struct in6_addr); From 88fc6f73fddf64eb507b04f7b2bd01d7291db514 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 16 Apr 2018 12:11:52 +0200 Subject: [PATCH 043/347] thermal: exynos: Reading temperature makes sense only when TMU is turned on When thermal sensor is not yet enabled, reading temperature might return random value. This might even result in stopping system booting when such temperature is higher than the critical value. Fix this by checking if TMU has been actually enabled before reading the temperature. This change fixes booting of Exynos4210-based board with TMU enabled (for example Samsung Trats board), which was broken since v4.4 kernel release. Signed-off-by: Marek Szyprowski Fixes: 9e4249b40340 ("thermal: exynos: Fix first temperature read after registering sensor") CC: stable@vger.kernel.org # v4.6+ Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Eduardo Valentin --- drivers/thermal/samsung/exynos_tmu.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/thermal/samsung/exynos_tmu.c b/drivers/thermal/samsung/exynos_tmu.c index ed805c7c5ace..986cbd01aaaa 100644 --- a/drivers/thermal/samsung/exynos_tmu.c +++ b/drivers/thermal/samsung/exynos_tmu.c @@ -185,6 +185,7 @@ * @regulator: pointer to the TMU regulator structure. * @reg_conf: pointer to structure to register with core thermal. * @ntrip: number of supported trip points. + * @enabled: current status of TMU device * @tmu_initialize: SoC specific TMU initialization method * @tmu_control: SoC specific TMU control method * @tmu_read: SoC specific TMU temperature read method @@ -205,6 +206,7 @@ struct exynos_tmu_data { struct regulator *regulator; struct thermal_zone_device *tzd; unsigned int ntrip; + bool enabled; int (*tmu_initialize)(struct platform_device *pdev); void (*tmu_control)(struct platform_device *pdev, bool on); @@ -398,6 +400,7 @@ static void exynos_tmu_control(struct platform_device *pdev, bool on) mutex_lock(&data->lock); clk_enable(data->clk); data->tmu_control(pdev, on); + data->enabled = on; clk_disable(data->clk); mutex_unlock(&data->lock); } @@ -890,7 +893,7 @@ static int exynos_get_temp(void *p, int *temp) { struct exynos_tmu_data *data = p; - if (!data || !data->tmu_read) + if (!data || !data->tmu_read || !data->enabled) return -EINVAL; mutex_lock(&data->lock); From c8da6cdef57b459ac0fd5d9d348f8460a575ae90 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 16 Apr 2018 12:11:53 +0200 Subject: [PATCH 044/347] thermal: exynos: Propagate error value from tmu_read() tmu_read() in case of Exynos4210 might return error for out of bound values. Current code ignores such value, what leads to reporting critical temperature value. Add proper error code propagation to exynos_get_temp() function. Signed-off-by: Marek Szyprowski CC: stable@vger.kernel.org # v4.6+ Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Eduardo Valentin --- drivers/thermal/samsung/exynos_tmu.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/samsung/exynos_tmu.c b/drivers/thermal/samsung/exynos_tmu.c index 986cbd01aaaa..ac83f721db24 100644 --- a/drivers/thermal/samsung/exynos_tmu.c +++ b/drivers/thermal/samsung/exynos_tmu.c @@ -892,6 +892,7 @@ static void exynos7_tmu_control(struct platform_device *pdev, bool on) static int exynos_get_temp(void *p, int *temp) { struct exynos_tmu_data *data = p; + int value, ret = 0; if (!data || !data->tmu_read || !data->enabled) return -EINVAL; @@ -899,12 +900,16 @@ static int exynos_get_temp(void *p, int *temp) mutex_lock(&data->lock); clk_enable(data->clk); - *temp = code_to_temp(data, data->tmu_read(data)) * MCELSIUS; + value = data->tmu_read(data); + if (value < 0) + ret = value; + else + *temp = code_to_temp(data, value) * MCELSIUS; clk_disable(data->clk); mutex_unlock(&data->lock); - return 0; + return ret; } #ifdef CONFIG_THERMAL_EMULATION From 779857e1870178e9f4aee600f17e44d81ff630b2 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 23 Apr 2018 09:32:40 +0200 Subject: [PATCH 045/347] dt-bindings: meson-uart: DT fix s/clocks-names/clock-names/ Signed-off-by: Geert Uytterhoeven Acked-by: Neil Armstrong Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/serial/amlogic,meson-uart.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/serial/amlogic,meson-uart.txt b/Documentation/devicetree/bindings/serial/amlogic,meson-uart.txt index 8ff65fa632fd..c06c045126fc 100644 --- a/Documentation/devicetree/bindings/serial/amlogic,meson-uart.txt +++ b/Documentation/devicetree/bindings/serial/amlogic,meson-uart.txt @@ -21,7 +21,7 @@ Required properties: - interrupts : identifier to the device interrupt - clocks : a list of phandle + clock-specifier pairs, one for each entry in clock names. -- clocks-names : +- clock-names : * "xtal" for external xtal clock identifier * "pclk" for the bus core clock, either the clk81 clock or the gate clock * "baud" for the source of the baudrate generator, can be either the xtal From 40626a1bf657eef557fcee9e1b8ef5b4f5b56dcd Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sun, 29 Apr 2018 08:08:24 -0700 Subject: [PATCH 046/347] hwmon: (k10temp) Fix reading critical temperature register The HTC (Hardware Temperature Control) register has moved for recent chips. Cc: stable@vger.kernel.org # v4.16+ Tested-by: Gabriel Craciunescu Signed-off-by: Guenter Roeck --- drivers/hwmon/k10temp.c | 40 ++++++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c index d2cc55e21374..34b5448b00be 100644 --- a/drivers/hwmon/k10temp.c +++ b/drivers/hwmon/k10temp.c @@ -63,10 +63,12 @@ static DEFINE_MUTEX(nb_smu_ind_mutex); #define NB_CAP_HTC 0x00000400 /* - * For F15h M60h, functionality of REG_REPORTED_TEMPERATURE - * has been moved to D0F0xBC_xD820_0CA4 [Reported Temperature - * Control] + * For F15h M60h and M70h, REG_HARDWARE_THERMAL_CONTROL + * and REG_REPORTED_TEMPERATURE have been moved to + * D0F0xBC_xD820_0C64 [Hardware Temperature Control] + * D0F0xBC_xD820_0CA4 [Reported Temperature Control] */ +#define F15H_M60H_HARDWARE_TEMP_CTRL_OFFSET 0xd8200c64 #define F15H_M60H_REPORTED_TEMP_CTRL_OFFSET 0xd8200ca4 /* F17h M01h Access througn SMN */ @@ -74,6 +76,7 @@ static DEFINE_MUTEX(nb_smu_ind_mutex); struct k10temp_data { struct pci_dev *pdev; + void (*read_htcreg)(struct pci_dev *pdev, u32 *regval); void (*read_tempreg)(struct pci_dev *pdev, u32 *regval); int temp_offset; u32 temp_adjust_mask; @@ -98,6 +101,11 @@ static const struct tctl_offset tctl_offset_table[] = { { 0x17, "AMD Ryzen Threadripper 1910", 10000 }, }; +static void read_htcreg_pci(struct pci_dev *pdev, u32 *regval) +{ + pci_read_config_dword(pdev, REG_HARDWARE_THERMAL_CONTROL, regval); +} + static void read_tempreg_pci(struct pci_dev *pdev, u32 *regval) { pci_read_config_dword(pdev, REG_REPORTED_TEMPERATURE, regval); @@ -114,6 +122,12 @@ static void amd_nb_index_read(struct pci_dev *pdev, unsigned int devfn, mutex_unlock(&nb_smu_ind_mutex); } +static void read_htcreg_nb_f15(struct pci_dev *pdev, u32 *regval) +{ + amd_nb_index_read(pdev, PCI_DEVFN(0, 0), 0xb8, + F15H_M60H_HARDWARE_TEMP_CTRL_OFFSET, regval); +} + static void read_tempreg_nb_f15(struct pci_dev *pdev, u32 *regval) { amd_nb_index_read(pdev, PCI_DEVFN(0, 0), 0xb8, @@ -160,8 +174,7 @@ static ssize_t show_temp_crit(struct device *dev, u32 regval; int value; - pci_read_config_dword(data->pdev, - REG_HARDWARE_THERMAL_CONTROL, ®val); + data->read_htcreg(data->pdev, ®val); value = ((regval >> 16) & 0x7f) * 500 + 52000; if (show_hyst) value -= ((regval >> 24) & 0xf) * 500; @@ -181,13 +194,18 @@ static umode_t k10temp_is_visible(struct kobject *kobj, struct pci_dev *pdev = data->pdev; if (index >= 2) { - u32 reg_caps, reg_htc; + u32 reg; + + if (!data->read_htcreg) + return 0; pci_read_config_dword(pdev, REG_NORTHBRIDGE_CAPABILITIES, - ®_caps); - pci_read_config_dword(pdev, REG_HARDWARE_THERMAL_CONTROL, - ®_htc); - if (!(reg_caps & NB_CAP_HTC) || !(reg_htc & HTC_ENABLE)) + ®); + if (!(reg & NB_CAP_HTC)) + return 0; + + data->read_htcreg(data->pdev, ®); + if (!(reg & HTC_ENABLE)) return 0; } return attr->mode; @@ -268,11 +286,13 @@ static int k10temp_probe(struct pci_dev *pdev, if (boot_cpu_data.x86 == 0x15 && (boot_cpu_data.x86_model == 0x60 || boot_cpu_data.x86_model == 0x70)) { + data->read_htcreg = read_htcreg_nb_f15; data->read_tempreg = read_tempreg_nb_f15; } else if (boot_cpu_data.x86 == 0x17) { data->temp_adjust_mask = 0x80000; data->read_tempreg = read_tempreg_nb_f17; } else { + data->read_htcreg = read_htcreg_pci; data->read_tempreg = read_tempreg_pci; } From e026646c178d8292de563fbecc247bada059c282 Mon Sep 17 00:00:00 2001 From: William Breathitt Gray Date: Fri, 20 Apr 2018 09:50:20 -0400 Subject: [PATCH 047/347] gpio: pcie-idio-24: Fix off-by-one error in get_multiple loop The PCIe-IDIO-24 features 8 bits of TTL GPIO which may be configured for output or input. This patch fixes an off-by-one error in the loop conditional for the get_multiple callback so that the TTL GPIO are handled. Fixes: ca37081595a2 ("gpio: pcie-idio-24: Implement get_multiple/set_multiple callbacks") Signed-off-by: William Breathitt Gray Signed-off-by: Linus Walleij --- drivers/gpio/gpio-pcie-idio-24.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-pcie-idio-24.c b/drivers/gpio/gpio-pcie-idio-24.c index 3e77c2a9a9fd..f953541e7890 100644 --- a/drivers/gpio/gpio-pcie-idio-24.c +++ b/drivers/gpio/gpio-pcie-idio-24.c @@ -217,7 +217,7 @@ static int idio_24_gpio_get_multiple(struct gpio_chip *chip, bitmap_zero(bits, chip->ngpio); /* get bits are evaluated a gpio port register at a time */ - for (i = 0; i < ARRAY_SIZE(ports); i++) { + for (i = 0; i < ARRAY_SIZE(ports) + 1; i++) { /* gpio offset in bits array */ bits_offset = i * gpio_reg_size; From 544a591668813583021474fa5c7ff4942244d654 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 26 Apr 2018 14:18:19 +0200 Subject: [PATCH 048/347] Revert "Bluetooth: btusb: Fix quirk for Atheros 1525/QCA6174" Commit f44cb4b19ed4 ("Bluetooth: btusb: Fix quirk for Atheros 1525/QCA6174") is causing bluetooth to no longer work for several people, see: https://bugzilla.redhat.com/show_bug.cgi?id=1568911 So lets revert it for now and try to find another solution for devices which need the modified quirk. Cc: stable@vger.kernel.org Cc: Takashi Iwai Signed-off-by: Hans de Goede Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btusb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index c8c8b0b8d333..ee31c997e695 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -231,6 +231,7 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0930, 0x0227), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0b05, 0x17d0), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x0036), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0cf3, 0x3004), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x3008), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x311d), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x311e), .driver_info = BTUSB_ATH3012 }, @@ -263,7 +264,6 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0489, 0xe03c), .driver_info = BTUSB_ATH3012 }, /* QCA ROME chipset */ - { USB_DEVICE(0x0cf3, 0x3004), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x0cf3, 0xe007), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x0cf3, 0xe009), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x0cf3, 0xe010), .driver_info = BTUSB_QCA_ROME }, From fc54910280eb38bde923cdf0898e74687d8e6989 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 27 Apr 2018 11:26:43 +0200 Subject: [PATCH 049/347] Bluetooth: btusb: Only check needs_reset_resume DMI table for QCA rome chipsets Jeremy Cline correctly points out in rhbz#1514836 that a device where the QCA rome chipset needs the USB_QUIRK_RESET_RESUME quirk, may also ship with a different wifi/bt chipset in some configurations. If that is the case then we are needlessly penalizing those other chipsets with a reset-resume quirk, typically causing 0.4W extra power use because this disables runtime-pm. This commit moves the DMI table check to a btusb_check_needs_reset_resume() helper (so that we can easily also call it for other chipsets) and calls this new helper only for QCA_ROME chipsets for now. BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1514836 Cc: stable@vger.kernel.org Cc: Jeremy Cline Suggested-by: Jeremy Cline Signed-off-by: Hans de Goede Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btusb.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index ee31c997e695..3d10e8335749 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -2852,6 +2852,12 @@ static int btusb_config_oob_wake(struct hci_dev *hdev) } #endif +static void btusb_check_needs_reset_resume(struct usb_interface *intf) +{ + if (dmi_check_system(btusb_needs_reset_resume_table)) + interface_to_usbdev(intf)->quirks |= USB_QUIRK_RESET_RESUME; +} + static int btusb_probe(struct usb_interface *intf, const struct usb_device_id *id) { @@ -2974,9 +2980,6 @@ static int btusb_probe(struct usb_interface *intf, hdev->send = btusb_send_frame; hdev->notify = btusb_notify; - if (dmi_check_system(btusb_needs_reset_resume_table)) - interface_to_usbdev(intf)->quirks |= USB_QUIRK_RESET_RESUME; - #ifdef CONFIG_PM err = btusb_config_oob_wake(hdev); if (err) @@ -3064,6 +3067,7 @@ static int btusb_probe(struct usb_interface *intf, data->setup_on_usb = btusb_setup_qca; hdev->set_bdaddr = btusb_set_bdaddr_ath3012; set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks); + btusb_check_needs_reset_resume(intf); } #ifdef CONFIG_BT_HCIBTUSB_RTL From c8016fa215d2d50920a58de07da2d00f7cb982f3 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 22 Apr 2018 19:57:59 +0200 Subject: [PATCH 050/347] i2c: core: ACPI: Improve OpRegion read errors When we get an error doing an ACPI SerialBus I2C OpRegion read log some useful details, like the client address and which register is being read. Signed-off-by: Hans de Goede Reviewed-by: Mika Westerberg Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core-acpi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c index a9126b3cda61..3dc43a009f5d 100644 --- a/drivers/i2c/i2c-core-acpi.c +++ b/drivers/i2c/i2c-core-acpi.c @@ -446,7 +446,8 @@ static int acpi_gsb_i2c_read_bytes(struct i2c_client *client, ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs)); if (ret < 0) - dev_err(&client->adapter->dev, "i2c read failed\n"); + dev_err(&client->adapter->dev, "i2c read %d bytes from client@%#x starting at reg %#x failed, error: %d\n", + data_len, client->addr, cmd, ret); else memcpy(data, buffer, data_len); From 7781edaed63e9396fc913e0899cb197562e6f1a0 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 22 Apr 2018 19:58:00 +0200 Subject: [PATCH 051/347] i2c: core: ACPI: Log device not acking errors at dbg loglevel Unfortunately some DSDTs issue bogus i2c reads to non existing devices resulting in -EREMOTEIO errors because the non existing device of course does not ack. This happens e.g. from the The Asus T100TA's _BIX method, the DSDT on the T100TA defines 2 resources on the I2C1 bus: Name (EHID, ResourceTemplate () { I2cSerialBusV2 (0x005B, ControllerInitiated, 0x00061A80, AddressingMode7Bit, "\\_SB.I2C1", 0x00, ResourceConsumer, , Exclusive, ) }) OperationRegion (EHOR, GenericSerialBus, Zero, 0x0100) Field (EHOR, BufferAcc, NoLock, Preserve) { Connection (EHID), Offset (0x01), AccessAs (BufferAcc, AttribBytes (0x10)), ABCD, 8 } Name (UMPC, ResourceTemplate () { I2cSerialBusV2 (0x0066, ControllerInitiated, 0x00061A80, AddressingMode7Bit, "\\_SB.I2C1", 0x00, ResourceConsumer, , Exclusive, ) }) The _BIX method does a single read (on each BIX() call) from the EHID device through the ABCD Field, only to completely ignore the result. This read always fails as there is no i2c client at address 0x5b. The _BIX method also does several reads from the UMPC device and actually uses the results of those to provide battery information. IIRC I've also seen some DSTDs which do an i2c read to detect if a device is present, also leading to false positive errors being logged. Esp. the _BIX use is problematic as the _BIX method gets called periodically to monitor battery status. This commit stops the logs from filling up with errors like these: [ 57.327858] i2c i2c-0: i2c read 16 bytes from client@0x5b starting at reg 0x1 failed, error: -121 Signed-off-by: Hans de Goede Reviewed-by: Mika Westerberg Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core-acpi.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c index 3dc43a009f5d..7c3b4740b94b 100644 --- a/drivers/i2c/i2c-core-acpi.c +++ b/drivers/i2c/i2c-core-acpi.c @@ -445,11 +445,17 @@ static int acpi_gsb_i2c_read_bytes(struct i2c_client *client, msgs[1].buf = buffer; ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs)); - if (ret < 0) - dev_err(&client->adapter->dev, "i2c read %d bytes from client@%#x starting at reg %#x failed, error: %d\n", - data_len, client->addr, cmd, ret); - else + if (ret < 0) { + /* Getting a NACK is unfortunately normal with some DSTDs */ + if (ret == -EREMOTEIO) + dev_dbg(&client->adapter->dev, "i2c read %d bytes from client@%#x starting at reg %#x failed, error: %d\n", + data_len, client->addr, cmd, ret); + else + dev_err(&client->adapter->dev, "i2c read %d bytes from client@%#x starting at reg %#x failed, error: %d\n", + data_len, client->addr, cmd, ret); + } else { memcpy(data, buffer, data_len); + } kfree(buffer); return ret; From 596b07a9a22656493726edf1739569102bd3e136 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 26 Apr 2018 20:52:06 +0200 Subject: [PATCH 052/347] Bluetooth: btusb: Add Dell XPS 13 9360 to btusb_needs_reset_resume_table The Dell XPS 13 9360 uses a QCA Rome chip which needs to be reset (and have its firmware reloaded) for bluetooth to work after suspend/resume. BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1514836 Cc: stable@vger.kernel.org Cc: Garrett LeSage Reported-and-tested-by: Garrett LeSage Signed-off-by: Hans de Goede Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btusb.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 3d10e8335749..b937cc1e2c07 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -399,6 +399,13 @@ static const struct dmi_system_id btusb_needs_reset_resume_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 3060"), }, }, + { + /* Dell XPS 9360 (QCA ROME device 0cf3:e300) */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "XPS 13 9360"), + }, + }, {} }; From fc8cec113904a47396bf0a1afc62920d66319d36 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 17 Apr 2018 18:32:26 -0400 Subject: [PATCH 053/347] dm integrity: use kvfree for kvmalloc'd memory Use kvfree instead of kfree because the array is allocated with kvmalloc. Fixes: 7eada909bfd7a ("dm: add integrity target") Cc: stable@vger.kernel.org # v4.12+ Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-integrity.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 77d9fe58dae2..514fb4aec5d1 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -2440,7 +2440,7 @@ static void dm_integrity_free_journal_scatterlist(struct dm_integrity_c *ic, str unsigned i; for (i = 0; i < ic->journal_sections; i++) kvfree(sl[i]); - kfree(sl); + kvfree(sl); } static struct scatterlist **dm_integrity_alloc_journal_scatterlist(struct dm_integrity_c *ic, struct page_list *pl) From f7879b4cea4b7867e7382efdbd805fbe35835337 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 19 Apr 2018 08:33:00 -0400 Subject: [PATCH 054/347] dm bufio: fix buffer alignment Commit 6b5e718cc138 ("dm bufio: relax alignment constraint on slab cache") relaxed alignment on dm-bufio cache, however it may break dm-crypt or dm-integrity. dm-crypt and dm-integrity require that the size of bio vector entries (bv_len) is aligned on its sector size. bv_offset doesn't have to be aligned, but bv_len must be. XFS sends unaligned bios, but they do not cross page boundary, so the requirement for aligned bv_len is met. Commit 6b5e718cc138 made dm-bufio send unaligned bios that cross page boundary, this could break dm-crypt and dm-integrity. Reinstates the alignment. Note that misaligned entries only happen when we use slab/slub debugging. Without debugging, the entries are always aligned. Fixes: 6b5e718cc138 ("dm bufio: relax alignment constraint on slab cache") Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-bufio.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 12aa9ca21d8c..dc385b70e4c3 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -1681,8 +1681,9 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign if (block_size <= KMALLOC_MAX_SIZE && (block_size < PAGE_SIZE || !is_power_of_2(block_size))) { - snprintf(slab_name, sizeof slab_name, "dm_bufio_cache-%u", c->block_size); - c->slab_cache = kmem_cache_create(slab_name, c->block_size, ARCH_KMALLOC_MINALIGN, + unsigned align = min(1U << __ffs(block_size), (unsigned)PAGE_SIZE); + snprintf(slab_name, sizeof slab_name, "dm_bufio_cache-%u", block_size); + c->slab_cache = kmem_cache_create(slab_name, block_size, align, SLAB_RECLAIM_ACCOUNT, NULL); if (!c->slab_cache) { r = -ENOMEM; From 280884fadc5bd81628da3f158ae6821ea7386432 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 30 Apr 2018 15:40:40 -0400 Subject: [PATCH 055/347] dm cache background tracker: fix sparse warning Fix drivers/md/dm-cache-background-tracker.c:169:16: warning: symbol 'alloc_work' was not declared. Should it be static? Signed-off-by: Mike Snitzer --- drivers/md/dm-cache-background-tracker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-cache-background-tracker.c b/drivers/md/dm-cache-background-tracker.c index 1d0af0a21fc7..84814e819e4c 100644 --- a/drivers/md/dm-cache-background-tracker.c +++ b/drivers/md/dm-cache-background-tracker.c @@ -166,7 +166,7 @@ static bool max_work_reached(struct background_tracker *b) atomic_read(&b->pending_demotes) >= b->max_work; } -struct bt_work *alloc_work(struct background_tracker *b) +static struct bt_work *alloc_work(struct background_tracker *b) { if (max_work_reached(b)) return NULL; From 3d97c829edd43262e7e9d720fa82c2241ba685a3 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 30 Apr 2018 16:06:28 -0400 Subject: [PATCH 056/347] dm: fix some sparse warnings and whitespace in dax methods Eliminate these sparse warnings: drivers/md/dm.c:1062:9: warning: context imbalance in 'dm_dax_direct_access' - unexpected unlock drivers/md/dm.c:1086:9: warning: context imbalance in 'dm_dax_copy_from_iter' - unexpected unlock Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 4ea404dbcf0b..0a7b0107ca78 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1020,7 +1020,8 @@ int dm_set_target_max_io_len(struct dm_target *ti, sector_t len) EXPORT_SYMBOL_GPL(dm_set_target_max_io_len); static struct dm_target *dm_dax_get_live_target(struct mapped_device *md, - sector_t sector, int *srcu_idx) + sector_t sector, int *srcu_idx) + __acquires(md->io_barrier) { struct dm_table *map; struct dm_target *ti; @@ -1037,7 +1038,7 @@ static struct dm_target *dm_dax_get_live_target(struct mapped_device *md, } static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, - long nr_pages, void **kaddr, pfn_t *pfn) + long nr_pages, void **kaddr, pfn_t *pfn) { struct mapped_device *md = dax_get_private(dax_dev); sector_t sector = pgoff * PAGE_SECTORS; @@ -1065,7 +1066,7 @@ static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, } static size_t dm_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, - void *addr, size_t bytes, struct iov_iter *i) + void *addr, size_t bytes, struct iov_iter *i) { struct mapped_device *md = dax_get_private(dax_dev); sector_t sector = pgoff * PAGE_SECTORS; From 05c58752f9dce11e396676eb731a620541590ed0 Mon Sep 17 00:00:00 2001 From: CHANDAN VN Date: Mon, 30 Apr 2018 09:50:18 +0530 Subject: [PATCH 057/347] arm64: To remove initrd reserved area entry from memblock INITRD reserved area entry is not removed from memblock even though initrd reserved area is freed. After freeing the memory it is released from memblock. The same can be checked from /sys/kernel/debug/memblock/reserved. The patch makes sure that the initrd entry is removed from memblock when keepinitrd is not enabled. The patch only affects accounting and debugging. This does not fix any memory leak. Acked-by: Laura Abbott Signed-off-by: CHANDAN VN Signed-off-by: Will Deacon --- arch/arm64/mm/init.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 9f3c47acf8ff..1b18b4722420 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -646,8 +646,10 @@ static int keep_initrd __initdata; void __init free_initrd_mem(unsigned long start, unsigned long end) { - if (!keep_initrd) + if (!keep_initrd) { free_reserved_area((void *)start, (void *)end, 0, "initrd"); + memblock_free(__virt_to_phys(start), end - start); + } } static int __init keepinitrd_setup(char *__unused) From 7b240e44d08c4a94faf3ed02c04c16b52fc045de Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 23 Apr 2018 09:35:16 +0200 Subject: [PATCH 058/347] dt-bindings: mvebu-uart: DT fix s/interrupts-names/interrupt-names/ Signed-off-by: Geert Uytterhoeven Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/serial/mvebu-uart.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/serial/mvebu-uart.txt b/Documentation/devicetree/bindings/serial/mvebu-uart.txt index 2ae2fee7e023..b7e0e32b9ac6 100644 --- a/Documentation/devicetree/bindings/serial/mvebu-uart.txt +++ b/Documentation/devicetree/bindings/serial/mvebu-uart.txt @@ -24,7 +24,7 @@ Required properties: - Must contain two elements for the extended variant of the IP (marvell,armada-3700-uart-ext): "uart-tx" and "uart-rx", respectively the UART TX interrupt and the UART RX interrupt. A - corresponding interrupts-names property must be defined. + corresponding interrupt-names property must be defined. - For backward compatibility reasons, a single element interrupts property is also supported for the standard variant of the IP, containing only the UART sum interrupt. This form is deprecated From f130307054a59ca21d2396f386be77ebd2e8ca96 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 25 Apr 2018 09:49:38 +0200 Subject: [PATCH 059/347] dt-bindings: panel: lvds: Fix path to display timing bindings Fixes: 14da3ed8dd08c581 ("devicetree/bindings: display: Document common panel properties") Signed-off-by: Geert Uytterhoeven Reviewed-by: Laurent Pinchart Signed-off-by: Rob Herring --- .../devicetree/bindings/display/panel/panel-common.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/display/panel/panel-common.txt b/Documentation/devicetree/bindings/display/panel/panel-common.txt index 557fa765adcb..5d2519af4bb5 100644 --- a/Documentation/devicetree/bindings/display/panel/panel-common.txt +++ b/Documentation/devicetree/bindings/display/panel/panel-common.txt @@ -38,7 +38,7 @@ Display Timings require specific display timings. The panel-timing subnode expresses those timings as specified in the timing subnode section of the display timing bindings defined in - Documentation/devicetree/bindings/display/display-timing.txt. + Documentation/devicetree/bindings/display/panel/display-timing.txt. Connectivity From 054f155721d7af1f343ed52bea246626d8450ca8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 1 May 2018 11:37:14 -0400 Subject: [PATCH 060/347] xprtrdma: Fix list corruption / DMAR errors during MR recovery The ro_release_mr methods check whether mr->mr_list is empty. Therefore, be sure to always use list_del_init when removing an MR linked into a list using that field. Otherwise, when recovering from transport failures or device removal, list corruption can result, or MRs can get mapped or unmapped an odd number of times, resulting in IOMMU-related failures. In general this fix is appropriate back to v4.8. However, code changes since then make it impossible to apply this patch directly to stable kernels. The fix would have to be applied by hand or reworked for kernels earlier than v4.16. Backport guidance -- there are several cases: - When creating an MR, initialize mr_list so that using list_empty on an as-yet-unused MR is safe. - When an MR is being handled by the remote invalidation path, ensure that mr_list is reinitialized when it is removed from rl_registered. - When an MR is being handled by rpcrdma_destroy_mrs, it is removed from mr_all, but it may still be on an rl_registered list. In that case, the MR needs to be removed from that list before being released. - Other cases are covered by using list_del_init in rpcrdma_mr_pop. Fixes: 9d6b04097882 ('xprtrdma: Place registered MWs on a ... ') Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/fmr_ops.c | 5 +---- net/sunrpc/xprtrdma/frwr_ops.c | 9 +++------ net/sunrpc/xprtrdma/verbs.c | 5 +++++ net/sunrpc/xprtrdma/xprt_rdma.h | 2 +- 4 files changed, 10 insertions(+), 11 deletions(-) diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c index 5cc68a824f45..f2f63959fddd 100644 --- a/net/sunrpc/xprtrdma/fmr_ops.c +++ b/net/sunrpc/xprtrdma/fmr_ops.c @@ -72,6 +72,7 @@ fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr) if (IS_ERR(mr->fmr.fm_mr)) goto out_fmr_err; + INIT_LIST_HEAD(&mr->mr_list); return 0; out_fmr_err: @@ -102,10 +103,6 @@ fmr_op_release_mr(struct rpcrdma_mr *mr) LIST_HEAD(unmap_list); int rc; - /* Ensure MW is not on any rl_registered list */ - if (!list_empty(&mr->mr_list)) - list_del(&mr->mr_list); - kfree(mr->fmr.fm_physaddrs); kfree(mr->mr_sg); diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index c5743a0960be..c59c5c788db0 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -110,6 +110,7 @@ frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr) if (!mr->mr_sg) goto out_list_err; + INIT_LIST_HEAD(&mr->mr_list); sg_init_table(mr->mr_sg, depth); init_completion(&frwr->fr_linv_done); return 0; @@ -133,10 +134,6 @@ frwr_op_release_mr(struct rpcrdma_mr *mr) { int rc; - /* Ensure MR is not on any rl_registered list */ - if (!list_empty(&mr->mr_list)) - list_del(&mr->mr_list); - rc = ib_dereg_mr(mr->frwr.fr_mr); if (rc) pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n", @@ -195,7 +192,7 @@ frwr_op_recover_mr(struct rpcrdma_mr *mr) return; out_release: - pr_err("rpcrdma: FRWR reset failed %d, %p release\n", rc, mr); + pr_err("rpcrdma: FRWR reset failed %d, %p released\n", rc, mr); r_xprt->rx_stats.mrs_orphaned++; spin_lock(&r_xprt->rx_buf.rb_mrlock); @@ -476,7 +473,7 @@ frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mrs) list_for_each_entry(mr, mrs, mr_list) if (mr->mr_handle == rep->rr_inv_rkey) { - list_del(&mr->mr_list); + list_del_init(&mr->mr_list); trace_xprtrdma_remoteinv(mr); mr->frwr.fr_state = FRWR_IS_INVALID; rpcrdma_mr_unmap_and_put(mr); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index fe5eaca2d197..c345d365af88 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1254,6 +1254,11 @@ rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf) list_del(&mr->mr_all); spin_unlock(&buf->rb_mrlock); + + /* Ensure MW is not on any rl_registered list */ + if (!list_empty(&mr->mr_list)) + list_del(&mr->mr_list); + ia->ri_ops->ro_release_mr(mr); count++; spin_lock(&buf->rb_mrlock); diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 3d3b423fa9c1..cb41b12a3bf8 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -380,7 +380,7 @@ rpcrdma_mr_pop(struct list_head *list) struct rpcrdma_mr *mr; mr = list_first_entry(list, struct rpcrdma_mr, mr_list); - list_del(&mr->mr_list); + list_del_init(&mr->mr_list); return mr; } From 7e437d61514bc3e05167da263977e3cdd698f08a Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 29 Apr 2018 13:01:11 +0100 Subject: [PATCH 061/347] ata: fix spelling mistake: "directon" -> "direction" Trivial fix to spelling mistake in sil24_cerr_info message text Signed-off-by: Colin Ian King Signed-off-by: Tejun Heo --- drivers/ata/sata_sil24.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ata/sata_sil24.c b/drivers/ata/sata_sil24.c index 4b1995e2d044..010ca101d412 100644 --- a/drivers/ata/sata_sil24.c +++ b/drivers/ata/sata_sil24.c @@ -285,13 +285,13 @@ static const struct sil24_cerr_info { [PORT_CERR_INCONSISTENT] = { AC_ERR_HSM, ATA_EH_RESET, "protocol mismatch" }, [PORT_CERR_DIRECTION] = { AC_ERR_HSM, ATA_EH_RESET, - "data directon mismatch" }, + "data direction mismatch" }, [PORT_CERR_UNDERRUN] = { AC_ERR_HSM, ATA_EH_RESET, "ran out of SGEs while writing" }, [PORT_CERR_OVERRUN] = { AC_ERR_HSM, ATA_EH_RESET, "ran out of SGEs while reading" }, [PORT_CERR_PKT_PROT] = { AC_ERR_HSM, ATA_EH_RESET, - "invalid data directon for ATAPI CDB" }, + "invalid data direction for ATAPI CDB" }, [PORT_CERR_SGT_BOUNDARY] = { AC_ERR_SYSTEM, ATA_EH_RESET, "SGT not on qword boundary" }, [PORT_CERR_SGT_TGTABRT] = { AC_ERR_HOST_BUS, ATA_EH_RESET, From 98eb6cf25f0317395d9a799d18f3d46ba26a00d3 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 1 May 2018 11:37:19 -0400 Subject: [PATCH 062/347] sunrpc: Fix latency trace point crashes If the rpc_task survived longer than the transport, task->tk_xprt points to freed memory by the time rpc_count_iostats_metrics runs. Replace the references to task->tk_xprt with references to the task's tk_client. Reported-by: syzbot+27db1f90e2b972a5f2d3@syzkaller.appspotmail.com Fixes: 40bf7eb304b5 ('sunrpc: Add static trace point to report ...') Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/sunrpc.h | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 76887d60f0c0..7f1204a179b9 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -224,6 +224,8 @@ TRACE_EVENT(rpc_stats_latency, TP_ARGS(task, backlog, rtt, execute), TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) __field(u32, xid) __field(int, version) __string(progname, task->tk_client->cl_program->name) @@ -231,13 +233,11 @@ TRACE_EVENT(rpc_stats_latency, __field(unsigned long, backlog) __field(unsigned long, rtt) __field(unsigned long, execute) - __string(addr, - task->tk_xprt->address_strings[RPC_DISPLAY_ADDR]) - __string(port, - task->tk_xprt->address_strings[RPC_DISPLAY_PORT]) ), TP_fast_assign( + __entry->client_id = task->tk_client->cl_clid; + __entry->task_id = task->tk_pid; __entry->xid = be32_to_cpu(task->tk_rqstp->rq_xid); __entry->version = task->tk_client->cl_vers; __assign_str(progname, task->tk_client->cl_program->name) @@ -245,14 +245,10 @@ TRACE_EVENT(rpc_stats_latency, __entry->backlog = ktime_to_us(backlog); __entry->rtt = ktime_to_us(rtt); __entry->execute = ktime_to_us(execute); - __assign_str(addr, - task->tk_xprt->address_strings[RPC_DISPLAY_ADDR]); - __assign_str(port, - task->tk_xprt->address_strings[RPC_DISPLAY_PORT]); ), - TP_printk("peer=[%s]:%s xid=0x%08x %sv%d %s backlog=%lu rtt=%lu execute=%lu", - __get_str(addr), __get_str(port), __entry->xid, + TP_printk("task:%u@%d xid=0x%08x %sv%d %s backlog=%lu rtt=%lu execute=%lu", + __entry->task_id, __entry->client_id, __entry->xid, __get_str(progname), __entry->version, __get_str(procname), __entry->backlog, __entry->rtt, __entry->execute) ); From 5eb9a07a4ae1008b67d8bcd47bddb3dae97456b7 Mon Sep 17 00:00:00 2001 From: Kamal Dasu Date: Thu, 26 Apr 2018 14:48:00 -0400 Subject: [PATCH 063/347] spi: bcm-qspi: Avoid setting MSPI_CDRAM_PCS for spi-nor master Added fix for probing of spi-nor device non-zero chip selects. Set MSPI_CDRAM_PCS (peripheral chip select) with spi master for MSPI controller and not for MSPI/BSPI spi-nor master controller. Ensure setting of cs bit in chip select register on chip select change. Fixes: fa236a7ef24048 ("spi: bcm-qspi: Add Broadcom MSPI driver") Signed-off-by: Kamal Dasu Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- drivers/spi/spi-bcm-qspi.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c index 1596d35498c5..2946989c61a2 100644 --- a/drivers/spi/spi-bcm-qspi.c +++ b/drivers/spi/spi-bcm-qspi.c @@ -519,16 +519,19 @@ static void bcm_qspi_disable_bspi(struct bcm_qspi *qspi) static void bcm_qspi_chip_select(struct bcm_qspi *qspi, int cs) { - u32 data = 0; + u32 rd = 0; + u32 wr = 0; - if (qspi->curr_cs == cs) - return; if (qspi->base[CHIP_SELECT]) { - data = bcm_qspi_read(qspi, CHIP_SELECT, 0); - data = (data & ~0xff) | (1 << cs); - bcm_qspi_write(qspi, CHIP_SELECT, 0, data); + rd = bcm_qspi_read(qspi, CHIP_SELECT, 0); + wr = (rd & ~0xff) | (1 << cs); + if (rd == wr) + return; + bcm_qspi_write(qspi, CHIP_SELECT, 0, wr); usleep_range(10, 20); } + + dev_dbg(&qspi->pdev->dev, "using cs:%d\n", cs); qspi->curr_cs = cs; } @@ -755,8 +758,13 @@ static int write_to_hw(struct bcm_qspi *qspi, struct spi_device *spi) dev_dbg(&qspi->pdev->dev, "WR %04x\n", val); } mspi_cdram = MSPI_CDRAM_CONT_BIT; - mspi_cdram |= (~(1 << spi->chip_select) & - MSPI_CDRAM_PCS); + + if (has_bspi(qspi)) + mspi_cdram &= ~1; + else + mspi_cdram |= (~(1 << spi->chip_select) & + MSPI_CDRAM_PCS); + mspi_cdram |= ((tp.trans->bits_per_word <= 8) ? 0 : MSPI_CDRAM_BITSE_BIT); From 602805fb618b018b7a41fbb3f93c1992b078b1ae Mon Sep 17 00:00:00 2001 From: Kamal Dasu Date: Thu, 26 Apr 2018 14:48:01 -0400 Subject: [PATCH 064/347] spi: bcm-qspi: Always read and set BSPI_MAST_N_BOOT_CTRL Always confirm the BSPI_MAST_N_BOOT_CTRL bit when enabling or disabling BSPI transfers. Fixes: 4e3b2d236fe00 ("spi: bcm-qspi: Add BSPI spi-nor flash controller driver") Signed-off-by: Kamal Dasu Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- drivers/spi/spi-bcm-qspi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c index 2946989c61a2..6573152ce893 100644 --- a/drivers/spi/spi-bcm-qspi.c +++ b/drivers/spi/spi-bcm-qspi.c @@ -490,7 +490,7 @@ static int bcm_qspi_bspi_set_mode(struct bcm_qspi *qspi, static void bcm_qspi_enable_bspi(struct bcm_qspi *qspi) { - if (!has_bspi(qspi) || (qspi->bspi_enabled)) + if (!has_bspi(qspi)) return; qspi->bspi_enabled = 1; @@ -505,7 +505,7 @@ static void bcm_qspi_enable_bspi(struct bcm_qspi *qspi) static void bcm_qspi_disable_bspi(struct bcm_qspi *qspi) { - if (!has_bspi(qspi) || (!qspi->bspi_enabled)) + if (!has_bspi(qspi)) return; qspi->bspi_enabled = 0; From f4b024271ae3e9786e5d6f1c05b01b57a74e1d6d Mon Sep 17 00:00:00 2001 From: Jim Gill Date: Fri, 20 Apr 2018 19:04:47 -0700 Subject: [PATCH 065/347] scsi: vmw-pvscsi: return DID_BUS_BUSY for adapter-initated aborts The vmw_pvscsi driver returns DID_ABORT for commands aborted internally by the adapter, leading to the filesystem going read-only. Change the result to DID_BUS_BUSY, causing the kernel to retry the command. Signed-off-by: Jim Gill Signed-off-by: Martin K. Petersen --- drivers/scsi/vmw_pvscsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/vmw_pvscsi.c b/drivers/scsi/vmw_pvscsi.c index c374e3b5c678..777e5f1e52d1 100644 --- a/drivers/scsi/vmw_pvscsi.c +++ b/drivers/scsi/vmw_pvscsi.c @@ -609,7 +609,7 @@ static void pvscsi_complete_request(struct pvscsi_adapter *adapter, break; case BTSTAT_ABORTQUEUE: - cmd->result = (DID_ABORT << 16); + cmd->result = (DID_BUS_BUSY << 16); break; case BTSTAT_SCSIPARITY: From 7d3af7d96af7b9f51e1ef67b6f4725f545737da2 Mon Sep 17 00:00:00 2001 From: Dave Carroll Date: Wed, 25 Apr 2018 10:24:20 -0600 Subject: [PATCH 066/347] scsi: aacraid: Correct hba_send to include iu_type commit b60710ec7d7a ("scsi: aacraid: enable sending of TMFs from aac_hba_send()") allows aac_hba_send() to send scsi commands, and TMF requests, but the existing code only updates the iu_type for scsi commands. For TMF requests we are sending an unknown iu_type to firmware, which causes a fault. Include iu_type prior to determining the validity of the command Reported-by: Noah Misner Fixes: b60710ec7d7ab ("aacraid: enable sending of TMFs from aac_hba_send()") Fixes: 423400e64d377 ("aacraid: Include HBA direct interface") Tested-by: Noah Misner cc: stable@vger.kernel.org Signed-off-by: Dave Carroll Reviewed-by: Raghava Aditya Renukunta Reviewed-by: Brian King Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/commsup.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index 0156c9623c35..d62ddd63f4fe 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -724,6 +724,8 @@ int aac_hba_send(u8 command, struct fib *fibptr, fib_callback callback, int wait; unsigned long flags = 0; unsigned long mflags = 0; + struct aac_hba_cmd_req *hbacmd = (struct aac_hba_cmd_req *) + fibptr->hw_fib_va; fibptr->flags = (FIB_CONTEXT_FLAG | FIB_CONTEXT_FLAG_NATIVE_HBA); if (callback) { @@ -734,11 +736,9 @@ int aac_hba_send(u8 command, struct fib *fibptr, fib_callback callback, wait = 1; - if (command == HBA_IU_TYPE_SCSI_CMD_REQ) { - struct aac_hba_cmd_req *hbacmd = - (struct aac_hba_cmd_req *)fibptr->hw_fib_va; + hbacmd->iu_type = command; - hbacmd->iu_type = command; + if (command == HBA_IU_TYPE_SCSI_CMD_REQ) { /* bit1 of request_id must be 0 */ hbacmd->request_id = cpu_to_le32((((u32)(fibptr - dev->fibs)) << 2) + 1); From b84e54616a946f24eeeca8762cb70a9074b045e7 Mon Sep 17 00:00:00 2001 From: Yixun Lan Date: Tue, 17 Apr 2018 17:08:24 +0000 Subject: [PATCH 067/347] pinctrl: meson-axg: fix the range of aobus bank The GPIOAO bank is range from GPIOAO_0 to GPIOAO_13. Fixes: 83c566806a68 ("pinctrl: meson-axg: Add new pinctrl driver for Meson AXG SoC") Reported-by: Xingyu Chen Signed-off-by: Yixun Lan Acked-by: Kevin Hilman Signed-off-by: Linus Walleij --- drivers/pinctrl/meson/pinctrl-meson-axg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/meson/pinctrl-meson-axg.c b/drivers/pinctrl/meson/pinctrl-meson-axg.c index 4b91ff74779b..99a6ceac8e53 100644 --- a/drivers/pinctrl/meson/pinctrl-meson-axg.c +++ b/drivers/pinctrl/meson/pinctrl-meson-axg.c @@ -898,7 +898,7 @@ static struct meson_bank meson_axg_periphs_banks[] = { static struct meson_bank meson_axg_aobus_banks[] = { /* name first last irq pullen pull dir out in */ - BANK("AO", GPIOAO_0, GPIOAO_9, 0, 13, 0, 16, 0, 0, 0, 0, 0, 16, 1, 0), + BANK("AO", GPIOAO_0, GPIOAO_13, 0, 13, 0, 16, 0, 0, 0, 0, 0, 16, 1, 0), }; static struct meson_pmx_bank meson_axg_periphs_pmx_banks[] = { From 83b9dc11312f48a561594a895672abb6cb2a2250 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 25 Apr 2018 13:32:11 +0300 Subject: [PATCH 068/347] pinctrl: cherryview: Associate IRQ descriptors to irqdomain When we dropped the custom Linux GPIO translation it resulted that the IRQ numbers changed slightly as well. Normally this would be fine because everyone is expected to use controller relative GPIO numbers and ACPI GpioIo/GpioInt resources. However, there is a certain set of Intel_Strago based Chromebooks where i8042 keyboard controller IRQ number is hardcoded be 182 (this is corrected with newer coreboot but the older ones still have the hardcoded Linux IRQ number). Because of this hardcoded IRQ number keyboard on those systems accidentally broke again. Fix this by iteratively associating IRQ descriptors to the chip irqdomain so that there are no gaps on those systems. Other systems are not affected. Fixes: 03c4749dd6c7 ("gpio / ACPI: Drop unnecessary ACPI GPIO to Linux GPIO translation") Link: https://bugzilla.kernel.org/show_bug.cgi?id=199463 Reported-by: Sultan Alsawaf Signed-off-by: Mika Westerberg Reviewed-by: Andy Shevchenko Signed-off-by: Linus Walleij --- drivers/pinctrl/intel/pinctrl-cherryview.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c index b1ae1618fefe..fee9225ca559 100644 --- a/drivers/pinctrl/intel/pinctrl-cherryview.c +++ b/drivers/pinctrl/intel/pinctrl-cherryview.c @@ -1622,22 +1622,30 @@ static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq) if (!need_valid_mask) { irq_base = devm_irq_alloc_descs(pctrl->dev, -1, 0, - chip->ngpio, NUMA_NO_NODE); + community->npins, NUMA_NO_NODE); if (irq_base < 0) { dev_err(pctrl->dev, "Failed to allocate IRQ numbers\n"); return irq_base; } - } else { - irq_base = 0; } - ret = gpiochip_irqchip_add(chip, &chv_gpio_irqchip, irq_base, + ret = gpiochip_irqchip_add(chip, &chv_gpio_irqchip, 0, handle_bad_irq, IRQ_TYPE_NONE); if (ret) { dev_err(pctrl->dev, "failed to add IRQ chip\n"); return ret; } + if (!need_valid_mask) { + for (i = 0; i < community->ngpio_ranges; i++) { + range = &community->gpio_ranges[i]; + + irq_domain_associate_many(chip->irq.domain, irq_base, + range->base, range->npins); + irq_base += range->npins; + } + } + gpiochip_set_chained_irqchip(chip, &chv_gpio_irqchip, irq, chv_gpio_irq_handler); return 0; From c41eb2c7f93531b8ea689b1e2bfe7e6b884a7213 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 29 Mar 2018 17:37:25 +0300 Subject: [PATCH 069/347] pinctrl: sunrisepoint: Align GPIO number space with Windows It turns out that the Windows GPIO driver for Sunrisepoint PCH-H uses similar bank structure than it does for Cannon Lake with the exception that here the bank size is always 24 pins. Starting from pad group E the BIOS/Windows GPIO numbering does not match the hardware anymore but instead there are gaps to make each pad group ("bank") consume exactly 24 pins. Because of this Linux does not use correct pins for GpioIo/GpioIo resources exposed by the BIOS. This patch aligns the GPIO number space with BIOS/Windows to make sure the same numbering scheme is used in Linux as well following what we did already for Intel Cannon Lake. Link: https://bugzilla.redhat.com/show_bug.cgi?id=1543769 Reported-by: Vivien FRASCA Signed-off-by: Mika Westerberg Reviewed-by: Andy Shevchenko Signed-off-by: Linus Walleij --- drivers/pinctrl/intel/pinctrl-sunrisepoint.c | 45 ++++++++++++++++++-- 1 file changed, 42 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-sunrisepoint.c b/drivers/pinctrl/intel/pinctrl-sunrisepoint.c index 8870a4100164..fee3435a6f15 100644 --- a/drivers/pinctrl/intel/pinctrl-sunrisepoint.c +++ b/drivers/pinctrl/intel/pinctrl-sunrisepoint.c @@ -36,6 +36,27 @@ .npins = ((e) - (s) + 1), \ } +#define SPTH_GPP(r, s, e, g) \ + { \ + .reg_num = (r), \ + .base = (s), \ + .size = ((e) - (s) + 1), \ + .gpio_base = (g), \ + } + +#define SPTH_COMMUNITY(b, s, e, g) \ + { \ + .barno = (b), \ + .padown_offset = SPT_PAD_OWN, \ + .padcfglock_offset = SPT_PADCFGLOCK, \ + .hostown_offset = SPT_HOSTSW_OWN, \ + .ie_offset = SPT_GPI_IE, \ + .pin_base = (s), \ + .npins = ((e) - (s) + 1), \ + .gpps = (g), \ + .ngpps = ARRAY_SIZE(g), \ + } + /* Sunrisepoint-LP */ static const struct pinctrl_pin_desc sptlp_pins[] = { /* GPP_A */ @@ -531,10 +552,28 @@ static const struct intel_function spth_functions[] = { FUNCTION("i2c2", spth_i2c2_groups), }; +static const struct intel_padgroup spth_community0_gpps[] = { + SPTH_GPP(0, 0, 23, 0), /* GPP_A */ + SPTH_GPP(1, 24, 47, 24), /* GPP_B */ +}; + +static const struct intel_padgroup spth_community1_gpps[] = { + SPTH_GPP(0, 48, 71, 48), /* GPP_C */ + SPTH_GPP(1, 72, 95, 72), /* GPP_D */ + SPTH_GPP(2, 96, 108, 96), /* GPP_E */ + SPTH_GPP(3, 109, 132, 120), /* GPP_F */ + SPTH_GPP(4, 133, 156, 144), /* GPP_G */ + SPTH_GPP(5, 157, 180, 168), /* GPP_H */ +}; + +static const struct intel_padgroup spth_community3_gpps[] = { + SPTH_GPP(0, 181, 191, 192), /* GPP_I */ +}; + static const struct intel_community spth_communities[] = { - SPT_COMMUNITY(0, 0, 47), - SPT_COMMUNITY(1, 48, 180), - SPT_COMMUNITY(2, 181, 191), + SPTH_COMMUNITY(0, 0, 47, spth_community0_gpps), + SPTH_COMMUNITY(1, 48, 180, spth_community1_gpps), + SPTH_COMMUNITY(2, 181, 191, spth_community3_gpps), }; static const struct intel_pinctrl_soc_data spth_soc_data = { From 6732cfd4cac514b556f36b518670af91c8bdf19a Mon Sep 17 00:00:00 2001 From: Ladislav Michl Date: Wed, 2 May 2018 12:41:32 +0200 Subject: [PATCH 070/347] mtd: onenand: omap2: Disable DMA for HIGHMEM buffers dma_map_single does not work for vmalloc-ed buffers, so disable DMA in this case. Signed-off-by: Ladislav Michl Reported-by: "H. Nikolaus Schaller" Tested-by: "H. Nikolaus Schaller" Reviewed-by: Peter Ujfalusi Signed-off-by: Boris Brezillon --- drivers/mtd/nand/onenand/omap2.c | 105 +++++++++++-------------------- 1 file changed, 38 insertions(+), 67 deletions(-) diff --git a/drivers/mtd/nand/onenand/omap2.c b/drivers/mtd/nand/onenand/omap2.c index 9c159f0dd9a6..321137158ff3 100644 --- a/drivers/mtd/nand/onenand/omap2.c +++ b/drivers/mtd/nand/onenand/omap2.c @@ -375,56 +375,42 @@ static int omap2_onenand_read_bufferram(struct mtd_info *mtd, int area, { struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd); struct onenand_chip *this = mtd->priv; - dma_addr_t dma_src, dma_dst; - int bram_offset; + struct device *dev = &c->pdev->dev; void *buf = (void *)buffer; + dma_addr_t dma_src, dma_dst; + int bram_offset, err; size_t xtra; - int ret; bram_offset = omap2_onenand_bufferram_offset(mtd, area) + area + offset; - if (bram_offset & 3 || (size_t)buf & 3 || count < 384) + /* + * If the buffer address is not DMA-able, len is not long enough to make + * DMA transfers profitable or panic_write() may be in an interrupt + * context fallback to PIO mode. + */ + if (!virt_addr_valid(buf) || bram_offset & 3 || (size_t)buf & 3 || + count < 384 || in_interrupt() || oops_in_progress ) goto out_copy; - /* panic_write() may be in an interrupt context */ - if (in_interrupt() || oops_in_progress) - goto out_copy; - - if (buf >= high_memory) { - struct page *p1; - - if (((size_t)buf & PAGE_MASK) != - ((size_t)(buf + count - 1) & PAGE_MASK)) - goto out_copy; - p1 = vmalloc_to_page(buf); - if (!p1) - goto out_copy; - buf = page_address(p1) + ((size_t)buf & ~PAGE_MASK); - } - xtra = count & 3; if (xtra) { count -= xtra; memcpy(buf + count, this->base + bram_offset + count, xtra); } + dma_dst = dma_map_single(dev, buf, count, DMA_FROM_DEVICE); dma_src = c->phys_base + bram_offset; - dma_dst = dma_map_single(&c->pdev->dev, buf, count, DMA_FROM_DEVICE); - if (dma_mapping_error(&c->pdev->dev, dma_dst)) { - dev_err(&c->pdev->dev, - "Couldn't DMA map a %d byte buffer\n", - count); + + if (dma_mapping_error(dev, dma_dst)) { + dev_err(dev, "Couldn't DMA map a %d byte buffer\n", count); goto out_copy; } - ret = omap2_onenand_dma_transfer(c, dma_src, dma_dst, count); - dma_unmap_single(&c->pdev->dev, dma_dst, count, DMA_FROM_DEVICE); + err = omap2_onenand_dma_transfer(c, dma_src, dma_dst, count); + dma_unmap_single(dev, dma_dst, count, DMA_FROM_DEVICE); + if (!err) + return 0; - if (ret) { - dev_err(&c->pdev->dev, "timeout waiting for DMA\n"); - goto out_copy; - } - - return 0; + dev_err(dev, "timeout waiting for DMA\n"); out_copy: memcpy(buf, this->base + bram_offset, count); @@ -437,49 +423,34 @@ static int omap2_onenand_write_bufferram(struct mtd_info *mtd, int area, { struct omap2_onenand *c = container_of(mtd, struct omap2_onenand, mtd); struct onenand_chip *this = mtd->priv; - dma_addr_t dma_src, dma_dst; - int bram_offset; + struct device *dev = &c->pdev->dev; void *buf = (void *)buffer; - int ret; + dma_addr_t dma_src, dma_dst; + int bram_offset, err; bram_offset = omap2_onenand_bufferram_offset(mtd, area) + area + offset; - if (bram_offset & 3 || (size_t)buf & 3 || count < 384) + /* + * If the buffer address is not DMA-able, len is not long enough to make + * DMA transfers profitable or panic_write() may be in an interrupt + * context fallback to PIO mode. + */ + if (!virt_addr_valid(buf) || bram_offset & 3 || (size_t)buf & 3 || + count < 384 || in_interrupt() || oops_in_progress ) goto out_copy; - /* panic_write() may be in an interrupt context */ - if (in_interrupt() || oops_in_progress) - goto out_copy; - - if (buf >= high_memory) { - struct page *p1; - - if (((size_t)buf & PAGE_MASK) != - ((size_t)(buf + count - 1) & PAGE_MASK)) - goto out_copy; - p1 = vmalloc_to_page(buf); - if (!p1) - goto out_copy; - buf = page_address(p1) + ((size_t)buf & ~PAGE_MASK); - } - - dma_src = dma_map_single(&c->pdev->dev, buf, count, DMA_TO_DEVICE); + dma_src = dma_map_single(dev, buf, count, DMA_TO_DEVICE); dma_dst = c->phys_base + bram_offset; - if (dma_mapping_error(&c->pdev->dev, dma_src)) { - dev_err(&c->pdev->dev, - "Couldn't DMA map a %d byte buffer\n", - count); - return -1; - } - - ret = omap2_onenand_dma_transfer(c, dma_src, dma_dst, count); - dma_unmap_single(&c->pdev->dev, dma_src, count, DMA_TO_DEVICE); - - if (ret) { - dev_err(&c->pdev->dev, "timeout waiting for DMA\n"); + if (dma_mapping_error(dev, dma_src)) { + dev_err(dev, "Couldn't DMA map a %d byte buffer\n", count); goto out_copy; } - return 0; + err = omap2_onenand_dma_transfer(c, dma_src, dma_dst, count); + dma_unmap_page(dev, dma_src, count, DMA_TO_DEVICE); + if (!err) + return 0; + + dev_err(dev, "timeout waiting for DMA\n"); out_copy: memcpy(this->base + bram_offset, buf, count); From 0b26351b910fb8fe6a056f8a1bbccabe50c0e19f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 Apr 2018 11:50:05 +0200 Subject: [PATCH 071/347] stop_machine, sched: Fix migrate_swap() vs. active_balance() deadlock Matt reported the following deadlock: CPU0 CPU1 schedule(.prev=migrate/0) pick_next_task() ... idle_balance() migrate_swap() active_balance() stop_two_cpus() spin_lock(stopper0->lock) spin_lock(stopper1->lock) ttwu(migrate/0) smp_cond_load_acquire() -- waits for schedule() stop_one_cpu(1) spin_lock(stopper1->lock) -- waits for stopper lock Fix this deadlock by taking the wakeups out from under stopper->lock. This allows the active_balance() to queue the stop work and finish the context switch, which in turn allows the wakeup from migrate_swap() to observe the context and complete the wakeup. Signed-off-by: Peter Zijlstra (Intel) Reported-by: Matt Fleming Signed-off-by: Peter Zijlstra (Intel) Acked-by: Matt Fleming Cc: Linus Torvalds Cc: Michal Hocko Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180420095005.GH4064@hirez.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/stop_machine.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index b7591261652d..64c0291b579c 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -21,6 +21,7 @@ #include #include #include +#include /* * Structure to determine completion condition and record errors. May @@ -65,27 +66,31 @@ static void cpu_stop_signal_done(struct cpu_stop_done *done) } static void __cpu_stop_queue_work(struct cpu_stopper *stopper, - struct cpu_stop_work *work) + struct cpu_stop_work *work, + struct wake_q_head *wakeq) { list_add_tail(&work->list, &stopper->works); - wake_up_process(stopper->thread); + wake_q_add(wakeq, stopper->thread); } /* queue @work to @stopper. if offline, @work is completed immediately */ static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work) { struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); + DEFINE_WAKE_Q(wakeq); unsigned long flags; bool enabled; spin_lock_irqsave(&stopper->lock, flags); enabled = stopper->enabled; if (enabled) - __cpu_stop_queue_work(stopper, work); + __cpu_stop_queue_work(stopper, work, &wakeq); else if (work->done) cpu_stop_signal_done(work->done); spin_unlock_irqrestore(&stopper->lock, flags); + wake_up_q(&wakeq); + return enabled; } @@ -229,6 +234,7 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1, { struct cpu_stopper *stopper1 = per_cpu_ptr(&cpu_stopper, cpu1); struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2); + DEFINE_WAKE_Q(wakeq); int err; retry: spin_lock_irq(&stopper1->lock); @@ -252,8 +258,8 @@ retry: goto unlock; err = 0; - __cpu_stop_queue_work(stopper1, work1); - __cpu_stop_queue_work(stopper2, work2); + __cpu_stop_queue_work(stopper1, work1, &wakeq); + __cpu_stop_queue_work(stopper2, work2, &wakeq); unlock: spin_unlock(&stopper2->lock); spin_unlock_irq(&stopper1->lock); @@ -263,6 +269,9 @@ unlock: cpu_relax(); goto retry; } + + wake_up_q(&wakeq); + return err; } /** From 457be908c83637ee10bda085a23dc05afa3b14a0 Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Thu, 26 Apr 2018 12:19:32 +0200 Subject: [PATCH 072/347] sched/fair: Fix the update of blocked load when newly idle With commit: 31e77c93e432 ("sched/fair: Update blocked load when newly idle") ... we release the rq->lock when updating blocked load of idle CPUs. This opens a time window during which another CPU can add a task to this CPU's cfs_rq. The check for newly added task of idle_balance() is not in the common path. Move the out label to include this check. Reported-by: Heiner Kallweit Tested-by: Geert Uytterhoeven Signed-off-by: Vincent Guittot Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 31e77c93e432 ("sched/fair: Update blocked load when newly idle") Link: http://lkml.kernel.org/r/20180426103133.GA6953@linaro.org Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 54dc31e7ab9b..e3002e5ada31 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -9847,6 +9847,7 @@ static int idle_balance(struct rq *this_rq, struct rq_flags *rf) if (curr_cost > this_rq->max_idle_balance_cost) this_rq->max_idle_balance_cost = curr_cost; +out: /* * While browsing the domains, we released the rq lock, a task could * have been enqueued in the meantime. Since we're not going idle, @@ -9855,7 +9856,6 @@ static int idle_balance(struct rq *this_rq, struct rq_flags *rf) if (this_rq->cfs.h_nr_running && !pulled_task) pulled_task = 1; -out: /* Move the next balance forward */ if (time_after(this_rq->next_balance, next_balance)) this_rq->next_balance = next_balance; From 741a76b350897604c48fb12beff1c9b77724dc96 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 30 Apr 2018 14:50:22 +0200 Subject: [PATCH 073/347] kthread, sched/wait: Fix kthread_parkme() wait-loop Gaurav reported a problem with __kthread_parkme() where a concurrent try_to_wake_up() could result in competing stores to ->state which, when the TASK_PARKED store got lost bad things would happen. The comment near set_current_state() actually mentions this competing store, but only mentions the case against TASK_RUNNING. This same store, with different timing, can happen against a subsequent !RUNNING store. This normally is not a problem, because as per that same comment, the !RUNNING state store is inside a condition based wait-loop: for (;;) { set_current_state(TASK_UNINTERRUPTIBLE); if (!need_sleep) break; schedule(); } __set_current_state(TASK_RUNNING); If we loose the (first) TASK_UNINTERRUPTIBLE store to a previous (concurrent) wakeup, the schedule() will NO-OP and we'll go around the loop once more. The problem here is that the TASK_PARKED store is not inside the KTHREAD_SHOULD_PARK condition wait-loop. There is a genuine issue with sleeps that do not have a condition; this is addressed in a subsequent patch. Reported-by: Gaurav Kohli Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Oleg Nesterov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- kernel/kthread.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kernel/kthread.c b/kernel/kthread.c index cd50e99202b0..cbee858e5815 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -177,12 +177,13 @@ void *kthread_probe_data(struct task_struct *task) static void __kthread_parkme(struct kthread *self) { - __set_current_state(TASK_PARKED); - while (test_bit(KTHREAD_SHOULD_PARK, &self->flags)) { + for (;;) { + set_current_state(TASK_PARKED); + if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags)) + break; if (!test_and_set_bit(KTHREAD_IS_PARKED, &self->flags)) complete(&self->parked); schedule(); - __set_current_state(TASK_PARKED); } clear_bit(KTHREAD_IS_PARKED, &self->flags); __set_current_state(TASK_RUNNING); From 85f1abe0019fcb3ea10df7029056cf42702283a8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 1 May 2018 18:14:45 +0200 Subject: [PATCH 074/347] kthread, sched/wait: Fix kthread_parkme() completion issue Even with the wait-loop fixed, there is a further issue with kthread_parkme(). Upon hotplug, when we do takedown_cpu(), smpboot_park_threads() can return before all those threads are in fact blocked, due to the placement of the complete() in __kthread_parkme(). When that happens, sched_cpu_dying() -> migrate_tasks() can end up migrating such a still runnable task onto another CPU. Normally the task will have hit schedule() and gone to sleep by the time we do kthread_unpark(), which will then do __kthread_bind() to re-bind the task to the correct CPU. However, when we loose the initial TASK_PARKED store to the concurrent wakeup issue described previously, do the complete(), get migrated, it is possible to either: - observe kthread_unpark()'s clearing of SHOULD_PARK and terminate the park and set TASK_RUNNING, or - __kthread_bind()'s wait_task_inactive() to observe the competing TASK_RUNNING store. Either way the WARN() in __kthread_bind() will trigger and fail to correctly set the CPU affinity. Fix this by only issuing the complete() when the kthread has scheduled out. This does away with all the icky 'still running' nonsense. The alternative is to promote TASK_PARKED to a special state, this guarantees wait_task_inactive() cannot observe a 'stale' TASK_RUNNING and we'll end up doing the right thing, but this preserves the whole icky business of potentially migating the still runnable thing. Reported-by: Gaurav Kohli Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/kthread.h | 1 + kernel/kthread.c | 43 ++++++++++++++++++----------------------- kernel/sched/core.c | 32 +++++++++++++++++++----------- 3 files changed, 41 insertions(+), 35 deletions(-) diff --git a/include/linux/kthread.h b/include/linux/kthread.h index c1961761311d..2803264c512f 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -62,6 +62,7 @@ void *kthread_probe_data(struct task_struct *k); int kthread_park(struct task_struct *k); void kthread_unpark(struct task_struct *k); void kthread_parkme(void); +void kthread_park_complete(struct task_struct *k); int kthreadd(void *unused); extern struct task_struct *kthreadd_task; diff --git a/kernel/kthread.c b/kernel/kthread.c index cbee858e5815..2017a39ab490 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -55,7 +55,6 @@ enum KTHREAD_BITS { KTHREAD_IS_PER_CPU = 0, KTHREAD_SHOULD_STOP, KTHREAD_SHOULD_PARK, - KTHREAD_IS_PARKED, }; static inline void set_kthread_struct(void *kthread) @@ -181,11 +180,8 @@ static void __kthread_parkme(struct kthread *self) set_current_state(TASK_PARKED); if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags)) break; - if (!test_and_set_bit(KTHREAD_IS_PARKED, &self->flags)) - complete(&self->parked); schedule(); } - clear_bit(KTHREAD_IS_PARKED, &self->flags); __set_current_state(TASK_RUNNING); } @@ -195,6 +191,11 @@ void kthread_parkme(void) } EXPORT_SYMBOL_GPL(kthread_parkme); +void kthread_park_complete(struct task_struct *k) +{ + complete(&to_kthread(k)->parked); +} + static int kthread(void *_create) { /* Copy data: it's on kthread's stack */ @@ -451,22 +452,15 @@ void kthread_unpark(struct task_struct *k) { struct kthread *kthread = to_kthread(k); - clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags); /* - * We clear the IS_PARKED bit here as we don't wait - * until the task has left the park code. So if we'd - * park before that happens we'd see the IS_PARKED bit - * which might be about to be cleared. + * Newly created kthread was parked when the CPU was offline. + * The binding was lost and we need to set it again. */ - if (test_and_clear_bit(KTHREAD_IS_PARKED, &kthread->flags)) { - /* - * Newly created kthread was parked when the CPU was offline. - * The binding was lost and we need to set it again. - */ - if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags)) - __kthread_bind(k, kthread->cpu, TASK_PARKED); - wake_up_state(k, TASK_PARKED); - } + if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags)) + __kthread_bind(k, kthread->cpu, TASK_PARKED); + + clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags); + wake_up_state(k, TASK_PARKED); } EXPORT_SYMBOL_GPL(kthread_unpark); @@ -489,12 +483,13 @@ int kthread_park(struct task_struct *k) if (WARN_ON(k->flags & PF_EXITING)) return -ENOSYS; - if (!test_bit(KTHREAD_IS_PARKED, &kthread->flags)) { - set_bit(KTHREAD_SHOULD_PARK, &kthread->flags); - if (k != current) { - wake_up_process(k); - wait_for_completion(&kthread->parked); - } + if (WARN_ON_ONCE(test_bit(KTHREAD_SHOULD_PARK, &kthread->flags))) + return -EBUSY; + + set_bit(KTHREAD_SHOULD_PARK, &kthread->flags); + if (k != current) { + wake_up_process(k); + wait_for_completion(&kthread->parked); } return 0; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 5e10aaeebfcc..7ad60e00a6a8 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7,6 +7,8 @@ */ #include "sched.h" +#include + #include #include @@ -2718,20 +2720,28 @@ static struct rq *finish_task_switch(struct task_struct *prev) membarrier_mm_sync_core_before_usermode(mm); mmdrop(mm); } - if (unlikely(prev_state == TASK_DEAD)) { - if (prev->sched_class->task_dead) - prev->sched_class->task_dead(prev); + if (unlikely(prev_state & (TASK_DEAD|TASK_PARKED))) { + switch (prev_state) { + case TASK_DEAD: + if (prev->sched_class->task_dead) + prev->sched_class->task_dead(prev); - /* - * Remove function-return probe instances associated with this - * task and put them back on the free list. - */ - kprobe_flush_task(prev); + /* + * Remove function-return probe instances associated with this + * task and put them back on the free list. + */ + kprobe_flush_task(prev); - /* Task is done with its stack. */ - put_task_stack(prev); + /* Task is done with its stack. */ + put_task_stack(prev); - put_task_struct(prev); + put_task_struct(prev); + break; + + case TASK_PARKED: + kthread_park_complete(prev); + break; + } } tick_nohz_task_switch(); From bc519d9574618e47a0c788000fb78da95e18d953 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 3 May 2018 13:09:44 -0500 Subject: [PATCH 075/347] spi: bcm2835aux: ensure interrupts are enabled for shared handler The BCM2835 AUX SPI has a shared interrupt line (with AUX UART). Downstream fixes this with an AUX irqchip to demux the IRQ sources and a DT change which breaks compatibility with older kernels. The AUX irqchip was already rejected for upstream[1] and the DT change would break working systems if the DTB is updated to a newer one. The latter issue was brought to my attention by Alex Graf. The root cause however is a bug in the shared handler. Shared handlers must check that interrupts are actually enabled before servicing the interrupt. Add a check that the TXEMPTY or IDLE interrupts are enabled. [1] https://patchwork.kernel.org/patch/9781221/ Cc: Alexander Graf Cc: Marc Zyngier Cc: Mark Brown Cc: Eric Anholt Cc: Stefan Wahren Cc: Florian Fainelli Cc: Ray Jui Cc: Scott Branden Cc: bcm-kernel-feedback-list@broadcom.com Cc: linux-spi@vger.kernel.org Cc: linux-rpi-kernel@lists.infradead.org Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Rob Herring Reviewed-by: Eric Anholt Signed-off-by: Mark Brown --- drivers/spi/spi-bcm2835aux.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/spi/spi-bcm2835aux.c b/drivers/spi/spi-bcm2835aux.c index 1431cb98fe40..3094d818cf06 100644 --- a/drivers/spi/spi-bcm2835aux.c +++ b/drivers/spi/spi-bcm2835aux.c @@ -184,6 +184,11 @@ static irqreturn_t bcm2835aux_spi_interrupt(int irq, void *dev_id) struct bcm2835aux_spi *bs = spi_master_get_devdata(master); irqreturn_t ret = IRQ_NONE; + /* IRQ may be shared, so return if our interrupts are disabled */ + if (!(bcm2835aux_rd(bs, BCM2835_AUX_SPI_CNTL1) & + (BCM2835_AUX_SPI_CNTL1_TXEMPTY | BCM2835_AUX_SPI_CNTL1_IDLE))) + return ret; + /* check if we have data to read */ while (bs->rx_len && (!(bcm2835aux_rd(bs, BCM2835_AUX_SPI_STAT) & From 9ef09e35e521bf0df5325cc9cffa726a8f5f3c1b Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 3 May 2018 17:04:59 +0100 Subject: [PATCH 076/347] bpf: fix possible spectre-v1 in find_and_alloc_map() It's possible for userspace to control attr->map_type. Sanitize it when using it as an array index to prevent an out-of-bounds value being used under speculation. Found by smatch. Signed-off-by: Mark Rutland Cc: Alexei Starovoitov Cc: Dan Carpenter Cc: Daniel Borkmann Cc: Peter Zijlstra Cc: netdev@vger.kernel.org Acked-by: David S. Miller Signed-off-by: Daniel Borkmann --- kernel/bpf/syscall.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index ebfe9f29dae8..8f434485abd2 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -26,6 +26,7 @@ #include #include #include +#include #define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \ (map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \ @@ -102,12 +103,14 @@ const struct bpf_map_ops bpf_map_offload_ops = { static struct bpf_map *find_and_alloc_map(union bpf_attr *attr) { const struct bpf_map_ops *ops; + u32 type = attr->map_type; struct bpf_map *map; int err; - if (attr->map_type >= ARRAY_SIZE(bpf_map_types)) + if (type >= ARRAY_SIZE(bpf_map_types)) return ERR_PTR(-EINVAL); - ops = bpf_map_types[attr->map_type]; + type = array_index_nospec(type, ARRAY_SIZE(bpf_map_types)); + ops = bpf_map_types[type]; if (!ops) return ERR_PTR(-EINVAL); @@ -122,7 +125,7 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr) if (IS_ERR(map)) return map; map->ops = ops; - map->map_type = attr->map_type; + map->map_type = type; return map; } From 2eced8e917b060587fc8ed46df41c364957a5050 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Fri, 2 Feb 2018 16:11:22 +0100 Subject: [PATCH 077/347] drm/exynos/mixer: fix synchronization check in interlaced mode In case of interlace mode video processor registers and mixer config register must be check to ensure internal state is in sync with shadow registers. This patch fixes page-faults in interlaced mode. Signed-off-by: Andrzej Hajda Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_mixer.c | 10 ++++++++++ drivers/gpu/drm/exynos/regs-mixer.h | 1 + 2 files changed, 11 insertions(+) diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c index 257299ec95c4..a8d978d6e4e0 100644 --- a/drivers/gpu/drm/exynos/exynos_mixer.c +++ b/drivers/gpu/drm/exynos/exynos_mixer.c @@ -482,6 +482,7 @@ static void vp_video_buffer(struct mixer_context *ctx, spin_lock_irqsave(&ctx->reg_slock, flags); + vp_reg_write(ctx, VP_SHADOW_UPDATE, 1); /* interlace or progressive scan mode */ val = (test_bit(MXR_BIT_INTERLACE, &ctx->flags) ? ~0 : 0); vp_reg_writemask(ctx, VP_MODE, val, VP_MODE_LINE_SKIP); @@ -699,6 +700,15 @@ static irqreturn_t mixer_irq_handler(int irq, void *arg) /* interlace scan need to check shadow register */ if (test_bit(MXR_BIT_INTERLACE, &ctx->flags)) { + if (test_bit(MXR_BIT_VP_ENABLED, &ctx->flags) && + vp_reg_read(ctx, VP_SHADOW_UPDATE)) + goto out; + + base = mixer_reg_read(ctx, MXR_CFG); + shadow = mixer_reg_read(ctx, MXR_CFG_S); + if (base != shadow) + goto out; + base = mixer_reg_read(ctx, MXR_GRAPHIC_BASE(0)); shadow = mixer_reg_read(ctx, MXR_GRAPHIC_BASE_S(0)); if (base != shadow) diff --git a/drivers/gpu/drm/exynos/regs-mixer.h b/drivers/gpu/drm/exynos/regs-mixer.h index c311f571bdf9..189cfa2470a8 100644 --- a/drivers/gpu/drm/exynos/regs-mixer.h +++ b/drivers/gpu/drm/exynos/regs-mixer.h @@ -47,6 +47,7 @@ #define MXR_MO 0x0304 #define MXR_RESOLUTION 0x0310 +#define MXR_CFG_S 0x2004 #define MXR_GRAPHIC0_BASE_S 0x2024 #define MXR_GRAPHIC1_BASE_S 0x2044 From 0ccc1c8f0282e237a0bd6dca7cdac4ed5e318ee7 Mon Sep 17 00:00:00 2001 From: Tobias Jakobi Date: Fri, 2 Feb 2018 16:11:23 +0100 Subject: [PATCH 078/347] drm/exynos: mixer: avoid Oops in vp_video_buffer() If an interlaced video mode is selected, a IOMMU pagefault is triggered by vp_video_buffer(). Fix the most apparent bugs: - pitch value for chroma plane - divide by two of height and vpos of source and destination Signed-off-by: Tobias Jakobi [ a.hajda: Halved also destination height and vpos, updated commit message ] Signed-off-by: Andrzej Hajda Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_mixer.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c index a8d978d6e4e0..272c79f5f5bf 100644 --- a/drivers/gpu/drm/exynos/exynos_mixer.c +++ b/drivers/gpu/drm/exynos/exynos_mixer.c @@ -473,7 +473,7 @@ static void vp_video_buffer(struct mixer_context *ctx, chroma_addr[1] = chroma_addr[0] + 0x40; } else { luma_addr[1] = luma_addr[0] + fb->pitches[0]; - chroma_addr[1] = chroma_addr[0] + fb->pitches[0]; + chroma_addr[1] = chroma_addr[0] + fb->pitches[1]; } } else { luma_addr[1] = 0; @@ -496,21 +496,23 @@ static void vp_video_buffer(struct mixer_context *ctx, vp_reg_write(ctx, VP_IMG_SIZE_Y, VP_IMG_HSIZE(fb->pitches[0]) | VP_IMG_VSIZE(fb->height)); /* chroma plane for NV12/NV21 is half the height of the luma plane */ - vp_reg_write(ctx, VP_IMG_SIZE_C, VP_IMG_HSIZE(fb->pitches[0]) | + vp_reg_write(ctx, VP_IMG_SIZE_C, VP_IMG_HSIZE(fb->pitches[1]) | VP_IMG_VSIZE(fb->height / 2)); vp_reg_write(ctx, VP_SRC_WIDTH, state->src.w); - vp_reg_write(ctx, VP_SRC_HEIGHT, state->src.h); vp_reg_write(ctx, VP_SRC_H_POSITION, VP_SRC_H_POSITION_VAL(state->src.x)); - vp_reg_write(ctx, VP_SRC_V_POSITION, state->src.y); - vp_reg_write(ctx, VP_DST_WIDTH, state->crtc.w); vp_reg_write(ctx, VP_DST_H_POSITION, state->crtc.x); + if (test_bit(MXR_BIT_INTERLACE, &ctx->flags)) { + vp_reg_write(ctx, VP_SRC_HEIGHT, state->src.h / 2); + vp_reg_write(ctx, VP_SRC_V_POSITION, state->src.y / 2); vp_reg_write(ctx, VP_DST_HEIGHT, state->crtc.h / 2); vp_reg_write(ctx, VP_DST_V_POSITION, state->crtc.y / 2); } else { + vp_reg_write(ctx, VP_SRC_HEIGHT, state->src.h); + vp_reg_write(ctx, VP_SRC_V_POSITION, state->src.y); vp_reg_write(ctx, VP_DST_HEIGHT, state->crtc.h); vp_reg_write(ctx, VP_DST_V_POSITION, state->crtc.y); } From d0f1a451e33d9ca834422622da30aa68daade56b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 4 May 2018 02:13:57 +0200 Subject: [PATCH 079/347] bpf: use array_index_nospec in find_prog_type Commit 9ef09e35e521 ("bpf: fix possible spectre-v1 in find_and_alloc_map()") converted find_and_alloc_map() over to use array_index_nospec() to sanitize map type that user space passes on map creation, and this patch does an analogous conversion for progs in find_prog_type() as it's also passed from user space when loading progs as attr->prog_type. Signed-off-by: Daniel Borkmann Cc: Mark Rutland Signed-off-by: Alexei Starovoitov --- kernel/bpf/syscall.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 8f434485abd2..016ef9025827 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -874,11 +874,17 @@ static const struct bpf_prog_ops * const bpf_prog_types[] = { static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog) { - if (type >= ARRAY_SIZE(bpf_prog_types) || !bpf_prog_types[type]) + const struct bpf_prog_ops *ops; + + if (type >= ARRAY_SIZE(bpf_prog_types)) + return -EINVAL; + type = array_index_nospec(type, ARRAY_SIZE(bpf_prog_types)); + ops = bpf_prog_types[type]; + if (!ops) return -EINVAL; if (!bpf_prog_is_dev_bound(prog->aux)) - prog->aux->ops = bpf_prog_types[type]; + prog->aux->ops = ops; else prog->aux->ops = &bpf_offload_prog_ops; prog->type = type; From b5bf9a90bbebffba888c9144c5a8a10317b04064 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 30 Apr 2018 14:51:01 +0200 Subject: [PATCH 080/347] sched/core: Introduce set_special_state() Gaurav reported a perceived problem with TASK_PARKED, which turned out to be a broken wait-loop pattern in __kthread_parkme(), but the reported issue can (and does) in fact happen for states that do not do condition based sleeps. When the 'current->state = TASK_RUNNING' store of a previous (concurrent) try_to_wake_up() collides with the setting of a 'special' sleep state, we can loose the sleep state. Normal condition based wait-loops are immune to this problem, but for sleep states that are not condition based are subject to this problem. There already is a fix for TASK_DEAD. Abstract that and also apply it to TASK_STOPPED and TASK_TRACED, both of which are also without condition based wait-loop. Reported-by: Gaurav Kohli Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Oleg Nesterov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/sched.h | 50 ++++++++++++++++++++++++++++++++---- include/linux/sched/signal.h | 2 +- kernel/sched/core.c | 17 +----------- kernel/signal.c | 17 ++++++++++-- 4 files changed, 62 insertions(+), 24 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index b3d697f3b573..c2413703f45d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -112,17 +112,36 @@ struct task_group; #ifdef CONFIG_DEBUG_ATOMIC_SLEEP +/* + * Special states are those that do not use the normal wait-loop pattern. See + * the comment with set_special_state(). + */ +#define is_special_task_state(state) \ + ((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_DEAD)) + #define __set_current_state(state_value) \ do { \ + WARN_ON_ONCE(is_special_task_state(state_value));\ current->task_state_change = _THIS_IP_; \ current->state = (state_value); \ } while (0) + #define set_current_state(state_value) \ do { \ + WARN_ON_ONCE(is_special_task_state(state_value));\ current->task_state_change = _THIS_IP_; \ smp_store_mb(current->state, (state_value)); \ } while (0) +#define set_special_state(state_value) \ + do { \ + unsigned long flags; /* may shadow */ \ + WARN_ON_ONCE(!is_special_task_state(state_value)); \ + raw_spin_lock_irqsave(¤t->pi_lock, flags); \ + current->task_state_change = _THIS_IP_; \ + current->state = (state_value); \ + raw_spin_unlock_irqrestore(¤t->pi_lock, flags); \ + } while (0) #else /* * set_current_state() includes a barrier so that the write of current->state @@ -144,8 +163,8 @@ struct task_group; * * The above is typically ordered against the wakeup, which does: * - * need_sleep = false; - * wake_up_state(p, TASK_UNINTERRUPTIBLE); + * need_sleep = false; + * wake_up_state(p, TASK_UNINTERRUPTIBLE); * * Where wake_up_state() (and all other wakeup primitives) imply enough * barriers to order the store of the variable against wakeup. @@ -154,12 +173,33 @@ struct task_group; * once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a * TASK_RUNNING store which can collide with __set_current_state(TASK_RUNNING). * - * This is obviously fine, since they both store the exact same value. + * However, with slightly different timing the wakeup TASK_RUNNING store can + * also collide with the TASK_UNINTERRUPTIBLE store. Loosing that store is not + * a problem either because that will result in one extra go around the loop + * and our @cond test will save the day. * * Also see the comments of try_to_wake_up(). */ -#define __set_current_state(state_value) do { current->state = (state_value); } while (0) -#define set_current_state(state_value) smp_store_mb(current->state, (state_value)) +#define __set_current_state(state_value) \ + current->state = (state_value) + +#define set_current_state(state_value) \ + smp_store_mb(current->state, (state_value)) + +/* + * set_special_state() should be used for those states when the blocking task + * can not use the regular condition based wait-loop. In that case we must + * serialize against wakeups such that any possible in-flight TASK_RUNNING stores + * will not collide with our state change. + */ +#define set_special_state(state_value) \ + do { \ + unsigned long flags; /* may shadow */ \ + raw_spin_lock_irqsave(¤t->pi_lock, flags); \ + current->state = (state_value); \ + raw_spin_unlock_irqrestore(¤t->pi_lock, flags); \ + } while (0) + #endif /* Task command name length: */ diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index a7ce74c74e49..113d1ad1ced7 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -280,7 +280,7 @@ static inline void kernel_signal_stop(void) { spin_lock_irq(¤t->sighand->siglock); if (current->jobctl & JOBCTL_STOP_DEQUEUED) - __set_current_state(TASK_STOPPED); + set_special_state(TASK_STOPPED); spin_unlock_irq(¤t->sighand->siglock); schedule(); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 7ad60e00a6a8..ffde9eebc846 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3508,23 +3508,8 @@ static void __sched notrace __schedule(bool preempt) void __noreturn do_task_dead(void) { - /* - * The setting of TASK_RUNNING by try_to_wake_up() may be delayed - * when the following two conditions become true. - * - There is race condition of mmap_sem (It is acquired by - * exit_mm()), and - * - SMI occurs before setting TASK_RUNINNG. - * (or hypervisor of virtual machine switches to other guest) - * As a result, we may become TASK_RUNNING after becoming TASK_DEAD - * - * To avoid it, we have to wait for releasing tsk->pi_lock which - * is held by try_to_wake_up() - */ - raw_spin_lock_irq(¤t->pi_lock); - raw_spin_unlock_irq(¤t->pi_lock); - /* Causes final put_task_struct in finish_task_switch(): */ - __set_current_state(TASK_DEAD); + set_special_state(TASK_DEAD); /* Tell freezer to ignore us: */ current->flags |= PF_NOFREEZE; diff --git a/kernel/signal.c b/kernel/signal.c index d4ccea599692..9c33163a6165 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1961,14 +1961,27 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info) return; } + set_special_state(TASK_TRACED); + /* * We're committing to trapping. TRACED should be visible before * TRAPPING is cleared; otherwise, the tracer might fail do_wait(). * Also, transition to TRACED and updates to ->jobctl should be * atomic with respect to siglock and should be done after the arch * hook as siglock is released and regrabbed across it. + * + * TRACER TRACEE + * + * ptrace_attach() + * [L] wait_on_bit(JOBCTL_TRAPPING) [S] set_special_state(TRACED) + * do_wait() + * set_current_state() smp_wmb(); + * ptrace_do_wait() + * wait_task_stopped() + * task_stopped_code() + * [L] task_is_traced() [S] task_clear_jobctl_trapping(); */ - set_current_state(TASK_TRACED); + smp_wmb(); current->last_siginfo = info; current->exit_code = exit_code; @@ -2176,7 +2189,7 @@ static bool do_signal_stop(int signr) if (task_participate_group_stop(current)) notify = CLD_STOPPED; - __set_current_state(TASK_STOPPED); + set_special_state(TASK_STOPPED); spin_unlock_irq(¤t->sighand->siglock); /* From b76401fc4ba720f0f38f7b1f9d54d5c2308bc18d Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Thu, 3 May 2018 14:21:28 +1200 Subject: [PATCH 081/347] mtd: rawnand: marvell: pass ms delay to wait_op marvell_nfc_wait_op() expects the delay to be expressed in milliseconds but nand_sdr_timings uses picoseconds. Use PSEC_TO_MSEC when passing tPROG_max to marvell_nfc_wait_op(). Fixes: 02f26ecf8c772 ("mtd: nand: add reworked Marvell NAND controller driver") Cc: stable@vger.kernel.org Signed-off-by: Chris Packham Reviewed-by: Miquel Raynal Signed-off-by: Boris Brezillon --- drivers/mtd/nand/raw/marvell_nand.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/nand/raw/marvell_nand.c b/drivers/mtd/nand/raw/marvell_nand.c index 1d779a35ac8e..e4b964fd40d8 100644 --- a/drivers/mtd/nand/raw/marvell_nand.c +++ b/drivers/mtd/nand/raw/marvell_nand.c @@ -1074,7 +1074,7 @@ static int marvell_nfc_hw_ecc_hmg_do_write_page(struct nand_chip *chip, return ret; ret = marvell_nfc_wait_op(chip, - chip->data_interface.timings.sdr.tPROG_max); + PSEC_TO_MSEC(chip->data_interface.timings.sdr.tPROG_max)); return ret; } @@ -1494,7 +1494,7 @@ static int marvell_nfc_hw_ecc_bch_write_page(struct mtd_info *mtd, } ret = marvell_nfc_wait_op(chip, - chip->data_interface.timings.sdr.tPROG_max); + PSEC_TO_MSEC(chip->data_interface.timings.sdr.tPROG_max)); marvell_nfc_disable_hw_ecc(chip); From a2ee41fd953e7c3ff6c55a3038c80354d191a318 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 3 May 2018 12:00:27 +0200 Subject: [PATCH 082/347] mtd: rawnand: marvell: fix command xtype in BCH write hook One layout supported by the Marvell NAND controller supports NAND pages of 2048 bytes, all handled in one single chunk when using BCH with a strength of 4-bit per 512 bytes. In this case, instead of the generic XTYPE_WRITE_DISPATCH/XTYPE_LAST_NAKED_RW couple, the controller expects to receive XTYPE_MONOLITHIC_RW. This fixes problems at boot like: [ 1.315475] Scanning device for bad blocks [ 3.203108] marvell-nfc f10d0000.flash: Timeout waiting for RB signal [ 3.209564] nand_bbt: error while writing BBT block -110 [ 4.243106] marvell-nfc f10d0000.flash: Timeout waiting for RB signal [ 5.283106] marvell-nfc f10d0000.flash: Timeout waiting for RB signal [ 5.289562] nand_bbt: error -110 while marking block 2047 bad [ 6.323106] marvell-nfc f10d0000.flash: Timeout waiting for RB signal [ 6.329559] nand_bbt: error while writing BBT block -110 [ 7.363106] marvell-nfc f10d0000.flash: Timeout waiting for RB signal [ 8.403105] marvell-nfc f10d0000.flash: Timeout waiting for RB signal [ 8.409559] nand_bbt: error -110 while marking block 2046 bad ... Fixes: 02f26ecf8c772 ("mtd: nand: add reworked Marvell NAND controller driver") Cc: stable@vger.kernel.org Signed-off-by: Miquel Raynal Tested-by: Chris Packham Signed-off-by: Boris Brezillon --- drivers/mtd/nand/raw/marvell_nand.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/marvell_nand.c b/drivers/mtd/nand/raw/marvell_nand.c index e4b964fd40d8..db5ec4e8bde9 100644 --- a/drivers/mtd/nand/raw/marvell_nand.c +++ b/drivers/mtd/nand/raw/marvell_nand.c @@ -1408,6 +1408,7 @@ marvell_nfc_hw_ecc_bch_write_chunk(struct nand_chip *chip, int chunk, struct marvell_nand_chip *marvell_nand = to_marvell_nand(chip); struct marvell_nfc *nfc = to_marvell_nfc(chip->controller); const struct marvell_hw_ecc_layout *lt = to_marvell_nand(chip)->layout; + u32 xtype; int ret; struct marvell_nfc_op nfc_op = { .ndcb[0] = NDCB0_CMD_TYPE(TYPE_WRITE) | NDCB0_LEN_OVRD, @@ -1423,7 +1424,12 @@ marvell_nfc_hw_ecc_bch_write_chunk(struct nand_chip *chip, int chunk, * last naked write. */ if (chunk == 0) { - nfc_op.ndcb[0] |= NDCB0_CMD_XTYPE(XTYPE_WRITE_DISPATCH) | + if (lt->nchunks == 1) + xtype = XTYPE_MONOLITHIC_RW; + else + xtype = XTYPE_WRITE_DISPATCH; + + nfc_op.ndcb[0] |= NDCB0_CMD_XTYPE(xtype) | NDCB0_ADDR_CYC(marvell_nand->addr_cyc) | NDCB0_CMD1(NAND_CMD_SEQIN); nfc_op.ndcb[1] |= NDCB1_ADDRS_PAGE(page); From 65972a6fa914b16cc15ffcffcb8bea8c64e78f49 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 10 Apr 2018 21:43:15 -0700 Subject: [PATCH 083/347] dm mirror: remove VLA usage On the quest to remove all VLAs from the kernel[1], this avoids VLAs in dm-raid1.c by just using the maximum size for the stack arrays. The nr_mirrors value was already capped at 9, so this makes it a trivial adjustment to the array sizes. [1] https://lkml.org/lkml/2018/3/7/621 Signed-off-by: Kees Cook Acked-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer --- drivers/md/dm-raid1.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 580c49cc8079..5903e492bb34 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -23,6 +23,8 @@ #define MAX_RECOVERY 1 /* Maximum number of regions recovered in parallel. */ +#define MAX_NR_MIRRORS (DM_KCOPYD_MAX_REGIONS + 1) + #define DM_RAID1_HANDLE_ERRORS 0x01 #define DM_RAID1_KEEP_LOG 0x02 #define errors_handled(p) ((p)->features & DM_RAID1_HANDLE_ERRORS) @@ -255,7 +257,7 @@ static int mirror_flush(struct dm_target *ti) unsigned long error_bits; unsigned int i; - struct dm_io_region io[ms->nr_mirrors]; + struct dm_io_region io[MAX_NR_MIRRORS]; struct mirror *m; struct dm_io_request io_req = { .bi_op = REQ_OP_WRITE, @@ -651,7 +653,7 @@ static void write_callback(unsigned long error, void *context) static void do_write(struct mirror_set *ms, struct bio *bio) { unsigned int i; - struct dm_io_region io[ms->nr_mirrors], *dest = io; + struct dm_io_region io[MAX_NR_MIRRORS], *dest = io; struct mirror *m; struct dm_io_request io_req = { .bi_op = REQ_OP_WRITE, @@ -1083,7 +1085,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) argc -= args_used; if (!argc || sscanf(argv[0], "%u%c", &nr_mirrors, &dummy) != 1 || - nr_mirrors < 2 || nr_mirrors > DM_KCOPYD_MAX_REGIONS + 1) { + nr_mirrors < 2 || nr_mirrors > MAX_NR_MIRRORS) { ti->error = "Invalid number of mirrors"; dm_dirty_log_destroy(dl); return -EINVAL; @@ -1404,7 +1406,7 @@ static void mirror_status(struct dm_target *ti, status_type_t type, int num_feature_args = 0; struct mirror_set *ms = (struct mirror_set *) ti->private; struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); - char buffer[ms->nr_mirrors + 1]; + char buffer[MAX_NR_MIRRORS + 1]; switch (type) { case STATUSTYPE_INFO: From 23b8392201e0681b76630c4cea68e1a2e1821ec6 Mon Sep 17 00:00:00 2001 From: Bhadram Varka Date: Wed, 2 May 2018 20:43:58 +0530 Subject: [PATCH 084/347] net: phy: broadcom: add support for BCM89610 PHY It adds support for BCM89610 (Single-Port 10/100/1000BASE-T) transceiver which is used in P3310 Tegra186 platform. Signed-off-by: Bhadram Varka Signed-off-by: David S. Miller --- drivers/net/phy/broadcom.c | 10 ++++++++++ include/linux/brcmphy.h | 1 + 2 files changed, 11 insertions(+) diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 3bb6b66dc7bf..f9c25912eb98 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -720,6 +720,15 @@ static struct phy_driver broadcom_drivers[] = { .get_strings = bcm_phy_get_strings, .get_stats = bcm53xx_phy_get_stats, .probe = bcm53xx_phy_probe, +}, { + .phy_id = PHY_ID_BCM89610, + .phy_id_mask = 0xfffffff0, + .name = "Broadcom BCM89610", + .features = PHY_GBIT_FEATURES, + .flags = PHY_HAS_INTERRUPT, + .config_init = bcm54xx_config_init, + .ack_interrupt = bcm_phy_ack_intr, + .config_intr = bcm_phy_config_intr, } }; module_phy_driver(broadcom_drivers); @@ -741,6 +750,7 @@ static struct mdio_device_id __maybe_unused broadcom_tbl[] = { { PHY_ID_BCMAC131, 0xfffffff0 }, { PHY_ID_BCM5241, 0xfffffff0 }, { PHY_ID_BCM5395, 0xfffffff0 }, + { PHY_ID_BCM89610, 0xfffffff0 }, { } }; diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index d3339dd48b1a..b324e01ccf2d 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -25,6 +25,7 @@ #define PHY_ID_BCM54612E 0x03625e60 #define PHY_ID_BCM54616S 0x03625d10 #define PHY_ID_BCM57780 0x03625d90 +#define PHY_ID_BCM89610 0x03625cd0 #define PHY_ID_BCM7250 0xae025280 #define PHY_ID_BCM7260 0xae025190 From 72f17baf2352ded6a1d3f4bb2d15da8c678cd2cb Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Thu, 3 May 2018 18:13:25 +0200 Subject: [PATCH 085/347] openvswitch: Don't swap table in nlattr_set() after OVS_ATTR_NESTED is found If an OVS_ATTR_NESTED attribute type is found while walking through netlink attributes, we call nlattr_set() recursively passing the length table for the following nested attributes, if different from the current one. However, once we're done with those sub-nested attributes, we should continue walking through attributes using the current table, instead of using the one related to the sub-nested attributes. For example, given this sequence: 1 OVS_KEY_ATTR_PRIORITY 2 OVS_KEY_ATTR_TUNNEL 3 OVS_TUNNEL_KEY_ATTR_ID 4 OVS_TUNNEL_KEY_ATTR_IPV4_SRC 5 OVS_TUNNEL_KEY_ATTR_IPV4_DST 6 OVS_TUNNEL_KEY_ATTR_TTL 7 OVS_TUNNEL_KEY_ATTR_TP_SRC 8 OVS_TUNNEL_KEY_ATTR_TP_DST 9 OVS_KEY_ATTR_IN_PORT 10 OVS_KEY_ATTR_SKB_MARK 11 OVS_KEY_ATTR_MPLS we switch to the 'ovs_tunnel_key_lens' table on attribute #3, and we don't switch back to 'ovs_key_lens' while setting attributes #9 to #11 in the sequence. As OVS_KEY_ATTR_MPLS evaluates to 21, and the array size of 'ovs_tunnel_key_lens' is 15, we also get this kind of KASan splat while accessing the wrong table: [ 7654.586496] ================================================================== [ 7654.594573] BUG: KASAN: global-out-of-bounds in nlattr_set+0x164/0xde9 [openvswitch] [ 7654.603214] Read of size 4 at addr ffffffffc169ecf0 by task handler29/87430 [ 7654.610983] [ 7654.612644] CPU: 21 PID: 87430 Comm: handler29 Kdump: loaded Not tainted 3.10.0-866.el7.test.x86_64 #1 [ 7654.623030] Hardware name: Dell Inc. PowerEdge R730/072T6D, BIOS 2.1.7 06/16/2016 [ 7654.631379] Call Trace: [ 7654.634108] [] dump_stack+0x19/0x1b [ 7654.639843] [] print_address_description+0x33/0x290 [ 7654.647129] [] ? nlattr_set+0x164/0xde9 [openvswitch] [ 7654.654607] [] kasan_report.part.3+0x242/0x330 [ 7654.661406] [] __asan_report_load4_noabort+0x34/0x40 [ 7654.668789] [] nlattr_set+0x164/0xde9 [openvswitch] [ 7654.676076] [] ovs_nla_get_match+0x10c8/0x1900 [openvswitch] [ 7654.684234] [] ? genl_rcv+0x28/0x40 [ 7654.689968] [] ? netlink_unicast+0x3f3/0x590 [ 7654.696574] [] ? ovs_nla_put_tunnel_info+0xb0/0xb0 [openvswitch] [ 7654.705122] [] ? unwind_get_return_address+0xb0/0xb0 [ 7654.712503] [] ? system_call_fastpath+0x1c/0x21 [ 7654.719401] [] ? update_stack_state+0x229/0x370 [ 7654.726298] [] ? update_stack_state+0x229/0x370 [ 7654.733195] [] ? kasan_unpoison_shadow+0x35/0x50 [ 7654.740187] [] ? kasan_kmalloc+0xaa/0xe0 [ 7654.746406] [] ? kasan_slab_alloc+0x12/0x20 [ 7654.752914] [] ? memset+0x31/0x40 [ 7654.758456] [] ovs_flow_cmd_new+0x2b2/0xf00 [openvswitch] [snip] [ 7655.132484] The buggy address belongs to the variable: [ 7655.138226] ovs_tunnel_key_lens+0xf0/0xffffffffffffd400 [openvswitch] [ 7655.145507] [ 7655.147166] Memory state around the buggy address: [ 7655.152514] ffffffffc169eb80: 00 00 00 00 00 00 00 00 00 00 fa fa fa fa fa fa [ 7655.160585] ffffffffc169ec00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 7655.168644] >ffffffffc169ec80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 fa fa [ 7655.176701] ^ [ 7655.184372] ffffffffc169ed00: fa fa fa fa 00 00 00 00 fa fa fa fa 00 00 00 05 [ 7655.192431] ffffffffc169ed80: fa fa fa fa 00 00 00 00 00 00 00 00 00 00 00 00 [ 7655.200490] ================================================================== Reported-by: Hangbin Liu Fixes: 982b52700482 ("openvswitch: Fix mask generation for nested attributes.") Signed-off-by: Stefano Brivio Reviewed-by: Sabrina Dubroca Signed-off-by: David S. Miller --- net/openvswitch/flow_netlink.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 7322aa1e382e..492ab0c36f7c 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -1712,13 +1712,10 @@ static void nlattr_set(struct nlattr *attr, u8 val, /* The nlattr stream should already have been validated */ nla_for_each_nested(nla, attr, rem) { - if (tbl[nla_type(nla)].len == OVS_ATTR_NESTED) { - if (tbl[nla_type(nla)].next) - tbl = tbl[nla_type(nla)].next; - nlattr_set(nla, val, tbl); - } else { + if (tbl[nla_type(nla)].len == OVS_ATTR_NESTED) + nlattr_set(nla, val, tbl[nla_type(nla)].next ? : tbl); + else memset(nla_data(nla), val, nla_len(nla)); - } if (nla_type(nla) == OVS_KEY_ATTR_CT_STATE) *(u32 *)nla_data(nla) &= CT_SUPPORTED_MASK; From 2be147f7459db5bbf292e0a6f135037b55e20b39 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 3 May 2018 13:17:12 -0500 Subject: [PATCH 086/347] atm: zatm: Fix potential Spectre v1 pool can be indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: drivers/atm/zatm.c:1462 zatm_ioctl() warn: potential spectre issue 'zatm_dev->pool_info' (local cap) Fix this by sanitizing pool before using it to index zatm_dev->pool_info Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- drivers/atm/zatm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c index 1ef67db03c8e..9c9a22958717 100644 --- a/drivers/atm/zatm.c +++ b/drivers/atm/zatm.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "uPD98401.h" #include "uPD98402.h" @@ -1458,6 +1459,8 @@ static int zatm_ioctl(struct atm_dev *dev,unsigned int cmd,void __user *arg) return -EFAULT; if (pool < 0 || pool > ZATM_LAST_POOL) return -EINVAL; + pool = array_index_nospec(pool, + ZATM_LAST_POOL + 1); spin_lock_irqsave(&zatm_dev->lock, flags); info = zatm_dev->pool_info[pool]; if (cmd == ZATM_GETPOOLZ) { From acf784bd0ce257fe43da7ca266f7a10b837479d2 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 3 May 2018 13:45:58 -0500 Subject: [PATCH 087/347] net: atm: Fix potential Spectre v1 ioc_data.dev_num can be controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: net/atm/lec.c:702 lec_vcc_attach() warn: potential spectre issue 'dev_lec' Fix this by sanitizing ioc_data.dev_num before using it to index dev_lec. Also, notice that there is another instance in which array dev_lec is being indexed using ioc_data.dev_num at line 705: lec_vcc_added(netdev_priv(dev_lec[ioc_data.dev_num]), Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://marc.info/?l=linux-kernel&m=152449131114778&w=2 Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- net/atm/lec.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/atm/lec.c b/net/atm/lec.c index 01d5d20a6eb1..3138a869b5c0 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -41,6 +41,9 @@ static unsigned char bridge_ula_lec[] = { 0x01, 0x80, 0xc2, 0x00, 0x00 }; #include #include +/* Hardening for Spectre-v1 */ +#include + #include "lec.h" #include "lec_arpc.h" #include "resources.h" @@ -687,8 +690,10 @@ static int lec_vcc_attach(struct atm_vcc *vcc, void __user *arg) bytes_left = copy_from_user(&ioc_data, arg, sizeof(struct atmlec_ioc)); if (bytes_left != 0) pr_info("copy from user failed for %d bytes\n", bytes_left); - if (ioc_data.dev_num < 0 || ioc_data.dev_num >= MAX_LEC_ITF || - !dev_lec[ioc_data.dev_num]) + if (ioc_data.dev_num < 0 || ioc_data.dev_num >= MAX_LEC_ITF) + return -EINVAL; + ioc_data.dev_num = array_index_nospec(ioc_data.dev_num, MAX_LEC_ITF); + if (!dev_lec[ioc_data.dev_num]) return -EINVAL; vpriv = kmalloc(sizeof(struct lec_vcc_priv), GFP_KERNEL); if (!vpriv) From af50e4ba34f4c45e92535364133d4deb5931c1c5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 3 May 2018 13:37:54 -0700 Subject: [PATCH 088/347] nsh: fix infinite loop syzbot caught an infinite recursion in nsh_gso_segment(). Problem here is that we need to make sure the NSH header is of reasonable length. BUG: MAX_LOCK_DEPTH too low! turning off the locking correctness validator. depth: 48 max: 48! 48 locks held by syz-executor0/10189: #0: (ptrval) (rcu_read_lock_bh){....}, at: __dev_queue_xmit+0x30f/0x34c0 net/core/dev.c:3517 #1: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] #1: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 #2: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] #2: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 #3: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] #3: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 #4: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] #4: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 #5: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] #5: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 #6: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] #6: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 #7: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] #7: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 #8: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] #8: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 #9: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] #9: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 #10: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] #10: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 #11: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] #11: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 #12: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] #12: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 #13: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] #13: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 #14: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] #14: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 #15: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] #15: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 #16: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] #16: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 #17: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] #17: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 #18: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] #18: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 #19: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] #19: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 #20: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] #20: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 #21: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] #21: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 #22: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] #22: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 #23: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] #23: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 #24: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] #24: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 #25: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] #25: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 #26: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] #26: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 #27: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] #27: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 #28: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] #28: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 #29: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] #29: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 #30: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] #30: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 #31: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] #31: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 dccp_close: ABORT with 65423 bytes unread #32: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] #32: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 #33: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] #33: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 #34: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] #34: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 #35: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] #35: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 #36: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] #36: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 #37: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] #37: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 #38: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] #38: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 #39: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] #39: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 #40: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] #40: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 #41: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] #41: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 #42: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] #42: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 #43: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] #43: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 #44: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] #44: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 #45: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] #45: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 #46: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] #46: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 #47: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] #47: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 INFO: lockdep is turned off. CPU: 1 PID: 10189 Comm: syz-executor0 Not tainted 4.17.0-rc2+ #26 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1b9/0x294 lib/dump_stack.c:113 __lock_acquire+0x1788/0x5140 kernel/locking/lockdep.c:3449 lock_acquire+0x1dc/0x520 kernel/locking/lockdep.c:3920 rcu_lock_acquire include/linux/rcupdate.h:246 [inline] rcu_read_lock include/linux/rcupdate.h:632 [inline] skb_mac_gso_segment+0x25b/0x720 net/core/dev.c:2789 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 __skb_gso_segment+0x3bb/0x870 net/core/dev.c:2865 skb_gso_segment include/linux/netdevice.h:4025 [inline] validate_xmit_skb+0x54d/0xd90 net/core/dev.c:3118 validate_xmit_skb_list+0xbf/0x120 net/core/dev.c:3168 sch_direct_xmit+0x354/0x11e0 net/sched/sch_generic.c:312 qdisc_restart net/sched/sch_generic.c:399 [inline] __qdisc_run+0x741/0x1af0 net/sched/sch_generic.c:410 __dev_xmit_skb net/core/dev.c:3243 [inline] __dev_queue_xmit+0x28ea/0x34c0 net/core/dev.c:3551 dev_queue_xmit+0x17/0x20 net/core/dev.c:3616 packet_snd net/packet/af_packet.c:2951 [inline] packet_sendmsg+0x40f8/0x6070 net/packet/af_packet.c:2976 sock_sendmsg_nosec net/socket.c:629 [inline] sock_sendmsg+0xd5/0x120 net/socket.c:639 __sys_sendto+0x3d7/0x670 net/socket.c:1789 __do_sys_sendto net/socket.c:1801 [inline] __se_sys_sendto net/socket.c:1797 [inline] __x64_sys_sendto+0xe1/0x1a0 net/socket.c:1797 do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x49/0xbe Fixes: c411ed854584 ("nsh: add GSO support") Signed-off-by: Eric Dumazet Cc: Jiri Benc Reported-by: syzbot Acked-by: Jiri Benc Signed-off-by: David S. Miller --- net/nsh/nsh.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/nsh/nsh.c b/net/nsh/nsh.c index d7da99a0b0b8..9696ef96b719 100644 --- a/net/nsh/nsh.c +++ b/net/nsh/nsh.c @@ -57,6 +57,8 @@ int nsh_pop(struct sk_buff *skb) return -ENOMEM; nh = (struct nshhdr *)(skb->data); length = nsh_hdr_len(nh); + if (length < NSH_BASE_HDR_LEN) + return -EINVAL; inner_proto = tun_p_to_eth_p(nh->np); if (!pskb_may_pull(skb, length)) return -ENOMEM; @@ -90,6 +92,8 @@ static struct sk_buff *nsh_gso_segment(struct sk_buff *skb, if (unlikely(!pskb_may_pull(skb, NSH_BASE_HDR_LEN))) goto out; nsh_len = nsh_hdr_len(nsh_hdr(skb)); + if (nsh_len < NSH_BASE_HDR_LEN) + goto out; if (unlikely(!pskb_may_pull(skb, nsh_len))) goto out; From d89a2adb8bfe6f8949ff389acdb9fa298b6e8e12 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 3 May 2018 20:04:27 -0400 Subject: [PATCH 089/347] tg3: Fix vunmap() BUG_ON() triggered from tg3_free_consistent(). tg3_free_consistent() calls dma_free_coherent() to free tp->hw_stats under spinlock and can trigger BUG_ON() in vunmap() because vunmap() may sleep. Fix it by removing the spinlock and relying on the TG3_FLAG_INIT_COMPLETE flag to prevent race conditions between tg3_get_stats64() and tg3_free_consistent(). TG3_FLAG_INIT_COMPLETE is always cleared under tp->lock before tg3_free_consistent() and therefore tg3_get_stats64() can safely access tp->hw_stats under tp->lock if TG3_FLAG_INIT_COMPLETE is set. Fixes: f5992b72ebe0 ("tg3: Fix race condition in tg3_get_stats64().") Reported-by: Zumeng Chen Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/tg3.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 08bbb639be1a..9f59b1270a7c 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -8733,14 +8733,15 @@ static void tg3_free_consistent(struct tg3 *tp) tg3_mem_rx_release(tp); tg3_mem_tx_release(tp); - /* Protect tg3_get_stats64() from reading freed tp->hw_stats. */ - tg3_full_lock(tp, 0); + /* tp->hw_stats can be referenced safely: + * 1. under rtnl_lock + * 2. or under tp->lock if TG3_FLAG_INIT_COMPLETE is set. + */ if (tp->hw_stats) { dma_free_coherent(&tp->pdev->dev, sizeof(struct tg3_hw_stats), tp->hw_stats, tp->stats_mapping); tp->hw_stats = NULL; } - tg3_full_unlock(tp); } /* @@ -14178,7 +14179,7 @@ static void tg3_get_stats64(struct net_device *dev, struct tg3 *tp = netdev_priv(dev); spin_lock_bh(&tp->lock); - if (!tp->hw_stats) { + if (!tp->hw_stats || !tg3_flag(tp, INIT_COMPLETE)) { *stats = tp->net_stats_prev; spin_unlock_bh(&tp->lock); return; From ae552ac2785d69189c865dcea7e71da02180c59c Mon Sep 17 00:00:00 2001 From: YU Bo Date: Thu, 3 May 2018 23:09:23 -0400 Subject: [PATCH 090/347] net/netlink: make sure the headers line up actual value output Making sure the headers line up properly with the actual value output of the command `cat /proc/net/netlink` Before the patch: sk Eth Pid Groups Rmem Wmem Dump Locks Drops Inode >0000000033203952 0 897 00000113 0 0 0 2 0 14906 Signed-off-by: Bo YU Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 55342c4d5cec..2e2dd88fc79f 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2606,13 +2606,13 @@ static int netlink_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) { seq_puts(seq, - "sk Eth Pid Groups " - "Rmem Wmem Dump Locks Drops Inode\n"); + "sk Eth Pid Groups " + "Rmem Wmem Dump Locks Drops Inode\n"); } else { struct sock *s = v; struct netlink_sock *nlk = nlk_sk(s); - seq_printf(seq, "%pK %-3d %-6u %08x %-8d %-8d %d %-8d %-8d %-8lu\n", + seq_printf(seq, "%pK %-3d %-10u %08x %-8d %-8d %-5d %-8d %-8d %-8lu\n", s, s->sk_protocol, nlk->portid, From 14224923c3600bae2ac4dcae3bf0c3d4dc2812be Mon Sep 17 00:00:00 2001 From: Rob Taglang Date: Thu, 3 May 2018 17:13:06 -0400 Subject: [PATCH 091/347] net: ethernet: sun: niu set correct packet size in skb Currently, skb->len and skb->data_len are set to the page size, not the packet size. This causes the frame check sequence to not be located at the "end" of the packet resulting in ethernet frame check errors. The driver does work currently, but stricter kernel facing networking solutions like OpenVSwitch will drop these packets as invalid. These changes set the packet size correctly so that these errors no longer occur. The length does not include the frame check sequence, so that subtraction was removed. Tested on Oracle/SUN Multithreaded 10-Gigabit Ethernet Network Controller [108e:abcd] and validated in wireshark. Signed-off-by: Rob Taglang Signed-off-by: David S. Miller --- drivers/net/ethernet/sun/niu.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index f081de4f38d7..88c12474a0c3 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -3443,7 +3443,7 @@ static int niu_process_rx_pkt(struct napi_struct *napi, struct niu *np, len = (val & RCR_ENTRY_L2_LEN) >> RCR_ENTRY_L2_LEN_SHIFT; - len -= ETH_FCS_LEN; + append_size = len + ETH_HLEN + ETH_FCS_LEN; addr = (val & RCR_ENTRY_PKT_BUF_ADDR) << RCR_ENTRY_PKT_BUF_ADDR_SHIFT; @@ -3453,7 +3453,6 @@ static int niu_process_rx_pkt(struct napi_struct *napi, struct niu *np, RCR_ENTRY_PKTBUFSZ_SHIFT]; off = addr & ~PAGE_MASK; - append_size = rcr_size; if (num_rcr == 1) { int ptype; @@ -3466,7 +3465,7 @@ static int niu_process_rx_pkt(struct napi_struct *napi, struct niu *np, else skb_checksum_none_assert(skb); } else if (!(val & RCR_ENTRY_MULTI)) - append_size = len - skb->len; + append_size = append_size - skb->len; niu_rx_skb_append(skb, page, off, append_size, rcr_size); if ((page->index + rp->rbr_block_size) - rcr_size == addr) { From 52c5cd1bf0cecf4b146ca07dc513cbe2f4583bb5 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Fri, 4 May 2018 17:10:54 +0200 Subject: [PATCH 092/347] net: phy: sfp: fix the BR,min computation In an SFP EEPROM values can be read to get information about a given SFP module. One of those is the bitrate, which can be determined using a nominal bitrate in addition with min and max values (in %). The SFP code currently compute both BR,min and BR,max values thanks to this nominal and min,max values. This patch fixes the BR,min computation as the min value should be subtracted to the nominal one, not added. Fixes: 9962acf7fb8c ("sfp: add support for 1000Base-PX and 1000Base-BX10") Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- drivers/net/phy/sfp-bus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c index 0381da78d228..fd6c23f69c2f 100644 --- a/drivers/net/phy/sfp-bus.c +++ b/drivers/net/phy/sfp-bus.c @@ -125,7 +125,7 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, if (id->base.br_nominal) { if (id->base.br_nominal != 255) { br_nom = id->base.br_nominal * 100; - br_min = br_nom + id->base.br_nominal * id->ext.br_min; + br_min = br_nom - id->base.br_nominal * id->ext.br_min; br_max = br_nom + id->base.br_nominal * id->ext.br_max; } else if (id->ext.br_max) { br_nom = 250 * id->ext.br_max; From 7281c8dec8a87685cb54d503d8cceef5a0fc2fdd Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 Apr 2018 14:29:51 +0200 Subject: [PATCH 093/347] sched/core: Fix possible Spectre-v1 indexing for sched_prio_to_weight[] > kernel/sched/core.c:6921 cpu_weight_nice_write_s64() warn: potential spectre issue 'sched_prio_to_weight' Userspace controls @nice, so sanitize the value before using it to index an array. Reported-by: Dan Carpenter Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index ffde9eebc846..092f7c4de903 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -8,6 +8,7 @@ #include "sched.h" #include +#include #include #include @@ -6923,11 +6924,15 @@ static int cpu_weight_nice_write_s64(struct cgroup_subsys_state *css, struct cftype *cft, s64 nice) { unsigned long weight; + int idx; if (nice < MIN_NICE || nice > MAX_NICE) return -ERANGE; - weight = sched_prio_to_weight[NICE_TO_PRIO(nice) - MAX_RT_PRIO]; + idx = NICE_TO_PRIO(nice) - MAX_RT_PRIO; + idx = array_index_nospec(idx, 40); + weight = sched_prio_to_weight[idx]; + return sched_group_set_shares(css_tg(css), scale_load(weight)); } #endif From 354d7793070611b4df5a79fbb0f12752d0ed0cc5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 Apr 2018 15:03:45 +0200 Subject: [PATCH 094/347] sched/autogroup: Fix possible Spectre-v1 indexing for sched_prio_to_weight[] > kernel/sched/autogroup.c:230 proc_sched_autogroup_set_nice() warn: potential spectre issue 'sched_prio_to_weight' Userspace controls @nice, sanitize the array index. Reported-by: Dan Carpenter Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Signed-off-by: Ingo Molnar --- kernel/sched/autogroup.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/kernel/sched/autogroup.c b/kernel/sched/autogroup.c index 6be6c575b6cd..2d4ff5353ded 100644 --- a/kernel/sched/autogroup.c +++ b/kernel/sched/autogroup.c @@ -2,6 +2,7 @@ /* * Auto-group scheduling implementation: */ +#include #include "sched.h" unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1; @@ -209,7 +210,7 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int nice) static unsigned long next = INITIAL_JIFFIES; struct autogroup *ag; unsigned long shares; - int err; + int err, idx; if (nice < MIN_NICE || nice > MAX_NICE) return -EINVAL; @@ -227,7 +228,9 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int nice) next = HZ / 10 + jiffies; ag = autogroup_task_get(p); - shares = scale_load(sched_prio_to_weight[nice + 20]); + + idx = array_index_nospec(nice + 20, 40); + shares = scale_load(sched_prio_to_weight[idx]); down_write(&ag->lock); err = sched_group_set_shares(ag->tg, shares); From 4411ec1d1993e8dbff2898390e3fed280d88e446 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 Apr 2018 14:03:18 +0200 Subject: [PATCH 095/347] perf/core: Fix possible Spectre-v1 indexing for ->aux_pages[] > kernel/events/ring_buffer.c:871 perf_mmap_to_page() warn: potential spectre issue 'rb->aux_pages' Userspace controls @pgoff through the fault address. Sanitize the array index before doing the array dereference. Reported-by: Dan Carpenter Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Signed-off-by: Ingo Molnar --- kernel/events/ring_buffer.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 6c6b3c48db71..1d8ca9ea9979 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "internal.h" @@ -867,8 +868,10 @@ perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff) return NULL; /* AUX space */ - if (pgoff >= rb->aux_pgoff) - return virt_to_page(rb->aux_pages[pgoff - rb->aux_pgoff]); + if (pgoff >= rb->aux_pgoff) { + int aux_pgoff = array_index_nospec(pgoff - rb->aux_pgoff, rb->aux_nr_pages); + return virt_to_page(rb->aux_pages[aux_pgoff]); + } } return __perf_mmap_to_page(rb, pgoff); From ef9ee4ad38445a30909c48998624861716f2a994 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 Apr 2018 14:06:29 +0200 Subject: [PATCH 096/347] perf/x86: Fix possible Spectre-v1 indexing for hw_perf_event cache_* > arch/x86/events/core.c:319 set_ext_hw_attr() warn: potential spectre issue 'hw_cache_event_ids[cache_type]' (local cap) > arch/x86/events/core.c:319 set_ext_hw_attr() warn: potential spectre issue 'hw_cache_event_ids' (local cap) > arch/x86/events/core.c:328 set_ext_hw_attr() warn: potential spectre issue 'hw_cache_extra_regs[cache_type]' (local cap) > arch/x86/events/core.c:328 set_ext_hw_attr() warn: potential spectre issue 'hw_cache_extra_regs' (local cap) Userspace controls @config which contains 3 (byte) fields used for a 3 dimensional array deref. Reported-by: Dan Carpenter Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index a6006e7bb729..b1be0ac51ce0 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -304,17 +304,20 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event) config = attr->config; - cache_type = (config >> 0) & 0xff; + cache_type = (config >> 0) & 0xff; if (cache_type >= PERF_COUNT_HW_CACHE_MAX) return -EINVAL; + cache_type = array_index_nospec(cache_type, PERF_COUNT_HW_CACHE_MAX); cache_op = (config >> 8) & 0xff; if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) return -EINVAL; + cache_op = array_index_nospec(cache_op, PERF_COUNT_HW_CACHE_OP_MAX); cache_result = (config >> 16) & 0xff; if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) return -EINVAL; + cache_result = array_index_nospec(cache_result, PERF_COUNT_HW_CACHE_RESULT_MAX); val = hw_cache_event_ids[cache_type][cache_op][cache_result]; From 46b1b577229a091b137831becaa0fae8690ee15a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 Apr 2018 14:08:58 +0200 Subject: [PATCH 097/347] perf/x86: Fix possible Spectre-v1 indexing for x86_pmu::event_map() > arch/x86/events/intel/cstate.c:307 cstate_pmu_event_init() warn: potential spectre issue 'pkg_msr' (local cap) > arch/x86/events/intel/core.c:337 intel_pmu_event_map() warn: potential spectre issue 'intel_perfmon_event_map' > arch/x86/events/intel/knc.c:122 knc_pmu_event_map() warn: potential spectre issue 'knc_perfmon_event_map' > arch/x86/events/intel/p4.c:722 p4_pmu_event_map() warn: potential spectre issue 'p4_general_events' > arch/x86/events/intel/p6.c:116 p6_pmu_event_map() warn: potential spectre issue 'p6_perfmon_event_map' > arch/x86/events/amd/core.c:132 amd_pmu_event_map() warn: potential spectre issue 'amd_perfmon_event_map' Userspace controls @attr, sanitize @attr->config before passing it on to x86_pmu::event_map(). Reported-by: Dan Carpenter Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index b1be0ac51ce0..45b2b1c93d04 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -424,6 +425,8 @@ int x86_setup_perfctr(struct perf_event *event) if (attr->config >= x86_pmu.max_events) return -EINVAL; + attr->config = array_index_nospec((unsigned long)attr->config, x86_pmu.max_events); + /* * The generic map: */ From 06ce6e9b6d6c09d4129c6e24a1314a395d816c10 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 Apr 2018 14:23:36 +0200 Subject: [PATCH 098/347] perf/x86/msr: Fix possible Spectre-v1 indexing in the MSR driver > arch/x86/events/msr.c:178 msr_event_init() warn: potential spectre issue 'msr' (local cap) Userspace controls @attr, sanitize cfg (attr->config) before using it to index an array. Reported-by: Dan Carpenter Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Signed-off-by: Ingo Molnar --- arch/x86/events/msr.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index e7edf19e64c2..b4771a6ddbc1 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include enum perf_msr_id { @@ -158,9 +159,6 @@ static int msr_event_init(struct perf_event *event) if (event->attr.type != event->pmu->type) return -ENOENT; - if (cfg >= PERF_MSR_EVENT_MAX) - return -EINVAL; - /* unsupported modes and filters */ if (event->attr.exclude_user || event->attr.exclude_kernel || @@ -171,6 +169,11 @@ static int msr_event_init(struct perf_event *event) event->attr.sample_period) /* no sampling */ return -EINVAL; + if (cfg >= PERF_MSR_EVENT_MAX) + return -EINVAL; + + cfg = array_index_nospec((unsigned long)cfg, PERF_MSR_EVENT_MAX); + if (!msr[cfg].attr) return -EINVAL; From a5f81290ce475489fa2551c01a07470c1a4c932e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 20 Apr 2018 14:25:48 +0200 Subject: [PATCH 099/347] perf/x86/cstate: Fix possible Spectre-v1 indexing for pkg_msr > arch/x86/events/intel/cstate.c:307 cstate_pmu_event_init() warn: potential spectre issue 'pkg_msr' (local cap) Userspace controls @attr, sanitize cfg (attr->config) before using it to index an array. Reported-by: Dan Carpenter Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Signed-off-by: Ingo Molnar --- arch/x86/events/intel/cstate.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 9aca448bb8e6..9f8084f18d58 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -92,6 +92,7 @@ #include #include #include +#include #include #include #include "../perf_event.h" @@ -302,6 +303,7 @@ static int cstate_pmu_event_init(struct perf_event *event) } else if (event->pmu == &cstate_pkg_pmu) { if (cfg >= PERF_CSTATE_PKG_EVENT_MAX) return -EINVAL; + cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_PKG_EVENT_MAX); if (!pkg_msr[cfg].attr) return -EINVAL; event->hw.event_base = pkg_msr[cfg].msr; From e0f6d1a526b6adfa9ca3b336b83ece0eed345033 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 4 May 2018 19:59:35 +0200 Subject: [PATCH 100/347] x86/vdso: Remove unused file commit da861e18eccc ("x86, vdso: Get rid of the fake section mechanism") left this file behind; nothing is using it anymore. Signed-off-by: Jann Horn Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: luto@amacapital.net Link: http://lkml.kernel.org/r/20180504175935.104085-1-jannh@google.com Signed-off-by: Ingo Molnar --- arch/x86/entry/vdso/vdso32/vdso-fakesections.c | 1 - 1 file changed, 1 deletion(-) delete mode 100644 arch/x86/entry/vdso/vdso32/vdso-fakesections.c diff --git a/arch/x86/entry/vdso/vdso32/vdso-fakesections.c b/arch/x86/entry/vdso/vdso32/vdso-fakesections.c deleted file mode 100644 index 541468e25265..000000000000 --- a/arch/x86/entry/vdso/vdso32/vdso-fakesections.c +++ /dev/null @@ -1 +0,0 @@ -#include "../vdso-fakesections.c" From 6cb465972c4eb6741b3094a58a65e527fc63c100 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Sat, 5 May 2018 16:40:23 -0400 Subject: [PATCH 101/347] sh: fix build failure for J2 cpu with SMP disabled The sh asm/smp.h defines a fallback hard_smp_processor_id macro for the !SMP case, but linux/smp.h never includes asm/smp.h in the !SMP case. Signed-off-by: Rich Felker --- arch/sh/kernel/cpu/sh2/probe.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/sh/kernel/cpu/sh2/probe.c b/arch/sh/kernel/cpu/sh2/probe.c index 4205f6d42b69..a5bd03642678 100644 --- a/arch/sh/kernel/cpu/sh2/probe.c +++ b/arch/sh/kernel/cpu/sh2/probe.c @@ -43,7 +43,11 @@ void __ref cpu_probe(void) #endif #if defined(CONFIG_CPU_J2) +#if defined(CONFIG_SMP) unsigned cpu = hard_smp_processor_id(); +#else + unsigned cpu = 0; +#endif if (cpu == 0) of_scan_flat_dt(scan_cache, NULL); if (j2_ccr_base) __raw_writel(0x80000303, j2_ccr_base + 4*cpu); if (cpu != 0) return; From b9826a4929bbd4dcb245429fd7434145c4fcdc9b Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Wed, 18 Apr 2018 11:42:35 +0200 Subject: [PATCH 102/347] sh: mm: Fix unprotected access to struct device With commit ce88313069c36eef80f21fd7 ("arch/sh: make the DMA mapping operations observe dev->dma_pfn_offset") the generic DMA allocation function on which the SH 'dma_alloc_coherent()' function relies on, accesses the 'dma_pfn_offset' field of struct device. Unfortunately the 'dma_generic_alloc_coherent()' function is called from several places with a NULL struct device argument, halting the CPU during the boot process. This patch fixes the issue by protecting access to dev->dma_pfn_offset, with a trivial check for validity. It also passes a valid 'struct device' in the 'platform_resource_setup_memory()' function which is the main user of 'dma_alloc_coherent()', and inserts a WARN_ON() check to remind to future (and existing) bogus users of this function to provide a valid 'struct device' whenever possible. Fixes: ce88313069c36eef80f21fd7 ("arch/sh: make the DMA mapping operations observe dev->dma_pfn_offset") Signed-off-by: Jacopo Mondi Reviewed-by: Geert Uytterhoeven Reviewed-by: Thomas Petazzoni Signed-off-by: Rich Felker --- arch/sh/mm/consistent.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c index 8ce98691d822..f1b44697ad68 100644 --- a/arch/sh/mm/consistent.c +++ b/arch/sh/mm/consistent.c @@ -59,7 +59,9 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size, split_page(pfn_to_page(virt_to_phys(ret) >> PAGE_SHIFT), order); - *dma_handle = virt_to_phys(ret) - PFN_PHYS(dev->dma_pfn_offset); + *dma_handle = virt_to_phys(ret); + if (!WARN_ON(!dev)) + *dma_handle -= PFN_PHYS(dev->dma_pfn_offset); return ret_nocache; } @@ -69,9 +71,12 @@ void dma_generic_free_coherent(struct device *dev, size_t size, unsigned long attrs) { int order = get_order(size); - unsigned long pfn = (dma_handle >> PAGE_SHIFT) + dev->dma_pfn_offset; + unsigned long pfn = dma_handle >> PAGE_SHIFT; int k; + if (!WARN_ON(!dev)) + pfn += dev->dma_pfn_offset; + for (k = 0; k < (1 << order); k++) __free_pages(pfn_to_page(pfn + k), 0); @@ -143,7 +148,7 @@ int __init platform_resource_setup_memory(struct platform_device *pdev, if (!memsize) return 0; - buf = dma_alloc_coherent(NULL, memsize, &dma_handle, GFP_KERNEL); + buf = dma_alloc_coherent(&pdev->dev, memsize, &dma_handle, GFP_KERNEL); if (!buf) { pr_warning("%s: unable to allocate memory\n", name); return -ENOMEM; From 6dd85fbb87d1d6b87a3b1f02ca28d7b2abd2e7ba Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 20 Apr 2018 16:49:46 +0200 Subject: [PATCH 103/347] s390: move expoline assembler macros to a header To be able to use the expoline branches in different assembler files move the associated macros from entry.S to a new header nospec-insn.h. While we are at it make the macros a bit nicer to use. Cc: stable@vger.kernel.org # 4.16 Fixes: f19fbd5ed6 ("s390: introduce execute-trampolines for branches") Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/nospec-insn.h | 127 ++++++++++++++++++++++++++++ arch/s390/kernel/entry.S | 105 ++++++----------------- 2 files changed, 151 insertions(+), 81 deletions(-) create mode 100644 arch/s390/include/asm/nospec-insn.h diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h new file mode 100644 index 000000000000..440689cbcf51 --- /dev/null +++ b/arch/s390/include/asm/nospec-insn.h @@ -0,0 +1,127 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_S390_NOSPEC_ASM_H +#define _ASM_S390_NOSPEC_ASM_H + +#include + +#ifdef __ASSEMBLY__ + +#ifdef CONFIG_EXPOLINE + +/* + * The expoline macros are used to create thunks in the same format + * as gcc generates them. The 'comdat' section flag makes sure that + * the various thunks are merged into a single copy. + */ + .macro __THUNK_PROLOG_NAME name + .pushsection .text.\name,"axG",@progbits,\name,comdat + .globl \name + .hidden \name + .type \name,@function +\name: + CFI_STARTPROC + .endm + + .macro __THUNK_EPILOG + CFI_ENDPROC + .popsection + .endm + + .macro __THUNK_PROLOG_BR r1,r2 + __THUNK_PROLOG_NAME __s390x_indirect_jump_r\r2\()use_r\r1 + .endm + + .macro __THUNK_BR r1,r2 + jg __s390x_indirect_jump_r\r2\()use_r\r1 + .endm + + .macro __THUNK_BRASL r1,r2,r3 + brasl \r1,__s390x_indirect_jump_r\r3\()use_r\r2 + .endm + + .macro __DECODE_RR expand,reg,ruse + .set __decode_fail,1 + .irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + .ifc \reg,%r\r1 + .irp r2,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + .ifc \ruse,%r\r2 + \expand \r1,\r2 + .set __decode_fail,0 + .endif + .endr + .endif + .endr + .if __decode_fail == 1 + .error "__DECODE_RR failed" + .endif + .endm + + .macro __DECODE_RRR expand,rsave,rtarget,ruse + .set __decode_fail,1 + .irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + .ifc \rsave,%r\r1 + .irp r2,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + .ifc \rtarget,%r\r2 + .irp r3,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + .ifc \ruse,%r\r3 + \expand \r1,\r2,\r3 + .set __decode_fail,0 + .endif + .endr + .endif + .endr + .endif + .endr + .if __decode_fail == 1 + .error "__DECODE_RRR failed" + .endif + .endm + + .macro __THUNK_EX_BR reg,ruse +#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES + exrl 0,555f + j . +#else + larl \ruse,555f + ex 0,0(\ruse) + j . +#endif +555: br \reg + .endm + + .macro GEN_BR_THUNK reg,ruse=%r1 + __DECODE_RR __THUNK_PROLOG_BR,\reg,\ruse + __THUNK_EX_BR \reg,\ruse + __THUNK_EPILOG + .endm + + .macro BR_EX reg,ruse=%r1 +557: __DECODE_RR __THUNK_BR,\reg,\ruse + .pushsection .s390_indirect_branches,"a",@progbits + .long 557b-. + .popsection + .endm + + .macro BASR_EX rsave,rtarget,ruse=%r1 +559: __DECODE_RRR __THUNK_BRASL,\rsave,\rtarget,\ruse + .pushsection .s390_indirect_branches,"a",@progbits + .long 559b-. + .popsection + .endm + +#else + .macro GEN_BR_THUNK reg,ruse=%r1 + .endm + + .macro BR_EX reg,ruse=%r1 + br \reg + .endm + + .macro BASR_EX rsave,rtarget,ruse=%r1 + basr \rsave,\rtarget + .endm +#endif + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_S390_NOSPEC_ASM_H */ diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 3f22f139a041..f03402efab4b 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -28,6 +28,7 @@ #include #include #include +#include __PT_R0 = __PT_GPRS __PT_R1 = __PT_GPRS + 8 @@ -183,67 +184,9 @@ _LPP_OFFSET = __LC_LPP "jnz .+8; .long 0xb2e8d000", 82 .endm -#ifdef CONFIG_EXPOLINE - - .macro GEN_BR_THUNK name,reg,tmp - .section .text.\name,"axG",@progbits,\name,comdat - .globl \name - .hidden \name - .type \name,@function -\name: - CFI_STARTPROC -#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES - exrl 0,0f -#else - larl \tmp,0f - ex 0,0(\tmp) -#endif - j . -0: br \reg - CFI_ENDPROC - .endm - - GEN_BR_THUNK __s390x_indirect_jump_r1use_r9,%r9,%r1 - GEN_BR_THUNK __s390x_indirect_jump_r1use_r14,%r14,%r1 - GEN_BR_THUNK __s390x_indirect_jump_r11use_r14,%r14,%r11 - - .macro BASR_R14_R9 -0: brasl %r14,__s390x_indirect_jump_r1use_r9 - .pushsection .s390_indirect_branches,"a",@progbits - .long 0b-. - .popsection - .endm - - .macro BR_R1USE_R14 -0: jg __s390x_indirect_jump_r1use_r14 - .pushsection .s390_indirect_branches,"a",@progbits - .long 0b-. - .popsection - .endm - - .macro BR_R11USE_R14 -0: jg __s390x_indirect_jump_r11use_r14 - .pushsection .s390_indirect_branches,"a",@progbits - .long 0b-. - .popsection - .endm - -#else /* CONFIG_EXPOLINE */ - - .macro BASR_R14_R9 - basr %r14,%r9 - .endm - - .macro BR_R1USE_R14 - br %r14 - .endm - - .macro BR_R11USE_R14 - br %r14 - .endm - -#endif /* CONFIG_EXPOLINE */ - + GEN_BR_THUNK %r9 + GEN_BR_THUNK %r14 + GEN_BR_THUNK %r14,%r11 .section .kprobes.text, "ax" .Ldummy: @@ -260,7 +203,7 @@ _LPP_OFFSET = __LC_LPP ENTRY(__bpon) .globl __bpon BPON - BR_R1USE_R14 + BR_EX %r14 /* * Scheduler resume function, called by switch_to @@ -284,7 +227,7 @@ ENTRY(__switch_to) mvc __LC_CURRENT_PID(4,%r0),0(%r3) # store pid of next lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task ALTERNATIVE "", ".insn s,0xb2800000,_LPP_OFFSET", 40 - BR_R1USE_R14 + BR_EX %r14 .L__critical_start: @@ -351,7 +294,7 @@ sie_exit: xgr %r5,%r5 lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers lg %r2,__SF_SIE_REASON(%r15) # return exit reason code - BR_R1USE_R14 + BR_EX %r14 .Lsie_fault: lghi %r14,-EFAULT stg %r14,__SF_SIE_REASON(%r15) # set exit reason code @@ -410,7 +353,7 @@ ENTRY(system_call) lgf %r9,0(%r8,%r10) # get system call add. TSTMSK __TI_flags(%r12),_TIF_TRACE jnz .Lsysc_tracesys - BASR_R14_R9 # call sys_xxxx + BASR_EX %r14,%r9 # call sys_xxxx stg %r2,__PT_R2(%r11) # store return value .Lsysc_return: @@ -595,7 +538,7 @@ ENTRY(system_call) lmg %r3,%r7,__PT_R3(%r11) stg %r7,STACK_FRAME_OVERHEAD(%r15) lg %r2,__PT_ORIG_GPR2(%r11) - BASR_R14_R9 # call sys_xxx + BASR_EX %r14,%r9 # call sys_xxx stg %r2,__PT_R2(%r11) # store return value .Lsysc_tracenogo: TSTMSK __TI_flags(%r12),_TIF_TRACE @@ -619,7 +562,7 @@ ENTRY(ret_from_fork) lmg %r9,%r10,__PT_R9(%r11) # load gprs ENTRY(kernel_thread_starter) la %r2,0(%r10) - BASR_R14_R9 + BASR_EX %r14,%r9 j .Lsysc_tracenogo /* @@ -701,7 +644,7 @@ ENTRY(pgm_check_handler) je .Lpgm_return lgf %r9,0(%r10,%r1) # load address of handler routine lgr %r2,%r11 # pass pointer to pt_regs - BASR_R14_R9 # branch to interrupt-handler + BASR_EX %r14,%r9 # branch to interrupt-handler .Lpgm_return: LOCKDEP_SYS_EXIT tm __PT_PSW+1(%r11),0x01 # returning to user ? @@ -1019,7 +962,7 @@ ENTRY(psw_idle) stpt __TIMER_IDLE_ENTER(%r2) .Lpsw_idle_lpsw: lpswe __SF_EMPTY(%r15) - BR_R1USE_R14 + BR_EX %r14 .Lpsw_idle_end: /* @@ -1061,7 +1004,7 @@ ENTRY(save_fpu_regs) .Lsave_fpu_regs_done: oi __LC_CPU_FLAGS+7,_CIF_FPU .Lsave_fpu_regs_exit: - BR_R1USE_R14 + BR_EX %r14 .Lsave_fpu_regs_end: EXPORT_SYMBOL(save_fpu_regs) @@ -1107,7 +1050,7 @@ load_fpu_regs: .Lload_fpu_regs_done: ni __LC_CPU_FLAGS+7,255-_CIF_FPU .Lload_fpu_regs_exit: - BR_R1USE_R14 + BR_EX %r14 .Lload_fpu_regs_end: .L__critical_end: @@ -1322,7 +1265,7 @@ cleanup_critical: jl 0f clg %r9,BASED(.Lcleanup_table+104) # .Lload_fpu_regs_end jl .Lcleanup_load_fpu_regs -0: BR_R11USE_R14 +0: BR_EX %r14 .align 8 .Lcleanup_table: @@ -1358,7 +1301,7 @@ cleanup_critical: ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE lctlg %c1,%c1,__LC_USER_ASCE # load primary asce larl %r9,sie_exit # skip forward to sie_exit - BR_R11USE_R14 + BR_EX %r14 #endif .Lcleanup_system_call: @@ -1412,7 +1355,7 @@ cleanup_critical: stg %r15,56(%r11) # r15 stack pointer # set new psw address and exit larl %r9,.Lsysc_do_svc - BR_R11USE_R14 + BR_EX %r14,%r11 .Lcleanup_system_call_insn: .quad system_call .quad .Lsysc_stmg @@ -1424,7 +1367,7 @@ cleanup_critical: .Lcleanup_sysc_tif: larl %r9,.Lsysc_tif - BR_R11USE_R14 + BR_EX %r14,%r11 .Lcleanup_sysc_restore: # check if stpt has been executed @@ -1441,14 +1384,14 @@ cleanup_critical: mvc 0(64,%r11),__PT_R8(%r9) lmg %r0,%r7,__PT_R0(%r9) 1: lmg %r8,%r9,__LC_RETURN_PSW - BR_R11USE_R14 + BR_EX %r14,%r11 .Lcleanup_sysc_restore_insn: .quad .Lsysc_exit_timer .quad .Lsysc_done - 4 .Lcleanup_io_tif: larl %r9,.Lio_tif - BR_R11USE_R14 + BR_EX %r14,%r11 .Lcleanup_io_restore: # check if stpt has been executed @@ -1462,7 +1405,7 @@ cleanup_critical: mvc 0(64,%r11),__PT_R8(%r9) lmg %r0,%r7,__PT_R0(%r9) 1: lmg %r8,%r9,__LC_RETURN_PSW - BR_R11USE_R14 + BR_EX %r14,%r11 .Lcleanup_io_restore_insn: .quad .Lio_exit_timer .quad .Lio_done - 4 @@ -1515,17 +1458,17 @@ cleanup_critical: # prepare return psw nihh %r8,0xfcfd # clear irq & wait state bits lg %r9,48(%r11) # return from psw_idle - BR_R11USE_R14 + BR_EX %r14,%r11 .Lcleanup_idle_insn: .quad .Lpsw_idle_lpsw .Lcleanup_save_fpu_regs: larl %r9,save_fpu_regs - BR_R11USE_R14 + BR_EX %r14,%r11 .Lcleanup_load_fpu_regs: larl %r9,load_fpu_regs - BR_R11USE_R14 + BR_EX %r14,%r11 /* * Integer constants From 467a3bf219cee12259182c5cb4821f88fd518a51 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 23 Apr 2018 14:31:36 +0200 Subject: [PATCH 104/347] s390/crc32-vx: use expoline for indirect branches The return from the crc32_le_vgfm_16/crc32c_le_vgfm_16 and the crc32_be_vgfm_16 functions are done with "br %r14". These are indirect branches as well and need to use execute trampolines for CONFIG_EXPOLINE=y. Cc: stable@vger.kernel.org # 4.16 Fixes: f19fbd5ed6 ("s390: introduce execute-trampolines for branches") Reviewed-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/crypto/crc32be-vx.S | 5 ++++- arch/s390/crypto/crc32le-vx.S | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/s390/crypto/crc32be-vx.S b/arch/s390/crypto/crc32be-vx.S index e8077f0971f8..2bf01ba44107 100644 --- a/arch/s390/crypto/crc32be-vx.S +++ b/arch/s390/crypto/crc32be-vx.S @@ -13,6 +13,7 @@ */ #include +#include #include /* Vector register range containing CRC-32 constants */ @@ -67,6 +68,8 @@ .previous + GEN_BR_THUNK %r14 + .text /* * The CRC-32 function(s) use these calling conventions: @@ -203,6 +206,6 @@ ENTRY(crc32_be_vgfm_16) .Ldone: VLGVF %r2,%v2,3 - br %r14 + BR_EX %r14 .previous diff --git a/arch/s390/crypto/crc32le-vx.S b/arch/s390/crypto/crc32le-vx.S index d8c67a58c0c5..7d6f568bd3ad 100644 --- a/arch/s390/crypto/crc32le-vx.S +++ b/arch/s390/crypto/crc32le-vx.S @@ -14,6 +14,7 @@ */ #include +#include #include /* Vector register range containing CRC-32 constants */ @@ -76,6 +77,7 @@ .previous + GEN_BR_THUNK %r14 .text @@ -264,6 +266,6 @@ crc32_le_vgfm_generic: .Ldone: VLGVF %r2,%v2,2 - br %r14 + BR_EX %r14 .previous From 97489e0663fa700d6e7febddc43b58df98d7bcda Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 23 Apr 2018 14:31:36 +0200 Subject: [PATCH 105/347] s390/lib: use expoline for indirect branches The return from the memmove, memset, memcpy, __memset16, __memset32 and __memset64 functions are done with "br %r14". These are indirect branches as well and need to use execute trampolines for CONFIG_EXPOLINE=y. Cc: stable@vger.kernel.org # 4.16 Fixes: f19fbd5ed6 ("s390: introduce execute-trampolines for branches") Reviewed-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/lib/mem.S | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/arch/s390/lib/mem.S b/arch/s390/lib/mem.S index 495c9c4bacc7..2311f15be9cf 100644 --- a/arch/s390/lib/mem.S +++ b/arch/s390/lib/mem.S @@ -7,6 +7,9 @@ #include #include +#include + + GEN_BR_THUNK %r14 /* * void *memmove(void *dest, const void *src, size_t n) @@ -33,14 +36,14 @@ ENTRY(memmove) .Lmemmove_forward_remainder: larl %r5,.Lmemmove_mvc ex %r4,0(%r5) - br %r14 + BR_EX %r14 .Lmemmove_reverse: ic %r0,0(%r4,%r3) stc %r0,0(%r4,%r1) brctg %r4,.Lmemmove_reverse ic %r0,0(%r4,%r3) stc %r0,0(%r4,%r1) - br %r14 + BR_EX %r14 .Lmemmove_mvc: mvc 0(1,%r1),0(%r3) EXPORT_SYMBOL(memmove) @@ -77,7 +80,7 @@ ENTRY(memset) .Lmemset_clear_remainder: larl %r3,.Lmemset_xc ex %r4,0(%r3) - br %r14 + BR_EX %r14 .Lmemset_fill: cghi %r4,1 lgr %r1,%r2 @@ -95,10 +98,10 @@ ENTRY(memset) stc %r3,0(%r1) larl %r5,.Lmemset_mvc ex %r4,0(%r5) - br %r14 + BR_EX %r14 .Lmemset_fill_exit: stc %r3,0(%r1) - br %r14 + BR_EX %r14 .Lmemset_xc: xc 0(1,%r1),0(%r1) .Lmemset_mvc: @@ -121,7 +124,7 @@ ENTRY(memcpy) .Lmemcpy_remainder: larl %r5,.Lmemcpy_mvc ex %r4,0(%r5) - br %r14 + BR_EX %r14 .Lmemcpy_loop: mvc 0(256,%r1),0(%r3) la %r1,256(%r1) @@ -159,10 +162,10 @@ ENTRY(__memset\bits) \insn %r3,0(%r1) larl %r5,.L__memset_mvc\bits ex %r4,0(%r5) - br %r14 + BR_EX %r14 .L__memset_exit\bits: \insn %r3,0(%r2) - br %r14 + BR_EX %r14 .L__memset_mvc\bits: mvc \bytes(1,%r1),0(%r1) .endm From c4ec1f0353b342473b93637fd0c3fb524bedbb2d Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 2 May 2018 16:57:54 +1000 Subject: [PATCH 106/347] powerpc/64: Remove unused paca->soft_enabled In commit 4e26bc4a4ed6 ("powerpc/64: Rename soft_enabled to irq_soft_mask") we renamed paca->soft_enabled. But then in commit 8e0b634b1327 ("powerpc/64s: Do not allocate lppaca if we are not virtualized") we added it back. Oops. This happened because the two patches were in flight at the same time and rebased vs each other multiple times, and we missed it in review. Fixes: 8e0b634b1327 ("powerpc/64s: Do not allocate lppaca if we are not virtualized") Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/paca.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 4185f1c96125..3f109a3e3edb 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -165,7 +165,6 @@ struct paca_struct { u64 saved_msr; /* MSR saved here by enter_rtas */ u16 trap_save; /* Used when bad stack is encountered */ u8 irq_soft_mask; /* mask for irq soft masking */ - u8 soft_enabled; /* irq soft-enable flag */ u8 irq_happened; /* irq happened while soft-disabled */ u8 io_sync; /* writel() needs spin_unlock sync */ u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */ From 0b7758aaf6543b9a10c8671db559e9d374a3fd95 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Fri, 4 May 2018 18:44:24 +0530 Subject: [PATCH 107/347] powerpc/trace/syscalls: Update syscall name matching logic On powerpc64 ABIv1, we are enabling syscall tracing for only ~20 syscalls. This is due to commit e145242ea0df6 ("syscalls/core, syscalls/x86: Clean up syscall stub naming convention") which has changed the syscall entry wrapper prefix from "SyS" to "__se_sys". Update the logic for ABIv1 to not just skip the initial dot, but also the "__se_sys" prefix. Fixes: commit e145242ea0df6 ("syscalls/core, syscalls/x86: Clean up syscall stub naming convention") Reported-by: Michael Ellerman Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/ftrace.h | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index 9abddde372ab..24103fa57b54 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -73,13 +73,9 @@ struct dyn_arch_ftrace { #define ARCH_HAS_SYSCALL_MATCH_SYM_NAME static inline bool arch_syscall_match_sym_name(const char *sym, const char *name) { - /* - * Compare the symbol name with the system call name. Skip the .sys or .SyS - * prefix from the symbol name and the sys prefix from the system call name and - * just match the rest. This is only needed on ppc64 since symbol names on - * 32bit do not start with a period so the generic function will work. - */ - return !strcmp(sym + 4, name + 3); + /* We need to skip past the initial dot, and the __se_sys alias */ + return !strcmp(sym + 1, name) || + (!strncmp(sym, ".__se_sys", 9) && !strcmp(sym + 6, name)); } #endif #endif /* CONFIG_FTRACE_SYSCALLS && !__ASSEMBLY__ */ From edf6a2dfe3889daf97e7c164891a87832169e3e4 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Fri, 4 May 2018 18:44:25 +0530 Subject: [PATCH 108/347] powerpc/trace/syscalls: Update syscall name matching logic to account for ppc_ prefix Some syscall entry functions on powerpc are prefixed with ppc_/ppc32_/ppc64_ rather than the usual sys_/__se_sys prefix. fork(), clone(), swapcontext() are some examples of syscalls with such entry points. We need to match against these names when initializing ftrace syscall tracing. Signed-off-by: Naveen N. Rao Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/ftrace.h | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index 24103fa57b54..b2dabd06659d 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -69,13 +69,30 @@ struct dyn_arch_ftrace { #endif #if defined(CONFIG_FTRACE_SYSCALLS) && !defined(__ASSEMBLY__) -#ifdef PPC64_ELF_ABI_v1 +/* + * Some syscall entry functions on powerpc start with "ppc_" (fork and clone, + * for instance) or ppc32_/ppc64_. We should also match the sys_ variant with + * those. + */ #define ARCH_HAS_SYSCALL_MATCH_SYM_NAME +#ifdef PPC64_ELF_ABI_v1 static inline bool arch_syscall_match_sym_name(const char *sym, const char *name) { /* We need to skip past the initial dot, and the __se_sys alias */ return !strcmp(sym + 1, name) || - (!strncmp(sym, ".__se_sys", 9) && !strcmp(sym + 6, name)); + (!strncmp(sym, ".__se_sys", 9) && !strcmp(sym + 6, name)) || + (!strncmp(sym, ".ppc_", 5) && !strcmp(sym + 5, name + 4)) || + (!strncmp(sym, ".ppc32_", 7) && !strcmp(sym + 7, name + 4)) || + (!strncmp(sym, ".ppc64_", 7) && !strcmp(sym + 7, name + 4)); +} +#else +static inline bool arch_syscall_match_sym_name(const char *sym, const char *name) +{ + return !strcmp(sym, name) || + (!strncmp(sym, "__se_sys", 8) && !strcmp(sym + 5, name)) || + (!strncmp(sym, "ppc_", 4) && !strcmp(sym + 4, name + 4)) || + (!strncmp(sym, "ppc32_", 6) && !strcmp(sym + 6, name + 4)) || + (!strncmp(sym, "ppc64_", 6) && !strcmp(sym + 6, name + 4)); } #endif #endif /* CONFIG_FTRACE_SYSCALLS && !__ASSEMBLY__ */ From e8f90c74e6ab64824f3a21521640de7b21050b9d Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 24 Apr 2018 11:08:35 +0800 Subject: [PATCH 109/347] mac80211_hwsim: fix a possible memory leak in hwsim_new_radio_nl() 'hwname' should be freed before leaving from the error handling cases, otherwise it will cause mem leak Fixes: cb1a5bae5684 ("mac80211_hwsim: add permanent mac address option for new radios") Signed-off-by: YueHaibing Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 96d26cfae90b..4a017a0d71ea 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -3236,6 +3236,7 @@ static int hwsim_new_radio_nl(struct sk_buff *msg, struct genl_info *info) GENL_SET_ERR_MSG(info,"MAC is no valid source addr"); NL_SET_BAD_ATTR(info->extack, info->attrs[HWSIM_ATTR_PERM_ADDR]); + kfree(hwname); return -EINVAL; } From 4bf01ca21e2e0e4561d1a03c48c3d740418702db Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 26 Apr 2018 09:31:52 +0200 Subject: [PATCH 110/347] rfkill: gpio: fix memory leak in probe error path Make sure to free the rfkill device in case registration fails during probe. Fixes: 5e7ca3937fbe ("net: rfkill: gpio: convert to resource managed allocation") Cc: stable # 3.13 Cc: Heikki Krogerus Signed-off-by: Johan Hovold Reviewed-by: Heikki Krogerus Signed-off-by: Johannes Berg --- net/rfkill/rfkill-gpio.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c index 41bd496531d4..00192a996be0 100644 --- a/net/rfkill/rfkill-gpio.c +++ b/net/rfkill/rfkill-gpio.c @@ -137,13 +137,18 @@ static int rfkill_gpio_probe(struct platform_device *pdev) ret = rfkill_register(rfkill->rfkill_dev); if (ret < 0) - return ret; + goto err_destroy; platform_set_drvdata(pdev, rfkill); dev_info(&pdev->dev, "%s device registered.\n", rfkill->name); return 0; + +err_destroy: + rfkill_destroy(rfkill->rfkill_dev); + + return ret; } static int rfkill_gpio_remove(struct platform_device *pdev) From d1361b32e6aec7440c01d5c8fcc54189930a342d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 26 Apr 2018 18:17:31 -0700 Subject: [PATCH 111/347] mac80211: fix kernel-doc "bad line" warning Fix 88 instances of a kernel-doc warning: ../include/net/mac80211.h:2083: warning: bad line: > Signed-off-by: Randy Dunlap Cc: linux-wireless@vger.kernel.org Cc: Johannes Berg Signed-off-by: Johannes Berg --- include/net/mac80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index d2279b2d61aa..b2f3a0c018e7 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2080,7 +2080,7 @@ struct ieee80211_txq { * virtual interface might not be given air time for the transmission of * the frame, as it is not synced with the AP/P2P GO yet, and thus the * deauthentication frame might not be transmitted. - > + * * @IEEE80211_HW_DOESNT_SUPPORT_QOS_NDP: The driver (or firmware) doesn't * support QoS NDP for AP probing - that's most likely a driver bug. * From 407879b690ba3a6bf29be896d02dad63463bd1c0 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Fri, 20 Apr 2018 13:49:20 +0300 Subject: [PATCH 112/347] mac80211: Adjust SAE authentication timeout The IEEE P802.11-REVmd D1.0 specification updated the SAE authentication timeout to be 2000 milliseconds (see dot11RSNASAERetransPeriod). Update the SAE timeout setting accordingly. While at it, reduce some code duplication in the timeout configuration. Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/mac80211/mlme.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 6fe72ef182a1..233068756502 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -36,6 +36,7 @@ #define IEEE80211_AUTH_TIMEOUT (HZ / 5) #define IEEE80211_AUTH_TIMEOUT_LONG (HZ / 2) #define IEEE80211_AUTH_TIMEOUT_SHORT (HZ / 10) +#define IEEE80211_AUTH_TIMEOUT_SAE (HZ * 2) #define IEEE80211_AUTH_MAX_TRIES 3 #define IEEE80211_AUTH_WAIT_ASSOC (HZ * 5) #define IEEE80211_ASSOC_TIMEOUT (HZ / 5) @@ -3814,16 +3815,19 @@ static int ieee80211_auth(struct ieee80211_sub_if_data *sdata) tx_flags); if (tx_flags == 0) { - auth_data->timeout = jiffies + IEEE80211_AUTH_TIMEOUT; - auth_data->timeout_started = true; - run_again(sdata, auth_data->timeout); + if (auth_data->algorithm == WLAN_AUTH_SAE) + auth_data->timeout = jiffies + + IEEE80211_AUTH_TIMEOUT_SAE; + else + auth_data->timeout = jiffies + IEEE80211_AUTH_TIMEOUT; } else { auth_data->timeout = round_jiffies_up(jiffies + IEEE80211_AUTH_TIMEOUT_LONG); - auth_data->timeout_started = true; - run_again(sdata, auth_data->timeout); } + auth_data->timeout_started = true; + run_again(sdata, auth_data->timeout); + return 0; } @@ -3894,8 +3898,15 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) ifmgd->status_received = false; if (ifmgd->auth_data && ieee80211_is_auth(fc)) { if (status_acked) { - ifmgd->auth_data->timeout = - jiffies + IEEE80211_AUTH_TIMEOUT_SHORT; + if (ifmgd->auth_data->algorithm == + WLAN_AUTH_SAE) + ifmgd->auth_data->timeout = + jiffies + + IEEE80211_AUTH_TIMEOUT_SAE; + else + ifmgd->auth_data->timeout = + jiffies + + IEEE80211_AUTH_TIMEOUT_SHORT; run_again(sdata, ifmgd->auth_data->timeout); } else { ifmgd->auth_data->timeout = jiffies - 1; From f0b408eebc993310bea3f2daae286c40bd3f063b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 2 May 2018 21:32:47 +0300 Subject: [PATCH 113/347] drm/atomic: Clean old_state/new_state in drm_atomic_state_default_clear() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clear the old_state and new_state pointers for every object in drm_atomic_state_default_clear(). Otherwise drm_atomic_get_{new,old}_*_state() will hand out stale pointers to anyone who hasn't first confirmed that the object is in fact part of the current atomic transcation, if they are called after we've done the ww backoff dance while hanging on to the same drm_atomic_state. For example, handle_conflicting_encoders() looks like it could hit this since it iterates the full connector list and just calls drm_atomic_get_new_connector_state() for each. And I believe we have now witnessed this happening at least once in i915 check_digital_port_conflicts(). Commit 8b69449d2663 ("drm/i915: Remove last references to drm_atomic_get_existing* macros") changed the safe drm_atomic_get_existing_connector_state() to the unsafe drm_atomic_get_new_connector_state(), which opened the doors for this particular bug there as well. v2: Split private objs out to a separate patch (Daniel) Cc: stable@vger.kernel.org Cc: Maarten Lankhorst Cc: Laurent Pinchart Cc: Abhay Kumar Fixes: 581e49fe6b41 ("drm/atomic: Add new iterators over all state, v3.") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180502183247.5746-1-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst Reviewed-by: Daniel Vetter Signed-off-by: Sean Paul --- drivers/gpu/drm/drm_atomic.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 7d25c42f22db..4fa19ed7517a 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -155,6 +155,8 @@ void drm_atomic_state_default_clear(struct drm_atomic_state *state) state->connectors[i].state); state->connectors[i].ptr = NULL; state->connectors[i].state = NULL; + state->connectors[i].old_state = NULL; + state->connectors[i].new_state = NULL; drm_connector_put(connector); } @@ -169,6 +171,8 @@ void drm_atomic_state_default_clear(struct drm_atomic_state *state) state->crtcs[i].ptr = NULL; state->crtcs[i].state = NULL; + state->crtcs[i].old_state = NULL; + state->crtcs[i].new_state = NULL; } for (i = 0; i < config->num_total_plane; i++) { @@ -181,6 +185,8 @@ void drm_atomic_state_default_clear(struct drm_atomic_state *state) state->planes[i].state); state->planes[i].ptr = NULL; state->planes[i].state = NULL; + state->planes[i].old_state = NULL; + state->planes[i].new_state = NULL; } for (i = 0; i < state->num_private_objs; i++) { From b5cb2e5a1f64d882a155add7522247ab0523051e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 2 May 2018 21:32:47 +0300 Subject: [PATCH 114/347] drm/atomic: Clean private obj old_state/new_state in drm_atomic_state_default_clear() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clear the old_state and new_state pointers for private objects in drm_atomic_state_default_clear(). We don't actually have functions to get the new/old state for private objects so getting access to the potentially stale pointers requires a bit more manual labour than for other object types. But let's clear the pointers for private objects as well, if only to avoid future surprises when someone decides to add the functions to get at them. v2: Split private objs to a separate patch (Daniel) Cc: # v4.14+ Cc: Maarten Lankhorst Cc: Laurent Pinchart Cc: Abhay Kumar Fixes: a4370c777406 (drm/atomic: Make private objs proper objects) Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180502183247.5746-1-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst Reviewed-by: Daniel Vetter Signed-off-by: Sean Paul --- drivers/gpu/drm/drm_atomic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 4fa19ed7517a..c825c76edc1d 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -196,6 +196,8 @@ void drm_atomic_state_default_clear(struct drm_atomic_state *state) state->private_objs[i].state); state->private_objs[i].ptr = NULL; state->private_objs[i].state = NULL; + state->private_objs[i].old_state = NULL; + state->private_objs[i].new_state = NULL; } state->num_private_objs = 0; From 164c2416dd40770aba5814f93da835e8a9f7196d Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 9 Mar 2018 15:32:56 -0800 Subject: [PATCH 115/347] drm/vc4: Fix oops dereferencing DPI's connector since panel_bridge. In the cleanup, I didn't notice that we needed to dereference the connector for the bus_format. Fix the regression by looking up the first (and only) connector attached to us, and assume that its bus_format is what we want. Some day it would be good to have that part of display_info attached to the bridge, instead. v2: Fix stray whitespace change Signed-off-by: Eric Anholt Fixes: 7b1298e05310 ("drm/vc4: Switch DPI to using the panel-bridge helper.") Link: https://patchwork.freedesktop.org/patch/msgid/20180309233256.1667-1-eric@anholt.net Reviewed-by: Sean Paul Reviewed-by: Boris Brezillon Signed-off-by: Sean Paul --- drivers/gpu/drm/vc4/vc4_dpi.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_dpi.c b/drivers/gpu/drm/vc4/vc4_dpi.c index 72c9dbd81d7f..f185812970da 100644 --- a/drivers/gpu/drm/vc4/vc4_dpi.c +++ b/drivers/gpu/drm/vc4/vc4_dpi.c @@ -96,7 +96,6 @@ struct vc4_dpi { struct platform_device *pdev; struct drm_encoder *encoder; - struct drm_connector *connector; void __iomem *regs; @@ -164,14 +163,31 @@ static void vc4_dpi_encoder_disable(struct drm_encoder *encoder) static void vc4_dpi_encoder_enable(struct drm_encoder *encoder) { + struct drm_device *dev = encoder->dev; struct drm_display_mode *mode = &encoder->crtc->mode; struct vc4_dpi_encoder *vc4_encoder = to_vc4_dpi_encoder(encoder); struct vc4_dpi *dpi = vc4_encoder->dpi; + struct drm_connector_list_iter conn_iter; + struct drm_connector *connector = NULL, *connector_scan; u32 dpi_c = DPI_ENABLE | DPI_OUTPUT_ENABLE_MODE; int ret; - if (dpi->connector->display_info.num_bus_formats) { - u32 bus_format = dpi->connector->display_info.bus_formats[0]; + /* Look up the connector attached to DPI so we can get the + * bus_format. Ideally the bridge would tell us the + * bus_format we want, but it doesn't yet, so assume that it's + * uniform throughout the bridge chain. + */ + drm_connector_list_iter_begin(dev, &conn_iter); + drm_for_each_connector_iter(connector_scan, &conn_iter) { + if (connector_scan->encoder == encoder) { + connector = connector_scan; + break; + } + } + drm_connector_list_iter_end(&conn_iter); + + if (connector && connector->display_info.num_bus_formats) { + u32 bus_format = connector->display_info.bus_formats[0]; switch (bus_format) { case MEDIA_BUS_FMT_RGB888_1X24: @@ -199,6 +215,9 @@ static void vc4_dpi_encoder_enable(struct drm_encoder *encoder) DRM_ERROR("Unknown media bus format %d\n", bus_format); break; } + } else { + /* Default to 24bit if no connector found. */ + dpi_c |= VC4_SET_FIELD(DPI_FORMAT_24BIT_888_RGB, DPI_FORMAT); } if (mode->flags & DRM_MODE_FLAG_NHSYNC) From 4a9fbfcab19d3f71ad2bf0bcb653c4ee84e69c7f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 18 Apr 2018 17:29:37 +0300 Subject: [PATCH 116/347] drm/omap: silence unititialized variable warning Smatch complains that "area_free" could be used without being initialized. This code is several years old and premusably works fine so this can't be a very serious bug. But it's easy enough to silence the warning. If "area_free" is false at the end of the function then we return -ENOMEM. Signed-off-by: Dan Carpenter Signed-off-by: Tomi Valkeinen Link: https://patchwork.freedesktop.org/patch/msgid/20180418142937.GA13828@mwanda Signed-off-by: Sean Paul --- drivers/gpu/drm/omapdrm/tcm-sita.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/omapdrm/tcm-sita.c b/drivers/gpu/drm/omapdrm/tcm-sita.c index d7f7bc9f061a..817be3c41863 100644 --- a/drivers/gpu/drm/omapdrm/tcm-sita.c +++ b/drivers/gpu/drm/omapdrm/tcm-sita.c @@ -90,7 +90,7 @@ static int l2r_t2b(u16 w, u16 h, u16 a, s16 offset, { int i; unsigned long index; - bool area_free; + bool area_free = false; unsigned long slots_per_band = PAGE_SIZE / slot_bytes; unsigned long bit_offset = (offset > 0) ? offset / slot_bytes : 0; unsigned long curr_bit = bit_offset; From 77eeac24b10fc84d3ffd5b11a897dff88dde244d Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Thu, 29 Mar 2018 13:40:36 +0300 Subject: [PATCH 117/347] drm/omap: fix uninitialized ret variable audio_config function for both HDMI4 and HDMI5 return uninitialized value as the error code if the display is not currently enabled. For some reason this has not caused any issues. Signed-off-by: Tomi Valkeinen Link: https://patchwork.freedesktop.org/patch/msgid/20180329104038.29154-1-tomi.valkeinen@ti.com Reviewed-by: Emil Velikov Reviewed-by: Laurent Pinchart Signed-off-by: Sean Paul --- drivers/gpu/drm/omapdrm/dss/hdmi4.c | 2 +- drivers/gpu/drm/omapdrm/dss/hdmi5.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi4.c b/drivers/gpu/drm/omapdrm/dss/hdmi4.c index 97c88861d67a..5879f45f6fc9 100644 --- a/drivers/gpu/drm/omapdrm/dss/hdmi4.c +++ b/drivers/gpu/drm/omapdrm/dss/hdmi4.c @@ -679,7 +679,7 @@ static int hdmi_audio_config(struct device *dev, struct omap_dss_audio *dss_audio) { struct omap_hdmi *hd = dev_get_drvdata(dev); - int ret; + int ret = 0; mutex_lock(&hd->lock); diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi5.c b/drivers/gpu/drm/omapdrm/dss/hdmi5.c index d28da9ac3e90..ae1a001d1b83 100644 --- a/drivers/gpu/drm/omapdrm/dss/hdmi5.c +++ b/drivers/gpu/drm/omapdrm/dss/hdmi5.c @@ -671,7 +671,7 @@ static int hdmi_audio_config(struct device *dev, struct omap_dss_audio *dss_audio) { struct omap_hdmi *hd = dev_get_drvdata(dev); - int ret; + int ret = 0; mutex_lock(&hd->lock); From 6a0f0c55619f0b82a677cab72e77c3444a5eee58 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Thu, 29 Mar 2018 13:40:37 +0300 Subject: [PATCH 118/347] drm/omap: fix possible NULL ref issue in tiler_reserve_2d tiler_reserve_2d allocates memory but does not check if it got the memory. Add the check and return ENOMEM on failure. Signed-off-by: Tomi Valkeinen Link: https://patchwork.freedesktop.org/patch/msgid/20180329104038.29154-2-tomi.valkeinen@ti.com Reviewed-by: Emil Velikov Reviewed-by: Laurent Pinchart Signed-off-by: Sean Paul --- drivers/gpu/drm/omapdrm/omap_dmm_tiler.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c index f9fa1c90b35c..401c02e9e6b2 100644 --- a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c +++ b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c @@ -401,12 +401,16 @@ int tiler_unpin(struct tiler_block *block) struct tiler_block *tiler_reserve_2d(enum tiler_fmt fmt, u16 w, u16 h, u16 align) { - struct tiler_block *block = kzalloc(sizeof(*block), GFP_KERNEL); + struct tiler_block *block; u32 min_align = 128; int ret; unsigned long flags; u32 slot_bytes; + block = kzalloc(sizeof(*block), GFP_KERNEL); + if (!block) + return ERR_PTR(-ENOMEM); + BUG_ON(!validfmt(fmt)); /* convert width/height to slots */ From 4d6cb5e2fee52af17001e92950f0894304706ee4 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Wed, 2 May 2018 12:11:56 +0300 Subject: [PATCH 119/347] drm/omap: check return value from soc_device_match soc_device_match() can return NULL, so add a check and fail if soc_device_match() fails. Signed-off-by: Tomi Valkeinen Link: https://patchwork.freedesktop.org/patch/msgid/20180502091159.7071-2-tomi.valkeinen@ti.com Reviewed-by: Benoit Parrot Reviewed-by: Peter Ujfalusi Signed-off-by: Sean Paul --- drivers/gpu/drm/omapdrm/dss/hdmi4_core.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi4_core.c b/drivers/gpu/drm/omapdrm/dss/hdmi4_core.c index 35ed2add6189..813ba42f2753 100644 --- a/drivers/gpu/drm/omapdrm/dss/hdmi4_core.c +++ b/drivers/gpu/drm/omapdrm/dss/hdmi4_core.c @@ -922,8 +922,13 @@ int hdmi4_core_init(struct platform_device *pdev, struct hdmi_core_data *core) { const struct hdmi4_features *features; struct resource *res; + const struct soc_device_attribute *soc; - features = soc_device_match(hdmi4_soc_devices)->data; + soc = soc_device_match(hdmi4_soc_devices); + if (!soc) + return -ENODEV; + + features = soc->data; core->cts_swmode = features->cts_swmode; core->audio_use_mclk = features->audio_use_mclk; From e1cdab6e5f9a52e0bdf34363ec0fdab0e2328ba9 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Wed, 2 May 2018 12:11:57 +0300 Subject: [PATCH 120/347] drm/omap: handle error if scale coefs are not found If get_scale_coef functions fail, they return NULL, but we never check the return value and could do a NULL deref. This should not happen as we ought to validate the amount of scaling already earlier, but to be safe, add the necessary check. Signed-off-by: Tomi Valkeinen Link: https://patchwork.freedesktop.org/patch/msgid/20180502091159.7071-3-tomi.valkeinen@ti.com Reviewed-by: Benoit Parrot Reviewed-by: Peter Ujfalusi Signed-off-by: Sean Paul --- drivers/gpu/drm/omapdrm/dss/dispc.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/omapdrm/dss/dispc.c b/drivers/gpu/drm/omapdrm/dss/dispc.c index 5e2e65e88847..b8fdb63e5bb3 100644 --- a/drivers/gpu/drm/omapdrm/dss/dispc.c +++ b/drivers/gpu/drm/omapdrm/dss/dispc.c @@ -828,6 +828,12 @@ static void dispc_ovl_set_scale_coef(struct dispc_device *dispc, h_coef = dispc_ovl_get_scale_coef(fir_hinc, true); v_coef = dispc_ovl_get_scale_coef(fir_vinc, five_taps); + if (!h_coef || !v_coef) { + dev_err(&dispc->pdev->dev, "%s: failed to find scale coefs\n", + __func__); + return; + } + for (i = 0; i < 8; i++) { u32 h, hv; From 7f26eee572167926806b6fcb703bb5f8e452c398 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Wed, 2 May 2018 12:11:58 +0300 Subject: [PATCH 121/347] drm/omap: add missing linefeeds to prints A bunch of debug and error prints are missing linefeeds. Add those. Signed-off-by: Tomi Valkeinen Link: https://patchwork.freedesktop.org/patch/msgid/20180502091159.7071-4-tomi.valkeinen@ti.com Reviewed-by: Benoit Parrot Reviewed-by: Peter Ujfalusi Signed-off-by: Sean Paul --- drivers/gpu/drm/omapdrm/dss/dispc.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/omapdrm/dss/dispc.c b/drivers/gpu/drm/omapdrm/dss/dispc.c index b8fdb63e5bb3..7f3ac6b13b56 100644 --- a/drivers/gpu/drm/omapdrm/dss/dispc.c +++ b/drivers/gpu/drm/omapdrm/dss/dispc.c @@ -2348,7 +2348,7 @@ static int dispc_ovl_calc_scaling_24xx(struct dispc_device *dispc, } if (in_width > maxsinglelinewidth) { - DSSERR("Cannot scale max input width exceeded"); + DSSERR("Cannot scale max input width exceeded\n"); return -EINVAL; } return 0; @@ -2430,13 +2430,13 @@ again: } if (in_width > (maxsinglelinewidth * 2)) { - DSSERR("Cannot setup scaling"); - DSSERR("width exceeds maximum width possible"); + DSSERR("Cannot setup scaling\n"); + DSSERR("width exceeds maximum width possible\n"); return -EINVAL; } if (in_width > maxsinglelinewidth && *five_taps) { - DSSERR("cannot setup scaling with five taps"); + DSSERR("cannot setup scaling with five taps\n"); return -EINVAL; } return 0; @@ -2478,7 +2478,7 @@ static int dispc_ovl_calc_scaling_44xx(struct dispc_device *dispc, in_width > maxsinglelinewidth && ++*decim_x); if (in_width > maxsinglelinewidth) { - DSSERR("Cannot scale width exceeds max line width"); + DSSERR("Cannot scale width exceeds max line width\n"); return -EINVAL; } @@ -2496,7 +2496,7 @@ static int dispc_ovl_calc_scaling_44xx(struct dispc_device *dispc, * bandwidth. Despite what theory says this appears to * be true also for 16-bit color formats. */ - DSSERR("Not enough bandwidth, too much downscaling (x-decimation factor %d > 4)", *decim_x); + DSSERR("Not enough bandwidth, too much downscaling (x-decimation factor %d > 4)\n", *decim_x); return -EINVAL; } @@ -4639,7 +4639,7 @@ static int dispc_errata_i734_wa_init(struct dispc_device *dispc) i734_buf.size, &i734_buf.paddr, GFP_KERNEL); if (!i734_buf.vaddr) { - dev_err(&dispc->pdev->dev, "%s: dma_alloc_writecombine failed", + dev_err(&dispc->pdev->dev, "%s: dma_alloc_writecombine failed\n", __func__); return -ENOMEM; } From 47aaaec818dfd1009d1358974a2931f05dd57203 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Wed, 2 May 2018 12:11:59 +0300 Subject: [PATCH 122/347] drm/omap: handle alloc failures in omap_connector Handle memory allocation failures in omap_connector to avoid NULL derefs. Signed-off-by: Tomi Valkeinen Link: https://patchwork.freedesktop.org/patch/msgid/20180502091159.7071-5-tomi.valkeinen@ti.com Reviewed-by: Benoit Parrot Reviewed-by: Peter Ujfalusi Signed-off-by: Sean Paul --- drivers/gpu/drm/omapdrm/omap_connector.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/omapdrm/omap_connector.c b/drivers/gpu/drm/omapdrm/omap_connector.c index a0d7b1d905e8..5cde26ac937b 100644 --- a/drivers/gpu/drm/omapdrm/omap_connector.c +++ b/drivers/gpu/drm/omapdrm/omap_connector.c @@ -121,6 +121,9 @@ static int omap_connector_get_modes(struct drm_connector *connector) if (dssdrv->read_edid) { void *edid = kzalloc(MAX_EDID, GFP_KERNEL); + if (!edid) + return 0; + if ((dssdrv->read_edid(dssdev, edid, MAX_EDID) > 0) && drm_edid_is_valid(edid)) { drm_mode_connector_update_edid_property( @@ -139,6 +142,9 @@ static int omap_connector_get_modes(struct drm_connector *connector) struct drm_display_mode *mode = drm_mode_create(dev); struct videomode vm = {0}; + if (!mode) + return 0; + dssdrv->get_timings(dssdev, &vm); drm_display_mode_from_videomode(&vm, mode); @@ -200,6 +206,10 @@ static int omap_connector_mode_valid(struct drm_connector *connector, if (!r) { /* check if vrefresh is still valid */ new_mode = drm_mode_duplicate(dev, mode); + + if (!new_mode) + return MODE_BAD; + new_mode->clock = vm.pixelclock / 1000; new_mode->vrefresh = 0; if (mode->vrefresh == drm_mode_vrefresh(new_mode)) From 7a109673899b6d5b147307999efe2392181acee3 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Mon, 9 Apr 2018 08:27:08 +0200 Subject: [PATCH 123/347] drm/bridge/sii8620: add Kconfig dependency on extcon The driver can work with or without extcon framework, but if extcon is build as module, sii8620 should be build as module as well. Fixes: 688838442147 ("drm/bridge/sii8620: use micro-USB cable detection logic to detect MHL") Reported-by: kbuild test robot Signed-off-by: Andrzej Hajda Reviewed-by: Chanwoo Choi Link: https://patchwork.freedesktop.org/patch/msgid/20180409062708.4326-1-a.hajda@samsung.com Signed-off-by: Sean Paul --- drivers/gpu/drm/bridge/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig index 3aa65bdecb0e..684ac626ac53 100644 --- a/drivers/gpu/drm/bridge/Kconfig +++ b/drivers/gpu/drm/bridge/Kconfig @@ -74,6 +74,7 @@ config DRM_SIL_SII8620 tristate "Silicon Image SII8620 HDMI/MHL bridge" depends on OF && RC_CORE select DRM_KMS_HELPER + imply EXTCON help Silicon Image SII8620 HDMI/MHL bridge chip driver. From 12d9f07022dcde261ad16e9a11f45096dc68b03c Mon Sep 17 00:00:00 2001 From: Jianchao Wang Date: Fri, 4 May 2018 16:01:57 +0800 Subject: [PATCH 124/347] nvme: fix use-after-free in nvme_free_ns_head Currently only nvme_ctrl will take a reference counter of nvme_subsystem, nvme_ns_head also needs it. Otherwise nvme_free_ns_head will access the nvme_subsystem.ns_ida which has been freed by __nvme_release_subsystem after all the reference of nvme_subsystem have been released by nvme_free_ctrl. This could cause memory corruption. BUG: KASAN: use-after-free in radix_tree_next_chunk+0x9f/0x4b0 Read of size 8 at addr ffff88036494d2e8 by task fio/1815 CPU: 1 PID: 1815 Comm: fio Kdump: loaded Tainted: G W 4.17.0-rc1+ #18 Hardware name: LENOVO 10MLS0E339/3106, BIOS M1AKT22A 06/27/2017 Call Trace: dump_stack+0x91/0xeb print_address_description+0x6b/0x290 kasan_report+0x261/0x360 radix_tree_next_chunk+0x9f/0x4b0 ida_remove+0x8b/0x180 ida_simple_remove+0x26/0x40 nvme_free_ns_head+0x58/0xc0 __blkdev_put+0x30a/0x3a0 blkdev_close+0x44/0x50 __fput+0x184/0x380 task_work_run+0xaf/0xe0 do_exit+0x501/0x1440 do_group_exit+0x89/0x140 __x64_sys_exit_group+0x28/0x30 do_syscall_64+0x72/0x230 Signed-off-by: Jianchao Wang Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index a3771c5729f5..2cbc378bc0d6 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -99,6 +99,7 @@ static struct class *nvme_subsys_class; static void nvme_ns_remove(struct nvme_ns *ns); static int nvme_revalidate_disk(struct gendisk *disk); +static void nvme_put_subsystem(struct nvme_subsystem *subsys); int nvme_reset_ctrl(struct nvme_ctrl *ctrl) { @@ -350,6 +351,7 @@ static void nvme_free_ns_head(struct kref *ref) ida_simple_remove(&head->subsys->ns_ida, head->instance); list_del_init(&head->entry); cleanup_srcu_struct(&head->srcu); + nvme_put_subsystem(head->subsys); kfree(head); } @@ -2861,6 +2863,9 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, goto out_cleanup_srcu; list_add_tail(&head->entry, &ctrl->subsys->nsheads); + + kref_get(&ctrl->subsys->ref); + return head; out_cleanup_srcu: cleanup_srcu_struct(&head->srcu); From 2abb80dad3afa9170ae19ca03bb7b4cd1ec06d62 Mon Sep 17 00:00:00 2001 From: Yisheng Xie Date: Wed, 25 Apr 2018 16:25:07 +0800 Subject: [PATCH 125/347] perf bench numa: Fix typo in options 'R' means access the data via reads instead of writes, fix this typo. Signed-off-by: Yisheng Xie Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1524644707-11030-1-git-send-email-xieyisheng1@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/numa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 944070e98a2c..63eb49082774 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -175,7 +175,7 @@ static const struct option options[] = { OPT_UINTEGER('s', "nr_secs" , &p0.nr_secs, "max number of seconds to run (default: 5 secs)"), OPT_UINTEGER('u', "usleep" , &p0.sleep_usecs, "usecs to sleep per loop iteration"), - OPT_BOOLEAN('R', "data_reads" , &p0.data_reads, "access the data via writes (can be mixed with -W)"), + OPT_BOOLEAN('R', "data_reads" , &p0.data_reads, "access the data via reads (can be mixed with -W)"), OPT_BOOLEAN('W', "data_writes" , &p0.data_writes, "access the data via writes (can be mixed with -R)"), OPT_BOOLEAN('B', "data_backwards", &p0.data_backwards, "access the data backwards as well"), OPT_BOOLEAN('Z', "data_zero_memset", &p0.data_zero_memset,"access the data via glibc bzero only"), From 0d74d872c3f8b9cb3d096fb932a063b43b37f188 Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Sat, 5 May 2018 22:00:37 +0200 Subject: [PATCH 126/347] driver core: add __printf verification to __ata_ehi_pushv_desc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit __printf is useful to verify format and arguments. Remove the following warning (with W=1): drivers/ata/libata-eh.c:183:10: warning: function might be possible candidate for ‘gnu_printf’ format attribute [-Wsuggest-attribute=format] Signed-off-by: Mathieu Malaterre Signed-off-by: Tejun Heo --- drivers/ata/libata-eh.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index c016829a38fd..513b260bcff1 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -175,8 +175,8 @@ static void ata_eh_handle_port_resume(struct ata_port *ap) { } #endif /* CONFIG_PM */ -static void __ata_ehi_pushv_desc(struct ata_eh_info *ehi, const char *fmt, - va_list args) +static __printf(2, 0) void __ata_ehi_pushv_desc(struct ata_eh_info *ehi, + const char *fmt, va_list args) { ehi->desc_len += vscnprintf(ehi->desc + ehi->desc_len, ATA_EH_DESC_LEN - ehi->desc_len, From f142f08bf7ecc41c3e71e05b765ea654047cf0c0 Mon Sep 17 00:00:00 2001 From: Florian La Roche Date: Sun, 6 May 2018 19:34:07 +0200 Subject: [PATCH 127/347] Fix typo in comment. CONFIG_PRREMPT -> CONFIG_PREEMPT Signed-off-by: Florian La Roche Signed-off-by: Linus Torvalds --- init/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/init/main.c b/init/main.c index b795aa341a3a..a404936d85d8 100644 --- a/init/main.c +++ b/init/main.c @@ -423,7 +423,7 @@ static noinline void __ref rest_init(void) /* * Enable might_sleep() and smp_processor_id() checks. - * They cannot be enabled earlier because with CONFIG_PRREMPT=y + * They cannot be enabled earlier because with CONFIG_PREEMPT=y * kernel_thread() would trigger might_sleep() splats. With * CONFIG_PREEMPT_VOLUNTARY=y the init task might have scheduled * already, but it's stuck on the kthreadd_done completion. From ea9032fa2e4e91ae15facff5b7c4b2a84c1e40af Mon Sep 17 00:00:00 2001 From: William Cohen Date: Thu, 3 May 2018 15:50:32 -0400 Subject: [PATCH 128/347] perf vendor events intel: Remove duplicated entry for westmereep-dp in mapfile.csv Signed-off-by: William Cohen Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180503195032.28871-1-wcohen@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/mapfile.csv | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index 93656f2fd53a..7e3cce3bcf3b 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -29,7 +29,6 @@ GenuineIntel-6-4D,v13,silvermont,core GenuineIntel-6-4C,v13,silvermont,core GenuineIntel-6-2A,v15,sandybridge,core GenuineIntel-6-2C,v2,westmereep-dp,core -GenuineIntel-6-2C,v2,westmereep-dp,core GenuineIntel-6-25,v2,westmereep-sp,core GenuineIntel-6-2F,v2,westmereex,core GenuineIntel-6-55,v1,skylakex,core From 605e71cd007b1897a4c5eda952acadd61b6a98f0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 7 May 2018 12:22:37 -0300 Subject: [PATCH 129/347] tools headers: Sync x86 cpufeatures.h with the kernel sources The 912413057395 ("x86/cpufeatures: Enumerate cldemote instruction") doesn't requires changes in the tools, just copy it to silence this warning: Warning: Kernel ABI header at 'tools/arch/x86/include/asm/cpufeatures.h' differs from latest version at 'arch/x86/include/asm/cpufeatures.h' Cc: Adrian Hunter Cc: David Ahern Cc: Fenghua Yu Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-1vo20y5z2drlujfpltjudwk8@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/cpufeatures.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index d554c11e01ff..578793e97431 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -320,6 +320,7 @@ #define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ #define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ #define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ +#define X86_FEATURE_CLDEMOTE (16*32+25) /* CLDEMOTE instruction */ /* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */ #define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */ From d0e9f4c1a4e777c4d1122758137fc47238f4f015 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 7 May 2018 12:26:23 -0300 Subject: [PATCH 130/347] tools headers kvm: Sync uapi/linux/kvm.h with the kernel sources MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The changes in 5e62493f1a70 ("x86/headers/UAPI: Move DISABLE_EXITS KVM capability bits to the UAPI") do not requires changes in the tooling nor will trigger the automatic update of used ioctl string tables, copy it to silence this build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/kvm.h' differs from latest version at 'include/uapi/linux/kvm.h' Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: KarimAllah Ahmed Cc: Namhyung Kim Cc: Radim Krčmář Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-8o5auh1lqglsgl1q97x00tlv@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/kvm.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 1065006c9bf5..b02c41e53d56 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -676,6 +676,13 @@ struct kvm_ioeventfd { __u8 pad[36]; }; +#define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0) +#define KVM_X86_DISABLE_EXITS_HTL (1 << 1) +#define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2) +#define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \ + KVM_X86_DISABLE_EXITS_HTL | \ + KVM_X86_DISABLE_EXITS_PAUSE) + /* for KVM_ENABLE_CAP */ struct kvm_enable_cap { /* in */ From 5981ec36688c49b7262f399c1a10edecc6e55ed2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 7 May 2018 13:23:19 -0300 Subject: [PATCH 131/347] tools headers kvm: Sync ARM UAPI headers with the kernel sources To sync with the changes made in 85bd0ba1ff98 ("arm/arm64: KVM: Add PSCI version selection API"), that do not cause any changes in the tools, just to silence the build warning: Warning: Kernel ABI header at 'tools/arch/arm/include/uapi/asm/kvm.h' differs from latest version at 'arch/arm/include/uapi/asm/kvm.h' Warning: Kernel ABI header at 'tools/arch/arm64/include/uapi/asm/kvm.h' differs from latest version at 'arch/arm64/include/uapi/asm/kvm.h' Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Marc Zyngier Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-7u37pv09xtvet1ll27840w73@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/arm/include/uapi/asm/kvm.h | 6 ++++++ tools/arch/arm64/include/uapi/asm/kvm.h | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/tools/arch/arm/include/uapi/asm/kvm.h b/tools/arch/arm/include/uapi/asm/kvm.h index 2ba95d6fe852..caae4843cb70 100644 --- a/tools/arch/arm/include/uapi/asm/kvm.h +++ b/tools/arch/arm/include/uapi/asm/kvm.h @@ -195,6 +195,12 @@ struct kvm_arch_memory_slot { #define KVM_REG_ARM_VFP_FPINST 0x1009 #define KVM_REG_ARM_VFP_FPINST2 0x100A +/* KVM-as-firmware specific pseudo-registers */ +#define KVM_REG_ARM_FW (0x0014 << KVM_REG_ARM_COPROC_SHIFT) +#define KVM_REG_ARM_FW_REG(r) (KVM_REG_ARM | KVM_REG_SIZE_U64 | \ + KVM_REG_ARM_FW | ((r) & 0xffff)) +#define KVM_REG_ARM_PSCI_VERSION KVM_REG_ARM_FW_REG(0) + /* Device Control API: ARM VGIC */ #define KVM_DEV_ARM_VGIC_GRP_ADDR 0 #define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1 diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index 9abbf3044654..04b3256f8e6d 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -206,6 +206,12 @@ struct kvm_arch_memory_slot { #define KVM_REG_ARM_TIMER_CNT ARM64_SYS_REG(3, 3, 14, 3, 2) #define KVM_REG_ARM_TIMER_CVAL ARM64_SYS_REG(3, 3, 14, 0, 2) +/* KVM-as-firmware specific pseudo-registers */ +#define KVM_REG_ARM_FW (0x0014 << KVM_REG_ARM_COPROC_SHIFT) +#define KVM_REG_ARM_FW_REG(r) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \ + KVM_REG_ARM_FW | ((r) & 0xffff)) +#define KVM_REG_ARM_PSCI_VERSION KVM_REG_ARM_FW_REG(0) + /* Device Control API: ARM VGIC */ #define KVM_DEV_ARM_VGIC_GRP_ADDR 0 #define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1 From 914eac248d876f9c00cd1792ffec3d182c863f13 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Fri, 20 Apr 2018 13:49:19 +0300 Subject: [PATCH 132/347] mac80211: use timeout from the AddBA response instead of the request 2016 spec, section 10.24.2 specifies that the block ack timeout in the ADD BA request is advisory. That means we should check the value in the response and act upon it (same as buffer size). Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/mac80211/agg-tx.c | 4 ++++ net/mac80211/tx.c | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 595c662a61e8..ac4295296514 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -8,6 +8,7 @@ * Copyright 2007, Michael Wu * Copyright 2007-2010, Intel Corporation * Copyright(c) 2015-2017 Intel Deutschland GmbH + * Copyright (C) 2018 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -970,6 +971,9 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, sta->ampdu_mlme.addba_req_num[tid] = 0; + tid_tx->timeout = + le16_to_cpu(mgmt->u.action.u.addba_resp.timeout); + if (tid_tx->timeout) { mod_timer(&tid_tx->session_timer, TU_TO_EXP_TIME(tid_tx->timeout)); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 535de3161a78..05a265cd573d 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -4,6 +4,7 @@ * Copyright 2006-2007 Jiri Benc * Copyright 2007 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH + * Copyright (C) 2018 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -1135,7 +1136,7 @@ static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx, } /* reset session timer */ - if (reset_agg_timer && tid_tx->timeout) + if (reset_agg_timer) tid_tx->last_tx = jiffies; return queued; From d1ecfa9d1f402366b1776fbf84e635678a51414f Mon Sep 17 00:00:00 2001 From: "van der Linden, Frank" Date: Fri, 4 May 2018 16:11:00 -0400 Subject: [PATCH 133/347] x86/xen: Reset VCPU0 info pointer after shared_info remap This patch fixes crashes during boot for HVM guests on older (pre HVM vector callback) Xen versions. Without this, current kernels will always fail to boot on those Xen versions. Sample stack trace: BUG: unable to handle kernel paging request at ffffffffff200000 IP: __xen_evtchn_do_upcall+0x1e/0x80 PGD 1e0e067 P4D 1e0e067 PUD 1e10067 PMD 235c067 PTE 0 Oops: 0002 [#1] SMP PTI Modules linked in: CPU: 0 PID: 512 Comm: kworker/u2:0 Not tainted 4.14.33-52.13.amzn1.x86_64 #1 Hardware name: Xen HVM domU, BIOS 3.4.3.amazon 11/11/2016 task: ffff88002531d700 task.stack: ffffc90000480000 RIP: 0010:__xen_evtchn_do_upcall+0x1e/0x80 RSP: 0000:ffff880025403ef0 EFLAGS: 00010046 RAX: ffffffff813cc760 RBX: ffffffffff200000 RCX: ffffc90000483ef0 RDX: ffff880020540a00 RSI: ffff880023c78000 RDI: 000000000000001c RBP: 0000000000000001 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: ffff880025403f5c R14: 0000000000000000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff880025400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffff200000 CR3: 0000000001e0a000 CR4: 00000000000006f0 Call Trace: do_hvm_evtchn_intr+0xa/0x10 __handle_irq_event_percpu+0x43/0x1a0 handle_irq_event_percpu+0x20/0x50 handle_irq_event+0x39/0x60 handle_fasteoi_irq+0x80/0x140 handle_irq+0xaf/0x120 do_IRQ+0x41/0xd0 common_interrupt+0x7d/0x7d During boot, the HYPERVISOR_shared_info page gets remapped to make it work with KASLR. This means that any pointer derived from it needs to be adjusted. The only value that this applies to is the vcpu_info pointer for VCPU 0. For PV and HVM with the callback vector feature, this gets done via the smp_ops prepare_boot_cpu callback. Older Xen versions do not support the HVM callback vector, so there is no Xen-specific smp_ops set up in that scenario. So, the vcpu_info pointer for VCPU 0 never gets set to the proper value, and the first reference of it will be bad. Fix this by resetting it immediately after the remap. Signed-off-by: Frank van der Linden Reviewed-by: Eduardo Valentin Reviewed-by: Alakesh Haloi Reviewed-by: Vallish Vaidyeshwara Reviewed-by: Boris Ostrovsky Cc: Juergen Gross Cc: Boris Ostrovsky Cc: xen-devel@lists.xenproject.org Signed-off-by: Boris Ostrovsky --- arch/x86/xen/enlighten_hvm.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index 826898701045..19c1ff542387 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -65,6 +65,19 @@ static void __init xen_hvm_init_mem_mapping(void) { early_memunmap(HYPERVISOR_shared_info, PAGE_SIZE); HYPERVISOR_shared_info = __va(PFN_PHYS(shared_info_pfn)); + + /* + * The virtual address of the shared_info page has changed, so + * the vcpu_info pointer for VCPU 0 is now stale. + * + * The prepare_boot_cpu callback will re-initialize it via + * xen_vcpu_setup, but we can't rely on that to be called for + * old Xen versions (xen_have_vector_callback == 0). + * + * It is, in any case, bad to have a stale vcpu_info pointer + * so reset it now. + */ + xen_vcpu_info_reset(0); } static void __init init_hvm_pv_info(void) From 23a4d7fd34856da8218c4cfc23dba7a6ec0a423a Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 25 Apr 2018 18:35:26 +0200 Subject: [PATCH 134/347] s390/ftrace: use expoline for indirect branches The return from the ftrace_stub, _mcount, ftrace_caller and return_to_handler functions is done with "br %r14" and "br %r1". These are indirect branches as well and need to use execute trampolines for CONFIG_EXPOLINE=y. The ftrace_caller function is a special case as it returns to the start of a function and may only use %r0 and %r1. For a pre z10 machine the standard execute trampoline uses a LARL + EX to do this, but this requires *two* registers in the range %r1..%r15. To get around this the 'br %r1' located in the lowcore is used, then the EX instruction does not need an address register. But the lowcore trick may only be used for pre z14 machines, with noexec=on the mapping for the first page may not contain instructions. The solution for that is an ALTERNATIVE in the expoline THUNK generated by 'GEN_BR_THUNK %r1' to switch to EXRL, this relies on the fact that a machine that supports noexec=on has EXRL as well. Cc: stable@vger.kernel.org # 4.16 Fixes: f19fbd5ed6 ("s390: introduce execute-trampolines for branches") Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/nospec-insn.h | 12 ++++++++++++ arch/s390/kernel/asm-offsets.c | 1 + arch/s390/kernel/mcount.S | 14 +++++++++----- 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h index 440689cbcf51..7d7640e1cf90 100644 --- a/arch/s390/include/asm/nospec-insn.h +++ b/arch/s390/include/asm/nospec-insn.h @@ -2,12 +2,16 @@ #ifndef _ASM_S390_NOSPEC_ASM_H #define _ASM_S390_NOSPEC_ASM_H +#include +#include #include #ifdef __ASSEMBLY__ #ifdef CONFIG_EXPOLINE +_LC_BR_R1 = __LC_BR_R1 + /* * The expoline macros are used to create thunks in the same format * as gcc generates them. The 'comdat' section flag makes sure that @@ -78,13 +82,21 @@ .endm .macro __THUNK_EX_BR reg,ruse + # Be very careful when adding instructions to this macro! + # The ALTERNATIVE replacement code has a .+10 which targets + # the "br \reg" after the code has been patched. #ifdef CONFIG_HAVE_MARCH_Z10_FEATURES exrl 0,555f j . #else + .ifc \reg,%r1 + ALTERNATIVE "ex %r0,_LC_BR_R1", ".insn ril,0xc60000000000,0,.+10", 35 + j . + .else larl \ruse,555f ex 0,0(\ruse) j . + .endif #endif 555: br \reg .endm diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index eb2a5c0443cd..11aea745a2a6 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -181,6 +181,7 @@ int main(void) OFFSET(__LC_MACHINE_FLAGS, lowcore, machine_flags); OFFSET(__LC_PREEMPT_COUNT, lowcore, preempt_count); OFFSET(__LC_GMAP, lowcore, gmap); + OFFSET(__LC_BR_R1, lowcore, br_r1_trampoline); /* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */ OFFSET(__LC_DUMP_REIPL, lowcore, ipib); /* hardware defined lowcore locations 0x1000 - 0x18ff */ diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 82df7d80fab2..27110f3294ed 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -9,13 +9,17 @@ #include #include #include +#include #include #include + GEN_BR_THUNK %r1 + GEN_BR_THUNK %r14 + .section .kprobes.text, "ax" ENTRY(ftrace_stub) - br %r14 + BR_EX %r14 #define STACK_FRAME_SIZE (STACK_FRAME_OVERHEAD + __PT_SIZE) #define STACK_PTREGS (STACK_FRAME_OVERHEAD) @@ -23,7 +27,7 @@ ENTRY(ftrace_stub) #define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW) ENTRY(_mcount) - br %r14 + BR_EX %r14 EXPORT_SYMBOL(_mcount) @@ -53,7 +57,7 @@ ENTRY(ftrace_caller) #endif lgr %r3,%r14 la %r5,STACK_PTREGS(%r15) - basr %r14,%r1 + BASR_EX %r14,%r1 #ifdef CONFIG_FUNCTION_GRAPH_TRACER # The j instruction gets runtime patched to a nop instruction. # See ftrace_enable_ftrace_graph_caller. @@ -68,7 +72,7 @@ ftrace_graph_caller_end: #endif lg %r1,(STACK_PTREGS_PSW+8)(%r15) lmg %r2,%r15,(STACK_PTREGS_GPRS+2*8)(%r15) - br %r1 + BR_EX %r1 #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -81,6 +85,6 @@ ENTRY(return_to_handler) aghi %r15,STACK_FRAME_OVERHEAD lgr %r14,%r2 lmg %r2,%r5,32(%r15) - br %r14 + BR_EX %r14 #endif From c50c84c3ac4d5db683904bdb3257798b6ef980ae Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 25 Apr 2018 18:41:30 +0200 Subject: [PATCH 135/347] s390/kernel: use expoline for indirect branches The assember code in arch/s390/kernel uses a few more indirect branches which need to be done with execute trampolines for CONFIG_EXPOLINE=y. Cc: stable@vger.kernel.org # 4.16 Fixes: f19fbd5ed6 ("s390: introduce execute-trampolines for branches") Reviewed-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/base.S | 24 ++++++++++++++---------- arch/s390/kernel/reipl.S | 7 +++++-- arch/s390/kernel/swsusp.S | 10 ++++++---- 3 files changed, 25 insertions(+), 16 deletions(-) diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S index f6c56009e822..b65874b0b412 100644 --- a/arch/s390/kernel/base.S +++ b/arch/s390/kernel/base.S @@ -9,18 +9,22 @@ #include #include +#include #include #include + GEN_BR_THUNK %r9 + GEN_BR_THUNK %r14 + ENTRY(s390_base_mcck_handler) basr %r13,0 0: lg %r15,__LC_PANIC_STACK # load panic stack aghi %r15,-STACK_FRAME_OVERHEAD larl %r1,s390_base_mcck_handler_fn - lg %r1,0(%r1) - ltgr %r1,%r1 + lg %r9,0(%r1) + ltgr %r9,%r9 jz 1f - basr %r14,%r1 + BASR_EX %r14,%r9 1: la %r1,4095 lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1) lpswe __LC_MCK_OLD_PSW @@ -37,10 +41,10 @@ ENTRY(s390_base_ext_handler) basr %r13,0 0: aghi %r15,-STACK_FRAME_OVERHEAD larl %r1,s390_base_ext_handler_fn - lg %r1,0(%r1) - ltgr %r1,%r1 + lg %r9,0(%r1) + ltgr %r9,%r9 jz 1f - basr %r14,%r1 + BASR_EX %r14,%r9 1: lmg %r0,%r15,__LC_SAVE_AREA_ASYNC ni __LC_EXT_OLD_PSW+1,0xfd # clear wait state bit lpswe __LC_EXT_OLD_PSW @@ -57,10 +61,10 @@ ENTRY(s390_base_pgm_handler) basr %r13,0 0: aghi %r15,-STACK_FRAME_OVERHEAD larl %r1,s390_base_pgm_handler_fn - lg %r1,0(%r1) - ltgr %r1,%r1 + lg %r9,0(%r1) + ltgr %r9,%r9 jz 1f - basr %r14,%r1 + BASR_EX %r14,%r9 lmg %r0,%r15,__LC_SAVE_AREA_SYNC lpswe __LC_PGM_OLD_PSW 1: lpswe disabled_wait_psw-0b(%r13) @@ -117,7 +121,7 @@ ENTRY(diag308_reset) larl %r4,.Lcontinue_psw # Restore PSW flags lpswe 0(%r4) .Lcontinue: - br %r14 + BR_EX %r14 .align 16 .Lrestart_psw: .long 0x00080000,0x80000000 + .Lrestart_part2 diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S index 73cc3750f0d3..7f14adf512c6 100644 --- a/arch/s390/kernel/reipl.S +++ b/arch/s390/kernel/reipl.S @@ -7,8 +7,11 @@ #include #include +#include #include + GEN_BR_THUNK %r9 + # # Issue "store status" for the current CPU to its prefix page # and call passed function afterwards @@ -67,9 +70,9 @@ ENTRY(store_status) st %r4,0(%r1) st %r5,4(%r1) stg %r2,8(%r1) - lgr %r1,%r2 + lgr %r9,%r2 lgr %r2,%r3 - br %r1 + BR_EX %r9 .section .bss .align 8 diff --git a/arch/s390/kernel/swsusp.S b/arch/s390/kernel/swsusp.S index e99187149f17..a049a7b9d6e8 100644 --- a/arch/s390/kernel/swsusp.S +++ b/arch/s390/kernel/swsusp.S @@ -13,6 +13,7 @@ #include #include #include +#include #include /* @@ -24,6 +25,8 @@ * (see below) in the resume process. * This function runs with disabled interrupts. */ + GEN_BR_THUNK %r14 + .section .text ENTRY(swsusp_arch_suspend) stmg %r6,%r15,__SF_GPRS(%r15) @@ -103,7 +106,7 @@ ENTRY(swsusp_arch_suspend) spx 0x318(%r1) lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15) lghi %r2,0 - br %r14 + BR_EX %r14 /* * Restore saved memory image to correct place and restore register context. @@ -197,11 +200,10 @@ pgm_check_entry: larl %r15,init_thread_union ahi %r15,1<<(PAGE_SHIFT+THREAD_SIZE_ORDER) larl %r2,.Lpanic_string - larl %r3,sclp_early_printk lghi %r1,0 sam31 sigp %r1,%r0,SIGP_SET_ARCHITECTURE - basr %r14,%r3 + brasl %r14,sclp_early_printk larl %r3,.Ldisabled_wait_31 lpsw 0(%r3) 4: @@ -267,7 +269,7 @@ restore_registers: /* Return 0 */ lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15) lghi %r2,0 - br %r14 + BR_EX %r14 .section .data..nosave,"aw",@progbits .align 8 From 4253b0e0627ee3461e64c2495c616f1c8f6b127b Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 24 Apr 2018 08:23:54 +0200 Subject: [PATCH 136/347] s390: move spectre sysfs attribute code The nospec-branch.c file is compiled without the gcc options to generate expoline thunks. The return branch of the sysfs show functions cpu_show_spectre_v1 and cpu_show_spectre_v2 is an indirect branch as well. These need to be compiled with expolines. Move the sysfs functions for spectre reporting to a separate file and loose an '.' for one of the messages. Cc: stable@vger.kernel.org # 4.16 Fixes: d424986f1d ("s390: add sysfs attributes for spectre") Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/Makefile | 1 + arch/s390/kernel/nospec-branch.c | 19 ------------------- arch/s390/kernel/nospec-sysfs.c | 21 +++++++++++++++++++++ 3 files changed, 22 insertions(+), 19 deletions(-) create mode 100644 arch/s390/kernel/nospec-sysfs.c diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 84ea6225efb4..f92dd8ed3884 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -65,6 +65,7 @@ obj-y += nospec-branch.o extra-y += head.o head64.o vmlinux.lds +obj-$(CONFIG_SYSFS) += nospec-sysfs.o CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE) obj-$(CONFIG_MODULES) += module.o diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c index 46d49a11663f..834cf29f2599 100644 --- a/arch/s390/kernel/nospec-branch.c +++ b/arch/s390/kernel/nospec-branch.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include #include -#include #include static int __init nobp_setup_early(char *str) @@ -44,24 +43,6 @@ static int __init nospec_report(void) } arch_initcall(nospec_report); -#ifdef CONFIG_SYSFS -ssize_t cpu_show_spectre_v1(struct device *dev, - struct device_attribute *attr, char *buf) -{ - return sprintf(buf, "Mitigation: __user pointer sanitization\n"); -} - -ssize_t cpu_show_spectre_v2(struct device *dev, - struct device_attribute *attr, char *buf) -{ - if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) - return sprintf(buf, "Mitigation: execute trampolines\n"); - if (__test_facility(82, S390_lowcore.alt_stfle_fac_list)) - return sprintf(buf, "Mitigation: limited branch prediction.\n"); - return sprintf(buf, "Vulnerable\n"); -} -#endif - #ifdef CONFIG_EXPOLINE int nospec_disable = IS_ENABLED(CONFIG_EXPOLINE_OFF); diff --git a/arch/s390/kernel/nospec-sysfs.c b/arch/s390/kernel/nospec-sysfs.c new file mode 100644 index 000000000000..8affad5f18cb --- /dev/null +++ b/arch/s390/kernel/nospec-sysfs.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include + +ssize_t cpu_show_spectre_v1(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "Mitigation: __user pointer sanitization\n"); +} + +ssize_t cpu_show_spectre_v2(struct device *dev, + struct device_attribute *attr, char *buf) +{ + if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) + return sprintf(buf, "Mitigation: execute trampolines\n"); + if (__test_facility(82, S390_lowcore.alt_stfle_fac_list)) + return sprintf(buf, "Mitigation: limited branch prediction\n"); + return sprintf(buf, "Vulnerable\n"); +} From 9f18fff63cfd6f559daa1eaae60640372c65f84b Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 24 Apr 2018 11:18:49 +0200 Subject: [PATCH 137/347] s390: remove indirect branch from do_softirq_own_stack The inline assembly to call __do_softirq on the irq stack uses an indirect branch. This can be replaced with a normal relative branch. Cc: stable@vger.kernel.org # 4.16 Fixes: f19fbd5ed6 ("s390: introduce execute-trampolines for branches") Reviewed-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/irq.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 94f2099bceb0..3d17c41074ca 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -176,10 +176,9 @@ void do_softirq_own_stack(void) new -= STACK_FRAME_OVERHEAD; ((struct stack_frame *) new)->back_chain = old; asm volatile(" la 15,0(%0)\n" - " basr 14,%2\n" + " brasl 14,__do_softirq\n" " la 15,0(%1)\n" - : : "a" (new), "a" (old), - "a" (__do_softirq) + : : "a" (new), "a" (old) : "0", "1", "2", "3", "4", "5", "14", "cc", "memory" ); } else { From 6deaa3bbca804b2a3627fd685f75de64da7be535 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 24 Apr 2018 15:32:08 +0200 Subject: [PATCH 138/347] s390: extend expoline to BC instructions The BPF JIT uses a 'b (%r)' instruction in the definition of the sk_load_word and sk_load_half functions. Add support for branch-on-condition instructions contained in the thunk code of an expoline. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/nospec-insn.h | 57 +++++++++++++++++++++++++++++ arch/s390/kernel/nospec-branch.c | 25 ++++++++++--- 2 files changed, 77 insertions(+), 5 deletions(-) diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h index 7d7640e1cf90..a01f81186e86 100644 --- a/arch/s390/include/asm/nospec-insn.h +++ b/arch/s390/include/asm/nospec-insn.h @@ -35,10 +35,18 @@ _LC_BR_R1 = __LC_BR_R1 __THUNK_PROLOG_NAME __s390x_indirect_jump_r\r2\()use_r\r1 .endm + .macro __THUNK_PROLOG_BC d0,r1,r2 + __THUNK_PROLOG_NAME __s390x_indirect_branch_\d0\()_\r2\()use_\r1 + .endm + .macro __THUNK_BR r1,r2 jg __s390x_indirect_jump_r\r2\()use_r\r1 .endm + .macro __THUNK_BC d0,r1,r2 + jg __s390x_indirect_branch_\d0\()_\r2\()use_\r1 + .endm + .macro __THUNK_BRASL r1,r2,r3 brasl \r1,__s390x_indirect_jump_r\r3\()use_r\r2 .endm @@ -81,6 +89,23 @@ _LC_BR_R1 = __LC_BR_R1 .endif .endm + .macro __DECODE_DRR expand,disp,reg,ruse + .set __decode_fail,1 + .irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + .ifc \reg,%r\r1 + .irp r2,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + .ifc \ruse,%r\r2 + \expand \disp,\r1,\r2 + .set __decode_fail,0 + .endif + .endr + .endif + .endr + .if __decode_fail == 1 + .error "__DECODE_DRR failed" + .endif + .endm + .macro __THUNK_EX_BR reg,ruse # Be very careful when adding instructions to this macro! # The ALTERNATIVE replacement code has a .+10 which targets @@ -101,17 +126,42 @@ _LC_BR_R1 = __LC_BR_R1 555: br \reg .endm + .macro __THUNK_EX_BC disp,reg,ruse +#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES + exrl 0,556f + j . +#else + larl \ruse,556f + ex 0,0(\ruse) + j . +#endif +556: b \disp(\reg) + .endm + .macro GEN_BR_THUNK reg,ruse=%r1 __DECODE_RR __THUNK_PROLOG_BR,\reg,\ruse __THUNK_EX_BR \reg,\ruse __THUNK_EPILOG .endm + .macro GEN_B_THUNK disp,reg,ruse=%r1 + __DECODE_DRR __THUNK_PROLOG_BC,\disp,\reg,\ruse + __THUNK_EX_BC \disp,\reg,\ruse + __THUNK_EPILOG + .endm + .macro BR_EX reg,ruse=%r1 557: __DECODE_RR __THUNK_BR,\reg,\ruse .pushsection .s390_indirect_branches,"a",@progbits .long 557b-. .popsection + .endm + + .macro B_EX disp,reg,ruse=%r1 +558: __DECODE_DRR __THUNK_BC,\disp,\reg,\ruse + .pushsection .s390_indirect_branches,"a",@progbits + .long 558b-. + .popsection .endm .macro BASR_EX rsave,rtarget,ruse=%r1 @@ -123,10 +173,17 @@ _LC_BR_R1 = __LC_BR_R1 #else .macro GEN_BR_THUNK reg,ruse=%r1 + .endm + + .macro GEN_B_THUNK disp,reg,ruse=%r1 .endm .macro BR_EX reg,ruse=%r1 br \reg + .endm + + .macro B_EX disp,reg,ruse=%r1 + b \disp(\reg) .endm .macro BASR_EX rsave,rtarget,ruse=%r1 diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c index 834cf29f2599..8ad6a7128b3a 100644 --- a/arch/s390/kernel/nospec-branch.c +++ b/arch/s390/kernel/nospec-branch.c @@ -93,7 +93,6 @@ static void __init_or_module __nospec_revert(s32 *start, s32 *end) s32 *epo; /* Second part of the instruction replace is always a nop */ - memcpy(insnbuf + 2, (char[]) { 0x47, 0x00, 0x00, 0x00 }, 4); for (epo = start; epo < end; epo++) { instr = (u8 *) epo + *epo; if (instr[0] == 0xc0 && (instr[1] & 0x0f) == 0x04) @@ -114,18 +113,34 @@ static void __init_or_module __nospec_revert(s32 *start, s32 *end) br = thunk + (*(int *)(thunk + 2)) * 2; else continue; - if (br[0] != 0x07 || (br[1] & 0xf0) != 0xf0) + /* Check for unconditional branch 0x07f? or 0x47f???? */ + if ((br[0] & 0xbf) != 0x07 || (br[1] & 0xf0) != 0xf0) continue; + + memcpy(insnbuf + 2, (char[]) { 0x47, 0x00, 0x07, 0x00 }, 4); switch (type) { case BRCL_EXPOLINE: - /* brcl to thunk, replace with br + nop */ insnbuf[0] = br[0]; insnbuf[1] = (instr[1] & 0xf0) | (br[1] & 0x0f); + if (br[0] == 0x47) { + /* brcl to b, replace with bc + nopr */ + insnbuf[2] = br[2]; + insnbuf[3] = br[3]; + } else { + /* brcl to br, replace with bcr + nop */ + } break; case BRASL_EXPOLINE: - /* brasl to thunk, replace with basr + nop */ - insnbuf[0] = 0x0d; insnbuf[1] = (instr[1] & 0xf0) | (br[1] & 0x0f); + if (br[0] == 0x47) { + /* brasl to b, replace with bas + nopr */ + insnbuf[0] = 0x4d; + insnbuf[2] = br[2]; + insnbuf[3] = br[3]; + } else { + /* brasl to br, replace with basr + nop */ + insnbuf[0] = 0x0d; + } break; } From de5cb6eb514ebe241e3edeb290cb41deb380b81d Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 23 Apr 2018 14:31:36 +0200 Subject: [PATCH 139/347] s390: use expoline thunks in the BPF JIT The BPF JIT need safe guarding against spectre v2 in the sk_load_xxx assembler stubs and the indirect branches generated by the JIT itself need to be converted to expolines. Signed-off-by: Martin Schwidefsky --- arch/s390/net/bpf_jit.S | 16 +++++---- arch/s390/net/bpf_jit_comp.c | 63 ++++++++++++++++++++++++++++++++++-- 2 files changed, 71 insertions(+), 8 deletions(-) diff --git a/arch/s390/net/bpf_jit.S b/arch/s390/net/bpf_jit.S index 25bb4643c4f4..9f794869c1b0 100644 --- a/arch/s390/net/bpf_jit.S +++ b/arch/s390/net/bpf_jit.S @@ -9,6 +9,7 @@ */ #include +#include #include "bpf_jit.h" /* @@ -54,7 +55,7 @@ ENTRY(sk_load_##NAME##_pos); \ clg %r3,STK_OFF_HLEN(%r15); /* Offset + SIZE > hlen? */ \ jh sk_load_##NAME##_slow; \ LOAD %r14,-SIZE(%r3,%r12); /* Get data from skb */ \ - b OFF_OK(%r6); /* Return */ \ + B_EX OFF_OK,%r6; /* Return */ \ \ sk_load_##NAME##_slow:; \ lgr %r2,%r7; /* Arg1 = skb pointer */ \ @@ -64,11 +65,14 @@ sk_load_##NAME##_slow:; \ brasl %r14,skb_copy_bits; /* Get data from skb */ \ LOAD %r14,STK_OFF_TMP(%r15); /* Load from temp bufffer */ \ ltgr %r2,%r2; /* Set cc to (%r2 != 0) */ \ - br %r6; /* Return */ + BR_EX %r6; /* Return */ sk_load_common(word, 4, llgf) /* r14 = *(u32 *) (skb->data+offset) */ sk_load_common(half, 2, llgh) /* r14 = *(u16 *) (skb->data+offset) */ + GEN_BR_THUNK %r6 + GEN_B_THUNK OFF_OK,%r6 + /* * Load 1 byte from SKB (optimized version) */ @@ -80,7 +84,7 @@ ENTRY(sk_load_byte_pos) clg %r3,STK_OFF_HLEN(%r15) # Offset >= hlen? jnl sk_load_byte_slow llgc %r14,0(%r3,%r12) # Get byte from skb - b OFF_OK(%r6) # Return OK + B_EX OFF_OK,%r6 # Return OK sk_load_byte_slow: lgr %r2,%r7 # Arg1 = skb pointer @@ -90,7 +94,7 @@ sk_load_byte_slow: brasl %r14,skb_copy_bits # Get data from skb llgc %r14,STK_OFF_TMP(%r15) # Load result from temp buffer ltgr %r2,%r2 # Set cc to (%r2 != 0) - br %r6 # Return cc + BR_EX %r6 # Return cc #define sk_negative_common(NAME, SIZE, LOAD) \ sk_load_##NAME##_slow_neg:; \ @@ -104,7 +108,7 @@ sk_load_##NAME##_slow_neg:; \ jz bpf_error; \ LOAD %r14,0(%r2); /* Get data from pointer */ \ xr %r3,%r3; /* Set cc to zero */ \ - br %r6; /* Return cc */ + BR_EX %r6; /* Return cc */ sk_negative_common(word, 4, llgf) sk_negative_common(half, 2, llgh) @@ -113,4 +117,4 @@ sk_negative_common(byte, 1, llgc) bpf_error: # force a return 0 from jit handler ltgr %r15,%r15 # Set condition code - br %r6 + BR_EX %r6 diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 78a19c93b380..dd2bcf0e7d00 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -25,6 +25,8 @@ #include #include #include +#include +#include #include #include "bpf_jit.h" @@ -41,6 +43,8 @@ struct bpf_jit { int base_ip; /* Base address for literal pool */ int ret0_ip; /* Address of return 0 */ int exit_ip; /* Address of exit */ + int r1_thunk_ip; /* Address of expoline thunk for 'br %r1' */ + int r14_thunk_ip; /* Address of expoline thunk for 'br %r14' */ int tail_call_start; /* Tail call start offset */ int labels[1]; /* Labels for local jumps */ }; @@ -250,6 +254,19 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) REG_SET_SEEN(b2); \ }) +#define EMIT6_PCREL_RILB(op, b, target) \ +({ \ + int rel = (target - jit->prg) / 2; \ + _EMIT6(op | reg_high(b) << 16 | rel >> 16, rel & 0xffff); \ + REG_SET_SEEN(b); \ +}) + +#define EMIT6_PCREL_RIL(op, target) \ +({ \ + int rel = (target - jit->prg) / 2; \ + _EMIT6(op | rel >> 16, rel & 0xffff); \ +}) + #define _EMIT6_IMM(op, imm) \ ({ \ unsigned int __imm = (imm); \ @@ -469,8 +486,45 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth) EMIT4(0xb9040000, REG_2, BPF_REG_0); /* Restore registers */ save_restore_regs(jit, REGS_RESTORE, stack_depth); + if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) { + jit->r14_thunk_ip = jit->prg; + /* Generate __s390_indirect_jump_r14 thunk */ + if (test_facility(35)) { + /* exrl %r0,.+10 */ + EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10); + } else { + /* larl %r1,.+14 */ + EMIT6_PCREL_RILB(0xc0000000, REG_1, jit->prg + 14); + /* ex 0,0(%r1) */ + EMIT4_DISP(0x44000000, REG_0, REG_1, 0); + } + /* j . */ + EMIT4_PCREL(0xa7f40000, 0); + } /* br %r14 */ _EMIT2(0x07fe); + + if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable && + (jit->seen & SEEN_FUNC)) { + jit->r1_thunk_ip = jit->prg; + /* Generate __s390_indirect_jump_r1 thunk */ + if (test_facility(35)) { + /* exrl %r0,.+10 */ + EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10); + /* j . */ + EMIT4_PCREL(0xa7f40000, 0); + /* br %r1 */ + _EMIT2(0x07f1); + } else { + /* larl %r1,.+14 */ + EMIT6_PCREL_RILB(0xc0000000, REG_1, jit->prg + 14); + /* ex 0,S390_lowcore.br_r1_tampoline */ + EMIT4_DISP(0x44000000, REG_0, REG_0, + offsetof(struct lowcore, br_r1_trampoline)); + /* j . */ + EMIT4_PCREL(0xa7f40000, 0); + } + } } /* @@ -966,8 +1020,13 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i /* lg %w1,(%l) */ EMIT6_DISP_LH(0xe3000000, 0x0004, REG_W1, REG_0, REG_L, EMIT_CONST_U64(func)); - /* basr %r14,%w1 */ - EMIT2(0x0d00, REG_14, REG_W1); + if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) { + /* brasl %r14,__s390_indirect_jump_r1 */ + EMIT6_PCREL_RILB(0xc0050000, REG_14, jit->r1_thunk_ip); + } else { + /* basr %r14,%w1 */ + EMIT2(0x0d00, REG_14, REG_W1); + } /* lgr %b0,%r2: load return value into %b0 */ EMIT4(0xb9040000, BPF_REG_0, REG_2); if ((jit->seen & SEEN_SKB) && From 4a35a9027f64d588d2fd9436dda4126e8d5647d7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 7 May 2018 15:27:01 -0300 Subject: [PATCH 140/347] Revert "perf pmu: Fix pmu events parsing rule" As reported by Adrian Hunter, this breaks intel_pt event parsing: # perf record -e intel_pt//u uname event syntax error: 'intel_pt//u' \___ parser error Run 'perf list' for a list of valid events Usage: perf record [] [] or: perf record [] -- [] -e, --event event selector. use 'perf list' to list available events # This reverts commit 9a4a931ce847f4aaa12edf11b2e050e18bf45910. Reported-by: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-ye1o2mji7x68xotiot1tn1gp@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.y | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index d14464c42714..7afeb80cc39e 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -224,15 +224,15 @@ event_def: event_pmu | event_bpf_file event_pmu: -PE_NAME '/' event_config '/' +PE_NAME opt_event_config { struct list_head *list, *orig_terms, *terms; - if (parse_events_copy_term_list($3, &orig_terms)) + if (parse_events_copy_term_list($2, &orig_terms)) YYABORT; ALLOC_LIST(list); - if (parse_events_add_pmu(_parse_state, list, $1, $3, false)) { + if (parse_events_add_pmu(_parse_state, list, $1, $2, false)) { struct perf_pmu *pmu = NULL; int ok = 0; char *pattern; @@ -262,7 +262,7 @@ PE_NAME '/' event_config '/' if (!ok) YYABORT; } - parse_events_terms__delete($3); + parse_events_terms__delete($2); parse_events_terms__delete(orig_terms); $$ = list; } From 1751eb42ddb56b1e0a28ade5c5f19b7961a6f0c2 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Fri, 4 May 2018 18:47:25 +0200 Subject: [PATCH 141/347] selftests: net: use TEST_PROGS_EXTENDED When a script file that isn't generated uses the variable TEST_GEN_PROGS_EXTENDED and a 'make -C tools/testing/selftests clean' is performed the script file gets removed and git shows the file as deleted. For script files that isn't generated TEST_PROGS_EXTENDED should be used. Fixes: 9faedd643fd9 ("selftests: net: add in_netns.sh TEST_GEN_PROGS_EXTENDED") Signed-off-by: Anders Roxell Signed-off-by: David S. Miller --- tools/testing/selftests/net/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index daf5effec3f0..3ff81a478dbe 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -6,7 +6,7 @@ CFLAGS += -I../../../../usr/include/ TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh -TEST_GEN_PROGS_EXTENDED := in_netns.sh +TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa From abcd3d6fc640aff48b17900734eff134f27fdf2a Mon Sep 17 00:00:00 2001 From: Moritz Fischer Date: Fri, 4 May 2018 10:18:33 -0700 Subject: [PATCH 142/347] net: nixge: Fix error path for obtaining mac address Fix issue where nixge_get_nvmem_address() returns a non-NULL return value on a failed nvmem_cell_get() that causes an invalid access when error value encoded in pointer is dereferenced. Furthermore ensure that buffer allocated by nvmem_cell_read() actually gets kfreed() if the function succeeds. Fixes commit 492caffa8a1a ("net: ethernet: nixge: Add support for National Instruments XGE netdev") Reported-by: Alex Williams Signed-off-by: Moritz Fischer Signed-off-by: David S. Miller --- drivers/net/ethernet/ni/nixge.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/ni/nixge.c b/drivers/net/ethernet/ni/nixge.c index 27364b7572fc..c41fea9253e3 100644 --- a/drivers/net/ethernet/ni/nixge.c +++ b/drivers/net/ethernet/ni/nixge.c @@ -1170,7 +1170,7 @@ static void *nixge_get_nvmem_address(struct device *dev) cell = nvmem_cell_get(dev, "address"); if (IS_ERR(cell)) - return cell; + return NULL; mac = nvmem_cell_read(cell, &cell_size); nvmem_cell_put(cell); @@ -1202,10 +1202,12 @@ static int nixge_probe(struct platform_device *pdev) ndev->max_mtu = NIXGE_JUMBO_MTU; mac_addr = nixge_get_nvmem_address(&pdev->dev); - if (mac_addr && is_valid_ether_addr(mac_addr)) + if (mac_addr && is_valid_ether_addr(mac_addr)) { ether_addr_copy(ndev->dev_addr, mac_addr); - else + kfree(mac_addr); + } else { eth_hw_addr_random(ndev); + } priv = netdev_priv(ndev); priv->ndev = ndev; From a86b74d363708d0b04305f356fd3429c7b560a64 Mon Sep 17 00:00:00 2001 From: Moritz Fischer Date: Fri, 4 May 2018 10:18:34 -0700 Subject: [PATCH 143/347] net: nixge: Address compiler warnings about signedness MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the following warnings: warning: pointer targets in passing argument 1 of ‘is_valid_ether_addr’ differ in signedness [-Wpointer-sign] if (mac_addr && is_valid_ether_addr(mac_addr)) { ^~~~~~~~ expected ‘const u8 * {aka const unsigned char *}’ but argument is of type ‘const char *’ static inline bool is_valid_ether_addr(const u8 *addr) ^~~~~~~~~~~~~~~~~~~ warning: pointer targets in passing argument 2 of ‘ether_addr_copy’ differ in signedness [-Wpointer-sign] ether_addr_copy(ndev->dev_addr, mac_addr); ^~~~~~~~ expected ‘const u8 * {aka const unsigned char *}’ but argument is of type ‘const char *’ static inline void ether_addr_copy(u8 *dst, const u8 *src) Signed-off-by: Moritz Fischer Signed-off-by: David S. Miller --- drivers/net/ethernet/ni/nixge.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ni/nixge.c b/drivers/net/ethernet/ni/nixge.c index c41fea9253e3..b092894dd128 100644 --- a/drivers/net/ethernet/ni/nixge.c +++ b/drivers/net/ethernet/ni/nixge.c @@ -1183,7 +1183,7 @@ static int nixge_probe(struct platform_device *pdev) struct nixge_priv *priv; struct net_device *ndev; struct resource *dmares; - const char *mac_addr; + const u8 *mac_addr; int err; ndev = alloc_etherdev(sizeof(*priv)); From 59d8d4434f429b4fa8a346fd889058bda427a837 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 5 May 2018 14:59:47 +0800 Subject: [PATCH 144/347] sctp: delay the authentication for the duplicated cookie-echo chunk Now sctp only delays the authentication for the normal cookie-echo chunk by setting chunk->auth_chunk in sctp_endpoint_bh_rcv(). But for the duplicated one with auth, in sctp_assoc_bh_rcv(), it does authentication first based on the old asoc, which will definitely fail due to the different auth info in the old asoc. The duplicated cookie-echo chunk will create a new asoc with the auth info from this chunk, and the authentication should also be done with the new asoc's auth info for all of the collision 'A', 'B' and 'D'. Otherwise, the duplicated cookie-echo chunk with auth will never pass the authentication and create the new connection. This issue exists since very beginning, and this fix is to make sctp_assoc_bh_rcv() follow the way sctp_endpoint_bh_rcv() does for the normal cookie-echo chunk to delay the authentication. While at it, remove the unused params from sctp_sf_authenticate() and define sctp_auth_chunk_verify() used for all the places that do the delayed authentication. v1->v2: fix the typo in changelog as Marcelo noticed. Acked-by: Marcelo Ricardo Leitner Signed-off-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/associola.c | 30 +++++++++++++- net/sctp/sm_statefuns.c | 86 ++++++++++++++++++++++------------------- 2 files changed, 75 insertions(+), 41 deletions(-) diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 837806dd5799..a47179da24e6 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1024,8 +1024,9 @@ static void sctp_assoc_bh_rcv(struct work_struct *work) struct sctp_endpoint *ep; struct sctp_chunk *chunk; struct sctp_inq *inqueue; - int state; + int first_time = 1; /* is this the first time through the loop */ int error = 0; + int state; /* The association should be held so we should be safe. */ ep = asoc->ep; @@ -1036,6 +1037,30 @@ static void sctp_assoc_bh_rcv(struct work_struct *work) state = asoc->state; subtype = SCTP_ST_CHUNK(chunk->chunk_hdr->type); + /* If the first chunk in the packet is AUTH, do special + * processing specified in Section 6.3 of SCTP-AUTH spec + */ + if (first_time && subtype.chunk == SCTP_CID_AUTH) { + struct sctp_chunkhdr *next_hdr; + + next_hdr = sctp_inq_peek(inqueue); + if (!next_hdr) + goto normal; + + /* If the next chunk is COOKIE-ECHO, skip the AUTH + * chunk while saving a pointer to it so we can do + * Authentication later (during cookie-echo + * processing). + */ + if (next_hdr->type == SCTP_CID_COOKIE_ECHO) { + chunk->auth_chunk = skb_clone(chunk->skb, + GFP_ATOMIC); + chunk->auth = 1; + continue; + } + } + +normal: /* SCTP-AUTH, Section 6.3: * The receiver has a list of chunk types which it expects * to be received only after an AUTH-chunk. This list has @@ -1074,6 +1099,9 @@ static void sctp_assoc_bh_rcv(struct work_struct *work) /* If there is an error on chunk, discard this packet. */ if (error && chunk) chunk->pdiscard = 1; + + if (first_time) + first_time = 0; } sctp_association_put(asoc); } diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 28c070e187c2..c9ae3404b1bb 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -153,10 +153,7 @@ static enum sctp_disposition sctp_sf_violation_chunk( struct sctp_cmd_seq *commands); static enum sctp_ierror sctp_sf_authenticate( - struct net *net, - const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const union sctp_subtype type, struct sctp_chunk *chunk); static enum sctp_disposition __sctp_sf_do_9_1_abort( @@ -626,6 +623,38 @@ enum sctp_disposition sctp_sf_do_5_1C_ack(struct net *net, return SCTP_DISPOSITION_CONSUME; } +static bool sctp_auth_chunk_verify(struct net *net, struct sctp_chunk *chunk, + const struct sctp_association *asoc) +{ + struct sctp_chunk auth; + + if (!chunk->auth_chunk) + return true; + + /* SCTP-AUTH: auth_chunk pointer is only set when the cookie-echo + * is supposed to be authenticated and we have to do delayed + * authentication. We've just recreated the association using + * the information in the cookie and now it's much easier to + * do the authentication. + */ + + /* Make sure that we and the peer are AUTH capable */ + if (!net->sctp.auth_enable || !asoc->peer.auth_capable) + return false; + + /* set-up our fake chunk so that we can process it */ + auth.skb = chunk->auth_chunk; + auth.asoc = chunk->asoc; + auth.sctp_hdr = chunk->sctp_hdr; + auth.chunk_hdr = (struct sctp_chunkhdr *) + skb_push(chunk->auth_chunk, + sizeof(struct sctp_chunkhdr)); + skb_pull(chunk->auth_chunk, sizeof(struct sctp_chunkhdr)); + auth.transport = chunk->transport; + + return sctp_sf_authenticate(asoc, &auth) == SCTP_IERROR_NO_ERROR; +} + /* * Respond to a normal COOKIE ECHO chunk. * We are the side that is being asked for an association. @@ -763,37 +792,9 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net, if (error) goto nomem_init; - /* SCTP-AUTH: auth_chunk pointer is only set when the cookie-echo - * is supposed to be authenticated and we have to do delayed - * authentication. We've just recreated the association using - * the information in the cookie and now it's much easier to - * do the authentication. - */ - if (chunk->auth_chunk) { - struct sctp_chunk auth; - enum sctp_ierror ret; - - /* Make sure that we and the peer are AUTH capable */ - if (!net->sctp.auth_enable || !new_asoc->peer.auth_capable) { - sctp_association_free(new_asoc); - return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); - } - - /* set-up our fake chunk so that we can process it */ - auth.skb = chunk->auth_chunk; - auth.asoc = chunk->asoc; - auth.sctp_hdr = chunk->sctp_hdr; - auth.chunk_hdr = (struct sctp_chunkhdr *) - skb_push(chunk->auth_chunk, - sizeof(struct sctp_chunkhdr)); - skb_pull(chunk->auth_chunk, sizeof(struct sctp_chunkhdr)); - auth.transport = chunk->transport; - - ret = sctp_sf_authenticate(net, ep, new_asoc, type, &auth); - if (ret != SCTP_IERROR_NO_ERROR) { - sctp_association_free(new_asoc); - return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); - } + if (!sctp_auth_chunk_verify(net, chunk, new_asoc)) { + sctp_association_free(new_asoc); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } repl = sctp_make_cookie_ack(new_asoc, chunk); @@ -1797,13 +1798,15 @@ static enum sctp_disposition sctp_sf_do_dupcook_a( if (sctp_auth_asoc_init_active_key(new_asoc, GFP_ATOMIC)) goto nomem; + if (!sctp_auth_chunk_verify(net, chunk, new_asoc)) + return SCTP_DISPOSITION_DISCARD; + /* Make sure no new addresses are being added during the * restart. Though this is a pretty complicated attack * since you'd have to get inside the cookie. */ - if (!sctp_sf_check_restart_addrs(new_asoc, asoc, chunk, commands)) { + if (!sctp_sf_check_restart_addrs(new_asoc, asoc, chunk, commands)) return SCTP_DISPOSITION_CONSUME; - } /* If the endpoint is in the SHUTDOWN-ACK-SENT state and recognizes * the peer has restarted (Action A), it MUST NOT setup a new @@ -1912,6 +1915,9 @@ static enum sctp_disposition sctp_sf_do_dupcook_b( if (sctp_auth_asoc_init_active_key(new_asoc, GFP_ATOMIC)) goto nomem; + if (!sctp_auth_chunk_verify(net, chunk, new_asoc)) + return SCTP_DISPOSITION_DISCARD; + /* Update the content of current association. */ sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc)); sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, @@ -2009,6 +2015,9 @@ static enum sctp_disposition sctp_sf_do_dupcook_d( * a COOKIE ACK. */ + if (!sctp_auth_chunk_verify(net, chunk, asoc)) + return SCTP_DISPOSITION_DISCARD; + /* Don't accidentally move back into established state. */ if (asoc->state < SCTP_STATE_ESTABLISHED) { sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, @@ -4171,10 +4180,7 @@ gen_shutdown: * The return value is the disposition of the chunk. */ static enum sctp_ierror sctp_sf_authenticate( - struct net *net, - const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const union sctp_subtype type, struct sctp_chunk *chunk) { struct sctp_shared_key *sh_key = NULL; @@ -4275,7 +4281,7 @@ enum sctp_disposition sctp_sf_eat_auth(struct net *net, commands); auth_hdr = (struct sctp_authhdr *)chunk->skb->data; - error = sctp_sf_authenticate(net, ep, asoc, type, chunk); + error = sctp_sf_authenticate(asoc, chunk); switch (error) { case SCTP_IERROR_AUTH_BAD_HMAC: /* Generate the ERROR chunk and discard the rest From 98f0a39529e553bb45c0a7d775d2cc2afe9ed91d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 5 May 2018 08:35:04 -0700 Subject: [PATCH 145/347] tls: fix use after free in tls_sk_proto_close syzbot reported a use-after-free in tls_sk_proto_close Add a boolean value to cleanup a bit this function. BUG: KASAN: use-after-free in tls_sk_proto_close+0x8ab/0x9c0 net/tls/tls_main.c:297 Read of size 1 at addr ffff8801ae40a858 by task syz-executor363/4503 CPU: 0 PID: 4503 Comm: syz-executor363 Not tainted 4.17.0-rc3+ #34 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1b9/0x294 lib/dump_stack.c:113 print_address_description+0x6c/0x20b mm/kasan/report.c:256 kasan_report_error mm/kasan/report.c:354 [inline] kasan_report.cold.7+0x242/0x2fe mm/kasan/report.c:412 __asan_report_load1_noabort+0x14/0x20 mm/kasan/report.c:430 tls_sk_proto_close+0x8ab/0x9c0 net/tls/tls_main.c:297 inet_release+0x104/0x1f0 net/ipv4/af_inet.c:427 inet6_release+0x50/0x70 net/ipv6/af_inet6.c:460 sock_release+0x96/0x1b0 net/socket.c:594 sock_close+0x16/0x20 net/socket.c:1149 __fput+0x34d/0x890 fs/file_table.c:209 ____fput+0x15/0x20 fs/file_table.c:243 task_work_run+0x1e4/0x290 kernel/task_work.c:113 exit_task_work include/linux/task_work.h:22 [inline] do_exit+0x1aee/0x2730 kernel/exit.c:865 do_group_exit+0x16f/0x430 kernel/exit.c:968 get_signal+0x886/0x1960 kernel/signal.c:2469 do_signal+0x98/0x2040 arch/x86/kernel/signal.c:810 exit_to_usermode_loop+0x28a/0x310 arch/x86/entry/common.c:162 prepare_exit_to_usermode arch/x86/entry/common.c:196 [inline] syscall_return_slowpath arch/x86/entry/common.c:265 [inline] do_syscall_64+0x6ac/0x800 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x4457b9 RSP: 002b:00007fdf4d766da8 EFLAGS: 00000246 ORIG_RAX: 00000000000000ca RAX: fffffffffffffe00 RBX: 00000000006dac3c RCX: 00000000004457b9 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 00000000006dac3c RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000006dac38 R13: 3692738801137283 R14: 6bf92c39443c4c1d R15: 0000000000000006 Allocated by task 4498: save_stack+0x43/0xd0 mm/kasan/kasan.c:448 set_track mm/kasan/kasan.c:460 [inline] kasan_kmalloc+0xc4/0xe0 mm/kasan/kasan.c:553 kmem_cache_alloc_trace+0x152/0x780 mm/slab.c:3620 kmalloc include/linux/slab.h:512 [inline] kzalloc include/linux/slab.h:701 [inline] create_ctx net/tls/tls_main.c:521 [inline] tls_init+0x1f9/0xb00 net/tls/tls_main.c:633 tcp_set_ulp+0x1bc/0x520 net/ipv4/tcp_ulp.c:153 do_tcp_setsockopt.isra.39+0x44a/0x2600 net/ipv4/tcp.c:2588 tcp_setsockopt+0xc1/0xe0 net/ipv4/tcp.c:2893 sock_common_setsockopt+0x9a/0xe0 net/core/sock.c:3039 __sys_setsockopt+0x1bd/0x390 net/socket.c:1903 __do_sys_setsockopt net/socket.c:1914 [inline] __se_sys_setsockopt net/socket.c:1911 [inline] __x64_sys_setsockopt+0xbe/0x150 net/socket.c:1911 do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 4503: save_stack+0x43/0xd0 mm/kasan/kasan.c:448 set_track mm/kasan/kasan.c:460 [inline] __kasan_slab_free+0x11a/0x170 mm/kasan/kasan.c:521 kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528 __cache_free mm/slab.c:3498 [inline] kfree+0xd9/0x260 mm/slab.c:3813 tls_sw_free_resources+0x2a3/0x360 net/tls/tls_sw.c:1037 tls_sk_proto_close+0x67c/0x9c0 net/tls/tls_main.c:288 inet_release+0x104/0x1f0 net/ipv4/af_inet.c:427 inet6_release+0x50/0x70 net/ipv6/af_inet6.c:460 sock_release+0x96/0x1b0 net/socket.c:594 sock_close+0x16/0x20 net/socket.c:1149 __fput+0x34d/0x890 fs/file_table.c:209 ____fput+0x15/0x20 fs/file_table.c:243 task_work_run+0x1e4/0x290 kernel/task_work.c:113 exit_task_work include/linux/task_work.h:22 [inline] do_exit+0x1aee/0x2730 kernel/exit.c:865 do_group_exit+0x16f/0x430 kernel/exit.c:968 get_signal+0x886/0x1960 kernel/signal.c:2469 do_signal+0x98/0x2040 arch/x86/kernel/signal.c:810 exit_to_usermode_loop+0x28a/0x310 arch/x86/entry/common.c:162 prepare_exit_to_usermode arch/x86/entry/common.c:196 [inline] syscall_return_slowpath arch/x86/entry/common.c:265 [inline] do_syscall_64+0x6ac/0x800 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe The buggy address belongs to the object at ffff8801ae40a800 which belongs to the cache kmalloc-256 of size 256 The buggy address is located 88 bytes inside of 256-byte region [ffff8801ae40a800, ffff8801ae40a900) The buggy address belongs to the page: page:ffffea0006b90280 count:1 mapcount:0 mapping:ffff8801ae40a080 index:0x0 flags: 0x2fffc0000000100(slab) raw: 02fffc0000000100 ffff8801ae40a080 0000000000000000 000000010000000c raw: ffffea0006bea9e0 ffffea0006bc94a0 ffff8801da8007c0 0000000000000000 page dumped because: kasan: bad access detected Fixes: dd0bed1665d6 ("tls: support for Inline tls record") Signed-off-by: Eric Dumazet Cc: Atul Gupta Cc: Steve Wise Cc: Ilya Lesokhin Cc: Aviad Yehezkel Cc: Dave Watson Reported-by: syzbot Signed-off-by: David S. Miller --- net/tls/tls_main.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index cc03e00785c7..74ed1e7af3d9 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -248,16 +248,13 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) struct tls_context *ctx = tls_get_ctx(sk); long timeo = sock_sndtimeo(sk, 0); void (*sk_proto_close)(struct sock *sk, long timeout); + bool free_ctx = false; lock_sock(sk); sk_proto_close = ctx->sk_proto_close; - if (ctx->conf == TLS_HW_RECORD) - goto skip_tx_cleanup; - - if (ctx->conf == TLS_BASE) { - kfree(ctx); - ctx = NULL; + if (ctx->conf == TLS_BASE || ctx->conf == TLS_HW_RECORD) { + free_ctx = true; goto skip_tx_cleanup; } @@ -294,7 +291,7 @@ skip_tx_cleanup: /* free ctx for TLS_HW_RECORD, used by tcp_set_state * for sk->sk_prot->unhash [tls_hw_unhash] */ - if (ctx && ctx->conf == TLS_HW_RECORD) + if (free_ctx) kfree(ctx); } From 9255bacd57a5a247be91ce2b81ff09e15a0b8822 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 5 May 2018 20:58:22 +0200 Subject: [PATCH 146/347] net: dsa: mv88e6xxx: Fix PHY interrupts by parameterising PHY base address Most of the mv88e6xxx switches have the PHYs at address 0, 1, 2, ... The 6341 however has the PHYs at 0x10, 0x11, 0x12. Add a parameter to the info structure for this base address. Testing of 6f88284f3bd7 ("net: dsa: mv88e6xxx: Add MDIO interrupts for internal PHYs") was performed on the 6341. So it works only on the 6341. Use this base information to correctly set the interrupt. Fixes: 6f88284f3bd7 ("net: dsa: mv88e6xxx: Add MDIO interrupts for internal PHYs") Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx/chip.c | 26 ++++++++++++++++++++++++++ drivers/net/dsa/mv88e6xxx/chip.h | 1 + drivers/net/dsa/mv88e6xxx/global2.c | 2 +- 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 3d2091099f7f..5b4374f21d76 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3370,6 +3370,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_internal_phys = 5, .max_vid = 4095, .port_base_addr = 0x10, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 15000, @@ -3391,6 +3392,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_internal_phys = 0, .max_vid = 4095, .port_base_addr = 0x10, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 15000, @@ -3410,6 +3412,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_internal_phys = 8, .max_vid = 4095, .port_base_addr = 0x10, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 15000, @@ -3431,6 +3434,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_internal_phys = 5, .max_vid = 4095, .port_base_addr = 0x10, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 15000, @@ -3452,6 +3456,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_internal_phys = 0, .max_vid = 4095, .port_base_addr = 0x10, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 15000, @@ -3472,6 +3477,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_gpio = 11, .max_vid = 4095, .port_base_addr = 0x10, + .phy_base_addr = 0x10, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 3750, @@ -3493,6 +3499,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_internal_phys = 5, .max_vid = 4095, .port_base_addr = 0x10, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 15000, @@ -3514,6 +3521,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_internal_phys = 0, .max_vid = 4095, .port_base_addr = 0x10, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 15000, @@ -3535,6 +3543,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_internal_phys = 5, .max_vid = 4095, .port_base_addr = 0x10, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 15000, @@ -3557,6 +3566,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_gpio = 15, .max_vid = 4095, .port_base_addr = 0x10, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 15000, @@ -3578,6 +3588,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_internal_phys = 5, .max_vid = 4095, .port_base_addr = 0x10, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 15000, @@ -3600,6 +3611,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_gpio = 15, .max_vid = 4095, .port_base_addr = 0x10, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 15000, @@ -3621,6 +3633,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_internal_phys = 0, .max_vid = 4095, .port_base_addr = 0x10, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 15000, @@ -3641,6 +3654,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_gpio = 16, .max_vid = 8191, .port_base_addr = 0x0, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .tag_protocol = DSA_TAG_PROTO_DSA, @@ -3663,6 +3677,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_gpio = 16, .max_vid = 8191, .port_base_addr = 0x0, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 3750, @@ -3684,6 +3699,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_internal_phys = 11, .max_vid = 8191, .port_base_addr = 0x0, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 3750, @@ -3707,6 +3723,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_gpio = 15, .max_vid = 4095, .port_base_addr = 0x10, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 15000, @@ -3730,6 +3747,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_gpio = 16, .max_vid = 8191, .port_base_addr = 0x0, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 3750, @@ -3753,6 +3771,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_gpio = 15, .max_vid = 4095, .port_base_addr = 0x10, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 15000, @@ -3776,6 +3795,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_gpio = 15, .max_vid = 4095, .port_base_addr = 0x10, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 15000, @@ -3798,6 +3818,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_gpio = 11, .max_vid = 4095, .port_base_addr = 0x10, + .phy_base_addr = 0x10, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 3750, @@ -3820,6 +3841,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_internal_phys = 5, .max_vid = 4095, .port_base_addr = 0x10, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 15000, @@ -3841,6 +3863,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_internal_phys = 5, .max_vid = 4095, .port_base_addr = 0x10, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 15000, @@ -3863,6 +3886,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_gpio = 15, .max_vid = 4095, .port_base_addr = 0x10, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 15000, @@ -3885,6 +3909,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_gpio = 16, .max_vid = 8191, .port_base_addr = 0x0, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 3750, @@ -3907,6 +3932,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_gpio = 16, .max_vid = 8191, .port_base_addr = 0x0, + .phy_base_addr = 0x0, .global1_addr = 0x1b, .global2_addr = 0x1c, .age_time_coeff = 3750, diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h index 80490f66bc06..12b7f4649b25 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.h +++ b/drivers/net/dsa/mv88e6xxx/chip.h @@ -114,6 +114,7 @@ struct mv88e6xxx_info { unsigned int num_gpio; unsigned int max_vid; unsigned int port_base_addr; + unsigned int phy_base_addr; unsigned int global1_addr; unsigned int global2_addr; unsigned int age_time_coeff; diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c index 0ce627fded48..8d22d66d84b7 100644 --- a/drivers/net/dsa/mv88e6xxx/global2.c +++ b/drivers/net/dsa/mv88e6xxx/global2.c @@ -1118,7 +1118,7 @@ int mv88e6xxx_g2_irq_mdio_setup(struct mv88e6xxx_chip *chip, err = irq; goto out; } - bus->irq[chip->info->port_base_addr + phy] = irq; + bus->irq[chip->info->phy_base_addr + phy] = irq; } return 0; out: From 53bc017f72b3b7f7f1bad3043c2d6dd87331ea44 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sun, 6 May 2018 13:23:52 +0200 Subject: [PATCH 147/347] net: flow_dissector: fix typo 'can by' to 'can be' Signed-off-by: Wolfram Sang Signed-off-by: David S. Miller --- include/net/flow_dissector.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 9a074776f70b..d1fcf2442a42 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -251,7 +251,7 @@ extern struct flow_dissector flow_keys_buf_dissector; * This structure is used to hold a digest of the full flow keys. This is a * larger "hash" of a flow to allow definitively matching specific flows where * the 32 bit skb->hash is not large enough. The size is limited to 16 bytes so - * that it can by used in CB of skb (see sch_choke for an example). + * that it can be used in CB of skb (see sch_choke for an example). */ #define FLOW_KEYS_DIGEST_LEN 16 struct flow_keys_digest { From 080324c36ade319f57e505633ab54f6f53289b45 Mon Sep 17 00:00:00 2001 From: Andre Tomt Date: Mon, 7 May 2018 04:24:39 +0200 Subject: [PATCH 148/347] net/tls: Fix connection stall on partial tls record In the case of writing a partial tls record we forgot to clear the ctx->in_tcp_sendpages flag, causing some connections to stall. Fixes: c212d2c7fc47 ("net/tls: Don't recursively call push_record during tls_write_space callbacks") Signed-off-by: Andre Tomt Signed-off-by: David S. Miller --- net/tls/tls_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 74ed1e7af3d9..20cd93be6236 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -135,6 +135,7 @@ retry: offset -= sg->offset; ctx->partially_sent_offset = offset; ctx->partially_sent_record = (void *)sg; + ctx->in_tcp_sendpages = false; return ret; } From 5ed3fde21331407a319e966a0582195988b4f4f0 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 7 May 2018 12:39:30 +0200 Subject: [PATCH 149/347] MAINTAINERS: Update the 3c59x network driver entry Replace my old E-Mail address with a working one. While at it, change the maintainance status to 'Odd Fixes'. I'm still around with some knowledge, but don't actively maintain it anymore. Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index b1ccabd0dbc3..b3cbf1c3ed07 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -137,9 +137,9 @@ Maintainers List (try to look for most precise areas first) ----------------------------------- 3C59X NETWORK DRIVER -M: Steffen Klassert +M: Steffen Klassert L: netdev@vger.kernel.org -S: Maintained +S: Odd Fixes F: Documentation/networking/vortex.txt F: drivers/net/ethernet/3com/3c59x.c From a9f71d0de68db84de610b09cb72e8eb0f621d0e1 Mon Sep 17 00:00:00 2001 From: Georg Hofmann Date: Mon, 7 May 2018 14:03:18 +0200 Subject: [PATCH 150/347] trivial: fix inconsistent help texts This patch removes "experimental" from the help text where depends on CONFIG_EXPERIMENTAL was already removed. Signed-off-by: Georg Hofmann Signed-off-by: David S. Miller --- net/ipv6/Kconfig | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 6794ddf0547c..11e4e80cf7e9 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -34,16 +34,15 @@ config IPV6_ROUTE_INFO bool "IPv6: Route Information (RFC 4191) support" depends on IPV6_ROUTER_PREF ---help--- - This is experimental support of Route Information. + Support of Route Information. If unsure, say N. config IPV6_OPTIMISTIC_DAD bool "IPv6: Enable RFC 4429 Optimistic DAD" ---help--- - This is experimental support for optimistic Duplicate - Address Detection. It allows for autoconfigured addresses - to be used more quickly. + Support for optimistic Duplicate Address Detection. It allows for + autoconfigured addresses to be used more quickly. If unsure, say N. @@ -280,7 +279,7 @@ config IPV6_MROUTE depends on IPV6 select IP_MROUTE_COMMON ---help--- - Experimental support for IPv6 multicast forwarding. + Support for IPv6 multicast forwarding. If unsure, say N. config IPV6_MROUTE_MULTIPLE_TABLES From 8c61ab7f111a2b29d051348b9cb9a39804ebf1f8 Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Mon, 7 May 2018 16:10:38 +0300 Subject: [PATCH 151/347] net: aquantia: driver should correctly declare vlan_features bits In particular, not reporting SG forced skbs to be linear for vlan interfaces over atlantic NIC. With this fix it is possible to enable SG feature on device and therefore optimize performance. Reported-by: Ma Yuying Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index 32f6d2e24d66..720760d467fa 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -246,6 +246,8 @@ void aq_nic_ndev_init(struct aq_nic_s *self) self->ndev->hw_features |= aq_hw_caps->hw_features; self->ndev->features = aq_hw_caps->hw_features; + self->ndev->vlan_features |= NETIF_F_HW_CSUM | NETIF_F_RXCSUM | + NETIF_F_RXHASH | NETIF_F_SG | NETIF_F_LRO; self->ndev->priv_flags = aq_hw_caps->hw_priv_flags; self->ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE; From a09bd81b5413d1b4d705c6c5303b5d311069da22 Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Mon, 7 May 2018 16:10:39 +0300 Subject: [PATCH 152/347] net: aquantia: Limit number of vectors to actually allocated irqs Driver should use pci_alloc_irq_vectors return value to correct number of allocated vectors and napi instances. Otherwise it'll panic later in pci_irq_vector. Driver also should allow more than one MSI vectors to be allocated. Error return path from pci_alloc_irq_vectors is also fixed to revert resources in a correct sequence when error happens. Reported-by: Long, Nicholas Fixes: 23ee07a ("net: aquantia: Cleanup pci functions module") Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- .../net/ethernet/aquantia/atlantic/aq_nic.c | 1 + .../net/ethernet/aquantia/atlantic/aq_nic.h | 1 + .../ethernet/aquantia/atlantic/aq_pci_func.c | 18 +++++++++--------- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index 720760d467fa..1a1a6380c128 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -95,6 +95,7 @@ void aq_nic_cfg_start(struct aq_nic_s *self) /*rss rings */ cfg->vecs = min(cfg->aq_hw_caps->vecs, AQ_CFG_VECS_DEF); cfg->vecs = min(cfg->vecs, num_online_cpus()); + cfg->vecs = min(cfg->vecs, self->irqvecs); /* cfg->vecs should be power of 2 for RSS */ if (cfg->vecs >= 8U) cfg->vecs = 8U; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h index 219b550d1665..faa533a0ec47 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h @@ -80,6 +80,7 @@ struct aq_nic_s { struct pci_dev *pdev; unsigned int msix_entry_mask; + u32 irqvecs; }; static inline struct device *aq_nic_get_dev(struct aq_nic_s *self) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c index ecc6306f940f..a50e08bb4748 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c @@ -267,16 +267,16 @@ static int aq_pci_probe(struct pci_dev *pdev, numvecs = min(numvecs, num_online_cpus()); /*enable interrupts */ #if !AQ_CFG_FORCE_LEGACY_INT - err = pci_alloc_irq_vectors(self->pdev, numvecs, numvecs, - PCI_IRQ_MSIX); + numvecs = pci_alloc_irq_vectors(self->pdev, 1, numvecs, + PCI_IRQ_MSIX | PCI_IRQ_MSI | + PCI_IRQ_LEGACY); - if (err < 0) { - err = pci_alloc_irq_vectors(self->pdev, 1, 1, - PCI_IRQ_MSI | PCI_IRQ_LEGACY); - if (err < 0) - goto err_hwinit; + if (numvecs < 0) { + err = numvecs; + goto err_hwinit; } #endif + self->irqvecs = numvecs; /* net device init */ aq_nic_cfg_start(self); @@ -298,9 +298,9 @@ err_free_aq_hw: kfree(self->aq_hw); err_ioremap: free_netdev(ndev); -err_pci_func: - pci_release_regions(pdev); err_ndev: + pci_release_regions(pdev); +err_pci_func: pci_disable_device(pdev); return err; } From 2c5d5b13c6eb79f5677e206b8aad59b3a2097f60 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 7 May 2018 09:02:25 -0700 Subject: [PATCH 153/347] llc: better deal with too small mtu syzbot loves to set very small mtu on devices, since it brings joy. We must make llc_ui_sendmsg() fool proof. usercopy: Kernel memory overwrite attempt detected to wrapped address (offset 0, size 18446612139802320068)! kernel BUG at mm/usercopy.c:100! invalid opcode: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 0 PID: 17464 Comm: syz-executor1 Not tainted 4.17.0-rc3+ #36 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:usercopy_abort+0xbb/0xbd mm/usercopy.c:88 RSP: 0018:ffff8801868bf800 EFLAGS: 00010282 RAX: 000000000000006c RBX: ffffffff87d2fb00 RCX: 0000000000000000 RDX: 000000000000006c RSI: ffffffff81610731 RDI: ffffed0030d17ef6 RBP: ffff8801868bf858 R08: ffff88018daa4200 R09: ffffed003b5c4fb0 R10: ffffed003b5c4fb0 R11: ffff8801dae27d87 R12: ffffffff87d2f8e0 R13: ffffffff87d2f7a0 R14: ffffffff87d2f7a0 R15: ffffffff87d2f7a0 FS: 00007f56a14ac700(0000) GS:ffff8801dae00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000001b2bc21000 CR3: 00000001abeb1000 CR4: 00000000001426f0 DR0: 0000000020000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000030602 Call Trace: check_bogus_address mm/usercopy.c:153 [inline] __check_object_size+0x5d9/0x5d9 mm/usercopy.c:256 check_object_size include/linux/thread_info.h:108 [inline] check_copy_size include/linux/thread_info.h:139 [inline] copy_from_iter_full include/linux/uio.h:121 [inline] memcpy_from_msg include/linux/skbuff.h:3305 [inline] llc_ui_sendmsg+0x4b1/0x1530 net/llc/af_llc.c:941 sock_sendmsg_nosec net/socket.c:629 [inline] sock_sendmsg+0xd5/0x120 net/socket.c:639 __sys_sendto+0x3d7/0x670 net/socket.c:1789 __do_sys_sendto net/socket.c:1801 [inline] __se_sys_sendto net/socket.c:1797 [inline] __x64_sys_sendto+0xe1/0x1a0 net/socket.c:1797 do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x455979 RSP: 002b:00007f56a14abc68 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 00007f56a14ac6d4 RCX: 0000000000455979 RDX: 0000000000000000 RSI: 0000000020000000 RDI: 0000000000000018 RBP: 000000000072bea0 R08: 00000000200012c0 R09: 0000000000000010 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff R13: 0000000000000548 R14: 00000000006fbf60 R15: 0000000000000000 Code: 55 c0 e8 c0 55 bb ff ff 75 c8 48 8b 55 c0 4d 89 f9 ff 75 d0 4d 89 e8 48 89 d9 4c 89 e6 41 56 48 c7 c7 80 fa d2 87 e8 a0 0b a3 ff <0f> 0b e8 95 55 bb ff e8 c0 a8 f7 ff 8b 95 14 ff ff ff 4d 89 e8 RIP: usercopy_abort+0xbb/0xbd mm/usercopy.c:88 RSP: ffff8801868bf800 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- net/llc/af_llc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index cb80ebb38311..1beeea9549fa 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -930,6 +930,9 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) if (size > llc->dev->mtu) size = llc->dev->mtu; copied = size - hdrlen; + rc = -EINVAL; + if (copied < 0) + goto release; release_sock(sk); skb = sock_alloc_send_skb(sk, size, noblock, &rc); lock_sock(sk); From 6c0a8f6b5a45ac892a763b6299bd3c5324fc5e02 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 8 May 2018 14:59:56 +1000 Subject: [PATCH 154/347] powerpc/pseries: Fix CONFIG_NUMA=n build The build is failing with CONFIG_NUMA=n and some compiler versions: arch/powerpc/platforms/pseries/hotplug-cpu.o: In function `dlpar_online_cpu': hotplug-cpu.c:(.text+0x12c): undefined reference to `timed_topology_update' arch/powerpc/platforms/pseries/hotplug-cpu.o: In function `dlpar_cpu_remove': hotplug-cpu.c:(.text+0x400): undefined reference to `timed_topology_update' Fix it by moving the empty version of timed_topology_update() into the existing #ifdef block, which has the right guard of SPLPAR && NUMA. Fixes: cee5405da402 ("powerpc/hotplug: Improve responsiveness of hotplug change") Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/topology.h | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index 9f421641a35c..16b077801a5f 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -91,6 +91,7 @@ extern int start_topology_update(void); extern int stop_topology_update(void); extern int prrn_is_enabled(void); extern int find_and_online_cpu_nid(int cpu); +extern int timed_topology_update(int nsecs); #else static inline int start_topology_update(void) { @@ -108,16 +109,12 @@ static inline int find_and_online_cpu_nid(int cpu) { return 0; } +static inline int timed_topology_update(int nsecs) +{ + return 0; +} #endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR */ -#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_NEED_MULTIPLE_NODES) -#if defined(CONFIG_PPC_SPLPAR) -extern int timed_topology_update(int nsecs); -#else -#define timed_topology_update(nsecs) -#endif /* CONFIG_PPC_SPLPAR */ -#endif /* CONFIG_HOTPLUG_CPU || CONFIG_NEED_MULTIPLE_NODES */ - #include #ifdef CONFIG_SMP From 71c23a821c6bcacba71a094efe49ee689605906b Mon Sep 17 00:00:00 2001 From: Jakob Unterwurzacher Date: Wed, 18 Apr 2018 16:10:03 +0200 Subject: [PATCH 155/347] can: dev: increase bus-off message severity bus-off is usually caused by hardware malfunction or configuration error (baud rate mismatch) and causes a complete loss of communication. Increase the "bus-off" message's severity from netdev_dbg() to netdev_info() to make it visible to the user. A can interface going into bus-off is similar in severity to ethernet's "Link is Down" message, which is also printed at info level. It is debatable whether the the "restarted" message should also be changed to netdev_info() to make the interface state changes comprehensible from the kernel log. I have chosen to keep the "restarted" message at dbg for now as the "bus-off" message should be enough for the user to notice and investigate the problem. Signed-off-by: Jakob Unterwurzacher Cc: linux-can@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index b1779566c5bb..3c71f1cb205f 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -605,7 +605,7 @@ void can_bus_off(struct net_device *dev) { struct can_priv *priv = netdev_priv(dev); - netdev_dbg(dev, "bus-off\n"); + netdev_info(dev, "bus-off\n"); netif_carrier_off(dev); From 0e030a373df3b8792b8991740fc31fe0629c6e58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 25 Apr 2018 16:50:39 +0200 Subject: [PATCH 156/347] can: flexcan: fix endianess detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit 88462d2a7830 ("can: flexcan: Remodel FlexCAN register r/w APIs for big endian FlexCAN controllers.") the following logic was implemented: if the dt property "big-endian" is given or the device is compatible to "fsl,p1010-flexcan": use big-endian mode; else use little-endian mode; This relies on commit d50f4630c2e1 ("arm: dts: Remove p1010-flexcan compatible from imx series dts") which was applied a few commits later. Without this commit (or an old device tree used for booting a new kernel) the flexcan devices on i.MX25, i.MX28, i.MX35 and i.MX53 match the 'the device is compatible to "fsl,p1010-flexcan"' test and so are switched erroneously to big endian mode. Instead of the check above put a quirk in devtype data and rely on of_match_device yielding the most compatible match Fixes: 88462d2a7830 ("can: flexcan: Remodel FlexCAN register r/w APIs for big endian FlexCAN controllers.") Signed-off-by: Uwe Kleine-König Tested-by: Gavin Schenk Cc: linux-stable # >= v4.16 Signed-off-by: Marc Kleine-Budde --- drivers/net/can/flexcan.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 634c51e6b8ae..d53a45bf2a72 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -200,6 +200,7 @@ #define FLEXCAN_QUIRK_DISABLE_MECR BIT(4) /* Disable Memory error detection */ #define FLEXCAN_QUIRK_USE_OFF_TIMESTAMP BIT(5) /* Use timestamp based offloading */ #define FLEXCAN_QUIRK_BROKEN_PERR_STATE BIT(6) /* No interrupt for error passive */ +#define FLEXCAN_QUIRK_DEFAULT_BIG_ENDIAN BIT(7) /* default to BE register access */ /* Structure of the message buffer */ struct flexcan_mb { @@ -287,6 +288,12 @@ struct flexcan_priv { }; static const struct flexcan_devtype_data fsl_p1010_devtype_data = { + .quirks = FLEXCAN_QUIRK_BROKEN_WERR_STATE | + FLEXCAN_QUIRK_BROKEN_PERR_STATE | + FLEXCAN_QUIRK_DEFAULT_BIG_ENDIAN, +}; + +static const struct flexcan_devtype_data fsl_imx25_devtype_data = { .quirks = FLEXCAN_QUIRK_BROKEN_WERR_STATE | FLEXCAN_QUIRK_BROKEN_PERR_STATE, }; @@ -1251,9 +1258,9 @@ static void unregister_flexcandev(struct net_device *dev) static const struct of_device_id flexcan_of_match[] = { { .compatible = "fsl,imx6q-flexcan", .data = &fsl_imx6q_devtype_data, }, { .compatible = "fsl,imx28-flexcan", .data = &fsl_imx28_devtype_data, }, - { .compatible = "fsl,imx53-flexcan", .data = &fsl_p1010_devtype_data, }, - { .compatible = "fsl,imx35-flexcan", .data = &fsl_p1010_devtype_data, }, - { .compatible = "fsl,imx25-flexcan", .data = &fsl_p1010_devtype_data, }, + { .compatible = "fsl,imx53-flexcan", .data = &fsl_imx25_devtype_data, }, + { .compatible = "fsl,imx35-flexcan", .data = &fsl_imx25_devtype_data, }, + { .compatible = "fsl,imx25-flexcan", .data = &fsl_imx25_devtype_data, }, { .compatible = "fsl,p1010-flexcan", .data = &fsl_p1010_devtype_data, }, { .compatible = "fsl,vf610-flexcan", .data = &fsl_vf610_devtype_data, }, { .compatible = "fsl,ls1021ar2-flexcan", .data = &fsl_ls1021a_r2_devtype_data, }, @@ -1337,18 +1344,13 @@ static int flexcan_probe(struct platform_device *pdev) priv = netdev_priv(dev); - if (of_property_read_bool(pdev->dev.of_node, "big-endian")) { + if (of_property_read_bool(pdev->dev.of_node, "big-endian") || + devtype_data->quirks & FLEXCAN_QUIRK_DEFAULT_BIG_ENDIAN) { priv->read = flexcan_read_be; priv->write = flexcan_write_be; } else { - if (of_device_is_compatible(pdev->dev.of_node, - "fsl,p1010-flexcan")) { - priv->read = flexcan_read_be; - priv->write = flexcan_write_be; - } else { - priv->read = flexcan_read_le; - priv->write = flexcan_write_le; - } + priv->read = flexcan_read_le; + priv->write = flexcan_write_le; } priv->can.clock.freq = clock_freq; From 9a62dcf486c10daf5366f29df1c799f69b1510f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 25 Apr 2018 16:50:40 +0200 Subject: [PATCH 157/347] arm: dts: imx[35]*: declare flexcan devices to be compatible to imx25's flexcan MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit d50f4630c2e1 ("arm: dts: Remove p1010-flexcan compatible from imx series dts") removed the fallback compatible "fsl,p1010-flexcan" from the imx device trees. As the flexcan cores on i.MX25, i.MX35 and i.MX53 are identical, introduce the first as fallback for the two latter ones. Fixes: d50f4630c2e1 ("arm: dts: Remove p1010-flexcan compatible from imx series dts") Signed-off-by: Uwe Kleine-König Cc: linux-stable # >= v4.16 Signed-off-by: Marc Kleine-Budde --- arch/arm/boot/dts/imx35.dtsi | 4 ++-- arch/arm/boot/dts/imx53.dtsi | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/imx35.dtsi b/arch/arm/boot/dts/imx35.dtsi index bf343195697e..54111ed218b1 100644 --- a/arch/arm/boot/dts/imx35.dtsi +++ b/arch/arm/boot/dts/imx35.dtsi @@ -303,7 +303,7 @@ }; can1: can@53fe4000 { - compatible = "fsl,imx35-flexcan"; + compatible = "fsl,imx35-flexcan", "fsl,imx25-flexcan"; reg = <0x53fe4000 0x1000>; clocks = <&clks 33>, <&clks 33>; clock-names = "ipg", "per"; @@ -312,7 +312,7 @@ }; can2: can@53fe8000 { - compatible = "fsl,imx35-flexcan"; + compatible = "fsl,imx35-flexcan", "fsl,imx25-flexcan"; reg = <0x53fe8000 0x1000>; clocks = <&clks 34>, <&clks 34>; clock-names = "ipg", "per"; diff --git a/arch/arm/boot/dts/imx53.dtsi b/arch/arm/boot/dts/imx53.dtsi index 7d647d043f52..3d65c0192f69 100644 --- a/arch/arm/boot/dts/imx53.dtsi +++ b/arch/arm/boot/dts/imx53.dtsi @@ -551,7 +551,7 @@ }; can1: can@53fc8000 { - compatible = "fsl,imx53-flexcan"; + compatible = "fsl,imx53-flexcan", "fsl,imx25-flexcan"; reg = <0x53fc8000 0x4000>; interrupts = <82>; clocks = <&clks IMX5_CLK_CAN1_IPG_GATE>, @@ -561,7 +561,7 @@ }; can2: can@53fcc000 { - compatible = "fsl,imx53-flexcan"; + compatible = "fsl,imx53-flexcan", "fsl,imx25-flexcan"; reg = <0x53fcc000 0x4000>; interrupts = <83>; clocks = <&clks IMX5_CLK_CAN2_IPG_GATE>, From 6ee00865ffe4e8c8ba4a68d26db53c7ec09bbb89 Mon Sep 17 00:00:00 2001 From: Jimmy Assarsson Date: Fri, 20 Apr 2018 14:38:46 +0200 Subject: [PATCH 158/347] can: kvaser_usb: Increase correct stats counter in kvaser_usb_rx_can_msg() Increase rx_dropped, if alloc_can_skb() fails, not tx_dropped. Signed-off-by: Jimmy Assarsson Cc: linux-stable Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/kvaser_usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c index 63587b8e6825..daed57d3d209 100644 --- a/drivers/net/can/usb/kvaser_usb.c +++ b/drivers/net/can/usb/kvaser_usb.c @@ -1179,7 +1179,7 @@ static void kvaser_usb_rx_can_msg(const struct kvaser_usb *dev, skb = alloc_can_skb(priv->netdev, &cf); if (!skb) { - stats->tx_dropped++; + stats->rx_dropped++; return; } From 1469c5f033a287dc25d113ea65c498c0603fbaa1 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 3 May 2018 15:02:33 +0200 Subject: [PATCH 159/347] dt-bindings: can: rcar_can: Fix R8A7796 SoC name R8A7796 is R-Car M3-W. Signed-off-by: Geert Uytterhoeven Reviewed-by: Simon Horman Signed-off-by: Marc Kleine-Budde --- Documentation/devicetree/bindings/net/can/rcar_canfd.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/can/rcar_canfd.txt b/Documentation/devicetree/bindings/net/can/rcar_canfd.txt index 93c3a6ae32f9..1a4ee1d2506d 100644 --- a/Documentation/devicetree/bindings/net/can/rcar_canfd.txt +++ b/Documentation/devicetree/bindings/net/can/rcar_canfd.txt @@ -5,7 +5,7 @@ Required properties: - compatible: Must contain one or more of the following: - "renesas,rcar-gen3-canfd" for R-Car Gen3 compatible controller. - "renesas,r8a7795-canfd" for R8A7795 (R-Car H3) compatible controller. - - "renesas,r8a7796-canfd" for R8A7796 (R-Car M3) compatible controller. + - "renesas,r8a7796-canfd" for R8A7796 (R-Car M3-W) compatible controller. When compatible with the generic version, nodes must list the SoC-specific version corresponding to the platform first, followed by the From 0a4fe40efb04686529d998716d1680429d0b586b Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Thu, 26 Apr 2018 22:41:14 +0300 Subject: [PATCH 160/347] DT: net: can: rcar_canfd: document R8A77970 bindings Document the R-Car V3M (R8A77970) SoC support in the R-Car CAN-FD bindings. Signed-off-by: Sergei Shtylyov Reviewed-by: Ramesh Shanmugasundaram Reviewed-by: Simon Horman Signed-off-by: Marc Kleine-Budde --- Documentation/devicetree/bindings/net/can/rcar_canfd.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/net/can/rcar_canfd.txt b/Documentation/devicetree/bindings/net/can/rcar_canfd.txt index 1a4ee1d2506d..59dd13aab97f 100644 --- a/Documentation/devicetree/bindings/net/can/rcar_canfd.txt +++ b/Documentation/devicetree/bindings/net/can/rcar_canfd.txt @@ -6,6 +6,7 @@ Required properties: - "renesas,rcar-gen3-canfd" for R-Car Gen3 compatible controller. - "renesas,r8a7795-canfd" for R8A7795 (R-Car H3) compatible controller. - "renesas,r8a7796-canfd" for R8A7796 (R-Car M3-W) compatible controller. + - "renesas,r8a77970-canfd" for R8A77970 (R-Car V3M) compatible controller. When compatible with the generic version, nodes must list the SoC-specific version corresponding to the platform first, followed by the From 7a25ac2f71a409e77dd5c85cf3cbe1cbf2ae77f3 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Fri, 27 Apr 2018 21:53:33 +0300 Subject: [PATCH 161/347] DT: net: can: rcar_canfd: document R8A77980 bindings Document the R-Car V3H (R8A77980) SoC support in the R-Car CAN-FD bindings. Signed-off-by: Sergei Shtylyov Reviewed-by: Simon Horman Signed-off-by: Marc Kleine-Budde --- Documentation/devicetree/bindings/net/can/rcar_canfd.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/net/can/rcar_canfd.txt b/Documentation/devicetree/bindings/net/can/rcar_canfd.txt index 59dd13aab97f..ac71daa46195 100644 --- a/Documentation/devicetree/bindings/net/can/rcar_canfd.txt +++ b/Documentation/devicetree/bindings/net/can/rcar_canfd.txt @@ -7,6 +7,7 @@ Required properties: - "renesas,r8a7795-canfd" for R8A7795 (R-Car H3) compatible controller. - "renesas,r8a7796-canfd" for R8A7796 (R-Car M3-W) compatible controller. - "renesas,r8a77970-canfd" for R8A77970 (R-Car V3M) compatible controller. + - "renesas,r8a77980-canfd" for R8A77980 (R-Car V3H) compatible controller. When compatible with the generic version, nodes must list the SoC-specific version corresponding to the platform first, followed by the From 9d219554d9bf59875b4e571a0392d620e8954879 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 2 May 2018 10:52:55 -0700 Subject: [PATCH 162/347] drm/i915: Adjust eDP's logical vco in a reliable place. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On intel_dp_compute_config() we were calculating the needed vco for eDP on gen9 and we stashing it in intel_atomic_state.cdclk.logical.vco However few moments later on intel_modeset_checks() we fully replace entire intel_atomic_state.cdclk.logical with dev_priv->cdclk.logical fully overwriting the logical desired vco for eDP on gen9. So, with wrong VCO value we end up with wrong desired cdclk, but also it will raise a lot of WARNs: On gen9, when we read CDCLK_CTL to verify if we configured properly the desired frequency the CD Frequency Select bits [27:26] == 10b can mean 337.5 or 308.57 MHz depending on the VCO. So if we have wrong VCO value stashed we will believe the frequency selection didn't stick and start to raise WARNs of cdclk mismatch. [ 42.857519] [drm:intel_dump_cdclk_state [i915]] Changing CDCLK to 308571 kHz, VCO 8640000 kHz, ref 24000 kHz, bypass 24000 kHz, voltage level 0 [ 42.897269] cdclk state doesn't match! [ 42.901052] WARNING: CPU: 5 PID: 1116 at drivers/gpu/drm/i915/intel_cdclk.c:2084 intel_set_cdclk+0x5d/0x110 [i915] [ 42.938004] RIP: 0010:intel_set_cdclk+0x5d/0x110 [i915] [ 43.155253] WARNING: CPU: 5 PID: 1116 at drivers/gpu/drm/i915/intel_cdclk.c:2084 intel_set_cdclk+0x5d/0x110 [i915] [ 43.170277] [drm:intel_dump_cdclk_state [i915]] [hw state] 337500 kHz, VCO 8100000 kHz, ref 24000 kHz, bypass 24000 kHz, voltage level 0 [ 43.182566] [drm:intel_dump_cdclk_state [i915]] [sw state] 308571 kHz, VCO 8640000 kHz, ref 24000 kHz, bypass 24000 kHz, voltage level 0 v2: Move the entire eDP's vco logical adjustment to inside the skl_modeset_calc_cdclk as suggested by Ville. Cc: Ville Syrjälä Signed-off-by: Rodrigo Vivi Reviewed-by: Ville Syrjälä Fixes: bb0f4aab0e76 ("drm/i915: Track full cdclk state for the logical and actual cdclk frequencies") Cc: # v4.12+ Link: https://patchwork.freedesktop.org/patch/msgid/20180502175255.5344-1-rodrigo.vivi@intel.com (cherry picked from commit 3297234a05ab1e90091b0574db4c397ef0e90d5f) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_cdclk.c | 41 +++++++++++++++++++++++++++--- drivers/gpu/drm/i915/intel_dp.c | 20 --------------- 2 files changed, 37 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index 32d24c69da3c..704ddb4d3ca7 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -2302,9 +2302,44 @@ static int bdw_modeset_calc_cdclk(struct drm_atomic_state *state) return 0; } +static int skl_dpll0_vco(struct intel_atomic_state *intel_state) +{ + struct drm_i915_private *dev_priv = to_i915(intel_state->base.dev); + struct intel_crtc *crtc; + struct intel_crtc_state *crtc_state; + int vco, i; + + vco = intel_state->cdclk.logical.vco; + if (!vco) + vco = dev_priv->skl_preferred_vco_freq; + + for_each_new_intel_crtc_in_state(intel_state, crtc, crtc_state, i) { + if (!crtc_state->base.enable) + continue; + + if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP)) + continue; + + /* + * DPLL0 VCO may need to be adjusted to get the correct + * clock for eDP. This will affect cdclk as well. + */ + switch (crtc_state->port_clock / 2) { + case 108000: + case 216000: + vco = 8640000; + break; + default: + vco = 8100000; + break; + } + } + + return vco; +} + static int skl_modeset_calc_cdclk(struct drm_atomic_state *state) { - struct drm_i915_private *dev_priv = to_i915(state->dev); struct intel_atomic_state *intel_state = to_intel_atomic_state(state); int min_cdclk, cdclk, vco; @@ -2312,9 +2347,7 @@ static int skl_modeset_calc_cdclk(struct drm_atomic_state *state) if (min_cdclk < 0) return min_cdclk; - vco = intel_state->cdclk.logical.vco; - if (!vco) - vco = dev_priv->skl_preferred_vco_freq; + vco = skl_dpll0_vco(intel_state); /* * FIXME should also account for plane ratio diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 9a4a51e79fa1..b7b4cfdeb974 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1881,26 +1881,6 @@ found: reduce_m_n); } - /* - * DPLL0 VCO may need to be adjusted to get the correct - * clock for eDP. This will affect cdclk as well. - */ - if (intel_dp_is_edp(intel_dp) && IS_GEN9_BC(dev_priv)) { - int vco; - - switch (pipe_config->port_clock / 2) { - case 108000: - case 216000: - vco = 8640000; - break; - default: - vco = 8100000; - break; - } - - to_intel_atomic_state(pipe_config->base.state)->cdclk.logical.vco = vco; - } - if (!HAS_DDI(dev_priv)) intel_dp_set_clock(encoder, pipe_config); From 660d88e74cf6e16252e366616f158d84dc9dc6a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 26 Apr 2018 19:30:15 +0300 Subject: [PATCH 163/347] drm/i915: Correctly populate user mode h/vdisplay with pipe src size during readout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During state readout we first read out the pipe src size, store that information in the user mode h/vdisplay, but later on we overwrite that with the actual crtc timings. That makes our read out crtc state inconsistent with itself when the BIOS has enabled the panel fitter to scale the pipe contents. Let's preserve the pipe src size based information in the user mode to make things consistent again. This fixes a problem introduced by commit a2936e3d9a9c ("drm/i915: Use drm_mode_get_hv_timing() to populate plane clip rectangle") where the inconsistent state is now leading the plane clipping code to report a failure on account the plane dst coordinates not matching the user mode size. Previously we did the plane clipping based on the pipe src size instead and thus never noticed the inconsistency. The failure manifests as a WARN: [ 0.762117] [drm:intel_dump_pipe_config [i915]] requested mode: [ 0.762142] [drm:drm_mode_debug_printmodeline [drm]] Modeline 0:"1366x768" 60 72143 1366 1414 1446 1526 768 771 777 784 0x40 0xa ... [ 0.762327] [drm:intel_dump_pipe_config [i915]] port clock: 72143, pipe src size: 1024x768, pixel rate 72143 ... [ 0.764666] [drm:drm_atomic_helper_check_plane_state [drm_kms_helper]] Plane must cover entire CRTC [ 0.764690] [drm:drm_rect_debug_print [drm]] dst: 1024x768+0+0 [ 0.764711] [drm:drm_rect_debug_print [drm]] clip: 1366x768+0+0 [ 0.764713] ------------[ cut here ]------------ [ 0.764714] Could not determine valid watermarks for inherited state [ 0.764792] WARNING: CPU: 4 PID: 159 at drivers/gpu/drm/i915/intel_display.c:14584 intel_modeset_init+0x3ce/0x19d0 [i915] ... Cc: FadeMind Cc: Dave Jones Cc: Daniel Vetter Reported-by: FadeMind Reported-by: Dave Jones Tested-by: Dave Jones References: https://lists.freedesktop.org/archives/intel-gfx/2018-April/163186.html Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105992 Fixes: a2936e3d9a9c ("drm/i915: Use drm_mode_get_hv_timing() to populate plane clip rectangle") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180426163015.14232-1-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson Tested-by: Larry Finger Tested-by: FadeMind (cherry picked from commit bd4cd03c81010dcd4e6f0e02e4c15f44aefe12d1) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_display.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 3b48fd2561fe..56004ffbd8bb 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -15178,6 +15178,8 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) memset(&crtc->base.mode, 0, sizeof(crtc->base.mode)); if (crtc_state->base.active) { intel_mode_from_pipe_config(&crtc->base.mode, crtc_state); + crtc->base.mode.hdisplay = crtc_state->pipe_src_w; + crtc->base.mode.vdisplay = crtc_state->pipe_src_h; intel_mode_from_pipe_config(&crtc_state->base.adjusted_mode, crtc_state); WARN_ON(drm_atomic_set_mode_for_crtc(crtc->base.state, &crtc->base.mode)); From e8f48f96db7e482995743f461b3e8a5c1a102533 Mon Sep 17 00:00:00 2001 From: Florent Flament Date: Thu, 19 Apr 2018 19:07:00 +0300 Subject: [PATCH 164/347] drm/i915: Fix drm:intel_enable_lvds ERROR message in kernel log MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix `[drm:intel_enable_lvds] *ERROR* timed out waiting for panel to power on` in kernel log at boot time. Toshiba Satellite Z930 laptops needs between 1 and 2 seconds to power on its screen during Intel i915 DRM initialization. This currently results in a `[drm:intel_enable_lvds] *ERROR* timed out waiting for panel to power on` message appearing in the kernel log during boot time and when stopping the machine. This change increases the timeout of the `intel_enable_lvds` function from 1 to 5 seconds, letting enough time for the Satellite 930 LCD screen to power on, and suppressing the error message from the kernel log. This patch has been successfully tested on Linux 4.14 running on a Toshiba Satellite Z930. [vsyrjala: bump the timeout from 2 to 5 seconds to match the DP code and properly cover the max hw timeout of ~4 seconds, and drop the comment about the specific machine since this is not a particulary surprising issue, nor specific to that one machine] Signed-off-by: Florent Flament Cc: stable@vger.kernel.org Cc: Pavel Petrovic Cc: Sérgio M. Basto Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=103414 References: https://bugzilla.kernel.org/show_bug.cgi?id=57591 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180419160700.19828-1-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula (cherry picked from commit 280b54ade5914d3b4abe4f0ebe083ddbd4603246) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_lvds.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index d35d2d50f595..8691c86f579c 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -326,7 +326,8 @@ static void intel_enable_lvds(struct intel_encoder *encoder, I915_WRITE(PP_CONTROL(0), I915_READ(PP_CONTROL(0)) | PANEL_POWER_ON); POSTING_READ(lvds_encoder->reg); - if (intel_wait_for_register(dev_priv, PP_STATUS(0), PP_ON, PP_ON, 1000)) + + if (intel_wait_for_register(dev_priv, PP_STATUS(0), PP_ON, PP_ON, 5000)) DRM_ERROR("timed out waiting for panel to power on\n"); intel_panel_enable_backlight(pipe_config, conn_state); From 4bbaf2584b86b0772413edeac22ff448f36351b1 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 3 May 2018 15:56:15 +0200 Subject: [PATCH 165/347] s390/cpum_sf: ensure sample frequency of perf event attributes is non-zero Correct a trinity finding for the perf_event_open() system call with a perf event attribute structure that uses a frequency but has the sampling frequency set to zero. This causes a FP divide exception during the sample rate initialization for the hardware sampling facility. Fixes: 8c069ff4bd606 ("s390/perf: add support for the CPU-Measurement Sampling Facility") Cc: stable@vger.kernel.org # 3.14+ Reviewed-by: Heiko Carstens Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/perf_cpum_sf.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 1c9ddd7aa5ec..0292d68e7dde 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -753,6 +753,10 @@ static int __hw_perf_event_init(struct perf_event *event) */ rate = 0; if (attr->freq) { + if (!attr->sample_freq) { + err = -EINVAL; + goto out; + } rate = freq_to_sample_rate(&si, attr->sample_freq); rate = hw_limit_rate(&si, rate); attr->freq = 0; From 6f2db7dc901a1b89fbc50f7b38f0f7ee17205703 Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Wed, 2 May 2018 09:40:25 +0200 Subject: [PATCH 166/347] drm/exynos: hdmi: avoid duplicating drm_bridge_attach drm_bridge_attach takes care of these assignments, so there is no need to open-code them a second time. Signed-off-by: Peter Rosin Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_hdmi.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c index abd84cbcf1c2..09c4bc0b1859 100644 --- a/drivers/gpu/drm/exynos/exynos_hdmi.c +++ b/drivers/gpu/drm/exynos/exynos_hdmi.c @@ -954,8 +954,6 @@ static int hdmi_create_connector(struct drm_encoder *encoder) drm_mode_connector_attach_encoder(connector, encoder); if (hdata->bridge) { - encoder->bridge = hdata->bridge; - hdata->bridge->encoder = encoder; ret = drm_bridge_attach(encoder, hdata->bridge, NULL); if (ret) DRM_ERROR("Failed to attach bridge\n"); From 3148dedfe79e422f448a10250d3e2cdf8b7ee617 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 7 May 2018 21:11:21 +0200 Subject: [PATCH 167/347] r8169: fix powering up RTL8168h Since commit a92a08499b1f "r8169: improve runtime pm in general and suspend unused ports" interfaces w/o link are runtime-suspended after 10s. On systems where drivers take longer to load this can lead to the situation that the interface is runtime-suspended already when it's initially brought up. This shouldn't be a problem because rtl_open() resumes MAC/PHY. However with at least one chip version the interface doesn't properly come up, as reported here: https://bugzilla.kernel.org/show_bug.cgi?id=199549 The vendor driver uses a delay to give certain chip versions some time to resume before starting the PHY configuration. So let's do the same. I don't know which chip versions may be affected, therefore apply this delay always. This patch was reported to fix the issue for RTL8168h. I was able to reproduce the issue on an Asus H310I-Plus which also uses a RTL8168h. Also in my case the patch fixed the issue. Reported-by: Slava Kardakov Tested-by: Slava Kardakov Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 604ae78381ae..c7aac1fc99e8 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -4981,6 +4981,9 @@ static void rtl_pll_power_down(struct rtl8169_private *tp) static void rtl_pll_power_up(struct rtl8169_private *tp) { rtl_generic_op(tp, tp->pll_power_ops.up); + + /* give MAC/PHY some time to resume */ + msleep(20); } static void rtl_init_pll_power_ops(struct rtl8169_private *tp) From 9a0e9802217291e54c4dd1fc5462f189a4be14ec Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 7 May 2018 14:13:03 +0200 Subject: [PATCH 168/347] drm/vc4: Fix scaling of uni-planar formats When using uni-planar formats (like RGB), the scaling parameters are stored in plane 0, not plane 1. Fixes: fc04023fafec ("drm/vc4: Add support for YUV planes.") Cc: stable@vger.kernel.org Signed-off-by: Boris Brezillon Reviewed-by: Eric Anholt Link: https://patchwork.freedesktop.org/patch/msgid/20180507121303.5610-1-boris.brezillon@bootlin.com --- drivers/gpu/drm/vc4/vc4_plane.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index ce39390be389..13dcaad06798 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -503,7 +503,7 @@ static int vc4_plane_mode_set(struct drm_plane *plane, * the scl fields here. */ if (num_planes == 1) { - scl0 = vc4_get_scl_field(state, 1); + scl0 = vc4_get_scl_field(state, 0); scl1 = scl0; } else { scl0 = vc4_get_scl_field(state, 1); From c72a0ded8d4ac9d99c04200035bbfcc82e15842c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Wed, 25 Apr 2018 12:07:03 +0200 Subject: [PATCH 169/347] PM: docs: sleep-states: Fix a typo ("includig") MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a typo in admin-guide/pm/sleep-states.rst. Signed-off-by: Jonathan Neuschäfer Signed-off-by: Rafael J. Wysocki --- Documentation/admin-guide/pm/sleep-states.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/admin-guide/pm/sleep-states.rst b/Documentation/admin-guide/pm/sleep-states.rst index 1e5c0f00cb2f..dbf5acd49f35 100644 --- a/Documentation/admin-guide/pm/sleep-states.rst +++ b/Documentation/admin-guide/pm/sleep-states.rst @@ -15,7 +15,7 @@ Sleep States That Can Be Supported ================================== Depending on its configuration and the capabilities of the platform it runs on, -the Linux kernel can support up to four system sleep states, includig +the Linux kernel can support up to four system sleep states, including hibernation and up to three variants of system suspend. The sleep states that can be supported by the kernel are listed below. From 13610c93488b3c290f393c76744b308445921094 Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Tue, 8 May 2018 17:12:09 +0200 Subject: [PATCH 170/347] PM: docs: intel_pstate: fix Active Mode w/o HWP paragraph P-state selection algorithm (powersave or performance) is selected by echoing the desired choice to scaling_governor sysfs attribute and not to scaling_cur_freq (as currently stated). Fix it. Signed-off-by: Juri Lelli Reviewed-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- Documentation/admin-guide/pm/intel_pstate.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/admin-guide/pm/intel_pstate.rst b/Documentation/admin-guide/pm/intel_pstate.rst index d2b6fda3d67b..ab2fe0eda1d7 100644 --- a/Documentation/admin-guide/pm/intel_pstate.rst +++ b/Documentation/admin-guide/pm/intel_pstate.rst @@ -145,7 +145,7 @@ feature enabled.] In this mode ``intel_pstate`` registers utilization update callbacks with the CPU scheduler in order to run a P-state selection algorithm, either -``powersave`` or ``performance``, depending on the ``scaling_cur_freq`` policy +``powersave`` or ``performance``, depending on the ``scaling_governor`` policy setting in ``sysfs``. The current CPU frequency information to be made available from the ``scaling_cur_freq`` policy attribute in ``sysfs`` is periodically updated by those utilization update callbacks too. From a744490f12707d9f0b205272b29adf5bdb3ba193 Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Wed, 9 May 2018 10:40:51 +0200 Subject: [PATCH 171/347] cpufreq: schedutil: remove stale comment After commit 794a56ebd9a57 (sched/cpufreq: Change the worker kthread to SCHED_DEADLINE) schedutil kthreads are "ignored" for a clock frequency selection point of view, so the potential corner case for RT tasks is not possible at all now. Remove the stale comment mentioning it. Signed-off-by: Juri Lelli Signed-off-by: Rafael J. Wysocki --- kernel/sched/cpufreq_schedutil.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index d2c6083304b4..23ef19070137 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -396,19 +396,6 @@ static void sugov_irq_work(struct irq_work *irq_work) sg_policy = container_of(irq_work, struct sugov_policy, irq_work); - /* - * For RT tasks, the schedutil governor shoots the frequency to maximum. - * Special care must be taken to ensure that this kthread doesn't result - * in the same behavior. - * - * This is (mostly) guaranteed by the work_in_progress flag. The flag is - * updated only at the end of the sugov_work() function and before that - * the schedutil governor rejects all other frequency scaling requests. - * - * There is a very rare case though, where the RT thread yields right - * after the work_in_progress flag is cleared. The effects of that are - * neglected for now. - */ kthread_queue_work(&sg_policy->worker, &sg_policy->work); } From 97739501f207efe33145b918817f305b822987f8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 9 May 2018 11:44:56 +0200 Subject: [PATCH 172/347] cpufreq: schedutil: Avoid using invalid next_freq If the next_freq field of struct sugov_policy is set to UINT_MAX, it shouldn't be used for updating the CPU frequency (this is a special "invalid" value), but after commit b7eaf1aab9f8 (cpufreq: schedutil: Avoid reducing frequency of busy CPUs prematurely) it may be passed as the new frequency to sugov_update_commit() in sugov_update_single(). Fix that by adding an extra check for the special UINT_MAX value of next_freq to sugov_update_single(). Fixes: b7eaf1aab9f8 (cpufreq: schedutil: Avoid reducing frequency of busy CPUs prematurely) Reported-by: Viresh Kumar Cc: 4.12+ # 4.12+ Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- kernel/sched/cpufreq_schedutil.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 23ef19070137..e13df951aca7 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -305,7 +305,8 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, * Do not reduce the frequency if the CPU has not been idle * recently, as the reduction is likely to be premature then. */ - if (busy && next_f < sg_policy->next_freq) { + if (busy && next_f < sg_policy->next_freq && + sg_policy->next_freq != UINT_MAX) { next_f = sg_policy->next_freq; /* Reset cached freq as next_freq has changed */ From 8feaec33b9868582654cd3d5355225dcb79aeca6 Mon Sep 17 00:00:00 2001 From: Kai Heng Feng Date: Mon, 7 May 2018 14:11:20 +0800 Subject: [PATCH 173/347] PCI / PM: Always check PME wakeup capability for runtime wakeup support USB controller ASM1042 stops working after commit de3ef1eb1cd0 (PM / core: Drop run_wake flag from struct dev_pm_info). The device in question is not power managed by platform firmware, furthermore, it only supports PME# from D3cold: Capabilities: [78] Power Management version 3 Flags: PMEClk- DSI- D1- D2- AuxCurrent=55mA PME(D0-,D1-,D2-,D3hot-,D3cold+) Status: D0 NoSoftRst+ PME-Enable- DSel=0 DScale=0 PME- Before commit de3ef1eb1cd0, the device never gets runtime suspended. After that commit, the device gets runtime suspended to D3hot, which can not generate any PME#. usb_hcd_pci_probe() unconditionally calls device_wakeup_enable(), hence device_can_wakeup() in pci_dev_run_wake() always returns true. So pci_dev_run_wake() needs to check PME wakeup capability as its first condition. In addition, change wakeup flag passed to pci_target_state() from false to true, because we want to find the deepest state different from D3cold that the device can still generate PME#. In this case, it's D0 for the device in question. Fixes: de3ef1eb1cd0 (PM / core: Drop run_wake flag from struct dev_pm_info) Signed-off-by: Kai-Heng Feng Cc: 4.13+ # 4.13+ Acked-by: Bjorn Helgaas Signed-off-by: Rafael J. Wysocki --- drivers/pci/pci.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index a04197ce767d..c2616cad3a1d 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2138,16 +2138,16 @@ bool pci_dev_run_wake(struct pci_dev *dev) { struct pci_bus *bus = dev->bus; - if (device_can_wakeup(&dev->dev)) - return true; - if (!dev->pme_support) return false; /* PME-capable in principle, but not from the target power state */ - if (!pci_pme_capable(dev, pci_target_state(dev, false))) + if (!pci_pme_capable(dev, pci_target_state(dev, true))) return false; + if (device_can_wakeup(&dev->dev)) + return true; + while (bus->parent) { struct pci_dev *bridge = bus->self; From 070b9637dd8fa85c3ba7ecc60fe57fa4da9c2d1d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 3 May 2018 11:32:33 +0200 Subject: [PATCH 174/347] HID: i2c-hid: Add RESEND_REPORT_DESCR quirk for Toshiba Click Mini L9W-B The 0457:10fb touchscreen found on the Toshiba Click Mini L9W-B needs to have a report-decriptors command send to it on resume in order for the touchscreen to start generating events again on resume. Signed-off-by: Hans de Goede Acked-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 1 + drivers/hid/i2c-hid/i2c-hid.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index ec73aa486315..46f5ecd11bf7 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -972,6 +972,7 @@ #define USB_DEVICE_ID_SIS817_TOUCH 0x0817 #define USB_DEVICE_ID_SIS_TS 0x1013 #define USB_DEVICE_ID_SIS1030_TOUCH 0x1030 +#define USB_DEVICE_ID_SIS10FB_TOUCH 0x10fb #define USB_VENDOR_ID_SKYCABLE 0x1223 #define USB_DEVICE_ID_SKYCABLE_WIRELESS_PRESENTER 0x3F07 diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid.c index 963328674e93..cc33622253aa 100644 --- a/drivers/hid/i2c-hid/i2c-hid.c +++ b/drivers/hid/i2c-hid/i2c-hid.c @@ -174,6 +174,8 @@ static const struct i2c_hid_quirks { I2C_HID_QUIRK_NO_IRQ_AFTER_RESET }, { I2C_VENDOR_ID_RAYD, I2C_PRODUCT_ID_RAYD_3118, I2C_HID_QUIRK_RESEND_REPORT_DESCR }, + { USB_VENDOR_ID_SIS_TOUCH, USB_DEVICE_ID_SIS10FB_TOUCH, + I2C_HID_QUIRK_RESEND_REPORT_DESCR }, { 0, 0 } }; From 1b06bd8dd95f7a19ab33fdf0f477c94950822ab3 Mon Sep 17 00:00:00 2001 From: David Gilhooley Date: Tue, 8 May 2018 15:49:42 -0700 Subject: [PATCH 175/347] arm64: Add MIDR encoding for NVIDIA CPUs This patch adds the MIDR encodings for NVIDIA as well as the Denver and Carmel CPUs used in Tegra SoCs. Signed-off-by: David Gilhooley Signed-off-by: Will Deacon --- arch/arm64/include/asm/cputype.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 30014a9f8f2b..ea690b3562af 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -75,6 +75,7 @@ #define ARM_CPU_IMP_CAVIUM 0x43 #define ARM_CPU_IMP_BRCM 0x42 #define ARM_CPU_IMP_QCOM 0x51 +#define ARM_CPU_IMP_NVIDIA 0x4E #define ARM_CPU_PART_AEM_V8 0xD0F #define ARM_CPU_PART_FOUNDATION 0xD00 @@ -99,6 +100,9 @@ #define QCOM_CPU_PART_FALKOR 0xC00 #define QCOM_CPU_PART_KRYO 0x200 +#define NVIDIA_CPU_PART_DENVER 0x003 +#define NVIDIA_CPU_PART_CARMEL 0x004 + #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) #define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72) @@ -114,6 +118,8 @@ #define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1) #define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR) #define MIDR_QCOM_KRYO MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO) +#define MIDR_NVIDIA_DENVER MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_DENVER) +#define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL) #ifndef __ASSEMBLY__ From 0583a4ef05987f7e0f3a7bdd3365e5dc36ca306d Mon Sep 17 00:00:00 2001 From: David Gilhooley Date: Tue, 8 May 2018 15:49:43 -0700 Subject: [PATCH 176/347] arm64: capabilities: Add NVIDIA Denver CPU to bp_harden list The NVIDIA Denver CPU also needs a PSCI call to harden the branch predictor. Signed-off-by: David Gilhooley Signed-off-by: Will Deacon --- arch/arm64/kernel/cpu_errata.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index a900befadfe8..e4a1182deff7 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -316,6 +316,7 @@ static const struct midr_range arm64_bp_harden_smccc_cpus[] = { MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1), MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR), + MIDR_ALL_VERSIONS(MIDR_NVIDIA_DENVER), {}, }; From 76aa3de7095f15af7300012cb29ea8ab93eec348 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 7 May 2018 12:08:37 +0200 Subject: [PATCH 177/347] eeprom: at24: fix retrieving the at24_chip_data structure Commit feb2f19b1e8f ("eeprom: at24: move platform data processing into a separate routine") introduced a bug where we incorrectly retireve the at24_chip_data structure. Remove the unnecessary ampersand operator. Fixes: feb2f19b1e8f ("eeprom: at24: move platform data processing into a separate routine") Reported-by: Vadim Pasternak Signed-off-by: Bartosz Golaszewski --- drivers/misc/eeprom/at24.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index 0c125f207aea..33053b0d1fdf 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -518,7 +518,7 @@ static int at24_get_pdata(struct device *dev, struct at24_platform_data *pdata) if (of_node && of_match_device(at24_of_match, dev)) cdata = of_device_get_match_data(dev); else if (id) - cdata = (void *)&id->driver_data; + cdata = (void *)id->driver_data; else cdata = acpi_device_get_match_data(dev); From 2796d303e3c5ec213c578ed3a66872205c126eb8 Mon Sep 17 00:00:00 2001 From: Long Li Date: Wed, 25 Apr 2018 11:30:04 -0700 Subject: [PATCH 178/347] cifs: Allocate validate negotiation request through kmalloc The data buffer allocated on the stack can't be DMA'ed, ib_dma_map_page will return an invalid DMA address for a buffer on stack. Even worse, this incorrect address can't be detected by ib_dma_mapping_error. Sending data from this address to hardware will not fail, but the remote peer will get junk data. Fix this by allocating the request on the heap in smb3_validate_negotiate. Changes in v2: Removed duplicated code on freeing buffers on function exit. (Thanks to Parav Pandit ) Fixed typo in the patch title. Changes in v3: Added "Fixes" to the patch. Changed several sizeof() to use *pointer in place of struct. Changes in v4: Added detailed comments on the failure through RDMA. Allocate request buffer using GPF_NOFS. Fixed possible memory leak. Changes in v5: Removed variable ret for checking return value. Changed to use pneg_inbuf->Dialects[0] to calculate unused space in pneg_inbuf. Fixes: ff1c038addc4 ("Check SMB3 dialects against downgrade attacks") Signed-off-by: Long Li Signed-off-by: Steve French Reviewed-by: Ronnie Sahlberg Reviewed-by: Tom Talpey --- fs/cifs/smb2pdu.c | 68 ++++++++++++++++++++++++++--------------------- 1 file changed, 38 insertions(+), 30 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 60db51bae0e3..260e9c4219d8 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -730,8 +730,8 @@ neg_exit: int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) { - int rc = 0; - struct validate_negotiate_info_req vneg_inbuf; + int rc; + struct validate_negotiate_info_req *pneg_inbuf; struct validate_negotiate_info_rsp *pneg_rsp = NULL; u32 rsplen; u32 inbuflen; /* max of 4 dialects */ @@ -765,63 +765,69 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) if (tcon->ses->session_flags & SMB2_SESSION_FLAG_IS_NULL) cifs_dbg(VFS, "Unexpected null user (anonymous) auth flag sent by server\n"); - vneg_inbuf.Capabilities = + pneg_inbuf = kmalloc(sizeof(*pneg_inbuf), GFP_NOFS); + if (!pneg_inbuf) + return -ENOMEM; + + pneg_inbuf->Capabilities = cpu_to_le32(tcon->ses->server->vals->req_capabilities); - memcpy(vneg_inbuf.Guid, tcon->ses->server->client_guid, + memcpy(pneg_inbuf->Guid, tcon->ses->server->client_guid, SMB2_CLIENT_GUID_SIZE); if (tcon->ses->sign) - vneg_inbuf.SecurityMode = + pneg_inbuf->SecurityMode = cpu_to_le16(SMB2_NEGOTIATE_SIGNING_REQUIRED); else if (global_secflags & CIFSSEC_MAY_SIGN) - vneg_inbuf.SecurityMode = + pneg_inbuf->SecurityMode = cpu_to_le16(SMB2_NEGOTIATE_SIGNING_ENABLED); else - vneg_inbuf.SecurityMode = 0; + pneg_inbuf->SecurityMode = 0; if (strcmp(tcon->ses->server->vals->version_string, SMB3ANY_VERSION_STRING) == 0) { - vneg_inbuf.Dialects[0] = cpu_to_le16(SMB30_PROT_ID); - vneg_inbuf.Dialects[1] = cpu_to_le16(SMB302_PROT_ID); - vneg_inbuf.DialectCount = cpu_to_le16(2); + pneg_inbuf->Dialects[0] = cpu_to_le16(SMB30_PROT_ID); + pneg_inbuf->Dialects[1] = cpu_to_le16(SMB302_PROT_ID); + pneg_inbuf->DialectCount = cpu_to_le16(2); /* structure is big enough for 3 dialects, sending only 2 */ - inbuflen = sizeof(struct validate_negotiate_info_req) - 2; + inbuflen = sizeof(*pneg_inbuf) - + sizeof(pneg_inbuf->Dialects[0]); } else if (strcmp(tcon->ses->server->vals->version_string, SMBDEFAULT_VERSION_STRING) == 0) { - vneg_inbuf.Dialects[0] = cpu_to_le16(SMB21_PROT_ID); - vneg_inbuf.Dialects[1] = cpu_to_le16(SMB30_PROT_ID); - vneg_inbuf.Dialects[2] = cpu_to_le16(SMB302_PROT_ID); - vneg_inbuf.DialectCount = cpu_to_le16(3); + pneg_inbuf->Dialects[0] = cpu_to_le16(SMB21_PROT_ID); + pneg_inbuf->Dialects[1] = cpu_to_le16(SMB30_PROT_ID); + pneg_inbuf->Dialects[2] = cpu_to_le16(SMB302_PROT_ID); + pneg_inbuf->DialectCount = cpu_to_le16(3); /* structure is big enough for 3 dialects */ - inbuflen = sizeof(struct validate_negotiate_info_req); + inbuflen = sizeof(*pneg_inbuf); } else { /* otherwise specific dialect was requested */ - vneg_inbuf.Dialects[0] = + pneg_inbuf->Dialects[0] = cpu_to_le16(tcon->ses->server->vals->protocol_id); - vneg_inbuf.DialectCount = cpu_to_le16(1); + pneg_inbuf->DialectCount = cpu_to_le16(1); /* structure is big enough for 3 dialects, sending only 1 */ - inbuflen = sizeof(struct validate_negotiate_info_req) - 4; + inbuflen = sizeof(*pneg_inbuf) - + sizeof(pneg_inbuf->Dialects[0]) * 2; } rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID, FSCTL_VALIDATE_NEGOTIATE_INFO, true /* is_fsctl */, - (char *)&vneg_inbuf, sizeof(struct validate_negotiate_info_req), - (char **)&pneg_rsp, &rsplen); + (char *)pneg_inbuf, inbuflen, (char **)&pneg_rsp, &rsplen); if (rc != 0) { cifs_dbg(VFS, "validate protocol negotiate failed: %d\n", rc); - return -EIO; + rc = -EIO; + goto out_free_inbuf; } - if (rsplen != sizeof(struct validate_negotiate_info_rsp)) { + rc = -EIO; + if (rsplen != sizeof(*pneg_rsp)) { cifs_dbg(VFS, "invalid protocol negotiate response size: %d\n", rsplen); /* relax check since Mac returns max bufsize allowed on ioctl */ - if ((rsplen > CIFSMaxBufSize) - || (rsplen < sizeof(struct validate_negotiate_info_rsp))) - goto err_rsp_free; + if (rsplen > CIFSMaxBufSize || rsplen < sizeof(*pneg_rsp)) + goto out_free_rsp; } /* check validate negotiate info response matches what we got earlier */ @@ -838,15 +844,17 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) goto vneg_out; /* validate negotiate successful */ + rc = 0; cifs_dbg(FYI, "validate negotiate info successful\n"); - kfree(pneg_rsp); - return 0; + goto out_free_rsp; vneg_out: cifs_dbg(VFS, "protocol revalidation - security settings mismatch\n"); -err_rsp_free: +out_free_rsp: kfree(pneg_rsp); - return -EIO; +out_free_inbuf: + kfree(pneg_inbuf); + return rc; } enum securityEnum From f7c439668a291ca94f358e44d3a3e9f2a2524b8a Mon Sep 17 00:00:00 2001 From: Long Li Date: Wed, 25 Apr 2018 11:30:05 -0700 Subject: [PATCH 179/347] cifs: smbd: Enable signing with smbdirect Now signing is supported with RDMA transport. Remove the code that disabled it. Signed-off-by: Long Li Signed-off-by: Steve French Reviewed-by: Ronnie Sahlberg --- fs/cifs/connect.c | 8 -------- fs/cifs/smb2pdu.c | 5 ----- 2 files changed, 13 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index a5aa158d535a..7a10a5d0731f 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1977,14 +1977,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, goto cifs_parse_mount_err; } -#ifdef CONFIG_CIFS_SMB_DIRECT - if (vol->rdma && vol->sign) { - cifs_dbg(VFS, "Currently SMB direct doesn't support signing." - " This is being fixed\n"); - goto cifs_parse_mount_err; - } -#endif - #ifndef CONFIG_KEYS /* Muliuser mounts require CONFIG_KEYS support */ if (vol->multiuser) { diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 260e9c4219d8..0f48741a0130 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -738,11 +738,6 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) cifs_dbg(FYI, "validate negotiate\n"); -#ifdef CONFIG_CIFS_SMB_DIRECT - if (tcon->ses->server->rdma) - return 0; -#endif - /* In SMB3.11 preauth integrity supersedes validate negotiate */ if (tcon->ses->server->dialect == SMB311_PROT_ID) return 0; From ae2cd7fb478b8da707906ee1706ae1379968a8f9 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Fri, 4 May 2018 11:25:26 -0300 Subject: [PATCH 180/347] cifs: smb2ops: Fix listxattr() when there are no EAs As per listxattr(2): On success, a nonnegative number is returned indicating the size of the extended attribute name list. On failure, -1 is returned and errno is set appropriately. In SMB1, when the server returns an empty EA list through a listxattr(), it will correctly return 0 as there are no EAs for the given file. However, in SMB2+, it returns -ENODATA in listxattr() which is wrong since the request and response were sent successfully, although there's no actual EA for the given file. This patch fixes listxattr() for SMB2+ by returning 0 in cifs_listxattr() when the server returns an empty list of EAs. Signed-off-by: Paulo Alcantara Reviewed-by: Aurelien Aptel Signed-off-by: Steve French --- fs/cifs/smb2ops.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index b76b85881dcc..9c6d95ffca97 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -589,9 +589,15 @@ smb2_query_eas(const unsigned int xid, struct cifs_tcon *tcon, SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); + /* + * If ea_name is NULL (listxattr) and there are no EAs, return 0 as it's + * not an error. Otherwise, the specified ea_name was not found. + */ if (!rc) rc = move_smb2_ea_to_cifs(ea_data, buf_size, smb2_data, SMB2_MAX_EA_BUF, ea_name); + else if (!ea_name && rc == -ENODATA) + rc = 0; kfree(smb2_data); return rc; From c5191133405ac317d20d23c8510416e18842031d Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Fri, 20 Apr 2018 11:05:07 -0400 Subject: [PATCH 181/347] drm/amd/display: Add VG12 ASIC IDs Signed-off-by: Harry Wentland Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/include/dal_asic_id.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/include/dal_asic_id.h b/drivers/gpu/drm/amd/display/include/dal_asic_id.h index 9831cb5eaa7c..9b0a04f99ac8 100644 --- a/drivers/gpu/drm/amd/display/include/dal_asic_id.h +++ b/drivers/gpu/drm/amd/display/include/dal_asic_id.h @@ -113,9 +113,14 @@ #define AI_GREENLAND_P_A0 1 #define AI_GREENLAND_P_A1 2 +#define AI_UNKNOWN 0xFF -#define ASICREV_IS_GREENLAND_M(eChipRev) (eChipRev < AI_UNKNOWN) -#define ASICREV_IS_GREENLAND_P(eChipRev) (eChipRev < AI_UNKNOWN) +#define AI_VEGA12_P_A0 20 +#define ASICREV_IS_GREENLAND_M(eChipRev) (eChipRev < AI_VEGA12_P_A0) +#define ASICREV_IS_GREENLAND_P(eChipRev) (eChipRev < AI_VEGA12_P_A0) + +#define ASICREV_IS_VEGA12_P(eChipRev) ((eChipRev >= AI_VEGA12_P_A0) && (eChipRev < AI_UNKNOWN)) +#define ASICREV_IS_VEGA12_p(eChipRev) ((eChipRev >= AI_VEGA12_P_A0) && (eChipRev < AI_UNKNOWN)) /* DCN1_0 */ #define INTERNAL_REV_RAVEN_A0 0x00 /* First spin of Raven */ From 60a5205fb5f3da3907b8b53561571a790e7b1e70 Mon Sep 17 00:00:00 2001 From: "Jerry (Fangzhi) Zuo" Date: Mon, 5 Mar 2018 14:59:57 -0500 Subject: [PATCH 182/347] drm/amd: Add BIOS smu_info v3_3 required struct def. Signed-off-by: Jerry (Fangzhi) Zuo Reviewed-by: Harry Wentland Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/include/atomfirmware.h | 170 ++++++++++++++++++++- 1 file changed, 168 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h index 0f5ad54d3fd3..de177ce8ca80 100644 --- a/drivers/gpu/drm/amd/include/atomfirmware.h +++ b/drivers/gpu/drm/amd/include/atomfirmware.h @@ -501,6 +501,32 @@ enum atom_cooling_solution_id{ LIQUID_COOLING = 0x01 }; +struct atom_firmware_info_v3_2 { + struct atom_common_table_header table_header; + uint32_t firmware_revision; + uint32_t bootup_sclk_in10khz; + uint32_t bootup_mclk_in10khz; + uint32_t firmware_capability; // enum atombios_firmware_capability + uint32_t main_call_parser_entry; /* direct address of main parser call in VBIOS binary. */ + uint32_t bios_scratch_reg_startaddr; // 1st bios scratch register dword address + uint16_t bootup_vddc_mv; + uint16_t bootup_vddci_mv; + uint16_t bootup_mvddc_mv; + uint16_t bootup_vddgfx_mv; + uint8_t mem_module_id; + uint8_t coolingsolution_id; /*0: Air cooling; 1: Liquid cooling ... */ + uint8_t reserved1[2]; + uint32_t mc_baseaddr_high; + uint32_t mc_baseaddr_low; + uint8_t board_i2c_feature_id; // enum of atom_board_i2c_feature_id_def + uint8_t board_i2c_feature_gpio_id; // i2c id find in gpio_lut data table gpio_id + uint8_t board_i2c_feature_slave_addr; + uint8_t reserved3; + uint16_t bootup_mvddq_mv; + uint16_t bootup_mvpp_mv; + uint32_t zfbstartaddrin16mb; + uint32_t reserved2[3]; +}; /* *************************************************************************** @@ -1169,7 +1195,29 @@ struct atom_gfx_info_v2_2 uint32_t rlc_gpu_timer_refclk; }; - +struct atom_gfx_info_v2_3 { + struct atom_common_table_header table_header; + uint8_t gfxip_min_ver; + uint8_t gfxip_max_ver; + uint8_t max_shader_engines; + uint8_t max_tile_pipes; + uint8_t max_cu_per_sh; + uint8_t max_sh_per_se; + uint8_t max_backends_per_se; + uint8_t max_texture_channel_caches; + uint32_t regaddr_cp_dma_src_addr; + uint32_t regaddr_cp_dma_src_addr_hi; + uint32_t regaddr_cp_dma_dst_addr; + uint32_t regaddr_cp_dma_dst_addr_hi; + uint32_t regaddr_cp_dma_command; + uint32_t regaddr_cp_status; + uint32_t regaddr_rlc_gpu_clock_32; + uint32_t rlc_gpu_timer_refclk; + uint8_t active_cu_per_sh; + uint8_t active_rb_per_se; + uint16_t gcgoldenoffset; + uint32_t rm21_sram_vmin_value; +}; /* *************************************************************************** @@ -1198,6 +1246,76 @@ struct atom_smu_info_v3_1 uint8_t fw_ctf_polarity; // GPIO polarity for CTF }; +struct atom_smu_info_v3_2 { + struct atom_common_table_header table_header; + uint8_t smuip_min_ver; + uint8_t smuip_max_ver; + uint8_t smu_rsd1; + uint8_t gpuclk_ss_mode; + uint16_t sclk_ss_percentage; + uint16_t sclk_ss_rate_10hz; + uint16_t gpuclk_ss_percentage; // in unit of 0.001% + uint16_t gpuclk_ss_rate_10hz; + uint32_t core_refclk_10khz; + uint8_t ac_dc_gpio_bit; // GPIO bit shift in SMU_GPIOPAD_A configured for AC/DC switching, =0xff means invalid + uint8_t ac_dc_polarity; // GPIO polarity for AC/DC switching + uint8_t vr0hot_gpio_bit; // GPIO bit shift in SMU_GPIOPAD_A configured for VR0 HOT event, =0xff means invalid + uint8_t vr0hot_polarity; // GPIO polarity for VR0 HOT event + uint8_t vr1hot_gpio_bit; // GPIO bit shift in SMU_GPIOPAD_A configured for VR1 HOT event , =0xff means invalid + uint8_t vr1hot_polarity; // GPIO polarity for VR1 HOT event + uint8_t fw_ctf_gpio_bit; // GPIO bit shift in SMU_GPIOPAD_A configured for CTF, =0xff means invalid + uint8_t fw_ctf_polarity; // GPIO polarity for CTF + uint8_t pcc_gpio_bit; // GPIO bit shift in SMU_GPIOPAD_A configured for PCC, =0xff means invalid + uint8_t pcc_gpio_polarity; // GPIO polarity for CTF + uint16_t smugoldenoffset; + uint32_t gpupll_vco_freq_10khz; + uint32_t bootup_smnclk_10khz; + uint32_t bootup_socclk_10khz; + uint32_t bootup_mp0clk_10khz; + uint32_t bootup_mp1clk_10khz; + uint32_t bootup_lclk_10khz; + uint32_t bootup_dcefclk_10khz; + uint32_t ctf_threshold_override_value; + uint32_t reserved[5]; +}; + +struct atom_smu_info_v3_3 { + struct atom_common_table_header table_header; + uint8_t smuip_min_ver; + uint8_t smuip_max_ver; + uint8_t smu_rsd1; + uint8_t gpuclk_ss_mode; + uint16_t sclk_ss_percentage; + uint16_t sclk_ss_rate_10hz; + uint16_t gpuclk_ss_percentage; // in unit of 0.001% + uint16_t gpuclk_ss_rate_10hz; + uint32_t core_refclk_10khz; + uint8_t ac_dc_gpio_bit; // GPIO bit shift in SMU_GPIOPAD_A configured for AC/DC switching, =0xff means invalid + uint8_t ac_dc_polarity; // GPIO polarity for AC/DC switching + uint8_t vr0hot_gpio_bit; // GPIO bit shift in SMU_GPIOPAD_A configured for VR0 HOT event, =0xff means invalid + uint8_t vr0hot_polarity; // GPIO polarity for VR0 HOT event + uint8_t vr1hot_gpio_bit; // GPIO bit shift in SMU_GPIOPAD_A configured for VR1 HOT event , =0xff means invalid + uint8_t vr1hot_polarity; // GPIO polarity for VR1 HOT event + uint8_t fw_ctf_gpio_bit; // GPIO bit shift in SMU_GPIOPAD_A configured for CTF, =0xff means invalid + uint8_t fw_ctf_polarity; // GPIO polarity for CTF + uint8_t pcc_gpio_bit; // GPIO bit shift in SMU_GPIOPAD_A configured for PCC, =0xff means invalid + uint8_t pcc_gpio_polarity; // GPIO polarity for CTF + uint16_t smugoldenoffset; + uint32_t gpupll_vco_freq_10khz; + uint32_t bootup_smnclk_10khz; + uint32_t bootup_socclk_10khz; + uint32_t bootup_mp0clk_10khz; + uint32_t bootup_mp1clk_10khz; + uint32_t bootup_lclk_10khz; + uint32_t bootup_dcefclk_10khz; + uint32_t ctf_threshold_override_value; + uint32_t syspll3_0_vco_freq_10khz; + uint32_t syspll3_1_vco_freq_10khz; + uint32_t bootup_fclk_10khz; + uint32_t bootup_waflclk_10khz; + uint32_t reserved[3]; +}; + /* *************************************************************************** Data Table smc_dpm_info structure @@ -1283,7 +1401,6 @@ struct atom_smc_dpm_info_v4_1 uint32_t boardreserved[10]; }; - /* *************************************************************************** Data Table asic_profiling_info structure @@ -1864,6 +1981,55 @@ enum atom_smu9_syspll0_clock_id SMU9_SYSPLL0_DISPCLK_ID = 11, // DISPCLK }; +enum atom_smu11_syspll_id { + SMU11_SYSPLL0_ID = 0, + SMU11_SYSPLL1_0_ID = 1, + SMU11_SYSPLL1_1_ID = 2, + SMU11_SYSPLL1_2_ID = 3, + SMU11_SYSPLL2_ID = 4, + SMU11_SYSPLL3_0_ID = 5, + SMU11_SYSPLL3_1_ID = 6, +}; + + +enum atom_smu11_syspll0_clock_id { + SMU11_SYSPLL0_SOCCLK_ID = 0, // SOCCLK + SMU11_SYSPLL0_MP0CLK_ID = 1, // MP0CLK + SMU11_SYSPLL0_DCLK_ID = 2, // DCLK + SMU11_SYSPLL0_VCLK_ID = 3, // VCLK + SMU11_SYSPLL0_ECLK_ID = 4, // ECLK + SMU11_SYSPLL0_DCEFCLK_ID = 5, // DCEFCLK +}; + + +enum atom_smu11_syspll1_0_clock_id { + SMU11_SYSPLL1_0_UCLKA_ID = 0, // UCLK_a +}; + +enum atom_smu11_syspll1_1_clock_id { + SMU11_SYSPLL1_0_UCLKB_ID = 0, // UCLK_b +}; + +enum atom_smu11_syspll1_2_clock_id { + SMU11_SYSPLL1_0_FCLK_ID = 0, // FCLK +}; + +enum atom_smu11_syspll2_clock_id { + SMU11_SYSPLL2_GFXCLK_ID = 0, // GFXCLK +}; + +enum atom_smu11_syspll3_0_clock_id { + SMU11_SYSPLL3_0_WAFCLK_ID = 0, // WAFCLK + SMU11_SYSPLL3_0_DISPCLK_ID = 1, // DISPCLK + SMU11_SYSPLL3_0_DPREFCLK_ID = 2, // DPREFCLK +}; + +enum atom_smu11_syspll3_1_clock_id { + SMU11_SYSPLL3_1_MP1CLK_ID = 0, // MP1CLK + SMU11_SYSPLL3_1_SMNCLK_ID = 1, // SMNCLK + SMU11_SYSPLL3_1_LCLK_ID = 2, // LCLK +}; + struct atom_get_smu_clock_info_output_parameters_v3_1 { union { From 6e65fb862064663ad3a08f964af1e8f3f2abf688 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Fri, 20 Apr 2018 10:56:18 -0400 Subject: [PATCH 183/347] drm/amd/display: Add get_firmware_info_v3_2 for VG12 Signed-off-by: Harry Wentland Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- .../drm/amd/display/dc/bios/bios_parser2.c | 86 ++++++++++++++++++- 1 file changed, 85 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c index 985fe8c22875..10a5807a7e8b 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c @@ -70,6 +70,10 @@ static enum bp_result get_firmware_info_v3_1( struct bios_parser *bp, struct dc_firmware_info *info); +static enum bp_result get_firmware_info_v3_2( + struct bios_parser *bp, + struct dc_firmware_info *info); + static struct atom_hpd_int_record *get_hpd_record(struct bios_parser *bp, struct atom_display_object_path_v2 *object); @@ -1321,9 +1325,11 @@ static enum bp_result bios_parser_get_firmware_info( case 3: switch (revision.minor) { case 1: - case 2: result = get_firmware_info_v3_1(bp, info); break; + case 2: + result = get_firmware_info_v3_2(bp, info); + break; default: break; } @@ -1383,6 +1389,84 @@ static enum bp_result get_firmware_info_v3_1( return BP_RESULT_OK; } +static enum bp_result get_firmware_info_v3_2( + struct bios_parser *bp, + struct dc_firmware_info *info) +{ + struct atom_firmware_info_v3_2 *firmware_info; + struct atom_display_controller_info_v4_1 *dce_info = NULL; + struct atom_common_table_header *header; + struct atom_data_revision revision; + struct atom_smu_info_v3_2 *smu_info_v3_2 = NULL; + struct atom_smu_info_v3_3 *smu_info_v3_3 = NULL; + + if (!info) + return BP_RESULT_BADINPUT; + + firmware_info = GET_IMAGE(struct atom_firmware_info_v3_2, + DATA_TABLES(firmwareinfo)); + + dce_info = GET_IMAGE(struct atom_display_controller_info_v4_1, + DATA_TABLES(dce_info)); + + if (!firmware_info || !dce_info) + return BP_RESULT_BADBIOSTABLE; + + memset(info, 0, sizeof(*info)); + + header = GET_IMAGE(struct atom_common_table_header, + DATA_TABLES(smu_info)); + get_atom_data_table_revision(header, &revision); + + if (revision.minor == 2) { + /* Vega12 */ + smu_info_v3_2 = GET_IMAGE(struct atom_smu_info_v3_2, + DATA_TABLES(smu_info)); + + if (!smu_info_v3_2) + return BP_RESULT_BADBIOSTABLE; + + info->default_engine_clk = smu_info_v3_2->bootup_dcefclk_10khz * 10; + } else if (revision.minor == 3) { + /* Vega20 */ + smu_info_v3_3 = GET_IMAGE(struct atom_smu_info_v3_3, + DATA_TABLES(smu_info)); + + if (!smu_info_v3_3) + return BP_RESULT_BADBIOSTABLE; + + info->default_engine_clk = smu_info_v3_3->bootup_dcefclk_10khz * 10; + } + + // We need to convert from 10KHz units into KHz units. + info->default_memory_clk = firmware_info->bootup_mclk_in10khz * 10; + + /* 27MHz for Vega10 & Vega12; 100MHz for Vega20 */ + info->pll_info.crystal_frequency = dce_info->dce_refclk_10khz * 10; + /* Hardcode frequency if BIOS gives no DCE Ref Clk */ + if (info->pll_info.crystal_frequency == 0) { + if (revision.minor == 2) + info->pll_info.crystal_frequency = 27000; + else if (revision.minor == 3) + info->pll_info.crystal_frequency = 100000; + } + /*dp_phy_ref_clk is not correct for atom_display_controller_info_v4_2, but we don't use it*/ + info->dp_phy_ref_clk = dce_info->dpphy_refclk_10khz * 10; + info->i2c_engine_ref_clk = dce_info->i2c_engine_refclk_10khz * 10; + + /* Get GPU PLL VCO Clock */ + if (bp->cmd_tbl.get_smu_clock_info != NULL) { + if (revision.minor == 2) + info->smu_gpu_pll_output_freq = + bp->cmd_tbl.get_smu_clock_info(bp, SMU9_SYSPLL0_ID) * 10; + else if (revision.minor == 3) + info->smu_gpu_pll_output_freq = + bp->cmd_tbl.get_smu_clock_info(bp, SMU11_SYSPLL3_0_ID) * 10; + } + + return BP_RESULT_OK; +} + static enum bp_result bios_parser_get_encoder_cap_info( struct dc_bios *dcb, struct graphics_object_id object_id, From 018d82e5f02ef3583411bcaa4e00c69786f46f19 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Tue, 24 Apr 2018 10:49:20 -0400 Subject: [PATCH 184/347] drm/amd/display: Don't return ddc result and read_bytes in same return value The two ranges overlap. Signed-off-by: Harry Wentland Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- .../display/amdgpu_dm/amdgpu_dm_mst_types.c | 20 +++++++++++-------- .../gpu/drm/amd/display/dc/core/dc_link_ddc.c | 10 +++++++--- .../gpu/drm/amd/display/dc/inc/dc_link_ddc.h | 5 +++-- 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index ace9ad578ca0..4304d9e408b8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -83,21 +83,22 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, enum i2c_mot_mode mot = (msg->request & DP_AUX_I2C_MOT) ? I2C_MOT_TRUE : I2C_MOT_FALSE; enum ddc_result res; - ssize_t read_bytes; + uint32_t read_bytes = msg->size; if (WARN_ON(msg->size > 16)) return -E2BIG; switch (msg->request & ~DP_AUX_I2C_MOT) { case DP_AUX_NATIVE_READ: - read_bytes = dal_ddc_service_read_dpcd_data( + res = dal_ddc_service_read_dpcd_data( TO_DM_AUX(aux)->ddc_service, false, I2C_MOT_UNDEF, msg->address, msg->buffer, - msg->size); - return read_bytes; + msg->size, + &read_bytes); + break; case DP_AUX_NATIVE_WRITE: res = dal_ddc_service_write_dpcd_data( TO_DM_AUX(aux)->ddc_service, @@ -108,14 +109,15 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, msg->size); break; case DP_AUX_I2C_READ: - read_bytes = dal_ddc_service_read_dpcd_data( + res = dal_ddc_service_read_dpcd_data( TO_DM_AUX(aux)->ddc_service, true, mot, msg->address, msg->buffer, - msg->size); - return read_bytes; + msg->size, + &read_bytes); + break; case DP_AUX_I2C_WRITE: res = dal_ddc_service_write_dpcd_data( TO_DM_AUX(aux)->ddc_service, @@ -137,7 +139,9 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux, r == DDC_RESULT_SUCESSFULL); #endif - return msg->size; + if (res != DDC_RESULT_SUCESSFULL) + return -EIO; + return read_bytes; } static enum drm_connector_status diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c index 49c2face1e7a..ae48d603ebd6 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c @@ -629,13 +629,14 @@ bool dal_ddc_service_query_ddc_data( return ret; } -ssize_t dal_ddc_service_read_dpcd_data( +enum ddc_result dal_ddc_service_read_dpcd_data( struct ddc_service *ddc, bool i2c, enum i2c_mot_mode mot, uint32_t address, uint8_t *data, - uint32_t len) + uint32_t len, + uint32_t *read) { struct aux_payload read_payload = { .i2c_over_aux = i2c, @@ -652,6 +653,8 @@ ssize_t dal_ddc_service_read_dpcd_data( .mot = mot }; + *read = 0; + if (len > DEFAULT_AUX_MAX_DATA_SIZE) { BREAK_TO_DEBUGGER(); return DDC_RESULT_FAILED_INVALID_OPERATION; @@ -661,7 +664,8 @@ ssize_t dal_ddc_service_read_dpcd_data( ddc->ctx->i2caux, ddc->ddc_pin, &command)) { - return (ssize_t)command.payloads->length; + *read = command.payloads->length; + return DDC_RESULT_SUCESSFULL; } return DDC_RESULT_FAILED_OPERATION; diff --git a/drivers/gpu/drm/amd/display/dc/inc/dc_link_ddc.h b/drivers/gpu/drm/amd/display/dc/inc/dc_link_ddc.h index 090b7a8dd67b..30b3a08b91be 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/dc_link_ddc.h +++ b/drivers/gpu/drm/amd/display/dc/inc/dc_link_ddc.h @@ -102,13 +102,14 @@ bool dal_ddc_service_query_ddc_data( uint8_t *read_buf, uint32_t read_size); -ssize_t dal_ddc_service_read_dpcd_data( +enum ddc_result dal_ddc_service_read_dpcd_data( struct ddc_service *ddc, bool i2c, enum i2c_mot_mode mot, uint32_t address, uint8_t *data, - uint32_t len); + uint32_t len, + uint32_t *read); enum ddc_result dal_ddc_service_write_dpcd_data( struct ddc_service *ddc, From bd4caed47a19f25fe8674344ea06d469c27ac314 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Tue, 17 Apr 2018 12:25:22 +0200 Subject: [PATCH 185/347] drm/amd/display: Use kvzalloc for potentially large allocations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allocating up to 32 physically contiguous pages can easily fail (and has failed for me), and isn't necessary anyway. Reviewed-by: Harry Wentland Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/dc/core/dc_surface.c | 14 ++-- .../amd/display/modules/color/color_gamma.c | 72 ++++++++++--------- 2 files changed, 45 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c index ade5b8ee9c3c..132eef3826e2 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c @@ -66,8 +66,8 @@ struct dc_plane_state *dc_create_plane_state(struct dc *dc) { struct dc *core_dc = dc; - struct dc_plane_state *plane_state = kzalloc(sizeof(*plane_state), - GFP_KERNEL); + struct dc_plane_state *plane_state = kvzalloc(sizeof(*plane_state), + GFP_KERNEL); if (NULL == plane_state) return NULL; @@ -120,7 +120,7 @@ static void dc_plane_state_free(struct kref *kref) { struct dc_plane_state *plane_state = container_of(kref, struct dc_plane_state, refcount); destruct(plane_state); - kfree(plane_state); + kvfree(plane_state); } void dc_plane_state_release(struct dc_plane_state *plane_state) @@ -136,7 +136,7 @@ void dc_gamma_retain(struct dc_gamma *gamma) static void dc_gamma_free(struct kref *kref) { struct dc_gamma *gamma = container_of(kref, struct dc_gamma, refcount); - kfree(gamma); + kvfree(gamma); } void dc_gamma_release(struct dc_gamma **gamma) @@ -147,7 +147,7 @@ void dc_gamma_release(struct dc_gamma **gamma) struct dc_gamma *dc_create_gamma(void) { - struct dc_gamma *gamma = kzalloc(sizeof(*gamma), GFP_KERNEL); + struct dc_gamma *gamma = kvzalloc(sizeof(*gamma), GFP_KERNEL); if (gamma == NULL) goto alloc_fail; @@ -167,7 +167,7 @@ void dc_transfer_func_retain(struct dc_transfer_func *tf) static void dc_transfer_func_free(struct kref *kref) { struct dc_transfer_func *tf = container_of(kref, struct dc_transfer_func, refcount); - kfree(tf); + kvfree(tf); } void dc_transfer_func_release(struct dc_transfer_func *tf) @@ -177,7 +177,7 @@ void dc_transfer_func_release(struct dc_transfer_func *tf) struct dc_transfer_func *dc_create_transfer_func(void) { - struct dc_transfer_func *tf = kzalloc(sizeof(*tf), GFP_KERNEL); + struct dc_transfer_func *tf = kvzalloc(sizeof(*tf), GFP_KERNEL); if (tf == NULL) goto alloc_fail; diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c index e7e374f56864..b3747a019deb 100644 --- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c +++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c @@ -1093,19 +1093,19 @@ bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf, output_tf->type = TF_TYPE_DISTRIBUTED_POINTS; - rgb_user = kzalloc(sizeof(*rgb_user) * (ramp->num_entries + _EXTRA_POINTS), - GFP_KERNEL); + rgb_user = kvzalloc(sizeof(*rgb_user) * (ramp->num_entries + _EXTRA_POINTS), + GFP_KERNEL); if (!rgb_user) goto rgb_user_alloc_fail; - rgb_regamma = kzalloc(sizeof(*rgb_regamma) * (MAX_HW_POINTS + _EXTRA_POINTS), - GFP_KERNEL); + rgb_regamma = kvzalloc(sizeof(*rgb_regamma) * (MAX_HW_POINTS + _EXTRA_POINTS), + GFP_KERNEL); if (!rgb_regamma) goto rgb_regamma_alloc_fail; - axix_x = kzalloc(sizeof(*axix_x) * (ramp->num_entries + 3), - GFP_KERNEL); + axix_x = kvzalloc(sizeof(*axix_x) * (ramp->num_entries + 3), + GFP_KERNEL); if (!axix_x) goto axix_x_alloc_fail; - coeff = kzalloc(sizeof(*coeff) * (MAX_HW_POINTS + _EXTRA_POINTS), GFP_KERNEL); + coeff = kvzalloc(sizeof(*coeff) * (MAX_HW_POINTS + _EXTRA_POINTS), GFP_KERNEL); if (!coeff) goto coeff_alloc_fail; @@ -1157,13 +1157,13 @@ bool mod_color_calculate_regamma_params(struct dc_transfer_func *output_tf, ret = true; - kfree(coeff); + kvfree(coeff); coeff_alloc_fail: - kfree(axix_x); + kvfree(axix_x); axix_x_alloc_fail: - kfree(rgb_regamma); + kvfree(rgb_regamma); rgb_regamma_alloc_fail: - kfree(rgb_user); + kvfree(rgb_user); rgb_user_alloc_fail: return ret; } @@ -1192,19 +1192,19 @@ bool mod_color_calculate_degamma_params(struct dc_transfer_func *input_tf, input_tf->type = TF_TYPE_DISTRIBUTED_POINTS; - rgb_user = kzalloc(sizeof(*rgb_user) * (ramp->num_entries + _EXTRA_POINTS), - GFP_KERNEL); + rgb_user = kvzalloc(sizeof(*rgb_user) * (ramp->num_entries + _EXTRA_POINTS), + GFP_KERNEL); if (!rgb_user) goto rgb_user_alloc_fail; - curve = kzalloc(sizeof(*curve) * (MAX_HW_POINTS + _EXTRA_POINTS), - GFP_KERNEL); + curve = kvzalloc(sizeof(*curve) * (MAX_HW_POINTS + _EXTRA_POINTS), + GFP_KERNEL); if (!curve) goto curve_alloc_fail; - axix_x = kzalloc(sizeof(*axix_x) * (ramp->num_entries + _EXTRA_POINTS), - GFP_KERNEL); + axix_x = kvzalloc(sizeof(*axix_x) * (ramp->num_entries + _EXTRA_POINTS), + GFP_KERNEL); if (!axix_x) goto axix_x_alloc_fail; - coeff = kzalloc(sizeof(*coeff) * (MAX_HW_POINTS + _EXTRA_POINTS), GFP_KERNEL); + coeff = kvzalloc(sizeof(*coeff) * (MAX_HW_POINTS + _EXTRA_POINTS), GFP_KERNEL); if (!coeff) goto coeff_alloc_fail; @@ -1246,13 +1246,13 @@ bool mod_color_calculate_degamma_params(struct dc_transfer_func *input_tf, ret = true; - kfree(coeff); + kvfree(coeff); coeff_alloc_fail: - kfree(axix_x); + kvfree(axix_x); axix_x_alloc_fail: - kfree(curve); + kvfree(curve); curve_alloc_fail: - kfree(rgb_user); + kvfree(rgb_user); rgb_user_alloc_fail: return ret; @@ -1281,8 +1281,9 @@ bool mod_color_calculate_curve(enum dc_transfer_func_predefined trans, } ret = true; } else if (trans == TRANSFER_FUNCTION_PQ) { - rgb_regamma = kzalloc(sizeof(*rgb_regamma) * (MAX_HW_POINTS + - _EXTRA_POINTS), GFP_KERNEL); + rgb_regamma = kvzalloc(sizeof(*rgb_regamma) * + (MAX_HW_POINTS + _EXTRA_POINTS), + GFP_KERNEL); if (!rgb_regamma) goto rgb_regamma_alloc_fail; points->end_exponent = 7; @@ -1302,11 +1303,12 @@ bool mod_color_calculate_curve(enum dc_transfer_func_predefined trans, } ret = true; - kfree(rgb_regamma); + kvfree(rgb_regamma); } else if (trans == TRANSFER_FUNCTION_SRGB || trans == TRANSFER_FUNCTION_BT709) { - rgb_regamma = kzalloc(sizeof(*rgb_regamma) * (MAX_HW_POINTS + - _EXTRA_POINTS), GFP_KERNEL); + rgb_regamma = kvzalloc(sizeof(*rgb_regamma) * + (MAX_HW_POINTS + _EXTRA_POINTS), + GFP_KERNEL); if (!rgb_regamma) goto rgb_regamma_alloc_fail; points->end_exponent = 0; @@ -1324,7 +1326,7 @@ bool mod_color_calculate_curve(enum dc_transfer_func_predefined trans, } ret = true; - kfree(rgb_regamma); + kvfree(rgb_regamma); } rgb_regamma_alloc_fail: return ret; @@ -1348,8 +1350,9 @@ bool mod_color_calculate_degamma_curve(enum dc_transfer_func_predefined trans, } ret = true; } else if (trans == TRANSFER_FUNCTION_PQ) { - rgb_degamma = kzalloc(sizeof(*rgb_degamma) * (MAX_HW_POINTS + - _EXTRA_POINTS), GFP_KERNEL); + rgb_degamma = kvzalloc(sizeof(*rgb_degamma) * + (MAX_HW_POINTS + _EXTRA_POINTS), + GFP_KERNEL); if (!rgb_degamma) goto rgb_degamma_alloc_fail; @@ -1364,11 +1367,12 @@ bool mod_color_calculate_degamma_curve(enum dc_transfer_func_predefined trans, } ret = true; - kfree(rgb_degamma); + kvfree(rgb_degamma); } else if (trans == TRANSFER_FUNCTION_SRGB || trans == TRANSFER_FUNCTION_BT709) { - rgb_degamma = kzalloc(sizeof(*rgb_degamma) * (MAX_HW_POINTS + - _EXTRA_POINTS), GFP_KERNEL); + rgb_degamma = kvzalloc(sizeof(*rgb_degamma) * + (MAX_HW_POINTS + _EXTRA_POINTS), + GFP_KERNEL); if (!rgb_degamma) goto rgb_degamma_alloc_fail; @@ -1382,7 +1386,7 @@ bool mod_color_calculate_degamma_curve(enum dc_transfer_func_predefined trans, } ret = true; - kfree(rgb_degamma); + kvfree(rgb_degamma); } points->end_exponent = 0; points->x_point_at_y1_red = 1; From da291320baec914f0bb4e65a9dccb86bd6c728f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Wed, 25 Apr 2018 17:32:10 +0200 Subject: [PATCH 186/347] drm/ttm: Use GFP_TRANSHUGE_LIGHT for allocating huge pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GFP_TRANSHUGE tries very hard to allocate huge pages, which can result in long delays with high memory pressure. I have observed firefox freezing for up to around a minute due to this while restic was taking a full system backup. Since we don't really need huge pages, use GFP_TRANSHUGE_LIGHT | __GFP_NORETRY instead, in order to fail quickly when there are no huge pages available. Set __GFP_KSWAPD_RECLAIM as well, in order for huge pages to be freed up in the background if necessary. With these changes, I'm no longer seeing freezes during a restic backup. Cc: stable@vger.kernel.org Reviewed-by: Christian König Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/ttm/ttm_page_alloc.c | 11 ++++++++--- drivers/gpu/drm/ttm/ttm_page_alloc_dma.c | 3 ++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c index f0481b7b60c5..06c94e3a5f15 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c @@ -910,7 +910,8 @@ static int ttm_get_pages(struct page **pages, unsigned npages, int flags, while (npages >= HPAGE_PMD_NR) { gfp_t huge_flags = gfp_flags; - huge_flags |= GFP_TRANSHUGE; + huge_flags |= GFP_TRANSHUGE_LIGHT | __GFP_NORETRY | + __GFP_KSWAPD_RECLAIM; huge_flags &= ~__GFP_MOVABLE; huge_flags &= ~__GFP_COMP; p = alloc_pages(huge_flags, HPAGE_PMD_ORDER); @@ -1027,11 +1028,15 @@ int ttm_page_alloc_init(struct ttm_mem_global *glob, unsigned max_pages) GFP_USER | GFP_DMA32, "uc dma", 0); ttm_page_pool_init_locked(&_manager->wc_pool_huge, - GFP_TRANSHUGE & ~(__GFP_MOVABLE | __GFP_COMP), + (GFP_TRANSHUGE_LIGHT | __GFP_NORETRY | + __GFP_KSWAPD_RECLAIM) & + ~(__GFP_MOVABLE | __GFP_COMP), "wc huge", order); ttm_page_pool_init_locked(&_manager->uc_pool_huge, - GFP_TRANSHUGE & ~(__GFP_MOVABLE | __GFP_COMP) + (GFP_TRANSHUGE_LIGHT | __GFP_NORETRY | + __GFP_KSWAPD_RECLAIM) & + ~(__GFP_MOVABLE | __GFP_COMP) , "uc huge", order); _manager->options.max_size = max_pages; diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c index 8a25d1974385..f63d99c302e4 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c @@ -910,7 +910,8 @@ static gfp_t ttm_dma_pool_gfp_flags(struct ttm_dma_tt *ttm_dma, bool huge) gfp_flags |= __GFP_ZERO; if (huge) { - gfp_flags |= GFP_TRANSHUGE; + gfp_flags |= GFP_TRANSHUGE_LIGHT | __GFP_NORETRY | + __GFP_KSWAPD_RECLAIM; gfp_flags &= ~__GFP_MOVABLE; gfp_flags &= ~__GFP_COMP; } From e6a5b9f9aee145c2f2c24431d84edfbb0d49eea5 Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Mon, 30 Apr 2018 10:04:42 -0400 Subject: [PATCH 187/347] drm/amdgpu: Switch to interruptable wait to recover from ring hang. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v2: Use dma_fence_wait instead of dma_fence_wait_timeout(...,MAX_SCHEDULE_TIMEOUT) Avoid printing error message for ERESTARTSYS Originally-by: David Panariti Signed-off-by: Andrey Grodzovsky Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 09d35051fdd6..3fabf9f97022 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -419,9 +419,11 @@ int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id) if (other) { signed long r; - r = dma_fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT); + r = dma_fence_wait(other, true); if (r < 0) { - DRM_ERROR("Error (%ld) waiting for fence!\n", r); + if (r != -ERESTARTSYS) + DRM_ERROR("Error (%ld) waiting for fence!\n", r); + return r; } } From 639f790223e62339b9cb7319ea3fae9e02c39bdb Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Mon, 7 May 2018 14:23:04 +0800 Subject: [PATCH 188/347] drm/amd/pp: Refine the output of pp_power_profile_mode on VI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to keep consist with Vega, the output format of the pp_power_profile_mode would be < “*” for current profile>:"detail settings" and remove the "CURRENT" mode line. for example: NUM MODE_NAME SCLK_UP_HYST SCLK_DOWN_HYST SCLK_ACTIVE_LEVEL MCLK_UP_HYST MCLK_DOWN_HYST MCLK_ACTIVE_LEVEL 0 3D_FULL_SCREEN: 0 100 30 0 100 10 1 POWER_SAVING: 10 0 30 - - - 2 VIDEO: - - - 10 16 31 3 VR: 0 11 50 0 100 10 4 COMPUTE: 0 5 30 - - - 5 CUSTOM *: 0 5 30 0 100 10 NUM MODE_NAME SCLK_UP_HYST SCLK_DOWN_HYST SCLK_ACTIVE_LEVEL MCLK_UP_HYST MCLK_DOWN_HYST MCLK_ACTIVE_LEVEL 0 3D_FULL_SCREEN: 0 100 30 0 100 10 1 POWER_SAVING *: 10 0 30 0 100 10 2 VIDEO: - - - 10 16 31 3 VR: 0 11 50 0 100 10 4 COMPUTE: 0 5 30 - - - 5 CUSTOM: - - - - - - Reviewed-by: Evan Quan Acked-by: Alex Deucher Signed-off-by: Rex Zhu Signed-off-by: Alex Deucher --- .../gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 52 ++++++++----------- .../gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.h | 1 - 2 files changed, 23 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index 26fbeafc3c96..18b5b2ff47fe 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -79,12 +79,13 @@ #define PCIE_BUS_CLK 10000 #define TCLK (PCIE_BUS_CLK / 10) -static const struct profile_mode_setting smu7_profiling[5] = +static const struct profile_mode_setting smu7_profiling[6] = {{1, 0, 100, 30, 1, 0, 100, 10}, {1, 10, 0, 30, 0, 0, 0, 0}, {0, 0, 0, 0, 1, 10, 16, 31}, {1, 0, 11, 50, 1, 0, 100, 10}, {1, 0, 5, 30, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, }; /** Values for the CG_THERMAL_CTRL::DPM_EVENT_SRC field. */ @@ -4864,6 +4865,17 @@ static int smu7_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf) len = sizeof(smu7_profiling) / sizeof(struct profile_mode_setting); for (i = 0; i < len; i++) { + if (i == hwmgr->power_profile_mode) { + size += sprintf(buf + size, "%3d %14s %s: %8d %16d %16d %16d %16d %16d\n", + i, profile_name[i], "*", + data->current_profile_setting.sclk_up_hyst, + data->current_profile_setting.sclk_down_hyst, + data->current_profile_setting.sclk_activity, + data->current_profile_setting.mclk_up_hyst, + data->current_profile_setting.mclk_down_hyst, + data->current_profile_setting.mclk_activity); + continue; + } if (smu7_profiling[i].bupdate_sclk) size += sprintf(buf + size, "%3d %16s: %8d %16d %16d ", i, profile_name[i], smu7_profiling[i].sclk_up_hyst, @@ -4883,24 +4895,6 @@ static int smu7_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf) "-", "-", "-"); } - size += sprintf(buf + size, "%3d %16s: %8d %16d %16d %16d %16d %16d\n", - i, profile_name[i], - data->custom_profile_setting.sclk_up_hyst, - data->custom_profile_setting.sclk_down_hyst, - data->custom_profile_setting.sclk_activity, - data->custom_profile_setting.mclk_up_hyst, - data->custom_profile_setting.mclk_down_hyst, - data->custom_profile_setting.mclk_activity); - - size += sprintf(buf + size, "%3s %16s: %8d %16d %16d %16d %16d %16d\n", - "*", "CURRENT", - data->current_profile_setting.sclk_up_hyst, - data->current_profile_setting.sclk_down_hyst, - data->current_profile_setting.sclk_activity, - data->current_profile_setting.mclk_up_hyst, - data->current_profile_setting.mclk_down_hyst, - data->current_profile_setting.mclk_activity); - return size; } @@ -4939,16 +4933,16 @@ static int smu7_set_power_profile_mode(struct pp_hwmgr *hwmgr, long *input, uint if (size < 8) return -EINVAL; - data->custom_profile_setting.bupdate_sclk = input[0]; - data->custom_profile_setting.sclk_up_hyst = input[1]; - data->custom_profile_setting.sclk_down_hyst = input[2]; - data->custom_profile_setting.sclk_activity = input[3]; - data->custom_profile_setting.bupdate_mclk = input[4]; - data->custom_profile_setting.mclk_up_hyst = input[5]; - data->custom_profile_setting.mclk_down_hyst = input[6]; - data->custom_profile_setting.mclk_activity = input[7]; - if (!smum_update_dpm_settings(hwmgr, &data->custom_profile_setting)) { - memcpy(&data->current_profile_setting, &data->custom_profile_setting, sizeof(struct profile_mode_setting)); + tmp.bupdate_sclk = input[0]; + tmp.sclk_up_hyst = input[1]; + tmp.sclk_down_hyst = input[2]; + tmp.sclk_activity = input[3]; + tmp.bupdate_mclk = input[4]; + tmp.mclk_up_hyst = input[5]; + tmp.mclk_down_hyst = input[6]; + tmp.mclk_activity = input[7]; + if (!smum_update_dpm_settings(hwmgr, &tmp)) { + memcpy(&data->current_profile_setting, &tmp, sizeof(struct profile_mode_setting)); hwmgr->power_profile_mode = mode; } break; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.h index f40179c9ca97..b8d0bb378595 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.h @@ -325,7 +325,6 @@ struct smu7_hwmgr { uint16_t mem_latency_high; uint16_t mem_latency_low; uint32_t vr_config; - struct profile_mode_setting custom_profile_setting; struct profile_mode_setting current_profile_setting; }; From dec60f3a9b7251f2657d743d96ba9a83dca02351 Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Sat, 5 May 2018 21:54:05 +0200 Subject: [PATCH 189/347] agp: uninorth: make two functions static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both ‘uninorth_remove_memory’ and ‘null_cache_flush’ can be made static. So make them. Silence the following gcc warning (W=1): drivers/char/agp/uninorth-agp.c:198:5: warning: no previous prototype for ‘uninorth_remove_memory’ [-Wmissing-prototypes] and drivers/char/agp/uninorth-agp.c:473:6: warning: no previous prototype for ‘null_cache_flush’ [-Wmissing-prototypes] Signed-off-by: Mathieu Malaterre Signed-off-by: Dave Airlie --- drivers/char/agp/uninorth-agp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/char/agp/uninorth-agp.c b/drivers/char/agp/uninorth-agp.c index c381c8e396fc..79d8c84693a1 100644 --- a/drivers/char/agp/uninorth-agp.c +++ b/drivers/char/agp/uninorth-agp.c @@ -195,7 +195,7 @@ static int uninorth_insert_memory(struct agp_memory *mem, off_t pg_start, int ty return 0; } -int uninorth_remove_memory(struct agp_memory *mem, off_t pg_start, int type) +static int uninorth_remove_memory(struct agp_memory *mem, off_t pg_start, int type) { size_t i; u32 *gp; @@ -470,7 +470,7 @@ static int uninorth_free_gatt_table(struct agp_bridge_data *bridge) return 0; } -void null_cache_flush(void) +static void null_cache_flush(void) { mb(); } From 0d5a03c3d9254813ca76d7886ff9ed76a0aea545 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 8 May 2018 20:39:47 +1000 Subject: [PATCH 190/347] drm/nouveau/ttm: don't dereference nvbo::cli, it can outlive client Potentially responsible for some random OOPSes. Signed-off-by: Ben Skeggs Cc: stable@vger.kernel.org [v4.15+] --- drivers/gpu/drm/nouveau/nouveau_bo.c | 1 - drivers/gpu/drm/nouveau/nouveau_bo.h | 2 -- drivers/gpu/drm/nouveau/nouveau_ttm.c | 6 +++--- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 6f402c4f2bdd..ab61c038f42c 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -214,7 +214,6 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align, INIT_LIST_HEAD(&nvbo->entry); INIT_LIST_HEAD(&nvbo->vma_list); nvbo->bo.bdev = &drm->ttm.bdev; - nvbo->cli = cli; /* This is confusing, and doesn't actually mean we want an uncached * mapping, but is what NOUVEAU_GEM_DOMAIN_COHERENT gets translated diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.h b/drivers/gpu/drm/nouveau/nouveau_bo.h index be8e00b49cde..73c48440d4d7 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.h +++ b/drivers/gpu/drm/nouveau/nouveau_bo.h @@ -26,8 +26,6 @@ struct nouveau_bo { struct list_head vma_list; - struct nouveau_cli *cli; - unsigned contig:1; unsigned page:5; unsigned kind:8; diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c index dff51a0ee028..8c093ca4222e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_ttm.c +++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c @@ -63,7 +63,7 @@ nouveau_vram_manager_new(struct ttm_mem_type_manager *man, struct ttm_mem_reg *reg) { struct nouveau_bo *nvbo = nouveau_bo(bo); - struct nouveau_drm *drm = nvbo->cli->drm; + struct nouveau_drm *drm = nouveau_bdev(bo->bdev); struct nouveau_mem *mem; int ret; @@ -103,7 +103,7 @@ nouveau_gart_manager_new(struct ttm_mem_type_manager *man, struct ttm_mem_reg *reg) { struct nouveau_bo *nvbo = nouveau_bo(bo); - struct nouveau_drm *drm = nvbo->cli->drm; + struct nouveau_drm *drm = nouveau_bdev(bo->bdev); struct nouveau_mem *mem; int ret; @@ -131,7 +131,7 @@ nv04_gart_manager_new(struct ttm_mem_type_manager *man, struct ttm_mem_reg *reg) { struct nouveau_bo *nvbo = nouveau_bo(bo); - struct nouveau_drm *drm = nvbo->cli->drm; + struct nouveau_drm *drm = nouveau_bdev(bo->bdev); struct nouveau_mem *mem; int ret; From 352672db857290ab5b0e2b6a99c414f92bee024c Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Wed, 2 May 2018 19:38:48 -0400 Subject: [PATCH 191/347] drm/nouveau: Fix deadlock in nv50_mstm_register_connector() Currently; we're grabbing all of the modesetting locks before adding MST connectors to fbdev. This isn't actually necessary, and causes a deadlock as well: ====================================================== WARNING: possible circular locking dependency detected 4.17.0-rc3Lyude-Test+ #1 Tainted: G O ------------------------------------------------------ kworker/1:0/18 is trying to acquire lock: 00000000c832f62d (&helper->lock){+.+.}, at: drm_fb_helper_add_one_connector+0x2a/0x60 [drm_kms_helper] but task is already holding lock: 00000000942e28e2 (crtc_ww_class_mutex){+.+.}, at: drm_modeset_backoff+0x8e/0x1c0 [drm] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #3 (crtc_ww_class_mutex){+.+.}: ww_mutex_lock+0x43/0x80 drm_modeset_lock+0x71/0x130 [drm] drm_helper_probe_single_connector_modes+0x7d/0x6b0 [drm_kms_helper] drm_setup_crtcs+0x15e/0xc90 [drm_kms_helper] __drm_fb_helper_initial_config_and_unlock+0x29/0x480 [drm_kms_helper] nouveau_fbcon_init+0x138/0x1a0 [nouveau] nouveau_drm_load+0x173/0x7e0 [nouveau] drm_dev_register+0x134/0x1c0 [drm] drm_get_pci_dev+0x8e/0x160 [drm] nouveau_drm_probe+0x1a9/0x230 [nouveau] pci_device_probe+0xcd/0x150 driver_probe_device+0x30b/0x480 __driver_attach+0xbc/0xe0 bus_for_each_dev+0x67/0x90 bus_add_driver+0x164/0x260 driver_register+0x57/0xc0 do_one_initcall+0x4d/0x323 do_init_module+0x5b/0x1f8 load_module+0x20e5/0x2ac0 __do_sys_finit_module+0xb7/0xd0 do_syscall_64+0x60/0x1b0 entry_SYSCALL_64_after_hwframe+0x49/0xbe -> #2 (crtc_ww_class_acquire){+.+.}: drm_helper_probe_single_connector_modes+0x58/0x6b0 [drm_kms_helper] drm_setup_crtcs+0x15e/0xc90 [drm_kms_helper] __drm_fb_helper_initial_config_and_unlock+0x29/0x480 [drm_kms_helper] nouveau_fbcon_init+0x138/0x1a0 [nouveau] nouveau_drm_load+0x173/0x7e0 [nouveau] drm_dev_register+0x134/0x1c0 [drm] drm_get_pci_dev+0x8e/0x160 [drm] nouveau_drm_probe+0x1a9/0x230 [nouveau] pci_device_probe+0xcd/0x150 driver_probe_device+0x30b/0x480 __driver_attach+0xbc/0xe0 bus_for_each_dev+0x67/0x90 bus_add_driver+0x164/0x260 driver_register+0x57/0xc0 do_one_initcall+0x4d/0x323 do_init_module+0x5b/0x1f8 load_module+0x20e5/0x2ac0 __do_sys_finit_module+0xb7/0xd0 do_syscall_64+0x60/0x1b0 entry_SYSCALL_64_after_hwframe+0x49/0xbe -> #1 (&dev->mode_config.mutex){+.+.}: drm_setup_crtcs+0x10c/0xc90 [drm_kms_helper] __drm_fb_helper_initial_config_and_unlock+0x29/0x480 [drm_kms_helper] nouveau_fbcon_init+0x138/0x1a0 [nouveau] nouveau_drm_load+0x173/0x7e0 [nouveau] drm_dev_register+0x134/0x1c0 [drm] drm_get_pci_dev+0x8e/0x160 [drm] nouveau_drm_probe+0x1a9/0x230 [nouveau] pci_device_probe+0xcd/0x150 driver_probe_device+0x30b/0x480 __driver_attach+0xbc/0xe0 bus_for_each_dev+0x67/0x90 bus_add_driver+0x164/0x260 driver_register+0x57/0xc0 do_one_initcall+0x4d/0x323 do_init_module+0x5b/0x1f8 load_module+0x20e5/0x2ac0 __do_sys_finit_module+0xb7/0xd0 do_syscall_64+0x60/0x1b0 entry_SYSCALL_64_after_hwframe+0x49/0xbe -> #0 (&helper->lock){+.+.}: __mutex_lock+0x70/0x9d0 drm_fb_helper_add_one_connector+0x2a/0x60 [drm_kms_helper] nv50_mstm_register_connector+0x2c/0x50 [nouveau] drm_dp_add_port+0x2f5/0x420 [drm_kms_helper] drm_dp_send_link_address+0x155/0x1e0 [drm_kms_helper] drm_dp_add_port+0x33f/0x420 [drm_kms_helper] drm_dp_send_link_address+0x155/0x1e0 [drm_kms_helper] drm_dp_check_and_send_link_address+0x87/0xd0 [drm_kms_helper] drm_dp_mst_link_probe_work+0x4d/0x80 [drm_kms_helper] process_one_work+0x20d/0x650 worker_thread+0x3a/0x390 kthread+0x11e/0x140 ret_from_fork+0x3a/0x50 other info that might help us debug this: Chain exists of: &helper->lock --> crtc_ww_class_acquire --> crtc_ww_class_mutex Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(crtc_ww_class_mutex); lock(crtc_ww_class_acquire); lock(crtc_ww_class_mutex); lock(&helper->lock); *** DEADLOCK *** 5 locks held by kworker/1:0/18: #0: 000000004a05cd50 ((wq_completion)"events_long"){+.+.}, at: process_one_work+0x187/0x650 #1: 00000000601c11d1 ((work_completion)(&mgr->work)){+.+.}, at: process_one_work+0x187/0x650 #2: 00000000586ca0df (&dev->mode_config.mutex){+.+.}, at: drm_modeset_lock_all+0x3a/0x1b0 [drm] #3: 00000000d3ca0ffa (crtc_ww_class_acquire){+.+.}, at: drm_modeset_lock_all+0x44/0x1b0 [drm] #4: 00000000942e28e2 (crtc_ww_class_mutex){+.+.}, at: drm_modeset_backoff+0x8e/0x1c0 [drm] stack backtrace: CPU: 1 PID: 18 Comm: kworker/1:0 Tainted: G O 4.17.0-rc3Lyude-Test+ #1 Hardware name: Gateway FX6840/FX6840, BIOS P01-A3 05/17/2010 Workqueue: events_long drm_dp_mst_link_probe_work [drm_kms_helper] Call Trace: dump_stack+0x85/0xcb print_circular_bug.isra.38+0x1ce/0x1db __lock_acquire+0x128f/0x1350 ? lock_acquire+0x9f/0x200 ? lock_acquire+0x9f/0x200 ? __ww_mutex_lock.constprop.13+0x8f/0x1000 lock_acquire+0x9f/0x200 ? drm_fb_helper_add_one_connector+0x2a/0x60 [drm_kms_helper] ? drm_fb_helper_add_one_connector+0x2a/0x60 [drm_kms_helper] __mutex_lock+0x70/0x9d0 ? drm_fb_helper_add_one_connector+0x2a/0x60 [drm_kms_helper] ? ww_mutex_lock+0x43/0x80 ? _cond_resched+0x15/0x30 ? ww_mutex_lock+0x43/0x80 ? drm_modeset_lock+0xb2/0x130 [drm] ? drm_fb_helper_add_one_connector+0x2a/0x60 [drm_kms_helper] drm_fb_helper_add_one_connector+0x2a/0x60 [drm_kms_helper] nv50_mstm_register_connector+0x2c/0x50 [nouveau] drm_dp_add_port+0x2f5/0x420 [drm_kms_helper] ? mark_held_locks+0x50/0x80 ? kfree+0xcf/0x2a0 ? drm_dp_check_mstb_guid+0xd6/0x120 [drm_kms_helper] ? trace_hardirqs_on_caller+0xed/0x180 ? drm_dp_check_mstb_guid+0xd6/0x120 [drm_kms_helper] drm_dp_send_link_address+0x155/0x1e0 [drm_kms_helper] drm_dp_add_port+0x33f/0x420 [drm_kms_helper] ? nouveau_connector_aux_xfer+0x7c/0xb0 [nouveau] ? find_held_lock+0x2d/0x90 ? drm_dp_dpcd_access+0xd9/0xf0 [drm_kms_helper] ? __mutex_unlock_slowpath+0x3b/0x280 ? drm_dp_dpcd_access+0xd9/0xf0 [drm_kms_helper] drm_dp_send_link_address+0x155/0x1e0 [drm_kms_helper] drm_dp_check_and_send_link_address+0x87/0xd0 [drm_kms_helper] drm_dp_mst_link_probe_work+0x4d/0x80 [drm_kms_helper] process_one_work+0x20d/0x650 worker_thread+0x3a/0x390 ? process_one_work+0x650/0x650 kthread+0x11e/0x140 ? kthread_create_worker_on_cpu+0x50/0x50 ret_from_fork+0x3a/0x50 Taking example from i915, the only time we need to hold any modesetting locks is when changing the port on the mstc, and in that case we only need to hold the connection mutex. Signed-off-by: Lyude Paul Cc: Karol Herbst Cc: stable@vger.kernel.org Signed-off-by: Lyude Paul Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nv50_display.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c index 8bd739cfd00d..2b3ccd850750 100644 --- a/drivers/gpu/drm/nouveau/nv50_display.c +++ b/drivers/gpu/drm/nouveau/nv50_display.c @@ -3264,10 +3264,11 @@ nv50_mstm_destroy_connector(struct drm_dp_mst_topology_mgr *mgr, drm_connector_unregister(&mstc->connector); - drm_modeset_lock_all(drm->dev); drm_fb_helper_remove_one_connector(&drm->fbcon->helper, &mstc->connector); + + drm_modeset_lock(&drm->dev->mode_config.connection_mutex, NULL); mstc->port = NULL; - drm_modeset_unlock_all(drm->dev); + drm_modeset_unlock(&drm->dev->mode_config.connection_mutex); drm_connector_unreference(&mstc->connector); } @@ -3277,9 +3278,7 @@ nv50_mstm_register_connector(struct drm_connector *connector) { struct nouveau_drm *drm = nouveau_drm(connector->dev); - drm_modeset_lock_all(drm->dev); drm_fb_helper_add_one_connector(&drm->fbcon->helper, connector); - drm_modeset_unlock_all(drm->dev); drm_connector_register(connector); } From 3057fcef385348fe85173f1b0c824d89f1176f72 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 4 May 2018 21:24:31 +0200 Subject: [PATCH 192/347] mtd: rawnand: Make sure we wait tWB before polling the STATUS reg NAND chips require a bit of time to take the NAND operation into account and set the BUSY bit in the STATUS reg. Make sure we don't poll the STATUS reg too early in nand_soft_waitrdy(). Fixes: 8878b126df76 ("mtd: nand: add ->exec_op() implementation") Cc: Signed-off-by: Boris Brezillon Acked-by: Miquel Raynal --- drivers/mtd/nand/raw/nand_base.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index 72f3a89da513..f28c3a555861 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -706,12 +706,17 @@ static void nand_wait_status_ready(struct mtd_info *mtd, unsigned long timeo) */ int nand_soft_waitrdy(struct nand_chip *chip, unsigned long timeout_ms) { + const struct nand_sdr_timings *timings; u8 status = 0; int ret; if (!chip->exec_op) return -ENOTSUPP; + /* Wait tWB before polling the STATUS reg. */ + timings = nand_get_sdr_timings(&chip->data_interface); + ndelay(PSEC_TO_NSEC(timings->tWB_max)); + ret = nand_status_op(chip, NULL); if (ret) return ret; From 3a15b38fd2efc1d648cb33186bf71e9138c93491 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 3 May 2018 16:10:09 +0200 Subject: [PATCH 193/347] ceph: fix rsize/wsize capping in ceph_direct_read_write() rsize/wsize cap should be applied before ceph_osdc_new_request() is called. Otherwise, if the size is limited by the cap instead of the stripe unit, ceph_osdc_new_request() would setup an extent op that is bigger than what dio_get_pages_alloc() would pin and add to the page vector, triggering asserts in the messenger. Cc: stable@vger.kernel.org Fixes: 95cca2b44e54 ("ceph: limit osd write size") Signed-off-by: Ilya Dryomov Reviewed-by: "Yan, Zheng" --- fs/ceph/file.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index f85040d73e3d..8ce7849f3fbd 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -918,6 +918,11 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, size_t start = 0; ssize_t len; + if (write) + size = min_t(u64, size, fsc->mount_options->wsize); + else + size = min_t(u64, size, fsc->mount_options->rsize); + vino = ceph_vino(inode); req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino, pos, &size, 0, @@ -933,11 +938,6 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, break; } - if (write) - size = min_t(u64, size, fsc->mount_options->wsize); - else - size = min_t(u64, size, fsc->mount_options->rsize); - len = size; pages = dio_get_pages_alloc(iter, len, &start, &num_pages); if (IS_ERR(pages)) { From 0010f7052d6cb71c4b120238e28cd3fa413913d1 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 4 May 2018 16:57:30 +0200 Subject: [PATCH 194/347] libceph: add osd_req_op_extent_osd_data_bvecs() ... and store num_bvecs for client code's convenience. Signed-off-by: Ilya Dryomov Reviewed-by: Jeff Layton Reviewed-by: "Yan, Zheng" --- drivers/block/rbd.c | 4 +++- include/linux/ceph/osd_client.h | 12 ++++++++++-- net/ceph/osd_client.c | 27 +++++++++++++++++++++++---- 3 files changed, 36 insertions(+), 7 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 8e8b04cc569a..33b36fea1d73 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -2366,7 +2366,9 @@ static int rbd_obj_issue_copyup(struct rbd_obj_request *obj_req, u32 bytes) osd_req_op_cls_init(obj_req->osd_req, 0, CEPH_OSD_OP_CALL, "rbd", "copyup"); osd_req_op_cls_request_data_bvecs(obj_req->osd_req, 0, - obj_req->copyup_bvecs, bytes); + obj_req->copyup_bvecs, + obj_req->copyup_bvec_count, + bytes); switch (obj_req->img_request->op_type) { case OBJ_OP_WRITE: diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 528ccc943cee..96bb32285989 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -77,7 +77,10 @@ struct ceph_osd_data { u32 bio_length; }; #endif /* CONFIG_BLOCK */ - struct ceph_bvec_iter bvec_pos; + struct { + struct ceph_bvec_iter bvec_pos; + u32 num_bvecs; + }; }; }; @@ -412,6 +415,10 @@ void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req, struct ceph_bio_iter *bio_pos, u32 bio_length); #endif /* CONFIG_BLOCK */ +void osd_req_op_extent_osd_data_bvecs(struct ceph_osd_request *osd_req, + unsigned int which, + struct bio_vec *bvecs, u32 num_bvecs, + u32 bytes); void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req, unsigned int which, struct ceph_bvec_iter *bvec_pos); @@ -426,7 +433,8 @@ extern void osd_req_op_cls_request_data_pages(struct ceph_osd_request *, bool own_pages); void osd_req_op_cls_request_data_bvecs(struct ceph_osd_request *osd_req, unsigned int which, - struct bio_vec *bvecs, u32 bytes); + struct bio_vec *bvecs, u32 num_bvecs, + u32 bytes); extern void osd_req_op_cls_response_data_pages(struct ceph_osd_request *, unsigned int which, struct page **pages, u64 length, diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index ea2a6c9fb7ce..d2667e5dddc3 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -157,10 +157,12 @@ static void ceph_osd_data_bio_init(struct ceph_osd_data *osd_data, #endif /* CONFIG_BLOCK */ static void ceph_osd_data_bvecs_init(struct ceph_osd_data *osd_data, - struct ceph_bvec_iter *bvec_pos) + struct ceph_bvec_iter *bvec_pos, + u32 num_bvecs) { osd_data->type = CEPH_OSD_DATA_TYPE_BVECS; osd_data->bvec_pos = *bvec_pos; + osd_data->num_bvecs = num_bvecs; } #define osd_req_op_data(oreq, whch, typ, fld) \ @@ -237,6 +239,22 @@ void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req, EXPORT_SYMBOL(osd_req_op_extent_osd_data_bio); #endif /* CONFIG_BLOCK */ +void osd_req_op_extent_osd_data_bvecs(struct ceph_osd_request *osd_req, + unsigned int which, + struct bio_vec *bvecs, u32 num_bvecs, + u32 bytes) +{ + struct ceph_osd_data *osd_data; + struct ceph_bvec_iter it = { + .bvecs = bvecs, + .iter = { .bi_size = bytes }, + }; + + osd_data = osd_req_op_data(osd_req, which, extent, osd_data); + ceph_osd_data_bvecs_init(osd_data, &it, num_bvecs); +} +EXPORT_SYMBOL(osd_req_op_extent_osd_data_bvecs); + void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req, unsigned int which, struct ceph_bvec_iter *bvec_pos) @@ -244,7 +262,7 @@ void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req, struct ceph_osd_data *osd_data; osd_data = osd_req_op_data(osd_req, which, extent, osd_data); - ceph_osd_data_bvecs_init(osd_data, bvec_pos); + ceph_osd_data_bvecs_init(osd_data, bvec_pos, 0); } EXPORT_SYMBOL(osd_req_op_extent_osd_data_bvec_pos); @@ -287,7 +305,8 @@ EXPORT_SYMBOL(osd_req_op_cls_request_data_pages); void osd_req_op_cls_request_data_bvecs(struct ceph_osd_request *osd_req, unsigned int which, - struct bio_vec *bvecs, u32 bytes) + struct bio_vec *bvecs, u32 num_bvecs, + u32 bytes) { struct ceph_osd_data *osd_data; struct ceph_bvec_iter it = { @@ -296,7 +315,7 @@ void osd_req_op_cls_request_data_bvecs(struct ceph_osd_request *osd_req, }; osd_data = osd_req_op_data(osd_req, which, cls, request_data); - ceph_osd_data_bvecs_init(osd_data, &it); + ceph_osd_data_bvecs_init(osd_data, &it, num_bvecs); osd_req->r_ops[which].cls.indata_len += bytes; osd_req->r_ops[which].indata_len += bytes; } From fc218544fbc800d1c91348ec834cacfb257348f7 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 4 May 2018 16:57:31 +0200 Subject: [PATCH 195/347] ceph: fix iov_iter issues in ceph_direct_read_write() dio_get_pagev_size() and dio_get_pages_alloc() introduced in commit b5b98989dc7e ("ceph: combine as many iovec as possile into one OSD request") assume that the passed iov_iter is ITER_IOVEC. This isn't the case with splice where it ends up poking into the guts of ITER_BVEC or ITER_PIPE iterators, causing lockups and crashes easily reproduced with generic/095. Rather than trying to figure out gap alignment and stuff pages into a page vector, add a helper for going from iov_iter to a bio_vec array and make use of the new CEPH_OSD_DATA_TYPE_BVECS code. Fixes: b5b98989dc7e ("ceph: combine as many iovec as possile into one OSD request") Link: http://tracker.ceph.com/issues/18130 Signed-off-by: Ilya Dryomov Reviewed-by: Jeff Layton Reviewed-by: "Yan, Zheng" Tested-by: Luis Henriques --- fs/ceph/file.c | 195 +++++++++++++++++++++++++++++-------------------- 1 file changed, 117 insertions(+), 78 deletions(-) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 8ce7849f3fbd..cf0e45b10121 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -70,69 +70,104 @@ static __le32 ceph_flags_sys2wire(u32 flags) */ /* - * Calculate the length sum of direct io vectors that can - * be combined into one page vector. + * How many pages to get in one call to iov_iter_get_pages(). This + * determines the size of the on-stack array used as a buffer. */ -static size_t dio_get_pagev_size(const struct iov_iter *it) -{ - const struct iovec *iov = it->iov; - const struct iovec *iovend = iov + it->nr_segs; - size_t size; +#define ITER_GET_BVECS_PAGES 64 - size = iov->iov_len - it->iov_offset; - /* - * An iov can be page vectored when both the current tail - * and the next base are page aligned. - */ - while (PAGE_ALIGNED((iov->iov_base + iov->iov_len)) && - (++iov < iovend && PAGE_ALIGNED((iov->iov_base)))) { - size += iov->iov_len; - } - dout("dio_get_pagevlen len = %zu\n", size); - return size; +static ssize_t __iter_get_bvecs(struct iov_iter *iter, size_t maxsize, + struct bio_vec *bvecs) +{ + size_t size = 0; + int bvec_idx = 0; + + if (maxsize > iov_iter_count(iter)) + maxsize = iov_iter_count(iter); + + while (size < maxsize) { + struct page *pages[ITER_GET_BVECS_PAGES]; + ssize_t bytes; + size_t start; + int idx = 0; + + bytes = iov_iter_get_pages(iter, pages, maxsize - size, + ITER_GET_BVECS_PAGES, &start); + if (bytes < 0) + return size ?: bytes; + + iov_iter_advance(iter, bytes); + size += bytes; + + for ( ; bytes; idx++, bvec_idx++) { + struct bio_vec bv = { + .bv_page = pages[idx], + .bv_len = min_t(int, bytes, PAGE_SIZE - start), + .bv_offset = start, + }; + + bvecs[bvec_idx] = bv; + bytes -= bv.bv_len; + start = 0; + } + } + + return size; } /* - * Allocate a page vector based on (@it, @nbytes). - * The return value is the tuple describing a page vector, - * that is (@pages, @page_align, @num_pages). + * iov_iter_get_pages() only considers one iov_iter segment, no matter + * what maxsize or maxpages are given. For ITER_BVEC that is a single + * page. + * + * Attempt to get up to @maxsize bytes worth of pages from @iter. + * Return the number of bytes in the created bio_vec array, or an error. */ -static struct page ** -dio_get_pages_alloc(const struct iov_iter *it, size_t nbytes, - size_t *page_align, int *num_pages) +static ssize_t iter_get_bvecs_alloc(struct iov_iter *iter, size_t maxsize, + struct bio_vec **bvecs, int *num_bvecs) { - struct iov_iter tmp_it = *it; - size_t align; - struct page **pages; - int ret = 0, idx, npages; + struct bio_vec *bv; + size_t orig_count = iov_iter_count(iter); + ssize_t bytes; + int npages; - align = (unsigned long)(it->iov->iov_base + it->iov_offset) & - (PAGE_SIZE - 1); - npages = calc_pages_for(align, nbytes); - pages = kvmalloc(sizeof(*pages) * npages, GFP_KERNEL); - if (!pages) - return ERR_PTR(-ENOMEM); + iov_iter_truncate(iter, maxsize); + npages = iov_iter_npages(iter, INT_MAX); + iov_iter_reexpand(iter, orig_count); - for (idx = 0; idx < npages; ) { - size_t start; - ret = iov_iter_get_pages(&tmp_it, pages + idx, nbytes, - npages - idx, &start); - if (ret < 0) - goto fail; + /* + * __iter_get_bvecs() may populate only part of the array -- zero it + * out. + */ + bv = kvmalloc_array(npages, sizeof(*bv), GFP_KERNEL | __GFP_ZERO); + if (!bv) + return -ENOMEM; - iov_iter_advance(&tmp_it, ret); - nbytes -= ret; - idx += (ret + start + PAGE_SIZE - 1) / PAGE_SIZE; + bytes = __iter_get_bvecs(iter, maxsize, bv); + if (bytes < 0) { + /* + * No pages were pinned -- just free the array. + */ + kvfree(bv); + return bytes; } - BUG_ON(nbytes != 0); - *num_pages = npages; - *page_align = align; - dout("dio_get_pages_alloc: got %d pages align %zu\n", npages, align); - return pages; -fail: - ceph_put_page_vector(pages, idx, false); - return ERR_PTR(ret); + *bvecs = bv; + *num_bvecs = npages; + return bytes; +} + +static void put_bvecs(struct bio_vec *bvecs, int num_bvecs, bool should_dirty) +{ + int i; + + for (i = 0; i < num_bvecs; i++) { + if (bvecs[i].bv_page) { + if (should_dirty) + set_page_dirty_lock(bvecs[i].bv_page); + put_page(bvecs[i].bv_page); + } + } + kvfree(bvecs); } /* @@ -746,11 +781,12 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req) struct inode *inode = req->r_inode; struct ceph_aio_request *aio_req = req->r_priv; struct ceph_osd_data *osd_data = osd_req_op_extent_osd_data(req, 0); - int num_pages = calc_pages_for((u64)osd_data->alignment, - osd_data->length); - dout("ceph_aio_complete_req %p rc %d bytes %llu\n", - inode, rc, osd_data->length); + BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_BVECS); + BUG_ON(!osd_data->num_bvecs); + + dout("ceph_aio_complete_req %p rc %d bytes %u\n", + inode, rc, osd_data->bvec_pos.iter.bi_size); if (rc == -EOLDSNAPC) { struct ceph_aio_work *aio_work; @@ -768,9 +804,10 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req) } else if (!aio_req->write) { if (rc == -ENOENT) rc = 0; - if (rc >= 0 && osd_data->length > rc) { - int zoff = osd_data->alignment + rc; - int zlen = osd_data->length - rc; + if (rc >= 0 && osd_data->bvec_pos.iter.bi_size > rc) { + struct iov_iter i; + int zlen = osd_data->bvec_pos.iter.bi_size - rc; + /* * If read is satisfied by single OSD request, * it can pass EOF. Otherwise read is within @@ -785,13 +822,16 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req) aio_req->total_len = rc + zlen; } - if (zlen > 0) - ceph_zero_page_vector_range(zoff, zlen, - osd_data->pages); + iov_iter_bvec(&i, ITER_BVEC, osd_data->bvec_pos.bvecs, + osd_data->num_bvecs, + osd_data->bvec_pos.iter.bi_size); + iov_iter_advance(&i, rc); + iov_iter_zero(zlen, &i); } } - ceph_put_page_vector(osd_data->pages, num_pages, aio_req->should_dirty); + put_bvecs(osd_data->bvec_pos.bvecs, osd_data->num_bvecs, + aio_req->should_dirty); ceph_osdc_put_request(req); if (rc < 0) @@ -879,7 +919,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_vino vino; struct ceph_osd_request *req; - struct page **pages; + struct bio_vec *bvecs; struct ceph_aio_request *aio_req = NULL; int num_pages = 0; int flags; @@ -914,8 +954,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, } while (iov_iter_count(iter) > 0) { - u64 size = dio_get_pagev_size(iter); - size_t start = 0; + u64 size = iov_iter_count(iter); ssize_t len; if (write) @@ -938,13 +977,14 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, break; } - len = size; - pages = dio_get_pages_alloc(iter, len, &start, &num_pages); - if (IS_ERR(pages)) { + len = iter_get_bvecs_alloc(iter, size, &bvecs, &num_pages); + if (len < 0) { ceph_osdc_put_request(req); - ret = PTR_ERR(pages); + ret = len; break; } + if (len != size) + osd_req_op_extent_update(req, 0, len); /* * To simplify error handling, allow AIO when IO within i_size @@ -977,8 +1017,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, req->r_mtime = mtime; } - osd_req_op_extent_osd_data_pages(req, 0, pages, len, start, - false, false); + osd_req_op_extent_osd_data_bvecs(req, 0, bvecs, num_pages, len); if (aio_req) { aio_req->total_len += len; @@ -991,7 +1030,6 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, list_add_tail(&req->r_unsafe_item, &aio_req->osd_reqs); pos += len; - iov_iter_advance(iter, len); continue; } @@ -1004,25 +1042,26 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, if (ret == -ENOENT) ret = 0; if (ret >= 0 && ret < len && pos + ret < size) { + struct iov_iter i; int zlen = min_t(size_t, len - ret, size - pos - ret); - ceph_zero_page_vector_range(start + ret, zlen, - pages); + + iov_iter_bvec(&i, ITER_BVEC, bvecs, num_pages, + len); + iov_iter_advance(&i, ret); + iov_iter_zero(zlen, &i); ret += zlen; } if (ret >= 0) len = ret; } - ceph_put_page_vector(pages, num_pages, should_dirty); - + put_bvecs(bvecs, num_pages, should_dirty); ceph_osdc_put_request(req); if (ret < 0) break; pos += len; - iov_iter_advance(iter, len); - if (!write && pos >= size) break; From 06cb616b1bca7080824acfedb3d4c898e7a64836 Mon Sep 17 00:00:00 2001 From: Alexander Monakov Date: Sat, 28 Apr 2018 16:56:06 +0300 Subject: [PATCH 196/347] i2c: designware: fix poll-after-enable regression Not all revisions of DW I2C controller implement the enable status register. On platforms where that's the case (e.g. BG2CD and SPEAr ARM SoCs), waiting for enable will time out as reading the unimplemented register yields zero. It was observed that reading the IC_ENABLE_STATUS register once suffices to avoid getting it stuck on Bay Trail hardware, so replace polling with one dummy read of the register. Fixes: fba4adbbf670 ("i2c: designware: must wait for enable") Signed-off-by: Alexander Monakov Tested-by: Ben Gardner Acked-by: Jarkko Nikula Signed-off-by: Wolfram Sang Cc: stable@kernel.org --- drivers/i2c/busses/i2c-designware-master.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c index fd36c39ddf4e..0cdba29ae0a9 100644 --- a/drivers/i2c/busses/i2c-designware-master.c +++ b/drivers/i2c/busses/i2c-designware-master.c @@ -209,7 +209,10 @@ static void i2c_dw_xfer_init(struct dw_i2c_dev *dev) i2c_dw_disable_int(dev); /* Enable the adapter */ - __i2c_dw_enable_and_wait(dev, true); + __i2c_dw_enable(dev, true); + + /* Dummy read to avoid the register getting stuck on Bay Trail */ + dw_readl(dev, DW_IC_ENABLE_STATUS); /* Clear and enable interrupts */ dw_readl(dev, DW_IC_CLR_INTR); From 4a026da91caaa36004a53a844dd00959370ea8fc Mon Sep 17 00:00:00 2001 From: Sun Lianwen Date: Tue, 8 May 2018 09:49:38 +0800 Subject: [PATCH 197/347] net/9p: correct some comment errors in 9p file system code There are follow comment errors: 1 The function name is wrong in p9_release_pages() comment. 2 The function name and variable name is wrong in p9_poll_workfn() comment. 3 There is no variable dm_mr and lkey in struct p9_trans_rdma. 4 The function name is wrong in rdma_create_trans() comment. 5 There is no variable initialized in struct virtio_chan. 6 The variable name is wrong in p9_virtio_zc_request() comment. Signed-off-by: Sun Lianwen Reviewed-by: Randy Dunlap Reviewed-by: Randy Dunlap Signed-off-by: David S. Miller --- net/9p/trans_common.c | 2 +- net/9p/trans_fd.c | 4 ++-- net/9p/trans_rdma.c | 4 +--- net/9p/trans_virtio.c | 5 ++--- 4 files changed, 6 insertions(+), 9 deletions(-) diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c index 38aa6345bdfa..b718db2085b2 100644 --- a/net/9p/trans_common.c +++ b/net/9p/trans_common.c @@ -16,7 +16,7 @@ #include /** - * p9_release_req_pages - Release pages after the transaction. + * p9_release_pages - Release pages after the transaction. */ void p9_release_pages(struct page **pages, int nr_pages) { diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 0cfba919d167..848969fe7979 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -1092,8 +1092,8 @@ static struct p9_trans_module p9_fd_trans = { }; /** - * p9_poll_proc - poll worker thread - * @a: thread state and arguments + * p9_poll_workfn - poll worker thread + * @work: work queue * * polls all v9fs transports for new events and queues the appropriate * work to the work queue diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 6d8e3031978f..3d414acb7015 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -68,8 +68,6 @@ * @pd: Protection Domain pointer * @qp: Queue Pair pointer * @cq: Completion Queue pointer - * @dm_mr: DMA Memory Region pointer - * @lkey: The local access only memory region key * @timeout: Number of uSecs to wait for connection management events * @privport: Whether a privileged port may be used * @port: The port to use @@ -632,7 +630,7 @@ static int p9_rdma_bind_privport(struct p9_trans_rdma *rdma) } /** - * trans_create_rdma - Transport method for creating atransport instance + * rdma_create_trans - Transport method for creating a transport instance * @client: client instance * @addr: IP address string * @args: Mount options string diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 3aa5a93ad107..4d0372263e5d 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -60,7 +60,6 @@ static atomic_t vp_pinned = ATOMIC_INIT(0); /** * struct virtio_chan - per-instance transport information - * @initialized: whether the channel is initialized * @inuse: whether the channel is in use * @lock: protects multiple elements within this structure * @client: client instance @@ -385,8 +384,8 @@ static int p9_get_mapped_pages(struct virtio_chan *chan, * @uidata: user bffer that should be ued for zero copy read * @uodata: user buffer that shoud be user for zero copy write * @inlen: read buffer size - * @olen: write buffer size - * @hdrlen: reader header size, This is the size of response protocol data + * @outlen: write buffer size + * @in_hdr_len: reader header size, This is the size of response protocol data * */ static int From 94f6a80c0c11828cb7b3d79294459dd8d761ca89 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Tue, 8 May 2018 21:44:06 +0800 Subject: [PATCH 198/347] tipc: eliminate KMSAN uninit-value in strcmp complaint When we get link properties through netlink interface with tipc_nl_node_get_link(), we don't validate TIPC_NLA_LINK_NAME attribute at all, instead we directly use it. As a consequence, KMSAN detected the TIPC_NLA_LINK_NAME attribute was an uninitialized value, and then posted the following complaint: ================================================================== BUG: KMSAN: uninit-value in strcmp+0xf7/0x160 lib/string.c:329 CPU: 1 PID: 4527 Comm: syz-executor655 Not tainted 4.16.0+ #87 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x185/0x1d0 lib/dump_stack.c:53 kmsan_report+0x142/0x240 mm/kmsan/kmsan.c:1067 __msan_warning_32+0x6c/0xb0 mm/kmsan/kmsan_instr.c:683 strcmp+0xf7/0x160 lib/string.c:329 tipc_nl_node_get_link+0x220/0x6f0 net/tipc/node.c:1881 genl_family_rcv_msg net/netlink/genetlink.c:599 [inline] genl_rcv_msg+0x1686/0x1810 net/netlink/genetlink.c:624 netlink_rcv_skb+0x378/0x600 net/netlink/af_netlink.c:2447 genl_rcv+0x63/0x80 net/netlink/genetlink.c:635 netlink_unicast_kernel net/netlink/af_netlink.c:1311 [inline] netlink_unicast+0x166b/0x1740 net/netlink/af_netlink.c:1337 netlink_sendmsg+0x1048/0x1310 net/netlink/af_netlink.c:1900 sock_sendmsg_nosec net/socket.c:630 [inline] sock_sendmsg net/socket.c:640 [inline] ___sys_sendmsg+0xec0/0x1310 net/socket.c:2046 __sys_sendmsg net/socket.c:2080 [inline] SYSC_sendmsg+0x2a3/0x3d0 net/socket.c:2091 SyS_sendmsg+0x54/0x80 net/socket.c:2087 do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 RIP: 0033:0x445589 RSP: 002b:00007fb7ee66cdb8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00000000006dac24 RCX: 0000000000445589 RDX: 0000000000000000 RSI: 0000000020023000 RDI: 0000000000000003 RBP: 00000000006dac20 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00007fffa2bf3f3f R14: 00007fb7ee66d9c0 R15: 0000000000000001 Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:278 [inline] kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:188 kmsan_kmalloc+0x94/0x100 mm/kmsan/kmsan.c:314 kmsan_slab_alloc+0x11/0x20 mm/kmsan/kmsan.c:321 slab_post_alloc_hook mm/slab.h:445 [inline] slab_alloc_node mm/slub.c:2737 [inline] __kmalloc_node_track_caller+0xaed/0x11c0 mm/slub.c:4369 __kmalloc_reserve net/core/skbuff.c:138 [inline] __alloc_skb+0x2cf/0x9f0 net/core/skbuff.c:206 alloc_skb include/linux/skbuff.h:984 [inline] netlink_alloc_large_skb net/netlink/af_netlink.c:1183 [inline] netlink_sendmsg+0x9a6/0x1310 net/netlink/af_netlink.c:1875 sock_sendmsg_nosec net/socket.c:630 [inline] sock_sendmsg net/socket.c:640 [inline] ___sys_sendmsg+0xec0/0x1310 net/socket.c:2046 __sys_sendmsg net/socket.c:2080 [inline] SYSC_sendmsg+0x2a3/0x3d0 net/socket.c:2091 SyS_sendmsg+0x54/0x80 net/socket.c:2087 do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 ================================================================== To quiet the complaint, TIPC_NLA_LINK_NAME attribute has been validated in tipc_nl_node_get_link() before it's used. Reported-by: syzbot+df0257c92ffd4fcc58cd@syzkaller.appspotmail.com Signed-off-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/node.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/net/tipc/node.c b/net/tipc/node.c index baaf93f12cbd..f29549de9245 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1950,6 +1950,7 @@ out: int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info) { struct net *net = genl_info_net(info); + struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; struct tipc_nl_msg msg; char *name; int err; @@ -1957,9 +1958,19 @@ int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info) msg.portid = info->snd_portid; msg.seq = info->snd_seq; - if (!info->attrs[TIPC_NLA_LINK_NAME]) + if (!info->attrs[TIPC_NLA_LINK]) return -EINVAL; - name = nla_data(info->attrs[TIPC_NLA_LINK_NAME]); + + err = nla_parse_nested(attrs, TIPC_NLA_LINK_MAX, + info->attrs[TIPC_NLA_LINK], + tipc_nl_link_policy, info->extack); + if (err) + return err; + + if (!attrs[TIPC_NLA_LINK_NAME]) + return -EINVAL; + + name = nla_data(attrs[TIPC_NLA_LINK_NAME]); msg.skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (!msg.skb) From 7fc6311b174091e3283c28381e58bed3d12b6591 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Thu, 10 May 2018 19:51:09 +0800 Subject: [PATCH 199/347] drm/amd/pp: Fix performance drop on Fiji The performance drop if the default TDP more than 256 Watt Reviewed-by: Alex Deucher Reviewed-by: Junwei Zhang Signed-off-by: Rex Zhu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c index 03bc7453f3b1..d9e92e306535 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c @@ -852,12 +852,10 @@ int smu7_set_power_limit(struct pp_hwmgr *hwmgr, uint32_t n) { struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); - n = (n & 0xff) << 8; - if (data->power_containment_features & POWERCONTAINMENT_FEATURE_PkgPwrLimit) return smum_send_msg_to_smc_with_parameter(hwmgr, - PPSMC_MSG_PkgPwrSetLimit, n); + PPSMC_MSG_PkgPwrSetLimit, n<<8); return 0; } From cfcadfaad7251d8b640713724b388164d75465b2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 9 May 2018 00:18:32 +0200 Subject: [PATCH 200/347] PCI / PM: Check device_may_wakeup() in pci_enable_wake() Commit 0847684cfc5f0 (PCI / PM: Simplify device wakeup settings code) went too far and dropped the device_may_wakeup() check from pci_enable_wake() which causes wakeup to be enabled during system suspend, hibernation or shutdown for some PCI devices that are not allowed by user space to wake up the system from sleep (or power off). As a result of this, excessive power is drawn by some of the affected systems while in sleep states or off. Restore the device_may_wakeup() check in pci_enable_wake(), but make sure that the PCI bus type's runtime suspend callback will not call device_may_wakeup() which is about system wakeup from sleep and not about device wakeup from runtime suspend. Fixes: 0847684cfc5f0 (PCI / PM: Simplify device wakeup settings code) Reported-by: Joseph Salisbury Cc: 4.13+ # 4.13+ Signed-off-by: Rafael J. Wysocki Acked-by: Bjorn Helgaas --- drivers/pci/pci.c | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index c2616cad3a1d..dbfe7c4f3776 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1910,7 +1910,7 @@ void pci_pme_active(struct pci_dev *dev, bool enable) EXPORT_SYMBOL(pci_pme_active); /** - * pci_enable_wake - enable PCI device as wakeup event source + * __pci_enable_wake - enable PCI device as wakeup event source * @dev: PCI device affected * @state: PCI state from which device will issue wakeup events * @enable: True to enable event generation; false to disable @@ -1928,7 +1928,7 @@ EXPORT_SYMBOL(pci_pme_active); * Error code depending on the platform is returned if both the platform and * the native mechanism fail to enable the generation of wake-up events */ -int pci_enable_wake(struct pci_dev *dev, pci_power_t state, bool enable) +static int __pci_enable_wake(struct pci_dev *dev, pci_power_t state, bool enable) { int ret = 0; @@ -1969,6 +1969,23 @@ int pci_enable_wake(struct pci_dev *dev, pci_power_t state, bool enable) return ret; } + +/** + * pci_enable_wake - change wakeup settings for a PCI device + * @pci_dev: Target device + * @state: PCI state from which device will issue wakeup events + * @enable: Whether or not to enable event generation + * + * If @enable is set, check device_may_wakeup() for the device before calling + * __pci_enable_wake() for it. + */ +int pci_enable_wake(struct pci_dev *pci_dev, pci_power_t state, bool enable) +{ + if (enable && !device_may_wakeup(&pci_dev->dev)) + return -EINVAL; + + return __pci_enable_wake(pci_dev, state, enable); +} EXPORT_SYMBOL(pci_enable_wake); /** @@ -1981,9 +1998,9 @@ EXPORT_SYMBOL(pci_enable_wake); * should not be called twice in a row to enable wake-up due to PCI PM vs ACPI * ordering constraints. * - * This function only returns error code if the device is not capable of - * generating PME# from both D3_hot and D3_cold, and the platform is unable to - * enable wake-up power for it. + * This function only returns error code if the device is not allowed to wake + * up the system from sleep or it is not capable of generating PME# from both + * D3_hot and D3_cold and the platform is unable to enable wake-up power for it. */ int pci_wake_from_d3(struct pci_dev *dev, bool enable) { @@ -2114,7 +2131,7 @@ int pci_finish_runtime_suspend(struct pci_dev *dev) dev->runtime_d3cold = target_state == PCI_D3cold; - pci_enable_wake(dev, target_state, pci_dev_run_wake(dev)); + __pci_enable_wake(dev, target_state, pci_dev_run_wake(dev)); error = pci_set_power_state(dev, target_state); From 28700a36232ba61fb6ac59466821546cb25aec69 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 10 May 2018 11:18:49 -0400 Subject: [PATCH 201/347] dm thin: update Documentation to clarify when "read_only" is valid Due to user confusion, clarify that it doesn't make sense to try to create a thin-pool with "read_only" mode enabled. Signed-off-by: Mike Snitzer --- Documentation/device-mapper/thin-provisioning.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Documentation/device-mapper/thin-provisioning.txt b/Documentation/device-mapper/thin-provisioning.txt index 4bcd4b7f79f9..3d01948ea061 100644 --- a/Documentation/device-mapper/thin-provisioning.txt +++ b/Documentation/device-mapper/thin-provisioning.txt @@ -264,7 +264,10 @@ i) Constructor data device, but just remove the mapping. read_only: Don't allow any changes to be made to the pool - metadata. + metadata. This mode is only available after the + thin-pool has been created and first used in full + read/write mode. It cannot be specified on initial + thin-pool creation. error_if_no_space: Error IOs, instead of queueing, if no space. From 5cec9425b41dcf834c3d48776900d6acb7e96f38 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 9 May 2018 14:38:43 +0200 Subject: [PATCH 202/347] can: hi311x: Acquire SPI lock on ->do_get_berr_counter hi3110_get_berr_counter() may run concurrently to the rest of the driver but neglects to acquire the lock protecting access to the SPI device. As a result, it and the rest of the driver may clobber each other's tx and rx buffers. We became aware of this issue because transmission of packets with "cangen -g 0 -i -x" frequently hung. It turns out that agetty executes ->do_get_berr_counter every few seconds via the following call stack: CPU: 2 PID: 1605 Comm: agetty [<7f3f7500>] (hi3110_get_berr_counter [hi311x]) [<7f130204>] (can_fill_info [can_dev]) [<80693bc0>] (rtnl_fill_ifinfo) [<806949ec>] (rtnl_dump_ifinfo) [<806b4834>] (netlink_dump) [<806b4bc8>] (netlink_recvmsg) [<8065f180>] (sock_recvmsg) [<80660f90>] (___sys_recvmsg) [<80661e7c>] (__sys_recvmsg) [<80661ec0>] (SyS_recvmsg) [<80108b20>] (ret_fast_syscall+0x0/0x1c) agetty listens to netlink messages in order to update the login prompt when IP addresses change (if /etc/issue contains \4 or \6 escape codes): https://git.kernel.org/pub/scm/utils/util-linux/util-linux.git/commit/?id=e36deb6424e8 It's a useful feature, though it seems questionable that it causes CAN bit error statistics to be queried. Be that as it may, if hi3110_get_berr_counter() is invoked while a frame is sent by hi3110_hw_tx(), bogus SPI transfers like the following may occur: => 12 00 (hi3110_get_berr_counter() wanted to transmit EC 00 to query the transmit error counter, but the first byte was overwritten by hi3110_hw_tx_frame()) => EA 00 3E 80 01 FB (hi3110_hw_tx_frame() wanted to transmit a frame, but the first byte was overwritten by hi3110_get_berr_counter() because it wanted to query the receive error counter) This sequence hangs the transmission because the driver believes it has sent a frame and waits for the interrupt signaling completion, but in reality the chip has never sent away the frame since the commands it received were malformed. Fix by acquiring the SPI lock in hi3110_get_berr_counter(). I've scrutinized the entire driver for further unlocked SPI accesses but found no others. Cc: Mathias Duckeck Cc: Akshay Bhat Cc: Casey Fitzpatrick Cc: Stef Walter Cc: Karel Zak Cc: stable@vger.kernel.org # v4.12+ Signed-off-by: Lukas Wunner Reviewed-by: Akshay Bhat Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/hi311x.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/can/spi/hi311x.c b/drivers/net/can/spi/hi311x.c index 5590c559a8ca..c2cf254e4e95 100644 --- a/drivers/net/can/spi/hi311x.c +++ b/drivers/net/can/spi/hi311x.c @@ -427,8 +427,10 @@ static int hi3110_get_berr_counter(const struct net_device *net, struct hi3110_priv *priv = netdev_priv(net); struct spi_device *spi = priv->spi; + mutex_lock(&priv->hi3110_lock); bec->txerr = hi3110_read(spi, HI3110_READ_TEC); bec->rxerr = hi3110_read(spi, HI3110_READ_REC); + mutex_unlock(&priv->hi3110_lock); return 0; } From 32bee8f48fa048a3198109de50e51c092507ff52 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 9 May 2018 14:43:43 +0200 Subject: [PATCH 203/347] can: hi311x: Work around TX complete interrupt erratum When sending packets as fast as possible using "cangen -g 0 -i -x", the HI-3110 occasionally latches the interrupt pin high on completion of a packet, but doesn't set the TXCPLT bit in the INTF register. The INTF register contains 0x00 as if no interrupt has occurred. Even waiting for a few milliseconds after the interrupt doesn't help. Work around this apparent erratum by instead checking the TXMTY bit in the STATF register ("TX FIFO empty"). We know that we've queued up a packet for transmission if priv->tx_len is nonzero. If the TX FIFO is empty, transmission of that packet must have completed. Note that this is congruent with our handling of received packets, which likewise gleans from the STATF register whether a packet is waiting in the RX FIFO, instead of looking at the INTF register. Cc: Mathias Duckeck Cc: Akshay Bhat Cc: Casey Fitzpatrick Cc: stable@vger.kernel.org # v4.12+ Signed-off-by: Lukas Wunner Acked-by: Akshay Bhat Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/hi311x.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/spi/hi311x.c b/drivers/net/can/spi/hi311x.c index c2cf254e4e95..53e320c92a8b 100644 --- a/drivers/net/can/spi/hi311x.c +++ b/drivers/net/can/spi/hi311x.c @@ -91,6 +91,7 @@ #define HI3110_STAT_BUSOFF BIT(2) #define HI3110_STAT_ERRP BIT(3) #define HI3110_STAT_ERRW BIT(4) +#define HI3110_STAT_TXMTY BIT(7) #define HI3110_BTR0_SJW_SHIFT 6 #define HI3110_BTR0_BRP_SHIFT 0 @@ -737,10 +738,7 @@ static irqreturn_t hi3110_can_ist(int irq, void *dev_id) } } - if (intf == 0) - break; - - if (intf & HI3110_INT_TXCPLT) { + if (priv->tx_len && statf & HI3110_STAT_TXMTY) { net->stats.tx_packets++; net->stats.tx_bytes += priv->tx_len - 1; can_led_event(net, CAN_LED_EVENT_TX); @@ -750,6 +748,9 @@ static irqreturn_t hi3110_can_ist(int irq, void *dev_id) } netif_wake_queue(net); } + + if (intf == 0) + break; } mutex_unlock(&priv->hi3110_lock); return IRQ_HANDLED; From af6858ee423a309d93054c361c61099b8eb12bbf Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Tue, 8 May 2018 21:29:18 +0300 Subject: [PATCH 204/347] qed: Fix l2 initializations over iWARP personality If qede driver was loaded on a device configured for iWARP the l2 mutex wouldn't be allocated, and some l2 related resources wouldn't be freed. fixes: c851a9dc4359 ("qed: Introduce iWARP personality") Signed-off-by: Michal Kalderon Signed-off-by: Sudarsana Kalluru Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_l2.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index e874504e8b28..8667799d0069 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -115,8 +115,7 @@ int qed_l2_alloc(struct qed_hwfn *p_hwfn) void qed_l2_setup(struct qed_hwfn *p_hwfn) { - if (p_hwfn->hw_info.personality != QED_PCI_ETH && - p_hwfn->hw_info.personality != QED_PCI_ETH_ROCE) + if (!QED_IS_L2_PERSONALITY(p_hwfn)) return; mutex_init(&p_hwfn->p_l2_info->lock); @@ -126,8 +125,7 @@ void qed_l2_free(struct qed_hwfn *p_hwfn) { u32 i; - if (p_hwfn->hw_info.personality != QED_PCI_ETH && - p_hwfn->hw_info.personality != QED_PCI_ETH_ROCE) + if (!QED_IS_L2_PERSONALITY(p_hwfn)) return; if (!p_hwfn->p_l2_info) From 090477e4acb31c5dd674940c7c01d4f16bd1ac41 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Tue, 8 May 2018 21:29:19 +0300 Subject: [PATCH 205/347] qede: Fix gfp flags sent to rdma event node allocation A previous commit 4609adc27175 ("qede: Fix qedr link update") added a flow that could allocate rdma event objects from an interrupt path (link notification). Therefore the kzalloc call should be done with GFP_ATOMIC. fixes: 4609adc27175 ("qede: Fix qedr link update") Signed-off-by: Michal Kalderon Signed-off-by: Sudarsana Kalluru Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qede/qede_rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_rdma.c b/drivers/net/ethernet/qlogic/qede/qede_rdma.c index 50b142fad6b8..1900bf7e67d1 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_rdma.c +++ b/drivers/net/ethernet/qlogic/qede/qede_rdma.c @@ -238,7 +238,7 @@ qede_rdma_get_free_event_node(struct qede_dev *edev) } if (!found) { - event_node = kzalloc(sizeof(*event_node), GFP_KERNEL); + event_node = kzalloc(sizeof(*event_node), GFP_ATOMIC); if (!event_node) { DP_NOTICE(edev, "qedr: Could not allocate memory for rdma work\n"); From 9af5573f3ae1b5ad7340b2f77bba81b066a426d7 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 8 May 2018 23:01:51 +0100 Subject: [PATCH 206/347] firestream: fix spelling mistake: "reseverd" -> "reserved" Trivial fix to spelling mistake in res_strings string array Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/atm/firestream.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/atm/firestream.c b/drivers/atm/firestream.c index d97c05690faa..4e46dc9e41ad 100644 --- a/drivers/atm/firestream.c +++ b/drivers/atm/firestream.c @@ -191,7 +191,7 @@ static char *res_strings[] = { "reserved 37", "reserved 38", "reserved 39", - "reseverd 40", + "reserved 40", "reserved 41", "reserved 42", "reserved 43", From 39a2d5cbaa8ce03bdd8eb9d3891d7a1b3f5fff96 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 8 May 2018 23:24:28 +0100 Subject: [PATCH 207/347] sctp: fix spelling mistake: "max_retans" -> "max_retrans" Trivial fix to spelling mistake in error string Signed-off-by: Colin Ian King Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/sm_make_chunk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 5a4fb1dc8400..e62addb60434 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1152,7 +1152,7 @@ struct sctp_chunk *sctp_make_violation_max_retrans( const struct sctp_association *asoc, const struct sctp_chunk *chunk) { - static const char error[] = "Association exceeded its max_retans count"; + static const char error[] = "Association exceeded its max_retrans count"; size_t payload_len = sizeof(error) + sizeof(struct sctp_errhdr); struct sctp_chunk *retval; From 680a284635808594b8ac34939a7513dd4320e907 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 9 May 2018 10:48:33 +0100 Subject: [PATCH 208/347] net/9p: fix spelling mistake: "suspsend" -> "suspend" Trivial fix to spelling mistake in dev_warn message text Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- net/9p/trans_xen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c index 086a4abdfa7c..0f19960390a6 100644 --- a/net/9p/trans_xen.c +++ b/net/9p/trans_xen.c @@ -485,7 +485,7 @@ static int xen_9pfs_front_probe(struct xenbus_device *dev, static int xen_9pfs_front_resume(struct xenbus_device *dev) { - dev_warn(&dev->dev, "suspsend/resume unsupported\n"); + dev_warn(&dev->dev, "suspend/resume unsupported\n"); return 0; } From df13c59b54a9f8d3e20a2fb565d54eee8dac8768 Mon Sep 17 00:00:00 2001 From: Pieter Jansen van Vuuren Date: Wed, 9 May 2018 00:18:58 -0700 Subject: [PATCH 209/347] nfp: flower: remove headroom from max MTU calculation Since commit 29a5dcae2790 ("nfp: flower: offload phys port MTU change") we take encapsulation headroom into account when calculating the max allowed MTU. This is unnecessary as the max MTU advertised by firmware should have already accounted for encap headroom. Subtracting headroom twice brings the max MTU below what's necessary for some deployments. Fixes: 29a5dcae2790 ("nfp: flower: offload phys port MTU change") Signed-off-by: Pieter Jansen van Vuuren Reviewed-by: John Hurley Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- .../net/ethernet/netronome/nfp/flower/main.c | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c b/drivers/net/ethernet/netronome/nfp/flower/main.c index a997e34bcec2..84e3b9f5abb1 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.c +++ b/drivers/net/ethernet/netronome/nfp/flower/main.c @@ -52,8 +52,6 @@ #define NFP_FLOWER_ALLOWED_VER 0x0001000000010000UL -#define NFP_FLOWER_FRAME_HEADROOM 158 - static const char *nfp_flower_extra_cap(struct nfp_app *app, struct nfp_net *nn) { return "FLOWER"; @@ -559,22 +557,6 @@ static void nfp_flower_clean(struct nfp_app *app) app->priv = NULL; } -static int -nfp_flower_check_mtu(struct nfp_app *app, struct net_device *netdev, - int new_mtu) -{ - /* The flower fw reserves NFP_FLOWER_FRAME_HEADROOM bytes of the - * supported max MTU to allow for appending tunnel headers. To prevent - * unexpected behaviour this needs to be accounted for. - */ - if (new_mtu > netdev->max_mtu - NFP_FLOWER_FRAME_HEADROOM) { - nfp_err(app->cpp, "New MTU (%d) is not valid\n", new_mtu); - return -EINVAL; - } - - return 0; -} - static bool nfp_flower_check_ack(struct nfp_flower_priv *app_priv) { bool ret; @@ -656,7 +638,6 @@ const struct nfp_app_type app_flower = { .init = nfp_flower_init, .clean = nfp_flower_clean, - .check_mtu = nfp_flower_check_mtu, .repr_change_mtu = nfp_flower_repr_change_mtu, .vnic_alloc = nfp_flower_vnic_alloc, From 55be9f25be1ca5bda75c39808fc77e42691bc07f Mon Sep 17 00:00:00 2001 From: Mohammed Gamal Date: Wed, 9 May 2018 10:17:34 +0200 Subject: [PATCH 210/347] hv_netvsc: Fix net device attach on older Windows hosts On older windows hosts the net_device instance is returned to the caller of rndis_filter_device_add() without having the presence bit set first. This would cause any subsequent calls to network device operations (e.g. MTU change, channel change) to fail after the device is detached once, returning -ENODEV. Instead of returning the device instabce, we take the exit path where we call netif_device_attach() Fixes: 7b2ee50c0cd5 ("hv_netvsc: common detach logic") Signed-off-by: Mohammed Gamal Reviewed-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/rndis_filter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 6b127be781d9..e7ca5b5f39ed 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -1288,7 +1288,7 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev, rndis_device->link_state ? "down" : "up"); if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_5) - return net_device; + goto out; rndis_filter_query_link_speed(rndis_device, net_device); From 0e8411e426e277f55bd21e287ec89fab6f8eacae Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 9 May 2018 18:06:44 +0800 Subject: [PATCH 211/347] ipv4: reset fnhe_mtu_locked after cache route flushed After route cache is flushed via ipv4_sysctl_rtcache_flush(), we forget to reset fnhe_mtu_locked in rt_bind_exception(). When pmtu is updated in __ip_rt_update_pmtu(), it will return directly since the pmtu is still locked. e.g. + ip netns exec client ping 10.10.1.1 -c 1 -s 1400 -M do PING 10.10.1.1 (10.10.1.1) 1400(1428) bytes of data. >From 10.10.0.254 icmp_seq=1 Frag needed and DF set (mtu = 0) Signed-off-by: David S. Miller --- net/ipv4/route.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 1412a7baf0b9..29268efad247 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1375,6 +1375,7 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, fnhe->fnhe_gw = 0; fnhe->fnhe_pmtu = 0; fnhe->fnhe_expires = 0; + fnhe->fnhe_mtu_locked = false; fnhe_flush_routes(fnhe); orig = NULL; } From 69678bcd4d2dedbc3e8fcd6d7d99f283d83c531a Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 9 May 2018 12:42:34 +0200 Subject: [PATCH 212/347] udp: fix SO_BINDTODEVICE Damir reported a breakage of SO_BINDTODEVICE for UDP sockets. In absence of VRF devices, after commit fb74c27735f0 ("net: ipv4: add second dif to udp socket lookups") the dif mismatch isn't fatal anymore for UDP socket lookup with non null sk_bound_dev_if, breaking SO_BINDTODEVICE semantics. This changeset addresses the issue making the dif match mandatory again in the above scenario. Reported-by: Damir Mansurov Fixes: fb74c27735f0 ("net: ipv4: add second dif to udp socket lookups") Fixes: 1801b570dd2a ("net: ipv6: add second dif to udp socket lookups") Signed-off-by: Paolo Abeni Acked-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/udp.c | 4 ++-- net/ipv6/udp.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 24b5c59b1c53..c2a292dfd137 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -401,9 +401,9 @@ static int compute_score(struct sock *sk, struct net *net, bool dev_match = (sk->sk_bound_dev_if == dif || sk->sk_bound_dev_if == sdif); - if (exact_dif && !dev_match) + if (!dev_match) return -1; - if (sk->sk_bound_dev_if && dev_match) + if (sk->sk_bound_dev_if) score += 4; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 4ec76a87aeb8..ea0730028e5d 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -148,9 +148,9 @@ static int compute_score(struct sock *sk, struct net *net, bool dev_match = (sk->sk_bound_dev_if == dif || sk->sk_bound_dev_if == sdif); - if (exact_dif && !dev_match) + if (!dev_match) return -1; - if (sk->sk_bound_dev_if && dev_match) + if (sk->sk_bound_dev_if) score++; } From 2b928749f9ba781085442d1f8832f057cbf458a1 Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Wed, 9 May 2018 18:40:09 +0530 Subject: [PATCH 213/347] cxgb4: zero the HMA memory firmware expects HMA memory to be zeroed, use __GFP_ZERO for HMA memory allocation. Fixes: 8b4e6b3ca2ed ("cxgb4: Add HMA support") Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 24d2865b8806..c3ae5750d414 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -3433,8 +3433,8 @@ static int adap_config_hma(struct adapter *adapter) sgl = adapter->hma.sgt->sgl; node = dev_to_node(adapter->pdev_dev); for_each_sg(sgl, iter, sgt->orig_nents, i) { - newpage = alloc_pages_node(node, __GFP_NOWARN | GFP_KERNEL, - page_order); + newpage = alloc_pages_node(node, __GFP_NOWARN | GFP_KERNEL | + __GFP_ZERO, page_order); if (!newpage) { dev_err(adapter->pdev_dev, "Not enough memory for HMA page allocation\n"); From aca06eafd09f48ca4d97f3c0b2a12c8d631116f0 Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Wed, 9 May 2018 19:00:35 +0530 Subject: [PATCH 214/347] cxgb4: copy mbox log size to PF0-3 adap instances copy mbox size to adapter instances of PF0-3 to avoid mbox log overflow. This fixes the possible protection fault. Fixes: baf5086840ab ("cxgb4: restructure VF mgmt code") Signed-off-by: Casey Leedom Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index c3ae5750d414..005283c7cdfe 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -5474,6 +5474,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) } spin_lock_init(&adapter->mbox_lock); INIT_LIST_HEAD(&adapter->mlist.list); + adapter->mbox_log->size = T4_OS_LOG_MBOX_CMDS; pci_set_drvdata(pdev, adapter); if (func != ent->driver_data) { @@ -5508,8 +5509,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) goto out_free_adapter; } - adapter->mbox_log->size = T4_OS_LOG_MBOX_CMDS; - /* PCI device has been enabled */ adapter->flags |= DEV_ENABLED; memset(adapter->chan_map, 0xff, sizeof(adapter->chan_map)); From 6ad4e91c6d796b38a7f0e724db1de28eeb122bad Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Wed, 9 May 2018 18:35:13 +0300 Subject: [PATCH 215/347] net/mlx4_en: Verify coalescing parameters are in range Add check of coalescing parameters received through ethtool are within range of values supported by the HW. Driver gets the coalescing rx/tx-usecs and rx/tx-frames as set by the users through ethtool. The ethtool support up to 32 bit value for each. However, mlx4 modify cq limits the coalescing time parameter and coalescing frames parameters to 16 bits. Return out of range error if user tries to set these parameters to higher values. Change type of sample-interval and adaptive_rx_coal parameters in mlx4 driver to u32 as the ethtool holds them as u32 and these parameters are not limited due to mlx4 HW. Fixes: c27a02cd94d6 ('mlx4_en: Add driver for Mellanox ConnectX 10GbE NIC') Signed-off-by: Moshe Shemesh Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 16 ++++++++++++++++ drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 7 +++++-- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index a30a2e95d13f..f11b45001cad 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -1027,6 +1027,22 @@ static int mlx4_en_set_coalesce(struct net_device *dev, if (!coal->tx_max_coalesced_frames_irq) return -EINVAL; + if (coal->tx_coalesce_usecs > MLX4_EN_MAX_COAL_TIME || + coal->rx_coalesce_usecs > MLX4_EN_MAX_COAL_TIME || + coal->rx_coalesce_usecs_low > MLX4_EN_MAX_COAL_TIME || + coal->rx_coalesce_usecs_high > MLX4_EN_MAX_COAL_TIME) { + netdev_info(dev, "%s: maximum coalesce time supported is %d usecs\n", + __func__, MLX4_EN_MAX_COAL_TIME); + return -ERANGE; + } + + if (coal->tx_max_coalesced_frames > MLX4_EN_MAX_COAL_PKTS || + coal->rx_max_coalesced_frames > MLX4_EN_MAX_COAL_PKTS) { + netdev_info(dev, "%s: maximum coalesced frames supported is %d\n", + __func__, MLX4_EN_MAX_COAL_PKTS); + return -ERANGE; + } + priv->rx_frames = (coal->rx_max_coalesced_frames == MLX4_EN_AUTO_CONF) ? MLX4_EN_RX_COAL_TARGET : diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index f7c81133594f..ace6545f82e6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -132,6 +132,9 @@ #define MLX4_EN_TX_COAL_PKTS 16 #define MLX4_EN_TX_COAL_TIME 0x10 +#define MLX4_EN_MAX_COAL_PKTS U16_MAX +#define MLX4_EN_MAX_COAL_TIME U16_MAX + #define MLX4_EN_RX_RATE_LOW 400000 #define MLX4_EN_RX_COAL_TIME_LOW 0 #define MLX4_EN_RX_RATE_HIGH 450000 @@ -552,8 +555,8 @@ struct mlx4_en_priv { u16 rx_usecs_low; u32 pkt_rate_high; u16 rx_usecs_high; - u16 sample_interval; - u16 adaptive_rx_coal; + u32 sample_interval; + u32 adaptive_rx_coal; u32 msg_enable; u32 loopback_ok; u32 validate_loopback; From f7017cafcdd7574680fc7faabcb73f91172a14ab Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 9 May 2018 18:45:42 +0200 Subject: [PATCH 216/347] tc-testing: fix tdc tests for 'bpf' action - correct a typo in the value of 'matchPattern' of test 282d, potentially causing false negative - allow errors when 'teardown' executes '$TC action flush action bpf' in test 282d, to fix false positive when it is run with act_bpf unloaded - correct the value of 'matchPattern' in test e939, causing false positive in case the BPF JIT is enabled Fixes: 440ea4ae1828 ("tc-testing: add selftests for 'bpf' action") Signed-off-by: Davide Caratti Acked-by: Lucas Bates Signed-off-by: David S. Miller --- .../selftests/tc-testing/tc-tests/actions/bpf.json | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json index 5b012f4981d4..6f289a49e5ec 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json @@ -66,7 +66,7 @@ "cmdUnderTest": "$TC action add action bpf object-file _b.o index 667", "expExitCode": "0", "verifyCmd": "$TC action get action bpf index 667", - "matchPattern": "action order [0-9]*: bpf _b.o:\\[action\\] id [0-9]* tag 3b185187f1855c4c default-action pipe.*index 667 ref", + "matchPattern": "action order [0-9]*: bpf _b.o:\\[action\\] id [0-9]* tag 3b185187f1855c4c( jited)? default-action pipe.*index 667 ref", "matchCount": "1", "teardown": [ "$TC action flush action bpf", @@ -92,10 +92,15 @@ "cmdUnderTest": "$TC action add action bpf object-file _c.o index 667", "expExitCode": "255", "verifyCmd": "$TC action get action bpf index 667", - "matchPattern": "action order [0-9]*: bpf _b.o:\\[action\\] id [0-9].*index 667 ref", + "matchPattern": "action order [0-9]*: bpf _c.o:\\[action\\] id [0-9].*index 667 ref", "matchCount": "0", "teardown": [ - "$TC action flush action bpf", + [ + "$TC action flush action bpf", + 0, + 1, + 255 + ], "rm -f _c.o" ] }, From 09c8b9718a7af674036643fa2e0dbb2f09aba75e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 May 2018 09:50:22 -0700 Subject: [PATCH 217/347] tipc: fix one byte leak in tipc_sk_set_orig_addr() sysbot/KMSAN reported an uninit-value in recvmsg() that I tracked down to tipc_sk_set_orig_addr(), missing srcaddr->member.scope initialization. This patches moves srcaddr->sock.scope init to follow fields order and ease future verifications. BUG: KMSAN: uninit-value in copy_to_user include/linux/uaccess.h:184 [inline] BUG: KMSAN: uninit-value in move_addr_to_user+0x32e/0x530 net/socket.c:226 CPU: 0 PID: 4549 Comm: syz-executor287 Not tainted 4.17.0-rc3+ #88 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x185/0x1d0 lib/dump_stack.c:113 kmsan_report+0x142/0x240 mm/kmsan/kmsan.c:1067 kmsan_internal_check_memory+0x135/0x1e0 mm/kmsan/kmsan.c:1157 kmsan_copy_to_user+0x69/0x160 mm/kmsan/kmsan.c:1199 copy_to_user include/linux/uaccess.h:184 [inline] move_addr_to_user+0x32e/0x530 net/socket.c:226 ___sys_recvmsg+0x4e2/0x810 net/socket.c:2285 __sys_recvmsg net/socket.c:2328 [inline] __do_sys_recvmsg net/socket.c:2338 [inline] __se_sys_recvmsg net/socket.c:2335 [inline] __x64_sys_recvmsg+0x325/0x460 net/socket.c:2335 do_syscall_64+0x154/0x220 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x4455e9 RSP: 002b:00007fe3bd36ddb8 EFLAGS: 00000246 ORIG_RAX: 000000000000002f RAX: ffffffffffffffda RBX: 00000000006dac24 RCX: 00000000004455e9 RDX: 0000000000002002 RSI: 0000000020000400 RDI: 0000000000000003 RBP: 00000000006dac20 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 00007fff98ce4b6f R14: 00007fe3bd36e9c0 R15: 0000000000000003 Local variable description: ----addr@___sys_recvmsg Variable was created at: ___sys_recvmsg+0xd5/0x810 net/socket.c:2246 __sys_recvmsg net/socket.c:2328 [inline] __do_sys_recvmsg net/socket.c:2338 [inline] __se_sys_recvmsg net/socket.c:2335 [inline] __x64_sys_recvmsg+0x325/0x460 net/socket.c:2335 Byte 19 of 32 is uninitialized Fixes: 31c82a2d9d51 ("tipc: add second source address to recvmsg()/recvfrom()") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Jon Maloy Cc: Ying Xue Acked-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 252a52ae0893..6be21575503a 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1516,10 +1516,10 @@ static void tipc_sk_set_orig_addr(struct msghdr *m, struct sk_buff *skb) srcaddr->sock.family = AF_TIPC; srcaddr->sock.addrtype = TIPC_ADDR_ID; + srcaddr->sock.scope = 0; srcaddr->sock.addr.id.ref = msg_origport(hdr); srcaddr->sock.addr.id.node = msg_orignode(hdr); srcaddr->sock.addr.name.domain = 0; - srcaddr->sock.scope = 0; m->msg_namelen = sizeof(struct sockaddr_tipc); if (!msg_in_group(hdr)) @@ -1528,6 +1528,7 @@ static void tipc_sk_set_orig_addr(struct msghdr *m, struct sk_buff *skb) /* Group message users may also want to know sending member's id */ srcaddr->member.family = AF_TIPC; srcaddr->member.addrtype = TIPC_ADDR_NAME; + srcaddr->member.scope = 0; srcaddr->member.addr.name.name.type = msg_nametype(hdr); srcaddr->member.addr.name.name.instance = TIPC_SKB_CB(skb)->orig_member; srcaddr->member.addr.name.domain = 0; From 97f3efb64323beb0690576e9d74e94998ad6e82a Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 9 May 2018 14:09:04 -0700 Subject: [PATCH 218/347] hv_netvsc: set master device The hyper-v transparent bonding should have used master_dev_link. The netvsc device should look like a master bond device not like the upper side of a tunnel. This makes the semantics the same so that userspace applications looking at network devices see the correct master relationshipship. Fixes: 0c195567a8f6 ("netvsc: transparent VF management") Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index ecc84954c511..da07ccdf84bf 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1840,7 +1840,8 @@ static int netvsc_vf_join(struct net_device *vf_netdev, goto rx_handler_failed; } - ret = netdev_upper_dev_link(vf_netdev, ndev, NULL); + ret = netdev_master_upper_dev_link(vf_netdev, ndev, + NULL, NULL, NULL); if (ret != 0) { netdev_err(vf_netdev, "can not set master device %s (err = %d)\n", From a577d868b768a3baf16cdd4841ab8cfb165521d6 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 10 May 2018 09:06:04 +0200 Subject: [PATCH 219/347] net/mlx4_en: Fix an error handling path in 'mlx4_en_init_netdev()' If an error occurs, 'mlx4_en_destroy_netdev()' is called. It then calls 'mlx4_en_free_resources()' which does the needed resources cleanup. So, doing some explicit kfree in the error handling path would lead to some double kfree. Simplify code to avoid such a case. Fixes: 67f8b1dcb9ee ("net/mlx4_en: Refactor the XDP forwarding rings scheme") Signed-off-by: Christophe JAILLET Reviewed-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index e0adac4a9a19..9670b33fc9b1 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -3324,12 +3324,11 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, MAX_TX_RINGS, GFP_KERNEL); if (!priv->tx_ring[t]) { err = -ENOMEM; - goto err_free_tx; + goto out; } priv->tx_cq[t] = kzalloc(sizeof(struct mlx4_en_cq *) * MAX_TX_RINGS, GFP_KERNEL); if (!priv->tx_cq[t]) { - kfree(priv->tx_ring[t]); err = -ENOMEM; goto out; } @@ -3582,11 +3581,6 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, return 0; -err_free_tx: - while (t--) { - kfree(priv->tx_ring[t]); - kfree(priv->tx_cq[t]); - } out: mlx4_en_destroy_netdev(dev); return err; From 6910e25de2257e2c82c7a2d126e3463cd8e50810 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 10 May 2018 17:34:13 +0800 Subject: [PATCH 220/347] sctp: remove sctp_chunk_put from fail_mark err path in sctp_ulpevent_make_rcvmsg In Commit 1f45f78f8e51 ("sctp: allow GSO frags to access the chunk too"), it held the chunk in sctp_ulpevent_make_rcvmsg to access it safely later in recvmsg. However, it also added sctp_chunk_put in fail_mark err path, which is only triggered before holding the chunk. syzbot reported a use-after-free crash happened on this err path, where it shouldn't call sctp_chunk_put. This patch simply removes this call. Fixes: 1f45f78f8e51 ("sctp: allow GSO frags to access the chunk too") Reported-by: syzbot+141d898c5f24489db4aa@syzkaller.appspotmail.com Signed-off-by: Xin Long Acked-by: Neil Horman Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/ulpevent.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index 84207ad33e8e..8cb7d9858270 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -715,7 +715,6 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, return event; fail_mark: - sctp_chunk_put(chunk); kfree_skb(skb); fail: return NULL; From 2fdae0349f587ffde4d1010c65fb543569bab787 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 10 May 2018 15:03:27 +0100 Subject: [PATCH 221/347] qed: fix spelling mistake: "taskelt" -> "tasklet" Trivial fix to spelling mistake in DP_VERBOSE message text Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 9854aa9139af..7870ae2a6f7e 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -680,7 +680,7 @@ static int qed_nic_stop(struct qed_dev *cdev) tasklet_disable(p_hwfn->sp_dpc); p_hwfn->b_sp_dpc_enabled = false; DP_VERBOSE(cdev, NETIF_MSG_IFDOWN, - "Disabled sp taskelt [hwfn %d] at %p\n", + "Disabled sp tasklet [hwfn %d] at %p\n", i, p_hwfn->sp_dpc); } } From c54e43d752c7187595c8c62a231e0b0d53c7fded Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 10 May 2018 23:26:00 +0100 Subject: [PATCH 222/347] rxrpc: Fix missing start of call timeout The expect_rx_by call timeout is supposed to be set when a call is started to indicate that we need to receive a packet by that point. This is currently put back every time we receive a packet, but it isn't started when we first send a packet. Without this, the call may wait forever if the server doesn't deign to reply. Fix this by setting the timeout upon a successful UDP sendmsg call for the first DATA packet. The timeout is initiated only for initial transmission and not for subsequent retries as we don't want the retry mechanism to extend the timeout indefinitely. Fixes: a158bdd3247b ("rxrpc: Fix call timeouts") Reported-by: Marc Dionne Signed-off-by: David Howells --- net/rxrpc/ar-internal.h | 1 + net/rxrpc/input.c | 2 +- net/rxrpc/output.c | 11 +++++++++++ net/rxrpc/sendmsg.c | 10 ++++++++++ 4 files changed, 23 insertions(+), 1 deletion(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 90d7079e0aa9..19975d2ca9a2 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -476,6 +476,7 @@ enum rxrpc_call_flag { RXRPC_CALL_SEND_PING, /* A ping will need to be sent */ RXRPC_CALL_PINGING, /* Ping in process */ RXRPC_CALL_RETRANS_TIMEOUT, /* Retransmission due to timeout occurred */ + RXRPC_CALL_BEGAN_RX_TIMER, /* We began the expect_rx_by timer */ }; /* diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 0410d2277ca2..b5fd6381313d 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -971,7 +971,7 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call, if (timo) { unsigned long now = jiffies, expect_rx_by; - expect_rx_by = jiffies + timo; + expect_rx_by = now + timo; WRITE_ONCE(call->expect_rx_by, expect_rx_by); rxrpc_reduce_call_timer(call, expect_rx_by, now, rxrpc_timer_set_for_normal); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 7f1fc04775b3..6b9d27f0d7ec 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -414,6 +414,17 @@ done: rxrpc_timer_set_for_lost_ack); } } + + if (sp->hdr.seq == 1 && + !test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER, + &call->flags)) { + unsigned long nowj = jiffies, expect_rx_by; + + expect_rx_by = nowj + call->next_rx_timo; + WRITE_ONCE(call->expect_rx_by, expect_rx_by); + rxrpc_reduce_call_timer(call, expect_rx_by, nowj, + rxrpc_timer_set_for_normal); + } } rxrpc_set_keepalive(call); diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 206e802ccbdc..be01f9c5d963 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -223,6 +223,15 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, ret = rxrpc_send_data_packet(call, skb, false); if (ret < 0) { + switch (ret) { + case -ENETUNREACH: + case -EHOSTUNREACH: + case -ECONNREFUSED: + rxrpc_set_call_completion(call, + RXRPC_CALL_LOCAL_ERROR, + 0, ret); + goto out; + } _debug("need instant resend %d", ret); rxrpc_instant_resend(call, ix); } else { @@ -241,6 +250,7 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, rxrpc_timer_set_for_send); } +out: rxrpc_free_skb(skb, rxrpc_skb_tx_freed); _leave(""); } From f2aeed3a591ff29a82495eeaa92ac4780bad7487 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 10 May 2018 23:26:00 +0100 Subject: [PATCH 223/347] rxrpc: Fix error reception on AF_INET6 sockets AF_RXRPC tries to turn on IP_RECVERR and IP_MTU_DISCOVER on the UDP socket it just opened for communications with the outside world, regardless of the type of socket. Unfortunately, this doesn't work with an AF_INET6 socket. Fix this by turning on IPV6_RECVERR and IPV6_MTU_DISCOVER instead if the socket is of the AF_INET6 family. Without this, kAFS server and address rotation doesn't work correctly because the algorithm doesn't detect received network errors. Fixes: 75b54cb57ca3 ("rxrpc: Add IPv6 support") Signed-off-by: David Howells --- net/rxrpc/local_object.c | 57 +++++++++++++++++++++++++++++----------- 1 file changed, 42 insertions(+), 15 deletions(-) diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 8b54e9531d52..b493e6b62740 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -134,22 +134,49 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) } } - /* we want to receive ICMP errors */ - opt = 1; - ret = kernel_setsockopt(local->socket, SOL_IP, IP_RECVERR, - (char *) &opt, sizeof(opt)); - if (ret < 0) { - _debug("setsockopt failed"); - goto error; - } + switch (local->srx.transport.family) { + case AF_INET: + /* we want to receive ICMP errors */ + opt = 1; + ret = kernel_setsockopt(local->socket, SOL_IP, IP_RECVERR, + (char *) &opt, sizeof(opt)); + if (ret < 0) { + _debug("setsockopt failed"); + goto error; + } - /* we want to set the don't fragment bit */ - opt = IP_PMTUDISC_DO; - ret = kernel_setsockopt(local->socket, SOL_IP, IP_MTU_DISCOVER, - (char *) &opt, sizeof(opt)); - if (ret < 0) { - _debug("setsockopt failed"); - goto error; + /* we want to set the don't fragment bit */ + opt = IP_PMTUDISC_DO; + ret = kernel_setsockopt(local->socket, SOL_IP, IP_MTU_DISCOVER, + (char *) &opt, sizeof(opt)); + if (ret < 0) { + _debug("setsockopt failed"); + goto error; + } + break; + + case AF_INET6: + /* we want to receive ICMP errors */ + opt = 1; + ret = kernel_setsockopt(local->socket, SOL_IPV6, IPV6_RECVERR, + (char *) &opt, sizeof(opt)); + if (ret < 0) { + _debug("setsockopt failed"); + goto error; + } + + /* we want to set the don't fragment bit */ + opt = IPV6_PMTUDISC_DO; + ret = kernel_setsockopt(local->socket, SOL_IPV6, IPV6_MTU_DISCOVER, + (char *) &opt, sizeof(opt)); + if (ret < 0) { + _debug("setsockopt failed"); + goto error; + } + break; + + default: + BUG(); } /* set the socket up */ From 93864fc3ffcc4bf70e96cfb5cc6e941630419ad0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 10 May 2018 23:26:01 +0100 Subject: [PATCH 224/347] rxrpc: Fix the min security level for kernel calls Fix the kernel call initiation to set the minimum security level for kernel initiated calls (such as from kAFS) from the sockopt value. Fixes: 19ffa01c9c45 ("rxrpc: Use structs to hold connection params and protocol info") Signed-off-by: David Howells --- net/rxrpc/af_rxrpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 9a2c8e7c000e..2b463047dd7b 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -313,7 +313,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, memset(&cp, 0, sizeof(cp)); cp.local = rx->local; cp.key = key; - cp.security_level = 0; + cp.security_level = rx->min_sec_level; cp.exclusive = false; cp.upgrade = upgrade; cp.service_id = srx->srx_service; From 494337c918403db5743af0348d963cafb9b20cee Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 10 May 2018 23:26:01 +0100 Subject: [PATCH 225/347] rxrpc: Add a tracepoint to log ICMP/ICMP6 and error messages Add a tracepoint to log received ICMP/ICMP6 events and other error messages. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 30 +++++++++++++++++++++++ net/rxrpc/peer_event.c | 46 ++++++++++++++++++------------------ 2 files changed, 53 insertions(+), 23 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 9e96c2fe2793..497d0b67f421 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -15,6 +15,7 @@ #define _TRACE_RXRPC_H #include +#include /* * Define enums for tracing information. @@ -1374,6 +1375,35 @@ TRACE_EVENT(rxrpc_resend, __entry->anno) ); +TRACE_EVENT(rxrpc_rx_icmp, + TP_PROTO(struct rxrpc_peer *peer, struct sock_extended_err *ee, + struct sockaddr_rxrpc *srx), + + TP_ARGS(peer, ee, srx), + + TP_STRUCT__entry( + __field(unsigned int, peer ) + __field_struct(struct sock_extended_err, ee ) + __field_struct(struct sockaddr_rxrpc, srx ) + ), + + TP_fast_assign( + __entry->peer = peer->debug_id; + memcpy(&__entry->ee, ee, sizeof(__entry->ee)); + memcpy(&__entry->srx, srx, sizeof(__entry->srx)); + ), + + TP_printk("P=%08x o=%u t=%u c=%u i=%u d=%u e=%d %pISp", + __entry->peer, + __entry->ee.ee_origin, + __entry->ee.ee_type, + __entry->ee.ee_code, + __entry->ee.ee_info, + __entry->ee.ee_data, + __entry->ee.ee_errno, + &__entry->srx.transport) + ); + #endif /* _TRACE_RXRPC_H */ /* This part must be outside protection */ diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 78c2f95d1f22..0ed8b651cec2 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -28,39 +28,39 @@ static void rxrpc_store_error(struct rxrpc_peer *, struct sock_exterr_skb *); * Find the peer associated with an ICMP packet. */ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local, - const struct sk_buff *skb) + const struct sk_buff *skb, + struct sockaddr_rxrpc *srx) { struct sock_exterr_skb *serr = SKB_EXT_ERR(skb); - struct sockaddr_rxrpc srx; _enter(""); - memset(&srx, 0, sizeof(srx)); - srx.transport_type = local->srx.transport_type; - srx.transport_len = local->srx.transport_len; - srx.transport.family = local->srx.transport.family; + memset(srx, 0, sizeof(*srx)); + srx->transport_type = local->srx.transport_type; + srx->transport_len = local->srx.transport_len; + srx->transport.family = local->srx.transport.family; /* Can we see an ICMP4 packet on an ICMP6 listening socket? and vice * versa? */ - switch (srx.transport.family) { + switch (srx->transport.family) { case AF_INET: - srx.transport.sin.sin_port = serr->port; + srx->transport.sin.sin_port = serr->port; switch (serr->ee.ee_origin) { case SO_EE_ORIGIN_ICMP: _net("Rx ICMP"); - memcpy(&srx.transport.sin.sin_addr, + memcpy(&srx->transport.sin.sin_addr, skb_network_header(skb) + serr->addr_offset, sizeof(struct in_addr)); break; case SO_EE_ORIGIN_ICMP6: _net("Rx ICMP6 on v4 sock"); - memcpy(&srx.transport.sin.sin_addr, + memcpy(&srx->transport.sin.sin_addr, skb_network_header(skb) + serr->addr_offset + 12, sizeof(struct in_addr)); break; default: - memcpy(&srx.transport.sin.sin_addr, &ip_hdr(skb)->saddr, + memcpy(&srx->transport.sin.sin_addr, &ip_hdr(skb)->saddr, sizeof(struct in_addr)); break; } @@ -68,25 +68,25 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local, #ifdef CONFIG_AF_RXRPC_IPV6 case AF_INET6: - srx.transport.sin6.sin6_port = serr->port; + srx->transport.sin6.sin6_port = serr->port; switch (serr->ee.ee_origin) { case SO_EE_ORIGIN_ICMP6: _net("Rx ICMP6"); - memcpy(&srx.transport.sin6.sin6_addr, + memcpy(&srx->transport.sin6.sin6_addr, skb_network_header(skb) + serr->addr_offset, sizeof(struct in6_addr)); break; case SO_EE_ORIGIN_ICMP: _net("Rx ICMP on v6 sock"); - srx.transport.sin6.sin6_addr.s6_addr32[0] = 0; - srx.transport.sin6.sin6_addr.s6_addr32[1] = 0; - srx.transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff); - memcpy(srx.transport.sin6.sin6_addr.s6_addr + 12, + srx->transport.sin6.sin6_addr.s6_addr32[0] = 0; + srx->transport.sin6.sin6_addr.s6_addr32[1] = 0; + srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff); + memcpy(srx->transport.sin6.sin6_addr.s6_addr + 12, skb_network_header(skb) + serr->addr_offset, sizeof(struct in_addr)); break; default: - memcpy(&srx.transport.sin6.sin6_addr, + memcpy(&srx->transport.sin6.sin6_addr, &ipv6_hdr(skb)->saddr, sizeof(struct in6_addr)); break; @@ -98,7 +98,7 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local, BUG(); } - return rxrpc_lookup_peer_rcu(local, &srx); + return rxrpc_lookup_peer_rcu(local, srx); } /* @@ -146,6 +146,7 @@ static void rxrpc_adjust_mtu(struct rxrpc_peer *peer, struct sock_exterr_skb *se void rxrpc_error_report(struct sock *sk) { struct sock_exterr_skb *serr; + struct sockaddr_rxrpc srx; struct rxrpc_local *local = sk->sk_user_data; struct rxrpc_peer *peer; struct sk_buff *skb; @@ -166,7 +167,7 @@ void rxrpc_error_report(struct sock *sk) } rcu_read_lock(); - peer = rxrpc_lookup_peer_icmp_rcu(local, skb); + peer = rxrpc_lookup_peer_icmp_rcu(local, skb, &srx); if (peer && !rxrpc_get_peer_maybe(peer)) peer = NULL; if (!peer) { @@ -176,6 +177,8 @@ void rxrpc_error_report(struct sock *sk) return; } + trace_rxrpc_rx_icmp(peer, &serr->ee, &srx); + if ((serr->ee.ee_origin == SO_EE_ORIGIN_ICMP && serr->ee.ee_type == ICMP_DEST_UNREACH && serr->ee.ee_code == ICMP_FRAG_NEEDED)) { @@ -209,9 +212,6 @@ static void rxrpc_store_error(struct rxrpc_peer *peer, ee = &serr->ee; - _net("Rx Error o=%d t=%d c=%d e=%d", - ee->ee_origin, ee->ee_type, ee->ee_code, ee->ee_errno); - err = ee->ee_errno; switch (ee->ee_origin) { From 6b47fe1d1ca3aec3a1a8623439c22fbf51016cd8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 10 May 2018 23:26:01 +0100 Subject: [PATCH 226/347] rxrpc: Trace UDP transmission failure Add a tracepoint to log transmission failure from the UDP transport socket being used by AF_RXRPC. Signed-off-by: David Howells --- include/trace/events/rxrpc.h | 55 ++++++++++++++++++++++++++++++++++++ net/rxrpc/conn_event.c | 11 ++++++-- net/rxrpc/local_event.c | 3 +- net/rxrpc/output.c | 23 +++++++++++++-- net/rxrpc/rxkad.c | 6 ++-- 5 files changed, 90 insertions(+), 8 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 497d0b67f421..077e664ac9a2 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -211,6 +211,20 @@ enum rxrpc_congest_change { rxrpc_cong_saw_nack, }; +enum rxrpc_tx_fail_trace { + rxrpc_tx_fail_call_abort, + rxrpc_tx_fail_call_ack, + rxrpc_tx_fail_call_data_frag, + rxrpc_tx_fail_call_data_nofrag, + rxrpc_tx_fail_call_final_resend, + rxrpc_tx_fail_conn_abort, + rxrpc_tx_fail_conn_challenge, + rxrpc_tx_fail_conn_response, + rxrpc_tx_fail_reject, + rxrpc_tx_fail_version_keepalive, + rxrpc_tx_fail_version_reply, +}; + #endif /* end __RXRPC_DECLARE_TRACE_ENUMS_ONCE_ONLY */ /* @@ -438,6 +452,19 @@ enum rxrpc_congest_change { EM(RXRPC_CALL_LOCAL_ERROR, "LocalError") \ E_(RXRPC_CALL_NETWORK_ERROR, "NetError") +#define rxrpc_tx_fail_traces \ + EM(rxrpc_tx_fail_call_abort, "CallAbort") \ + EM(rxrpc_tx_fail_call_ack, "CallAck") \ + EM(rxrpc_tx_fail_call_data_frag, "CallDataFrag") \ + EM(rxrpc_tx_fail_call_data_nofrag, "CallDataNofrag") \ + EM(rxrpc_tx_fail_call_final_resend, "CallFinalResend") \ + EM(rxrpc_tx_fail_conn_abort, "ConnAbort") \ + EM(rxrpc_tx_fail_conn_challenge, "ConnChall") \ + EM(rxrpc_tx_fail_conn_response, "ConnResp") \ + EM(rxrpc_tx_fail_reject, "Reject") \ + EM(rxrpc_tx_fail_version_keepalive, "VerKeepalive") \ + E_(rxrpc_tx_fail_version_reply, "VerReply") + /* * Export enum symbols via userspace. */ @@ -461,6 +488,7 @@ rxrpc_propose_ack_traces; rxrpc_propose_ack_outcomes; rxrpc_congest_modes; rxrpc_congest_changes; +rxrpc_tx_fail_traces; /* * Now redefine the EM() and E_() macros to map the enums to the strings that @@ -1404,6 +1432,33 @@ TRACE_EVENT(rxrpc_rx_icmp, &__entry->srx.transport) ); +TRACE_EVENT(rxrpc_tx_fail, + TP_PROTO(unsigned int debug_id, rxrpc_serial_t serial, int ret, + enum rxrpc_tx_fail_trace what), + + TP_ARGS(debug_id, serial, ret, what), + + TP_STRUCT__entry( + __field(unsigned int, debug_id ) + __field(rxrpc_serial_t, serial ) + __field(int, ret ) + __field(enum rxrpc_tx_fail_trace, what ) + ), + + TP_fast_assign( + __entry->debug_id = debug_id; + __entry->serial = serial; + __entry->ret = ret; + __entry->what = what; + ), + + TP_printk("c=%08x r=%x ret=%d %s", + __entry->debug_id, + __entry->serial, + __entry->ret, + __print_symbolic(__entry->what, rxrpc_tx_fail_traces)) + ); + #endif /* _TRACE_RXRPC_H */ /* This part must be outside protection */ diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index c717152070df..1350f1be8037 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -40,7 +40,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, } __attribute__((packed)) pkt; struct rxrpc_ackinfo ack_info; size_t len; - int ioc; + int ret, ioc; u32 serial, mtu, call_id, padding; _enter("%d", conn->debug_id); @@ -135,10 +135,13 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, break; } - kernel_sendmsg(conn->params.local->socket, &msg, iov, ioc, len); + ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, ioc, len); conn->params.peer->last_tx_at = ktime_get_real(); + if (ret < 0) + trace_rxrpc_tx_fail(conn->debug_id, serial, ret, + rxrpc_tx_fail_call_final_resend); + _leave(""); - return; } /* @@ -236,6 +239,8 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); if (ret < 0) { + trace_rxrpc_tx_fail(conn->debug_id, serial, ret, + rxrpc_tx_fail_conn_abort); _debug("sendmsg failed: %d", ret); return -EAGAIN; } diff --git a/net/rxrpc/local_event.c b/net/rxrpc/local_event.c index 93b5d910b4a1..8325f1b86840 100644 --- a/net/rxrpc/local_event.c +++ b/net/rxrpc/local_event.c @@ -71,7 +71,8 @@ static void rxrpc_send_version_request(struct rxrpc_local *local, ret = kernel_sendmsg(local->socket, &msg, iov, 2, len); if (ret < 0) - _debug("sendmsg failed: %d", ret); + trace_rxrpc_tx_fail(local->debug_id, 0, ret, + rxrpc_tx_fail_version_reply); _leave(""); } diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 6b9d27f0d7ec..f03de1c59ba3 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -210,6 +210,9 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping, if (ping) call->ping_time = now; conn->params.peer->last_tx_at = ktime_get_real(); + if (ret < 0) + trace_rxrpc_tx_fail(call->debug_id, serial, ret, + rxrpc_tx_fail_call_ack); if (call->state < RXRPC_CALL_COMPLETE) { if (ret < 0) { @@ -294,6 +297,10 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call) ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 1, sizeof(pkt)); conn->params.peer->last_tx_at = ktime_get_real(); + if (ret < 0) + trace_rxrpc_tx_fail(call->debug_id, serial, ret, + rxrpc_tx_fail_call_abort); + rxrpc_put_connection(conn); return ret; @@ -387,6 +394,9 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, conn->params.peer->last_tx_at = ktime_get_real(); up_read(&conn->params.local->defrag_sem); + if (ret < 0) + trace_rxrpc_tx_fail(call->debug_id, serial, ret, + rxrpc_tx_fail_call_data_nofrag); if (ret == -EMSGSIZE) goto send_fragmentable; @@ -476,6 +486,10 @@ send_fragmentable: #endif } + if (ret < 0) + trace_rxrpc_tx_fail(call->debug_id, serial, ret, + rxrpc_tx_fail_call_data_frag); + up_write(&conn->params.local->defrag_sem); goto done; } @@ -493,6 +507,7 @@ void rxrpc_reject_packets(struct rxrpc_local *local) struct kvec iov[2]; size_t size; __be32 code; + int ret; _enter("%d", local->debug_id); @@ -527,7 +542,10 @@ void rxrpc_reject_packets(struct rxrpc_local *local) whdr.flags ^= RXRPC_CLIENT_INITIATED; whdr.flags &= RXRPC_CLIENT_INITIATED; - kernel_sendmsg(local->socket, &msg, iov, 2, size); + ret = kernel_sendmsg(local->socket, &msg, iov, 2, size); + if (ret < 0) + trace_rxrpc_tx_fail(local->debug_id, 0, ret, + rxrpc_tx_fail_reject); } rxrpc_free_skb(skb, rxrpc_skb_rx_freed); @@ -578,7 +596,8 @@ void rxrpc_send_keepalive(struct rxrpc_peer *peer) ret = kernel_sendmsg(peer->local->socket, &msg, iov, 2, len); if (ret < 0) - _debug("sendmsg failed: %d", ret); + trace_rxrpc_tx_fail(peer->debug_id, 0, ret, + rxrpc_tx_fail_version_keepalive); peer->last_tx_at = ktime_get_real(); _leave(""); diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 588fea0dd362..6c0ae27fff84 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -664,7 +664,8 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn) ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); if (ret < 0) { - _debug("sendmsg failed: %d", ret); + trace_rxrpc_tx_fail(conn->debug_id, serial, ret, + rxrpc_tx_fail_conn_challenge); return -EAGAIN; } @@ -719,7 +720,8 @@ static int rxkad_send_response(struct rxrpc_connection *conn, ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 3, len); if (ret < 0) { - _debug("sendmsg failed: %d", ret); + trace_rxrpc_tx_fail(conn->debug_id, serial, ret, + rxrpc_tx_fail_conn_response); return -EAGAIN; } From 1ef903bf795be01c91c10c93a0f9d9d6f2f7921b Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Mon, 26 Mar 2018 13:35:29 -0500 Subject: [PATCH 227/347] net/mlx5: Free IRQs in shutdown path Some platforms require IRQs to be free'd in the shutdown path. Otherwise they will fail to be reallocated after a kexec. Fixes: 8812c24d28f4 ("net/mlx5: Add fast unload support in shutdown flow") Signed-off-by: Daniel Jurgens Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 28 +++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/main.c | 8 ++++++ .../ethernet/mellanox/mlx5/core/mlx5_core.h | 2 ++ 3 files changed, 38 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index c1c94974e16b..1814f803bd2c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -34,6 +34,9 @@ #include #include #include +#ifdef CONFIG_RFS_ACCEL +#include +#endif #include "mlx5_core.h" #include "fpga/core.h" #include "eswitch.h" @@ -923,3 +926,28 @@ int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, MLX5_SET(query_eq_in, in, eq_number, eq->eqn); return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); } + +/* This function should only be called after mlx5_cmd_force_teardown_hca */ +void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_table *table = &dev->priv.eq_table; + struct mlx5_eq *eq; + +#ifdef CONFIG_RFS_ACCEL + if (dev->rmap) { + free_irq_cpu_rmap(dev->rmap); + dev->rmap = NULL; + } +#endif + list_for_each_entry(eq, &table->comp_eqs_list, list) + free_irq(eq->irqn, eq); + + free_irq(table->pages_eq.irqn, &table->pages_eq); + free_irq(table->async_eq.irqn, &table->async_eq); + free_irq(table->cmd_eq.irqn, &table->cmd_eq); +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + if (MLX5_CAP_GEN(dev, pg)) + free_irq(table->pfault_eq.irqn, &table->pfault_eq); +#endif + pci_free_irq_vectors(dev->pdev); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 63a8ea31601c..e2c465b0b3f8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1587,6 +1587,14 @@ static int mlx5_try_fast_unload(struct mlx5_core_dev *dev) mlx5_enter_error_state(dev, true); + /* Some platforms requiring freeing the IRQ's in the shutdown + * flow. If they aren't freed they can't be allocated after + * kexec. There is no need to cleanup the mlx5_core software + * contexts. + */ + mlx5_irq_clear_affinity_hints(dev); + mlx5_core_eq_free_irqs(dev); + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 7d001fe6e631..023882d9a22e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -128,6 +128,8 @@ int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u32 *out, int outlen); int mlx5_start_eqs(struct mlx5_core_dev *dev); void mlx5_stop_eqs(struct mlx5_core_dev *dev); +/* This function should only be called after mlx5_cmd_force_teardown_hca */ +void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev); struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn); u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq); void mlx5_cq_tasklet_cb(unsigned long data); From 88d725bbb43cd63a40c8ef70dd373f1d38ead2e3 Mon Sep 17 00:00:00 2001 From: Adi Nissim Date: Wed, 25 Apr 2018 11:21:32 +0300 Subject: [PATCH 228/347] net/mlx5: E-Switch, Include VF RDMA stats in vport statistics The host side reporting of VF vport statistics didn't include the VF RDMA traffic. Fixes: 3b751a2a418a ("net/mlx5: E-Switch, Introduce get vf statistics") Signed-off-by: Adi Nissim Reported-by: Ariel Almog Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 332bc56306bf..1352d13eedb3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -2175,26 +2175,35 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, memset(vf_stats, 0, sizeof(*vf_stats)); vf_stats->rx_packets = MLX5_GET_CTR(out, received_eth_unicast.packets) + + MLX5_GET_CTR(out, received_ib_unicast.packets) + MLX5_GET_CTR(out, received_eth_multicast.packets) + + MLX5_GET_CTR(out, received_ib_multicast.packets) + MLX5_GET_CTR(out, received_eth_broadcast.packets); vf_stats->rx_bytes = MLX5_GET_CTR(out, received_eth_unicast.octets) + + MLX5_GET_CTR(out, received_ib_unicast.octets) + MLX5_GET_CTR(out, received_eth_multicast.octets) + + MLX5_GET_CTR(out, received_ib_multicast.octets) + MLX5_GET_CTR(out, received_eth_broadcast.octets); vf_stats->tx_packets = MLX5_GET_CTR(out, transmitted_eth_unicast.packets) + + MLX5_GET_CTR(out, transmitted_ib_unicast.packets) + MLX5_GET_CTR(out, transmitted_eth_multicast.packets) + + MLX5_GET_CTR(out, transmitted_ib_multicast.packets) + MLX5_GET_CTR(out, transmitted_eth_broadcast.packets); vf_stats->tx_bytes = MLX5_GET_CTR(out, transmitted_eth_unicast.octets) + + MLX5_GET_CTR(out, transmitted_ib_unicast.octets) + MLX5_GET_CTR(out, transmitted_eth_multicast.octets) + + MLX5_GET_CTR(out, transmitted_ib_multicast.octets) + MLX5_GET_CTR(out, transmitted_eth_broadcast.octets); vf_stats->multicast = - MLX5_GET_CTR(out, received_eth_multicast.packets); + MLX5_GET_CTR(out, received_eth_multicast.packets) + + MLX5_GET_CTR(out, received_ib_multicast.packets); vf_stats->broadcast = MLX5_GET_CTR(out, received_eth_broadcast.packets); From f85900c3e13fdb61f040c9feecbcda601e0cdcfb Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Thu, 22 Mar 2018 18:51:37 +0200 Subject: [PATCH 229/347] net/mlx5e: Err if asked to offload TC match on frag being first The HW doesn't support matching on frag first/later, return error if we are asked to offload that. Fixes: 3f7d0eb42d59 ("net/mlx5e: Offload TC matching on packets being IP fragments") Signed-off-by: Roi Dayan Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 3c534fc43400..b94276db3ce9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1261,6 +1261,10 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, f->mask); addr_type = key->addr_type; + /* the HW doesn't support frag first/later */ + if (mask->flags & FLOW_DIS_FIRST_FRAG) + return -EOPNOTSUPP; + if (mask->flags & FLOW_DIS_IS_FRAGMENT) { MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1); MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, From 6e70c267e68d77679534dcf4aaf84e66f2cf1425 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 10 May 2018 10:59:37 -0500 Subject: [PATCH 230/347] smb3: directory sync should not return an error As with NFS, which ignores sync on directory handles, fsync on a directory handle is a noop for CIFS/SMB3. Do not return an error on it. It breaks some database apps otherwise. Signed-off-by: Steve French CC: Stable Reviewed-by: Ronnie Sahlberg Reviewed-by: Pavel Shilovsky --- fs/cifs/cifsfs.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index f715609b13f3..5a5a0158cc8f 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -1047,6 +1047,18 @@ out: return rc; } +/* + * Directory operations under CIFS/SMB2/SMB3 are synchronous, so fsync() + * is a dummy operation. + */ +static int cifs_dir_fsync(struct file *file, loff_t start, loff_t end, int datasync) +{ + cifs_dbg(FYI, "Sync directory - name: %pD datasync: 0x%x\n", + file, datasync); + + return 0; +} + static ssize_t cifs_copy_file_range(struct file *src_file, loff_t off, struct file *dst_file, loff_t destoff, size_t len, unsigned int flags) @@ -1181,6 +1193,7 @@ const struct file_operations cifs_dir_ops = { .copy_file_range = cifs_copy_file_range, .clone_file_range = cifs_clone_file_range, .llseek = generic_file_llseek, + .fsync = cifs_dir_fsync, }; static void From 0a0b98734479aa5b3c671d5190e86273372cab95 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 11 May 2018 02:19:01 +0200 Subject: [PATCH 231/347] compat: fix 4-byte infoleak via uninitialized struct field Commit 3a4d44b61625 ("ntp: Move adjtimex related compat syscalls to native counterparts") removed the memset() in compat_get_timex(). Since then, the compat adjtimex syscall can invoke do_adjtimex() with an uninitialized ->tai. If do_adjtimex() doesn't write to ->tai (e.g. because the arguments are invalid), compat_put_timex() then copies the uninitialized ->tai field to userspace. Fix it by adding the memset() back. Fixes: 3a4d44b61625 ("ntp: Move adjtimex related compat syscalls to native counterparts") Signed-off-by: Jann Horn Acked-by: Kees Cook Acked-by: Al Viro Signed-off-by: Linus Torvalds --- kernel/compat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/compat.c b/kernel/compat.c index 6d21894806b4..92d8c98c0f57 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -34,6 +34,7 @@ int compat_get_timex(struct timex *txc, const struct compat_timex __user *utp) { struct compat_timex tx32; + memset(txc, 0, sizeof(struct timex)); if (copy_from_user(&tx32, utp, sizeof(struct compat_timex))) return -EFAULT; From 7f6df440b8623c441c42d070bf592e2d2c1fa9bb Mon Sep 17 00:00:00 2001 From: Haneen Mohammed Date: Fri, 11 May 2018 07:15:42 +0300 Subject: [PATCH 232/347] drm: Match sysfs name in link removal to link creation This patch matches the sysfs name used in the unlinking with the linking function. Otherwise, remove_compat_control_link() fails to remove sysfs created by create_compat_control_link() in drm_dev_register(). Fixes: 6449b088dd51 ("drm: Add fake controlD* symlinks for backwards compat") Cc: Dave Airlie Cc: Alex Deucher Cc: Emil Velikov Cc: David Herrmann Cc: Greg Kroah-Hartman Cc: Daniel Vetter Cc: Gustavo Padovan Cc: Maarten Lankhorst Cc: Sean Paul Cc: David Airlie Cc: dri-devel@lists.freedesktop.org Cc: # v4.10+ Signed-off-by: Haneen Mohammed [seanpaul added Fixes and Cc tags] Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20180511041542.GA4253@haneen-vb --- drivers/gpu/drm/drm_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index a1b9338736e3..c2c21d839727 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -716,7 +716,7 @@ static void remove_compat_control_link(struct drm_device *dev) if (!minor) return; - name = kasprintf(GFP_KERNEL, "controlD%d", minor->index); + name = kasprintf(GFP_KERNEL, "controlD%d", minor->index + 64); if (!name) return; From dc432c3d7f9bceb3de6f5b44fb9c657c9810ed6d Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 9 May 2018 11:59:32 -0400 Subject: [PATCH 233/347] tracing: Fix regex_match_front() to not over compare the test string The regex match function regex_match_front() in the tracing filter logic, was fixed to test just the pattern length from testing the entire test string. That is, it went from strncmp(str, r->pattern, len) to strcmp(str, r->pattern, r->len). The issue is that str is not guaranteed to be nul terminated, and if r->len is greater than the length of str, it can access more memory than is allocated. The solution is to add a simple test if (len < r->len) return 0. Cc: stable@vger.kernel.org Fixes: 285caad415f45 ("tracing/filters: Fix MATCH_FRONT_ONLY filter matching") Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_filter.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 1f951b3df60c..7d306b74230f 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -762,6 +762,9 @@ static int regex_match_full(char *str, struct regex *r, int len) static int regex_match_front(char *str, struct regex *r, int len) { + if (len < r->len) + return 0; + if (strncmp(str, r->pattern, r->len) == 0) return 1; return 0; From 4fa8667ca3989ce14cf66301fa251544fbddbdd0 Mon Sep 17 00:00:00 2001 From: Debabrata Banerjee Date: Wed, 9 May 2018 19:32:10 -0400 Subject: [PATCH 234/347] bonding: do not allow rlb updates to invalid mac Make sure multicast, broadcast, and zero mac's cannot be the output of rlb updates, which should all be directed arps. Receive load balancing will be collapsed if any of these happen, as the switch will broadcast. Signed-off-by: Debabrata Banerjee Signed-off-by: David S. Miller --- drivers/net/bonding/bond_alb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 1ed9529e7bd1..3f6faa657360 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -450,7 +450,7 @@ static void rlb_update_client(struct rlb_client_info *client_info) { int i; - if (!client_info->slave) + if (!client_info->slave || !is_valid_ether_addr(client_info->mac_dst)) return; for (i = 0; i < RLB_ARP_BURST_SIZE; i++) { From 21706ee8a47d3ede7fdae0be6d7c0a0e31a83229 Mon Sep 17 00:00:00 2001 From: Debabrata Banerjee Date: Wed, 9 May 2018 19:32:11 -0400 Subject: [PATCH 235/347] bonding: send learning packets for vlans on slave There was a regression at some point from the intended functionality of commit f60c3704e87d ("bonding: Fix alb mode to only use first level vlans.") Given the return value vlan_get_encap_level() we need to store the nest level of the bond device, and then compare the vlan's encap level to this. Without this, this check always fails and learning packets are never sent. In addition, this same commit caused a regression in the behavior of balance_alb, which requires learning packets be sent for all interfaces using the slave's mac in order to load balance properly. For vlan's that have not set a user mac, we can send after checking one bit. Otherwise we need send the set mac, albeit defeating rx load balancing for that vlan. Signed-off-by: Debabrata Banerjee Signed-off-by: David S. Miller --- drivers/net/bonding/bond_alb.c | 13 ++++++++----- drivers/net/bonding/bond_main.c | 2 ++ include/net/bonding.h | 1 + 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 3f6faa657360..5eb0df2e5464 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -943,6 +943,10 @@ static void alb_send_lp_vid(struct slave *slave, u8 mac_addr[], skb->priority = TC_PRIO_CONTROL; skb->dev = slave->dev; + netdev_dbg(slave->bond->dev, + "Send learning packet: dev %s mac %pM vlan %d\n", + slave->dev->name, mac_addr, vid); + if (vid) __vlan_hwaccel_put_tag(skb, vlan_proto, vid); @@ -965,14 +969,13 @@ static int alb_upper_dev_walk(struct net_device *upper, void *_data) u8 *mac_addr = data->mac_addr; struct bond_vlan_tag *tags; - if (is_vlan_dev(upper) && vlan_get_encap_level(upper) == 0) { - if (strict_match && - ether_addr_equal_64bits(mac_addr, - upper->dev_addr)) { + if (is_vlan_dev(upper) && + bond->nest_level == vlan_get_encap_level(upper) - 1) { + if (upper->addr_assign_type == NET_ADDR_STOLEN) { alb_send_lp_vid(slave, mac_addr, vlan_dev_vlan_proto(upper), vlan_dev_vlan_id(upper)); - } else if (!strict_match) { + } else { alb_send_lp_vid(slave, upper->dev_addr, vlan_dev_vlan_proto(upper), vlan_dev_vlan_id(upper)); diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 718e4914e3a0..1f1e97b26f95 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1738,6 +1738,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, if (bond_mode_uses_xmit_hash(bond)) bond_update_slave_arr(bond, NULL); + bond->nest_level = dev_get_nest_level(bond_dev); + netdev_info(bond_dev, "Enslaving %s as %s interface with %s link\n", slave_dev->name, bond_is_active_slave(new_slave) ? "an active" : "a backup", diff --git a/include/net/bonding.h b/include/net/bonding.h index f801fc940b29..b52235158836 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -198,6 +198,7 @@ struct bonding { struct slave __rcu *primary_slave; struct bond_up_slave __rcu *slave_arr; /* Array of usable slaves */ bool force_primary; + u32 nest_level; s32 slave_cnt; /* never change this value outside the attach/detach wrappers */ int (*recv_probe)(const struct sk_buff *, struct bonding *, struct slave *); From 8ccc113172e4c1ebef45c2433f3c32ed6ae1b9c9 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 10 May 2018 13:26:16 +0200 Subject: [PATCH 236/347] mlxsw: core: Fix an error handling path in 'mlxsw_core_bus_device_register()' Resources are not freed in the reverse order of the allocation. Labels are also mixed-up. Fix it and reorder code and labels in the error handling path of 'mlxsw_core_bus_device_register()' Fixes: ef3116e5403e ("mlxsw: spectrum: Register KVD resources with devlink") Signed-off-by: Christophe JAILLET Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 93ea56620a24..e13ac3b8dff7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1100,11 +1100,11 @@ err_emad_init: err_alloc_lag_mapping: mlxsw_ports_fini(mlxsw_core); err_ports_init: - mlxsw_bus->fini(bus_priv); -err_bus_init: if (!reload) devlink_resources_unregister(devlink, NULL); err_register_resources: + mlxsw_bus->fini(bus_priv); +err_bus_init: if (!reload) devlink_free(devlink); err_devlink_alloc: From 1b97013bfb11d66f041de691de6f0fec748ce016 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Thu, 10 May 2018 10:59:34 -0700 Subject: [PATCH 237/347] ipv4: fix memory leaks in udp_sendmsg, ping_v4_sendmsg Fix more memory leaks in ip_cmsg_send() callers. Part of them were fixed earlier in 919483096bfe. * udp_sendmsg one was there since the beginning when linux sources were first added to git; * ping_v4_sendmsg one was copy/pasted in c319b4d76b9e. Whenever return happens in udp_sendmsg() or ping_v4_sendmsg() IP options have to be freed if they were allocated previously. Add label so that future callers (if any) can use it instead of kfree() before return that is easy to forget. Fixes: c319b4d76b9e (net: ipv4: add IPPROTO_ICMP socket kind) Signed-off-by: Andrey Ignatov Signed-off-by: David S. Miller --- net/ipv4/ping.c | 7 +++++-- net/ipv4/udp.c | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 05e47d777009..56a010622f70 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -775,8 +775,10 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipc.addr = faddr = daddr; if (ipc.opt && ipc.opt->opt.srr) { - if (!daddr) - return -EINVAL; + if (!daddr) { + err = -EINVAL; + goto out_free; + } faddr = ipc.opt->opt.faddr; } tos = get_rttos(&ipc, inet); @@ -842,6 +844,7 @@ back_from_confirm: out: ip_rt_put(rt); +out_free: if (free) kfree(ipc.opt); if (!err) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c2a292dfd137..b61a770884fa 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -952,8 +952,10 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) sock_tx_timestamp(sk, ipc.sockc.tsflags, &ipc.tx_flags); if (ipc.opt && ipc.opt->opt.srr) { - if (!daddr) - return -EINVAL; + if (!daddr) { + err = -EINVAL; + goto out_free; + } faddr = ipc.opt->opt.faddr; connected = 0; } @@ -1074,6 +1076,7 @@ do_append_data: out: ip_rt_put(rt); +out_free: if (free) kfree(ipc.opt); if (!err) From 4e50d9ebaeaa3c6761d2b513ef7039510c8cf213 Mon Sep 17 00:00:00 2001 From: Charles Machalow Date: Thu, 10 May 2018 16:01:38 -0700 Subject: [PATCH 238/347] nvme: Fix sync controller reset return If a controller reset is requested while the device has no namespaces, we were incorrectly returning ENETRESET. This patch adds the check for ADMIN_ONLY controller state to indicate a successful reset. Fixes: 8000d1fdb0 ("nvme-rdma: fix sysfs invoked reset_ctrl error flow ") Cc: Signed-off-by: Charles Machalow [changelog] Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 2cbc378bc0d6..99b857e5a7a9 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -118,7 +118,8 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl) ret = nvme_reset_ctrl(ctrl); if (!ret) { flush_work(&ctrl->reset_work); - if (ctrl->state != NVME_CTRL_LIVE) + if (ctrl->state != NVME_CTRL_LIVE && + ctrl->state != NVME_CTRL_ADMIN_ONLY) ret = -ENETRESET; } From be83bbf806822b1b89e0a0f23cd87cddc409e429 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 11 May 2018 09:52:01 -0700 Subject: [PATCH 239/347] mmap: introduce sane default mmap limits The internal VM "mmap()" interfaces are based on the mmap target doing everything using page indexes rather than byte offsets, because traditionally (ie 32-bit) we had the situation that the byte offset didn't fit in a register. So while the mmap virtual address was limited by the word size of the architecture, the backing store was not. So we're basically passing "pgoff" around as a page index, in order to be able to describe backing store locations that are much bigger than the word size (think files larger than 4GB etc). But while this all makes a ton of sense conceptually, we've been dogged by various drivers that don't really understand this, and internally work with byte offsets, and then try to work with the page index by turning it into a byte offset with "pgoff << PAGE_SHIFT". Which obviously can overflow. Adding the size of the mapping to it to get the byte offset of the end of the backing store just exacerbates the problem, and if you then use this overflow-prone value to check various limits of your device driver mmap capability, you're just setting yourself up for problems. The correct thing for drivers to do is to do their limit math in page indices, the way the interface is designed. Because the generic mmap code _does_ test that the index doesn't overflow, since that's what the mmap code really cares about. HOWEVER. Finding and fixing various random drivers is a sisyphean task, so let's just see if we can just make the core mmap() code do the limiting for us. Realistically, the only "big" backing stores we need to care about are regular files and block devices, both of which are known to do this properly, and which have nice well-defined limits for how much data they can access. So let's special-case just those two known cases, and then limit other random mmap users to a backing store that still fits in "unsigned long". Realistically, that's not much of a limit at all on 64-bit, and on 32-bit architectures the only worry might be the GPU drivers, which can have big physical address spaces. To make it possible for drivers like that to say that they are 64-bit clean, this patch does repurpose the "FMODE_UNSIGNED_OFFSET" bit in the file flags to allow drivers to mark their file descriptors as safe in the full 64-bit mmap address space. [ The timing for doing this is less than optimal, and this should really go in a merge window. But realistically, this needs wide testing more than it needs anything else, and being main-line is the only way to do that. So the earlier the better, even if it's outside the proper development cycle - Linus ] Cc: Kees Cook Cc: Dan Carpenter Cc: Al Viro Cc: Willy Tarreau Cc: Dave Airlie Signed-off-by: Linus Torvalds --- mm/mmap.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/mm/mmap.c b/mm/mmap.c index 9d5968d1e8e3..6fc435760086 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1324,6 +1324,35 @@ static inline int mlock_future_check(struct mm_struct *mm, return 0; } +static inline u64 file_mmap_size_max(struct file *file, struct inode *inode) +{ + if (S_ISREG(inode->i_mode)) + return inode->i_sb->s_maxbytes; + + if (S_ISBLK(inode->i_mode)) + return MAX_LFS_FILESIZE; + + /* Special "we do even unsigned file positions" case */ + if (file->f_mode & FMODE_UNSIGNED_OFFSET) + return 0; + + /* Yes, random drivers might want more. But I'm tired of buggy drivers */ + return ULONG_MAX; +} + +static inline bool file_mmap_ok(struct file *file, struct inode *inode, + unsigned long pgoff, unsigned long len) +{ + u64 maxsize = file_mmap_size_max(file, inode); + + if (maxsize && len > maxsize) + return false; + maxsize -= len; + if (pgoff > maxsize >> PAGE_SHIFT) + return false; + return true; +} + /* * The caller must hold down_write(¤t->mm->mmap_sem). */ @@ -1409,6 +1438,9 @@ unsigned long do_mmap(struct file *file, unsigned long addr, struct inode *inode = file_inode(file); unsigned long flags_mask; + if (!file_mmap_ok(file, inode, pgoff, len)) + return -EOVERFLOW; + flags_mask = LEGACY_MAP_MASK | file->f_op->mmap_supported_flags; switch (flags & MAP_TYPE) { From ac21fc2dcb405cf250ad3f1228f64f64930d9211 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 11 May 2018 08:45:59 -0500 Subject: [PATCH 240/347] sh: switch to NO_BOOTMEM Commit 0fa1c579349f ("of/fdt: use memblock_virt_alloc for early alloc") inadvertently switched the DT unflattening allocations from memblock to bootmem which doesn't work because the unflattening happens before bootmem is initialized. Swapping the order of bootmem init and unflattening could also fix this, but removing bootmem is desired. So enable NO_BOOTMEM on SH like other architectures have done. Fixes: 0fa1c579349f ("of/fdt: use memblock_virt_alloc for early alloc") Reported-by: Rich Felker Cc: Yoshinori Sato Signed-off-by: Rob Herring Signed-off-by: Rich Felker --- arch/sh/Kconfig | 1 + arch/sh/kernel/setup.c | 1 - arch/sh/mm/init.c | 68 ++++-------------------------------------- arch/sh/mm/numa.c | 19 ------------ 4 files changed, 7 insertions(+), 82 deletions(-) diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 97fe29316476..1851eaeee131 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -9,6 +9,7 @@ config SUPERH select HAVE_IDE if HAS_IOPORT_MAP select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP + select NO_BOOTMEM select ARCH_DISCARD_MEMBLOCK select HAVE_OPROFILE select HAVE_GENERIC_DMA_COHERENT diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c index d34e998b809f..c286cf5da6e7 100644 --- a/arch/sh/kernel/setup.c +++ b/arch/sh/kernel/setup.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index ce0bbaa7e404..4034035fbede 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -211,59 +211,15 @@ void __init allocate_pgdat(unsigned int nid) NODE_DATA(nid) = __va(phys); memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); - - NODE_DATA(nid)->bdata = &bootmem_node_data[nid]; #endif NODE_DATA(nid)->node_start_pfn = start_pfn; NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn; } -static void __init bootmem_init_one_node(unsigned int nid) -{ - unsigned long total_pages, paddr; - unsigned long end_pfn; - struct pglist_data *p; - - p = NODE_DATA(nid); - - /* Nothing to do.. */ - if (!p->node_spanned_pages) - return; - - end_pfn = pgdat_end_pfn(p); - - total_pages = bootmem_bootmap_pages(p->node_spanned_pages); - - paddr = memblock_alloc(total_pages << PAGE_SHIFT, PAGE_SIZE); - if (!paddr) - panic("Can't allocate bootmap for nid[%d]\n", nid); - - init_bootmem_node(p, paddr >> PAGE_SHIFT, p->node_start_pfn, end_pfn); - - free_bootmem_with_active_regions(nid, end_pfn); - - /* - * XXX Handle initial reservations for the system memory node - * only for the moment, we'll refactor this later for handling - * reservations in other nodes. - */ - if (nid == 0) { - struct memblock_region *reg; - - /* Reserve the sections we're already using. */ - for_each_memblock(reserved, reg) { - reserve_bootmem(reg->base, reg->size, BOOTMEM_DEFAULT); - } - } - - sparse_memory_present_with_active_regions(nid); -} - static void __init do_init_bootmem(void) { struct memblock_region *reg; - int i; /* Add active regions with valid PFNs. */ for_each_memblock(memory, reg) { @@ -279,9 +235,12 @@ static void __init do_init_bootmem(void) plat_mem_setup(); - for_each_online_node(i) - bootmem_init_one_node(i); + for_each_memblock(memory, reg) { + int nid = memblock_get_region_node(reg); + memory_present(nid, memblock_region_memory_base_pfn(reg), + memblock_region_memory_end_pfn(reg)); + } sparse_init(); } @@ -322,7 +281,6 @@ void __init paging_init(void) { unsigned long max_zone_pfns[MAX_NR_ZONES]; unsigned long vaddr, end; - int nid; sh_mv.mv_mem_init(); @@ -377,21 +335,7 @@ void __init paging_init(void) kmap_coherent_init(); memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); - - for_each_online_node(nid) { - pg_data_t *pgdat = NODE_DATA(nid); - unsigned long low, start_pfn; - - start_pfn = pgdat->bdata->node_min_pfn; - low = pgdat->bdata->node_low_pfn; - - if (max_zone_pfns[ZONE_NORMAL] < low) - max_zone_pfns[ZONE_NORMAL] = low; - - printk("Node %u: start_pfn = 0x%lx, low = 0x%lx\n", - nid, start_pfn, low); - } - + max_zone_pfns[ZONE_NORMAL] = max_low_pfn; free_area_init_nodes(max_zone_pfns); } diff --git a/arch/sh/mm/numa.c b/arch/sh/mm/numa.c index 05713d190247..830e8b3684e4 100644 --- a/arch/sh/mm/numa.c +++ b/arch/sh/mm/numa.c @@ -8,7 +8,6 @@ * for more details. */ #include -#include #include #include #include @@ -26,9 +25,7 @@ EXPORT_SYMBOL_GPL(node_data); */ void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end) { - unsigned long bootmap_pages; unsigned long start_pfn, end_pfn; - unsigned long bootmem_paddr; /* Don't allow bogus node assignment */ BUG_ON(nid >= MAX_NUMNODES || nid <= 0); @@ -48,25 +45,9 @@ void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end) SMP_CACHE_BYTES, end)); memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); - NODE_DATA(nid)->bdata = &bootmem_node_data[nid]; NODE_DATA(nid)->node_start_pfn = start_pfn; NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn; - /* Node-local bootmap */ - bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); - bootmem_paddr = memblock_alloc_base(bootmap_pages << PAGE_SHIFT, - PAGE_SIZE, end); - init_bootmem_node(NODE_DATA(nid), bootmem_paddr >> PAGE_SHIFT, - start_pfn, end_pfn); - - free_bootmem_with_active_regions(nid, end_pfn); - - /* Reserve the pgdat and bootmap space with the bootmem allocator */ - reserve_bootmem_node(NODE_DATA(nid), start_pfn << PAGE_SHIFT, - sizeof(struct pglist_data), BOOTMEM_DEFAULT); - reserve_bootmem_node(NODE_DATA(nid), bootmem_paddr, - bootmap_pages << PAGE_SHIFT, BOOTMEM_DEFAULT); - /* It's up */ node_set_online(nid); From 04ac6fdba1afffad664377a324b017e63ac08bd8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 11 May 2018 14:13:57 -0400 Subject: [PATCH 241/347] Change Trond's email address in MAINTAINERS Signed-off-by: Trond Myklebust --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 4623caf8d72d..92e8db177a64 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9744,7 +9744,7 @@ F: include/linux/platform_data/nxp-nci.h F: Documentation/devicetree/bindings/net/nfc/ NFS, SUNRPC, AND LOCKD CLIENTS -M: Trond Myklebust +M: Trond Myklebust M: Anna Schumaker L: linux-nfs@vger.kernel.org W: http://client.linux-nfs.org From ea3beca422742ca7e8502496e2806421d5e9d6fd Mon Sep 17 00:00:00 2001 From: Jeff Shaw Date: Wed, 18 Apr 2018 11:23:27 -0700 Subject: [PATCH 242/347] ice: Set rq_last_status when cleaning rq Prior to this commit, the rq_last_status was only set when hardware responded with an error. This leads to rq_last_status being invalid in the future when hardware eventually responds without error. This commit resolves the issue by unconditionally setting rq_last_status with the value returned in the descriptor. Fixes: 940b61af02f4 ("ice: Initialize PF and setup miscellaneous interrupt") Signed-off-by: Jeff Shaw Signed-off-by: Anirudh Venkataramanan Tested-by: Tony Brelinski Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ice/ice_controlq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c index 5909a4407e38..7c511f144ed6 100644 --- a/drivers/net/ethernet/intel/ice/ice_controlq.c +++ b/drivers/net/ethernet/intel/ice/ice_controlq.c @@ -1014,10 +1014,10 @@ ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq, desc = ICE_CTL_Q_DESC(cq->rq, ntc); desc_idx = ntc; + cq->rq_last_status = (enum ice_aq_err)le16_to_cpu(desc->retval); flags = le16_to_cpu(desc->flags); if (flags & ICE_AQ_FLAG_ERR) { ret_code = ICE_ERR_AQ_ERROR; - cq->rq_last_status = (enum ice_aq_err)le16_to_cpu(desc->retval); ice_debug(hw, ICE_DBG_AQ_MSG, "Control Receive Queue Event received with error 0x%x\n", cq->rq_last_status); From bbb2707623f3ccc48695da2433f06d7c38193451 Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Thu, 19 Apr 2018 17:06:57 -0700 Subject: [PATCH 243/347] ixgbe: return error on unsupported SFP module when resetting Add check for unsupported module and return the error code. This fixes a Coverity hit due to unused return status from setup_sfp. Signed-off-by: Emil Tantilov Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index 3123267dfba9..9592f3e3e42e 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -3427,6 +3427,9 @@ static s32 ixgbe_reset_hw_X550em(struct ixgbe_hw *hw) hw->phy.sfp_setup_needed = false; } + if (status == IXGBE_ERR_SFP_NOT_SUPPORTED) + return status; + /* Reset PHY */ if (!hw->phy.reset_disable && hw->phy.ops.reset) hw->phy.ops.reset(hw); From cf12aab67a2fb29342d833a6333a322aaccca2d0 Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Tue, 24 Apr 2018 15:16:48 +0200 Subject: [PATCH 244/347] ixgbevf: fix ixgbevf_xmit_frame()'s return type The method ndo_start_xmit() is defined as returning an 'netdev_tx_t', which is a typedef for an enum type, but the implementation in this driver returns an 'int'. Fix this by returning 'netdev_tx_t' in this driver too. Signed-off-by: Luc Van Oostenryck Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index e3d04f226d57..850f8af95e49 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -4137,7 +4137,7 @@ out_drop: return NETDEV_TX_OK; } -static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev) +static netdev_tx_t ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev) { struct ixgbevf_adapter *adapter = netdev_priv(netdev); struct ixgbevf_ring *tx_ring; From c89ebb968f04c71e16e86c91caeacb045dc8f908 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 9 May 2018 14:58:48 +0100 Subject: [PATCH 245/347] ixgbe: fix memory leak on ipsec allocation The error clean up path kfree's adapter->ipsec and should be instead kfree'ing ipsec. Fix this. Also, the err1 error exit path does not need to kfree ipsec because this failure path was for the failed allocation of ipsec. Detected by CoverityScan, CID#146424 ("Resource Leak") Fixes: 63a67fe229ea ("ixgbe: add ipsec offload add and remove SA") Signed-off-by: Colin Ian King Acked-by: Shannon Nelson Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c index 68af127987bc..cead23e3db0c 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c @@ -943,8 +943,8 @@ err2: kfree(ipsec->ip_tbl); kfree(ipsec->rx_tbl); kfree(ipsec->tx_tbl); + kfree(ipsec); err1: - kfree(adapter->ipsec); netdev_err(adapter->netdev, "Unable to allocate memory for SA tables"); } From 9abd68ef454c824bfd18629033367b4382b5f390 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 8 May 2018 10:25:15 -0600 Subject: [PATCH 246/347] nvme: add quirk to force medium priority for SQ creation Some P3100 drives have a bug where they think WRRU (weighted round robin) is always enabled, even though the host doesn't set it. Since they think it's enabled, they also look at the submission queue creation priority. We used to set that to MEDIUM by default, but that was removed in commit 81c1cd98351b. This causes various issues on that drive. Add a quirk to still set MEDIUM priority for that controller. Fixes: 81c1cd98351b ("nvme/pci: Don't set reserved SQ create flags") Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe Signed-off-by: Keith Busch --- drivers/nvme/host/nvme.h | 5 +++++ drivers/nvme/host/pci.c | 12 +++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 7ded7a51c430..17d2f7cf3fed 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -84,6 +84,11 @@ enum nvme_quirks { * Supports the LighNVM command set if indicated in vs[1]. */ NVME_QUIRK_LIGHTNVM = (1 << 6), + + /* + * Set MEDIUM priority on SQ creation + */ + NVME_QUIRK_MEDIUM_PRIO_SQ = (1 << 7), }; /* diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index fbc71fac6f1e..17a0190bd88f 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1093,9 +1093,18 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid, static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid, struct nvme_queue *nvmeq) { + struct nvme_ctrl *ctrl = &dev->ctrl; struct nvme_command c; int flags = NVME_QUEUE_PHYS_CONTIG; + /* + * Some drives have a bug that auto-enables WRRU if MEDIUM isn't + * set. Since URGENT priority is zeroes, it makes all queues + * URGENT. + */ + if (ctrl->quirks & NVME_QUIRK_MEDIUM_PRIO_SQ) + flags |= NVME_SQ_PRIO_MEDIUM; + /* * Note: we (ab)use the fact that the prp fields survive if no data * is attached to the request. @@ -2701,7 +2710,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_STRIPE_SIZE | NVME_QUIRK_DEALLOCATE_ZEROES, }, { PCI_VDEVICE(INTEL, 0xf1a5), /* Intel 600P/P3100 */ - .driver_data = NVME_QUIRK_NO_DEEPEST_PS }, + .driver_data = NVME_QUIRK_NO_DEEPEST_PS | + NVME_QUIRK_MEDIUM_PRIO_SQ }, { PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */ .driver_data = NVME_QUIRK_IDENTIFY_CNS, }, { PCI_DEVICE(0x1c58, 0x0003), /* HGST adapter */ From af5d01842fe1fbfb9f5e1c1d957ba02ab6f4569a Mon Sep 17 00:00:00 2001 From: Roman Mashak Date: Fri, 11 May 2018 10:55:09 -0400 Subject: [PATCH 247/347] net sched actions: fix invalid pointer dereferencing if skbedit flags missing When application fails to pass flags in netlink TLV for a new skbedit action, the kernel results in the following oops: [ 8.307732] BUG: unable to handle kernel paging request at 0000000000021130 [ 8.309167] PGD 80000000193d1067 P4D 80000000193d1067 PUD 180e0067 PMD 0 [ 8.310595] Oops: 0000 [#1] SMP PTI [ 8.311334] Modules linked in: kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel aes_x86_64 crypto_simd cryptd glue_helper serio_raw [ 8.314190] CPU: 1 PID: 397 Comm: tc Not tainted 4.17.0-rc3+ #357 [ 8.315252] RIP: 0010:__tcf_idr_release+0x33/0x140 [ 8.316203] RSP: 0018:ffffa0718038f840 EFLAGS: 00010246 [ 8.317123] RAX: 0000000000000001 RBX: 0000000000021100 RCX: 0000000000000000 [ 8.319831] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000021100 [ 8.321181] RBP: 0000000000000000 R08: 000000000004adf8 R09: 0000000000000122 [ 8.322645] R10: 0000000000000000 R11: ffffffff9e5b01ed R12: 0000000000000000 [ 8.324157] R13: ffffffff9e0d3cc0 R14: 0000000000000000 R15: 0000000000000000 [ 8.325590] FS: 00007f591292e700(0000) GS:ffff8fcf5bc40000(0000) knlGS:0000000000000000 [ 8.327001] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 8.327987] CR2: 0000000000021130 CR3: 00000000180e6004 CR4: 00000000001606a0 [ 8.329289] Call Trace: [ 8.329735] tcf_skbedit_init+0xa7/0xb0 [ 8.330423] tcf_action_init_1+0x362/0x410 [ 8.331139] ? try_to_wake_up+0x44/0x430 [ 8.331817] tcf_action_init+0x103/0x190 [ 8.332511] tc_ctl_action+0x11a/0x220 [ 8.333174] rtnetlink_rcv_msg+0x23d/0x2e0 [ 8.333902] ? _cond_resched+0x16/0x40 [ 8.334569] ? __kmalloc_node_track_caller+0x5b/0x2c0 [ 8.335440] ? rtnl_calcit.isra.31+0xf0/0xf0 [ 8.336178] netlink_rcv_skb+0xdb/0x110 [ 8.336855] netlink_unicast+0x167/0x220 [ 8.337550] netlink_sendmsg+0x2a7/0x390 [ 8.338258] sock_sendmsg+0x30/0x40 [ 8.338865] ___sys_sendmsg+0x2c5/0x2e0 [ 8.339531] ? pagecache_get_page+0x27/0x210 [ 8.340271] ? filemap_fault+0xa2/0x630 [ 8.340943] ? page_add_file_rmap+0x108/0x200 [ 8.341732] ? alloc_set_pte+0x2aa/0x530 [ 8.342573] ? finish_fault+0x4e/0x70 [ 8.343332] ? __handle_mm_fault+0xbc1/0x10d0 [ 8.344337] ? __sys_sendmsg+0x53/0x80 [ 8.345040] __sys_sendmsg+0x53/0x80 [ 8.345678] do_syscall_64+0x4f/0x100 [ 8.346339] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 8.347206] RIP: 0033:0x7f591191da67 [ 8.347831] RSP: 002b:00007fff745abd48 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [ 8.349179] RAX: ffffffffffffffda RBX: 00007fff745abe70 RCX: 00007f591191da67 [ 8.350431] RDX: 0000000000000000 RSI: 00007fff745abdc0 RDI: 0000000000000003 [ 8.351659] RBP: 000000005af35251 R08: 0000000000000001 R09: 0000000000000000 [ 8.352922] R10: 00000000000005f1 R11: 0000000000000246 R12: 0000000000000000 [ 8.354183] R13: 00007fff745afed0 R14: 0000000000000001 R15: 00000000006767c0 [ 8.355400] Code: 41 89 d4 53 89 f5 48 89 fb e8 aa 20 fd ff 85 c0 0f 84 ed 00 00 00 48 85 db 0f 84 cf 00 00 00 40 84 ed 0f 85 cd 00 00 00 45 84 e4 <8b> 53 30 74 0d 85 d2 b8 ff ff ff ff 0f 8f b3 00 00 00 8b 43 2c [ 8.358699] RIP: __tcf_idr_release+0x33/0x140 RSP: ffffa0718038f840 [ 8.359770] CR2: 0000000000021130 [ 8.360438] ---[ end trace 60c66be45dfc14f0 ]--- The caller calls action's ->init() and passes pointer to "struct tc_action *a", which later may be initialized to point at the existing action, otherwise "struct tc_action *a" is still invalid, and therefore dereferencing it is an error as happens in tcf_idr_release, where refcnt is decremented. So in case of missing flags tcf_idr_release must be called only for existing actions. v2: - prepare patch for net tree Fixes: 5e1567aeb7fe ("net sched: skbedit action fix late binding") Signed-off-by: Roman Mashak Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_skbedit.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index ddf69fc01bdf..6138d1d71900 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -121,7 +121,8 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, return 0; if (!flags) { - tcf_idr_release(*a, bind); + if (exists) + tcf_idr_release(*a, bind); return -EINVAL; } From d68d75fdc34b0253c2bded7ed18cd60eb5a9599b Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 11 May 2018 17:45:32 +0200 Subject: [PATCH 248/347] net: sched: fix error path in tcf_proto_create() when modules are not configured In case modules are not configured, error out when tp->ops is null and prevent later null pointer dereference. Fixes: 33a48927c193 ("sched: push TC filter protocol creation into a separate function") Signed-off-by: Jiri Pirko Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index b66754f52a9f..963e4bf0aab8 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -152,8 +152,8 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol, NL_SET_ERR_MSG(extack, "TC classifier not found"); err = -ENOENT; } - goto errout; #endif + goto errout; } tp->classify = tp->ops->classify; tp->protocol = protocol; From a52956dfc503f8cc5cfe6454959b7049fddb4413 Mon Sep 17 00:00:00 2001 From: Roman Mashak Date: Fri, 11 May 2018 14:35:33 -0400 Subject: [PATCH 249/347] net sched actions: fix refcnt leak in skbmod When application fails to pass flags in netlink TLV when replacing existing skbmod action, the kernel will leak refcnt: $ tc actions get action skbmod index 1 total acts 0 action order 0: skbmod pipe set smac 00:11:22:33:44:55 index 1 ref 1 bind 0 For example, at this point a buggy application replaces the action with index 1 with new smac 00:aa:22:33:44:55, it fails because of zero flags, however refcnt gets bumped: $ tc actions get actions skbmod index 1 total acts 0 action order 0: skbmod pipe set smac 00:11:22:33:44:55 index 1 ref 2 bind 0 $ Tha patch fixes this by calling tcf_idr_release() on existing actions. Fixes: 86da71b57383d ("net_sched: Introduce skbmod action") Signed-off-by: Roman Mashak Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_skbmod.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index bbcbdce732cc..ad050d7d4b46 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -131,8 +131,11 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, if (exists && bind) return 0; - if (!lflags) + if (!lflags) { + if (exists) + tcf_idr_release(*a, bind); return -EINVAL; + } if (!exists) { ret = tcf_idr_create(tn, parm->index, est, a, From 1d1c8e5f0d95c2c97f29122741aed7d0dfb2b391 Mon Sep 17 00:00:00 2001 From: "Shuah Khan (Samsung OSG)" Date: Fri, 11 May 2018 16:01:32 -0700 Subject: [PATCH 250/347] MAINTAINERS: update Shuah's email address Update email address in MAINTAINERS file due to IT infrastructure changes at Samsung. Link: http://lkml.kernel.org/r/20180501212815.25911-1-shuah@kernel.org Signed-off-by: Shuah Khan (Samsung OSG) Cc: Mauro Carvalho Chehab Cc: David S. Miller Cc: Greg Kroah-Hartman Cc: Linus Walleij Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 3 --- 1 file changed, 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index df6e9bb2559a..b495113b3c2d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3691,7 +3691,6 @@ F: drivers/cpufreq/arm_big_little_dt.c CPU POWER MONITORING SUBSYSTEM M: Thomas Renninger -M: Shuah Khan M: Shuah Khan L: linux-pm@vger.kernel.org S: Maintained @@ -7696,7 +7695,6 @@ F: include/linux/sunrpc/ F: include/uapi/linux/sunrpc/ KERNEL SELFTEST FRAMEWORK -M: Shuah Khan M: Shuah Khan L: linux-kselftest@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git @@ -14650,7 +14648,6 @@ F: drivers/usb/common/usb-otg-fsm.c USB OVER IP DRIVER M: Valentina Manea -M: Shuah Khan M: Shuah Khan L: linux-usb@vger.kernel.org S: Maintained From c9cf87ea6a4725bc586cc5776c3fb6042a264b7d Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Fri, 11 May 2018 16:01:35 -0700 Subject: [PATCH 251/347] KASAN: prohibit KASAN+STRUCTLEAK combination Currently STRUCTLEAK inserts initialization out of live scope of variables from KASAN point of view. This leads to KASAN false positive reports. Prohibit this combination for now. Link: http://lkml.kernel.org/r/20180419172451.104700-1-dvyukov@google.com Signed-off-by: Dmitry Vyukov Acked-by: Kees Cook Cc: Fengguang Wu Cc: Sergey Senozhatsky Cc: Andrey Ryabinin Cc: Dennis Zhou Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/Kconfig b/arch/Kconfig index 8e0d665c8d53..75dd23acf133 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -464,6 +464,10 @@ config GCC_PLUGIN_LATENT_ENTROPY config GCC_PLUGIN_STRUCTLEAK bool "Force initialization of variables containing userspace addresses" depends on GCC_PLUGINS + # Currently STRUCTLEAK inserts initialization out of live scope of + # variables from KASAN point of view. This leads to KASAN false + # positive reports. Prohibit this combination for now. + depends on !KASAN_EXTRA help This plugin zero-initializes any structures containing a __user attribute. This can prevent some classes of information From 4ba281d5bd9907355e6b79fb72049c9ed50cc670 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Fri, 11 May 2018 16:01:39 -0700 Subject: [PATCH 252/347] lib/find_bit_benchmark.c: avoid soft lockup in test_find_first_bit() test_find_first_bit() is intentionally sub-optimal, and may cause soft lockup due to long time of run on some systems. So decrease length of bitmap to traverse to avoid lockup. With the change below, time of test execution doesn't exceed 0.2 seconds on my testing system. Link: http://lkml.kernel.org/r/20180420171949.15710-1-ynorov@caviumnetworks.com Fixes: 4441fca0a27f5 ("lib: test module for find_*_bit() functions") Signed-off-by: Yury Norov Reviewed-by: Andrew Morton Reported-by: Fengguang Wu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/find_bit_benchmark.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/find_bit_benchmark.c b/lib/find_bit_benchmark.c index 5985a25e6cbc..5367ffa5c18f 100644 --- a/lib/find_bit_benchmark.c +++ b/lib/find_bit_benchmark.c @@ -132,7 +132,12 @@ static int __init find_bit_test(void) test_find_next_bit(bitmap, BITMAP_LEN); test_find_next_zero_bit(bitmap, BITMAP_LEN); test_find_last_bit(bitmap, BITMAP_LEN); - test_find_first_bit(bitmap, BITMAP_LEN); + + /* + * test_find_first_bit() may take some time, so + * traverse only part of bitmap to avoid soft lockup. + */ + test_find_first_bit(bitmap, BITMAP_LEN / 10); test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN); pr_err("\nStart testing find_bit() with sparse bitmap\n"); From ae646f0b9ca135b87bc73ff606ef996c3029780a Mon Sep 17 00:00:00 2001 From: Jeffrey Hugo Date: Fri, 11 May 2018 16:01:42 -0700 Subject: [PATCH 253/347] init: fix false positives in W+X checking load_module() creates W+X mappings via __vmalloc_node_range() (from layout_and_allocate()->move_module()->module_alloc()) by using PAGE_KERNEL_EXEC. These mappings are later cleaned up via "call_rcu_sched(&freeinit->rcu, do_free_init)" from do_init_module(). This is a problem because call_rcu_sched() queues work, which can be run after debug_checkwx() is run, resulting in a race condition. If hit, the race results in a nasty splat about insecure W+X mappings, which results in a poor user experience as these are not the mappings that debug_checkwx() is intended to catch. This issue is observed on multiple arm64 platforms, and has been artificially triggered on an x86 platform. Address the race by flushing the queued work before running the arch-defined mark_rodata_ro() which then calls debug_checkwx(). Link: http://lkml.kernel.org/r/1525103946-29526-1-git-send-email-jhugo@codeaurora.org Fixes: e1a58320a38d ("x86/mm: Warn on W^X mappings") Signed-off-by: Jeffrey Hugo Reported-by: Timur Tabi Reported-by: Jan Glauber Acked-by: Kees Cook Acked-by: Ingo Molnar Acked-by: Will Deacon Acked-by: Laura Abbott Cc: Mark Rutland Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Stephen Smalley Cc: Thomas Gleixner Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/main.c | 7 +++++++ kernel/module.c | 5 +++++ 2 files changed, 12 insertions(+) diff --git a/init/main.c b/init/main.c index a404936d85d8..fd37315835b4 100644 --- a/init/main.c +++ b/init/main.c @@ -1034,6 +1034,13 @@ __setup("rodata=", set_debug_rodata); static void mark_readonly(void) { if (rodata_enabled) { + /* + * load_module() results in W+X mappings, which are cleaned up + * with call_rcu_sched(). Let's make sure that queued work is + * flushed so that we don't hit false positives looking for + * insecure pages which are W+X. + */ + rcu_barrier_sched(); mark_rodata_ro(); rodata_test(); } else diff --git a/kernel/module.c b/kernel/module.c index ce8066b88178..c9bea7f2b43e 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3517,6 +3517,11 @@ static noinline int do_init_module(struct module *mod) * walking this with preempt disabled. In all the failure paths, we * call synchronize_sched(), but we don't want to slow down the success * path, so use actual RCU here. + * Note that module_alloc() on most architectures creates W+X page + * mappings which won't be cleaned up until do_free_init() runs. Any + * code such as mark_rodata_ro() which depends on those mappings to + * be cleaned up needs to sync with the queued work - ie + * rcu_barrier_sched() */ call_rcu_sched(&freeinit->rcu, do_free_init); mutex_unlock(&module_mutex); From 6098d7e136692f9c6e23ae362c62ec822343e4d5 Mon Sep 17 00:00:00 2001 From: Vitaly Wool Date: Fri, 11 May 2018 16:01:46 -0700 Subject: [PATCH 254/347] z3fold: fix reclaim lock-ups Do not try to optimize in-page object layout while the page is under reclaim. This fixes lock-ups on reclaim and improves reclaim performance at the same time. [akpm@linux-foundation.org: coding-style fixes] Link: http://lkml.kernel.org/r/20180430125800.444cae9706489f412ad12621@gmail.com Signed-off-by: Vitaly Wool Reported-by: Guenter Roeck Tested-by: Guenter Roeck Cc: Cc: Matthew Wilcox Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/z3fold.c | 42 ++++++++++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/mm/z3fold.c b/mm/z3fold.c index c0bca6153b95..4b366d181f35 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c @@ -144,7 +144,8 @@ enum z3fold_page_flags { PAGE_HEADLESS = 0, MIDDLE_CHUNK_MAPPED, NEEDS_COMPACTING, - PAGE_STALE + PAGE_STALE, + UNDER_RECLAIM }; /***************** @@ -173,6 +174,7 @@ static struct z3fold_header *init_z3fold_page(struct page *page, clear_bit(MIDDLE_CHUNK_MAPPED, &page->private); clear_bit(NEEDS_COMPACTING, &page->private); clear_bit(PAGE_STALE, &page->private); + clear_bit(UNDER_RECLAIM, &page->private); spin_lock_init(&zhdr->page_lock); kref_init(&zhdr->refcount); @@ -756,6 +758,10 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle) atomic64_dec(&pool->pages_nr); return; } + if (test_bit(UNDER_RECLAIM, &page->private)) { + z3fold_page_unlock(zhdr); + return; + } if (test_and_set_bit(NEEDS_COMPACTING, &page->private)) { z3fold_page_unlock(zhdr); return; @@ -840,6 +846,8 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries) kref_get(&zhdr->refcount); list_del_init(&zhdr->buddy); zhdr->cpu = -1; + set_bit(UNDER_RECLAIM, &page->private); + break; } list_del_init(&page->lru); @@ -887,25 +895,35 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries) goto next; } next: - spin_lock(&pool->lock); if (test_bit(PAGE_HEADLESS, &page->private)) { if (ret == 0) { - spin_unlock(&pool->lock); free_z3fold_page(page); return 0; } - } else if (kref_put(&zhdr->refcount, release_z3fold_page)) { - atomic64_dec(&pool->pages_nr); + spin_lock(&pool->lock); + list_add(&page->lru, &pool->lru); spin_unlock(&pool->lock); - return 0; + } else { + z3fold_page_lock(zhdr); + clear_bit(UNDER_RECLAIM, &page->private); + if (kref_put(&zhdr->refcount, + release_z3fold_page_locked)) { + atomic64_dec(&pool->pages_nr); + return 0; + } + /* + * if we are here, the page is still not completely + * free. Take the global pool lock then to be able + * to add it back to the lru list + */ + spin_lock(&pool->lock); + list_add(&page->lru, &pool->lru); + spin_unlock(&pool->lock); + z3fold_page_unlock(zhdr); } - /* - * Add to the beginning of LRU. - * Pool lock has to be kept here to ensure the page has - * not already been released - */ - list_add(&page->lru, &pool->lru); + /* We started off locked to we need to lock the pool back */ + spin_lock(&pool->lock); } spin_unlock(&pool->lock); return -EAGAIN; From 27227c733852f71008e9bf165950bb2edaed3a90 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Fri, 11 May 2018 16:01:50 -0700 Subject: [PATCH 255/347] mm: sections are not offlined during memory hotremove Memory hotplug and hotremove operate with per-block granularity. If the machine has a large amount of memory (more than 64G), the size of a memory block can span multiple sections. By mistake, during hotremove we set only the first section to offline state. The bug was discovered because kernel selftest started to fail: https://lkml.kernel.org/r/20180423011247.GK5563@yexl-desktop After commit, "mm/memory_hotplug: optimize probe routine". But, the bug is older than this commit. In this optimization we also added a check for sections to be in a proper state during hotplug operation. Link: http://lkml.kernel.org/r/20180427145257.15222-1-pasha.tatashin@oracle.com Fixes: 2d070eab2e82 ("mm: consider zone which is not fully populated to have holes") Signed-off-by: Pavel Tatashin Acked-by: Michal Hocko Reviewed-by: Andrew Morton Cc: Vlastimil Babka Cc: Steven Sistare Cc: Daniel Jordan Cc: "Kirill A. Shutemov" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/sparse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/sparse.c b/mm/sparse.c index 62eef264a7bd..73dc2fcc0eab 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -629,7 +629,7 @@ void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn) unsigned long pfn; for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { - unsigned long section_nr = pfn_to_section_nr(start_pfn); + unsigned long section_nr = pfn_to_section_nr(pfn); struct mem_section *ms; /* From 7aaf7727235870f497eb928f728f7773d6df3b40 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Fri, 11 May 2018 16:01:53 -0700 Subject: [PATCH 256/347] mm: don't show nr_indirectly_reclaimable in /proc/vmstat Don't show nr_indirectly_reclaimable in /proc/vmstat, because there is no need to export this vm counter to userspace, and some changes are expected in reclaimable object accounting, which can alter this counter. Link: http://lkml.kernel.org/r/20180425191422.9159-1-guro@fb.com Signed-off-by: Roman Gushchin Acked-by: Vlastimil Babka Reviewed-by: Andrew Morton Cc: Matthew Wilcox Cc: Alexander Viro Cc: Michal Hocko Cc: Johannes Weiner Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmstat.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/mm/vmstat.c b/mm/vmstat.c index 536332e988b8..a2b9518980ce 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1161,7 +1161,7 @@ const char * const vmstat_text[] = { "nr_vmscan_immediate_reclaim", "nr_dirtied", "nr_written", - "nr_indirectly_reclaimable", + "", /* nr_indirectly_reclaimable */ /* enum writeback_stat_item counters */ "nr_dirty_threshold", @@ -1740,6 +1740,10 @@ static int vmstat_show(struct seq_file *m, void *arg) unsigned long *l = arg; unsigned long off = l - (unsigned long *)m->private; + /* Skip hidden vmstat items. */ + if (*vmstat_text[off] == '\0') + return 0; + seq_puts(m, vmstat_text[off]); seq_put_decimal_ull(m, " ", *l); seq_putc(m, '\n'); From 3955333df9a50e8783d115613a397ae55d905080 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Fri, 11 May 2018 16:01:57 -0700 Subject: [PATCH 257/347] proc/kcore: don't bounds check against address 0 The existing kcore code checks for bad addresses against __va(0) with the assumption that this is the lowest address on the system. This may not hold true on some systems (e.g. arm64) and produce overflows and crashes. Switch to using other functions to validate the address range. It's currently only seen on arm64 and it's not clear if anyone wants to use that particular combination on a stable release. So this is not urgent for stable. Link: http://lkml.kernel.org/r/20180501201143.15121-1-labbott@redhat.com Signed-off-by: Laura Abbott Tested-by: Dave Anderson Cc: Kees Cook Cc: Ard Biesheuvel Cc: Ingo Molnar Cc: Andi Kleen Cc: Alexey Dobriyan a Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/kcore.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index d1e82761de81..e64ecb9f2720 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -209,25 +209,34 @@ kclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg) { struct list_head *head = (struct list_head *)arg; struct kcore_list *ent; + struct page *p; + + if (!pfn_valid(pfn)) + return 1; + + p = pfn_to_page(pfn); + if (!memmap_valid_within(pfn, p, page_zone(p))) + return 1; ent = kmalloc(sizeof(*ent), GFP_KERNEL); if (!ent) return -ENOMEM; - ent->addr = (unsigned long)__va((pfn << PAGE_SHIFT)); + ent->addr = (unsigned long)page_to_virt(p); ent->size = nr_pages << PAGE_SHIFT; - /* Sanity check: Can happen in 32bit arch...maybe */ - if (ent->addr < (unsigned long) __va(0)) + if (!virt_addr_valid(ent->addr)) goto free_out; /* cut not-mapped area. ....from ppc-32 code. */ if (ULONG_MAX - ent->addr < ent->size) ent->size = ULONG_MAX - ent->addr; - /* cut when vmalloc() area is higher than direct-map area */ - if (VMALLOC_START > (unsigned long)__va(0)) { - if (ent->addr > VMALLOC_START) - goto free_out; + /* + * We've already checked virt_addr_valid so we know this address + * is a valid pointer, therefore we can check against it to determine + * if we need to trim + */ + if (VMALLOC_START > ent->addr) { if (VMALLOC_START - ent->addr < ent->size) ent->size = VMALLOC_START - ent->addr; } From 013567be19761e2d14fc2a2676fe7686ac54c9ac Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Fri, 11 May 2018 16:02:00 -0700 Subject: [PATCH 258/347] mm: migrate: fix double call of radix_tree_replace_slot() radix_tree_replace_slot() is called twice for head page, it's obviously a bug. Let's fix it. Link: http://lkml.kernel.org/r/20180423072101.GA12157@hori1.linux.bs1.fc.nec.co.jp Fixes: e71769ae5260 ("mm: enable thp migration for shmem thp") Signed-off-by: Naoya Horiguchi Reported-by: Matthew Wilcox Reviewed-by: Andrew Morton Cc: Michal Hocko Cc: "Kirill A. Shutemov" Cc: Zi Yan Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/migrate.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index 568433023831..8c0af0f7cab1 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -528,14 +528,12 @@ int migrate_page_move_mapping(struct address_space *mapping, int i; int index = page_index(page); - for (i = 0; i < HPAGE_PMD_NR; i++) { + for (i = 1; i < HPAGE_PMD_NR; i++) { pslot = radix_tree_lookup_slot(&mapping->i_pages, index + i); radix_tree_replace_slot(&mapping->i_pages, pslot, newpage + i); } - } else { - radix_tree_replace_slot(&mapping->i_pages, pslot, newpage); } /* From 27ae357fa82be5ab73b2ef8d39dcb8ca2563483a Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Fri, 11 May 2018 16:02:04 -0700 Subject: [PATCH 259/347] mm, oom: fix concurrent munlock and oom reaper unmap, v3 Since exit_mmap() is done without the protection of mm->mmap_sem, it is possible for the oom reaper to concurrently operate on an mm until MMF_OOM_SKIP is set. This allows munlock_vma_pages_all() to concurrently run while the oom reaper is operating on a vma. Since munlock_vma_pages_range() depends on clearing VM_LOCKED from vm_flags before actually doing the munlock to determine if any other vmas are locking the same memory, the check for VM_LOCKED in the oom reaper is racy. This is especially noticeable on architectures such as powerpc where clearing a huge pmd requires serialize_against_pte_lookup(). If the pmd is zapped by the oom reaper during follow_page_mask() after the check for pmd_none() is bypassed, this ends up deferencing a NULL ptl or a kernel oops. Fix this by manually freeing all possible memory from the mm before doing the munlock and then setting MMF_OOM_SKIP. The oom reaper can not run on the mm anymore so the munlock is safe to do in exit_mmap(). It also matches the logic that the oom reaper currently uses for determining when to set MMF_OOM_SKIP itself, so there's no new risk of excessive oom killing. This issue fixes CVE-2018-1000200. Link: http://lkml.kernel.org/r/alpine.DEB.2.21.1804241526320.238665@chino.kir.corp.google.com Fixes: 212925802454 ("mm: oom: let oom_reap_task and exit_mmap run concurrently") Signed-off-by: David Rientjes Suggested-by: Tetsuo Handa Acked-by: Michal Hocko Cc: Andrea Arcangeli Cc: [4.14+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/oom.h | 2 ++ mm/mmap.c | 44 ++++++++++++++---------- mm/oom_kill.c | 81 ++++++++++++++++++++++++--------------------- 3 files changed, 71 insertions(+), 56 deletions(-) diff --git a/include/linux/oom.h b/include/linux/oom.h index 5bad038ac012..6adac113e96d 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -95,6 +95,8 @@ static inline int check_stable_address_space(struct mm_struct *mm) return 0; } +void __oom_reap_task_mm(struct mm_struct *mm); + extern unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg, const nodemask_t *nodemask, unsigned long totalpages); diff --git a/mm/mmap.c b/mm/mmap.c index 9d5968d1e8e3..d6836566e4e5 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -3024,6 +3024,32 @@ void exit_mmap(struct mm_struct *mm) /* mm's last user has gone, and its about to be pulled down */ mmu_notifier_release(mm); + if (unlikely(mm_is_oom_victim(mm))) { + /* + * Manually reap the mm to free as much memory as possible. + * Then, as the oom reaper does, set MMF_OOM_SKIP to disregard + * this mm from further consideration. Taking mm->mmap_sem for + * write after setting MMF_OOM_SKIP will guarantee that the oom + * reaper will not run on this mm again after mmap_sem is + * dropped. + * + * Nothing can be holding mm->mmap_sem here and the above call + * to mmu_notifier_release(mm) ensures mmu notifier callbacks in + * __oom_reap_task_mm() will not block. + * + * This needs to be done before calling munlock_vma_pages_all(), + * which clears VM_LOCKED, otherwise the oom reaper cannot + * reliably test it. + */ + mutex_lock(&oom_lock); + __oom_reap_task_mm(mm); + mutex_unlock(&oom_lock); + + set_bit(MMF_OOM_SKIP, &mm->flags); + down_write(&mm->mmap_sem); + up_write(&mm->mmap_sem); + } + if (mm->locked_vm) { vma = mm->mmap; while (vma) { @@ -3045,24 +3071,6 @@ void exit_mmap(struct mm_struct *mm) /* update_hiwater_rss(mm) here? but nobody should be looking */ /* Use -1 here to ensure all VMAs in the mm are unmapped */ unmap_vmas(&tlb, vma, 0, -1); - - if (unlikely(mm_is_oom_victim(mm))) { - /* - * Wait for oom_reap_task() to stop working on this - * mm. Because MMF_OOM_SKIP is already set before - * calling down_read(), oom_reap_task() will not run - * on this "mm" post up_write(). - * - * mm_is_oom_victim() cannot be set from under us - * either because victim->mm is already set to NULL - * under task_lock before calling mmput and oom_mm is - * set not NULL by the OOM killer only if victim->mm - * is found not NULL while holding the task_lock. - */ - set_bit(MMF_OOM_SKIP, &mm->flags); - down_write(&mm->mmap_sem); - up_write(&mm->mmap_sem); - } free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING); tlb_finish_mmu(&tlb, 0, -1); diff --git a/mm/oom_kill.c b/mm/oom_kill.c index ff992fa8760a..8ba6cb88cf58 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -469,7 +469,6 @@ bool process_shares_mm(struct task_struct *p, struct mm_struct *mm) return false; } - #ifdef CONFIG_MMU /* * OOM Reaper kernel thread which tries to reap the memory used by the OOM @@ -480,16 +479,54 @@ static DECLARE_WAIT_QUEUE_HEAD(oom_reaper_wait); static struct task_struct *oom_reaper_list; static DEFINE_SPINLOCK(oom_reaper_lock); -static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm) +void __oom_reap_task_mm(struct mm_struct *mm) { - struct mmu_gather tlb; struct vm_area_struct *vma; + + /* + * Tell all users of get_user/copy_from_user etc... that the content + * is no longer stable. No barriers really needed because unmapping + * should imply barriers already and the reader would hit a page fault + * if it stumbled over a reaped memory. + */ + set_bit(MMF_UNSTABLE, &mm->flags); + + for (vma = mm->mmap ; vma; vma = vma->vm_next) { + if (!can_madv_dontneed_vma(vma)) + continue; + + /* + * Only anonymous pages have a good chance to be dropped + * without additional steps which we cannot afford as we + * are OOM already. + * + * We do not even care about fs backed pages because all + * which are reclaimable have already been reclaimed and + * we do not want to block exit_mmap by keeping mm ref + * count elevated without a good reason. + */ + if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) { + const unsigned long start = vma->vm_start; + const unsigned long end = vma->vm_end; + struct mmu_gather tlb; + + tlb_gather_mmu(&tlb, mm, start, end); + mmu_notifier_invalidate_range_start(mm, start, end); + unmap_page_range(&tlb, vma, start, end, NULL); + mmu_notifier_invalidate_range_end(mm, start, end); + tlb_finish_mmu(&tlb, start, end); + } + } +} + +static bool oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm) +{ bool ret = true; /* * We have to make sure to not race with the victim exit path * and cause premature new oom victim selection: - * __oom_reap_task_mm exit_mm + * oom_reap_task_mm exit_mm * mmget_not_zero * mmput * atomic_dec_and_test @@ -534,39 +571,8 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm) trace_start_task_reaping(tsk->pid); - /* - * Tell all users of get_user/copy_from_user etc... that the content - * is no longer stable. No barriers really needed because unmapping - * should imply barriers already and the reader would hit a page fault - * if it stumbled over a reaped memory. - */ - set_bit(MMF_UNSTABLE, &mm->flags); + __oom_reap_task_mm(mm); - for (vma = mm->mmap ; vma; vma = vma->vm_next) { - if (!can_madv_dontneed_vma(vma)) - continue; - - /* - * Only anonymous pages have a good chance to be dropped - * without additional steps which we cannot afford as we - * are OOM already. - * - * We do not even care about fs backed pages because all - * which are reclaimable have already been reclaimed and - * we do not want to block exit_mmap by keeping mm ref - * count elevated without a good reason. - */ - if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) { - const unsigned long start = vma->vm_start; - const unsigned long end = vma->vm_end; - - tlb_gather_mmu(&tlb, mm, start, end); - mmu_notifier_invalidate_range_start(mm, start, end); - unmap_page_range(&tlb, vma, start, end, NULL); - mmu_notifier_invalidate_range_end(mm, start, end); - tlb_finish_mmu(&tlb, start, end); - } - } pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n", task_pid_nr(tsk), tsk->comm, K(get_mm_counter(mm, MM_ANONPAGES)), @@ -587,14 +593,13 @@ static void oom_reap_task(struct task_struct *tsk) struct mm_struct *mm = tsk->signal->oom_mm; /* Retry the down_read_trylock(mmap_sem) a few times */ - while (attempts++ < MAX_OOM_REAP_RETRIES && !__oom_reap_task_mm(tsk, mm)) + while (attempts++ < MAX_OOM_REAP_RETRIES && !oom_reap_task_mm(tsk, mm)) schedule_timeout_idle(HZ/10); if (attempts <= MAX_OOM_REAP_RETRIES || test_bit(MMF_OOM_SKIP, &mm->flags)) goto done; - pr_info("oom_reaper: unable to reap pid:%d (%s)\n", task_pid_nr(tsk), tsk->comm); debug_show_all_locks(); From e4383029201470523c3ffe339bd7d57e9b4a7d65 Mon Sep 17 00:00:00 2001 From: Ashish Samant Date: Fri, 11 May 2018 16:02:07 -0700 Subject: [PATCH 260/347] ocfs2: take inode cluster lock before moving reflinked inode from orphan dir While reflinking an inode, we create a new inode in orphan directory, then take EX lock on it, reflink the original inode to orphan inode and release EX lock. Once the lock is released another node could request it in EX mode from ocfs2_recover_orphans() which causes downconvert of the lock, on this node, to NL mode. Later we attempt to initialize security acl for the orphan inode and move it to the reflink destination. However, while doing this we dont take EX lock on the inode. This could potentially cause problems because we could be starting transaction, accessing journal and modifying metadata of the inode while holding NL lock and with another node holding EX lock on the inode. Fix this by taking orphan inode cluster lock in EX mode before initializing security and moving orphan inode to reflink destination. Use the __tracker variant while taking inode lock to avoid recursive locking in the ocfs2_init_security_and_acl() call chain. Link: http://lkml.kernel.org/r/1523475107-7639-1-git-send-email-ashish.samant@oracle.com Signed-off-by: Ashish Samant Reviewed-by: Joseph Qi Reviewed-by: Junxiao Bi Acked-by: Jun Piao Cc: Mark Fasheh Cc: Joel Becker Cc: Changwei Ge Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/refcounttree.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 01c6b3894406..7869622af22a 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -4250,10 +4250,11 @@ out: static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry, bool preserve) { - int error; + int error, had_lock; struct inode *inode = d_inode(old_dentry); struct buffer_head *old_bh = NULL; struct inode *new_orphan_inode = NULL; + struct ocfs2_lock_holder oh; if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) return -EOPNOTSUPP; @@ -4295,6 +4296,14 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir, goto out; } + had_lock = ocfs2_inode_lock_tracker(new_orphan_inode, NULL, 1, + &oh); + if (had_lock < 0) { + error = had_lock; + mlog_errno(error); + goto out; + } + /* If the security isn't preserved, we need to re-initialize them. */ if (!preserve) { error = ocfs2_init_security_and_acl(dir, new_orphan_inode, @@ -4302,14 +4311,15 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir, if (error) mlog_errno(error); } -out: if (!error) { error = ocfs2_mv_orphaned_inode_to_new(dir, new_orphan_inode, new_dentry); if (error) mlog_errno(error); } + ocfs2_inode_unlock_tracker(new_orphan_inode, 1, &oh, had_lock); +out: if (new_orphan_inode) { /* * We need to open_unlock the inode no matter whether we From 78eb0c6356cda285c6ee6e29bea0c0188368103e Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Fri, 11 May 2018 16:02:11 -0700 Subject: [PATCH 261/347] scripts/faddr2line: fix error when addr2line output contains discriminator When addr2line output contains discriminator, the current awk script cannot parse it. This patch fixes it by extracting key words using regex which is more reliable. $ scripts/faddr2line vmlinux tlb_flush_mmu_free+0x26 tlb_flush_mmu_free+0x26/0x50: tlb_flush_mmu_free at mm/memory.c:258 (discriminator 3) scripts/faddr2line: eval: line 173: unexpected EOF while looking for matching `)' Link: http://lkml.kernel.org/r/1525323379-25193-1-git-send-email-changbin.du@intel.com Fixes: 6870c0165feaa5 ("scripts/faddr2line: show the code context") Signed-off-by: Changbin Du Cc: Thomas Gleixner Cc: Greg Kroah-Hartman Cc: Philippe Ombredanne Cc: NeilBrown Cc: Richard Weinberger Cc: Kate Stewart Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/faddr2line | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/faddr2line b/scripts/faddr2line index 9e5735a4d3a5..1876a741087c 100755 --- a/scripts/faddr2line +++ b/scripts/faddr2line @@ -170,7 +170,10 @@ __faddr2line() { echo "$file_lines" | while read -r line do echo $line - eval $(echo $line | awk -F "[ :]" '{printf("n1=%d;n2=%d;f=%s",$NF-5, $NF+5, $(NF-1))}') + n=$(echo $line | sed 's/.*:\([0-9]\+\).*/\1/g') + n1=$[$n-5] + n2=$[$n+5] + f=$(echo $line | sed 's/.*at \(.\+\):.*/\1/g') awk 'NR>=strtonum("'$n1'") && NR<=strtonum("'$n2'") {printf("%d\t%s\n", NR, $0)}' $f done From 2075b16e32c26e4031b9fd3cbe26c54676a8fcb5 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 11 May 2018 16:02:14 -0700 Subject: [PATCH 262/347] rbtree: include rcu.h Since commit c1adf20052d8 ("Introduce rb_replace_node_rcu()") rbtree_augmented.h uses RCU related data structures but does not include the header file. It works as long as it gets somehow included before that and fails otherwise. Link: http://lkml.kernel.org/r/20180504103159.19938-1-bigeasy@linutronix.de Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Andrew Morton Cc: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rbtree_augmented.h | 1 + include/linux/rbtree_latch.h | 1 + 2 files changed, 2 insertions(+) diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h index 6bfd2b581f75..af8a61be2d8d 100644 --- a/include/linux/rbtree_augmented.h +++ b/include/linux/rbtree_augmented.h @@ -26,6 +26,7 @@ #include #include +#include /* * Please note - only struct rb_augment_callbacks and the prototypes for diff --git a/include/linux/rbtree_latch.h b/include/linux/rbtree_latch.h index ece43e882b56..7d012faa509a 100644 --- a/include/linux/rbtree_latch.h +++ b/include/linux/rbtree_latch.h @@ -35,6 +35,7 @@ #include #include +#include struct latch_tree_node { struct rb_node node[2]; From 789ba28013ce23dbf5e9f5f014f4233b35523bf3 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 9 May 2018 17:31:15 +0100 Subject: [PATCH 263/347] Revert "sched/numa: Delay retrying placement for automatic NUMA balance after wake_affine()" This reverts commit 7347fc87dfe6b7315e74310ee1243dc222c68086. Srikar Dronamra pointed out that while the commit in question did show a performance improvement on ppc64, it did so at the cost of disabling active CPU migration by automatic NUMA balancing which was not the intent. The issue was that a serious flaw in the logic failed to ever active balance if SD_WAKE_AFFINE was disabled on scheduler domains. Even when it's enabled, the logic is still bizarre and against the original intent. Investigation showed that fixing the patch in either the way he suggested, using the correct comparison for jiffies values or introducing a new numa_migrate_deferred variable in task_struct all perform similarly to a revert with a mix of gains and losses depending on the workload, machine and socket count. The original intent of the commit was to handle a problem whereby wake_affine, idle balancing and automatic NUMA balancing disagree on the appropriate placement for a task. This was particularly true for cases where a single task was a massive waker of tasks but where wake_wide logic did not apply. This was particularly noticeable when a futex (a barrier) woke all worker threads and tried pulling the wakees to the waker nodes. In that specific case, it could be handled by tuning MPI or openMP appropriately, but the behavior is not illogical and was worth attempting to fix. However, the approach was wrong. Given that we're at rc4 and a fix is not obvious, it's better to play safe, revert this commit and retry later. Signed-off-by: Mel Gorman Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Srikar Dronamraju Cc: Linus Torvalds Cc: Thomas Gleixner Cc: efault@gmx.de Cc: ggherdovich@suse.cz Cc: hpa@zytor.com Cc: matt@codeblueprint.co.uk Cc: mpe@ellerman.id.au Link: http://lkml.kernel.org/r/20180509163115.6fnnyeg4vdm2ct4v@techsingularity.net Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 57 +-------------------------------------------- 1 file changed, 1 insertion(+), 56 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 54dc31e7ab9b..f43627c6bb3d 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1854,7 +1854,6 @@ static int task_numa_migrate(struct task_struct *p) static void numa_migrate_preferred(struct task_struct *p) { unsigned long interval = HZ; - unsigned long numa_migrate_retry; /* This task has no NUMA fault statistics yet */ if (unlikely(p->numa_preferred_nid == -1 || !p->numa_faults)) @@ -1862,18 +1861,7 @@ static void numa_migrate_preferred(struct task_struct *p) /* Periodically retry migrating the task to the preferred node */ interval = min(interval, msecs_to_jiffies(p->numa_scan_period) / 16); - numa_migrate_retry = jiffies + interval; - - /* - * Check that the new retry threshold is after the current one. If - * the retry is in the future, it implies that wake_affine has - * temporarily asked NUMA balancing to backoff from placement. - */ - if (numa_migrate_retry > p->numa_migrate_retry) - return; - - /* Safe to try placing the task on the preferred node */ - p->numa_migrate_retry = numa_migrate_retry; + p->numa_migrate_retry = jiffies + interval; /* Success if task is already running on preferred CPU */ if (task_node(p) == p->numa_preferred_nid) @@ -5922,48 +5910,6 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p, return this_eff_load < prev_eff_load ? this_cpu : nr_cpumask_bits; } -#ifdef CONFIG_NUMA_BALANCING -static void -update_wa_numa_placement(struct task_struct *p, int prev_cpu, int target) -{ - unsigned long interval; - - if (!static_branch_likely(&sched_numa_balancing)) - return; - - /* If balancing has no preference then continue gathering data */ - if (p->numa_preferred_nid == -1) - return; - - /* - * If the wakeup is not affecting locality then it is neutral from - * the perspective of NUMA balacing so continue gathering data. - */ - if (cpu_to_node(prev_cpu) == cpu_to_node(target)) - return; - - /* - * Temporarily prevent NUMA balancing trying to place waker/wakee after - * wakee has been moved by wake_affine. This will potentially allow - * related tasks to converge and update their data placement. The - * 4 * numa_scan_period is to allow the two-pass filter to migrate - * hot data to the wakers node. - */ - interval = max(sysctl_numa_balancing_scan_delay, - p->numa_scan_period << 2); - p->numa_migrate_retry = jiffies + msecs_to_jiffies(interval); - - interval = max(sysctl_numa_balancing_scan_delay, - current->numa_scan_period << 2); - current->numa_migrate_retry = jiffies + msecs_to_jiffies(interval); -} -#else -static void -update_wa_numa_placement(struct task_struct *p, int prev_cpu, int target) -{ -} -#endif - static int wake_affine(struct sched_domain *sd, struct task_struct *p, int this_cpu, int prev_cpu, int sync) { @@ -5979,7 +5925,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, if (target == nr_cpumask_bits) return prev_cpu; - update_wa_numa_placement(p, prev_cpu, target); schedstat_inc(sd->ttwu_move_affine); schedstat_inc(p->se.statistics.nr_wakeups_affine); return target; From 05e13bb57e6f181d7605f8608181c7e6fb7f591d Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sat, 12 May 2018 11:57:37 +0200 Subject: [PATCH 264/347] swiotlb: silent unwanted warning "buffer is full" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If DMA_ATTR_NO_WARN is passed to swiotlb_alloc_buffer(), it should be passed further down to swiotlb_tbl_map_single(). Otherwise we escape half of the warnings but still log the other half. This is one of the multiple causes of spurious warnings reported at: https://bugs.freedesktop.org/show_bug.cgi?id=104082 Signed-off-by: Jean Delvare Fixes: 0176adb00406 ("swiotlb: refactor coherent buffer allocation") Cc: Christoph Hellwig Cc: Christian König Cc: Michel Dänzer Cc: Takashi Iwai Cc: stable@vger.kernel.org # v4.16 --- lib/swiotlb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 12fbaa445637..cc640588f145 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -714,7 +714,7 @@ swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle, phys_addr = swiotlb_tbl_map_single(dev, __phys_to_dma(dev, io_tlb_start), - 0, size, DMA_FROM_DEVICE, 0); + 0, size, DMA_FROM_DEVICE, attrs); if (phys_addr == SWIOTLB_MAP_ERROR) goto out_warn; From 3f12888dfae2a48741c4caa9214885b3aaf350f9 Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Sat, 5 May 2018 13:38:03 -0500 Subject: [PATCH 265/347] ALSA: control: fix a redundant-copy issue In snd_ctl_elem_add_compat(), the fields of the struct 'data' need to be copied from the corresponding fields of the struct 'data32' in userspace. This is achieved by invoking copy_from_user() and get_user() functions. The problem here is that the 'type' field is copied twice. One is by copy_from_user() and one is by get_user(). Given that the 'type' field is not used between the two copies, the second copy is *completely* redundant and should be removed for better performance and cleanup. Also, these two copies can cause inconsistent data: as the struct 'data32' resides in userspace and a malicious userspace process can race to change the 'type' field between the two copies to cause inconsistent data. Depending on how the data is used in the future, such an inconsistency may cause potential security risks. For above reasons, we should take out the second copy. Signed-off-by: Wenwen Wang Cc: Signed-off-by: Takashi Iwai --- sound/core/control_compat.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/core/control_compat.c b/sound/core/control_compat.c index a848836a5de0..507fd5210c1c 100644 --- a/sound/core/control_compat.c +++ b/sound/core/control_compat.c @@ -396,8 +396,7 @@ static int snd_ctl_elem_add_compat(struct snd_ctl_file *file, if (copy_from_user(&data->id, &data32->id, sizeof(data->id)) || copy_from_user(&data->type, &data32->type, 3 * sizeof(u32))) goto error; - if (get_user(data->owner, &data32->owner) || - get_user(data->type, &data32->type)) + if (get_user(data->owner, &data32->owner)) goto error; switch (data->type) { case SNDRV_CTL_ELEM_TYPE_BOOLEAN: From c8beccc19b92f5172994c0732db689c08f4f98e5 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 8 May 2018 09:27:46 +0200 Subject: [PATCH 266/347] ALSA: hda: Add Lenovo C50 All in one to the power_save blacklist Power-saving is causing loud plops on the Lenovo C50 All in one, add it to the blacklist. BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1572975 Signed-off-by: Hans de Goede Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index b0c8c79848a9..a0c93b9c9a28 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2210,6 +2210,8 @@ static struct snd_pci_quirk power_save_blacklist[] = { SND_PCI_QUIRK(0x1849, 0x0c0c, "Asrock B85M-ITX", 0), /* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */ SND_PCI_QUIRK(0x1043, 0x8733, "Asus Prime X370-Pro", 0), + /* https://bugzilla.redhat.com/show_bug.cgi?id=1572975 */ + SND_PCI_QUIRK(0x17aa, 0x36a7, "Lenovo C50 All in one", 0), /* https://bugzilla.kernel.org/show_bug.cgi?id=198611 */ SND_PCI_QUIRK(0x17aa, 0x2227, "Lenovo X1 Carbon 3rd Gen", 0), {} From f9bc6b2dd9cf025f827f471769e1d88b527bfb91 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 4 May 2018 13:01:32 -0700 Subject: [PATCH 267/347] x86/amd_nb: Add support for Raven Ridge CPUs Add Raven Ridge root bridge and data fabric PCI IDs. This is required for amd_pci_dev_to_node_id() and amd_smn_read(). Cc: stable@vger.kernel.org # v4.16+ Tested-by: Gabriel Craciunescu Acked-by: Thomas Gleixner Signed-off-by: Guenter Roeck --- arch/x86/kernel/amd_nb.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index c88e0b127810..b481b95bd8f6 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -14,8 +14,11 @@ #include #define PCI_DEVICE_ID_AMD_17H_ROOT 0x1450 +#define PCI_DEVICE_ID_AMD_17H_M10H_ROOT 0x15d0 #define PCI_DEVICE_ID_AMD_17H_DF_F3 0x1463 #define PCI_DEVICE_ID_AMD_17H_DF_F4 0x1464 +#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F3 0x15eb +#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F4 0x15ec /* Protect the PCI config register pairs used for SMN and DF indirect access. */ static DEFINE_MUTEX(smn_mutex); @@ -24,6 +27,7 @@ static u32 *flush_words; static const struct pci_device_id amd_root_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_ROOT) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_ROOT) }, {} }; @@ -39,6 +43,7 @@ const struct pci_device_id amd_nb_misc_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F3) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) }, {} }; @@ -51,6 +56,7 @@ static const struct pci_device_id amd_nb_link_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F4) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) }, {} }; From 3b031622f598481970400519bd5abc2a16708282 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 4 May 2018 13:01:33 -0700 Subject: [PATCH 268/347] hwmon: (k10temp) Use API function to access System Management Network The SMN (System Management Network) on Family 17h AMD CPUs is also accessed from other drivers, specifically EDAC. Accessing it directly is racy. On top of that, accessing the SMN through root bridge 00:00 is wrong on multi-die CPUs and may result in reading the temperature from the wrong die. Use available API functions to fix the problem. For this to work, add dependency on AMD_NB. Also change the Raven Ridge PCI device ID to point to Data Fabric Function 3, since this ID is used by the API functions to find the CPU node. Cc: stable@vger.kernel.org # v4.16+ Tested-by: Gabriel Craciunescu Signed-off-by: Guenter Roeck --- drivers/hwmon/Kconfig | 2 +- drivers/hwmon/k10temp.c | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index f249a4428458..6ec307c93ece 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -272,7 +272,7 @@ config SENSORS_K8TEMP config SENSORS_K10TEMP tristate "AMD Family 10h+ temperature sensor" - depends on X86 && PCI + depends on X86 && PCI && AMD_NB help If you say yes here you get support for the temperature sensor(s) inside your CPU. Supported are later revisions of diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c index 34b5448b00be..3b73dee6fdc6 100644 --- a/drivers/hwmon/k10temp.c +++ b/drivers/hwmon/k10temp.c @@ -23,6 +23,7 @@ #include #include #include +#include #include MODULE_DESCRIPTION("AMD Family 10h+ CPU core temperature monitor"); @@ -40,8 +41,8 @@ static DEFINE_MUTEX(nb_smu_ind_mutex); #define PCI_DEVICE_ID_AMD_17H_DF_F3 0x1463 #endif -#ifndef PCI_DEVICE_ID_AMD_17H_RR_NB -#define PCI_DEVICE_ID_AMD_17H_RR_NB 0x15d0 +#ifndef PCI_DEVICE_ID_AMD_17H_M10H_DF_F3 +#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F3 0x15eb #endif /* CPUID function 0x80000001, ebx */ @@ -136,8 +137,8 @@ static void read_tempreg_nb_f15(struct pci_dev *pdev, u32 *regval) static void read_tempreg_nb_f17(struct pci_dev *pdev, u32 *regval) { - amd_nb_index_read(pdev, PCI_DEVFN(0, 0), 0x60, - F17H_M01H_REPORTED_TEMP_CTRL_OFFSET, regval); + amd_smn_read(amd_pci_dev_to_node_id(pdev), + F17H_M01H_REPORTED_TEMP_CTRL_OFFSET, regval); } static ssize_t temp1_input_show(struct device *dev, @@ -322,7 +323,7 @@ static const struct pci_device_id k10temp_id_table[] = { { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) }, { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F3) }, { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_17H_DF_F3) }, - { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_17H_RR_NB) }, + { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F3) }, {} }; MODULE_DEVICE_TABLE(pci, k10temp_id_table); From 21493316a3c4598f308d5a9fa31cc74639c4caff Mon Sep 17 00:00:00 2001 From: Federico Cuello Date: Wed, 9 May 2018 00:13:38 +0200 Subject: [PATCH 269/347] ALSA: usb: mixer: volume quirk for CM102-A+/102S+ Currently it's not possible to set volume lower than 26% (it just mutes). Also fixes this warning: Warning! Unlikely big volume range (=9472), cval->res is probably wrong. [13] FU [PCM Playback Volume] ch = 2, val = -9473/-1/1 , and volume works fine for full range. Signed-off-by: Federico Cuello Cc: Signed-off-by: Takashi Iwai --- sound/usb/mixer.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 344d7b069d59..bb5ab7a7dfa5 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -967,6 +967,14 @@ static void volume_control_quirks(struct usb_mixer_elem_info *cval, } break; + case USB_ID(0x0d8c, 0x0103): + if (!strcmp(kctl->id.name, "PCM Playback Volume")) { + usb_audio_info(chip, + "set volume quirk for CM102-A+/102S+\n"); + cval->min = -256; + } + break; + case USB_ID(0x0471, 0x0101): case USB_ID(0x0471, 0x0104): case USB_ID(0x0471, 0x0105): From 2f0d520a1a73555ac51c19cd494493f60b4c1cea Mon Sep 17 00:00:00 2001 From: Jeremy Soller Date: Mon, 7 May 2018 09:28:45 -0600 Subject: [PATCH 270/347] ALSA: hda/realtek - Clevo P950ER ALC1220 Fixup This adds support for the P950ER, which has the same required fixup as the P950HR, but has a different PCI ID. Signed-off-by: Jeremy Soller Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 2dd34dd77447..01a6643fc7d4 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2363,6 +2363,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK_VENDOR(0x1462, "MSI", ALC882_FIXUP_GPIO3), SND_PCI_QUIRK(0x147b, 0x107a, "Abit AW9D-MAX", ALC882_FIXUP_ABIT_AW9D_MAX), SND_PCI_QUIRK(0x1558, 0x9501, "Clevo P950HR", ALC1220_FIXUP_CLEVO_P950), + SND_PCI_QUIRK(0x1558, 0x95e2, "Clevo P950ER", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK_VENDOR(0x1558, "Clevo laptop", ALC882_FIXUP_EAPD), SND_PCI_QUIRK(0x161f, 0x2054, "Medion laptop", ALC883_FIXUP_EAPD), SND_PCI_QUIRK(0x17aa, 0x3a0d, "Lenovo Y530", ALC882_FIXUP_LENOVO_Y530), From 67b8d5c7081221efa252e111cd52532ec6d4266f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 13 May 2018 16:15:17 -0700 Subject: [PATCH 271/347] Linux 4.17-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d0d2652db174..ba3106b36597 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 4 PATCHLEVEL = 17 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Merciless Moray # *DOCUMENTATION* From 2278446e2b7cd33ad894b32e7eb63afc7db6c86e Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Mon, 14 May 2018 11:57:23 +0300 Subject: [PATCH 272/347] xhci: Fix USB3 NULL pointer dereference at logical disconnect. Hub driver will try to disable a USB3 device twice at logical disconnect, racing with xhci_free_dev() callback from the first port disable. This can be triggered with "udisksctl power-off --block-device " or by writing "1" to the "remove" sysfs file for a USB3 device in 4.17-rc4. USB3 devices don't have a similar disabled link state as USB2 devices, and use a U3 suspended link state instead. In this state the port is still enabled and connected. hub_port_connect() first disconnects the device, then later it notices that device is still enabled (due to U3 states) it will try to disable the port again (set to U3). The xhci_free_dev() called during device disable is async, so checking for existing xhci->devs[i] when setting link state to U3 the second time was successful, even if device was being freed. The regression was caused by, and whole thing revealed by, Commit 44a182b9d177 ("xhci: Fix use-after-free in xhci_free_virt_device") which sets xhci->devs[i]->udev to NULL before xhci_virt_dev() returned. and causes a NULL pointer dereference the second time we try to set U3. Fix this by checking xhci->devs[i]->udev exists before setting link state. The original patch went to stable so this fix needs to be applied there as well. Fixes: 44a182b9d177 ("xhci: Fix use-after-free in xhci_free_virt_device") Cc: Reported-by: Jordan Glover Tested-by: Jordan Glover Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 72ebbc908e19..32cd52ca8318 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -354,7 +354,7 @@ int xhci_find_slot_id_by_port(struct usb_hcd *hcd, struct xhci_hcd *xhci, slot_id = 0; for (i = 0; i < MAX_HC_SLOTS; i++) { - if (!xhci->devs[i]) + if (!xhci->devs[i] || !xhci->devs[i]->udev) continue; speed = xhci->devs[i]->udev->speed; if (((speed >= USB_SPEED_SUPER) == (hcd->speed >= HCD_USB3)) From 20943f984967477c906522112d2b6b5a29f94684 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Wed, 2 May 2018 20:50:21 +0100 Subject: [PATCH 273/347] drm/i915/userptr: reject zero user_size Operating on a zero sized GEM userptr object will lead to explosions. Fixes: 5cc9ed4b9a7a ("drm/i915: Introduce mapping of user pages into video memory (userptr) ioctl") Testcase: igt/gem_userptr_blits/input-checking Signed-off-by: Matthew Auld Cc: Chris Wilson Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20180502195021.30900-1-matthew.auld@intel.com (cherry picked from commit c11c7bfd213495784b22ef82a69b6489f8d0092f) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_userptr.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index d596a8302ca3..854bd51b9478 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -778,6 +778,9 @@ i915_gem_userptr_ioctl(struct drm_device *dev, I915_USERPTR_UNSYNCHRONIZED)) return -EINVAL; + if (!args->user_size) + return -EINVAL; + if (offset_in_page(args->user_ptr | args->user_size)) return -EINVAL; From cd078bf95df29d827daca0274a9a13821c11eedb Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 11 May 2018 13:11:45 +0100 Subject: [PATCH 274/347] drm/i915/execlists: Use rmb() to order CSB reads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We assume that the CSB is written using the normal ringbuffer coherency protocols, as outlined in kernel/events/ring_buffer.c: * (HW) (DRIVER) * * if (LOAD ->data_tail) { LOAD ->data_head * (A) smp_rmb() (C) * STORE $data LOAD $data * smp_wmb() (B) smp_mb() (D) * STORE ->data_head STORE ->data_tail * } So we assume that the HW fulfils its ordering requirements (B), and so we should use a complimentary rmb (C) to ensure that our read of its WRITE pointer is completed before we start accessing the data. The final mb (D) is implied by the uncached mmio we perform to inform the HW of our READ pointer. References: https://bugs.freedesktop.org/show_bug.cgi?id=105064 References: https://bugs.freedesktop.org/show_bug.cgi?id=105888 References: https://bugs.freedesktop.org/show_bug.cgi?id=106185 Fixes: 767a983ab255 ("drm/i915/execlists: Read the context-status HEAD from the HWSP") References: 61bf9719fa17 ("drm/i915/cnl: Use mmio access to context status buffer") Suggested-by: Mika Kuoppala Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Cc: Michał Winiarski Cc: Rafael Antognolli Cc: Michel Thierry Cc: Timo Aaltonen Tested-by: Timo Aaltonen Acked-by: Michel Thierry Acked-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20180511121147.31915-1-chris@chris-wilson.co.uk (cherry picked from commit 77dfedb5be03779f9a5d83e323a1b36e32090105) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_lrc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index e3a5f673ff67..8704f7f8d072 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -884,6 +884,7 @@ static void execlists_submission_tasklet(unsigned long data) head = execlists->csb_head; tail = READ_ONCE(buf[write_idx]); + rmb(); /* Hopefully paired with a wmb() in HW */ } GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n", engine->name, From b61f7dcf4eb2653e870c9079b02d11a0834cfe39 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 27 Apr 2018 20:46:22 +0100 Subject: [PATCH 275/347] afs: Fix directory page locking The afs directory loading code (primarily afs_read_dir()) locks all the pages that hold a directory's content blob to defend against getdents/getdents races and getdents/lookup races where the competitors issue conflicting reads on the same data. As the reads will complete consecutively, they may retrieve different versions of the data and one may overwrite the data that the other is busy parsing. Fix this by not locking the pages at all, but rather by turning the validation lock into an rwsem and getting an exclusive lock on it whilst reading the data or validating the attributes and a shared lock whilst parsing the data. Sharing the attribute validation lock should be fine as the data fetch will retrieve the attributes also. The individual page locks aren't needed at all as the only place they're being used is to serialise data loading. Without this patch, the: if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) { ... } part of afs_read_dir() may be skipped, leaving the pages unlocked when we hit the success: clause - in which case we try to unlock the not-locked pages, leading to the following oops: page:ffffe38b405b4300 count:3 mapcount:0 mapping:ffff98156c83a978 index:0x0 flags: 0xfffe000001004(referenced|private) raw: 000fffe000001004 ffff98156c83a978 0000000000000000 00000003ffffffff raw: dead000000000100 dead000000000200 0000000000000001 ffff98156b27c000 page dumped because: VM_BUG_ON_PAGE(!PageLocked(page)) page->mem_cgroup:ffff98156b27c000 ------------[ cut here ]------------ kernel BUG at mm/filemap.c:1205! ... RIP: 0010:unlock_page+0x43/0x50 ... Call Trace: afs_dir_iterate+0x789/0x8f0 [kafs] ? _cond_resched+0x15/0x30 ? kmem_cache_alloc_trace+0x166/0x1d0 ? afs_do_lookup+0x69/0x490 [kafs] ? afs_do_lookup+0x101/0x490 [kafs] ? key_default_cmp+0x20/0x20 ? request_key+0x3c/0x80 ? afs_lookup+0xf1/0x340 [kafs] ? __lookup_slow+0x97/0x150 ? lookup_slow+0x35/0x50 ? walk_component+0x1bf/0x490 ? path_lookupat.isra.52+0x75/0x200 ? filename_lookup.part.66+0xa0/0x170 ? afs_end_vnode_operation+0x41/0x60 [kafs] ? __check_object_size+0x9c/0x171 ? strncpy_from_user+0x4a/0x170 ? vfs_statx+0x73/0xe0 ? __do_sys_newlstat+0x39/0x70 ? __x64_sys_getdents+0xc9/0x140 ? __x64_sys_getdents+0x140/0x140 ? do_syscall_64+0x5b/0x160 ? entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: f3ddee8dc4e2 ("afs: Fix directory handling") Reported-by: Marc Dionne Signed-off-by: David Howells --- fs/afs/dir.c | 38 ++++++++++++++++++-------------------- fs/afs/inode.c | 6 +++--- fs/afs/internal.h | 2 +- fs/afs/super.c | 2 +- 4 files changed, 23 insertions(+), 25 deletions(-) diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 5889f70d4d27..2853acd64482 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -180,6 +180,7 @@ static int afs_dir_open(struct inode *inode, struct file *file) * get reclaimed during the iteration. */ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key) + __acquires(&dvnode->validate_lock) { struct afs_read *req; loff_t i_size; @@ -261,18 +262,21 @@ retry: /* If we're going to reload, we need to lock all the pages to prevent * races. */ + ret = -ERESTARTSYS; + if (down_read_killable(&dvnode->validate_lock) < 0) + goto error; + + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + goto success; + + up_read(&dvnode->validate_lock); + if (down_write_killable(&dvnode->validate_lock) < 0) + goto error; + if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) { - ret = -ERESTARTSYS; - for (i = 0; i < req->nr_pages; i++) - if (lock_page_killable(req->pages[i]) < 0) - goto error_unlock; - - if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) - goto success; - ret = afs_fetch_data(dvnode, key, req); if (ret < 0) - goto error_unlock_all; + goto error_unlock; task_io_account_read(PAGE_SIZE * req->nr_pages); @@ -284,33 +288,26 @@ retry: for (i = 0; i < req->nr_pages; i++) if (!afs_dir_check_page(dvnode, req->pages[i], req->actual_len)) - goto error_unlock_all; + goto error_unlock; // TODO: Trim excess pages set_bit(AFS_VNODE_DIR_VALID, &dvnode->flags); } + downgrade_write(&dvnode->validate_lock); success: - i = req->nr_pages; - while (i > 0) - unlock_page(req->pages[--i]); return req; -error_unlock_all: - i = req->nr_pages; error_unlock: - while (i > 0) - unlock_page(req->pages[--i]); + up_write(&dvnode->validate_lock); error: afs_put_read(req); _leave(" = %d", ret); return ERR_PTR(ret); content_has_grown: - i = req->nr_pages; - while (i > 0) - unlock_page(req->pages[--i]); + up_write(&dvnode->validate_lock); afs_put_read(req); goto retry; } @@ -473,6 +470,7 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx, } out: + up_read(&dvnode->validate_lock); afs_put_read(req); _leave(" = %d", ret); return ret; diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 06194cfe9724..e855c6e5cf28 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -415,7 +415,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) if (valid) goto valid; - mutex_lock(&vnode->validate_lock); + down_write(&vnode->validate_lock); /* if the promise has expired, we need to check the server again to get * a new promise - note that if the (parent) directory's metadata was @@ -444,13 +444,13 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) * different */ if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) afs_zap_data(vnode); - mutex_unlock(&vnode->validate_lock); + up_write(&vnode->validate_lock); valid: _leave(" = 0"); return 0; error_unlock: - mutex_unlock(&vnode->validate_lock); + up_write(&vnode->validate_lock); _leave(" = %d", ret); return ret; } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index f8086ec95e24..468be1e0dffb 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -494,7 +494,7 @@ struct afs_vnode { #endif struct afs_permits __rcu *permit_cache; /* cache of permits so far obtained */ struct mutex io_lock; /* Lock for serialising I/O on this mutex */ - struct mutex validate_lock; /* lock for validating this vnode */ + struct rw_semaphore validate_lock; /* lock for validating this vnode */ spinlock_t wb_lock; /* lock for wb_keys */ spinlock_t lock; /* waitqueue/flags lock */ unsigned long flags; diff --git a/fs/afs/super.c b/fs/afs/super.c index 65081ec3c36e..b02838cd9525 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -590,7 +590,7 @@ static void afs_i_init_once(void *_vnode) memset(vnode, 0, sizeof(*vnode)); inode_init_once(&vnode->vfs_inode); mutex_init(&vnode->io_lock); - mutex_init(&vnode->validate_lock); + init_rwsem(&vnode->validate_lock); spin_lock_init(&vnode->wb_lock); spin_lock_init(&vnode->lock); INIT_LIST_HEAD(&vnode->wb_keys); From 01fd79e6de74a447c5657913a335d9ce6508cdb1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 9 May 2018 22:03:18 +0100 Subject: [PATCH 276/347] afs: Fix address list parsing The parsing of port specifiers in the address list obtained from the DNS resolution upcall doesn't work as in4_pton() and in6_pton() will fail on encountering an unexpected delimiter (in this case, the '+' marking the port number). However, in*_pton() can't be given multiple specifiers. Fix this by finding the delimiter in advance and not relying on in*_pton() to find the end of the address for us. Fixes: 8b2a464ced77 ("afs: Add an address list concept") Signed-off-by: David Howells --- fs/afs/addr_list.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c index 3bedfed608a2..7587fb665ff1 100644 --- a/fs/afs/addr_list.c +++ b/fs/afs/addr_list.c @@ -121,7 +121,7 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len, p = text; do { struct sockaddr_rxrpc *srx = &alist->addrs[alist->nr_addrs]; - char tdelim = delim; + const char *q, *stop; if (*p == delim) { p++; @@ -130,28 +130,33 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len, if (*p == '[') { p++; - tdelim = ']'; + q = memchr(p, ']', end - p); + } else { + for (q = p; q < end; q++) + if (*q == '+' || *q == delim) + break; } - if (in4_pton(p, end - p, + if (in4_pton(p, q - p, (u8 *)&srx->transport.sin6.sin6_addr.s6_addr32[3], - tdelim, &p)) { + -1, &stop)) { srx->transport.sin6.sin6_addr.s6_addr32[0] = 0; srx->transport.sin6.sin6_addr.s6_addr32[1] = 0; srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff); - } else if (in6_pton(p, end - p, + } else if (in6_pton(p, q - p, srx->transport.sin6.sin6_addr.s6_addr, - tdelim, &p)) { + -1, &stop)) { /* Nothing to do */ } else { goto bad_address; } - if (tdelim == ']') { - if (p == end || *p != ']') - goto bad_address; + if (stop != q) + goto bad_address; + + p = q; + if (q < end && *q == ']') p++; - } if (p < end) { if (*p == '+') { From f2686b09269ec1a6f23028b5675d87c3b4579a4c Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 10 May 2018 14:12:50 +0100 Subject: [PATCH 277/347] afs: Fix giving up callbacks on server destruction When a server record is destroyed, we want to send a message to the server telling it that we're giving up all the callbacks it has promised us. Apply two fixes to this: (1) Only send the FS.GiveUpAllCallBacks message if we actually got a callback from that server. We assume this to be the case if we performed at least one successful FS operation on that server. (2) Send it to the address last used for that server rather than always picking the first address in the list (which might be unreachable). Fixes: d2ddc776a458 ("afs: Overhaul volume and server record caching and fileserver rotation") Signed-off-by: David Howells --- fs/afs/internal.h | 1 + fs/afs/rxrpc.c | 6 +++++- fs/afs/server.c | 8 +++++--- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 468be1e0dffb..8de04b29bec1 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -396,6 +396,7 @@ struct afs_server { #define AFS_SERVER_FL_PROBED 5 /* The fileserver has been probed */ #define AFS_SERVER_FL_PROBING 6 /* Fileserver is being probed */ #define AFS_SERVER_FL_NO_IBULK 7 /* Fileserver doesn't support FS.InlineBulkStatus */ +#define AFS_SERVER_FL_MAY_HAVE_CB 8 /* May have callbacks on this fileserver */ atomic_t usage; u32 addr_version; /* Address list version */ diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 5c6263972ec9..1f6235a6e9ae 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -482,8 +482,12 @@ static void afs_deliver_to_call(struct afs_call *call) state = READ_ONCE(call->state); switch (ret) { case 0: - if (state == AFS_CALL_CL_PROC_REPLY) + if (state == AFS_CALL_CL_PROC_REPLY) { + if (call->cbi) + set_bit(AFS_SERVER_FL_MAY_HAVE_CB, + &call->cbi->server->flags); goto call_complete; + } ASSERTCMP(state, >, AFS_CALL_CL_PROC_REPLY); goto done; case -EINPROGRESS: diff --git a/fs/afs/server.c b/fs/afs/server.c index 629c74986cff..2c5cff60e34d 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -395,14 +395,16 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server) struct afs_addr_list *alist = rcu_access_pointer(server->addresses); struct afs_addr_cursor ac = { .alist = alist, - .addr = &alist->addrs[0], .start = alist->index, - .index = alist->index, + .index = 0, + .addr = &alist->addrs[alist->index], .error = 0, }; _enter("%p", server); - afs_fs_give_up_all_callbacks(net, server, &ac, NULL); + if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags)) + afs_fs_give_up_all_callbacks(net, server, &ac, NULL); + call_rcu(&server->rcu, afs_server_rcu); afs_dec_servers_outstanding(net); } From d4a96bec7a7362834ef5c31d7b2cc9bf36eb0570 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 10 May 2018 08:43:04 +0100 Subject: [PATCH 278/347] afs: Fix refcounting in callback registration The refcounting on afs_cb_interest struct objects in afs_register_server_cb_interest() is wrong as it uses the server list entry's call back interest pointer without regard for the fact that it might be replaced at any time and the object thrown away. Fix this by: (1) Put a lock on the afs_server_list struct that can be used to mediate access to the callback interest pointers in the servers array. (2) Keep a ref on the callback interest that we get from the entry. (3) Dropping the old reference held by vnode->cb_interest if we replace the pointer. Fixes: c435ee34551e ("afs: Overhaul the callback handling") Signed-off-by: David Howells --- fs/afs/callback.c | 64 ++++++++++++++++++++++++++++++-------------- fs/afs/internal.h | 7 +++-- fs/afs/rotate.c | 4 +-- fs/afs/server_list.c | 7 +++-- 4 files changed, 56 insertions(+), 26 deletions(-) diff --git a/fs/afs/callback.c b/fs/afs/callback.c index abd9a84f4e88..09332945d322 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -23,36 +23,55 @@ /* * Set up an interest-in-callbacks record for a volume on a server and * register it with the server. - * - Called with volume->server_sem held. + * - Called with vnode->io_lock held. */ int afs_register_server_cb_interest(struct afs_vnode *vnode, - struct afs_server_entry *entry) + struct afs_server_list *slist, + unsigned int index) { - struct afs_cb_interest *cbi = entry->cb_interest, *vcbi, *new, *x; + struct afs_server_entry *entry = &slist->servers[index]; + struct afs_cb_interest *cbi, *vcbi, *new, *old; struct afs_server *server = entry->server; again: + if (vnode->cb_interest && + likely(vnode->cb_interest == entry->cb_interest)) + return 0; + + read_lock(&slist->lock); + cbi = afs_get_cb_interest(entry->cb_interest); + read_unlock(&slist->lock); + vcbi = vnode->cb_interest; if (vcbi) { - if (vcbi == cbi) - return 0; - - if (cbi && vcbi->server == cbi->server) { - write_seqlock(&vnode->cb_lock); - vnode->cb_interest = afs_get_cb_interest(cbi); - write_sequnlock(&vnode->cb_lock); + if (vcbi == cbi) { afs_put_cb_interest(afs_v2net(vnode), cbi); return 0; } + /* Use a new interest in the server list for the same server + * rather than an old one that's still attached to a vnode. + */ + if (cbi && vcbi->server == cbi->server) { + write_seqlock(&vnode->cb_lock); + old = vnode->cb_interest; + vnode->cb_interest = cbi; + write_sequnlock(&vnode->cb_lock); + afs_put_cb_interest(afs_v2net(vnode), old); + return 0; + } + + /* Re-use the one attached to the vnode. */ if (!cbi && vcbi->server == server) { - afs_get_cb_interest(vcbi); - x = cmpxchg(&entry->cb_interest, cbi, vcbi); - if (x != cbi) { - cbi = x; - afs_put_cb_interest(afs_v2net(vnode), vcbi); + write_lock(&slist->lock); + if (entry->cb_interest) { + write_unlock(&slist->lock); + afs_put_cb_interest(afs_v2net(vnode), cbi); goto again; } + + entry->cb_interest = cbi; + write_unlock(&slist->lock); return 0; } } @@ -72,13 +91,16 @@ again: list_add_tail(&new->cb_link, &server->cb_interests); write_unlock(&server->cb_break_lock); - x = cmpxchg(&entry->cb_interest, cbi, new); - if (x == cbi) { + write_lock(&slist->lock); + if (!entry->cb_interest) { + entry->cb_interest = afs_get_cb_interest(new); cbi = new; + new = NULL; } else { - cbi = x; - afs_put_cb_interest(afs_v2net(vnode), new); + cbi = afs_get_cb_interest(entry->cb_interest); } + write_unlock(&slist->lock); + afs_put_cb_interest(afs_v2net(vnode), new); } ASSERT(cbi); @@ -88,11 +110,13 @@ again: */ write_seqlock(&vnode->cb_lock); - vnode->cb_interest = afs_get_cb_interest(cbi); + old = vnode->cb_interest; + vnode->cb_interest = cbi; vnode->cb_s_break = cbi->server->cb_s_break; clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); write_sequnlock(&vnode->cb_lock); + afs_put_cb_interest(afs_v2net(vnode), old); return 0; } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 8de04b29bec1..e75e57e13320 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -434,6 +434,7 @@ struct afs_server_list { unsigned short index; /* Server currently in use */ unsigned short vnovol_mask; /* Servers to be skipped due to VNOVOL */ unsigned int seq; /* Set to ->servers_seq when installed */ + rwlock_t lock; struct afs_server_entry servers[]; }; @@ -649,13 +650,15 @@ extern void afs_init_callback_state(struct afs_server *); extern void afs_break_callback(struct afs_vnode *); extern void afs_break_callbacks(struct afs_server *, size_t, struct afs_callback_break*); -extern int afs_register_server_cb_interest(struct afs_vnode *, struct afs_server_entry *); +extern int afs_register_server_cb_interest(struct afs_vnode *, + struct afs_server_list *, unsigned int); extern void afs_put_cb_interest(struct afs_net *, struct afs_cb_interest *); extern void afs_clear_callback_interests(struct afs_net *, struct afs_server_list *); static inline struct afs_cb_interest *afs_get_cb_interest(struct afs_cb_interest *cbi) { - refcount_inc(&cbi->usage); + if (cbi) + refcount_inc(&cbi->usage); return cbi; } diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c index ac0feac9d746..4a26d51b2968 100644 --- a/fs/afs/rotate.c +++ b/fs/afs/rotate.c @@ -350,8 +350,8 @@ use_server: * break request before we've finished decoding the reply and * installing the vnode. */ - fc->ac.error = afs_register_server_cb_interest( - vnode, &fc->server_list->servers[fc->index]); + fc->ac.error = afs_register_server_cb_interest(vnode, fc->server_list, + fc->index); if (fc->ac.error < 0) goto failed; diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c index 0f8dc4c8f07c..8a5760aa5832 100644 --- a/fs/afs/server_list.c +++ b/fs/afs/server_list.c @@ -49,6 +49,7 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell, goto error; refcount_set(&slist->usage, 1); + rwlock_init(&slist->lock); /* Make sure a records exists for each server in the list. */ for (i = 0; i < vldb->nr_servers; i++) { @@ -64,9 +65,11 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell, goto error_2; } - /* Insertion-sort by server pointer */ + /* Insertion-sort by UUID */ for (j = 0; j < slist->nr_servers; j++) - if (slist->servers[j].server >= server) + if (memcmp(&slist->servers[j].server->uuid, + &server->uuid, + sizeof(server->uuid)) >= 0) break; if (j < slist->nr_servers) { if (slist->servers[j].server == server) { From ec5a3b4b507efca903d848518dcf2ebf7b04b466 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 10 May 2018 14:22:38 +0100 Subject: [PATCH 279/347] afs: Fix server rotation's handling of fileserver probe failure The server rotation algorithm just gives up if it fails to probe a fileserver. Fix this by rotating to the next fileserver instead. Fixes: d2ddc776a458 ("afs: Overhaul volume and server record caching and fileserver rotation") Signed-off-by: David Howells --- fs/afs/rotate.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c index 4a26d51b2968..84584dcced72 100644 --- a/fs/afs/rotate.c +++ b/fs/afs/rotate.c @@ -369,8 +369,16 @@ use_server: if (!test_bit(AFS_SERVER_FL_PROBED, &server->flags)) { fc->ac.alist = afs_get_addrlist(alist); - if (!afs_probe_fileserver(fc)) - goto failed; + if (!afs_probe_fileserver(fc)) { + switch (fc->ac.error) { + case -ENOMEM: + case -ERESTARTSYS: + case -EINTR: + goto failed; + default: + goto next_server; + } + } } if (!fc->ac.alist) From ea739a287f4f16d6250bea779a1026ead79695f2 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 10 May 2018 19:20:54 +0100 Subject: [PATCH 280/347] mtd: Fix comparison in map_word_andequal() Commit 9e343e87d2c4 ("mtd: cfi: convert inline functions to macros") changed map_word_andequal() into a macro, but also changed the right hand side of the comparison from val3 to val2. Change it back to use val3 on the right hand side. Thankfully this did not cause a regression because all callers currently pass the same argument for val2 and val3. Fixes: 9e343e87d2c4 ("mtd: cfi: convert inline functions to macros") Signed-off-by: Ben Hutchings Signed-off-by: Boris Brezillon --- include/linux/mtd/map.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h index b5b43f94f311..01b990e4b228 100644 --- a/include/linux/mtd/map.h +++ b/include/linux/mtd/map.h @@ -312,7 +312,7 @@ void map_destroy(struct mtd_info *mtd); ({ \ int i, ret = 1; \ for (i = 0; i < map_words(map); i++) { \ - if (((val1).x[i] & (val2).x[i]) != (val2).x[i]) { \ + if (((val1).x[i] & (val2).x[i]) != (val3).x[i]) { \ ret = 0; \ break; \ } \ From 90d617633368ab97a2c7571c6e66dad54f39228d Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 9 May 2018 09:13:58 +0200 Subject: [PATCH 281/347] mtd: rawnand: marvell: Fix read logic for layouts with ->nchunks > 2 The code is doing monolithic reads for all chunks except the last one which is wrong since a monolithic read will issue the READ0+ADDRS+READ_START sequence. It not only takes longer because it forces the NAND chip to reload the page content into its internal cache, but by doing that we also reset the column pointer to 0, which means we'll always read the first chunk instead of moving to the next one. Rework the code to do a monolithic read only for the first chunk, then switch to naked reads for all intermediate chunks and finally issue a last naked read for the last chunk. Fixes: 02f26ecf8c77 mtd: nand: add reworked Marvell NAND controller driver Cc: stable@vger.kernel.org Reported-by: Chris Packham Signed-off-by: Boris Brezillon Tested-by: Chris Packham Acked-by: Miquel Raynal --- drivers/mtd/nand/raw/marvell_nand.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/mtd/nand/raw/marvell_nand.c b/drivers/mtd/nand/raw/marvell_nand.c index db5ec4e8bde9..ebb1d141b900 100644 --- a/drivers/mtd/nand/raw/marvell_nand.c +++ b/drivers/mtd/nand/raw/marvell_nand.c @@ -1194,11 +1194,13 @@ static void marvell_nfc_hw_ecc_bch_read_chunk(struct nand_chip *chip, int chunk, NDCB0_CMD2(NAND_CMD_READSTART); /* - * Trigger the naked read operation only on the last chunk. - * Otherwise, use monolithic read. + * Trigger the monolithic read on the first chunk, then naked read on + * intermediate chunks and finally a last naked read on the last chunk. */ - if (lt->nchunks == 1 || (chunk < lt->nchunks - 1)) + if (chunk == 0) nfc_op.ndcb[0] |= NDCB0_CMD_XTYPE(XTYPE_MONOLITHIC_RW); + else if (chunk < lt->nchunks - 1) + nfc_op.ndcb[0] |= NDCB0_CMD_XTYPE(XTYPE_NAKED_RW); else nfc_op.ndcb[0] |= NDCB0_CMD_XTYPE(XTYPE_LAST_NAKED_RW); From 684b0f68cf1c1cf4a40834818653491c5cad4435 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 10 May 2018 21:51:47 +0100 Subject: [PATCH 282/347] afs: Fix AFSFetchStatus decoder to provide OpenAFS compatibility The OpenAFS server's RXAFS_InlineBulkStatus implementation has a bug whereby if an error occurs on one of the vnodes being queried, then the errorCode field is set correctly in the corresponding status, but the interfaceVersion field is left unset. Fix kAFS to deal with this by evaluating the AFSFetchStatus blob against the following cases when called from FS.InlineBulkStatus delivery: (1) If InterfaceVersion == 0 then: (a) If errorCode != 0 then it indicates the abort code for the corresponding vnode. (b) If errorCode == 0 then the status record is invalid. (2) If InterfaceVersion == 1 then: (a) If errorCode != 0 then it indicates the abort code for the corresponding vnode. (b) If errorCode == 0 then the status record is valid and can be parsed. (3) If InterfaceVersion is anything else then the status record is invalid. Fixes: dd9fbcb8e103 ("afs: Rearrange status mapping") Reported-by: Jeffrey Altman Signed-off-by: David Howells --- fs/afs/fsclient.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index efacdb7c1dee..d16f26c6cdbe 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -134,6 +134,7 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call, struct afs_read *read_req) { const struct afs_xdr_AFSFetchStatus *xdr = (const void *)*_bp; + bool inline_error = (call->operation_ID == afs_FS_InlineBulkStatus); u64 data_version, size; u32 type, abort_code; u8 flags = 0; @@ -142,13 +143,32 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call, if (vnode) write_seqlock(&vnode->cb_lock); + abort_code = ntohl(xdr->abort_code); + if (xdr->if_version != htonl(AFS_FSTATUS_VERSION)) { + if (xdr->if_version == htonl(0) && + abort_code != 0 && + inline_error) { + /* The OpenAFS fileserver has a bug in FS.InlineBulkStatus + * whereby it doesn't set the interface version in the error + * case. + */ + status->abort_code = abort_code; + ret = 0; + goto out; + } + pr_warn("Unknown AFSFetchStatus version %u\n", ntohl(xdr->if_version)); goto bad; } + if (abort_code != 0 && inline_error) { + status->abort_code = abort_code; + ret = 0; + goto out; + } + type = ntohl(xdr->type); - abort_code = ntohl(xdr->abort_code); switch (type) { case AFS_FTYPE_FILE: case AFS_FTYPE_DIR: @@ -165,13 +185,6 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call, } status->type = type; break; - case AFS_FTYPE_INVALID: - if (abort_code != 0) { - status->abort_code = abort_code; - ret = 0; - goto out; - } - /* Fall through */ default: goto bad; } From 3d9fa91161387ee629e7a07c47934d119910c8ae Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 11 May 2018 22:55:59 +0100 Subject: [PATCH 283/347] afs: Fix VNOVOL handling in address rotation If a volume location record lists multiple file servers for a volume, then it's possible that due to a misconfiguration or a changing configuration that one of the file servers doesn't know about it yet and will abort VNOVOL. Currently, the rotation algorithm will stop with EREMOTEIO. Fix this by moving on to try the next server if VNOVOL is returned. Once all the servers have been tried and the record rechecked, the algorithm will stop with EREMOTEIO or ENOMEDIUM. Fixes: d2ddc776a458 ("afs: Overhaul volume and server record caching and fileserver rotation") Reported-by: Marc Dionne Signed-off-by: David Howells --- fs/afs/rotate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c index 84584dcced72..e065bc0768e6 100644 --- a/fs/afs/rotate.c +++ b/fs/afs/rotate.c @@ -179,7 +179,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc) */ if (fc->flags & AFS_FS_CURSOR_VNOVOL) { fc->ac.error = -EREMOTEIO; - goto failed; + goto next_server; } write_lock(&vnode->volume->servers_lock); @@ -201,7 +201,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc) */ if (vnode->volume->servers == fc->server_list) { fc->ac.error = -EREMOTEIO; - goto failed; + goto next_server; } /* Try again */ From 001ab5a67ee5d191c64aebf4b4ef8c7a0dcfd2bc Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 11 May 2018 23:21:35 +0100 Subject: [PATCH 284/347] afs: Fix the handling of CB.InitCallBackState3 to find the server by UUID Fix the handling of the CB.InitCallBackState3 service call to find the record of a server that we're using by looking it up by the UUID passed as the parameter rather than by its address (of which it might have many, and which may change). Fixes: c35eccb1f614 ("[AFS]: Implement the CB.InitCallBackState3 operation.") Signed-off-by: David Howells --- fs/afs/cmservice.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 357de908df3a..bcd13397bd59 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -341,7 +341,6 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call) */ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call) { - struct sockaddr_rxrpc srx; struct afs_server *server; struct afs_uuid *r; unsigned loop; @@ -398,8 +397,9 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call) /* we'll need the file server record as that tells us which set of * vnodes to operate upon */ - rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx); - server = afs_find_server(call->net, &srx); + rcu_read_lock(); + server = afs_find_server_by_uuid(call->net, call->request); + rcu_read_unlock(); if (!server) return -ENOTCONN; call->cm_server = server; From 3709a399c15e4273d9a94b123374f12e5664318c Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 11 May 2018 22:59:42 +0100 Subject: [PATCH 285/347] afs: Add a tracepoint to record callbacks from unlisted servers Add a tracepoint to record callbacks from servers for which we don't have a record. Signed-off-by: David Howells --- fs/afs/cmservice.c | 12 ++++++++--- include/trace/events/afs.h | 42 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 3 deletions(-) diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index bcd13397bd59..9f13375f49b8 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -287,8 +287,10 @@ static int afs_deliver_cb_callback(struct afs_call *call) * vnodes to operate upon */ rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx); server = afs_find_server(call->net, &srx); - if (!server) + if (!server) { + trace_afs_cm_no_server(call, &srx); return -ENOTCONN; + } call->cm_server = server; return afs_queue_call_work(call); @@ -329,8 +331,10 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call) /* we'll need the file server record as that tells us which set of * vnodes to operate upon */ server = afs_find_server(call->net, &srx); - if (!server) + if (!server) { + trace_afs_cm_no_server(call, &srx); return -ENOTCONN; + } call->cm_server = server; return afs_queue_call_work(call); @@ -400,8 +404,10 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call) rcu_read_lock(); server = afs_find_server_by_uuid(call->net, call->request); rcu_read_unlock(); - if (!server) + if (!server) { + trace_afs_cm_no_server_u(call, call->request); return -ENOTCONN; + } call->cm_server = server; return afs_queue_call_work(call); diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index f0820554caa9..d0a341bc4540 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -575,6 +575,48 @@ TRACE_EVENT(afs_protocol_error, __entry->call, __entry->error, __entry->where) ); +TRACE_EVENT(afs_cm_no_server, + TP_PROTO(struct afs_call *call, struct sockaddr_rxrpc *srx), + + TP_ARGS(call, srx), + + TP_STRUCT__entry( + __field(unsigned int, call ) + __field(unsigned int, op_id ) + __field_struct(struct sockaddr_rxrpc, srx ) + ), + + TP_fast_assign( + __entry->call = call->debug_id; + __entry->op_id = call->operation_ID; + memcpy(&__entry->srx, srx, sizeof(__entry->srx)); + ), + + TP_printk("c=%08x op=%u %pISpc", + __entry->call, __entry->op_id, &__entry->srx.transport) + ); + +TRACE_EVENT(afs_cm_no_server_u, + TP_PROTO(struct afs_call *call, const uuid_t *uuid), + + TP_ARGS(call, uuid), + + TP_STRUCT__entry( + __field(unsigned int, call ) + __field(unsigned int, op_id ) + __field_struct(uuid_t, uuid ) + ), + + TP_fast_assign( + __entry->call = call->debug_id; + __entry->op_id = call->operation_ID; + memcpy(&__entry->uuid, uuid, sizeof(__entry->uuid)); + ), + + TP_printk("c=%08x op=%u %pU", + __entry->call, __entry->op_id, &__entry->uuid) + ); + #endif /* _TRACE_AFS_H */ /* This part must be outside protection */ From a86b06d1ccd218a6a50d6a3a88fbd2abcd0eaa94 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 11 May 2018 23:45:40 +0100 Subject: [PATCH 286/347] afs: Fix the handling of an unfound server in CM operations If the client cache manager operations that need the server record (CB.Callback, CB.InitCallBackState, and CB.InitCallBackState3) can't find the server record, they abort the call from the file server with RX_CALL_DEAD when they should return okay. Fixes: c35eccb1f614 ("[AFS]: Implement the CB.InitCallBackState3 operation.") Signed-off-by: David Howells --- fs/afs/cmservice.c | 34 ++++++++++++---------------------- fs/afs/rxrpc.c | 5 ----- 2 files changed, 12 insertions(+), 27 deletions(-) diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 9f13375f49b8..b44491410af3 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -143,8 +143,8 @@ static void afs_cm_destructor(struct afs_call *call) * received. The step number here must match the final number in * afs_deliver_cb_callback(). */ - if (call->unmarshall == 5) { - ASSERT(call->cm_server && call->count && call->request); + if (call->cm_server && call->unmarshall == 5) { + ASSERT(call->count && call->request); afs_break_callbacks(call->cm_server, call->count, call->request); } @@ -168,7 +168,8 @@ static void SRXAFSCB_CallBack(struct work_struct *work) * yet */ afs_send_empty_reply(call); - afs_break_callbacks(call->cm_server, call->count, call->request); + if (call->cm_server) + afs_break_callbacks(call->cm_server, call->count, call->request); afs_put_call(call); _leave(""); } @@ -180,7 +181,6 @@ static int afs_deliver_cb_callback(struct afs_call *call) { struct afs_callback_break *cb; struct sockaddr_rxrpc srx; - struct afs_server *server; __be32 *bp; int ret, loop; @@ -286,12 +286,9 @@ static int afs_deliver_cb_callback(struct afs_call *call) /* we'll need the file server record as that tells us which set of * vnodes to operate upon */ rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx); - server = afs_find_server(call->net, &srx); - if (!server) { + call->cm_server = afs_find_server(call->net, &srx); + if (!call->cm_server) trace_afs_cm_no_server(call, &srx); - return -ENOTCONN; - } - call->cm_server = server; return afs_queue_call_work(call); } @@ -305,7 +302,8 @@ static void SRXAFSCB_InitCallBackState(struct work_struct *work) _enter("{%p}", call->cm_server); - afs_init_callback_state(call->cm_server); + if (call->cm_server) + afs_init_callback_state(call->cm_server); afs_send_empty_reply(call); afs_put_call(call); _leave(""); @@ -317,7 +315,6 @@ static void SRXAFSCB_InitCallBackState(struct work_struct *work) static int afs_deliver_cb_init_call_back_state(struct afs_call *call) { struct sockaddr_rxrpc srx; - struct afs_server *server; int ret; _enter(""); @@ -330,12 +327,9 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call) /* we'll need the file server record as that tells us which set of * vnodes to operate upon */ - server = afs_find_server(call->net, &srx); - if (!server) { + call->cm_server = afs_find_server(call->net, &srx); + if (!call->cm_server) trace_afs_cm_no_server(call, &srx); - return -ENOTCONN; - } - call->cm_server = server; return afs_queue_call_work(call); } @@ -345,7 +339,6 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call) */ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call) { - struct afs_server *server; struct afs_uuid *r; unsigned loop; __be32 *b; @@ -402,13 +395,10 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call) /* we'll need the file server record as that tells us which set of * vnodes to operate upon */ rcu_read_lock(); - server = afs_find_server_by_uuid(call->net, call->request); + call->cm_server = afs_find_server_by_uuid(call->net, call->request); rcu_read_unlock(); - if (!server) { + if (!call->cm_server) trace_afs_cm_no_server_u(call, call->request); - return -ENOTCONN; - } - call->cm_server = server; return afs_queue_call_work(call); } diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 1f6235a6e9ae..d0eee5d32c94 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -497,11 +497,6 @@ static void afs_deliver_to_call(struct afs_call *call) case -ECONNABORTED: ASSERTCMP(state, ==, AFS_CALL_COMPLETE); goto done; - case -ENOTCONN: - abort_code = RX_CALL_DEAD; - rxrpc_kernel_abort_call(call->net->socket, call->rxcall, - abort_code, ret, "KNC"); - goto local_abort; case -ENOTSUPP: abort_code = RXGEN_OPCODE; rxrpc_kernel_abort_call(call->net->socket, call->rxcall, From f9c1bba3d392843f046d2ee27b4dfcec989d8a4b Mon Sep 17 00:00:00 2001 From: Marc Dionne Date: Fri, 11 May 2018 21:35:06 -0300 Subject: [PATCH 287/347] afs: Fix afs_find_server search loop The code that looks up servers by addresses makes the assumption that the list of addresses for a server is sorted. It exits the loop if it finds that the target address is larger than the current candidate. As the list is not currently sorted, this can lead to a failure to find a matching server, which can cause callbacks from that server to be ignored. Remove the early exit case so that the complete list is searched. Fixes: d2ddc776a458 ("afs: Overhaul volume and server record caching and fileserver rotation") Signed-off-by: Marc Dionne Signed-off-by: David Howells --- fs/afs/server.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/fs/afs/server.c b/fs/afs/server.c index 2c5cff60e34d..3af4625e2f8c 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -67,12 +67,6 @@ struct afs_server *afs_find_server(struct afs_net *net, sizeof(struct in6_addr)); if (diff == 0) goto found; - if (diff < 0) { - // TODO: Sort the list - //if (i == alist->nr_ipv4) - // goto not_found; - break; - } } } } else { @@ -87,17 +81,10 @@ struct afs_server *afs_find_server(struct afs_net *net, (u32 __force)b->sin6_addr.s6_addr32[3]); if (diff == 0) goto found; - if (diff < 0) { - // TODO: Sort the list - //if (i == 0) - // goto not_found; - break; - } } } } - //not_found: server = NULL; found: if (server && !atomic_inc_not_zero(&server->usage)) From 68251f0a6818f3be19b1471f36c956ca97c1427d Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 12 May 2018 22:31:33 +0100 Subject: [PATCH 288/347] afs: Fix whole-volume callback handling It's possible for an AFS file server to issue a whole-volume notification that callbacks on all the vnodes in the file have been broken. This is done for R/O and backup volumes (which don't have per-file callbacks) and for things like a volume being taken offline. Fix callback handling to detect whole-volume notifications, to track it across operations and to check it during inode validation. Fixes: c435ee34551e ("afs: Overhaul the callback handling") Signed-off-by: David Howells --- fs/afs/callback.c | 28 +++++++++++++++++++++------- fs/afs/dir.c | 18 +++++++++--------- fs/afs/file.c | 2 +- fs/afs/flock.c | 6 +++--- fs/afs/fsclient.c | 2 +- fs/afs/inode.c | 13 ++++++++----- fs/afs/internal.h | 15 +++++++++++++++ fs/afs/security.c | 7 +++---- fs/afs/super.c | 2 +- fs/afs/write.c | 2 +- 10 files changed, 63 insertions(+), 32 deletions(-) diff --git a/fs/afs/callback.c b/fs/afs/callback.c index 09332945d322..571437dcb252 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -113,6 +113,7 @@ again: old = vnode->cb_interest; vnode->cb_interest = cbi; vnode->cb_s_break = cbi->server->cb_s_break; + vnode->cb_v_break = vnode->volume->cb_v_break; clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); write_sequnlock(&vnode->cb_lock); @@ -195,13 +196,24 @@ static void afs_break_one_callback(struct afs_server *server, if (cbi->vid != fid->vid) continue; - data.volume = NULL; - data.fid = *fid; - inode = ilookup5_nowait(cbi->sb, fid->vnode, afs_iget5_test, &data); - if (inode) { - vnode = AFS_FS_I(inode); - afs_break_callback(vnode); - iput(inode); + if (fid->vnode == 0 && fid->unique == 0) { + /* The callback break applies to an entire volume. */ + struct afs_super_info *as = AFS_FS_S(cbi->sb); + struct afs_volume *volume = as->volume; + + write_lock(&volume->cb_break_lock); + volume->cb_v_break++; + write_unlock(&volume->cb_break_lock); + } else { + data.volume = NULL; + data.fid = *fid; + inode = ilookup5_nowait(cbi->sb, fid->vnode, + afs_iget5_test, &data); + if (inode) { + vnode = AFS_FS_I(inode); + afs_break_callback(vnode); + iput(inode); + } } } @@ -219,6 +231,8 @@ void afs_break_callbacks(struct afs_server *server, size_t count, ASSERT(server != NULL); ASSERTCMP(count, <=, AFSCBMAX); + /* TODO: Sort the callback break list by volume ID */ + for (; count > 0; callbacks++, count--) { _debug("- Fid { vl=%08x n=%u u=%u } CB { v=%u x=%u t=%u }", callbacks->fid.vid, diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 2853acd64482..7d623008157f 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -1141,7 +1141,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, dvnode, key)) { while (afs_select_fileserver(&fc)) { - fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; + fc.cb_break = afs_calc_vnode_cb_break(dvnode); afs_fs_create(&fc, dentry->d_name.name, mode, data_version, &newfid, &newstatus, &newcb); } @@ -1211,7 +1211,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, dvnode, key)) { while (afs_select_fileserver(&fc)) { - fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; + fc.cb_break = afs_calc_vnode_cb_break(dvnode); afs_fs_remove(&fc, dentry->d_name.name, true, data_version); } @@ -1314,7 +1314,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, dvnode, key)) { while (afs_select_fileserver(&fc)) { - fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; + fc.cb_break = afs_calc_vnode_cb_break(dvnode); afs_fs_remove(&fc, dentry->d_name.name, false, data_version); } @@ -1371,7 +1371,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, dvnode, key)) { while (afs_select_fileserver(&fc)) { - fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; + fc.cb_break = afs_calc_vnode_cb_break(dvnode); afs_fs_create(&fc, dentry->d_name.name, mode, data_version, &newfid, &newstatus, &newcb); } @@ -1441,8 +1441,8 @@ static int afs_link(struct dentry *from, struct inode *dir, } while (afs_select_fileserver(&fc)) { - fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; - fc.cb_break_2 = vnode->cb_break + vnode->cb_s_break; + fc.cb_break = afs_calc_vnode_cb_break(dvnode); + fc.cb_break_2 = afs_calc_vnode_cb_break(vnode); afs_fs_link(&fc, vnode, dentry->d_name.name, data_version); } @@ -1510,7 +1510,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry, ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, dvnode, key)) { while (afs_select_fileserver(&fc)) { - fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; + fc.cb_break = afs_calc_vnode_cb_break(dvnode); afs_fs_symlink(&fc, dentry->d_name.name, content, data_version, &newfid, &newstatus); @@ -1586,8 +1586,8 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, } } while (afs_select_fileserver(&fc)) { - fc.cb_break = orig_dvnode->cb_break + orig_dvnode->cb_s_break; - fc.cb_break_2 = new_dvnode->cb_break + new_dvnode->cb_s_break; + fc.cb_break = afs_calc_vnode_cb_break(orig_dvnode); + fc.cb_break_2 = afs_calc_vnode_cb_break(new_dvnode); afs_fs_rename(&fc, old_dentry->d_name.name, new_dvnode, new_dentry->d_name.name, orig_data_version, new_data_version); diff --git a/fs/afs/file.c b/fs/afs/file.c index c24c08016dd9..7d4f26198573 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -238,7 +238,7 @@ int afs_fetch_data(struct afs_vnode *vnode, struct key *key, struct afs_read *de ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, vnode, key)) { while (afs_select_fileserver(&fc)) { - fc.cb_break = vnode->cb_break + vnode->cb_s_break; + fc.cb_break = afs_calc_vnode_cb_break(vnode); afs_fs_fetch_data(&fc, desc); } diff --git a/fs/afs/flock.c b/fs/afs/flock.c index 7a0e017070ec..dc62d15a964b 100644 --- a/fs/afs/flock.c +++ b/fs/afs/flock.c @@ -86,7 +86,7 @@ static int afs_set_lock(struct afs_vnode *vnode, struct key *key, ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, vnode, key)) { while (afs_select_fileserver(&fc)) { - fc.cb_break = vnode->cb_break + vnode->cb_s_break; + fc.cb_break = afs_calc_vnode_cb_break(vnode); afs_fs_set_lock(&fc, type); } @@ -117,7 +117,7 @@ static int afs_extend_lock(struct afs_vnode *vnode, struct key *key) ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, vnode, key)) { while (afs_select_current_fileserver(&fc)) { - fc.cb_break = vnode->cb_break + vnode->cb_s_break; + fc.cb_break = afs_calc_vnode_cb_break(vnode); afs_fs_extend_lock(&fc); } @@ -148,7 +148,7 @@ static int afs_release_lock(struct afs_vnode *vnode, struct key *key) ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, vnode, key)) { while (afs_select_current_fileserver(&fc)) { - fc.cb_break = vnode->cb_break + vnode->cb_s_break; + fc.cb_break = afs_calc_vnode_cb_break(vnode); afs_fs_release_lock(&fc); } diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index d16f26c6cdbe..b273e1d60478 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -261,7 +261,7 @@ static void xdr_decode_AFSCallBack(struct afs_call *call, write_seqlock(&vnode->cb_lock); - if (call->cb_break == (vnode->cb_break + cbi->server->cb_s_break)) { + if (call->cb_break == afs_cb_break_sum(vnode, cbi)) { vnode->cb_version = ntohl(*bp++); cb_expiry = ntohl(*bp++); vnode->cb_type = ntohl(*bp++); diff --git a/fs/afs/inode.c b/fs/afs/inode.c index e855c6e5cf28..479b7fdda124 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -108,7 +108,7 @@ int afs_fetch_status(struct afs_vnode *vnode, struct key *key, bool new_inode) ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, vnode, key)) { while (afs_select_fileserver(&fc)) { - fc.cb_break = vnode->cb_break + vnode->cb_s_break; + fc.cb_break = afs_calc_vnode_cb_break(vnode); afs_fs_fetch_file_status(&fc, NULL, new_inode); } @@ -393,15 +393,18 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) read_seqlock_excl(&vnode->cb_lock); if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { - if (vnode->cb_s_break != vnode->cb_interest->server->cb_s_break) { + if (vnode->cb_s_break != vnode->cb_interest->server->cb_s_break || + vnode->cb_v_break != vnode->volume->cb_v_break) { vnode->cb_s_break = vnode->cb_interest->server->cb_s_break; + vnode->cb_v_break = vnode->volume->cb_v_break; + valid = false; } else if (vnode->status.type == AFS_FTYPE_DIR && test_bit(AFS_VNODE_DIR_VALID, &vnode->flags) && vnode->cb_expires_at - 10 > now) { - valid = true; + valid = true; } else if (!test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags) && vnode->cb_expires_at - 10 > now) { - valid = true; + valid = true; } } else if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) { valid = true; @@ -574,7 +577,7 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr) ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, vnode, key)) { while (afs_select_fileserver(&fc)) { - fc.cb_break = vnode->cb_break + vnode->cb_s_break; + fc.cb_break = afs_calc_vnode_cb_break(vnode); afs_fs_setattr(&fc, attr); } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index e75e57e13320..e3f8a46663db 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -461,6 +461,9 @@ struct afs_volume { rwlock_t servers_lock; /* Lock for ->servers */ unsigned int servers_seq; /* Incremented each time ->servers changes */ + unsigned cb_v_break; /* Break-everything counter. */ + rwlock_t cb_break_lock; + afs_voltype_t type; /* type of volume */ short error; char type_force; /* force volume type (suppress R/O -> R/W) */ @@ -521,6 +524,7 @@ struct afs_vnode { /* outstanding callback notification on this file */ struct afs_cb_interest *cb_interest; /* Server on which this resides */ unsigned int cb_s_break; /* Mass break counter on ->server */ + unsigned int cb_v_break; /* Mass break counter on ->volume */ unsigned int cb_break; /* Break counter on vnode */ seqlock_t cb_lock; /* Lock for ->cb_interest, ->status, ->cb_*break */ @@ -662,6 +666,17 @@ static inline struct afs_cb_interest *afs_get_cb_interest(struct afs_cb_interest return cbi; } +static inline unsigned int afs_calc_vnode_cb_break(struct afs_vnode *vnode) +{ + return vnode->cb_break + vnode->cb_s_break + vnode->cb_v_break; +} + +static inline unsigned int afs_cb_break_sum(struct afs_vnode *vnode, + struct afs_cb_interest *cbi) +{ + return vnode->cb_break + cbi->server->cb_s_break + vnode->volume->cb_v_break; +} + /* * cell.c */ diff --git a/fs/afs/security.c b/fs/afs/security.c index cea2fff313dc..1992b0ffa543 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c @@ -147,8 +147,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key, break; } - if (cb_break != (vnode->cb_break + - vnode->cb_interest->server->cb_s_break)) { + if (cb_break != afs_cb_break_sum(vnode, vnode->cb_interest)) { changed = true; break; } @@ -178,7 +177,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key, } } - if (cb_break != (vnode->cb_break + vnode->cb_interest->server->cb_s_break)) + if (cb_break != afs_cb_break_sum(vnode, vnode->cb_interest)) goto someone_else_changed_it; /* We need a ref on any permits list we want to copy as we'll have to @@ -257,7 +256,7 @@ found: spin_lock(&vnode->lock); zap = rcu_access_pointer(vnode->permit_cache); - if (cb_break == (vnode->cb_break + vnode->cb_interest->server->cb_s_break) && + if (cb_break == afs_cb_break_sum(vnode, vnode->cb_interest) && zap == permits) rcu_assign_pointer(vnode->permit_cache, replacement); else diff --git a/fs/afs/super.c b/fs/afs/super.c index b02838cd9525..9e5d7966621c 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -688,7 +688,7 @@ static int afs_statfs(struct dentry *dentry, struct kstatfs *buf) if (afs_begin_vnode_operation(&fc, vnode, key)) { fc.flags |= AFS_FS_CURSOR_NO_VSLEEP; while (afs_select_fileserver(&fc)) { - fc.cb_break = vnode->cb_break + vnode->cb_s_break; + fc.cb_break = afs_calc_vnode_cb_break(vnode); afs_fs_get_volume_status(&fc, &vs); } diff --git a/fs/afs/write.c b/fs/afs/write.c index c164698dc304..8b39e6ebb40b 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -351,7 +351,7 @@ found_key: ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, vnode, wbk->key)) { while (afs_select_fileserver(&fc)) { - fc.cb_break = vnode->cb_break + vnode->cb_s_break; + fc.cb_break = afs_calc_vnode_cb_break(vnode); afs_fs_store_data(&fc, mapping, first, last, offset, to); } From 428edade4e6c70e5b51fcd4188d944fbb744d84c Mon Sep 17 00:00:00 2001 From: David Howells Date: Sat, 12 May 2018 00:28:58 +0100 Subject: [PATCH 289/347] afs: Fix CB.CallBack handling The handling of CB.CallBack messages sent by the fileserver to the client is broken in that they are currently being processed after the reply has been transmitted. This is not what the fileserver expects, however. It holds up change visibility until the reply comes so as to maintain cache coherency, and so expects the client to have to refetch the state on the affected files. Fix CB.CallBack handling to perform the callback break before sending the reply. The fileserver is free to hold up status fetches issued by other threads on the same client that occur in reponse to the callback until any pending changes have been committed. Fixes: d001648ec7cf ("rxrpc: Don't expose skbs to in-kernel users [ver #2]") Signed-off-by: David Howells --- fs/afs/cmservice.c | 35 +++++++---------------------------- 1 file changed, 7 insertions(+), 28 deletions(-) diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index b44491410af3..c332c95a6940 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -133,21 +133,10 @@ bool afs_cm_incoming_call(struct afs_call *call) } /* - * clean up a cache manager call + * Clean up a cache manager call. */ static void afs_cm_destructor(struct afs_call *call) { - _enter(""); - - /* Break the callbacks here so that we do it after the final ACK is - * received. The step number here must match the final number in - * afs_deliver_cb_callback(). - */ - if (call->cm_server && call->unmarshall == 5) { - ASSERT(call->count && call->request); - afs_break_callbacks(call->cm_server, call->count, call->request); - } - kfree(call->buffer); call->buffer = NULL; } @@ -161,15 +150,14 @@ static void SRXAFSCB_CallBack(struct work_struct *work) _enter(""); - /* be sure to send the reply *before* attempting to spam the AFS server - * with FSFetchStatus requests on the vnodes with broken callbacks lest - * the AFS server get into a vicious cycle of trying to break further - * callbacks because it hadn't received completion of the CBCallBack op - * yet */ - afs_send_empty_reply(call); - + /* We need to break the callbacks before sending the reply as the + * server holds up change visibility till it receives our reply so as + * to maintain cache coherency. + */ if (call->cm_server) afs_break_callbacks(call->cm_server, call->count, call->request); + + afs_send_empty_reply(call); afs_put_call(call); _leave(""); } @@ -267,15 +255,6 @@ static int afs_deliver_cb_callback(struct afs_call *call) call->offset = 0; call->unmarshall++; - - /* Record that the message was unmarshalled successfully so - * that the call destructor can know do the callback breaking - * work, even if the final ACK isn't received. - * - * If the step number changes, then afs_cm_destructor() must be - * updated also. - */ - call->unmarshall++; case 5: break; } From 4776cab43fd3111618112737a257dc3ef368eddd Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 10 May 2018 23:10:40 +0100 Subject: [PATCH 290/347] afs: Fix the non-encryption of calls Some AFS servers refuse to accept unencrypted traffic, so can't be accessed with kAFS. Set the AF_RXRPC security level to encrypt client calls to deal with this. Note that incoming service calls are set by the remote client and so aren't affected by this. This requires an AF_RXRPC patch to pass the value set by setsockopt to calls begun by the kernel. Signed-off-by: David Howells --- fs/afs/rxrpc.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index d0eee5d32c94..08735948f15d 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -41,6 +41,7 @@ int afs_open_socket(struct afs_net *net) { struct sockaddr_rxrpc srx; struct socket *socket; + unsigned int min_level; int ret; _enter(""); @@ -60,6 +61,12 @@ int afs_open_socket(struct afs_net *net) srx.transport.sin6.sin6_family = AF_INET6; srx.transport.sin6.sin6_port = htons(AFS_CM_PORT); + min_level = RXRPC_SECURITY_ENCRYPT; + ret = kernel_setsockopt(socket, SOL_RXRPC, RXRPC_MIN_SECURITY_LEVEL, + (void *)&min_level, sizeof(min_level)); + if (ret < 0) + goto error_2; + ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx)); if (ret == -EADDRINUSE) { srx.transport.sin6.sin6_port = 0; From ebc3dd688cd988754a304147753b13e58de1b5a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Gl=C3=B6ckner?= Date: Mon, 14 May 2018 09:40:05 -0500 Subject: [PATCH 291/347] usb: musb: fix remote wakeup racing with suspend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It has been observed that writing 0xF2 to the power register while it reads as 0xF4 results in the register having the value 0xF0, i.e. clearing RESUME and setting SUSPENDM in one go does not work. It might also violate the USB spec to transition directly from resume to suspend, especially when not taking T_DRSMDN into account. But this is what happens when a remote wakeup occurs between SetPortFeature USB_PORT_FEAT_SUSPEND on the root hub and musb_bus_suspend being called. This commit returns -EBUSY when musb_bus_suspend is called while remote wakeup is signalled and thus avoids to reset the RESUME bit. Ignoring this error when musb_port_suspend is called from musb_hub_control is ok. Signed-off-by: Daniel Glöckner Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_host.c | 5 ++++- drivers/usb/musb/musb_host.h | 7 +++++-- drivers/usb/musb/musb_virthub.c | 25 +++++++++++++++---------- 3 files changed, 24 insertions(+), 13 deletions(-) diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c index e7f99d55922a..15a42cee0a9c 100644 --- a/drivers/usb/musb/musb_host.c +++ b/drivers/usb/musb/musb_host.c @@ -2524,8 +2524,11 @@ static int musb_bus_suspend(struct usb_hcd *hcd) { struct musb *musb = hcd_to_musb(hcd); u8 devctl; + int ret; - musb_port_suspend(musb, true); + ret = musb_port_suspend(musb, true); + if (ret) + return ret; if (!is_host_active(musb)) return 0; diff --git a/drivers/usb/musb/musb_host.h b/drivers/usb/musb/musb_host.h index 72392bbcd0a4..2999845632ce 100644 --- a/drivers/usb/musb/musb_host.h +++ b/drivers/usb/musb/musb_host.h @@ -67,7 +67,7 @@ extern void musb_host_rx(struct musb *, u8); extern void musb_root_disconnect(struct musb *musb); extern void musb_host_resume_root_hub(struct musb *musb); extern void musb_host_poke_root_hub(struct musb *musb); -extern void musb_port_suspend(struct musb *musb, bool do_suspend); +extern int musb_port_suspend(struct musb *musb, bool do_suspend); extern void musb_port_reset(struct musb *musb, bool do_reset); extern void musb_host_finish_resume(struct work_struct *work); #else @@ -99,7 +99,10 @@ static inline void musb_root_disconnect(struct musb *musb) {} static inline void musb_host_resume_root_hub(struct musb *musb) {} static inline void musb_host_poll_rh_status(struct musb *musb) {} static inline void musb_host_poke_root_hub(struct musb *musb) {} -static inline void musb_port_suspend(struct musb *musb, bool do_suspend) {} +static inline int musb_port_suspend(struct musb *musb, bool do_suspend) +{ + return 0; +} static inline void musb_port_reset(struct musb *musb, bool do_reset) {} static inline void musb_host_finish_resume(struct work_struct *work) {} #endif diff --git a/drivers/usb/musb/musb_virthub.c b/drivers/usb/musb/musb_virthub.c index 5165d2b07ade..2f8dd9826e94 100644 --- a/drivers/usb/musb/musb_virthub.c +++ b/drivers/usb/musb/musb_virthub.c @@ -48,14 +48,14 @@ void musb_host_finish_resume(struct work_struct *work) spin_unlock_irqrestore(&musb->lock, flags); } -void musb_port_suspend(struct musb *musb, bool do_suspend) +int musb_port_suspend(struct musb *musb, bool do_suspend) { struct usb_otg *otg = musb->xceiv->otg; u8 power; void __iomem *mbase = musb->mregs; if (!is_host_active(musb)) - return; + return 0; /* NOTE: this doesn't necessarily put PHY into low power mode, * turning off its clock; that's a function of PHY integration and @@ -66,16 +66,20 @@ void musb_port_suspend(struct musb *musb, bool do_suspend) if (do_suspend) { int retries = 10000; - power &= ~MUSB_POWER_RESUME; - power |= MUSB_POWER_SUSPENDM; - musb_writeb(mbase, MUSB_POWER, power); + if (power & MUSB_POWER_RESUME) + return -EBUSY; - /* Needed for OPT A tests */ - power = musb_readb(mbase, MUSB_POWER); - while (power & MUSB_POWER_SUSPENDM) { + if (!(power & MUSB_POWER_SUSPENDM)) { + power |= MUSB_POWER_SUSPENDM; + musb_writeb(mbase, MUSB_POWER, power); + + /* Needed for OPT A tests */ power = musb_readb(mbase, MUSB_POWER); - if (retries-- < 1) - break; + while (power & MUSB_POWER_SUSPENDM) { + power = musb_readb(mbase, MUSB_POWER); + if (retries-- < 1) + break; + } } musb_dbg(musb, "Root port suspended, power %02x", power); @@ -111,6 +115,7 @@ void musb_port_suspend(struct musb *musb, bool do_suspend) schedule_delayed_work(&musb->finish_resume_work, msecs_to_jiffies(USB_RESUME_TIMEOUT)); } + return 0; } void musb_port_reset(struct musb *musb, bool do_reset) From 0f8db8cc73df60b3de9a5eebd8f117b56eff5b03 Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Fri, 11 May 2018 20:15:11 +0300 Subject: [PATCH 292/347] selinux: add AF_UNSPEC and INADDR_ANY checks to selinux_socket_bind() Commit d452930fd3b9 ("selinux: Add SCTP support") breaks compatibility with the old programs that can pass sockaddr_in structure with AF_UNSPEC and INADDR_ANY to bind(). As a result, bind() returns EAFNOSUPPORT error. This was found with LTP/asapi_01 test. Similar to commit 29c486df6a20 ("net: ipv4: relax AF_INET check in bind()"), which relaxed AF_INET check for compatibility, add AF_UNSPEC case to AF_INET and make sure that the address is INADDR_ANY. Fixes: d452930fd3b9 ("selinux: Add SCTP support") Signed-off-by: Alexey Kodanev Signed-off-by: Paul Moore --- security/selinux/hooks.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 21b377aef69a..16df6cca9a1b 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -4568,6 +4568,7 @@ static int selinux_socket_post_create(struct socket *sock, int family, static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, int addrlen) { struct sock *sk = sock->sk; + struct sk_security_struct *sksec = sk->sk_security; u16 family; int err; @@ -4579,11 +4580,11 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in family = sk->sk_family; if (family == PF_INET || family == PF_INET6) { char *addrp; - struct sk_security_struct *sksec = sk->sk_security; struct common_audit_data ad; struct lsm_network_audit net = {0,}; struct sockaddr_in *addr4 = NULL; struct sockaddr_in6 *addr6 = NULL; + u16 family_sa = address->sa_family; unsigned short snum; u32 sid, node_perm; @@ -4593,11 +4594,20 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in * need to check address->sa_family as it is possible to have * sk->sk_family = PF_INET6 with addr->sa_family = AF_INET. */ - switch (address->sa_family) { + switch (family_sa) { + case AF_UNSPEC: case AF_INET: if (addrlen < sizeof(struct sockaddr_in)) return -EINVAL; addr4 = (struct sockaddr_in *)address; + if (family_sa == AF_UNSPEC) { + /* see __inet_bind(), we only want to allow + * AF_UNSPEC if the address is INADDR_ANY + */ + if (addr4->sin_addr.s_addr != htonl(INADDR_ANY)) + goto err_af; + family_sa = AF_INET; + } snum = ntohs(addr4->sin_port); addrp = (char *)&addr4->sin_addr.s_addr; break; @@ -4609,13 +4619,7 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in addrp = (char *)&addr6->sin6_addr.s6_addr; break; default: - /* Note that SCTP services expect -EINVAL, whereas - * others expect -EAFNOSUPPORT. - */ - if (sksec->sclass == SECCLASS_SCTP_SOCKET) - return -EINVAL; - else - return -EAFNOSUPPORT; + goto err_af; } if (snum) { @@ -4673,7 +4677,7 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in ad.u.net->sport = htons(snum); ad.u.net->family = family; - if (address->sa_family == AF_INET) + if (family_sa == AF_INET) ad.u.net->v4info.saddr = addr4->sin_addr.s_addr; else ad.u.net->v6info.saddr = addr6->sin6_addr; @@ -4686,6 +4690,11 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in } out: return err; +err_af: + /* Note that SCTP services expect -EINVAL, others -EAFNOSUPPORT. */ + if (sksec->sclass == SECCLASS_SCTP_SOCKET) + return -EINVAL; + return -EAFNOSUPPORT; } /* This supports connect(2) and SCTP connect services such as sctp_connectx(3) From 88b7d370bb4b1280717ebdacd6748456f9ba484f Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Fri, 11 May 2018 20:15:12 +0300 Subject: [PATCH 293/347] selinux: fix address family in bind() and connect() to match address/port Since sctp_bindx() and sctp_connectx() can have multiple addresses, sk_family can differ from sa_family. Therefore, selinux_socket_bind() and selinux_socket_connect_helper(), which process sockaddr structure (address and port), should use the address family from that structure too, and not from the socket one. The initialization of the data for the audit record is moved above, in selinux_socket_bind(), so that there is no duplicate changes and code. Fixes: d452930fd3b9 ("selinux: Add SCTP support") Suggested-by: Paul Moore Signed-off-by: Alexey Kodanev Signed-off-by: Paul Moore --- security/selinux/hooks.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 16df6cca9a1b..f5f2d6a582f0 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -4622,6 +4622,11 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in goto err_af; } + ad.type = LSM_AUDIT_DATA_NET; + ad.u.net = &net; + ad.u.net->sport = htons(snum); + ad.u.net->family = family_sa; + if (snum) { int low, high; @@ -4633,10 +4638,6 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in snum, &sid); if (err) goto out; - ad.type = LSM_AUDIT_DATA_NET; - ad.u.net = &net; - ad.u.net->sport = htons(snum); - ad.u.net->family = family; err = avc_has_perm(&selinux_state, sksec->sid, sid, sksec->sclass, @@ -4668,15 +4669,10 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in break; } - err = sel_netnode_sid(addrp, family, &sid); + err = sel_netnode_sid(addrp, family_sa, &sid); if (err) goto out; - ad.type = LSM_AUDIT_DATA_NET; - ad.u.net = &net; - ad.u.net->sport = htons(snum); - ad.u.net->family = family; - if (family_sa == AF_INET) ad.u.net->v4info.saddr = addr4->sin_addr.s_addr; else @@ -4772,7 +4768,7 @@ static int selinux_socket_connect_helper(struct socket *sock, ad.type = LSM_AUDIT_DATA_NET; ad.u.net = &net; ad.u.net->dport = htons(snum); - ad.u.net->family = sk->sk_family; + ad.u.net->family = address->sa_family; err = avc_has_perm(&selinux_state, sksec->sid, sid, sksec->sclass, perm, &ad); if (err) From 4152dc91b5932e7fe49a5afed62a068b2f31d196 Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Fri, 11 May 2018 20:15:13 +0300 Subject: [PATCH 294/347] selinux: correctly handle sa_family cases in selinux_sctp_bind_connect() Allow to pass the socket address structure with AF_UNSPEC family for compatibility purposes. selinux_socket_bind() will further check it for INADDR_ANY and selinux_socket_connect_helper() should return EINVAL. For a bad address family return EINVAL instead of AFNOSUPPORT error, i.e. what is expected from SCTP protocol in such case. Fixes: d452930fd3b9 ("selinux: Add SCTP support") Suggested-by: Paul Moore Signed-off-by: Alexey Kodanev Signed-off-by: Paul Moore --- security/selinux/hooks.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index f5f2d6a582f0..efeb1db8f61d 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -5269,6 +5269,7 @@ static int selinux_sctp_bind_connect(struct sock *sk, int optname, while (walk_size < addrlen) { addr = addr_buf; switch (addr->sa_family) { + case AF_UNSPEC: case AF_INET: len = sizeof(struct sockaddr_in); break; @@ -5276,7 +5277,7 @@ static int selinux_sctp_bind_connect(struct sock *sk, int optname, len = sizeof(struct sockaddr_in6); break; default: - return -EAFNOSUPPORT; + return -EINVAL; } err = -EINVAL; From 7b34c0fb1b51a6377752fc971e57577eeb102d60 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Tue, 8 May 2018 14:06:15 -0700 Subject: [PATCH 295/347] ACPICA: Add deferred package support for the Load and loadTable operators Completes the support and fixes a regression introduced in version 20180209. The regression caused package objects that were loaded by the Load and loadTable operators. This created an error message like the following: [ 0.251922] ACPI Error: No pointer back to namespace node in package 00000000fd2a44cd (20180313/dsargs-303) Link: https://bugzilla.kernel.org/show_bug.cgi?id=199413 Fixes: 5a8361f7ecce (ACPICA: Integrate package handling with module-level code) Signed-off-by: Bob Moore Signed-off-by: Erik Schmauss Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/acnamesp.h | 4 ++ drivers/acpi/acpica/exconfig.c | 14 +++++++ drivers/acpi/acpica/nsinit.c | 76 +++++++++++++++++++++++++--------- 3 files changed, 74 insertions(+), 20 deletions(-) diff --git a/drivers/acpi/acpica/acnamesp.h b/drivers/acpi/acpica/acnamesp.h index 514aaf948ea9..3825df923480 100644 --- a/drivers/acpi/acpica/acnamesp.h +++ b/drivers/acpi/acpica/acnamesp.h @@ -56,6 +56,10 @@ acpi_status acpi_ns_initialize_objects(void); acpi_status acpi_ns_initialize_devices(u32 flags); +acpi_status +acpi_ns_init_one_package(acpi_handle obj_handle, + u32 level, void *context, void **return_value); + /* * nsload - Namespace loading */ diff --git a/drivers/acpi/acpica/exconfig.c b/drivers/acpi/acpica/exconfig.c index 99d92cb32803..f85c6f3271f6 100644 --- a/drivers/acpi/acpica/exconfig.c +++ b/drivers/acpi/acpica/exconfig.c @@ -174,6 +174,13 @@ acpi_ex_load_table_op(struct acpi_walk_state *walk_state, return_ACPI_STATUS(status); } + /* Complete the initialization/resolution of package objects */ + + status = acpi_ns_walk_namespace(ACPI_TYPE_PACKAGE, ACPI_ROOT_OBJECT, + ACPI_UINT32_MAX, 0, + acpi_ns_init_one_package, NULL, NULL, + NULL); + /* Parameter Data (optional) */ if (parameter_node) { @@ -430,6 +437,13 @@ acpi_ex_load_op(union acpi_operand_object *obj_desc, return_ACPI_STATUS(status); } + /* Complete the initialization/resolution of package objects */ + + status = acpi_ns_walk_namespace(ACPI_TYPE_PACKAGE, ACPI_ROOT_OBJECT, + ACPI_UINT32_MAX, 0, + acpi_ns_init_one_package, NULL, NULL, + NULL); + /* Store the ddb_handle into the Target operand */ status = acpi_ex_store(ddb_handle, target, walk_state); diff --git a/drivers/acpi/acpica/nsinit.c b/drivers/acpi/acpica/nsinit.c index 77f2b5f4948a..d77257d1c827 100644 --- a/drivers/acpi/acpica/nsinit.c +++ b/drivers/acpi/acpica/nsinit.c @@ -240,6 +240,58 @@ error_exit: return_ACPI_STATUS(status); } +/******************************************************************************* + * + * FUNCTION: acpi_ns_init_one_package + * + * PARAMETERS: obj_handle - Node + * level - Current nesting level + * context - Not used + * return_value - Not used + * + * RETURN: Status + * + * DESCRIPTION: Callback from acpi_walk_namespace. Invoked for every package + * within the namespace. Used during dynamic load of an SSDT. + * + ******************************************************************************/ + +acpi_status +acpi_ns_init_one_package(acpi_handle obj_handle, + u32 level, void *context, void **return_value) +{ + acpi_status status; + union acpi_operand_object *obj_desc; + struct acpi_namespace_node *node = + (struct acpi_namespace_node *)obj_handle; + + obj_desc = acpi_ns_get_attached_object(node); + if (!obj_desc) { + return (AE_OK); + } + + /* Exit if package is already initialized */ + + if (obj_desc->package.flags & AOPOBJ_DATA_VALID) { + return (AE_OK); + } + + status = acpi_ds_get_package_arguments(obj_desc); + if (ACPI_FAILURE(status)) { + return (AE_OK); + } + + status = + acpi_ut_walk_package_tree(obj_desc, NULL, + acpi_ds_init_package_element, NULL); + if (ACPI_FAILURE(status)) { + return (AE_OK); + } + + obj_desc->package.flags |= AOPOBJ_DATA_VALID; + return (AE_OK); +} + /******************************************************************************* * * FUNCTION: acpi_ns_init_one_object @@ -360,27 +412,11 @@ acpi_ns_init_one_object(acpi_handle obj_handle, case ACPI_TYPE_PACKAGE: + /* Complete the initialization/resolution of the package object */ + info->package_init++; - status = acpi_ds_get_package_arguments(obj_desc); - if (ACPI_FAILURE(status)) { - break; - } - - ACPI_DEBUG_PRINT_RAW((ACPI_DB_PARSE, - "%s: Completing resolution of Package elements\n", - ACPI_GET_FUNCTION_NAME)); - - /* - * Resolve all named references in package objects (and all - * sub-packages). This action has been deferred until the entire - * namespace has been loaded, in order to support external and - * forward references from individual package elements (05/2017). - */ - status = acpi_ut_walk_package_tree(obj_desc, NULL, - acpi_ds_init_package_element, - NULL); - - obj_desc->package.flags |= AOPOBJ_DATA_VALID; + status = + acpi_ns_init_one_package(obj_handle, level, NULL, NULL); break; default: From 0cf442c6bcf572e04f5690340d5b8e62afcee2ca Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 24 Apr 2018 17:45:06 +0200 Subject: [PATCH 296/347] cpufreq: armada-37xx: driver relies on cpufreq-dt Armada-37xx driver registers a cpufreq-dt driver. Not having CONFIG_CPUFREQ_DT selected leads to a silent abort during the probe. Prevent that situation by having the former depending on the latter. Fixes: 92ce45fb875d7 (cpufreq: Add DVFS support for Armada 37xx) Cc: 4.16+ # 4.16+ Signed-off-by: Miquel Raynal Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/Kconfig.arm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index de55c7d57438..96b35b8b3606 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -20,7 +20,7 @@ config ACPI_CPPC_CPUFREQ config ARM_ARMADA_37XX_CPUFREQ tristate "Armada 37xx CPUFreq support" - depends on ARCH_MVEBU + depends on ARCH_MVEBU && CPUFREQ_DT help This adds the CPUFreq driver support for Marvell Armada 37xx SoCs. The Armada 37xx PMU supports 4 frequency and VDD levels. From 45dd9b0666a162f8e4be76096716670cf1741f0e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 9 May 2018 14:36:09 -0400 Subject: [PATCH 297/347] tracing/x86/xen: Remove zero data size trace events trace_xen_mmu_flush_tlb{_all} Doing an audit of trace events, I discovered two trace events in the xen subsystem that use a hack to create zero data size trace events. This is not what trace events are for. Trace events add memory footprint overhead, and if all you need to do is see if a function is hit or not, simply make that function noinline and use function tracer filtering. Worse yet, the hack used was: __array(char, x, 0) Which creates a static string of zero in length. There's assumptions about such constructs in ftrace that this is a dynamic string that is nul terminated. This is not the case with these tracepoints and can cause problems in various parts of ftrace. Nuke the trace events! Link: http://lkml.kernel.org/r/20180509144605.5a220327@gandalf.local.home Cc: stable@vger.kernel.org Fixes: 95a7d76897c1e ("xen/mmu: Use Xen specific TLB flush instead of the generic one.") Reviewed-by: Juergen Gross Signed-off-by: Steven Rostedt (VMware) --- arch/x86/xen/mmu.c | 4 +--- arch/x86/xen/mmu_pv.c | 4 +--- include/trace/events/xen.h | 16 ---------------- 3 files changed, 2 insertions(+), 22 deletions(-) diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index d33e7dbe3129..2d76106788a3 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -42,13 +42,11 @@ xmaddr_t arbitrary_virt_to_machine(void *vaddr) } EXPORT_SYMBOL_GPL(arbitrary_virt_to_machine); -static void xen_flush_tlb_all(void) +static noinline void xen_flush_tlb_all(void) { struct mmuext_op *op; struct multicall_space mcs; - trace_xen_mmu_flush_tlb_all(0); - preempt_disable(); mcs = xen_mc_entry(sizeof(*op)); diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 486c0a34d00b..2c30cabfda90 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -1310,13 +1310,11 @@ unsigned long xen_read_cr2_direct(void) return this_cpu_read(xen_vcpu_info.arch.cr2); } -static void xen_flush_tlb(void) +static noinline void xen_flush_tlb(void) { struct mmuext_op *op; struct multicall_space mcs; - trace_xen_mmu_flush_tlb(0); - preempt_disable(); mcs = xen_mc_entry(sizeof(*op)); diff --git a/include/trace/events/xen.h b/include/trace/events/xen.h index 7dd8f34c37df..fdcf88bcf0ea 100644 --- a/include/trace/events/xen.h +++ b/include/trace/events/xen.h @@ -352,22 +352,6 @@ DECLARE_EVENT_CLASS(xen_mmu_pgd, DEFINE_XEN_MMU_PGD_EVENT(xen_mmu_pgd_pin); DEFINE_XEN_MMU_PGD_EVENT(xen_mmu_pgd_unpin); -TRACE_EVENT(xen_mmu_flush_tlb_all, - TP_PROTO(int x), - TP_ARGS(x), - TP_STRUCT__entry(__array(char, x, 0)), - TP_fast_assign((void)x), - TP_printk("%s", "") - ); - -TRACE_EVENT(xen_mmu_flush_tlb, - TP_PROTO(int x), - TP_ARGS(x), - TP_STRUCT__entry(__array(char, x, 0)), - TP_fast_assign((void)x), - TP_printk("%s", "") - ); - TRACE_EVENT(xen_mmu_flush_tlb_one_user, TP_PROTO(unsigned long addr), TP_ARGS(addr), From 76ef6b28ea4f81c3d511866a9b31392caa833126 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 15 May 2018 13:38:15 +1000 Subject: [PATCH 298/347] drm: set FMODE_UNSIGNED_OFFSET for drm files Since we have the ttm and gem vma managers using a subset of the file address space for objects, and these start at 0x100000000 they will overflow the new mmap checks. I've checked all the mmap routines I could see for any bad behaviour but overall most people use GEM/TTM VMA managers even the legacy drivers have a hashtable. Reported-and-Tested-by: Arthur Marsh (amarsh04 on #radeon) Fixes: be83bbf8068 (mmap: introduce sane default mmap limits) Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_file.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c index e394799979a6..6d9b9453707c 100644 --- a/drivers/gpu/drm/drm_file.c +++ b/drivers/gpu/drm/drm_file.c @@ -212,6 +212,7 @@ static int drm_open_helper(struct file *filp, struct drm_minor *minor) return -ENOMEM; filp->private_data = priv; + filp->f_mode |= FMODE_UNSIGNED_OFFSET; priv->filp = filp; priv->pid = get_pid(task_pid(current)); priv->minor = minor; From c99f0802e42fcd38e84ee4d306691805ebed204f Mon Sep 17 00:00:00 2001 From: Jorge Sanjuan Date: Fri, 11 May 2018 16:25:35 +0100 Subject: [PATCH 299/347] ALSA: usb-audio: Use Class Specific EP for UAC3 devices. bmAtributes offset doesn't exist in the UAC3 CS_EP descriptor. Hence, checking for pitch control as if it was UAC2 doesn't make any sense. Use the defined UAC3 offsets instead. Fixes: 9a2fe9b801f5 ("ALSA: usb: initial USB Audio Device Class 3.0 support") Signed-off-by: Jorge Sanjuan Reviewed-by: Ruslan Bilovol Signed-off-by: Takashi Iwai --- sound/usb/stream.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/sound/usb/stream.c b/sound/usb/stream.c index 956be9f7c72a..5ed334575fc7 100644 --- a/sound/usb/stream.c +++ b/sound/usb/stream.c @@ -576,7 +576,7 @@ static int parse_uac_endpoint_attributes(struct snd_usb_audio *chip, if (protocol == UAC_VERSION_1) { attributes = csep->bmAttributes; - } else { + } else if (protocol == UAC_VERSION_2) { struct uac2_iso_endpoint_descriptor *csep2 = (struct uac2_iso_endpoint_descriptor *) csep; @@ -585,6 +585,13 @@ static int parse_uac_endpoint_attributes(struct snd_usb_audio *chip, /* emulate the endpoint attributes of a v1 device */ if (csep2->bmControls & UAC2_CONTROL_PITCH) attributes |= UAC_EP_CS_ATTR_PITCH_CONTROL; + } else { /* UAC_VERSION_3 */ + struct uac3_iso_endpoint_descriptor *csep3 = + (struct uac3_iso_endpoint_descriptor *) csep; + + /* emulate the endpoint attributes of a v1 device */ + if (le32_to_cpu(csep3->bmControls) & UAC2_CONTROL_PITCH) + attributes |= UAC_EP_CS_ATTR_PITCH_CONTROL; } return attributes; From e521813468f786271a87e78e8644243bead48fad Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 2 May 2018 08:48:43 +0200 Subject: [PATCH 300/347] s390/qdio: fix access to uninitialized qdio_q fields Ever since CQ/QAOB support was added, calling qdio_free() straight after qdio_alloc() results in qdio_release_memory() accessing uninitialized memory (ie. q->u.out.use_cq and q->u.out.aobs). Followed by a kmem_cache_free() on the random AOB addresses. For older kernels that don't have 6e30c549f6ca, the same applies if qdio_establish() fails in the DEV_STATE_ONLINE check. While initializing q->u.out.use_cq would be enough to fix this particular bug, the more future-proof change is to just zero-alloc the whole struct. Fixes: 104ea556ee7f ("qdio: support asynchronous delivery of storage blocks") Cc: #v3.2+ Signed-off-by: Julian Wiedmann Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/qdio_setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c index 439991d71b14..22ddb49e0f50 100644 --- a/drivers/s390/cio/qdio_setup.c +++ b/drivers/s390/cio/qdio_setup.c @@ -141,7 +141,7 @@ static int __qdio_allocate_qs(struct qdio_q **irq_ptr_qs, int nr_queues) int i; for (i = 0; i < nr_queues; i++) { - q = kmem_cache_alloc(qdio_q_cache, GFP_KERNEL); + q = kmem_cache_zalloc(qdio_q_cache, GFP_KERNEL); if (!q) return -ENOMEM; From 2e68adcd2fb21b7188ba449f0fab3bee2910e500 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Wed, 2 May 2018 08:28:34 +0200 Subject: [PATCH 301/347] s390/qdio: don't release memory in qdio_setup_irq() Calling qdio_release_memory() on error is just plain wrong. It frees the main qdio_irq struct, when following code still uses it. Also, no other error path in qdio_establish() does this. So trust callers to clean up via qdio_free() if some step of the QDIO initialization fails. Fixes: 779e6e1c724d ("[S390] qdio: new qdio driver.") Cc: #v2.6.27+ Signed-off-by: Julian Wiedmann Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/qdio_setup.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c index 22ddb49e0f50..4c14ce428e92 100644 --- a/drivers/s390/cio/qdio_setup.c +++ b/drivers/s390/cio/qdio_setup.c @@ -456,7 +456,6 @@ int qdio_setup_irq(struct qdio_initialize *init_data) { struct ciw *ciw; struct qdio_irq *irq_ptr = init_data->cdev->private->qdio_data; - int rc; memset(&irq_ptr->qib, 0, sizeof(irq_ptr->qib)); memset(&irq_ptr->siga_flag, 0, sizeof(irq_ptr->siga_flag)); @@ -493,16 +492,14 @@ int qdio_setup_irq(struct qdio_initialize *init_data) ciw = ccw_device_get_ciw(init_data->cdev, CIW_TYPE_EQUEUE); if (!ciw) { DBF_ERROR("%4x NO EQ", irq_ptr->schid.sch_no); - rc = -EINVAL; - goto out_err; + return -EINVAL; } irq_ptr->equeue = *ciw; ciw = ccw_device_get_ciw(init_data->cdev, CIW_TYPE_AQUEUE); if (!ciw) { DBF_ERROR("%4x NO AQ", irq_ptr->schid.sch_no); - rc = -EINVAL; - goto out_err; + return -EINVAL; } irq_ptr->aqueue = *ciw; @@ -512,9 +509,6 @@ int qdio_setup_irq(struct qdio_initialize *init_data) init_data->cdev->handler = qdio_int_handler; spin_unlock_irq(get_ccwdev_lock(irq_ptr->cdev)); return 0; -out_err: - qdio_release_memory(irq_ptr); - return rc; } void qdio_print_subchannel_info(struct qdio_irq *irq_ptr, From de9a8634f1cb4560a35696d472cc7f1383d9b866 Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Wed, 9 May 2018 21:46:29 +0200 Subject: [PATCH 302/347] i2c: pmcmsp: return message count on master_xfer success Returning zero is wrong in this case. Signed-off-by: Peter Rosin Signed-off-by: Wolfram Sang Fixes: 1b144df1d7d6 ("i2c: New PMC MSP71xx TWI bus driver") --- drivers/i2c/busses/i2c-pmcmsp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-pmcmsp.c b/drivers/i2c/busses/i2c-pmcmsp.c index 2aa0e83174c5..ec27e27e8d06 100644 --- a/drivers/i2c/busses/i2c-pmcmsp.c +++ b/drivers/i2c/busses/i2c-pmcmsp.c @@ -567,7 +567,7 @@ static int pmcmsptwi_master_xfer(struct i2c_adapter *adap, return -1; } - return 0; + return num; } static u32 pmcmsptwi_i2c_func(struct i2c_adapter *adapter) From 12d9bbc5a7f347eaa65ff2a9d34995cadc05eb1b Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Wed, 9 May 2018 21:46:30 +0200 Subject: [PATCH 303/347] i2c: pmcmsp: fix error return from master_xfer Returning -1 (-EPERM) is not appropriate here, go with -EIO. Signed-off-by: Peter Rosin Signed-off-by: Wolfram Sang Fixes: 1b144df1d7d6 ("i2c: New PMC MSP71xx TWI bus driver") --- drivers/i2c/busses/i2c-pmcmsp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-pmcmsp.c b/drivers/i2c/busses/i2c-pmcmsp.c index ec27e27e8d06..dae8ac618a52 100644 --- a/drivers/i2c/busses/i2c-pmcmsp.c +++ b/drivers/i2c/busses/i2c-pmcmsp.c @@ -564,7 +564,7 @@ static int pmcmsptwi_master_xfer(struct i2c_adapter *adap, * TODO: We could potentially loop and retry in the case * of MSP_TWI_XFER_TIMEOUT. */ - return -1; + return -EIO; } return num; From 35cd67a0caf767aba472452865dcb4471fcce2b1 Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Wed, 9 May 2018 21:47:48 +0200 Subject: [PATCH 304/347] i2c: viperboard: return message count on master_xfer success Returning zero is wrong in this case. Signed-off-by: Peter Rosin Signed-off-by: Wolfram Sang Fixes: 174a13aa8669 ("i2c: Add viperboard i2c master driver") --- drivers/i2c/busses/i2c-viperboard.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-viperboard.c b/drivers/i2c/busses/i2c-viperboard.c index e4be86b3de9a..7235c7302bb7 100644 --- a/drivers/i2c/busses/i2c-viperboard.c +++ b/drivers/i2c/busses/i2c-viperboard.c @@ -337,7 +337,7 @@ static int vprbrd_i2c_xfer(struct i2c_adapter *i2c, struct i2c_msg *msgs, } mutex_unlock(&vb->lock); } - return 0; + return num; error: mutex_unlock(&vb->lock); return error; From 28b68acc4a88dcf91fd1dcf2577371dc9bf574cc Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Wed, 11 Apr 2018 18:13:30 -0600 Subject: [PATCH 305/347] usbip: usbip_host: refine probe and disconnect debug msgs to be useful Refine probe and disconnect debug msgs to be useful and say what is in progress. Signed-off-by: Shuah Khan Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/stub_dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/usbip/stub_dev.c b/drivers/usb/usbip/stub_dev.c index dd8ef36ab10e..7813c1862941 100644 --- a/drivers/usb/usbip/stub_dev.c +++ b/drivers/usb/usbip/stub_dev.c @@ -302,7 +302,7 @@ static int stub_probe(struct usb_device *udev) struct bus_id_priv *busid_priv; int rc; - dev_dbg(&udev->dev, "Enter\n"); + dev_dbg(&udev->dev, "Enter probe\n"); /* check we should claim or not by busid_table */ busid_priv = get_busid_priv(udev_busid); @@ -404,7 +404,7 @@ static void stub_disconnect(struct usb_device *udev) struct bus_id_priv *busid_priv; int rc; - dev_dbg(&udev->dev, "Enter\n"); + dev_dbg(&udev->dev, "Enter disconnect\n"); busid_priv = get_busid_priv(udev_busid); if (!busid_priv) { From 1e180f167d4e413afccbbb4a421b48b2de832549 Mon Sep 17 00:00:00 2001 From: "Shuah Khan (Samsung OSG)" Date: Mon, 30 Apr 2018 16:17:19 -0600 Subject: [PATCH 306/347] usbip: usbip_host: delete device from busid_table after rebind Device is left in the busid_table after unbind and rebind. Rebind initiates usb bus scan and the original driver claims the device. After rescan the device should be deleted from the busid_table as it no longer belongs to usbip_host. Fix it to delete the device after device_attach() succeeds. Signed-off-by: Shuah Khan (Samsung OSG) Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/stub_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/usbip/stub_main.c b/drivers/usb/usbip/stub_main.c index d41d0cdeec0f..fb46bd62d538 100644 --- a/drivers/usb/usbip/stub_main.c +++ b/drivers/usb/usbip/stub_main.c @@ -186,6 +186,9 @@ static ssize_t rebind_store(struct device_driver *dev, const char *buf, if (!bid) return -ENODEV; + /* mark the device for deletion so probe ignores it during rescan */ + bid->status = STUB_BUSID_OTHER; + /* device_attach() callers should hold parent lock for USB */ if (bid->udev->dev.parent) device_lock(bid->udev->dev.parent); @@ -197,6 +200,9 @@ static ssize_t rebind_store(struct device_driver *dev, const char *buf, return ret; } + /* delete device from busid_table */ + del_match_busid((char *) buf); + return count; } From 7510df3f29d44685bab7b1918b61a8ccd57126a9 Mon Sep 17 00:00:00 2001 From: "Shuah Khan (Samsung OSG)" Date: Mon, 30 Apr 2018 16:17:20 -0600 Subject: [PATCH 307/347] usbip: usbip_host: run rebind from exit when module is removed After removing usbip_host module, devices it releases are left without a driver. For example, when a keyboard or a mass storage device are bound to usbip_host when it is removed, these devices are no longer bound to any driver. Fix it to run device_attach() from the module exit routine to restore the devices to their original drivers. This includes cleanup changes and moving device_attach() code to a common routine to be called from rebind_store() and usbip_host_exit(). Signed-off-by: Shuah Khan (Samsung OSG) Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/stub_dev.c | 6 +--- drivers/usb/usbip/stub_main.c | 60 +++++++++++++++++++++++++++++------ 2 files changed, 52 insertions(+), 14 deletions(-) diff --git a/drivers/usb/usbip/stub_dev.c b/drivers/usb/usbip/stub_dev.c index 7813c1862941..9d0425113c4b 100644 --- a/drivers/usb/usbip/stub_dev.c +++ b/drivers/usb/usbip/stub_dev.c @@ -448,12 +448,8 @@ static void stub_disconnect(struct usb_device *udev) busid_priv->sdev = NULL; stub_device_free(sdev); - if (busid_priv->status == STUB_BUSID_ALLOC) { + if (busid_priv->status == STUB_BUSID_ALLOC) busid_priv->status = STUB_BUSID_ADDED; - } else { - busid_priv->status = STUB_BUSID_OTHER; - del_match_busid((char *)udev_busid); - } } #ifdef CONFIG_PM diff --git a/drivers/usb/usbip/stub_main.c b/drivers/usb/usbip/stub_main.c index fb46bd62d538..587b9bc10042 100644 --- a/drivers/usb/usbip/stub_main.c +++ b/drivers/usb/usbip/stub_main.c @@ -14,6 +14,7 @@ #define DRIVER_DESC "USB/IP Host Driver" struct kmem_cache *stub_priv_cache; + /* * busid_tables defines matching busids that usbip can grab. A user can change * dynamically what device is locally used and what device is exported to a @@ -169,6 +170,51 @@ static ssize_t match_busid_store(struct device_driver *dev, const char *buf, } static DRIVER_ATTR_RW(match_busid); +static int do_rebind(char *busid, struct bus_id_priv *busid_priv) +{ + int ret; + + /* device_attach() callers should hold parent lock for USB */ + if (busid_priv->udev->dev.parent) + device_lock(busid_priv->udev->dev.parent); + ret = device_attach(&busid_priv->udev->dev); + if (busid_priv->udev->dev.parent) + device_unlock(busid_priv->udev->dev.parent); + if (ret < 0) { + dev_err(&busid_priv->udev->dev, "rebind failed\n"); + return ret; + } + return 0; +} + +static void stub_device_rebind(void) +{ +#if IS_MODULE(CONFIG_USBIP_HOST) + struct bus_id_priv *busid_priv; + int i; + + /* update status to STUB_BUSID_OTHER so probe ignores the device */ + spin_lock(&busid_table_lock); + for (i = 0; i < MAX_BUSID; i++) { + if (busid_table[i].name[0] && + busid_table[i].shutdown_busid) { + busid_priv = &(busid_table[i]); + busid_priv->status = STUB_BUSID_OTHER; + } + } + spin_unlock(&busid_table_lock); + + /* now run rebind */ + for (i = 0; i < MAX_BUSID; i++) { + if (busid_table[i].name[0] && + busid_table[i].shutdown_busid) { + busid_priv = &(busid_table[i]); + do_rebind(busid_table[i].name, busid_priv); + } + } +#endif +} + static ssize_t rebind_store(struct device_driver *dev, const char *buf, size_t count) { @@ -189,16 +235,9 @@ static ssize_t rebind_store(struct device_driver *dev, const char *buf, /* mark the device for deletion so probe ignores it during rescan */ bid->status = STUB_BUSID_OTHER; - /* device_attach() callers should hold parent lock for USB */ - if (bid->udev->dev.parent) - device_lock(bid->udev->dev.parent); - ret = device_attach(&bid->udev->dev); - if (bid->udev->dev.parent) - device_unlock(bid->udev->dev.parent); - if (ret < 0) { - dev_err(&bid->udev->dev, "rebind failed\n"); + ret = do_rebind((char *) buf, bid); + if (ret < 0) return ret; - } /* delete device from busid_table */ del_match_busid((char *) buf); @@ -323,6 +362,9 @@ static void __exit usbip_host_exit(void) */ usb_deregister_device_driver(&stub_driver); + /* initiate scan to attach devices */ + stub_device_rebind(); + kmem_cache_destroy(stub_priv_cache); } From 22076557b07c12086eeb16b8ce2b0b735f7a27e7 Mon Sep 17 00:00:00 2001 From: "Shuah Khan (Samsung OSG)" Date: Mon, 14 May 2018 20:49:58 -0600 Subject: [PATCH 308/347] usbip: usbip_host: fix NULL-ptr deref and use-after-free errors usbip_host updates device status without holding lock from stub probe, disconnect and rebind code paths. When multiple requests to import a device are received, these unprotected code paths step all over each other and drive fails with NULL-ptr deref and use-after-free errors. The driver uses a table lock to protect the busid array for adding and deleting busids to the table. However, the probe, disconnect and rebind paths get the busid table entry and update the status without holding the busid table lock. Add a new finer grain lock to protect the busid entry. This new lock will be held to search and update the busid entry fields from get_busid_idx(), add_match_busid() and del_match_busid(). match_busid_show() does the same to access the busid entry fields. get_busid_priv() changed to return the pointer to the busid entry holding the busid lock. stub_probe(), stub_disconnect() and stub_device_rebind() call put_busid_priv() to release the busid lock before returning. This changes fixes the unprotected code paths eliminating the race conditions in updating the busid entries. Reported-by: Jakub Jirasek Signed-off-by: Shuah Khan (Samsung OSG) Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/stub.h | 2 ++ drivers/usb/usbip/stub_dev.c | 33 ++++++++++++++++++++--------- drivers/usb/usbip/stub_main.c | 40 ++++++++++++++++++++++++++++++----- 3 files changed, 60 insertions(+), 15 deletions(-) diff --git a/drivers/usb/usbip/stub.h b/drivers/usb/usbip/stub.h index 14a72357800a..35618ceb2791 100644 --- a/drivers/usb/usbip/stub.h +++ b/drivers/usb/usbip/stub.h @@ -73,6 +73,7 @@ struct bus_id_priv { struct stub_device *sdev; struct usb_device *udev; char shutdown_busid; + spinlock_t busid_lock; }; /* stub_priv is allocated from stub_priv_cache */ @@ -83,6 +84,7 @@ extern struct usb_device_driver stub_driver; /* stub_main.c */ struct bus_id_priv *get_busid_priv(const char *busid); +void put_busid_priv(struct bus_id_priv *bid); int del_match_busid(char *busid); void stub_device_cleanup_urbs(struct stub_device *sdev); diff --git a/drivers/usb/usbip/stub_dev.c b/drivers/usb/usbip/stub_dev.c index 9d0425113c4b..c0d6ff1baa72 100644 --- a/drivers/usb/usbip/stub_dev.c +++ b/drivers/usb/usbip/stub_dev.c @@ -300,7 +300,7 @@ static int stub_probe(struct usb_device *udev) struct stub_device *sdev = NULL; const char *udev_busid = dev_name(&udev->dev); struct bus_id_priv *busid_priv; - int rc; + int rc = 0; dev_dbg(&udev->dev, "Enter probe\n"); @@ -317,13 +317,15 @@ static int stub_probe(struct usb_device *udev) * other matched drivers by the driver core. * See driver_probe_device() in driver/base/dd.c */ - return -ENODEV; + rc = -ENODEV; + goto call_put_busid_priv; } if (udev->descriptor.bDeviceClass == USB_CLASS_HUB) { dev_dbg(&udev->dev, "%s is a usb hub device... skip!\n", udev_busid); - return -ENODEV; + rc = -ENODEV; + goto call_put_busid_priv; } if (!strcmp(udev->bus->bus_name, "vhci_hcd")) { @@ -331,13 +333,16 @@ static int stub_probe(struct usb_device *udev) "%s is attached on vhci_hcd... skip!\n", udev_busid); - return -ENODEV; + rc = -ENODEV; + goto call_put_busid_priv; } /* ok, this is my device */ sdev = stub_device_alloc(udev); - if (!sdev) - return -ENOMEM; + if (!sdev) { + rc = -ENOMEM; + goto call_put_busid_priv; + } dev_info(&udev->dev, "usbip-host: register new device (bus %u dev %u)\n", @@ -369,7 +374,9 @@ static int stub_probe(struct usb_device *udev) } busid_priv->status = STUB_BUSID_ALLOC; - return 0; + rc = 0; + goto call_put_busid_priv; + err_files: usb_hub_release_port(udev->parent, udev->portnum, (struct usb_dev_state *) udev); @@ -379,6 +386,9 @@ err_port: busid_priv->sdev = NULL; stub_device_free(sdev); + +call_put_busid_priv: + put_busid_priv(busid_priv); return rc; } @@ -417,7 +427,7 @@ static void stub_disconnect(struct usb_device *udev) /* get stub_device */ if (!sdev) { dev_err(&udev->dev, "could not get device"); - return; + goto call_put_busid_priv; } dev_set_drvdata(&udev->dev, NULL); @@ -432,12 +442,12 @@ static void stub_disconnect(struct usb_device *udev) (struct usb_dev_state *) udev); if (rc) { dev_dbg(&udev->dev, "unable to release port\n"); - return; + goto call_put_busid_priv; } /* If usb reset is called from event handler */ if (usbip_in_eh(current)) - return; + goto call_put_busid_priv; /* shutdown the current connection */ shutdown_busid(busid_priv); @@ -450,6 +460,9 @@ static void stub_disconnect(struct usb_device *udev) if (busid_priv->status == STUB_BUSID_ALLOC) busid_priv->status = STUB_BUSID_ADDED; + +call_put_busid_priv: + put_busid_priv(busid_priv); } #ifdef CONFIG_PM diff --git a/drivers/usb/usbip/stub_main.c b/drivers/usb/usbip/stub_main.c index 587b9bc10042..41c7b9de2a92 100644 --- a/drivers/usb/usbip/stub_main.c +++ b/drivers/usb/usbip/stub_main.c @@ -26,6 +26,8 @@ static spinlock_t busid_table_lock; static void init_busid_table(void) { + int i; + /* * This also sets the bus_table[i].status to * STUB_BUSID_OTHER, which is 0. @@ -33,6 +35,9 @@ static void init_busid_table(void) memset(busid_table, 0, sizeof(busid_table)); spin_lock_init(&busid_table_lock); + + for (i = 0; i < MAX_BUSID; i++) + spin_lock_init(&busid_table[i].busid_lock); } /* @@ -44,15 +49,20 @@ static int get_busid_idx(const char *busid) int i; int idx = -1; - for (i = 0; i < MAX_BUSID; i++) + for (i = 0; i < MAX_BUSID; i++) { + spin_lock(&busid_table[i].busid_lock); if (busid_table[i].name[0]) if (!strncmp(busid_table[i].name, busid, BUSID_SIZE)) { idx = i; + spin_unlock(&busid_table[i].busid_lock); break; } + spin_unlock(&busid_table[i].busid_lock); + } return idx; } +/* Returns holding busid_lock. Should call put_busid_priv() to unlock */ struct bus_id_priv *get_busid_priv(const char *busid) { int idx; @@ -60,13 +70,21 @@ struct bus_id_priv *get_busid_priv(const char *busid) spin_lock(&busid_table_lock); idx = get_busid_idx(busid); - if (idx >= 0) + if (idx >= 0) { bid = &(busid_table[idx]); + /* get busid_lock before returning */ + spin_lock(&bid->busid_lock); + } spin_unlock(&busid_table_lock); return bid; } +void put_busid_priv(struct bus_id_priv *bid) +{ + spin_unlock(&bid->busid_lock); +} + static int add_match_busid(char *busid) { int i; @@ -79,15 +97,19 @@ static int add_match_busid(char *busid) goto out; } - for (i = 0; i < MAX_BUSID; i++) + for (i = 0; i < MAX_BUSID; i++) { + spin_lock(&busid_table[i].busid_lock); if (!busid_table[i].name[0]) { strlcpy(busid_table[i].name, busid, BUSID_SIZE); if ((busid_table[i].status != STUB_BUSID_ALLOC) && (busid_table[i].status != STUB_BUSID_REMOV)) busid_table[i].status = STUB_BUSID_ADDED; ret = 0; + spin_unlock(&busid_table[i].busid_lock); break; } + spin_unlock(&busid_table[i].busid_lock); + } out: spin_unlock(&busid_table_lock); @@ -108,6 +130,8 @@ int del_match_busid(char *busid) /* found */ ret = 0; + spin_lock(&busid_table[idx].busid_lock); + if (busid_table[idx].status == STUB_BUSID_OTHER) memset(busid_table[idx].name, 0, BUSID_SIZE); @@ -115,6 +139,7 @@ int del_match_busid(char *busid) (busid_table[idx].status != STUB_BUSID_ADDED)) busid_table[idx].status = STUB_BUSID_REMOV; + spin_unlock(&busid_table[idx].busid_lock); out: spin_unlock(&busid_table_lock); @@ -127,9 +152,12 @@ static ssize_t match_busid_show(struct device_driver *drv, char *buf) char *out = buf; spin_lock(&busid_table_lock); - for (i = 0; i < MAX_BUSID; i++) + for (i = 0; i < MAX_BUSID; i++) { + spin_lock(&busid_table[i].busid_lock); if (busid_table[i].name[0]) out += sprintf(out, "%s ", busid_table[i].name); + spin_unlock(&busid_table[i].busid_lock); + } spin_unlock(&busid_table_lock); out += sprintf(out, "\n"); @@ -204,7 +232,7 @@ static void stub_device_rebind(void) } spin_unlock(&busid_table_lock); - /* now run rebind */ + /* now run rebind - no need to hold locks. driver files are removed */ for (i = 0; i < MAX_BUSID; i++) { if (busid_table[i].name[0] && busid_table[i].shutdown_busid) { @@ -234,6 +262,8 @@ static ssize_t rebind_store(struct device_driver *dev, const char *buf, /* mark the device for deletion so probe ignores it during rescan */ bid->status = STUB_BUSID_OTHER; + /* release the busid lock */ + put_busid_priv(bid); ret = do_rebind((char *) buf, bid); if (ret < 0) From 4c27625b7a67eb9006963ed2bcf8e53b259b43af Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Sat, 5 May 2018 04:02:32 -0700 Subject: [PATCH 309/347] KVM: X86: Lower the default timer frequency limit to 200us MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Anthoine reported: The period used by Windows change over time but it can be 1 milliseconds or less. I saw the limit_periodic_timer_frequency print so 500 microseconds is sometimes reached. As suggested by Paolo, lower the default timer frequency limit to a smaller interval of 200 us (5000 Hz) to leave some headroom. This is required due to Windows 10 changing the scheduler tick limit from 1024 Hz to 2048 Hz. Reported-by: Anthoine Bourgeois Suggested-by: Paolo Bonzini Reviewed-by: Darren Kenny Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Anthoine Bourgeois Cc: Darren Kenny Cc: Jan Kiszka Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 37dd9a9d050a..59371de5d722 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -114,7 +114,7 @@ module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR); static bool __read_mostly report_ignored_msrs = true; module_param(report_ignored_msrs, bool, S_IRUGO | S_IWUSR); -unsigned int min_timer_period_us = 500; +unsigned int min_timer_period_us = 200; module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR); static bool __read_mostly kvmclock_periodic_sync = true; From 72cb0d893343cd33e6ab62cf26f2625d5d3532c9 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 9 Apr 2018 13:58:13 -0700 Subject: [PATCH 310/347] drm/vc4: Fix leak of the file_priv that stored the perfmon. Signed-off-by: Eric Anholt Fixes: 65101d8c9108 ("drm/vc4: Expose performance counters to userspace") Link: https://patchwork.freedesktop.org/patch/msgid/20180409205813.7077-1-eric@anholt.net Reviewed-by: Boris Brezillon Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/vc4/vc4_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index 94b99c90425a..7c95ed5c5cac 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -130,6 +130,7 @@ static void vc4_close(struct drm_device *dev, struct drm_file *file) struct vc4_file *vc4file = file->driver_priv; vc4_perfmon_close_file(vc4file); + kfree(vc4file); } static const struct vm_operations_struct vc4_vm_ops = { From 401dca8cbd14fc4b32d93499dcd12a1711a73ecc Mon Sep 17 00:00:00 2001 From: Philippe Bergheaud Date: Mon, 14 May 2018 10:27:35 +0200 Subject: [PATCH 311/347] cxl: Set the PBCQ Tunnel BAR register when enabling capi mode Skiboot used to set the default Tunnel BAR register value when capi mode was enabled. This approach was ok for the cxl driver, but prevented other drivers from choosing different values. Skiboot versions > 5.11 will not set the default value any longer. This patch modifies the cxl driver to set/reset the Tunnel BAR register when entering/exiting the cxl mode, with pnv_pci_set_tunnel_bar(). That should work with old skiboot (since we are re-writing the value already set) and new skiboot. mpe: The tunnel support was only merged into Linux recently, in commit d6a90bb83b50 ("powerpc/powernv: Enable tunneled operations") (v4.17-rc1), so with new skiboot kernels between that commit and this will not work correctly. Fixes: d6a90bb83b50 ("powerpc/powernv: Enable tunneled operations") Signed-off-by: Philippe Bergheaud Reviewed-by: Christophe Lombard Acked-by: Frederic Barrat Signed-off-by: Michael Ellerman --- drivers/misc/cxl/pci.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 83f1d08058fc..4eb893284af2 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -1742,6 +1742,10 @@ static int cxl_configure_adapter(struct cxl *adapter, struct pci_dev *dev) /* Required for devices using CAPP DMA mode, harmless for others */ pci_set_master(dev); + if (cxl_is_power9()) + if (pnv_pci_set_tunnel_bar(dev, 0x00020000E0000000ull, 1)) + dev_info(&dev->dev, "Tunneled operations unsupported\n"); + if ((rc = pnv_phb_to_cxl_mode(dev, adapter->native->sl_ops->capi_mode))) goto err; @@ -1768,6 +1772,9 @@ static void cxl_deconfigure_adapter(struct cxl *adapter) { struct pci_dev *pdev = to_pci_dev(adapter->dev.parent); + if (cxl_is_power9()) + pnv_pci_set_tunnel_bar(pdev, 0x00020000E0000000ull, 0); + cxl_native_release_psl_err_irq(adapter); cxl_unmap_adapter_regs(adapter); From 497a0790e2c604366b9e35dcb41310319e9bca13 Mon Sep 17 00:00:00 2001 From: Philippe Bergheaud Date: Mon, 14 May 2018 10:27:36 +0200 Subject: [PATCH 312/347] cxl: Report the tunneled operations status Failure to synchronize the tunneled operations does not prevent the initialization of the cxl card. This patch reports the tunneled operations status via /sys. Signed-off-by: Philippe Bergheaud Signed-off-by: Michael Ellerman --- Documentation/ABI/testing/sysfs-class-cxl | 8 ++++++++ drivers/misc/cxl/cxl.h | 1 + drivers/misc/cxl/pci.c | 7 ++++++- drivers/misc/cxl/sysfs.c | 10 ++++++++++ 4 files changed, 25 insertions(+), 1 deletion(-) diff --git a/Documentation/ABI/testing/sysfs-class-cxl b/Documentation/ABI/testing/sysfs-class-cxl index 640f65e79ef1..8e69345c37cc 100644 --- a/Documentation/ABI/testing/sysfs-class-cxl +++ b/Documentation/ABI/testing/sysfs-class-cxl @@ -244,3 +244,11 @@ Description: read only Returns 1 if the psl timebase register is synchronized with the core timebase register, 0 otherwise. Users: https://github.com/ibm-capi/libcxl + +What: /sys/class/cxl//tunneled_ops_supported +Date: May 2018 +Contact: linuxppc-dev@lists.ozlabs.org +Description: read only + Returns 1 if tunneled operations are supported in capi mode, + 0 otherwise. +Users: https://github.com/ibm-capi/libcxl diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index a4c9c8297a6d..918d4fb742d1 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -717,6 +717,7 @@ struct cxl { bool perst_select_user; bool perst_same_image; bool psl_timebase_synced; + bool tunneled_ops_supported; /* * number of contexts mapped on to this card. Possible values are: diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 4eb893284af2..4d6736f9d463 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -1742,9 +1742,14 @@ static int cxl_configure_adapter(struct cxl *adapter, struct pci_dev *dev) /* Required for devices using CAPP DMA mode, harmless for others */ pci_set_master(dev); - if (cxl_is_power9()) + adapter->tunneled_ops_supported = false; + + if (cxl_is_power9()) { if (pnv_pci_set_tunnel_bar(dev, 0x00020000E0000000ull, 1)) dev_info(&dev->dev, "Tunneled operations unsupported\n"); + else + adapter->tunneled_ops_supported = true; + } if ((rc = pnv_phb_to_cxl_mode(dev, adapter->native->sl_ops->capi_mode))) goto err; diff --git a/drivers/misc/cxl/sysfs.c b/drivers/misc/cxl/sysfs.c index 95285b7f636f..4b5a4c5d3c01 100644 --- a/drivers/misc/cxl/sysfs.c +++ b/drivers/misc/cxl/sysfs.c @@ -78,6 +78,15 @@ static ssize_t psl_timebase_synced_show(struct device *device, return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->psl_timebase_synced); } +static ssize_t tunneled_ops_supported_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl *adapter = to_cxl_adapter(device); + + return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->tunneled_ops_supported); +} + static ssize_t reset_adapter_store(struct device *device, struct device_attribute *attr, const char *buf, size_t count) @@ -183,6 +192,7 @@ static struct device_attribute adapter_attrs[] = { __ATTR_RO(base_image), __ATTR_RO(image_loaded), __ATTR_RO(psl_timebase_synced), + __ATTR_RO(tunneled_ops_supported), __ATTR_RW(load_image_on_perst), __ATTR_RW(perst_reloads_same_image), __ATTR(reset, S_IWUSR, NULL, reset_adapter_store), From 388d4359680b56dba82fe2ffca05871e9fd2b73e Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 11 May 2018 15:20:12 +0100 Subject: [PATCH 313/347] KVM: arm/arm64: Properly protect VGIC locks from IRQs As Jan reported [1], lockdep complains about the VGIC not being bullet proof. This seems to be due to two issues: - When commit 006df0f34930 ("KVM: arm/arm64: Support calling vgic_update_irq_pending from irq context") promoted irq_lock and ap_list_lock to _irqsave, we forgot two instances of irq_lock. lockdeps seems to pick those up. - If a lock is _irqsave, any other locks we take inside them should be _irqsafe as well. So the lpi_list_lock needs to be promoted also. This fixes both issues by simply making the remaining instances of those locks _irqsave. One irq_lock is addressed in a separate patch, to simplify backporting. [1] http://lists.infradead.org/pipermail/linux-arm-kernel/2018-May/575718.html Cc: stable@vger.kernel.org Fixes: 006df0f34930 ("KVM: arm/arm64: Support calling vgic_update_irq_pending from irq context") Reported-by: Jan Glauber Acked-by: Christoffer Dall Signed-off-by: Andre Przywara Signed-off-by: Paolo Bonzini --- virt/kvm/arm/vgic/vgic-debug.c | 5 +++-- virt/kvm/arm/vgic/vgic-its.c | 10 ++++++---- virt/kvm/arm/vgic/vgic.c | 22 ++++++++++++++-------- 3 files changed, 23 insertions(+), 14 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-debug.c b/virt/kvm/arm/vgic/vgic-debug.c index 10b38178cff2..4ffc0b5e6105 100644 --- a/virt/kvm/arm/vgic/vgic-debug.c +++ b/virt/kvm/arm/vgic/vgic-debug.c @@ -211,6 +211,7 @@ static int vgic_debug_show(struct seq_file *s, void *v) struct vgic_state_iter *iter = (struct vgic_state_iter *)v; struct vgic_irq *irq; struct kvm_vcpu *vcpu = NULL; + unsigned long flags; if (iter->dist_id == 0) { print_dist_state(s, &kvm->arch.vgic); @@ -227,9 +228,9 @@ static int vgic_debug_show(struct seq_file *s, void *v) irq = &kvm->arch.vgic.spis[iter->intid - VGIC_NR_PRIVATE_IRQS]; } - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); print_irq_state(s, irq, vcpu); - spin_unlock(&irq->irq_lock); + spin_unlock_irqrestore(&irq->irq_lock, flags); return 0; } diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index a8f07243aa9f..41abf92f2699 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -52,6 +52,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, { struct vgic_dist *dist = &kvm->arch.vgic; struct vgic_irq *irq = vgic_get_irq(kvm, NULL, intid), *oldirq; + unsigned long flags; int ret; /* In this case there is no put, since we keep the reference. */ @@ -71,7 +72,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, irq->intid = intid; irq->target_vcpu = vcpu; - spin_lock(&dist->lpi_list_lock); + spin_lock_irqsave(&dist->lpi_list_lock, flags); /* * There could be a race with another vgic_add_lpi(), so we need to @@ -99,7 +100,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, dist->lpi_list_count++; out_unlock: - spin_unlock(&dist->lpi_list_lock); + spin_unlock_irqrestore(&dist->lpi_list_lock, flags); /* * We "cache" the configuration table entries in our struct vgic_irq's. @@ -315,6 +316,7 @@ static int vgic_copy_lpi_list(struct kvm_vcpu *vcpu, u32 **intid_ptr) { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; struct vgic_irq *irq; + unsigned long flags; u32 *intids; int irq_count, i = 0; @@ -330,7 +332,7 @@ static int vgic_copy_lpi_list(struct kvm_vcpu *vcpu, u32 **intid_ptr) if (!intids) return -ENOMEM; - spin_lock(&dist->lpi_list_lock); + spin_lock_irqsave(&dist->lpi_list_lock, flags); list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) { if (i == irq_count) break; @@ -339,7 +341,7 @@ static int vgic_copy_lpi_list(struct kvm_vcpu *vcpu, u32 **intid_ptr) continue; intids[i++] = irq->intid; } - spin_unlock(&dist->lpi_list_lock); + spin_unlock_irqrestore(&dist->lpi_list_lock, flags); *intid_ptr = intids; return i; diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index 97bfba8d9a59..33c8325c8f35 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -43,9 +43,13 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = { * kvm->lock (mutex) * its->cmd_lock (mutex) * its->its_lock (mutex) - * vgic_cpu->ap_list_lock - * kvm->lpi_list_lock - * vgic_irq->irq_lock + * vgic_cpu->ap_list_lock must be taken with IRQs disabled + * kvm->lpi_list_lock must be taken with IRQs disabled + * vgic_irq->irq_lock must be taken with IRQs disabled + * + * As the ap_list_lock might be taken from the timer interrupt handler, + * we have to disable IRQs before taking this lock and everything lower + * than it. * * If you need to take multiple locks, always take the upper lock first, * then the lower ones, e.g. first take the its_lock, then the irq_lock. @@ -72,8 +76,9 @@ static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid) { struct vgic_dist *dist = &kvm->arch.vgic; struct vgic_irq *irq = NULL; + unsigned long flags; - spin_lock(&dist->lpi_list_lock); + spin_lock_irqsave(&dist->lpi_list_lock, flags); list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) { if (irq->intid != intid) @@ -89,7 +94,7 @@ static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid) irq = NULL; out_unlock: - spin_unlock(&dist->lpi_list_lock); + spin_unlock_irqrestore(&dist->lpi_list_lock, flags); return irq; } @@ -134,19 +139,20 @@ static void vgic_irq_release(struct kref *ref) void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq) { struct vgic_dist *dist = &kvm->arch.vgic; + unsigned long flags; if (irq->intid < VGIC_MIN_LPI) return; - spin_lock(&dist->lpi_list_lock); + spin_lock_irqsave(&dist->lpi_list_lock, flags); if (!kref_put(&irq->refcount, vgic_irq_release)) { - spin_unlock(&dist->lpi_list_lock); + spin_unlock_irqrestore(&dist->lpi_list_lock, flags); return; }; list_del(&irq->lpi_list); dist->lpi_list_count--; - spin_unlock(&dist->lpi_list_lock); + spin_unlock_irqrestore(&dist->lpi_list_lock, flags); kfree(irq); } From 9c4188762f7fee032abf8451fd9865a9abfc5516 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 11 May 2018 15:20:13 +0100 Subject: [PATCH 314/347] KVM: arm/arm64: VGIC/ITS: Promote irq_lock() in update_affinity Apparently the development of update_affinity() overlapped with the promotion of irq_lock to be _irqsave, so the patch didn't convert this lock over. This will make lockdep complain. Fix this by disabling IRQs around the lock. Cc: stable@vger.kernel.org Fixes: 08c9fd042117 ("KVM: arm/arm64: vITS: Add a helper to update the affinity of an LPI") Reported-by: Jan Glauber Signed-off-by: Andre Przywara Acked-by: Christoffer Dall Signed-off-by: Paolo Bonzini --- virt/kvm/arm/vgic/vgic-its.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 41abf92f2699..51a80b600632 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -350,10 +350,11 @@ static int vgic_copy_lpi_list(struct kvm_vcpu *vcpu, u32 **intid_ptr) static int update_affinity(struct vgic_irq *irq, struct kvm_vcpu *vcpu) { int ret = 0; + unsigned long flags; - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); irq->target_vcpu = vcpu; - spin_unlock(&irq->irq_lock); + spin_unlock_irqrestore(&irq->irq_lock, flags); if (irq->hw) { struct its_vlpi_map map; From bf308242ab98b5d1648c3663e753556bef9bec01 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 11 May 2018 15:20:14 +0100 Subject: [PATCH 315/347] KVM: arm/arm64: VGIC/ITS: protect kvm_read_guest() calls with SRCU lock kvm_read_guest() will eventually look up in kvm_memslots(), which requires either to hold the kvm->slots_lock or to be inside a kvm->srcu critical section. In contrast to x86 and s390 we don't take the SRCU lock on every guest exit, so we have to do it individually for each kvm_read_guest() call. Provide a wrapper which does that and use that everywhere. Note that ending the SRCU critical section before returning from the kvm_read_guest() wrapper is safe, because the data has been *copied*, so we don't need to rely on valid references to the memslot anymore. Cc: Stable # 4.8+ Reported-by: Jan Glauber Signed-off-by: Andre Przywara Acked-by: Christoffer Dall Signed-off-by: Paolo Bonzini --- arch/arm/include/asm/kvm_mmu.h | 16 ++++++++++++++++ arch/arm64/include/asm/kvm_mmu.h | 16 ++++++++++++++++ virt/kvm/arm/vgic/vgic-its.c | 15 ++++++++------- 3 files changed, 40 insertions(+), 7 deletions(-) diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 707a1f06dc5d..f675162663f0 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -309,6 +309,22 @@ static inline unsigned int kvm_get_vmid_bits(void) return 8; } +/* + * We are not in the kvm->srcu critical section most of the time, so we take + * the SRCU read lock here. Since we copy the data from the user page, we + * can immediately drop the lock again. + */ +static inline int kvm_read_guest_lock(struct kvm *kvm, + gpa_t gpa, void *data, unsigned long len) +{ + int srcu_idx = srcu_read_lock(&kvm->srcu); + int ret = kvm_read_guest(kvm, gpa, data, len); + + srcu_read_unlock(&kvm->srcu, srcu_idx); + + return ret; +} + static inline void *kvm_get_hyp_vector(void) { return kvm_ksym_ref(__kvm_hyp_vector); diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 082110993647..6128992c2ded 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -360,6 +360,22 @@ static inline unsigned int kvm_get_vmid_bits(void) return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8; } +/* + * We are not in the kvm->srcu critical section most of the time, so we take + * the SRCU read lock here. Since we copy the data from the user page, we + * can immediately drop the lock again. + */ +static inline int kvm_read_guest_lock(struct kvm *kvm, + gpa_t gpa, void *data, unsigned long len) +{ + int srcu_idx = srcu_read_lock(&kvm->srcu); + int ret = kvm_read_guest(kvm, gpa, data, len); + + srcu_read_unlock(&kvm->srcu, srcu_idx); + + return ret; +} + #ifdef CONFIG_KVM_INDIRECT_VECTORS /* * EL2 vectors can be mapped and rerouted in a number of ways, diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 51a80b600632..7cb060e01a76 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -281,8 +281,8 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq, int ret; unsigned long flags; - ret = kvm_read_guest(kvm, propbase + irq->intid - GIC_LPI_OFFSET, - &prop, 1); + ret = kvm_read_guest_lock(kvm, propbase + irq->intid - GIC_LPI_OFFSET, + &prop, 1); if (ret) return ret; @@ -444,8 +444,9 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu) * this very same byte in the last iteration. Reuse that. */ if (byte_offset != last_byte_offset) { - ret = kvm_read_guest(vcpu->kvm, pendbase + byte_offset, - &pendmask, 1); + ret = kvm_read_guest_lock(vcpu->kvm, + pendbase + byte_offset, + &pendmask, 1); if (ret) { kfree(intids); return ret; @@ -789,7 +790,7 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id, return false; /* Each 1st level entry is represented by a 64-bit value. */ - if (kvm_read_guest(its->dev->kvm, + if (kvm_read_guest_lock(its->dev->kvm, BASER_ADDRESS(baser) + index * sizeof(indirect_ptr), &indirect_ptr, sizeof(indirect_ptr))) return false; @@ -1370,8 +1371,8 @@ static void vgic_its_process_commands(struct kvm *kvm, struct vgic_its *its) cbaser = CBASER_ADDRESS(its->cbaser); while (its->cwriter != its->creadr) { - int ret = kvm_read_guest(kvm, cbaser + its->creadr, - cmd_buf, ITS_CMD_SIZE); + int ret = kvm_read_guest_lock(kvm, cbaser + its->creadr, + cmd_buf, ITS_CMD_SIZE); /* * If kvm_read_guest() fails, this could be due to the guest * programming a bogus value in CBASER or something else going From 711702b57cc3c50b84bd648de0f1ca0a378805be Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 11 May 2018 15:20:15 +0100 Subject: [PATCH 316/347] KVM: arm/arm64: VGIC/ITS save/restore: protect kvm_read_guest() calls kvm_read_guest() will eventually look up in kvm_memslots(), which requires either to hold the kvm->slots_lock or to be inside a kvm->srcu critical section. In contrast to x86 and s390 we don't take the SRCU lock on every guest exit, so we have to do it individually for each kvm_read_guest() call. Use the newly introduced wrapper for that. Cc: Stable # 4.12+ Reported-by: Jan Glauber Signed-off-by: Andre Przywara Acked-by: Christoffer Dall Signed-off-by: Paolo Bonzini --- virt/kvm/arm/vgic/vgic-its.c | 4 ++-- virt/kvm/arm/vgic/vgic-v3.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 7cb060e01a76..4ed79c939fb4 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -1897,7 +1897,7 @@ static int scan_its_table(struct vgic_its *its, gpa_t base, int size, int esz, int next_offset; size_t byte_offset; - ret = kvm_read_guest(kvm, gpa, entry, esz); + ret = kvm_read_guest_lock(kvm, gpa, entry, esz); if (ret) return ret; @@ -2267,7 +2267,7 @@ static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz) int ret; BUG_ON(esz > sizeof(val)); - ret = kvm_read_guest(kvm, gpa, &val, esz); + ret = kvm_read_guest_lock(kvm, gpa, &val, esz); if (ret) return ret; val = le64_to_cpu(val); diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index c7423f3768e5..bdcf8e7a6161 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -344,7 +344,7 @@ retry: bit_nr = irq->intid % BITS_PER_BYTE; ptr = pendbase + byte_offset; - ret = kvm_read_guest(kvm, ptr, &val, 1); + ret = kvm_read_guest_lock(kvm, ptr, &val, 1); if (ret) return ret; @@ -397,7 +397,7 @@ int vgic_v3_save_pending_tables(struct kvm *kvm) ptr = pendbase + byte_offset; if (byte_offset != last_byte_offset) { - ret = kvm_read_guest(kvm, ptr, &val, 1); + ret = kvm_read_guest_lock(kvm, ptr, &val, 1); if (ret) return ret; last_byte_offset = byte_offset; From 9f825e74d761c13b0cfaa5f65344d64ff970e252 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 14 May 2018 12:49:37 +0200 Subject: [PATCH 317/347] mtd: rawnand: Fix return type of __DIVIDE() when called with 32-bit The __DIVIDE() macro checks whether it is called with a 32-bit or 64-bit dividend, to select the appropriate divide-and-round-up routine. As the check uses the ternary operator, the result will always be promoted to a type that can hold both results, i.e. unsigned long long. When using this result in a division on a 32-bit system, this may lead to link errors like: ERROR: "__udivdi3" [drivers/mtd/nand/raw/nand.ko] undefined! Fix this by casting the result of the division to the type of the dividend. Fixes: 8878b126df769831 ("mtd: nand: add ->exec_op() implementation") Signed-off-by: Geert Uytterhoeven Signed-off-by: Boris Brezillon --- include/linux/mtd/rawnand.h | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 5dad59b31244..17c919436f48 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -867,12 +867,18 @@ struct nand_op_instr { * tBERS (during an erase) which all of them are u64 values that cannot be * divided by usual kernel macros and must be handled with the special * DIV_ROUND_UP_ULL() macro. + * + * Cast to type of dividend is needed here to guarantee that the result won't + * be an unsigned long long when the dividend is an unsigned long (or smaller), + * which is what the compiler does when it sees ternary operator with 2 + * different return types (picks the largest type to make sure there's no + * loss). */ -#define __DIVIDE(dividend, divisor) ({ \ - sizeof(dividend) == sizeof(u32) ? \ - DIV_ROUND_UP(dividend, divisor) : \ - DIV_ROUND_UP_ULL(dividend, divisor); \ - }) +#define __DIVIDE(dividend, divisor) ({ \ + (__typeof__(dividend))(sizeof(dividend) <= sizeof(unsigned long) ? \ + DIV_ROUND_UP(dividend, divisor) : \ + DIV_ROUND_UP_ULL(dividend, divisor)); \ + }) #define PSEC_TO_NSEC(x) __DIVIDE(x, 1000) #define PSEC_TO_MSEC(x) __DIVIDE(x, 1000000000) From 2e5be528ab0182ad4b42b9feea3b80f85f37179b Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Wed, 18 Apr 2018 14:49:08 +0200 Subject: [PATCH 318/347] clk: imx6ull: use OSC clock during AXI rate change On i.MX6 ULL using PLL3 seems to cause a freeze when setting the parent to IMX6UL_CLK_PLL3_USB_OTG. This only seems to appear since commit 6f9575e55632 ("clk: imx: Add CLK_IS_CRITICAL flag for busy divider and busy mux"), probably because the clock is now forced to be on. Fixes: 6f9575e55632("clk: imx: Add CLK_IS_CRITICAL flag for busy divider and busy mux") Signed-off-by: Stefan Agner Signed-off-by: Stephen Boyd --- drivers/clk/imx/clk-imx6ul.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/imx/clk-imx6ul.c b/drivers/clk/imx/clk-imx6ul.c index 114ecbb94ec5..12320118f8de 100644 --- a/drivers/clk/imx/clk-imx6ul.c +++ b/drivers/clk/imx/clk-imx6ul.c @@ -464,7 +464,7 @@ static void __init imx6ul_clocks_init(struct device_node *ccm_node) clk_set_rate(clks[IMX6UL_CLK_AHB], 99000000); /* Change periph_pre clock to pll2_bus to adjust AXI rate to 264MHz */ - clk_set_parent(clks[IMX6UL_CLK_PERIPH_CLK2_SEL], clks[IMX6UL_CLK_PLL3_USB_OTG]); + clk_set_parent(clks[IMX6UL_CLK_PERIPH_CLK2_SEL], clks[IMX6UL_CLK_OSC]); clk_set_parent(clks[IMX6UL_CLK_PERIPH], clks[IMX6UL_CLK_PERIPH_CLK2]); clk_set_parent(clks[IMX6UL_CLK_PERIPH_PRE], clks[IMX6UL_CLK_PLL2_BUS]); clk_set_parent(clks[IMX6UL_CLK_PERIPH], clks[IMX6UL_CLK_PERIPH_PRE]); From 9a160601f3fb6ffa196b87d1b5643646be486405 Mon Sep 17 00:00:00 2001 From: Gabriel Fernandez Date: Thu, 3 May 2018 08:40:09 +0200 Subject: [PATCH 319/347] clk: stm32: fix: stm32 clock drivers are not compiled by default Clock driver is mandatory if the machine is selected. Then don't use 'bool' and 'depends on' commands, but 'def_bool' with the machine(s). Fixes: da32d3539fca ("clk: stm32: add configuration flags for each of the stm32 drivers") Signed-off-by: Gabriel Fernandez Acked-by: Alexandre TORGUE Signed-off-by: Stephen Boyd --- drivers/clk/Kconfig | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig index 41492e980ef4..34968a381d0f 100644 --- a/drivers/clk/Kconfig +++ b/drivers/clk/Kconfig @@ -266,15 +266,13 @@ config COMMON_CLK_STM32MP157 Support for stm32mp157 SoC family clocks config COMMON_CLK_STM32F - bool "Clock driver for stm32f4 and stm32f7 SoC families" - depends on MACH_STM32F429 || MACH_STM32F469 || MACH_STM32F746 + def_bool COMMON_CLK && (MACH_STM32F429 || MACH_STM32F469 || MACH_STM32F746) help ---help--- Support for stm32f4 and stm32f7 SoC families clocks config COMMON_CLK_STM32H7 - bool "Clock driver for stm32h7 SoC family" - depends on MACH_STM32H743 + def_bool COMMON_CLK && MACH_STM32H743 help ---help--- Support for stm32h7 SoC family clocks From 91ba9f28a3de97761c2b5fd5df5d88421268e507 Mon Sep 17 00:00:00 2001 From: Deepak Rawat Date: Tue, 15 May 2018 15:39:09 +0200 Subject: [PATCH 320/347] drm/vmwgfx: Set dmabuf_size when vmw_dmabuf_init is successful SOU primary plane prepare_fb hook depends upon dmabuf_size to pin up BO (and not call a new vmw_dmabuf_init) when a new fb size is same as current fb. This was changed in a recent commit which is causing page_flip to fail on VM with low display memory and multi-mon failure when cycle monitors from secondary display. Cc: # 4.14, 4.16 Fixes: 20fb5a635a0c ("drm/vmwgfx: Unpin the screen object backup buffer when not used") Signed-off-by: Deepak Rawat Reviewed-by: Sinclair Yeh Signed-off-by: Thomas Hellstrom --- drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c index 648f8127f65a..3d667e903beb 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c @@ -482,6 +482,8 @@ vmw_sou_primary_plane_prepare_fb(struct drm_plane *plane, return ret; } + vps->dmabuf_size = size; + /* * TTM already thinks the buffer is pinned, but make sure the * pin_count is upped. From b579f924a90f42fa561afd8201514fc216b71949 Mon Sep 17 00:00:00 2001 From: Michel Thierry Date: Mon, 14 May 2018 09:54:45 -0700 Subject: [PATCH 321/347] drm/i915/gen9: Add WaClearHIZ_WM_CHICKEN3 for bxt and glk Factor in clear values wherever required while updating destination min/max. References: HSDES#1604444184 Signed-off-by: Michel Thierry Cc: mesa-dev@lists.freedesktop.org Cc: Mika Kuoppala Cc: Oscar Mateo Reviewed-by: Mika Kuoppala Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20180510200708.18097-1-michel.thierry@intel.com Cc: stable@vger.kernel.org Cc: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20180514165445.9198-1-michel.thierry@intel.com (backported from commit 0c79f9cb77eae28d48a4f9fc1b3341aacbbd260c) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_reg.h | 3 +++ drivers/gpu/drm/i915/intel_engine_cs.c | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index e6a8c0ee7df1..8a69a9275e28 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7326,6 +7326,9 @@ enum { #define SLICE_ECO_CHICKEN0 _MMIO(0x7308) #define PIXEL_MASK_CAMMING_DISABLE (1 << 14) +#define GEN9_WM_CHICKEN3 _MMIO(0x5588) +#define GEN9_FACTOR_IN_CLR_VAL_HIZ (1 << 9) + /* WaCatErrorRejectionIssue */ #define GEN7_SQ_CHICKEN_MBCUNIT_CONFIG _MMIO(0x9030) #define GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB (1<<11) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 4ba139c27fba..f7c25828d3bb 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1149,6 +1149,10 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine) WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK, GEN9_PREEMPT_GPGPU_COMMAND_LEVEL); + /* WaClearHIZ_WM_CHICKEN3:bxt,glk */ + if (IS_GEN9_LP(dev_priv)) + WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ); + /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */ ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG); if (ret) From 85f4f12d51397f1648e1f4350f77e24039b82d61 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 15 May 2018 22:24:52 -0400 Subject: [PATCH 322/347] vsprintf: Replace memory barrier with static_key for random_ptr_key update Reviewing Tobin's patches for getting pointers out early before entropy has been established, I noticed that there's a lone smp_mb() in the code. As with most lone memory barriers, this one appears to be incorrectly used. We currently basically have this: get_random_bytes(&ptr_key, sizeof(ptr_key)); /* * have_filled_random_ptr_key==true is dependent on get_random_bytes(). * ptr_to_id() needs to see have_filled_random_ptr_key==true * after get_random_bytes() returns. */ smp_mb(); WRITE_ONCE(have_filled_random_ptr_key, true); And later we have: if (unlikely(!have_filled_random_ptr_key)) return string(buf, end, "(ptrval)", spec); /* Missing memory barrier here. */ hashval = (unsigned long)siphash_1u64((u64)ptr, &ptr_key); As the CPU can perform speculative loads, we could have a situation with the following: CPU0 CPU1 ---- ---- load ptr_key = 0 store ptr_key = random smp_mb() store have_filled_random_ptr_key load have_filled_random_ptr_key = true BAD BAD BAD! (you're so bad!) Because nothing prevents CPU1 from loading ptr_key before loading have_filled_random_ptr_key. But this race is very unlikely, but we can't keep an incorrect smp_mb() in place. Instead, replace the have_filled_random_ptr_key with a static_branch not_filled_random_ptr_key, that is initialized to true and changed to false when we get enough entropy. If the update happens in early boot, the static_key is updated immediately, otherwise it will have to wait till entropy is filled and this happens in an interrupt handler which can't enable a static_key, as that requires a preemptible context. In that case, a work_queue is used to enable it, as entropy already took too long to establish in the first place waiting a little more shouldn't hurt anything. The benefit of using the static key is that the unlikely branch in vsprintf() now becomes a nop. Link: http://lkml.kernel.org/r/20180515100558.21df515e@gandalf.local.home Cc: stable@vger.kernel.org Fixes: ad67b74d2469d ("printk: hash addresses printed with %p") Acked-by: Linus Torvalds Signed-off-by: Steven Rostedt (VMware) --- lib/vsprintf.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 30c0cb8cc9bc..23920c5ff728 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1669,19 +1669,22 @@ char *pointer_string(char *buf, char *end, const void *ptr, return number(buf, end, (unsigned long int)ptr, spec); } -static bool have_filled_random_ptr_key __read_mostly; +static DEFINE_STATIC_KEY_TRUE(not_filled_random_ptr_key); static siphash_key_t ptr_key __read_mostly; +static void enable_ptr_key_workfn(struct work_struct *work) +{ + get_random_bytes(&ptr_key, sizeof(ptr_key)); + /* Needs to run from preemptible context */ + static_branch_disable(¬_filled_random_ptr_key); +} + +static DECLARE_WORK(enable_ptr_key_work, enable_ptr_key_workfn); + static void fill_random_ptr_key(struct random_ready_callback *unused) { - get_random_bytes(&ptr_key, sizeof(ptr_key)); - /* - * have_filled_random_ptr_key==true is dependent on get_random_bytes(). - * ptr_to_id() needs to see have_filled_random_ptr_key==true - * after get_random_bytes() returns. - */ - smp_mb(); - WRITE_ONCE(have_filled_random_ptr_key, true); + /* This may be in an interrupt handler. */ + queue_work(system_unbound_wq, &enable_ptr_key_work); } static struct random_ready_callback random_ready = { @@ -1695,7 +1698,8 @@ static int __init initialize_ptr_random(void) if (!ret) { return 0; } else if (ret == -EALREADY) { - fill_random_ptr_key(&random_ready); + /* This is in preemptible context */ + enable_ptr_key_workfn(&enable_ptr_key_work); return 0; } @@ -1709,7 +1713,7 @@ static char *ptr_to_id(char *buf, char *end, void *ptr, struct printf_spec spec) unsigned long hashval; const int default_width = 2 * sizeof(ptr); - if (unlikely(!have_filled_random_ptr_key)) { + if (static_branch_unlikely(¬_filled_random_ptr_key)) { spec.field_width = default_width; /* string length must be less than default_width */ return string(buf, end, "(ptrval)", spec); From 2b6207291b7b277a5df9d1aab44b56815a292dba Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 16 May 2018 17:00:26 +0300 Subject: [PATCH 323/347] drm/dumb-buffers: Integer overflow in drm_mode_create_ioctl() There is a comment here which says that DIV_ROUND_UP() and that's where the problem comes from. Say you pick: args->bpp = UINT_MAX - 7; args->width = 4; args->height = 1; The integer overflow in DIV_ROUND_UP() means "cpp" is UINT_MAX / 8 and because of how we picked args->width that means cpp < UINT_MAX / 4. I've fixed it by preventing the integer overflow in DIV_ROUND_UP(). I removed the check for !cpp because it's not possible after this change. I also changed all the 0xffffffffU references to U32_MAX. Signed-off-by: Dan Carpenter Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20180516140026.GA19340@mwanda --- drivers/gpu/drm/drm_dumb_buffers.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_dumb_buffers.c b/drivers/gpu/drm/drm_dumb_buffers.c index 39ac15ce4702..9e2ae02f31e0 100644 --- a/drivers/gpu/drm/drm_dumb_buffers.c +++ b/drivers/gpu/drm/drm_dumb_buffers.c @@ -65,12 +65,13 @@ int drm_mode_create_dumb_ioctl(struct drm_device *dev, return -EINVAL; /* overflow checks for 32bit size calculations */ - /* NOTE: DIV_ROUND_UP() can overflow */ + if (args->bpp > U32_MAX - 8) + return -EINVAL; cpp = DIV_ROUND_UP(args->bpp, 8); - if (!cpp || cpp > 0xffffffffU / args->width) + if (cpp > U32_MAX / args->width) return -EINVAL; stride = cpp * args->width; - if (args->height > 0xffffffffU / stride) + if (args->height > U32_MAX / stride) return -EINVAL; /* test for wrap-around */ From c171654caa875919be3c533d3518da8be5be966e Mon Sep 17 00:00:00 2001 From: "Shuah Khan (Samsung OSG)" Date: Tue, 15 May 2018 17:57:23 -0600 Subject: [PATCH 324/347] usbip: usbip_host: fix bad unlock balance during stub_probe() stub_probe() calls put_busid_priv() in an error path when device isn't found in the busid_table. Fix it by making put_busid_priv() safe to be called with null struct bus_id_priv pointer. This problem happens when "usbip bind" is run without loading usbip_host driver and then running modprobe. The first failed bind attempt unbinds the device from the original driver and when usbip_host is modprobed, stub_probe() runs and doesn't find the device in its busid table and calls put_busid_priv(0 with null bus_id_priv pointer. usbip-host 3-10.2: 3-10.2 is not in match_busid table... skip! [ 367.359679] ===================================== [ 367.359681] WARNING: bad unlock balance detected! [ 367.359683] 4.17.0-rc4+ #5 Not tainted [ 367.359685] ------------------------------------- [ 367.359688] modprobe/2768 is trying to release lock ( [ 367.359689] ================================================================== [ 367.359696] BUG: KASAN: null-ptr-deref in print_unlock_imbalance_bug+0x99/0x110 [ 367.359699] Read of size 8 at addr 0000000000000058 by task modprobe/2768 [ 367.359705] CPU: 4 PID: 2768 Comm: modprobe Not tainted 4.17.0-rc4+ #5 Fixes: 22076557b07c ("usbip: usbip_host: fix NULL-ptr deref and use-after-free errors") in usb-linus Signed-off-by: Shuah Khan (Samsung OSG) Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/stub_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/usbip/stub_main.c b/drivers/usb/usbip/stub_main.c index 41c7b9de2a92..bf8a5feb0ee9 100644 --- a/drivers/usb/usbip/stub_main.c +++ b/drivers/usb/usbip/stub_main.c @@ -82,7 +82,8 @@ struct bus_id_priv *get_busid_priv(const char *busid) void put_busid_priv(struct bus_id_priv *bid) { - spin_unlock(&bid->busid_lock); + if (bid) + spin_unlock(&bid->busid_lock); } static int add_match_busid(char *busid) From 22aac3eb0c465dd9ea7f06ee1ed8ad933890f2a3 Mon Sep 17 00:00:00 2001 From: Pierre-Yves MORDRET Date: Fri, 11 May 2018 10:22:39 +0200 Subject: [PATCH 325/347] MAINTAINERS: add entry for STM32 I2C driver Add I2C/SMBUS Driver entry for STM32 family from ST Microelectronics. Signed-off-by: Pierre-Yves MORDRET Signed-off-by: Wolfram Sang --- MAINTAINERS | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index df6e9bb2559a..6b176cc27ebe 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13266,6 +13266,12 @@ M: Jan-Benedict Glaw S: Maintained F: arch/alpha/kernel/srm_env.c +ST STM32 I2C/SMBUS DRIVER +M: Pierre-Yves MORDRET +L: linux-i2c@vger.kernel.org +S: Maintained +F: drivers/i2c/busses/i2c-stm32* + STABLE BRANCH M: Greg Kroah-Hartman L: stable@vger.kernel.org From c1d2a31397ec51f0370f6bd17b19b39152c263cb Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 15 May 2018 01:59:47 +1000 Subject: [PATCH 326/347] powerpc/powernv: Fix NVRAM sleep in invalid context when crashing Similarly to opal_event_shutdown, opal_nvram_write can be called in the crash path with irqs disabled. Special case the delay to avoid sleeping in invalid context. Fixes: 3b8070335f75 ("powerpc/powernv: Fix OPAL NVRAM driver OPAL_BUSY loops") Cc: stable@vger.kernel.org # v3.2 Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal-nvram.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal-nvram.c b/arch/powerpc/platforms/powernv/opal-nvram.c index 1bceb95f422d..5584247f5029 100644 --- a/arch/powerpc/platforms/powernv/opal-nvram.c +++ b/arch/powerpc/platforms/powernv/opal-nvram.c @@ -44,6 +44,10 @@ static ssize_t opal_nvram_read(char *buf, size_t count, loff_t *index) return count; } +/* + * This can be called in the panic path with interrupts off, so use + * mdelay in that case. + */ static ssize_t opal_nvram_write(char *buf, size_t count, loff_t *index) { s64 rc = OPAL_BUSY; @@ -58,10 +62,16 @@ static ssize_t opal_nvram_write(char *buf, size_t count, loff_t *index) while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { rc = opal_write_nvram(__pa(buf), count, off); if (rc == OPAL_BUSY_EVENT) { - msleep(OPAL_BUSY_DELAY_MS); + if (in_interrupt() || irqs_disabled()) + mdelay(OPAL_BUSY_DELAY_MS); + else + msleep(OPAL_BUSY_DELAY_MS); opal_poll_events(NULL); } else if (rc == OPAL_BUSY) { - msleep(OPAL_BUSY_DELAY_MS); + if (in_interrupt() || irqs_disabled()) + mdelay(OPAL_BUSY_DELAY_MS); + else + msleep(OPAL_BUSY_DELAY_MS); } } From 1c1a2ee1b53b006754073eefc65d2b2cedb5264b Mon Sep 17 00:00:00 2001 From: Coly Li Date: Thu, 17 May 2018 23:33:26 +0800 Subject: [PATCH 327/347] bcache: return 0 from bch_debug_init() if CONFIG_DEBUG_FS=n Commit 539d39eb2708 ("bcache: fix wrong return value in bch_debug_init()") returns the return value of debugfs_create_dir() to bcache_init(). When CONFIG_DEBUG_FS=n, bch_debug_init() always returns 1 and makes bcache_init() failedi. This patch makes bch_debug_init() always returns 0 if CONFIG_DEBUG_FS=n, so bcache can continue to work for the kernels which don't have debugfs enanbled. Changelog: v4: Add Acked-by from Kent Overstreet. v3: Use IS_ENABLED(CONFIG_DEBUG_FS) to replace #ifdef DEBUG_FS. v2: Remove a warning information v1: Initial version. Fixes: Commit 539d39eb2708 ("bcache: fix wrong return value in bch_debug_init()") Cc: stable@vger.kernel.org Signed-off-by: Coly Li Reported-by: Massimo B. Reported-by: Kai Krakow Tested-by: Kai Krakow Acked-by: Kent Overstreet Signed-off-by: Jens Axboe --- drivers/md/bcache/debug.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index 4e63c6f6c04d..d030ce3025a6 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -250,7 +250,9 @@ void bch_debug_exit(void) int __init bch_debug_init(struct kobject *kobj) { - bcache_debug = debugfs_create_dir("bcache", NULL); + if (!IS_ENABLED(CONFIG_DEBUG_FS)) + return 0; + bcache_debug = debugfs_create_dir("bcache", NULL); return IS_ERR_OR_NULL(bcache_debug); } From 7f7ccc2ccc2e70c6054685f5e3522efa81556830 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Fri, 11 May 2018 08:11:44 +0200 Subject: [PATCH 328/347] proc: do not access cmdline nor environ from file-backed areas proc_pid_cmdline_read() and environ_read() directly access the target process' VM to retrieve the command line and environment. If this process remaps these areas onto a file via mmap(), the requesting process may experience various issues such as extra delays if the underlying device is slow to respond. Let's simply refuse to access file-backed areas in these functions. For this we add a new FOLL_ANON gup flag that is passed to all calls to access_remote_vm(). The code already takes care of such failures (including unmapped areas). Accesses via /proc/pid/mem were not changed though. This was assigned CVE-2018-1120. Note for stable backports: the patch may apply to kernels prior to 4.11 but silently miss one location; it must be checked that no call to access_remote_vm() keeps zero as the last argument. Reported-by: Qualys Security Advisory Cc: Linus Torvalds Cc: Andy Lutomirski Cc: Oleg Nesterov Cc: stable@vger.kernel.org Signed-off-by: Willy Tarreau Signed-off-by: Linus Torvalds --- fs/proc/base.c | 8 ++++---- include/linux/mm.h | 1 + mm/gup.c | 3 +++ 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 1b2ede6abcdf..1a76d751cf3c 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -261,7 +261,7 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, * Inherently racy -- command line shares address space * with code and data. */ - rv = access_remote_vm(mm, arg_end - 1, &c, 1, 0); + rv = access_remote_vm(mm, arg_end - 1, &c, 1, FOLL_ANON); if (rv <= 0) goto out_free_page; @@ -279,7 +279,7 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, int nr_read; _count = min3(count, len, PAGE_SIZE); - nr_read = access_remote_vm(mm, p, page, _count, 0); + nr_read = access_remote_vm(mm, p, page, _count, FOLL_ANON); if (nr_read < 0) rv = nr_read; if (nr_read <= 0) @@ -325,7 +325,7 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, bool final; _count = min3(count, len, PAGE_SIZE); - nr_read = access_remote_vm(mm, p, page, _count, 0); + nr_read = access_remote_vm(mm, p, page, _count, FOLL_ANON); if (nr_read < 0) rv = nr_read; if (nr_read <= 0) @@ -946,7 +946,7 @@ static ssize_t environ_read(struct file *file, char __user *buf, max_len = min_t(size_t, PAGE_SIZE, count); this_len = min(max_len, this_len); - retval = access_remote_vm(mm, (env_start + src), page, this_len, 0); + retval = access_remote_vm(mm, (env_start + src), page, this_len, FOLL_ANON); if (retval <= 0) { ret = retval; diff --git a/include/linux/mm.h b/include/linux/mm.h index 1ac1f06a4be6..c080af584ddd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2493,6 +2493,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma, #define FOLL_MLOCK 0x1000 /* lock present pages */ #define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */ #define FOLL_COW 0x4000 /* internal GUP flag */ +#define FOLL_ANON 0x8000 /* don't do file mappings */ static inline int vm_fault_to_errno(int vm_fault, int foll_flags) { diff --git a/mm/gup.c b/mm/gup.c index 76af4cfeaf68..541904a7c60f 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -544,6 +544,9 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) if (vm_flags & (VM_IO | VM_PFNMAP)) return -EFAULT; + if (gup_flags & FOLL_ANON && !vma_is_anonymous(vma)) + return -EFAULT; + if (write) { if (!(vm_flags & VM_WRITE)) { if (!(gup_flags & FOLL_FORCE)) From 633711e82878dc29083fc5d2605166755e25b57a Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 17 May 2018 17:54:24 +0300 Subject: [PATCH 329/347] kvm: rename KVM_HINTS_DEDICATED to KVM_HINTS_REALTIME KVM_HINTS_DEDICATED seems to be somewhat confusing: Guest doesn't really care whether it's the only task running on a host CPU as long as it's not preempted. And there are more reasons for Guest to be preempted than host CPU sharing, for example, with memory overcommit it can get preempted on a memory access, post copy migration can cause preemption, etc. Let's call it KVM_HINTS_REALTIME which seems to better match what guests expect. Also, the flag most be set on all vCPUs - current guests assume this. Note so in the documentation. Signed-off-by: Michael S. Tsirkin Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/cpuid.txt | 6 +++--- arch/x86/include/uapi/asm/kvm_para.h | 2 +- arch/x86/kernel/kvm.c | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt index d4f33eb805dd..ab022dcd0911 100644 --- a/Documentation/virtual/kvm/cpuid.txt +++ b/Documentation/virtual/kvm/cpuid.txt @@ -72,8 +72,8 @@ KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side flag || value || meaning ================================================================================== -KVM_HINTS_DEDICATED || 0 || guest checks this feature bit to - || || determine if there is vCPU pinning - || || and there is no vCPU over-commitment, +KVM_HINTS_REALTIME || 0 || guest checks this feature bit to + || || determine that vCPUs are never + || || preempted for an unlimited time, || || allowing optimizations ---------------------------------------------------------------------------------- diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 4c851ebb3ceb..0ede697c3961 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -29,7 +29,7 @@ #define KVM_FEATURE_PV_TLB_FLUSH 9 #define KVM_FEATURE_ASYNC_PF_VMEXIT 10 -#define KVM_HINTS_DEDICATED 0 +#define KVM_HINTS_REALTIME 0 /* The last 8 bits are used to indicate how to interpret the flags field * in pvclock structure. If no bits are set, all flags are ignored. diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 7867417cfaff..5b2300b818af 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -457,7 +457,7 @@ static void __init sev_map_percpu_data(void) static void __init kvm_smp_prepare_cpus(unsigned int max_cpus) { native_smp_prepare_cpus(max_cpus); - if (kvm_para_has_hint(KVM_HINTS_DEDICATED)) + if (kvm_para_has_hint(KVM_HINTS_REALTIME)) static_branch_disable(&virt_spin_lock_key); } @@ -553,7 +553,7 @@ static void __init kvm_guest_init(void) } if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && - !kvm_para_has_hint(KVM_HINTS_DEDICATED) && + !kvm_para_has_hint(KVM_HINTS_REALTIME) && kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others; @@ -649,7 +649,7 @@ static __init int kvm_setup_pv_tlb_flush(void) int cpu; if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && - !kvm_para_has_hint(KVM_HINTS_DEDICATED) && + !kvm_para_has_hint(KVM_HINTS_REALTIME) && kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) { for_each_possible_cpu(cpu) { zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu), @@ -745,7 +745,7 @@ void __init kvm_spinlock_init(void) if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) return; - if (kvm_para_has_hint(KVM_HINTS_DEDICATED)) + if (kvm_para_has_hint(KVM_HINTS_REALTIME)) return; __pv_init_lock_hash(); From 54940fa60ad3728c592f62dadb558165495a6938 Mon Sep 17 00:00:00 2001 From: Darren Hart Date: Sat, 12 May 2018 12:10:07 -0700 Subject: [PATCH 330/347] platform/x86: DELL_WMI use depends on instead of select for DELL_SMBIOS If DELL_WMI "select"s DELL_SMBIOS, the DELL_SMBIOS dependencies are ignored and it is still possible to end up with unmet direct dependencies. Change the select to a depends on. Tested-by: Randy Dunlap Signed-off-by: Darren Hart (VMware) --- drivers/platform/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index bc309c5327ff..566644bb496a 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -168,8 +168,8 @@ config DELL_WMI depends on DMI depends on INPUT depends on ACPI_VIDEO || ACPI_VIDEO = n + depends on DELL_SMBIOS select DELL_WMI_DESCRIPTOR - select DELL_SMBIOS select INPUT_SPARSEKMAP ---help--- Say Y here if you want to support WMI-based hotkeys on Dell laptops. From 1e3054b98c5415d5cb5f8824fc33b548ae5644c3 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 18 May 2018 16:08:44 -0700 Subject: [PATCH 331/347] lib/test_bitmap.c: fix bitmap optimisation tests to report errors correctly I had neglected to increment the error counter when the tests failed, which made the tests noisy when they fail, but not actually return an error code. Link: http://lkml.kernel.org/r/20180509114328.9887-1-mpe@ellerman.id.au Fixes: 3cc78125a081 ("lib/test_bitmap.c: add optimisation tests") Signed-off-by: Matthew Wilcox Signed-off-by: Michael Ellerman Reported-by: Michael Ellerman Tested-by: Michael Ellerman Reviewed-by: Kees Cook Cc: Yury Norov Cc: Andy Shevchenko Cc: Geert Uytterhoeven Cc: [4.13+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/test_bitmap.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index de16f7869fb1..6cd7d0740005 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -331,23 +331,32 @@ static void noinline __init test_mem_optimisations(void) unsigned int start, nbits; for (start = 0; start < 1024; start += 8) { - memset(bmap1, 0x5a, sizeof(bmap1)); - memset(bmap2, 0x5a, sizeof(bmap2)); for (nbits = 0; nbits < 1024 - start; nbits += 8) { + memset(bmap1, 0x5a, sizeof(bmap1)); + memset(bmap2, 0x5a, sizeof(bmap2)); + bitmap_set(bmap1, start, nbits); __bitmap_set(bmap2, start, nbits); - if (!bitmap_equal(bmap1, bmap2, 1024)) + if (!bitmap_equal(bmap1, bmap2, 1024)) { printk("set not equal %d %d\n", start, nbits); - if (!__bitmap_equal(bmap1, bmap2, 1024)) + failed_tests++; + } + if (!__bitmap_equal(bmap1, bmap2, 1024)) { printk("set not __equal %d %d\n", start, nbits); + failed_tests++; + } bitmap_clear(bmap1, start, nbits); __bitmap_clear(bmap2, start, nbits); - if (!bitmap_equal(bmap1, bmap2, 1024)) + if (!bitmap_equal(bmap1, bmap2, 1024)) { printk("clear not equal %d %d\n", start, nbits); - if (!__bitmap_equal(bmap1, bmap2, 1024)) + failed_tests++; + } + if (!__bitmap_equal(bmap1, bmap2, 1024)) { printk("clear not __equal %d %d\n", start, nbits); + failed_tests++; + } } } } From d97baf9470b0668904aa96865abe7db4000dc3ba Mon Sep 17 00:00:00 2001 From: Souptick Joarder Date: Fri, 18 May 2018 16:08:47 -0700 Subject: [PATCH 332/347] include/linux/mm.h: add new inline function vmf_error() Many places in drivers/ file systems, error was handled in a common way like below: ret = (ret == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS; vmf_error() will replace this and return vm_fault_t type err. A lot of drivers and filesystems currently have a rather complex mapping of errno-to-VM_FAULT code. We have been able to eliminate a lot of it by just returning VM_FAULT codes directly from functions which are called exclusively from the fault handling path. Some functions can be called both from the fault handler and other context which are expecting an errno, so they have to continue to return an errno. Some users still need to choose different behaviour for different errnos, but vmf_error() captures the essential error translation that's common to all users, and those that need to handle additional errors can handle them first. Link: http://lkml.kernel.org/r/20180510174826.GA14268@jordon-HP-15-Notebook-PC Signed-off-by: Souptick Joarder Reviewed-by: Matthew Wilcox Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index c080af584ddd..c6fa9a255dbf 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2466,6 +2466,13 @@ static inline vm_fault_t vmf_insert_pfn(struct vm_area_struct *vma, return VM_FAULT_NOPAGE; } +static inline vm_fault_t vmf_error(int err) +{ + if (err == -ENOMEM) + return VM_FAULT_OOM; + return VM_FAULT_SIGBUS; +} + struct page *follow_page_mask(struct vm_area_struct *vma, unsigned long address, unsigned int foll_flags, unsigned int *page_mask); From 8d9fa88edd5e360b71765feeadb915d4066c9684 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 18 May 2018 16:08:51 -0700 Subject: [PATCH 333/347] radix tree test suite: fix mapshift build target Commit c6ce3e2fe3da ("radix tree test suite: Add config option for map shift") introduced a phony makefile target called 'mapshift' that ends up generating the file generated/map-shift.h. This phony target was then added as a dependency of the top level 'targets' build target, which is what is run when you go to tools/testing/radix-tree and just type 'make'. Unfortunately, this phony target doesn't actually work as a dependency, so you end up getting: $ make make: *** No rule to make target 'generated/map-shift.h', needed by 'main.o'. Stop. make: *** Waiting for unfinished jobs.... Fix this by making the file generated/map-shift.h our real makefile target, and add this a dependency of the top level build target. Link: http://lkml.kernel.org/r/20180503192430.7582-2-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler Cc: Christoph Hellwig Cc: CR, Sapthagirish Cc: Dan Williams Cc: Dave Chinner Cc: Jan Kara Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/radix-tree/Makefile | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index fa7ee369b3c9..db66f8a0d4be 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -17,7 +17,7 @@ ifeq ($(BUILD), 32) LDFLAGS += -m32 endif -targets: mapshift $(TARGETS) +targets: generated/map-shift.h $(TARGETS) main: $(OFILES) @@ -42,9 +42,7 @@ radix-tree.c: ../../../lib/radix-tree.c idr.c: ../../../lib/idr.c sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@ -.PHONY: mapshift - -mapshift: +generated/map-shift.h: @if ! grep -qws $(SHIFT) generated/map-shift.h; then \ echo "#define RADIX_TREE_MAP_SHIFT $(SHIFT)" > \ generated/map-shift.h; \ From dcbbf25adb31410c95ce844f80d372ed38b68b24 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 18 May 2018 16:08:54 -0700 Subject: [PATCH 334/347] radix tree test suite: fix compilation issue Pulled from a patch from Matthew Wilcox entitled "xarray: Add definition of struct xarray": > From: Matthew Wilcox > Signed-off-by: Matthew Wilcox https://patchwork.kernel.org/patch/10341249/ These defines fix this compilation error: In file included from ./linux/radix-tree.h:6:0, from ./linux/../../../../include/linux/idr.h:15, from ./linux/idr.h:1, from idr.c:4: ./linux/../../../../include/linux/idr.h: In function `idr_init_base': ./linux/../../../../include/linux/radix-tree.h:129:2: warning: implicit declaration of function `spin_lock_init'; did you mean `spinlock_t'? [-Wimplicit-function-declaration] spin_lock_init(&(root)->xa_lock); \ ^ ./linux/../../../../include/linux/idr.h:126:2: note: in expansion of macro `INIT_RADIX_TREE' INIT_RADIX_TREE(&idr->idr_rt, IDR_RT_MARKER); ^~~~~~~~~~~~~~~ by providing a spin_lock_init() wrapper for the v4.17-rc* version of the radix tree test suite. Link: http://lkml.kernel.org/r/20180503192430.7582-3-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler Cc: Christoph Hellwig Cc: CR, Sapthagirish Cc: Dan Williams Cc: Dave Chinner Cc: Jan Kara Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/include/linux/spinlock.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/include/linux/spinlock.h b/tools/include/linux/spinlock.h index b21b586b9854..1738c0391da4 100644 --- a/tools/include/linux/spinlock.h +++ b/tools/include/linux/spinlock.h @@ -6,8 +6,9 @@ #include #define spinlock_t pthread_mutex_t -#define DEFINE_SPINLOCK(x) pthread_mutex_t x = PTHREAD_MUTEX_INITIALIZER; +#define DEFINE_SPINLOCK(x) pthread_mutex_t x = PTHREAD_MUTEX_INITIALIZER #define __SPIN_LOCK_UNLOCKED(x) (pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER +#define spin_lock_init(x) pthread_mutex_init(x, NULL) #define spin_lock_irqsave(x, f) (void)f, pthread_mutex_lock(x) #define spin_unlock_irqrestore(x, f) (void)f, pthread_mutex_unlock(x) From 3e252fa7d4f711798e7a3f5ff2d7b62f0e2987ce Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 18 May 2018 16:08:58 -0700 Subject: [PATCH 335/347] radix tree test suite: add item_delete_rcu() Currently the lifetime of "struct item" entries in the radix tree are not controlled by RCU, but are instead deleted inline as they are removed from the tree. In the following patches we add a test which has threads iterating over items pulled from the tree and verifying them in an rcu_read_lock()/rcu_read_unlock() section. This means that though an item has been removed from the tree it could still be being worked on by other threads until the RCU grace period expires. So, we need to actually free the "struct item" structures at the end of the grace period, just as we do with "struct radix_tree_node" items. Link: http://lkml.kernel.org/r/20180503192430.7582-4-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler Cc: Christoph Hellwig Cc: CR, Sapthagirish Cc: Dan Williams Cc: Dave Chinner Cc: Jan Kara Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/radix-tree/test.c | 19 +++++++++++++++++++ tools/testing/radix-tree/test.h | 2 ++ 2 files changed, 21 insertions(+) diff --git a/tools/testing/radix-tree/test.c b/tools/testing/radix-tree/test.c index 5978ab1f403d..def6015570b2 100644 --- a/tools/testing/radix-tree/test.c +++ b/tools/testing/radix-tree/test.c @@ -75,6 +75,25 @@ int item_delete(struct radix_tree_root *root, unsigned long index) return 0; } +static void item_free_rcu(struct rcu_head *head) +{ + struct item *item = container_of(head, struct item, rcu_head); + + free(item); +} + +int item_delete_rcu(struct radix_tree_root *root, unsigned long index) +{ + struct item *item = radix_tree_delete(root, index); + + if (item) { + item_sanity(item, index); + call_rcu(&item->rcu_head, item_free_rcu); + return 1; + } + return 0; +} + void item_check_present(struct radix_tree_root *root, unsigned long index) { struct item *item; diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h index d9c031dbeb1a..8cf4a7a7f94c 100644 --- a/tools/testing/radix-tree/test.h +++ b/tools/testing/radix-tree/test.h @@ -5,6 +5,7 @@ #include struct item { + struct rcu_head rcu_head; unsigned long index; unsigned int order; }; @@ -15,6 +16,7 @@ int item_insert(struct radix_tree_root *root, unsigned long index); int item_insert_order(struct radix_tree_root *root, unsigned long index, unsigned order); int item_delete(struct radix_tree_root *root, unsigned long index); +int item_delete_rcu(struct radix_tree_root *root, unsigned long index); struct item *item_lookup(struct radix_tree_root *root, unsigned long index); void item_check_present(struct radix_tree_root *root, unsigned long index); From fd8f58c40b703e47697c9f12bc16c31f14c161f1 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 18 May 2018 16:09:01 -0700 Subject: [PATCH 336/347] radix tree test suite: multi-order iteration race Add a test which shows a race in the multi-order iteration code. This test reliably hits the race in under a second on my machine, and is the result of a real bug report against kernel a production v4.15 based kernel (4.15.6-300.fc27.x86_64). With a real kernel this issue is hit when using order 9 PMD DAX radix tree entries. The race has to do with how we tear down multi-order sibling entries when we are removing an item from the tree. Remember that an order 2 entry looks like this: struct radix_tree_node.slots[] = [entry][sibling][sibling][sibling] where 'entry' is in some slot in the struct radix_tree_node, and the three slots following 'entry' contain sibling pointers which point back to 'entry.' When we delete 'entry' from the tree, we call : radix_tree_delete() radix_tree_delete_item() __radix_tree_delete() replace_slot() replace_slot() first removes the siblings in order from the first to the last, then at then replaces 'entry' with NULL. This means that for a brief period of time we end up with one or more of the siblings removed, so: struct radix_tree_node.slots[] = [entry][NULL][sibling][sibling] This causes an issue if you have a reader iterating over the slots in the tree via radix_tree_for_each_slot() while only under rcu_read_lock()/rcu_read_unlock() protection. This is a common case in mm/filemap.c. The issue is that when __radix_tree_next_slot() => skip_siblings() tries to skip over the sibling entries in the slots, it currently does so with an exact match on the slot directly preceding our current slot. Normally this works: V preceding slot struct radix_tree_node.slots[] = [entry][sibling][sibling][sibling] ^ current slot This lets you find the first sibling, and you skip them all in order. But in the case where one of the siblings is NULL, that slot is skipped and then our sibling detection is interrupted: V preceding slot struct radix_tree_node.slots[] = [entry][NULL][sibling][sibling] ^ current slot This means that the sibling pointers aren't recognized since they point all the way back to 'entry', so we think that they are normal internal radix tree pointers. This causes us to think we need to walk down to a struct radix_tree_node starting at the address of 'entry'. In a real running kernel this will crash the thread with a GP fault when you try and dereference the slots in your broken node starting at 'entry'. In the radix tree test suite this will be caught by the address sanitizer: ==27063==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x60c0008ae400 at pc 0x00000040ce4f bp 0x7fa89b8fcad0 sp 0x7fa89b8fcac0 READ of size 8 at 0x60c0008ae400 thread T3 #0 0x40ce4e in __radix_tree_next_slot /home/rzwisler/project/linux/tools/testing/radix-tree/radix-tree.c:1660 #1 0x4022cc in radix_tree_next_slot linux/../../../../include/linux/radix-tree.h:567 #2 0x4022cc in iterator_func /home/rzwisler/project/linux/tools/testing/radix-tree/multiorder.c:655 #3 0x7fa8a088d50a in start_thread (/lib64/libpthread.so.0+0x750a) #4 0x7fa8a03bd16e in clone (/lib64/libc.so.6+0xf516e) Link: http://lkml.kernel.org/r/20180503192430.7582-5-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler Cc: Christoph Hellwig Cc: CR, Sapthagirish Cc: Dan Williams Cc: Dave Chinner Cc: Jan Kara Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/radix-tree/multiorder.c | 63 +++++++++++++++++++++++++++ tools/testing/radix-tree/test.h | 1 + 2 files changed, 64 insertions(+) diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index 59245b3d587c..7bf405638b0b 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "test.h" @@ -624,6 +625,67 @@ static void multiorder_account(void) item_kill_tree(&tree); } +bool stop_iteration = false; + +static void *creator_func(void *ptr) +{ + /* 'order' is set up to ensure we have sibling entries */ + unsigned int order = RADIX_TREE_MAP_SHIFT - 1; + struct radix_tree_root *tree = ptr; + int i; + + for (i = 0; i < 10000; i++) { + item_insert_order(tree, 0, order); + item_delete_rcu(tree, 0); + } + + stop_iteration = true; + return NULL; +} + +static void *iterator_func(void *ptr) +{ + struct radix_tree_root *tree = ptr; + struct radix_tree_iter iter; + struct item *item; + void **slot; + + while (!stop_iteration) { + rcu_read_lock(); + radix_tree_for_each_slot(slot, tree, &iter, 0) { + item = radix_tree_deref_slot(slot); + + if (!item) + continue; + if (radix_tree_deref_retry(item)) { + slot = radix_tree_iter_retry(&iter); + continue; + } + + item_sanity(item, iter.index); + } + rcu_read_unlock(); + } + return NULL; +} + +static void multiorder_iteration_race(void) +{ + const int num_threads = sysconf(_SC_NPROCESSORS_ONLN); + pthread_t worker_thread[num_threads]; + RADIX_TREE(tree, GFP_KERNEL); + int i; + + pthread_create(&worker_thread[0], NULL, &creator_func, &tree); + for (i = 1; i < num_threads; i++) + pthread_create(&worker_thread[i], NULL, &iterator_func, &tree); + + for (i = 0; i < num_threads; i++) + pthread_join(worker_thread[i], NULL); + + item_kill_tree(&tree); +} + void multiorder_checks(void) { int i; @@ -644,6 +706,7 @@ void multiorder_checks(void) multiorder_join(); multiorder_split(); multiorder_account(); + multiorder_iteration_race(); radix_tree_cpu_dead(0); } diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h index 8cf4a7a7f94c..31f1d9b6f506 100644 --- a/tools/testing/radix-tree/test.h +++ b/tools/testing/radix-tree/test.h @@ -13,6 +13,7 @@ struct item { struct item *item_create(unsigned long index, unsigned int order); int __item_insert(struct radix_tree_root *root, struct item *item); int item_insert(struct radix_tree_root *root, unsigned long index); +void item_sanity(struct item *item, unsigned long index); int item_insert_order(struct radix_tree_root *root, unsigned long index, unsigned order); int item_delete(struct radix_tree_root *root, unsigned long index); From 9f418224e8114156d995b98fa4e0f4fd21f685fe Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 18 May 2018 16:09:06 -0700 Subject: [PATCH 337/347] radix tree: fix multi-order iteration race Fix a race in the multi-order iteration code which causes the kernel to hit a GP fault. This was first seen with a production v4.15 based kernel (4.15.6-300.fc27.x86_64) utilizing a DAX workload which used order 9 PMD DAX entries. The race has to do with how we tear down multi-order sibling entries when we are removing an item from the tree. Remember for example that an order 2 entry looks like this: struct radix_tree_node.slots[] = [entry][sibling][sibling][sibling] where 'entry' is in some slot in the struct radix_tree_node, and the three slots following 'entry' contain sibling pointers which point back to 'entry.' When we delete 'entry' from the tree, we call : radix_tree_delete() radix_tree_delete_item() __radix_tree_delete() replace_slot() replace_slot() first removes the siblings in order from the first to the last, then at then replaces 'entry' with NULL. This means that for a brief period of time we end up with one or more of the siblings removed, so: struct radix_tree_node.slots[] = [entry][NULL][sibling][sibling] This causes an issue if you have a reader iterating over the slots in the tree via radix_tree_for_each_slot() while only under rcu_read_lock()/rcu_read_unlock() protection. This is a common case in mm/filemap.c. The issue is that when __radix_tree_next_slot() => skip_siblings() tries to skip over the sibling entries in the slots, it currently does so with an exact match on the slot directly preceding our current slot. Normally this works: V preceding slot struct radix_tree_node.slots[] = [entry][sibling][sibling][sibling] ^ current slot This lets you find the first sibling, and you skip them all in order. But in the case where one of the siblings is NULL, that slot is skipped and then our sibling detection is interrupted: V preceding slot struct radix_tree_node.slots[] = [entry][NULL][sibling][sibling] ^ current slot This means that the sibling pointers aren't recognized since they point all the way back to 'entry', so we think that they are normal internal radix tree pointers. This causes us to think we need to walk down to a struct radix_tree_node starting at the address of 'entry'. In a real running kernel this will crash the thread with a GP fault when you try and dereference the slots in your broken node starting at 'entry'. We fix this race by fixing the way that skip_siblings() detects sibling nodes. Instead of testing against the preceding slot we instead look for siblings via is_sibling_entry() which compares against the position of the struct radix_tree_node.slots[] array. This ensures that sibling entries are properly identified, even if they are no longer contiguous with the 'entry' they point to. Link: http://lkml.kernel.org/r/20180503192430.7582-6-ross.zwisler@linux.intel.com Fixes: 148deab223b2 ("radix-tree: improve multiorder iterators") Signed-off-by: Ross Zwisler Reported-by: CR, Sapthagirish Reviewed-by: Jan Kara Cc: Matthew Wilcox Cc: Christoph Hellwig Cc: Dan Williams Cc: Dave Chinner Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/lib/radix-tree.c b/lib/radix-tree.c index da9e10c827df..43e0cbedc3a0 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -1612,11 +1612,9 @@ static void set_iter_tags(struct radix_tree_iter *iter, static void __rcu **skip_siblings(struct radix_tree_node **nodep, void __rcu **slot, struct radix_tree_iter *iter) { - void *sib = node_to_entry(slot - 1); - while (iter->index < iter->next_index) { *nodep = rcu_dereference_raw(*slot); - if (*nodep && *nodep != sib) + if (*nodep && !is_sibling_entry(iter->node, *nodep)) return slot; slot++; iter->index = __radix_tree_iter_add(iter, 1); @@ -1631,7 +1629,7 @@ void __rcu **__radix_tree_next_slot(void __rcu **slot, struct radix_tree_iter *iter, unsigned flags) { unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK; - struct radix_tree_node *node = rcu_dereference_raw(*slot); + struct radix_tree_node *node; slot = skip_siblings(&node, slot, iter); From f3d8d3cfc1c00d1ebdb53ee96aa53aa822209003 Mon Sep 17 00:00:00 2001 From: "Shuah Khan (Samsung OSG)" Date: Fri, 18 May 2018 16:09:09 -0700 Subject: [PATCH 338/347] MAINTAINERS: add Q: entry to kselftest for patchwork project A new patchwork project is created to track kselftest patches. Update the kselftest entry in the MAINTAINERS file adding 'Q:' entry: https://patchwork.kernel.org/project/linux-kselftest/list/ Link: http://lkml.kernel.org/r/20180515164427.12201-1-shuah@kernel.org Signed-off-by: Shuah Khan (Samsung OSG) Cc: David S. Miller Cc: Mauro Carvalho Chehab Cc: Greg Kroah-Hartman Cc: Linus Walleij Cc: Randy Dunlap Cc: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 92e47b5b0480..9d1bc95dfe3c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7698,6 +7698,7 @@ KERNEL SELFTEST FRAMEWORK M: Shuah Khan L: linux-kselftest@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git +Q: https://patchwork.kernel.org/project/linux-kselftest/list/ S: Maintained F: tools/testing/selftests/ F: Documentation/dev-tools/kselftest* From ab1e8d8960b68f54af42b6484b5950bd13a4054b Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Fri, 18 May 2018 16:09:13 -0700 Subject: [PATCH 339/347] mm: don't allow deferred pages with NEED_PER_CPU_KM It is unsafe to do virtual to physical translations before mm_init() is called if struct page is needed in order to determine the memory section number (see SECTION_IN_PAGE_FLAGS). This is because only in mm_init() we initialize struct pages for all the allocated memory when deferred struct pages are used. My recent fix in commit c9e97a1997 ("mm: initialize pages on demand during boot") exposed this problem, because it greatly reduced number of pages that are initialized before mm_init(), but the problem existed even before my fix, as Fengguang Wu found. Below is a more detailed explanation of the problem. We initialize struct pages in four places: 1. Early in boot a small set of struct pages is initialized to fill the first section, and lower zones. 2. During mm_init() we initialize "struct pages" for all the memory that is allocated, i.e reserved in memblock. 3. Using on-demand logic when pages are allocated after mm_init call (when memblock is finished) 4. After smp_init() when the rest free deferred pages are initialized. The problem occurs if we try to do va to phys translation of a memory between steps 1 and 2. Because we have not yet initialized struct pages for all the reserved pages, it is inherently unsafe to do va to phys if the translation itself requires access of "struct page" as in case of this combination: CONFIG_SPARSE && !CONFIG_SPARSE_VMEMMAP The following path exposes the problem: start_kernel() trap_init() setup_cpu_entry_areas() setup_cpu_entry_area(cpu) get_cpu_gdt_paddr(cpu) per_cpu_ptr_to_phys(addr) pcpu_addr_to_page(addr) virt_to_page(addr) pfn_to_page(__pa(addr) >> PAGE_SHIFT) We disable this path by not allowing NEED_PER_CPU_KM with deferred struct pages feature. The problems are discussed in these threads: http://lkml.kernel.org/r/20180418135300.inazvpxjxowogyge@wfg-t540p.sh.intel.com http://lkml.kernel.org/r/20180419013128.iurzouiqxvcnpbvz@wfg-t540p.sh.intel.com http://lkml.kernel.org/r/20180426202619.2768-1-pasha.tatashin@oracle.com Link: http://lkml.kernel.org/r/20180515175124.1770-1-pasha.tatashin@oracle.com Fixes: 3a80a7fa7989 ("mm: meminit: initialise a subset of struct pages if CONFIG_DEFERRED_STRUCT_PAGE_INIT is set") Signed-off-by: Pavel Tatashin Acked-by: Michal Hocko Reviewed-by: Andrew Morton Cc: Steven Sistare Cc: Daniel Jordan Cc: Mel Gorman Cc: Fengguang Wu Cc: Dennis Zhou Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/Kconfig b/mm/Kconfig index d5004d82a1d6..e14c01513bfd 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -636,6 +636,7 @@ config DEFERRED_STRUCT_PAGE_INIT default n depends on NO_BOOTMEM depends on !FLATMEM + depends on !NEED_PER_CPU_KM help Ordinarily all struct pages are initialised during early boot in a single thread. On very large machines this can take a considerable From 66072c29328717072fd84aaff3e070e3f008ba77 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 18 May 2018 16:09:16 -0700 Subject: [PATCH 340/347] hfsplus: stop workqueue when fill_super() failed syzbot is reporting ODEBUG messages at hfsplus_fill_super() [1]. This is because hfsplus_fill_super() forgot to call cancel_delayed_work_sync(). As far as I can see, it is hfsplus_mark_mdb_dirty() from hfsplus_new_inode() in hfsplus_fill_super() that calls queue_delayed_work(). Therefore, I assume that hfsplus_new_inode() does not fail if queue_delayed_work() was called, and the out_put_hidden_dir label is the appropriate location to call cancel_delayed_work_sync(). [1] https://syzkaller.appspot.com/bug?id=a66f45e96fdbeb76b796bf46eb25ea878c42a6c9 Link: http://lkml.kernel.org/r/964a8b27-cd69-357c-fe78-76b066056201@I-love.SAKURA.ne.jp Signed-off-by: Tetsuo Handa Reported-by: syzbot Cc: Al Viro Cc: David Howells Cc: Ernesto A. Fernandez Cc: Vyacheslav Dubeyko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hfsplus/super.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 513c357c734b..a6c0f54c48c3 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -588,6 +588,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) return 0; out_put_hidden_dir: + cancel_delayed_work_sync(&sbi->sync_work); iput(sbi->hidden_dir); out_put_root: dput(sb->s_root); From 6b48cb5f8347bc0153ff1d7b075db92e6723ffdb Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Wed, 16 May 2018 14:53:30 -0700 Subject: [PATCH 341/347] X86/Hyper-V: Enlighten APIC access Hyper-V supports MSR based APIC access; implement the enlightenment. Signed-off-by: K. Y. Srinivasan Signed-off-by: Thomas Gleixner Reviewed-by: Michael Kelley Cc: olaf@aepfle.de Cc: sthemmin@microsoft.com Cc: gregkh@linuxfoundation.org Cc: jasowang@redhat.com Cc: Michael.H.Kelley@microsoft.com Cc: hpa@zytor.com Cc: apw@canonical.com Cc: devel@linuxdriverproject.org Cc: vkuznets@redhat.com Link: https://lkml.kernel.org/r/20180516215334.6547-1-kys@linuxonhyperv.com --- arch/x86/hyperv/Makefile | 2 +- arch/x86/hyperv/hv_apic.c | 104 ++++++++++++++++++++++++++++++++ arch/x86/hyperv/hv_init.c | 5 +- arch/x86/include/asm/mshyperv.h | 4 +- 4 files changed, 112 insertions(+), 3 deletions(-) create mode 100644 arch/x86/hyperv/hv_apic.c diff --git a/arch/x86/hyperv/Makefile b/arch/x86/hyperv/Makefile index 367a8203cfcf..00ce4df01a09 100644 --- a/arch/x86/hyperv/Makefile +++ b/arch/x86/hyperv/Makefile @@ -1 +1 @@ -obj-y := hv_init.o mmu.o +obj-y := hv_init.o mmu.o hv_apic.o diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c new file mode 100644 index 000000000000..ca20e31d311c --- /dev/null +++ b/arch/x86/hyperv/hv_apic.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Hyper-V specific APIC code. + * + * Copyright (C) 2018, Microsoft, Inc. + * + * Author : K. Y. Srinivasan + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_X86_64 +#if IS_ENABLED(CONFIG_HYPERV) + +static u64 hv_apic_icr_read(void) +{ + u64 reg_val; + + rdmsrl(HV_X64_MSR_ICR, reg_val); + return reg_val; +} + +static void hv_apic_icr_write(u32 low, u32 id) +{ + u64 reg_val; + + reg_val = SET_APIC_DEST_FIELD(id); + reg_val = reg_val << 32; + reg_val |= low; + + wrmsrl(HV_X64_MSR_ICR, reg_val); +} + +static u32 hv_apic_read(u32 reg) +{ + u32 reg_val, hi; + + switch (reg) { + case APIC_EOI: + rdmsr(HV_X64_MSR_EOI, reg_val, hi); + return reg_val; + case APIC_TASKPRI: + rdmsr(HV_X64_MSR_TPR, reg_val, hi); + return reg_val; + + default: + return native_apic_mem_read(reg); + } +} + +static void hv_apic_write(u32 reg, u32 val) +{ + switch (reg) { + case APIC_EOI: + wrmsr(HV_X64_MSR_EOI, val, 0); + break; + case APIC_TASKPRI: + wrmsr(HV_X64_MSR_TPR, val, 0); + break; + default: + native_apic_mem_write(reg, val); + } +} + +static void hv_apic_eoi_write(u32 reg, u32 val) +{ + wrmsr(HV_X64_MSR_EOI, val, 0); +} + +void __init hv_apic_init(void) +{ + if (ms_hyperv.hints & HV_X64_APIC_ACCESS_RECOMMENDED) { + pr_info("Hyper-V: Using MSR based APIC access\n"); + apic_set_eoi_write(hv_apic_eoi_write); + apic->read = hv_apic_read; + apic->write = hv_apic_write; + apic->icr_write = hv_apic_icr_write; + apic->icr_read = hv_apic_icr_read; + } +} + +#endif /* CONFIG_HYPERV */ +#endif /* CONFIG_X86_64 */ diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index cfecc2272f2d..71e50fc2b7ef 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -242,8 +242,9 @@ static int hv_cpu_die(unsigned int cpu) * * 1. Setup the hypercall page. * 2. Register Hyper-V specific clocksource. + * 3. Setup Hyper-V specific APIC entry points. */ -void hyperv_init(void) +void __init hyperv_init(void) { u64 guest_id, required_msrs; union hv_x64_msr_hypercall_contents hypercall_msr; @@ -298,6 +299,8 @@ void hyperv_init(void) hyper_alloc_mmu(); + hv_apic_init(); + /* * Register Hyper-V specific clocksource. */ diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index b90e79610cf7..162977b82e2e 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -258,7 +258,7 @@ static inline int hv_cpu_number_to_vp_number(int cpu_number) return hv_vp_index[cpu_number]; } -void hyperv_init(void); +void __init hyperv_init(void); void hyperv_setup_mmu_ops(void); void hyper_alloc_mmu(void); void hyperv_report_panic(struct pt_regs *regs, long err); @@ -269,6 +269,7 @@ void hyperv_reenlightenment_intr(struct pt_regs *regs); void set_hv_tscchange_cb(void (*cb)(void)); void clear_hv_tscchange_cb(void); void hyperv_stop_tsc_emulation(void); +void hv_apic_init(void); #else /* CONFIG_HYPERV */ static inline void hyperv_init(void) {} static inline bool hv_is_hyperv_initialized(void) { return false; } @@ -277,6 +278,7 @@ static inline void hyperv_setup_mmu_ops(void) {} static inline void set_hv_tscchange_cb(void (*cb)(void)) {} static inline void clear_hv_tscchange_cb(void) {} static inline void hyperv_stop_tsc_emulation(void) {}; +static inline void hv_apic_init(void) {} static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu) { return NULL; From 68bb7bfb7985df2bd15c2dc975cb68b7a901488a Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Wed, 16 May 2018 14:53:31 -0700 Subject: [PATCH 342/347] X86/Hyper-V: Enable IPI enlightenments Hyper-V supports hypercalls to implement IPI; use them. Signed-off-by: K. Y. Srinivasan Signed-off-by: Thomas Gleixner Reviewed-by: Michael Kelley Cc: olaf@aepfle.de Cc: sthemmin@microsoft.com Cc: gregkh@linuxfoundation.org Cc: jasowang@redhat.com Cc: Michael.H.Kelley@microsoft.com Cc: hpa@zytor.com Cc: apw@canonical.com Cc: devel@linuxdriverproject.org Cc: vkuznets@redhat.com Link: https://lkml.kernel.org/r/20180516215334.6547-2-kys@linuxonhyperv.com --- arch/x86/hyperv/hv_apic.c | 117 +++++++++++++++++++++++++++++ arch/x86/hyperv/hv_init.c | 27 +++++++ arch/x86/include/asm/hyperv-tlfs.h | 15 ++++ arch/x86/include/asm/mshyperv.h | 1 + 4 files changed, 160 insertions(+) diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c index ca20e31d311c..3e0de61f1a7c 100644 --- a/arch/x86/hyperv/hv_apic.c +++ b/arch/x86/hyperv/hv_apic.c @@ -33,6 +33,8 @@ #ifdef CONFIG_X86_64 #if IS_ENABLED(CONFIG_HYPERV) +static struct apic orig_apic; + static u64 hv_apic_icr_read(void) { u64 reg_val; @@ -88,8 +90,123 @@ static void hv_apic_eoi_write(u32 reg, u32 val) wrmsr(HV_X64_MSR_EOI, val, 0); } +/* + * IPI implementation on Hyper-V. + */ +static bool __send_ipi_mask(const struct cpumask *mask, int vector) +{ + int cur_cpu, vcpu; + struct ipi_arg_non_ex **arg; + struct ipi_arg_non_ex *ipi_arg; + int ret = 1; + unsigned long flags; + + if (cpumask_empty(mask)) + return true; + + if (!hv_hypercall_pg) + return false; + + if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR)) + return false; + + local_irq_save(flags); + arg = (struct ipi_arg_non_ex **)this_cpu_ptr(hyperv_pcpu_input_arg); + + ipi_arg = *arg; + if (unlikely(!ipi_arg)) + goto ipi_mask_done; + + ipi_arg->vector = vector; + ipi_arg->reserved = 0; + ipi_arg->cpu_mask = 0; + + for_each_cpu(cur_cpu, mask) { + vcpu = hv_cpu_number_to_vp_number(cur_cpu); + /* + * This particular version of the IPI hypercall can + * only target upto 64 CPUs. + */ + if (vcpu >= 64) + goto ipi_mask_done; + + __set_bit(vcpu, (unsigned long *)&ipi_arg->cpu_mask); + } + + ret = hv_do_hypercall(HVCALL_SEND_IPI, ipi_arg, NULL); + +ipi_mask_done: + local_irq_restore(flags); + return ((ret == 0) ? true : false); +} + +static bool __send_ipi_one(int cpu, int vector) +{ + struct cpumask mask = CPU_MASK_NONE; + + cpumask_set_cpu(cpu, &mask); + return __send_ipi_mask(&mask, vector); +} + +static void hv_send_ipi(int cpu, int vector) +{ + if (!__send_ipi_one(cpu, vector)) + orig_apic.send_IPI(cpu, vector); +} + +static void hv_send_ipi_mask(const struct cpumask *mask, int vector) +{ + if (!__send_ipi_mask(mask, vector)) + orig_apic.send_IPI_mask(mask, vector); +} + +static void hv_send_ipi_mask_allbutself(const struct cpumask *mask, int vector) +{ + unsigned int this_cpu = smp_processor_id(); + struct cpumask new_mask; + const struct cpumask *local_mask; + + cpumask_copy(&new_mask, mask); + cpumask_clear_cpu(this_cpu, &new_mask); + local_mask = &new_mask; + if (!__send_ipi_mask(local_mask, vector)) + orig_apic.send_IPI_mask_allbutself(mask, vector); +} + +static void hv_send_ipi_allbutself(int vector) +{ + hv_send_ipi_mask_allbutself(cpu_online_mask, vector); +} + +static void hv_send_ipi_all(int vector) +{ + if (!__send_ipi_mask(cpu_online_mask, vector)) + orig_apic.send_IPI_all(vector); +} + +static void hv_send_ipi_self(int vector) +{ + if (!__send_ipi_one(smp_processor_id(), vector)) + orig_apic.send_IPI_self(vector); +} + void __init hv_apic_init(void) { + if (ms_hyperv.hints & HV_X64_CLUSTER_IPI_RECOMMENDED) { + pr_info("Hyper-V: Using IPI hypercalls\n"); + /* + * Set the IPI entry points. + */ + orig_apic = *apic; + + apic->send_IPI = hv_send_ipi; + apic->send_IPI_mask = hv_send_ipi_mask; + apic->send_IPI_mask_allbutself = hv_send_ipi_mask_allbutself; + apic->send_IPI_allbutself = hv_send_ipi_allbutself; + apic->send_IPI_all = hv_send_ipi_all; + apic->send_IPI_self = hv_send_ipi_self; + } + if (ms_hyperv.hints & HV_X64_APIC_ACCESS_RECOMMENDED) { pr_info("Hyper-V: Using MSR based APIC access\n"); apic_set_eoi_write(hv_apic_eoi_write); diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 71e50fc2b7ef..6bc90d68ac8b 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -91,12 +91,19 @@ EXPORT_SYMBOL_GPL(hv_vp_index); struct hv_vp_assist_page **hv_vp_assist_page; EXPORT_SYMBOL_GPL(hv_vp_assist_page); +void __percpu **hyperv_pcpu_input_arg; +EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg); + u32 hv_max_vp_index; static int hv_cpu_init(unsigned int cpu) { u64 msr_vp_index; struct hv_vp_assist_page **hvp = &hv_vp_assist_page[smp_processor_id()]; + void **input_arg; + + input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); + *input_arg = page_address(alloc_page(GFP_KERNEL)); hv_get_vp_index(msr_vp_index); @@ -217,6 +224,16 @@ static int hv_cpu_die(unsigned int cpu) { struct hv_reenlightenment_control re_ctrl; unsigned int new_cpu; + unsigned long flags; + void **input_arg; + void *input_pg = NULL; + + local_irq_save(flags); + input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); + input_pg = *input_arg; + *input_arg = NULL; + local_irq_restore(flags); + free_page((unsigned long)input_pg); if (hv_vp_assist_page && hv_vp_assist_page[cpu]) wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, 0); @@ -260,6 +277,16 @@ void __init hyperv_init(void) if ((ms_hyperv.features & required_msrs) != required_msrs) return; + /* + * Allocate the per-CPU state for the hypercall input arg. + * If this allocation fails, we will not be able to setup + * (per-CPU) hypercall input page and thus this failure is + * fatal on Hyper-V. + */ + hyperv_pcpu_input_arg = alloc_percpu(void *); + + BUG_ON(hyperv_pcpu_input_arg == NULL); + /* Allocate percpu VP index */ hv_vp_index = kmalloc_array(num_possible_cpus(), sizeof(*hv_vp_index), GFP_KERNEL); diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 416cb0e0c496..332e786d4deb 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -164,6 +164,11 @@ */ #define HV_X64_DEPRECATING_AEOI_RECOMMENDED (1 << 9) +/* + * Recommend using cluster IPI hypercalls. + */ +#define HV_X64_CLUSTER_IPI_RECOMMENDED (1 << 10) + /* Recommend using the newer ExProcessorMasks interface */ #define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED (1 << 11) @@ -329,10 +334,14 @@ struct hv_tsc_emulation_status { #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK \ (~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1)) +#define HV_IPI_LOW_VECTOR 0x10 +#define HV_IPI_HIGH_VECTOR 0xff + /* Declare the various hypercall operations. */ #define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE 0x0002 #define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST 0x0003 #define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008 +#define HVCALL_SEND_IPI 0x000b #define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013 #define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014 #define HVCALL_POST_MESSAGE 0x005c @@ -706,4 +715,10 @@ struct hv_enlightened_vmcs { #define HV_STIMER_AUTOENABLE (1ULL << 3) #define HV_STIMER_SINT(config) (__u8)(((config) >> 16) & 0x0F) +struct ipi_arg_non_ex { + u32 vector; + u32 reserved; + u64 cpu_mask; +}; + #endif diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 162977b82e2e..1eff91599c2b 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -122,6 +122,7 @@ static inline void hv_disable_stimer0_percpu_irq(int irq) {} #if IS_ENABLED(CONFIG_HYPERV) extern struct clocksource *hyperv_cs; extern void *hv_hypercall_pg; +extern void __percpu **hyperv_pcpu_input_arg; static inline u64 hv_do_hypercall(u64 control, void *input, void *output) { From 366f03b0cf90ef55f063d4a54cf62b0ac9b6da9d Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Wed, 16 May 2018 14:53:32 -0700 Subject: [PATCH 343/347] X86/Hyper-V: Enhanced IPI enlightenment Support enhanced IPI enlightenments (to target more than 64 CPUs). Signed-off-by: K. Y. Srinivasan Signed-off-by: Thomas Gleixner Reviewed-by: Michael Kelley Cc: olaf@aepfle.de Cc: sthemmin@microsoft.com Cc: gregkh@linuxfoundation.org Cc: jasowang@redhat.com Cc: Michael.H.Kelley@microsoft.com Cc: hpa@zytor.com Cc: apw@canonical.com Cc: devel@linuxdriverproject.org Cc: vkuznets@redhat.com Link: https://lkml.kernel.org/r/20180516215334.6547-3-kys@linuxonhyperv.com --- arch/x86/hyperv/hv_apic.c | 42 +++++++++++++++++++++++++++++- arch/x86/hyperv/mmu.c | 2 +- arch/x86/include/asm/hyperv-tlfs.h | 15 ++++++++++- arch/x86/include/asm/mshyperv.h | 33 +++++++++++++++++++++++ 4 files changed, 89 insertions(+), 3 deletions(-) diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c index 3e0de61f1a7c..192b6ad6a361 100644 --- a/arch/x86/hyperv/hv_apic.c +++ b/arch/x86/hyperv/hv_apic.c @@ -93,6 +93,40 @@ static void hv_apic_eoi_write(u32 reg, u32 val) /* * IPI implementation on Hyper-V. */ +static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector) +{ + struct ipi_arg_ex **arg; + struct ipi_arg_ex *ipi_arg; + unsigned long flags; + int nr_bank = 0; + int ret = 1; + + local_irq_save(flags); + arg = (struct ipi_arg_ex **)this_cpu_ptr(hyperv_pcpu_input_arg); + + ipi_arg = *arg; + if (unlikely(!ipi_arg)) + goto ipi_mask_ex_done; + + ipi_arg->vector = vector; + ipi_arg->reserved = 0; + ipi_arg->vp_set.valid_bank_mask = 0; + + if (!cpumask_equal(mask, cpu_present_mask)) { + ipi_arg->vp_set.format = HV_GENERIC_SET_SPARSE_4K; + nr_bank = cpumask_to_vpset(&(ipi_arg->vp_set), mask); + } + if (!nr_bank) + ipi_arg->vp_set.format = HV_GENERIC_SET_ALL; + + ret = hv_do_rep_hypercall(HVCALL_SEND_IPI_EX, 0, nr_bank, + ipi_arg, NULL); + +ipi_mask_ex_done: + local_irq_restore(flags); + return ((ret == 0) ? true : false); +} + static bool __send_ipi_mask(const struct cpumask *mask, int vector) { int cur_cpu, vcpu; @@ -110,6 +144,9 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector) if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR)) return false; + if ((ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) + return __send_ipi_mask_ex(mask, vector); + local_irq_save(flags); arg = (struct ipi_arg_non_ex **)this_cpu_ptr(hyperv_pcpu_input_arg); @@ -193,7 +230,10 @@ static void hv_send_ipi_self(int vector) void __init hv_apic_init(void) { if (ms_hyperv.hints & HV_X64_CLUSTER_IPI_RECOMMENDED) { - pr_info("Hyper-V: Using IPI hypercalls\n"); + if ((ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) + pr_info("Hyper-V: Using ext hypercalls for IPI\n"); + else + pr_info("Hyper-V: Using IPI hypercalls\n"); /* * Set the IPI entry points. */ diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c index 56c9ebac946f..adee39a7a3f2 100644 --- a/arch/x86/hyperv/mmu.c +++ b/arch/x86/hyperv/mmu.c @@ -239,7 +239,7 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus, flush->hv_vp_set.valid_bank_mask = 0; if (!cpumask_equal(cpus, cpu_present_mask)) { - flush->hv_vp_set.format = HV_GENERIC_SET_SPARCE_4K; + flush->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; nr_bank = cpumask_to_vp_set(flush, cpus); } diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 332e786d4deb..3bfa92c2793c 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -344,6 +344,7 @@ struct hv_tsc_emulation_status { #define HVCALL_SEND_IPI 0x000b #define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013 #define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014 +#define HVCALL_SEND_IPI_EX 0x0015 #define HVCALL_POST_MESSAGE 0x005c #define HVCALL_SIGNAL_EVENT 0x005d @@ -369,7 +370,7 @@ struct hv_tsc_emulation_status { #define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3) enum HV_GENERIC_SET_FORMAT { - HV_GENERIC_SET_SPARCE_4K, + HV_GENERIC_SET_SPARSE_4K, HV_GENERIC_SET_ALL, }; @@ -721,4 +722,16 @@ struct ipi_arg_non_ex { u64 cpu_mask; }; +struct hv_vpset { + u64 format; + u64 valid_bank_mask; + u64 bank_contents[]; +}; + +struct ipi_arg_ex { + u32 vector; + u32 reserved; + struct hv_vpset vp_set; +}; + #endif diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 1eff91599c2b..0ee82519957b 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -259,6 +259,39 @@ static inline int hv_cpu_number_to_vp_number(int cpu_number) return hv_vp_index[cpu_number]; } +static inline int cpumask_to_vpset(struct hv_vpset *vpset, + const struct cpumask *cpus) +{ + int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1; + + /* valid_bank_mask can represent up to 64 banks */ + if (hv_max_vp_index / 64 >= 64) + return 0; + + /* + * Clear all banks up to the maximum possible bank as hv_flush_pcpu_ex + * structs are not cleared between calls, we risk flushing unneeded + * vCPUs otherwise. + */ + for (vcpu_bank = 0; vcpu_bank <= hv_max_vp_index / 64; vcpu_bank++) + vpset->bank_contents[vcpu_bank] = 0; + + /* + * Some banks may end up being empty but this is acceptable. + */ + for_each_cpu(cpu, cpus) { + vcpu = hv_cpu_number_to_vp_number(cpu); + vcpu_bank = vcpu / 64; + vcpu_offset = vcpu % 64; + __set_bit(vcpu_offset, (unsigned long *) + &vpset->bank_contents[vcpu_bank]); + if (vcpu_bank >= nr_bank) + nr_bank = vcpu_bank + 1; + } + vpset->valid_bank_mask = GENMASK_ULL(nr_bank - 1, 0); + return nr_bank; +} + void __init hyperv_init(void); void hyperv_setup_mmu_ops(void); void hyper_alloc_mmu(void); From 800b8f03fdc8d66885ff03de531285526a4ca0d4 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Wed, 16 May 2018 14:53:33 -0700 Subject: [PATCH 344/347] X86/Hyper-V: Consolidate code for converting cpumask to vpset Consolidate code for converting cpumask to vpset. Signed-off-by: K. Y. Srinivasan Signed-off-by: Thomas Gleixner Reviewed-by: Michael Kelley Cc: olaf@aepfle.de Cc: sthemmin@microsoft.com Cc: gregkh@linuxfoundation.org Cc: jasowang@redhat.com Cc: Michael.H.Kelley@microsoft.com Cc: hpa@zytor.com Cc: apw@canonical.com Cc: devel@linuxdriverproject.org Cc: vkuznets@redhat.com Link: https://lkml.kernel.org/r/20180516215334.6547-4-kys@linuxonhyperv.com --- arch/x86/hyperv/mmu.c | 43 ++----------------------------------------- 1 file changed, 2 insertions(+), 41 deletions(-) diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c index adee39a7a3f2..c9cd28f0bae4 100644 --- a/arch/x86/hyperv/mmu.c +++ b/arch/x86/hyperv/mmu.c @@ -25,11 +25,7 @@ struct hv_flush_pcpu { struct hv_flush_pcpu_ex { u64 address_space; u64 flags; - struct { - u64 format; - u64 valid_bank_mask; - u64 bank_contents[]; - } hv_vp_set; + struct hv_vpset hv_vp_set; u64 gva_list[]; }; @@ -70,41 +66,6 @@ static inline int fill_gva_list(u64 gva_list[], int offset, return gva_n - offset; } -/* Return the number of banks in the resulting vp_set */ -static inline int cpumask_to_vp_set(struct hv_flush_pcpu_ex *flush, - const struct cpumask *cpus) -{ - int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1; - - /* valid_bank_mask can represent up to 64 banks */ - if (hv_max_vp_index / 64 >= 64) - return 0; - - /* - * Clear all banks up to the maximum possible bank as hv_flush_pcpu_ex - * structs are not cleared between calls, we risk flushing unneeded - * vCPUs otherwise. - */ - for (vcpu_bank = 0; vcpu_bank <= hv_max_vp_index / 64; vcpu_bank++) - flush->hv_vp_set.bank_contents[vcpu_bank] = 0; - - /* - * Some banks may end up being empty but this is acceptable. - */ - for_each_cpu(cpu, cpus) { - vcpu = hv_cpu_number_to_vp_number(cpu); - vcpu_bank = vcpu / 64; - vcpu_offset = vcpu % 64; - __set_bit(vcpu_offset, (unsigned long *) - &flush->hv_vp_set.bank_contents[vcpu_bank]); - if (vcpu_bank >= nr_bank) - nr_bank = vcpu_bank + 1; - } - flush->hv_vp_set.valid_bank_mask = GENMASK_ULL(nr_bank - 1, 0); - - return nr_bank; -} - static void hyperv_flush_tlb_others(const struct cpumask *cpus, const struct flush_tlb_info *info) { @@ -240,7 +201,7 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus, if (!cpumask_equal(cpus, cpu_present_mask)) { flush->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; - nr_bank = cpumask_to_vp_set(flush, cpus); + nr_bank = cpumask_to_vpset(&(flush->hv_vp_set), cpus); } if (!nr_bank) { From 9a2d78e291a7dea0ae4b4a06ce6bbbe4f1ab7c13 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Wed, 16 May 2018 14:53:34 -0700 Subject: [PATCH 345/347] X86/Hyper-V: Consolidate the allocation of the hypercall input page Consolidate the allocation of the hypercall input page. Signed-off-by: K. Y. Srinivasan Signed-off-by: Thomas Gleixner Reviewed-by: Michael Kelley Cc: olaf@aepfle.de Cc: sthemmin@microsoft.com Cc: gregkh@linuxfoundation.org Cc: jasowang@redhat.com Cc: Michael.H.Kelley@microsoft.com Cc: hpa@zytor.com Cc: apw@canonical.com Cc: devel@linuxdriverproject.org Cc: vkuznets@redhat.com Link: https://lkml.kernel.org/r/20180516215334.6547-5-kys@linuxonhyperv.com --- arch/x86/hyperv/hv_init.c | 2 -- arch/x86/hyperv/mmu.c | 30 ++++++------------------------ arch/x86/include/asm/mshyperv.h | 1 - 3 files changed, 6 insertions(+), 27 deletions(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 6bc90d68ac8b..4c431e1c1eff 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -324,8 +324,6 @@ void __init hyperv_init(void) hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg); wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); - hyper_alloc_mmu(); - hv_apic_init(); /* diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c index c9cd28f0bae4..5f053d7d1bd9 100644 --- a/arch/x86/hyperv/mmu.c +++ b/arch/x86/hyperv/mmu.c @@ -32,9 +32,6 @@ struct hv_flush_pcpu_ex { /* Each gva in gva_list encodes up to 4096 pages to flush */ #define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE) -static struct hv_flush_pcpu __percpu **pcpu_flush; - -static struct hv_flush_pcpu_ex __percpu **pcpu_flush_ex; /* * Fills in gva_list starting from offset. Returns the number of items added. @@ -77,7 +74,7 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus, trace_hyperv_mmu_flush_tlb_others(cpus, info); - if (!pcpu_flush || !hv_hypercall_pg) + if (!hv_hypercall_pg) goto do_native; if (cpumask_empty(cpus)) @@ -85,10 +82,8 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus, local_irq_save(flags); - flush_pcpu = this_cpu_ptr(pcpu_flush); - - if (unlikely(!*flush_pcpu)) - *flush_pcpu = page_address(alloc_page(GFP_ATOMIC)); + flush_pcpu = (struct hv_flush_pcpu **) + this_cpu_ptr(hyperv_pcpu_input_arg); flush = *flush_pcpu; @@ -164,7 +159,7 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus, trace_hyperv_mmu_flush_tlb_others(cpus, info); - if (!pcpu_flush_ex || !hv_hypercall_pg) + if (!hv_hypercall_pg) goto do_native; if (cpumask_empty(cpus)) @@ -172,10 +167,8 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus, local_irq_save(flags); - flush_pcpu = this_cpu_ptr(pcpu_flush_ex); - - if (unlikely(!*flush_pcpu)) - *flush_pcpu = page_address(alloc_page(GFP_ATOMIC)); + flush_pcpu = (struct hv_flush_pcpu_ex **) + this_cpu_ptr(hyperv_pcpu_input_arg); flush = *flush_pcpu; @@ -257,14 +250,3 @@ void hyperv_setup_mmu_ops(void) pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others_ex; } } - -void hyper_alloc_mmu(void) -{ - if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED)) - return; - - if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) - pcpu_flush = alloc_percpu(struct hv_flush_pcpu *); - else - pcpu_flush_ex = alloc_percpu(struct hv_flush_pcpu_ex *); -} diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 0ee82519957b..9aaa493f5756 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -294,7 +294,6 @@ static inline int cpumask_to_vpset(struct hv_vpset *vpset, void __init hyperv_init(void); void hyperv_setup_mmu_ops(void); -void hyper_alloc_mmu(void); void hyperv_report_panic(struct pt_regs *regs, long err); bool hv_is_hyperv_initialized(void); void hyperv_cleanup(void); From 61eeb1f6d1f2648a218855d7c8d44f16df242ef3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 19 May 2018 16:38:59 +0200 Subject: [PATCH 346/347] x86/Hyper-V/hv_apic: Include asm/apic.h Not all configurations magically include asm/apic.h, but the Hyper-V code requires it. Include it explicitely. Fixes: 6b48cb5f8347 ("X86/Hyper-V: Enlighten APIC access") Reported-by: kbuild test robot Signed-off-by: Thomas Gleixner Cc: K. Y. Srinivasan Cc: Michael Kelley --- arch/x86/hyperv/hv_apic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c index 192b6ad6a361..d3ff6e255924 100644 --- a/arch/x86/hyperv/hv_apic.c +++ b/arch/x86/hyperv/hv_apic.c @@ -29,6 +29,7 @@ #include #include #include +#include #ifdef CONFIG_X86_64 #if IS_ENABLED(CONFIG_HYPERV) From 2d2ccf24939cf369f7473c7e4ea309891be91848 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 19 May 2018 21:22:48 +0200 Subject: [PATCH 347/347] x86/Hyper-V/hv_apic: Build the Hyper-V APIC conditionally The Hyper-V APIC code is built when CONFIG_HYPERV is enabled but the actual code in that file is guarded with CONFIG_X86_64. There is no point in doing this. Neither is there a point in having the CONFIG_HYPERV guard in there because the containing directory is not built when CONFIG_HYPERV=n. Further for the hv_init_apic() function a stub is provided only for CONFIG_HYPERV=n, which is pointless as the callsite is not compiled at all. But for X86_32 the stub is missing and the build fails. Clean that up: - Compile hv_apic.c only when CONFIG_X86_64=y - Make the stub for hv_init_apic() available when CONFG_X86_64=n Fixes: 6b48cb5f8347 ("X86/Hyper-V: Enlighten APIC access") Reported-by: kbuild test robot Signed-off-by: Thomas Gleixner Cc: K. Y. Srinivasan Cc: Michael Kelley --- arch/x86/hyperv/Makefile | 3 ++- arch/x86/hyperv/hv_apic.c | 6 ------ arch/x86/include/asm/mshyperv.h | 7 ++++++- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/x86/hyperv/Makefile b/arch/x86/hyperv/Makefile index 00ce4df01a09..b173d404e3df 100644 --- a/arch/x86/hyperv/Makefile +++ b/arch/x86/hyperv/Makefile @@ -1 +1,2 @@ -obj-y := hv_init.o mmu.o hv_apic.o +obj-y := hv_init.o mmu.o +obj-$(CONFIG_X86_64) += hv_apic.o diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c index d3ff6e255924..f68855499391 100644 --- a/arch/x86/hyperv/hv_apic.c +++ b/arch/x86/hyperv/hv_apic.c @@ -31,9 +31,6 @@ #include #include -#ifdef CONFIG_X86_64 -#if IS_ENABLED(CONFIG_HYPERV) - static struct apic orig_apic; static u64 hv_apic_icr_read(void) @@ -257,6 +254,3 @@ void __init hv_apic_init(void) apic->icr_read = hv_apic_icr_read; } } - -#endif /* CONFIG_HYPERV */ -#endif /* CONFIG_X86_64 */ diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 9aaa493f5756..997192131b7b 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -302,7 +302,13 @@ void hyperv_reenlightenment_intr(struct pt_regs *regs); void set_hv_tscchange_cb(void (*cb)(void)); void clear_hv_tscchange_cb(void); void hyperv_stop_tsc_emulation(void); + +#ifdef CONFIG_X86_64 void hv_apic_init(void); +#else +static inline void hv_apic_init(void) {} +#endif + #else /* CONFIG_HYPERV */ static inline void hyperv_init(void) {} static inline bool hv_is_hyperv_initialized(void) { return false; } @@ -311,7 +317,6 @@ static inline void hyperv_setup_mmu_ops(void) {} static inline void set_hv_tscchange_cb(void (*cb)(void)) {} static inline void clear_hv_tscchange_cb(void) {} static inline void hyperv_stop_tsc_emulation(void) {}; -static inline void hv_apic_init(void) {} static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu) { return NULL;