From 4eda9b766b042ea38d84df91581b03f6145a2ab0 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 10 May 2018 16:37:26 +0200 Subject: [PATCH 001/572] ARM: dts: socfpga: Fix NAND controller clock supply The Denali NAND x-clock should be supplied by nand_x_clk, not by nand_clk. Fix this, otherwise the Denali driver gets incorrect clock frequency information and incorrectly configures the NAND timing. Cc: stable@vger.kernel.org Signed-off-by: Marek Vasut Fixes: d837a80d19 ("ARM: dts: socfpga: add nand controller nodes") Cc: Steffen Trumtrar Signed-off-by: Dinh Nguyen --- arch/arm/boot/dts/socfpga.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/socfpga.dtsi b/arch/arm/boot/dts/socfpga.dtsi index 486d4e7433ed..8fbddc2a5c33 100644 --- a/arch/arm/boot/dts/socfpga.dtsi +++ b/arch/arm/boot/dts/socfpga.dtsi @@ -754,7 +754,7 @@ reg-names = "nand_data", "denali_reg"; interrupts = <0x0 0x90 0x4>; dma-mask = <0xffffffff>; - clocks = <&nand_clk>; + clocks = <&nand_x_clk>; status = "disabled"; }; From d9a695f3c8098ac9684689774a151cff30d8aa25 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 10 May 2018 14:52:23 +0200 Subject: [PATCH 002/572] ARM: dts: socfpga: Fix NAND controller node compatible The compatible string for the Denali NAND controller is incorrect, fix it by replacing it with one matching the DT bindings and the driver. Cc: stable@vger.kernel.org Signed-off-by: Marek Vasut Fixes: d837a80d19 ("ARM: dts: socfpga: add nand controller nodes") Cc: Steffen Trumtrar Signed-off-by: Dinh Nguyen --- arch/arm/boot/dts/socfpga.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/socfpga.dtsi b/arch/arm/boot/dts/socfpga.dtsi index 8fbddc2a5c33..b38f8c240558 100644 --- a/arch/arm/boot/dts/socfpga.dtsi +++ b/arch/arm/boot/dts/socfpga.dtsi @@ -748,7 +748,7 @@ nand0: nand@ff900000 { #address-cells = <0x1>; #size-cells = <0x1>; - compatible = "denali,denali-nand-dt"; + compatible = "altr,socfpga-denali-nand"; reg = <0xff900000 0x100000>, <0xffb80000 0x10000>; reg-names = "nand_data", "denali_reg"; From 3877ef7a1ccecaae378c497e1dcddbc2dccb664c Mon Sep 17 00:00:00 2001 From: Dinh Nguyen Date: Mon, 14 May 2018 10:15:19 -0500 Subject: [PATCH 003/572] ARM: dts: socfpga: Fix NAND controller node compatible for Arria10 The NAND compatible "denali,denal-nand-dt" property has never been used and is obsolete. Remove it. Cc: stable@vger.kernel.org Fixes: f549af06e9b6("ARM: dts: socfpga: Add NAND device tree for Arria10") Signed-off-by: Dinh Nguyen --- arch/arm/boot/dts/socfpga_arria10.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/socfpga_arria10.dtsi b/arch/arm/boot/dts/socfpga_arria10.dtsi index bead79e4b2aa..d8b1aa309f76 100644 --- a/arch/arm/boot/dts/socfpga_arria10.dtsi +++ b/arch/arm/boot/dts/socfpga_arria10.dtsi @@ -633,7 +633,7 @@ nand: nand@ffb90000 { #address-cells = <1>; #size-cells = <1>; - compatible = "denali,denali-nand-dt", "altr,socfpga-denali-nand"; + compatible = "altr,socfpga-denali-nand"; reg = <0xffb90000 0x72000>, <0xffb80000 0x10000>; reg-names = "nand_data", "denali_reg"; From 2f872ddcdb1e8e2186162616cea4581b8403849d Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 22 May 2018 11:40:28 +0200 Subject: [PATCH 004/572] arm64: dts: marvell: fix CP110 ICU node size ICU size in CP110 is not 0x10 but at least 0x440 bytes long (from the specification). Fixes: 6ef84a827c37 ("arm64: dts: marvell: enable GICP and ICU on Armada 7K/8K") Cc: stable@vger.kernel.org Signed-off-by: Miquel Raynal Reviewed-by: Thomas Petazzoni Signed-off-by: Gregory CLEMENT --- arch/arm64/boot/dts/marvell/armada-cp110.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/marvell/armada-cp110.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110.dtsi index ed2f1237ea1e..8259b32f0ced 100644 --- a/arch/arm64/boot/dts/marvell/armada-cp110.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-cp110.dtsi @@ -149,7 +149,7 @@ CP110_LABEL(icu): interrupt-controller@1e0000 { compatible = "marvell,cp110-icu"; - reg = <0x1e0000 0x10>; + reg = <0x1e0000 0x440>; #interrupt-cells = <3>; interrupt-controller; msi-parent = <&gicp>; From 866f4c8e0e26293b5819fd61c241502c79023775 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Tue, 22 May 2018 12:42:57 +0200 Subject: [PATCH 005/572] s390/net: add pnetid support s390 hardware supports the definition of a so-call Physical NETwork IDentifier (short PNETID) per network device port. These PNETIDS can be used to identify network devices that are attached to the same physical network (broadcast domain). This patch provides the interface to extract the PNETID of a port of a device attached to the ccw-bus or pci-bus. Parts of this patch are based on an initial implementation by Thomas Richter. Signed-off-by: Ursula Braun Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 3 ++ arch/s390/include/asm/pnet.h | 23 +++++++++++ arch/s390/net/Makefile | 1 + arch/s390/net/pnet.c | 76 ++++++++++++++++++++++++++++++++++++ 4 files changed, 103 insertions(+) create mode 100644 arch/s390/include/asm/pnet.h create mode 100644 arch/s390/net/pnet.c diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 199ac3e4da1d..33072e0bc589 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -59,6 +59,9 @@ config PCI_QUIRKS config ARCH_SUPPORTS_UPROBES def_bool y +config HAVE_PNETID + def_bool y if SMC + config S390 def_bool y select ARCH_BINFMT_ELF_STATE diff --git a/arch/s390/include/asm/pnet.h b/arch/s390/include/asm/pnet.h new file mode 100644 index 000000000000..6e278584f8f1 --- /dev/null +++ b/arch/s390/include/asm/pnet.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * IBM System z PNET ID Support + * + * Copyright IBM Corp. 2018 + */ + +#ifndef _ASM_S390_PNET_H +#define _ASM_S390_PNET_H + +#include +#include + +#define PNETIDS_LEN 64 /* Total utility string length in bytes + * to cover up to 4 PNETIDs of 16 bytes + * for up to 4 device ports + */ +#define MAX_PNETID_LEN 16 /* Max.length of a single port PNETID */ +#define MAX_PNETID_PORTS (PNETIDS_LEN / MAX_PNETID_LEN) + /* Max. # of ports with a PNETID */ + +int pnet_id_by_dev_port(struct device *dev, unsigned short port, u8 *pnetid); +#endif /* _ASM_S390_PNET_H */ diff --git a/arch/s390/net/Makefile b/arch/s390/net/Makefile index e0d5f245e42b..e2b85ffdbb0c 100644 --- a/arch/s390/net/Makefile +++ b/arch/s390/net/Makefile @@ -3,3 +3,4 @@ # Arch-specific network modules # obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o +obj-$(CONFIG_HAVE_PNETID) += pnet.o diff --git a/arch/s390/net/pnet.c b/arch/s390/net/pnet.c new file mode 100644 index 000000000000..ae958ba5337f --- /dev/null +++ b/arch/s390/net/pnet.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * IBM System z PNET ID Support + * + * Copyright IBM Corp. 2018 + */ + +#include +#include +#include +#include +#include +#include + +/* + * Get the PNETIDs from a device. + * s390 hardware supports the definition of a so-called Physical Network + * Identifier (short PNETID) per network device port. These PNETIDs can be + * used to identify network devices that are attached to the same physical + * network (broadcast domain). + * + * The device can be + * - a ccwgroup device with all bundled subchannels having the same PNETID + * - a PCI attached network device + * + * Returns: + * 0: PNETIDs extracted from device. + * -ENOMEM: No memory to extract utility string. + * -EOPNOTSUPP: Device type without utility string support + */ +static int pnet_ids_by_device(struct device *dev, u8 *pnetids) +{ + memset(pnetids, 0, PNETIDS_LEN); + if (dev_is_ccwgroup(dev)) { + struct ccwgroup_device *gdev = to_ccwgroupdev(dev); + u8 *util_str; + + util_str = ccw_device_get_util_str(gdev->cdev[0], 0); + if (!util_str) + return -ENOMEM; + memcpy(pnetids, util_str, PNETIDS_LEN); + kfree(util_str); + return 0; + } + if (dev_is_pci(dev)) { + struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); + + memcpy(pnetids, zdev->util_str, sizeof(zdev->util_str)); + return 0; + } + return -EOPNOTSUPP; +} + +/* + * Extract the pnetid for a device port. + * + * Return 0 if a pnetid is found and -ENOENT otherwise. + */ +int pnet_id_by_dev_port(struct device *dev, unsigned short port, u8 *pnetid) +{ + u8 pnetids[MAX_PNETID_PORTS][MAX_PNETID_LEN]; + static const u8 zero[MAX_PNETID_LEN] = { 0 }; + int rc = 0; + + if (!dev || port >= MAX_PNETID_PORTS) + return -ENOENT; + + if (!pnet_ids_by_device(dev, (u8 *)pnetids) && + memcmp(pnetids[port], zero, MAX_PNETID_LEN)) + memcpy(pnetid, pnetids[port], MAX_PNETID_LEN); + else + rc = -ENOENT; + + return rc; +} +EXPORT_SYMBOL_GPL(pnet_id_by_dev_port); From 3376d98021e915196f4894d835325a884e635a04 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Wed, 25 Apr 2018 11:43:17 +0200 Subject: [PATCH 006/572] s390/archrandom: Rework arch random implementation. The arch_get_random_seed_long() invocation done by the random device driver is done in interrupt context and may be invoked very very frequently. The existing s390 arch_get_random_seed*() implementation uses the PRNO(TRNG) instruction which produces excellent high quality entropy but is relatively slow and thus expensive. This fix reworks the arch_get_random_seed* implementation. It introduces a buffer concept to decouple the delivery of random data via arch_get_random_seed*() from the generation of new random bytes. The buffer of random data is filled asynchronously by a workqueue thread. If there are enough bytes in the buffer the s390_arch_random_generate() just delivers these bytes. Otherwise false is returned until the worker thread refills the buffer. The worker fills the rng buffer by pulling fresh entropy from the high quality (but slow) true hardware random generator. This entropy is then spread over the buffer with an pseudo random generator. As the arch_get_random_seed_long() fetches 8 bytes and the calling function add_interrupt_randomness() counts this as 1 bit entropy the distribution needs to make sure there is in fact 1 bit entropy contained in 8 bytes of the buffer. The current values pull 32 byte entropy and scatter this into a 2048 byte buffer. So 8 byte in the buffer will contain 1 bit of entropy. The worker thread is rescheduled based on the charge level of the buffer but at least with 500 ms delay to avoid too much cpu consumption. So the max. amount of rng data delivered via arch_get_random_seed is limited to 4Kb per second. Signed-off-by: Harald Freudenberger Reviewed-by: Patrick Steuer Signed-off-by: Martin Schwidefsky --- arch/s390/crypto/arch_random.c | 103 +++++++++++++++++++++++++++-- arch/s390/include/asm/archrandom.h | 13 +--- 2 files changed, 102 insertions(+), 14 deletions(-) diff --git a/arch/s390/crypto/arch_random.c b/arch/s390/crypto/arch_random.c index 8720e9203ecf..dd95cdbd22ce 100644 --- a/arch/s390/crypto/arch_random.c +++ b/arch/s390/crypto/arch_random.c @@ -2,14 +2,37 @@ /* * s390 arch random implementation. * - * Copyright IBM Corp. 2017 - * Author(s): Harald Freudenberger + * Copyright IBM Corp. 2017, 2018 + * Author(s): Harald Freudenberger + * + * The s390_arch_random_generate() function may be called from random.c + * in interrupt context. So this implementation does the best to be very + * fast. There is a buffer of random data which is asynchronously checked + * and filled by a workqueue thread. + * If there are enough bytes in the buffer the s390_arch_random_generate() + * just delivers these bytes. Otherwise false is returned until the + * worker thread refills the buffer. + * The worker fills the rng buffer by pulling fresh entropy from the + * high quality (but slow) true hardware random generator. This entropy + * is then spread over the buffer with an pseudo random generator PRNG. + * As the arch_get_random_seed_long() fetches 8 bytes and the calling + * function add_interrupt_randomness() counts this as 1 bit entropy the + * distribution needs to make sure there is in fact 1 bit entropy contained + * in 8 bytes of the buffer. The current values pull 32 byte entropy + * and scatter this into a 2048 byte buffer. So 8 byte in the buffer + * will contain 1 bit of entropy. + * The worker thread is rescheduled based on the charge level of the + * buffer but at least with 500 ms delay to avoid too much CPU consumption. + * So the max. amount of rng data delivered via arch_get_random_seed is + * limited to 4k bytes per second. */ #include #include #include +#include #include +#include #include DEFINE_STATIC_KEY_FALSE(s390_arch_random_available); @@ -17,11 +40,83 @@ DEFINE_STATIC_KEY_FALSE(s390_arch_random_available); atomic64_t s390_arch_random_counter = ATOMIC64_INIT(0); EXPORT_SYMBOL(s390_arch_random_counter); +#define ARCH_REFILL_TICKS (HZ/2) +#define ARCH_PRNG_SEED_SIZE 32 +#define ARCH_RNG_BUF_SIZE 2048 + +static DEFINE_SPINLOCK(arch_rng_lock); +static u8 *arch_rng_buf; +static unsigned int arch_rng_buf_idx; + +static void arch_rng_refill_buffer(struct work_struct *); +static DECLARE_DELAYED_WORK(arch_rng_work, arch_rng_refill_buffer); + +bool s390_arch_random_generate(u8 *buf, unsigned int nbytes) +{ + /* lock rng buffer */ + if (!spin_trylock(&arch_rng_lock)) + return false; + + /* try to resolve the requested amount of bytes from the buffer */ + arch_rng_buf_idx -= nbytes; + if (arch_rng_buf_idx < ARCH_RNG_BUF_SIZE) { + memcpy(buf, arch_rng_buf + arch_rng_buf_idx, nbytes); + atomic64_add(nbytes, &s390_arch_random_counter); + spin_unlock(&arch_rng_lock); + return true; + } + + /* not enough bytes in rng buffer, refill is done asynchronously */ + spin_unlock(&arch_rng_lock); + + return false; +} +EXPORT_SYMBOL(s390_arch_random_generate); + +static void arch_rng_refill_buffer(struct work_struct *unused) +{ + unsigned int delay = ARCH_REFILL_TICKS; + + spin_lock(&arch_rng_lock); + if (arch_rng_buf_idx > ARCH_RNG_BUF_SIZE) { + /* buffer is exhausted and needs refill */ + u8 seed[ARCH_PRNG_SEED_SIZE]; + u8 prng_wa[240]; + /* fetch ARCH_PRNG_SEED_SIZE bytes of entropy */ + cpacf_trng(NULL, 0, seed, sizeof(seed)); + /* blow this entropy up to ARCH_RNG_BUF_SIZE with PRNG */ + memset(prng_wa, 0, sizeof(prng_wa)); + cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED, + &prng_wa, NULL, 0, seed, sizeof(seed)); + cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN, + &prng_wa, arch_rng_buf, ARCH_RNG_BUF_SIZE, NULL, 0); + arch_rng_buf_idx = ARCH_RNG_BUF_SIZE; + } + delay += (ARCH_REFILL_TICKS * arch_rng_buf_idx) / ARCH_RNG_BUF_SIZE; + spin_unlock(&arch_rng_lock); + + /* kick next check */ + queue_delayed_work(system_long_wq, &arch_rng_work, delay); +} + static int __init s390_arch_random_init(void) { - /* check if subfunction CPACF_PRNO_TRNG is available */ - if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG)) + /* all the needed PRNO subfunctions available ? */ + if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG) && + cpacf_query_func(CPACF_PRNO, CPACF_PRNO_SHA512_DRNG_GEN)) { + + /* alloc arch random working buffer */ + arch_rng_buf = kmalloc(ARCH_RNG_BUF_SIZE, GFP_KERNEL); + if (!arch_rng_buf) + return -ENOMEM; + + /* kick worker queue job to fill the random buffer */ + queue_delayed_work(system_long_wq, + &arch_rng_work, ARCH_REFILL_TICKS); + + /* enable arch random to the outside world */ static_branch_enable(&s390_arch_random_available); + } return 0; } diff --git a/arch/s390/include/asm/archrandom.h b/arch/s390/include/asm/archrandom.h index 09aed1095336..c67b82dfa558 100644 --- a/arch/s390/include/asm/archrandom.h +++ b/arch/s390/include/asm/archrandom.h @@ -15,16 +15,11 @@ #include #include -#include DECLARE_STATIC_KEY_FALSE(s390_arch_random_available); extern atomic64_t s390_arch_random_counter; -static void s390_arch_random_generate(u8 *buf, unsigned int nbytes) -{ - cpacf_trng(NULL, 0, buf, nbytes); - atomic64_add(nbytes, &s390_arch_random_counter); -} +bool s390_arch_random_generate(u8 *buf, unsigned int nbytes); static inline bool arch_has_random(void) { @@ -51,8 +46,7 @@ static inline bool arch_get_random_int(unsigned int *v) static inline bool arch_get_random_seed_long(unsigned long *v) { if (static_branch_likely(&s390_arch_random_available)) { - s390_arch_random_generate((u8 *)v, sizeof(*v)); - return true; + return s390_arch_random_generate((u8 *)v, sizeof(*v)); } return false; } @@ -60,8 +54,7 @@ static inline bool arch_get_random_seed_long(unsigned long *v) static inline bool arch_get_random_seed_int(unsigned int *v) { if (static_branch_likely(&s390_arch_random_available)) { - s390_arch_random_generate((u8 *)v, sizeof(*v)); - return true; + return s390_arch_random_generate((u8 *)v, sizeof(*v)); } return false; } From ac62cc9d9cd6fa4c79e171c13dc8d58c3862b678 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 25 May 2018 16:01:47 +0530 Subject: [PATCH 007/572] arm: dts: armada: Fix "#cooling-cells" property's name It should be "#cooling-cells" instead of "cooling-cells". Fix it. Signed-off-by: Viresh Kumar Signed-off-by: Gregory CLEMENT --- arch/arm/boot/dts/armada-385-synology-ds116.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/armada-385-synology-ds116.dts b/arch/arm/boot/dts/armada-385-synology-ds116.dts index 6782ce481ac9..d8769956cbfc 100644 --- a/arch/arm/boot/dts/armada-385-synology-ds116.dts +++ b/arch/arm/boot/dts/armada-385-synology-ds116.dts @@ -139,7 +139,7 @@ 3700 5 3900 6 4000 7>; - cooling-cells = <2>; + #cooling-cells = <2>; }; gpio-leds { From 2c861d89ccda2fbcea9358eff9cc5f8fae548be5 Mon Sep 17 00:00:00 2001 From: Dong Jia Shi Date: Wed, 2 May 2018 09:25:59 +0200 Subject: [PATCH 008/572] vfio: ccw: fix error return in vfio_ccw_sch_event If the device has not been registered, or there is work pending, we should reschedule a sch_event call again. Signed-off-by: Dong Jia Shi Message-Id: <20180502072559.50691-1-bjsdjshi@linux.vnet.ibm.com> Reviewed-by: Cornelia Huck Signed-off-by: Cornelia Huck --- drivers/s390/cio/vfio_ccw_drv.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c index ea6a2d0b2894..770fa9cfc310 100644 --- a/drivers/s390/cio/vfio_ccw_drv.c +++ b/drivers/s390/cio/vfio_ccw_drv.c @@ -177,6 +177,7 @@ static int vfio_ccw_sch_event(struct subchannel *sch, int process) { struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev); unsigned long flags; + int rc = -EAGAIN; spin_lock_irqsave(sch->lock, flags); if (!device_is_registered(&sch->dev)) @@ -187,6 +188,7 @@ static int vfio_ccw_sch_event(struct subchannel *sch, int process) if (cio_update_schib(sch)) { vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_NOT_OPER); + rc = 0; goto out_unlock; } @@ -195,11 +197,12 @@ static int vfio_ccw_sch_event(struct subchannel *sch, int process) private->state = private->mdev ? VFIO_CCW_STATE_IDLE : VFIO_CCW_STATE_STANDBY; } + rc = 0; out_unlock: spin_unlock_irqrestore(sch->lock, flags); - return 0; + return rc; } static struct css_device_id vfio_ccw_sch_ids[] = { From fb9e7880af357f0244f57a3dc4dd365091970b1a Mon Sep 17 00:00:00 2001 From: Halil Pasic Date: Wed, 16 May 2018 19:33:42 +0200 Subject: [PATCH 009/572] vfio: ccw: push down unsupported IDA check There is at least one relevant guest OS that doesn't set the IDA flags in the ORB as we would like them, but never uses any IDA. So instead of saying -EOPNOTSUPP when observing an ORB, such that a channel program specified by it could be a not supported one, let us say -EOPNOTSUPP only if the channel program is a not supported one. Of course, the real solution would be doing proper translation for all IDA. This is possible, but given the current code not straight forward. Signed-off-by: Halil Pasic Tested-by: Jason J. Herne Message-Id: <20180516173342.15174-1-pasic@linux.ibm.com> Reviewed-by: Dong Jia Shi Signed-off-by: Cornelia Huck --- drivers/s390/cio/vfio_ccw_cp.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c index dce92b2a895d..9a2a39df1056 100644 --- a/drivers/s390/cio/vfio_ccw_cp.c +++ b/drivers/s390/cio/vfio_ccw_cp.c @@ -365,6 +365,9 @@ static void cp_unpin_free(struct channel_program *cp) * This is the chain length not considering any TICs. * You need to do a new round for each TIC target. * + * The program is also validated for absence of not yet supported + * indirect data addressing scenarios. + * * Returns: the length of the ccw chain or -errno. */ static int ccwchain_calc_length(u64 iova, struct channel_program *cp) @@ -391,6 +394,14 @@ static int ccwchain_calc_length(u64 iova, struct channel_program *cp) do { cnt++; + /* + * As we don't want to fail direct addressing even if the + * orb specified one of the unsupported formats, we defer + * checking for IDAWs in unsupported formats to here. + */ + if ((!cp->orb.cmd.c64 || cp->orb.cmd.i2k) && ccw_is_idal(ccw)) + return -EOPNOTSUPP; + if ((!ccw_is_chain(ccw)) && (!ccw_is_tic(ccw))) break; @@ -656,10 +667,8 @@ int cp_init(struct channel_program *cp, struct device *mdev, union orb *orb) /* * XXX: * Only support prefetch enable mode now. - * Only support 64bit addressing idal. - * Only support 4k IDAW. */ - if (!orb->cmd.pfch || !orb->cmd.c64 || orb->cmd.i2k) + if (!orb->cmd.pfch) return -EOPNOTSUPP; INIT_LIST_HEAD(&cp->ccwchain_list); @@ -688,6 +697,10 @@ int cp_init(struct channel_program *cp, struct device *mdev, union orb *orb) ret = ccwchain_loop_tic(chain, cp); if (ret) cp_unpin_free(cp); + /* It is safe to force: if not set but idals used + * ccwchain_calc_length returns an error. + */ + cp->orb.cmd.c64 = 1; return ret; } From 80c57f7a075b0c53944113e42ce114d8bf0977e4 Mon Sep 17 00:00:00 2001 From: Dong Jia Shi Date: Wed, 23 May 2018 04:56:42 +0200 Subject: [PATCH 010/572] vfio: ccw: shorten kernel doc description for pfn_array_pin() The kernel doc description for usage of the struct pfn_array in pfn_array_pin() is unnecessary long. Let's shorten it by describing the contents of the struct pfn_array fields at the struct's definition instead. Suggested-by: Cornelia Huck Signed-off-by: Dong Jia Shi Message-Id: <20180523025645.8978-2-bjsdjshi@linux.ibm.com> Signed-off-by: Cornelia Huck --- drivers/s390/cio/vfio_ccw_cp.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c index 9a2a39df1056..c532939c1c3f 100644 --- a/drivers/s390/cio/vfio_ccw_cp.c +++ b/drivers/s390/cio/vfio_ccw_cp.c @@ -23,9 +23,13 @@ #define CCWCHAIN_LEN_MAX 256 struct pfn_array { + /* Starting guest physical I/O address. */ unsigned long pa_iova; + /* Array that stores PFNs of the pages need to pin. */ unsigned long *pa_iova_pfn; + /* Array that receives PFNs of the pages pinned. */ unsigned long *pa_pfn; + /* Number of pages to pin/pinned from @pa_iova. */ int pa_nr; }; @@ -53,14 +57,8 @@ struct ccwchain { * Attempt to pin user pages in memory. * * Usage of pfn_array: - * @pa->pa_iova starting guest physical I/O address. Assigned by caller. - * @pa->pa_iova_pfn array that stores PFNs of the pages need to pin. Allocated - * by caller. - * @pa->pa_pfn array that receives PFNs of the pages pinned. Allocated by - * caller. - * @pa->pa_nr number of pages from @pa->pa_iova to pin. Assigned by - * caller. - * number of pages pinned. Assigned by callee. + * Any field in this structure should be initialized by caller. + * We expect @pa->pa_nr > 0, and its value will be assigned by callee. * * Returns: * Number of pages pinned on success. From 5c1cfb1c3948fe93a32dfcd75223dda0f1558bb7 Mon Sep 17 00:00:00 2001 From: Dong Jia Shi Date: Wed, 23 May 2018 04:56:43 +0200 Subject: [PATCH 011/572] vfio: ccw: refactor and improve pfn_array_alloc_pin() This refactors pfn_array_alloc_pin() and also improves it by adding defensive code in error handling so that calling pfn_array_unpin_free() after error return won't lead to problem. This mainly does: 1. Merge pfn_array_pin() into pfn_array_alloc_pin(), since there is no other user of pfn_array_pin(). As a result, also remove kernel-doc for pfn_array_pin() and add/update kernel-doc for pfn_array_alloc_pin() and struct pfn_array. 2. For a vfio_pin_pages() failure, set pa->pa_nr to zero to indicate zero pages were pinned. 3. Set pa->pa_iova_pfn to NULL right after it was freed. Suggested-by: Pierre Morel Signed-off-by: Dong Jia Shi Message-Id: <20180523025645.8978-3-bjsdjshi@linux.ibm.com> Signed-off-by: Cornelia Huck --- drivers/s390/cio/vfio_ccw_cp.c | 86 +++++++++++++++------------------- 1 file changed, 38 insertions(+), 48 deletions(-) diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c index c532939c1c3f..b0f20230fc72 100644 --- a/drivers/s390/cio/vfio_ccw_cp.c +++ b/drivers/s390/cio/vfio_ccw_cp.c @@ -29,7 +29,7 @@ struct pfn_array { unsigned long *pa_iova_pfn; /* Array that receives PFNs of the pages pinned. */ unsigned long *pa_pfn; - /* Number of pages to pin/pinned from @pa_iova. */ + /* Number of pages pinned from @pa_iova. */ int pa_nr; }; @@ -50,64 +50,33 @@ struct ccwchain { }; /* - * pfn_array_pin() - pin user pages in memory + * pfn_array_alloc_pin() - alloc memory for PFNs, then pin user pages in memory * @pa: pfn_array on which to perform the operation * @mdev: the mediated device to perform pin/unpin operations + * @iova: target guest physical address + * @len: number of bytes that should be pinned from @iova * - * Attempt to pin user pages in memory. + * Attempt to allocate memory for PFNs, and pin user pages in memory. * * Usage of pfn_array: - * Any field in this structure should be initialized by caller. - * We expect @pa->pa_nr > 0, and its value will be assigned by callee. + * We expect (pa_nr == 0) and (pa_iova_pfn == NULL), any field in + * this structure will be filled in by this function. * * Returns: * Number of pages pinned on success. - * If @pa->pa_nr is 0 or negative, returns 0. + * If @pa->pa_nr is not 0, or @pa->pa_iova_pfn is not NULL initially, + * returns -EINVAL. * If no pages were pinned, returns -errno. */ -static int pfn_array_pin(struct pfn_array *pa, struct device *mdev) -{ - int i, ret; - - if (pa->pa_nr <= 0) { - pa->pa_nr = 0; - return 0; - } - - pa->pa_iova_pfn[0] = pa->pa_iova >> PAGE_SHIFT; - for (i = 1; i < pa->pa_nr; i++) - pa->pa_iova_pfn[i] = pa->pa_iova_pfn[i - 1] + 1; - - ret = vfio_pin_pages(mdev, pa->pa_iova_pfn, pa->pa_nr, - IOMMU_READ | IOMMU_WRITE, pa->pa_pfn); - - if (ret > 0 && ret != pa->pa_nr) { - vfio_unpin_pages(mdev, pa->pa_iova_pfn, ret); - pa->pa_nr = 0; - return 0; - } - - return ret; -} - -/* Unpin the pages before releasing the memory. */ -static void pfn_array_unpin_free(struct pfn_array *pa, struct device *mdev) -{ - vfio_unpin_pages(mdev, pa->pa_iova_pfn, pa->pa_nr); - pa->pa_nr = 0; - kfree(pa->pa_iova_pfn); -} - -/* Alloc memory for PFNs, then pin pages with them. */ static int pfn_array_alloc_pin(struct pfn_array *pa, struct device *mdev, u64 iova, unsigned int len) { - int ret = 0; + int i, ret = 0; if (!len) return 0; - if (pa->pa_nr) + if (pa->pa_nr || pa->pa_iova_pfn) return -EINVAL; pa->pa_iova = iova; @@ -124,16 +93,37 @@ static int pfn_array_alloc_pin(struct pfn_array *pa, struct device *mdev, return -ENOMEM; pa->pa_pfn = pa->pa_iova_pfn + pa->pa_nr; - ret = pfn_array_pin(pa, mdev); + pa->pa_iova_pfn[0] = pa->pa_iova >> PAGE_SHIFT; + for (i = 1; i < pa->pa_nr; i++) + pa->pa_iova_pfn[i] = pa->pa_iova_pfn[i - 1] + 1; - if (ret > 0) - return ret; - else if (!ret) + ret = vfio_pin_pages(mdev, pa->pa_iova_pfn, pa->pa_nr, + IOMMU_READ | IOMMU_WRITE, pa->pa_pfn); + + if (ret < 0) { + goto err_out; + } else if (ret > 0 && ret != pa->pa_nr) { + vfio_unpin_pages(mdev, pa->pa_iova_pfn, ret); ret = -EINVAL; - - kfree(pa->pa_iova_pfn); + goto err_out; + } return ret; + +err_out: + pa->pa_nr = 0; + kfree(pa->pa_iova_pfn); + pa->pa_iova_pfn = NULL; + + return ret; +} + +/* Unpin the pages before releasing the memory. */ +static void pfn_array_unpin_free(struct pfn_array *pa, struct device *mdev) +{ + vfio_unpin_pages(mdev, pa->pa_iova_pfn, pa->pa_nr); + pa->pa_nr = 0; + kfree(pa->pa_iova_pfn); } static int pfn_array_table_init(struct pfn_array_table *pat, int nr) From 6238f92132a6da64b731de1a728fa46ffaa21f62 Mon Sep 17 00:00:00 2001 From: Dong Jia Shi Date: Wed, 23 May 2018 04:56:44 +0200 Subject: [PATCH 012/572] vfio: ccw: set ccw->cda to NULL defensively Let's avoid free on ccw->cda that points to a guest address or an already freed memory area by setting it to NULL if memory allocation didn't happen or failed. Signed-off-by: Dong Jia Shi Message-Id: <20180523025645.8978-4-bjsdjshi@linux.ibm.com> Signed-off-by: Cornelia Huck --- drivers/s390/cio/vfio_ccw_cp.c | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c index b0f20230fc72..dbe7c7ac9ac8 100644 --- a/drivers/s390/cio/vfio_ccw_cp.c +++ b/drivers/s390/cio/vfio_ccw_cp.c @@ -502,7 +502,7 @@ static int ccwchain_fetch_direct(struct ccwchain *chain, struct ccw1 *ccw; struct pfn_array_table *pat; unsigned long *idaws; - int idaw_nr; + int ret; ccw = chain->ch_ccw + idx; @@ -522,18 +522,19 @@ static int ccwchain_fetch_direct(struct ccwchain *chain, * needed when translating a direct ccw to a idal ccw. */ pat = chain->ch_pat + idx; - if (pfn_array_table_init(pat, 1)) - return -ENOMEM; - idaw_nr = pfn_array_alloc_pin(pat->pat_pa, cp->mdev, - ccw->cda, ccw->count); - if (idaw_nr < 0) - return idaw_nr; + ret = pfn_array_table_init(pat, 1); + if (ret) + goto out_init; + + ret = pfn_array_alloc_pin(pat->pat_pa, cp->mdev, ccw->cda, ccw->count); + if (ret < 0) + goto out_init; /* Translate this direct ccw to a idal ccw. */ - idaws = kcalloc(idaw_nr, sizeof(*idaws), GFP_DMA | GFP_KERNEL); + idaws = kcalloc(ret, sizeof(*idaws), GFP_DMA | GFP_KERNEL); if (!idaws) { - pfn_array_table_unpin_free(pat, cp->mdev); - return -ENOMEM; + ret = -ENOMEM; + goto out_unpin; } ccw->cda = (__u32) virt_to_phys(idaws); ccw->flags |= CCW_FLAG_IDA; @@ -541,6 +542,12 @@ static int ccwchain_fetch_direct(struct ccwchain *chain, pfn_array_table_idal_create_words(pat, idaws); return 0; + +out_unpin: + pfn_array_table_unpin_free(pat, cp->mdev); +out_init: + ccw->cda = 0; + return ret; } static int ccwchain_fetch_idal(struct ccwchain *chain, @@ -570,7 +577,7 @@ static int ccwchain_fetch_idal(struct ccwchain *chain, pat = chain->ch_pat + idx; ret = pfn_array_table_init(pat, idaw_nr); if (ret) - return ret; + goto out_init; /* Translate idal ccw to use new allocated idaws. */ idaws = kzalloc(idaw_len, GFP_DMA | GFP_KERNEL); @@ -602,6 +609,8 @@ out_free_idaws: kfree(idaws); out_unpin: pfn_array_table_unpin_free(pat, cp->mdev); +out_init: + ccw->cda = 0; return ret; } From 3cd90214b70f7f971496bffc3c34d23b2141feb3 Mon Sep 17 00:00:00 2001 From: Halil Pasic Date: Wed, 23 May 2018 04:56:45 +0200 Subject: [PATCH 013/572] vfio: ccw: add tracepoints for interesting error paths Add some tracepoints so we can inspect what is not working as is should. Signed-off-by: Halil Pasic Signed-off-by: Dong Jia Shi Message-Id: <20180523025645.8978-5-bjsdjshi@linux.ibm.com> Signed-off-by: Cornelia Huck --- drivers/s390/cio/Makefile | 1 + drivers/s390/cio/vfio_ccw_fsm.c | 17 +++++++++- drivers/s390/cio/vfio_ccw_trace.h | 54 +++++++++++++++++++++++++++++++ 3 files changed, 71 insertions(+), 1 deletion(-) create mode 100644 drivers/s390/cio/vfio_ccw_trace.h diff --git a/drivers/s390/cio/Makefile b/drivers/s390/cio/Makefile index a070ef0efe65..f230516abb96 100644 --- a/drivers/s390/cio/Makefile +++ b/drivers/s390/cio/Makefile @@ -5,6 +5,7 @@ # The following is required for define_trace.h to find ./trace.h CFLAGS_trace.o := -I$(src) +CFLAGS_vfio_ccw_fsm.o := -I$(src) obj-y += airq.o blacklist.o chsc.o cio.o css.o chp.o idset.o isc.o \ fcx.o itcw.o crw.o ccwreq.o trace.o ioasm.o diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c index 3c800642134e..797a82731159 100644 --- a/drivers/s390/cio/vfio_ccw_fsm.c +++ b/drivers/s390/cio/vfio_ccw_fsm.c @@ -13,6 +13,9 @@ #include "ioasm.h" #include "vfio_ccw_private.h" +#define CREATE_TRACE_POINTS +#include "vfio_ccw_trace.h" + static int fsm_io_helper(struct vfio_ccw_private *private) { struct subchannel *sch; @@ -110,6 +113,10 @@ static void fsm_disabled_irq(struct vfio_ccw_private *private, */ cio_disable_subchannel(sch); } +inline struct subchannel_id get_schid(struct vfio_ccw_private *p) +{ + return p->sch->schid; +} /* * Deal with the ccw command request from the userspace. @@ -121,6 +128,7 @@ static void fsm_io_request(struct vfio_ccw_private *private, union scsw *scsw = &private->scsw; struct ccw_io_region *io_region = &private->io_region; struct mdev_device *mdev = private->mdev; + char *errstr = "request"; private->state = VFIO_CCW_STATE_BOXED; @@ -132,15 +140,19 @@ static void fsm_io_request(struct vfio_ccw_private *private, /* Don't try to build a cp if transport mode is specified. */ if (orb->tm.b) { io_region->ret_code = -EOPNOTSUPP; + errstr = "transport mode"; goto err_out; } io_region->ret_code = cp_init(&private->cp, mdev_dev(mdev), orb); - if (io_region->ret_code) + if (io_region->ret_code) { + errstr = "cp init"; goto err_out; + } io_region->ret_code = cp_prefetch(&private->cp); if (io_region->ret_code) { + errstr = "cp prefetch"; cp_free(&private->cp); goto err_out; } @@ -148,6 +160,7 @@ static void fsm_io_request(struct vfio_ccw_private *private, /* Start channel program and wait for I/O interrupt. */ io_region->ret_code = fsm_io_helper(private); if (io_region->ret_code) { + errstr = "cp fsm_io_helper"; cp_free(&private->cp); goto err_out; } @@ -164,6 +177,8 @@ static void fsm_io_request(struct vfio_ccw_private *private, err_out: private->state = VFIO_CCW_STATE_IDLE; + trace_vfio_ccw_io_fctl(scsw->cmd.fctl, get_schid(private), + io_region->ret_code, errstr); } /* diff --git a/drivers/s390/cio/vfio_ccw_trace.h b/drivers/s390/cio/vfio_ccw_trace.h new file mode 100644 index 000000000000..b1da53ddec1f --- /dev/null +++ b/drivers/s390/cio/vfio_ccw_trace.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Tracepoints for vfio_ccw driver + * + * Copyright IBM Corp. 2018 + * + * Author(s): Dong Jia Shi + * Halil Pasic + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM vfio_ccw + +#if !defined(_VFIO_CCW_TRACE_) || defined(TRACE_HEADER_MULTI_READ) +#define _VFIO_CCW_TRACE_ + +#include + +TRACE_EVENT(vfio_ccw_io_fctl, + TP_PROTO(int fctl, struct subchannel_id schid, int errno, char *errstr), + TP_ARGS(fctl, schid, errno, errstr), + + TP_STRUCT__entry( + __field(int, fctl) + __field_struct(struct subchannel_id, schid) + __field(int, errno) + __field(char*, errstr) + ), + + TP_fast_assign( + __entry->fctl = fctl; + __entry->schid = schid; + __entry->errno = errno; + __entry->errstr = errstr; + ), + + TP_printk("schid=%x.%x.%04x fctl=%x errno=%d info=%s", + __entry->schid.cssid, + __entry->schid.ssid, + __entry->schid.sch_no, + __entry->fctl, + __entry->errno, + __entry->errstr) +); + +#endif /* _VFIO_CCW_TRACE_ */ + +/* This part must be outside protection */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE vfio_ccw_trace + +#include From 069035c5db3459b9b5f12caf3bffed9a863fa5c4 Mon Sep 17 00:00:00 2001 From: Oleksandr Andrushchenko Date: Tue, 22 May 2018 17:13:04 +0300 Subject: [PATCH 014/572] drm: Fix possible race conditions while unplugging DRM device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When unplugging a hotpluggable DRM device we first unregister it with drm_dev_unregister and then set drm_device.unplugged flag which is used to mark device critical sections with drm_dev_enter()/ drm_dev_exit() preventing access to device resources that are not available after the device is gone. But drm_dev_unregister may lead to hotplug uevent(s) fired to user-space on card and/or connector removal, thus making it possible for user-space to try accessing a disconnected device. Fix this by first making sure device is properly marked as disconnected and only then unregister it. Fixes: bee330f3d672 ("drm: Use srcu to protect drm_device.unplugged") Signed-off-by: Oleksandr Andrushchenko Reported-by: Andrii Chepurnyi Cc: "Noralf Trønnes" Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20180522141304.18646-1-andr2000@gmail.com --- drivers/gpu/drm/drm_drv.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index f6910ebe4d0e..cc2675550e28 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -369,13 +369,6 @@ EXPORT_SYMBOL(drm_dev_exit); */ void drm_dev_unplug(struct drm_device *dev) { - drm_dev_unregister(dev); - - mutex_lock(&drm_global_mutex); - if (dev->open_count == 0) - drm_dev_put(dev); - mutex_unlock(&drm_global_mutex); - /* * After synchronizing any critical read section is guaranteed to see * the new value of ->unplugged, and any critical section which might @@ -384,6 +377,13 @@ void drm_dev_unplug(struct drm_device *dev) */ dev->unplugged = true; synchronize_srcu(&drm_unplug_srcu); + + drm_dev_unregister(dev); + + mutex_lock(&drm_global_mutex); + if (dev->open_count == 0) + drm_dev_put(dev); + mutex_unlock(&drm_global_mutex); } EXPORT_SYMBOL(drm_dev_unplug); From 889ad63d41eea20184b0483e7e585e5b20fb6cfe Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Fri, 1 Jun 2018 16:05:32 -0400 Subject: [PATCH 015/572] drm/qxl: Call qxl_bo_unref outside atomic context "qxl_bo_unref" may sleep, but calling "qxl_release_map" causes "preempt_disable()" to be called and "preempt_enable()" isn't called until "qxl_release_unmap" is used. Move the call to "qxl_bo_unref" out from in between the two to avoid sleeping from an atomic context. This issue can be demonstrated on a kernel with CONFIG_LOCKDEP=y by creating a VM using QXL, using a desktop environment using Xorg, then moving the cursor on or off a window. Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1571128 Fixes: 9428088c90b6 ("drm/qxl: reapply cursor after resetting primary") Cc: stable@vger.kernel.org Signed-off-by: Jeremy Cline Link: http://patchwork.freedesktop.org/patch/msgid/20180601200532.13619-1-jcline@redhat.com Signed-off-by: Gerd Hoffmann --- drivers/gpu/drm/qxl/qxl_display.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c index ecb35ed0eac8..61e51516fec5 100644 --- a/drivers/gpu/drm/qxl/qxl_display.c +++ b/drivers/gpu/drm/qxl/qxl_display.c @@ -630,7 +630,7 @@ static void qxl_cursor_atomic_update(struct drm_plane *plane, struct qxl_cursor_cmd *cmd; struct qxl_cursor *cursor; struct drm_gem_object *obj; - struct qxl_bo *cursor_bo = NULL, *user_bo = NULL; + struct qxl_bo *cursor_bo = NULL, *user_bo = NULL, *old_cursor_bo = NULL; int ret; void *user_ptr; int size = 64*64*4; @@ -684,7 +684,7 @@ static void qxl_cursor_atomic_update(struct drm_plane *plane, cursor_bo, 0); cmd->type = QXL_CURSOR_SET; - qxl_bo_unref(&qcrtc->cursor_bo); + old_cursor_bo = qcrtc->cursor_bo; qcrtc->cursor_bo = cursor_bo; cursor_bo = NULL; } else { @@ -704,6 +704,9 @@ static void qxl_cursor_atomic_update(struct drm_plane *plane, qxl_push_cursor_ring_release(qdev, release, QXL_CMD_CURSOR, false); qxl_release_fence_buffer_objects(release); + if (old_cursor_bo) + qxl_bo_unref(&old_cursor_bo); + qxl_bo_unref(&cursor_bo); return; From 45615a9baf4661f953887988eeb4de69fd6cd96e Mon Sep 17 00:00:00 2001 From: Xiaotong Lu Date: Fri, 1 Jun 2018 11:36:22 -0700 Subject: [PATCH 016/572] Input: add Spreadtrum vibrator driver This patch adds the Spreadtrum vibrator driver, which embedded in the Spreadtrum SC27xx series PMICs. Signed-off-by: Xiaotong Lu Signed-off-by: Baolin Wang Signed-off-by: Dmitry Torokhov --- .../bindings/input/sprd,sc27xx-vibra.txt | 23 +++ drivers/input/misc/Kconfig | 10 ++ drivers/input/misc/Makefile | 1 + drivers/input/misc/sc27xx-vibra.c | 154 ++++++++++++++++++ 4 files changed, 188 insertions(+) create mode 100644 Documentation/devicetree/bindings/input/sprd,sc27xx-vibra.txt create mode 100644 drivers/input/misc/sc27xx-vibra.c diff --git a/Documentation/devicetree/bindings/input/sprd,sc27xx-vibra.txt b/Documentation/devicetree/bindings/input/sprd,sc27xx-vibra.txt new file mode 100644 index 000000000000..f2ec0d4f2dff --- /dev/null +++ b/Documentation/devicetree/bindings/input/sprd,sc27xx-vibra.txt @@ -0,0 +1,23 @@ +Spreadtrum SC27xx PMIC Vibrator + +Required properties: +- compatible: should be "sprd,sc2731-vibrator". +- reg: address of vibrator control register. + +Example : + + sc2731_pmic: pmic@0 { + compatible = "sprd,sc2731"; + reg = <0>; + spi-max-frequency = <26000000>; + interrupts = ; + interrupt-controller; + #interrupt-cells = <2>; + #address-cells = <1>; + #size-cells = <0>; + + vibrator@eb4 { + compatible = "sprd,sc2731-vibrator"; + reg = <0xeb4>; + }; + }; diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig index 572b15fa18c2..c761c0c4f2b8 100644 --- a/drivers/input/misc/Kconfig +++ b/drivers/input/misc/Kconfig @@ -841,4 +841,14 @@ config INPUT_RAVE_SP_PWRBUTTON To compile this driver as a module, choose M here: the module will be called rave-sp-pwrbutton. +config INPUT_SC27XX_VIBRA + tristate "Spreadtrum sc27xx vibrator support" + depends on MFD_SC27XX_PMIC || COMPILE_TEST + select INPUT_FF_MEMLESS + help + This option enables support for Spreadtrum sc27xx vibrator driver. + + To compile this driver as a module, choose M here. The module will + be called sc27xx_vibra. + endif diff --git a/drivers/input/misc/Makefile b/drivers/input/misc/Makefile index 72cde28649e2..9d0f9d1ff68f 100644 --- a/drivers/input/misc/Makefile +++ b/drivers/input/misc/Makefile @@ -66,6 +66,7 @@ obj-$(CONFIG_INPUT_RETU_PWRBUTTON) += retu-pwrbutton.o obj-$(CONFIG_INPUT_AXP20X_PEK) += axp20x-pek.o obj-$(CONFIG_INPUT_GPIO_ROTARY_ENCODER) += rotary_encoder.o obj-$(CONFIG_INPUT_RK805_PWRKEY) += rk805-pwrkey.o +obj-$(CONFIG_INPUT_SC27XX_VIBRA) += sc27xx-vibra.o obj-$(CONFIG_INPUT_SGI_BTNS) += sgi_btns.o obj-$(CONFIG_INPUT_SIRFSOC_ONKEY) += sirfsoc-onkey.o obj-$(CONFIG_INPUT_SOC_BUTTON_ARRAY) += soc_button_array.o diff --git a/drivers/input/misc/sc27xx-vibra.c b/drivers/input/misc/sc27xx-vibra.c new file mode 100644 index 000000000000..295251abbdac --- /dev/null +++ b/drivers/input/misc/sc27xx-vibra.c @@ -0,0 +1,154 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2018 Spreadtrum Communications Inc. + */ + +#include +#include +#include +#include +#include +#include + +#define CUR_DRV_CAL_SEL GENMASK(13, 12) +#define SLP_LDOVIBR_PD_EN BIT(9) +#define LDO_VIBR_PD BIT(8) + +struct vibra_info { + struct input_dev *input_dev; + struct work_struct play_work; + struct regmap *regmap; + u32 base; + u32 strength; + bool enabled; +}; + +static void sc27xx_vibra_set(struct vibra_info *info, bool on) +{ + if (on) { + regmap_update_bits(info->regmap, info->base, LDO_VIBR_PD, 0); + regmap_update_bits(info->regmap, info->base, + SLP_LDOVIBR_PD_EN, 0); + info->enabled = true; + } else { + regmap_update_bits(info->regmap, info->base, LDO_VIBR_PD, + LDO_VIBR_PD); + regmap_update_bits(info->regmap, info->base, + SLP_LDOVIBR_PD_EN, SLP_LDOVIBR_PD_EN); + info->enabled = false; + } +} + +static int sc27xx_vibra_hw_init(struct vibra_info *info) +{ + return regmap_update_bits(info->regmap, info->base, CUR_DRV_CAL_SEL, 0); +} + +static void sc27xx_vibra_play_work(struct work_struct *work) +{ + struct vibra_info *info = container_of(work, struct vibra_info, + play_work); + + if (info->strength && !info->enabled) + sc27xx_vibra_set(info, true); + else if (info->strength == 0 && info->enabled) + sc27xx_vibra_set(info, false); +} + +static int sc27xx_vibra_play(struct input_dev *input, void *data, + struct ff_effect *effect) +{ + struct vibra_info *info = input_get_drvdata(input); + + info->strength = effect->u.rumble.weak_magnitude; + schedule_work(&info->play_work); + + return 0; +} + +static void sc27xx_vibra_close(struct input_dev *input) +{ + struct vibra_info *info = input_get_drvdata(input); + + cancel_work_sync(&info->play_work); + if (info->enabled) + sc27xx_vibra_set(info, false); +} + +static int sc27xx_vibra_probe(struct platform_device *pdev) +{ + struct vibra_info *info; + int error; + + info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL); + if (!info) + return -ENOMEM; + + info->regmap = dev_get_regmap(pdev->dev.parent, NULL); + if (!info->regmap) { + dev_err(&pdev->dev, "failed to get vibrator regmap.\n"); + return -ENODEV; + } + + error = device_property_read_u32(&pdev->dev, "reg", &info->base); + if (error) { + dev_err(&pdev->dev, "failed to get vibrator base address.\n"); + return error; + } + + info->input_dev = devm_input_allocate_device(&pdev->dev); + if (!info->input_dev) { + dev_err(&pdev->dev, "failed to allocate input device.\n"); + return -ENOMEM; + } + + info->input_dev->name = "sc27xx:vibrator"; + info->input_dev->id.version = 0; + info->input_dev->close = sc27xx_vibra_close; + + input_set_drvdata(info->input_dev, info); + input_set_capability(info->input_dev, EV_FF, FF_RUMBLE); + INIT_WORK(&info->play_work, sc27xx_vibra_play_work); + info->enabled = false; + + error = sc27xx_vibra_hw_init(info); + if (error) { + dev_err(&pdev->dev, "failed to initialize the vibrator.\n"); + return error; + } + + error = input_ff_create_memless(info->input_dev, NULL, + sc27xx_vibra_play); + if (error) { + dev_err(&pdev->dev, "failed to register vibrator to FF.\n"); + return error; + } + + error = input_register_device(info->input_dev); + if (error) { + dev_err(&pdev->dev, "failed to register input device.\n"); + return error; + } + + return 0; +} + +static const struct of_device_id sc27xx_vibra_of_match[] = { + { .compatible = "sprd,sc2731-vibrator", }, + {} +}; +MODULE_DEVICE_TABLE(of, sc27xx_vibra_of_match); + +static struct platform_driver sc27xx_vibra_driver = { + .driver = { + .name = "sc27xx-vibrator", + .of_match_table = sc27xx_vibra_of_match, + }, + .probe = sc27xx_vibra_probe, +}; + +module_platform_driver(sc27xx_vibra_driver); + +MODULE_DESCRIPTION("Spreadtrum SC27xx Vibrator Driver"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Xiaotong Lu "); From 8f6a652a8c1379023775709e3db6c6f7c1bdb2ab Mon Sep 17 00:00:00 2001 From: Roman Kiryanov Date: Thu, 31 May 2018 17:03:33 -0700 Subject: [PATCH 017/572] Input: goldfish_events - fix checkpatch warnings Address issues pointed by checkpatch.pl Signed-off-by: Roman Kiryanov Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/goldfish_events.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/input/keyboard/goldfish_events.c b/drivers/input/keyboard/goldfish_events.c index f6e643b589b6..e8dae6195b30 100644 --- a/drivers/input/keyboard/goldfish_events.c +++ b/drivers/input/keyboard/goldfish_events.c @@ -45,7 +45,7 @@ struct event_dev { static irqreturn_t events_interrupt(int irq, void *dev_id) { struct event_dev *edev = dev_id; - unsigned type, code, value; + unsigned int type, code, value; type = __raw_readl(edev->addr + REG_READ); code = __raw_readl(edev->addr + REG_READ); @@ -57,7 +57,7 @@ static irqreturn_t events_interrupt(int irq, void *dev_id) } static void events_import_bits(struct event_dev *edev, - unsigned long bits[], unsigned type, size_t count) + unsigned long bits[], unsigned int type, size_t count) { void __iomem *addr = edev->addr; int i, j; @@ -99,6 +99,7 @@ static void events_import_abs_params(struct event_dev *edev) for (j = 0; j < ARRAY_SIZE(val); j++) { int offset = (i * ARRAY_SIZE(val) + j) * sizeof(u32); + val[j] = __raw_readl(edev->addr + REG_DATA + offset); } @@ -112,7 +113,7 @@ static int events_probe(struct platform_device *pdev) struct input_dev *input_dev; struct event_dev *edev; struct resource *res; - unsigned keymapnamelen; + unsigned int keymapnamelen; void __iomem *addr; int irq; int i; @@ -150,7 +151,7 @@ static int events_probe(struct platform_device *pdev) for (i = 0; i < keymapnamelen; i++) edev->name[i] = __raw_readb(edev->addr + REG_DATA + i); - pr_debug("events_probe() keymap=%s\n", edev->name); + pr_debug("%s: keymap=%s\n", __func__, edev->name); input_dev->name = edev->name; input_dev->id.bustype = BUS_HOST; From 92d34134193e5b129dc24f8d79cb9196626e8d7a Mon Sep 17 00:00:00 2001 From: Shankara Pailoor Date: Tue, 5 Jun 2018 08:33:27 -0500 Subject: [PATCH 018/572] jfs: Fix inconsistency between memory allocation and ea_buf->max_size The code is assuming the buffer is max_size length, but we weren't allocating enough space for it. Signed-off-by: Shankara Pailoor Signed-off-by: Dave Kleikamp --- fs/jfs/xattr.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index c60f3d32ee91..a6797986b625 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c @@ -491,15 +491,17 @@ static int ea_get(struct inode *inode, struct ea_buffer *ea_buf, int min_size) if (size > PSIZE) { /* * To keep the rest of the code simple. Allocate a - * contiguous buffer to work with + * contiguous buffer to work with. Make the buffer large + * enough to make use of the whole extent. */ - ea_buf->xattr = kmalloc(size, GFP_KERNEL); + ea_buf->max_size = (size + sb->s_blocksize - 1) & + ~(sb->s_blocksize - 1); + + ea_buf->xattr = kmalloc(ea_buf->max_size, GFP_KERNEL); if (ea_buf->xattr == NULL) return -ENOMEM; ea_buf->flag = EA_MALLOC; - ea_buf->max_size = (size + sb->s_blocksize - 1) & - ~(sb->s_blocksize - 1); if (ea_size == 0) return 0; From fc573af632b44f355f8fa15ab505f5593368078d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 5 Jun 2018 09:34:22 -0700 Subject: [PATCH 019/572] Input: silead - add MSSL0002 ACPI HID The Silead touchscreen on the Chuwi Vi8 tablet uses MSSL0002 as ACPI HID, rather then the usual MSSL1680 id. Signed-off-by: Hans de Goede Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/silead.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/touchscreen/silead.c b/drivers/input/touchscreen/silead.c index ff7043f74a3d..d196ac3d8b8c 100644 --- a/drivers/input/touchscreen/silead.c +++ b/drivers/input/touchscreen/silead.c @@ -603,6 +603,7 @@ static const struct acpi_device_id silead_ts_acpi_match[] = { { "GSL3692", 0 }, { "MSSL1680", 0 }, { "MSSL0001", 0 }, + { "MSSL0002", 0 }, { } }; MODULE_DEVICE_TABLE(acpi, silead_ts_acpi_match); From 24d28e4f1271cb2f91613dada8f2acccd00eff56 Mon Sep 17 00:00:00 2001 From: Nick Dyer Date: Tue, 5 Jun 2018 10:17:51 -0700 Subject: [PATCH 020/572] Input: synaptics-rmi4 - convert irq distribution to irq_domain Convert the RMI driver to use the standard mechanism for distributing IRQs to the various functions. Tested on: * S7300 (F11, F34, F54) * S7817 (F12, F34, F54) Signed-off-by: Nick Dyer Acked-by: Christopher Heiny Signed-off-by: Dmitry Torokhov --- drivers/input/rmi4/Kconfig | 1 + drivers/input/rmi4/rmi_bus.c | 50 ++++++++++++++++++++++++++++++- drivers/input/rmi4/rmi_bus.h | 10 ++++++- drivers/input/rmi4/rmi_driver.c | 52 +++++++++++++-------------------- drivers/input/rmi4/rmi_f01.c | 10 +++---- drivers/input/rmi4/rmi_f03.c | 9 +++--- drivers/input/rmi4/rmi_f11.c | 42 ++++++++++---------------- drivers/input/rmi4/rmi_f12.c | 8 ++--- drivers/input/rmi4/rmi_f30.c | 9 +++--- drivers/input/rmi4/rmi_f34.c | 5 ++-- drivers/input/rmi4/rmi_f54.c | 6 ---- include/linux/rmi.h | 2 ++ 12 files changed, 119 insertions(+), 85 deletions(-) diff --git a/drivers/input/rmi4/Kconfig b/drivers/input/rmi4/Kconfig index 7172b88cd064..fad2eae4a118 100644 --- a/drivers/input/rmi4/Kconfig +++ b/drivers/input/rmi4/Kconfig @@ -3,6 +3,7 @@ # config RMI4_CORE tristate "Synaptics RMI4 bus support" + select IRQ_DOMAIN help Say Y here if you want to support the Synaptics RMI4 bus. This is required for all RMI4 device support. diff --git a/drivers/input/rmi4/rmi_bus.c b/drivers/input/rmi4/rmi_bus.c index c5fa53adba8d..bd0d5ff01b08 100644 --- a/drivers/input/rmi4/rmi_bus.c +++ b/drivers/input/rmi4/rmi_bus.c @@ -9,6 +9,8 @@ #include #include +#include +#include #include #include #include @@ -167,6 +169,39 @@ static inline void rmi_function_of_probe(struct rmi_function *fn) {} #endif +static struct irq_chip rmi_irq_chip = { + .name = "rmi4", +}; + +static int rmi_create_function_irq(struct rmi_function *fn, + struct rmi_function_handler *handler) +{ + struct rmi_driver_data *drvdata = dev_get_drvdata(&fn->rmi_dev->dev); + int i, error; + + for (i = 0; i < fn->num_of_irqs; i++) { + set_bit(fn->irq_pos + i, fn->irq_mask); + + fn->irq[i] = irq_create_mapping(drvdata->irqdomain, + fn->irq_pos + i); + + irq_set_chip_data(fn->irq[i], fn); + irq_set_chip_and_handler(fn->irq[i], &rmi_irq_chip, + handle_simple_irq); + irq_set_nested_thread(fn->irq[i], 1); + + error = devm_request_threaded_irq(&fn->dev, fn->irq[i], NULL, + handler->attention, IRQF_ONESHOT, + dev_name(&fn->dev), fn); + if (error) { + dev_err(&fn->dev, "Error %d registering IRQ\n", error); + return error; + } + } + + return 0; +} + static int rmi_function_probe(struct device *dev) { struct rmi_function *fn = to_rmi_function(dev); @@ -178,7 +213,14 @@ static int rmi_function_probe(struct device *dev) if (handler->probe) { error = handler->probe(fn); - return error; + if (error) + return error; + } + + if (fn->num_of_irqs && handler->attention) { + error = rmi_create_function_irq(fn, handler); + if (error) + return error; } return 0; @@ -230,12 +272,18 @@ err_put_device: void rmi_unregister_function(struct rmi_function *fn) { + int i; + rmi_dbg(RMI_DEBUG_CORE, &fn->dev, "Unregistering F%02X.\n", fn->fd.function_number); device_del(&fn->dev); of_node_put(fn->dev.of_node); put_device(&fn->dev); + + for (i = 0; i < fn->num_of_irqs; i++) + irq_dispose_mapping(fn->irq[i]); + } /** diff --git a/drivers/input/rmi4/rmi_bus.h b/drivers/input/rmi4/rmi_bus.h index b7625a9ac66a..96383eab41ba 100644 --- a/drivers/input/rmi4/rmi_bus.h +++ b/drivers/input/rmi4/rmi_bus.h @@ -14,6 +14,12 @@ struct rmi_device; +/* + * The interrupt source count in the function descriptor can represent up to + * 6 interrupt sources in the normal manner. + */ +#define RMI_FN_MAX_IRQS 6 + /** * struct rmi_function - represents the implementation of an RMI4 * function for a particular device (basically, a driver for that RMI4 function) @@ -26,6 +32,7 @@ struct rmi_device; * @irq_pos: The position in the irq bitfield this function holds * @irq_mask: For convenience, can be used to mask IRQ bits off during ATTN * interrupt handling. + * @irqs: assigned virq numbers (up to num_of_irqs) * * @node: entry in device's list of functions */ @@ -36,6 +43,7 @@ struct rmi_function { struct list_head node; unsigned int num_of_irqs; + int irq[RMI_FN_MAX_IRQS]; unsigned int irq_pos; unsigned long irq_mask[]; }; @@ -76,7 +84,7 @@ struct rmi_function_handler { void (*remove)(struct rmi_function *fn); int (*config)(struct rmi_function *fn); int (*reset)(struct rmi_function *fn); - int (*attention)(struct rmi_function *fn, unsigned long *irq_bits); + irqreturn_t (*attention)(int irq, void *ctx); int (*suspend)(struct rmi_function *fn); int (*resume)(struct rmi_function *fn); }; diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c index f5954981e9ee..2fb0ae0bdfe3 100644 --- a/drivers/input/rmi4/rmi_driver.c +++ b/drivers/input/rmi4/rmi_driver.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include "rmi_bus.h" @@ -127,28 +128,11 @@ static int rmi_driver_process_config_requests(struct rmi_device *rmi_dev) return 0; } -static void process_one_interrupt(struct rmi_driver_data *data, - struct rmi_function *fn) -{ - struct rmi_function_handler *fh; - - if (!fn || !fn->dev.driver) - return; - - fh = to_rmi_function_handler(fn->dev.driver); - if (fh->attention) { - bitmap_and(data->fn_irq_bits, data->irq_status, fn->irq_mask, - data->irq_count); - if (!bitmap_empty(data->fn_irq_bits, data->irq_count)) - fh->attention(fn, data->fn_irq_bits); - } -} - static int rmi_process_interrupt_requests(struct rmi_device *rmi_dev) { struct rmi_driver_data *data = dev_get_drvdata(&rmi_dev->dev); struct device *dev = &rmi_dev->dev; - struct rmi_function *entry; + int i; int error; if (!data) @@ -173,16 +157,8 @@ static int rmi_process_interrupt_requests(struct rmi_device *rmi_dev) */ mutex_unlock(&data->irq_mutex); - /* - * It would be nice to be able to use irq_chip to handle these - * nested IRQs. Unfortunately, most of the current customers for - * this driver are using older kernels (3.0.x) that don't support - * the features required for that. Once they've shifted to more - * recent kernels (say, 3.3 and higher), this should be switched to - * use irq_chip. - */ - list_for_each_entry(entry, &data->function_list, node) - process_one_interrupt(data, entry); + for_each_set_bit(i, data->irq_status, data->irq_count) + handle_nested_irq(irq_find_mapping(data->irqdomain, i)); if (data->input) input_sync(data->input); @@ -1000,9 +976,13 @@ EXPORT_SYMBOL_GPL(rmi_driver_resume); static int rmi_driver_remove(struct device *dev) { struct rmi_device *rmi_dev = to_rmi_device(dev); + struct rmi_driver_data *data = dev_get_drvdata(&rmi_dev->dev); rmi_disable_irq(rmi_dev, false); + irq_domain_remove(data->irqdomain); + data->irqdomain = NULL; + rmi_f34_remove_sysfs(rmi_dev); rmi_free_function_list(rmi_dev); @@ -1034,7 +1014,8 @@ int rmi_probe_interrupts(struct rmi_driver_data *data) { struct rmi_device *rmi_dev = data->rmi_dev; struct device *dev = &rmi_dev->dev; - int irq_count; + struct fwnode_handle *fwnode = rmi_dev->xport->dev->fwnode; + int irq_count = 0; size_t size; int retval; @@ -1045,7 +1026,6 @@ int rmi_probe_interrupts(struct rmi_driver_data *data) * being accessed. */ rmi_dbg(RMI_DEBUG_CORE, dev, "%s: Counting IRQs.\n", __func__); - irq_count = 0; data->bootloader_mode = false; retval = rmi_scan_pdt(rmi_dev, &irq_count, rmi_count_irqs); @@ -1057,6 +1037,15 @@ int rmi_probe_interrupts(struct rmi_driver_data *data) if (data->bootloader_mode) dev_warn(dev, "Device in bootloader mode.\n"); + /* Allocate and register a linear revmap irq_domain */ + data->irqdomain = irq_domain_create_linear(fwnode, irq_count, + &irq_domain_simple_ops, + data); + if (!data->irqdomain) { + dev_err(&rmi_dev->dev, "Failed to create IRQ domain\n"); + return PTR_ERR(data->irqdomain); + } + data->irq_count = irq_count; data->num_of_irq_regs = (data->irq_count + 7) / 8; @@ -1079,10 +1068,9 @@ int rmi_init_functions(struct rmi_driver_data *data) { struct rmi_device *rmi_dev = data->rmi_dev; struct device *dev = &rmi_dev->dev; - int irq_count; + int irq_count = 0; int retval; - irq_count = 0; rmi_dbg(RMI_DEBUG_CORE, dev, "%s: Creating functions.\n", __func__); retval = rmi_scan_pdt(rmi_dev, &irq_count, rmi_create_function); if (retval < 0) { diff --git a/drivers/input/rmi4/rmi_f01.c b/drivers/input/rmi4/rmi_f01.c index 8a07ae147df6..4edaa14fe878 100644 --- a/drivers/input/rmi4/rmi_f01.c +++ b/drivers/input/rmi4/rmi_f01.c @@ -681,9 +681,9 @@ static int rmi_f01_resume(struct rmi_function *fn) return 0; } -static int rmi_f01_attention(struct rmi_function *fn, - unsigned long *irq_bits) +static irqreturn_t rmi_f01_attention(int irq, void *ctx) { + struct rmi_function *fn = ctx; struct rmi_device *rmi_dev = fn->rmi_dev; int error; u8 device_status; @@ -692,7 +692,7 @@ static int rmi_f01_attention(struct rmi_function *fn, if (error) { dev_err(&fn->dev, "Failed to read device status: %d.\n", error); - return error; + return IRQ_RETVAL(error); } if (RMI_F01_STATUS_BOOTLOADER(device_status)) @@ -704,11 +704,11 @@ static int rmi_f01_attention(struct rmi_function *fn, error = rmi_dev->driver->reset_handler(rmi_dev); if (error) { dev_err(&fn->dev, "Device reset failed: %d\n", error); - return error; + return IRQ_RETVAL(error); } } - return 0; + return IRQ_HANDLED; } struct rmi_function_handler rmi_f01_handler = { diff --git a/drivers/input/rmi4/rmi_f03.c b/drivers/input/rmi4/rmi_f03.c index 88822196d6b7..aaa1edc95522 100644 --- a/drivers/input/rmi4/rmi_f03.c +++ b/drivers/input/rmi4/rmi_f03.c @@ -244,8 +244,9 @@ static int rmi_f03_config(struct rmi_function *fn) return 0; } -static int rmi_f03_attention(struct rmi_function *fn, unsigned long *irq_bits) +static irqreturn_t rmi_f03_attention(int irq, void *ctx) { + struct rmi_function *fn = ctx; struct rmi_device *rmi_dev = fn->rmi_dev; struct rmi_driver_data *drvdata = dev_get_drvdata(&rmi_dev->dev); struct f03_data *f03 = dev_get_drvdata(&fn->dev); @@ -262,7 +263,7 @@ static int rmi_f03_attention(struct rmi_function *fn, unsigned long *irq_bits) /* First grab the data passed by the transport device */ if (drvdata->attn_data.size < ob_len) { dev_warn(&fn->dev, "F03 interrupted, but data is missing!\n"); - return 0; + return IRQ_HANDLED; } memcpy(obs, drvdata->attn_data.data, ob_len); @@ -277,7 +278,7 @@ static int rmi_f03_attention(struct rmi_function *fn, unsigned long *irq_bits) "%s: Failed to read F03 output buffers: %d\n", __func__, error); serio_interrupt(f03->serio, 0, SERIO_TIMEOUT); - return error; + return IRQ_RETVAL(error); } } @@ -303,7 +304,7 @@ static int rmi_f03_attention(struct rmi_function *fn, unsigned long *irq_bits) serio_interrupt(f03->serio, ob_data, serio_flags); } - return 0; + return IRQ_HANDLED; } static void rmi_f03_remove(struct rmi_function *fn) diff --git a/drivers/input/rmi4/rmi_f11.c b/drivers/input/rmi4/rmi_f11.c index bc5e37f30ac1..5f2e1ef5accc 100644 --- a/drivers/input/rmi4/rmi_f11.c +++ b/drivers/input/rmi4/rmi_f11.c @@ -570,9 +570,7 @@ static inline u8 rmi_f11_parse_finger_state(const u8 *f_state, u8 n_finger) } static void rmi_f11_finger_handler(struct f11_data *f11, - struct rmi_2d_sensor *sensor, - unsigned long *irq_bits, int num_irq_regs, - int size) + struct rmi_2d_sensor *sensor, int size) { const u8 *f_state = f11->data.f_state; u8 finger_state; @@ -581,12 +579,7 @@ static void rmi_f11_finger_handler(struct f11_data *f11, int rel_fingers; int abs_size = sensor->nbr_fingers * RMI_F11_ABS_BYTES; - int abs_bits = bitmap_and(f11->result_bits, irq_bits, f11->abs_mask, - num_irq_regs * 8); - int rel_bits = bitmap_and(f11->result_bits, irq_bits, f11->rel_mask, - num_irq_regs * 8); - - if (abs_bits) { + if (sensor->report_abs) { if (abs_size > size) abs_fingers = size / RMI_F11_ABS_BYTES; else @@ -604,19 +597,7 @@ static void rmi_f11_finger_handler(struct f11_data *f11, rmi_f11_abs_pos_process(f11, sensor, &sensor->objs[i], finger_state, i); } - } - if (rel_bits) { - if ((abs_size + sensor->nbr_fingers * RMI_F11_REL_BYTES) > size) - rel_fingers = (size - abs_size) / RMI_F11_REL_BYTES; - else - rel_fingers = sensor->nbr_fingers; - - for (i = 0; i < rel_fingers; i++) - rmi_f11_rel_pos_report(f11, i); - } - - if (abs_bits) { /* * the absolute part is made in 2 parts to allow the kernel * tracking to take place. @@ -638,7 +619,16 @@ static void rmi_f11_finger_handler(struct f11_data *f11, } input_mt_sync_frame(sensor->input); + } else if (sensor->report_rel) { + if ((abs_size + sensor->nbr_fingers * RMI_F11_REL_BYTES) > size) + rel_fingers = (size - abs_size) / RMI_F11_REL_BYTES; + else + rel_fingers = sensor->nbr_fingers; + + for (i = 0; i < rel_fingers; i++) + rmi_f11_rel_pos_report(f11, i); } + } static int f11_2d_construct_data(struct f11_data *f11) @@ -1275,8 +1265,9 @@ static int rmi_f11_config(struct rmi_function *fn) return 0; } -static int rmi_f11_attention(struct rmi_function *fn, unsigned long *irq_bits) +static irqreturn_t rmi_f11_attention(int irq, void *ctx) { + struct rmi_function *fn = ctx; struct rmi_device *rmi_dev = fn->rmi_dev; struct rmi_driver_data *drvdata = dev_get_drvdata(&rmi_dev->dev); struct f11_data *f11 = dev_get_drvdata(&fn->dev); @@ -1302,13 +1293,12 @@ static int rmi_f11_attention(struct rmi_function *fn, unsigned long *irq_bits) data_base_addr, f11->sensor.data_pkt, f11->sensor.pkt_size); if (error < 0) - return error; + return IRQ_RETVAL(error); } - rmi_f11_finger_handler(f11, &f11->sensor, irq_bits, - drvdata->num_of_irq_regs, valid_bytes); + rmi_f11_finger_handler(f11, &f11->sensor, valid_bytes); - return 0; + return IRQ_HANDLED; } static int rmi_f11_resume(struct rmi_function *fn) diff --git a/drivers/input/rmi4/rmi_f12.c b/drivers/input/rmi4/rmi_f12.c index 8b0db086d68a..e226def74d82 100644 --- a/drivers/input/rmi4/rmi_f12.c +++ b/drivers/input/rmi4/rmi_f12.c @@ -197,10 +197,10 @@ static void rmi_f12_process_objects(struct f12_data *f12, u8 *data1, int size) rmi_2d_sensor_abs_report(sensor, &sensor->objs[i], i); } -static int rmi_f12_attention(struct rmi_function *fn, - unsigned long *irq_nr_regs) +static irqreturn_t rmi_f12_attention(int irq, void *ctx) { int retval; + struct rmi_function *fn = ctx; struct rmi_device *rmi_dev = fn->rmi_dev; struct rmi_driver_data *drvdata = dev_get_drvdata(&rmi_dev->dev); struct f12_data *f12 = dev_get_drvdata(&fn->dev); @@ -222,7 +222,7 @@ static int rmi_f12_attention(struct rmi_function *fn, if (retval < 0) { dev_err(&fn->dev, "Failed to read object data. Code: %d.\n", retval); - return retval; + return IRQ_RETVAL(retval); } } @@ -232,7 +232,7 @@ static int rmi_f12_attention(struct rmi_function *fn, input_mt_sync_frame(sensor->input); - return 0; + return IRQ_HANDLED; } static int rmi_f12_write_control_regs(struct rmi_function *fn) diff --git a/drivers/input/rmi4/rmi_f30.c b/drivers/input/rmi4/rmi_f30.c index 82e0f0d43d55..5e3ed5ac0c3e 100644 --- a/drivers/input/rmi4/rmi_f30.c +++ b/drivers/input/rmi4/rmi_f30.c @@ -122,8 +122,9 @@ static void rmi_f30_report_button(struct rmi_function *fn, } } -static int rmi_f30_attention(struct rmi_function *fn, unsigned long *irq_bits) +static irqreturn_t rmi_f30_attention(int irq, void *ctx) { + struct rmi_function *fn = ctx; struct f30_data *f30 = dev_get_drvdata(&fn->dev); struct rmi_driver_data *drvdata = dev_get_drvdata(&fn->rmi_dev->dev); int error; @@ -134,7 +135,7 @@ static int rmi_f30_attention(struct rmi_function *fn, unsigned long *irq_bits) if (drvdata->attn_data.size < f30->register_count) { dev_warn(&fn->dev, "F30 interrupted, but data is missing\n"); - return 0; + return IRQ_HANDLED; } memcpy(f30->data_regs, drvdata->attn_data.data, f30->register_count); @@ -147,7 +148,7 @@ static int rmi_f30_attention(struct rmi_function *fn, unsigned long *irq_bits) dev_err(&fn->dev, "%s: Failed to read F30 data registers: %d\n", __func__, error); - return error; + return IRQ_RETVAL(error); } } @@ -159,7 +160,7 @@ static int rmi_f30_attention(struct rmi_function *fn, unsigned long *irq_bits) rmi_f03_commit_buttons(f30->f03); } - return 0; + return IRQ_HANDLED; } static int rmi_f30_config(struct rmi_function *fn) diff --git a/drivers/input/rmi4/rmi_f34.c b/drivers/input/rmi4/rmi_f34.c index f1f5ac539d5d..87a7d4ba382d 100644 --- a/drivers/input/rmi4/rmi_f34.c +++ b/drivers/input/rmi4/rmi_f34.c @@ -100,8 +100,9 @@ static int rmi_f34_command(struct f34_data *f34, u8 command, return 0; } -static int rmi_f34_attention(struct rmi_function *fn, unsigned long *irq_bits) +static irqreturn_t rmi_f34_attention(int irq, void *ctx) { + struct rmi_function *fn = ctx; struct f34_data *f34 = dev_get_drvdata(&fn->dev); int ret; u8 status; @@ -126,7 +127,7 @@ static int rmi_f34_attention(struct rmi_function *fn, unsigned long *irq_bits) complete(&f34->v7.cmd_done); } - return 0; + return IRQ_HANDLED; } static int rmi_f34_write_blocks(struct f34_data *f34, const void *data, diff --git a/drivers/input/rmi4/rmi_f54.c b/drivers/input/rmi4/rmi_f54.c index 5343f2c08f15..98a935efec8e 100644 --- a/drivers/input/rmi4/rmi_f54.c +++ b/drivers/input/rmi4/rmi_f54.c @@ -610,11 +610,6 @@ error: mutex_unlock(&f54->data_mutex); } -static int rmi_f54_attention(struct rmi_function *fn, unsigned long *irqbits) -{ - return 0; -} - static int rmi_f54_config(struct rmi_function *fn) { struct rmi_driver *drv = fn->rmi_dev->driver; @@ -756,6 +751,5 @@ struct rmi_function_handler rmi_f54_handler = { .func = 0x54, .probe = rmi_f54_probe, .config = rmi_f54_config, - .attention = rmi_f54_attention, .remove = rmi_f54_remove, }; diff --git a/include/linux/rmi.h b/include/linux/rmi.h index 64125443f8a6..5ef5c7c412a7 100644 --- a/include/linux/rmi.h +++ b/include/linux/rmi.h @@ -354,6 +354,8 @@ struct rmi_driver_data { struct mutex irq_mutex; struct input_dev *input; + struct irq_domain *irqdomain; + u8 pdt_props; u8 num_rx_electrodes; From 41477acf092251eb0cfe83068f48dbcb2521478a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 7 Jun 2018 14:19:54 -0300 Subject: [PATCH 021/572] perf hists: Save the callchain_size in struct hist_entry So that we can figure out the real size of the struct and also be able to tell if callchains may be present in this histogram entry. Since we can't always guarantee that from hist_entry->hists we can use hists_to_evsel, to then look at evsel->attr.sample_type for PERF_SAMPLE_CALLCHAIN, like with the 'perf c2c' tool, that uses plain 'struct hists' instances, we need another way of deciding if a specific hist_entry instance has callchains associated with it, i.e. if its hist_entry->callchain[0] has space allocated for. Cc: Adrian Hunter Cc: David Ahern Cc: Jin Yao Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-ptvndealxs1k7myluvu9flnq@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 6 ++++-- tools/perf/util/sort.h | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 52e8fda93a47..0441a92b855f 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -370,9 +370,11 @@ void hists__delete_entries(struct hists *hists) static int hist_entry__init(struct hist_entry *he, struct hist_entry *template, - bool sample_self) + bool sample_self, + size_t callchain_size) { *he = *template; + he->callchain_size = callchain_size; if (symbol_conf.cumulate_callchain) { he->stat_acc = malloc(sizeof(he->stat)); @@ -473,7 +475,7 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template, he = ops->new(callchain_size); if (he) { - err = hist_entry__init(he, template, sample_self); + err = hist_entry__init(he, template, sample_self, callchain_size); if (err) { ops->free(he); he = NULL; diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 7cf2d5cc038e..9ab9257ed887 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -112,6 +112,8 @@ struct hist_entry { char level; u8 filtered; + + u16 callchain_size; union { /* * Since perf diff only supports the stdio output, TUI From e5654455795f2f89328f7b301dacb6926e57e2b8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 7 Jun 2018 14:27:19 -0300 Subject: [PATCH 022/572] perf hists: Make hist_entry__has_callchains() work with 'perf c2c' Since 'perf c2c' uses 'struct hists' not allocated together with a 'struct perf_evsel' instance, we can't go from a 'struct hist_entry' pointer to a 'struct perf_evsel' via he->hists, so, instead, check if space was set aside for hist_entry->callchain[0] at hist_entry__new() time. Reported-by: Jin Yao Reported-by: Jiri Olsa Cc: Adrian Hunter Cc: David Ahern Cc: Namhyung Kim Cc: Wang Nan Fixes: fabd37b837f6 ("perf hists: Check if a hist_entry has callchains before using them") Link: https://lkml.kernel.org/n/tip-e8ife8djvvvwmeze3s4yodii@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/sort.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 9ab9257ed887..8bf302cafcec 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -155,7 +155,7 @@ struct hist_entry { static __pure inline bool hist_entry__has_callchains(struct hist_entry *he) { - return hists__has_callchains(he->hists); + return he->callchain_size != 0; } static inline bool hist_entry__has_pairs(struct hist_entry *he) From 29f9fcdd3f8edccad5809cf939ce921752460fe7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 7 Jun 2018 14:33:31 -0300 Subject: [PATCH 023/572] perf hists browser gtk: Use hist_entry__has_callchains() Since we can't go from struct hists to struct evsel for all cases (c2c is an exception) and we have access to the hist_entry, use hist_entry__has_callchains() in the GTK+ hists browser to figure out if callchains are available. Cc: Adrian Hunter Cc: David Ahern Cc: Jin Yao Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-8owkgrruzzi5emvblwh4e6le@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/gtk/hists.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c index b085f1b3e34d..4ab663ec3e5e 100644 --- a/tools/perf/ui/gtk/hists.c +++ b/tools/perf/ui/gtk/hists.c @@ -382,7 +382,7 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists, gtk_tree_store_set(store, &iter, col_idx++, s, -1); } - if (hists__has_callchains(hists) && + if (hist_entry__has_callchains(h) && symbol_conf.use_callchain && hists__has(hists, sym)) { if (callchain_param.mode == CHAIN_GRAPH_REL) total = symbol_conf.cumulate_callchain ? From c9d366287042489090da0391318df528bdce9941 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 7 Jun 2018 14:42:27 -0300 Subject: [PATCH 024/572] perf hists: Reimplement hists__has_callchains() There are places where we have only access to struct hists and need to know if any of its hist_entries has callchains, like when drawing headers for the various output modes (stdio, TUI, etc), so, when adding a new hist_entry, check if it has callchains, storing this info for later use by hists__has_callchains(). This reimplementation is necessary because not always a 'struct hists' is allocated together with a 'struct perf evsel', so we can't go from 'hists' to 'perf_event_attr.sample_type & PERF_SAMPLE_CALLCHAIN'. Cc: Adrian Hunter Cc: David Ahern Cc: Jin Yao Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-hg5g7yddjio3ljwyqnnaj5dt@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 6 ++++-- tools/perf/util/hist.h | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 0441a92b855f..828cb9794c76 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -621,9 +621,11 @@ __hists__add_entry(struct hists *hists, .raw_data = sample->raw_data, .raw_size = sample->raw_size, .ops = ops, - }; + }, *he = hists__findnew_entry(hists, &entry, al, sample_self); - return hists__findnew_entry(hists, &entry, al, sample_self); + if (!hists->has_callchains && he && he->callchain_size != 0) + hists->has_callchains = true; + return he; } struct hist_entry *hists__add_entry(struct hists *hists, diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 06607c434949..73049f7f0f60 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -85,6 +85,7 @@ struct hists { struct events_stats stats; u64 event_stream; u16 col_len[HISTC_NR_COLS]; + bool has_callchains; int socket_filter; struct perf_hpp_list *hpp_list; struct list_head hpp_formats; @@ -222,8 +223,7 @@ static inline struct hists *evsel__hists(struct perf_evsel *evsel) static __pure inline bool hists__has_callchains(struct hists *hists) { - const struct perf_evsel *evsel = hists_to_evsel(hists); - return evsel__has_callchain(evsel); + return hists->has_callchains; } int hists__init(void); From f7fa827f5f432a0b1f34e10fc49da93aeef9f817 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 7 Jun 2018 00:15:05 +0200 Subject: [PATCH 025/572] perf tools: Fix error index for pmu event parser For events we provide specific error message we need to set error column index, PMU parser is missing that, adding it. Before: $ perf stat -e cycles,krava/cycles/ kill event syntax error: 'cycles,krava/cycles/' \___ Cannot find PMU `krava'. Missing kernel support? After: $ perf stat -e cycles,krava/cycles/ kill event syntax error: 'cycles,krava/cycles/' \___ Cannot find PMU `krava'. Missing kernel support? Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Frederic Weisbecker Cc: Milian Wolff Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20180606221513.11302-3-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.y | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 155d2570274f..da8fe57691b8 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -227,11 +227,16 @@ event_def: event_pmu | event_pmu: PE_NAME opt_pmu_config { + struct parse_events_state *parse_state = _parse_state; + struct parse_events_error *error = parse_state->error; struct list_head *list, *orig_terms, *terms; if (parse_events_copy_term_list($2, &orig_terms)) YYABORT; + if (error) + error->idx = @1.first_column; + ALLOC_LIST(list); if (parse_events_add_pmu(_parse_state, list, $1, $2, false, false)) { struct perf_pmu *pmu = NULL; From 9660e08ee8cbc94ac835f2c30576c6e51fbece8f Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 7 Jun 2018 00:15:06 +0200 Subject: [PATCH 026/572] perf stat: Add --interval-clear option Adding --interval-clear option to clear the screen before next interval. Committer testing: # perf stat -I 1000 --interval-clear And, as expected, it behaves almost like: # watch -n 0 perf stat -a sleep 1 Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Frederic Weisbecker Cc: Milian Wolff Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20180606221513.11302-4-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-stat.txt | 3 +++ tools/perf/builtin-stat.c | 11 +++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 5dfe102fb5b5..b10a90b6a718 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -178,6 +178,9 @@ Print count deltas for fixed number of times. This option should be used together with "-I" option. example: 'perf stat -I 1000 --interval-count 2 -e cycles -a' +--interval-clear:: +Clear the screen before next interval. + --timeout msecs:: Stop the 'perf stat' session and print count deltas after N milliseconds (minimum: 10 ms). This option is not supported with the "-I" option. diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 096ccb25c11f..f1532e3ac7d7 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -65,6 +65,7 @@ #include "util/tool.h" #include "util/string2.h" #include "util/metricgroup.h" +#include "util/top.h" #include "asm/bug.h" #include @@ -173,6 +174,7 @@ static struct cpu_map *aggr_map; static aggr_get_id_t aggr_get_id; static bool append_file; static bool interval_count; +static bool interval_clear; static const char *output_name; static int output_fd; static int print_free_counters_hint; @@ -1704,9 +1706,12 @@ static void print_interval(char *prefix, struct timespec *ts) FILE *output = stat_config.output; static int num_print_interval; + if (interval_clear) + puts(CONSOLE_CLEAR); + sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep); - if (num_print_interval == 0 && !csv_output) { + if ((num_print_interval == 0 && !csv_output) || interval_clear) { switch (stat_config.aggr_mode) { case AGGR_SOCKET: fprintf(output, "# time socket cpus"); @@ -1738,7 +1743,7 @@ static void print_interval(char *prefix, struct timespec *ts) } } - if (num_print_interval == 0 && metric_only) + if ((num_print_interval == 0 && metric_only) || interval_clear) print_metric_headers(" ", true); if (++num_print_interval == 25) num_print_interval = 0; @@ -2057,6 +2062,8 @@ static const struct option stat_options[] = { "(overhead is possible for values <= 100ms)"), OPT_INTEGER(0, "interval-count", &stat_config.times, "print counts for fixed number of times"), + OPT_BOOLEAN(0, "interval-clear", &interval_clear, + "clear screen in between new interval"), OPT_UINTEGER(0, "timeout", &stat_config.timeout, "stop workload and print counts after a timeout period in ms (>= 10ms)"), OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode, From b37d33edbf41b532ddd156707c037c6f4784e40b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 7 Jun 2018 00:15:07 +0200 Subject: [PATCH 027/572] perf stat: Use only color_fprintf call in print_metric_only We can call color_fprintf also for non color case, it's handled properly. This change simplifies following patch. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Frederic Weisbecker Cc: Milian Wolff Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20180606221513.11302-5-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index f1532e3ac7d7..9e7b6f108956 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1008,10 +1008,7 @@ static void print_metric_only(void *ctx, const char *color, const char *fmt, if (!valid_only_metric(unit)) return; unit = fixunit(buf, os->evsel, unit); - if (color) - n = color_fprintf(out, color, fmt, val); - else - n = fprintf(out, fmt, val); + n = color_fprintf(out, color ?: "", fmt, val); if (n > METRIC_ONLY_LEN) n = METRIC_ONLY_LEN; if (mlen < strlen(unit)) From f515572734fb323aa0efe9ea2c546cd7fee327f7 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 7 Jun 2018 00:15:08 +0200 Subject: [PATCH 028/572] perf stat: Fix metric column header display alignment Make the metric only display aligned. Before: # perf stat --topdown -I 1000 # time core cpus retiring bad speculation frontend bound backend bound 1.000394323 S0-C0 2 37.4% 12.0% 31.4% 19.2% 1.000394323 S0-C1 2 25.1% 9.2% 43.8% 21.9% 2.001521204 S0-C0 2 36.4% 11.4% 32.4% 19.8% 2.001521204 S0-C1 2 26.2% 9.4% 43.1% 21.3% 3.001930208 S0-C0 2 35.1% 10.7% 33.6% 20.6% 3.001930208 S0-C1 2 28.9% 10.0% 40.0% 21.1% After: # perf stat --topdown -I 1000 # time core cpus retiring bad speculation frontend bound backend bound 1.000303722 S0-C0 2 34.2% 7.6% 34.2% 24.0% 1.000303722 S0-C1 2 33.1% 6.4% 36.9% 23.6% 2.001281055 S0-C0 2 34.6% 6.7% 36.8% 21.8% 2.001281055 S0-C1 2 32.8% 7.1% 38.1% 22.0% 3.001546080 S0-C0 2 39.3% 5.5% 32.7% 22.5% 3.001546080 S0-C1 2 37.8% 6.0% 33.1% 23.1% Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Frederic Weisbecker Cc: Milian Wolff Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20180606221513.11302-6-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 9e7b6f108956..8f3fdc052728 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1001,19 +1001,20 @@ static void print_metric_only(void *ctx, const char *color, const char *fmt, { struct outstate *os = ctx; FILE *out = os->fh; - int n; - char buf[1024]; + char buf[1024], str[1024]; unsigned mlen = METRIC_ONLY_LEN; if (!valid_only_metric(unit)) return; unit = fixunit(buf, os->evsel, unit); - n = color_fprintf(out, color ?: "", fmt, val); - if (n > METRIC_ONLY_LEN) - n = METRIC_ONLY_LEN; if (mlen < strlen(unit)) mlen = strlen(unit) + 1; - fprintf(out, "%*s", mlen - n, ""); + + if (color) + mlen += strlen(color) + sizeof(PERF_COLOR_RESET) - 1; + + color_snprintf(str, sizeof(str), color ?: "", fmt, val); + fprintf(out, "%*s ", mlen, str); } static void print_metric_only_csv(void *ctx, const char *color __maybe_unused, @@ -1053,7 +1054,7 @@ static void print_metric_header(void *ctx, const char *color __maybe_unused, if (csv_output) fprintf(os->fh, "%s%s", unit, csv_sep); else - fprintf(os->fh, "%-*s ", METRIC_ONLY_LEN, unit); + fprintf(os->fh, "%*s ", METRIC_ONLY_LEN, unit); } static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) @@ -1721,7 +1722,7 @@ static void print_interval(char *prefix, struct timespec *ts) fprintf(output, " counts %*s events\n", unit_width, "unit"); break; case AGGR_NONE: - fprintf(output, "# time CPU"); + fprintf(output, "# time CPU "); if (!metric_only) fprintf(output, " counts %*s events\n", unit_width, "unit"); break; From c1a1f5d9da800dc715d8c1d8a9692c63c70c2955 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 7 Jun 2018 00:15:09 +0200 Subject: [PATCH 029/572] perf stat: Allow to specify specific metric column len The following change will introduce new metrics, that doesn't need such wide hard coded spacing. Switch METRIC_ONLY_LEN macro usage with metric_only_len variable. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Frederic Weisbecker Cc: Milian Wolff Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20180606221513.11302-7-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 8f3fdc052728..3fc1f5286d50 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -145,6 +145,8 @@ static struct target target = { typedef int (*aggr_get_id_t)(struct cpu_map *m, int cpu); +#define METRIC_ONLY_LEN 20 + static int run_count = 1; static bool no_inherit = false; static volatile pid_t child_pid = -1; @@ -182,6 +184,7 @@ static int print_mixed_hw_group_error; static u64 *walltime_run; static bool ru_display = false; static struct rusage ru_data; +static unsigned int metric_only_len = METRIC_ONLY_LEN; struct perf_stat { bool record; @@ -969,8 +972,6 @@ static void print_metric_csv(void *ctx, fprintf(out, "%s%s%s%s", csv_sep, vals, csv_sep, unit); } -#define METRIC_ONLY_LEN 20 - /* Filter out some columns that don't work well in metrics only mode */ static bool valid_only_metric(const char *unit) @@ -1002,7 +1003,7 @@ static void print_metric_only(void *ctx, const char *color, const char *fmt, struct outstate *os = ctx; FILE *out = os->fh; char buf[1024], str[1024]; - unsigned mlen = METRIC_ONLY_LEN; + unsigned mlen = metric_only_len; if (!valid_only_metric(unit)) return; @@ -1054,7 +1055,7 @@ static void print_metric_header(void *ctx, const char *color __maybe_unused, if (csv_output) fprintf(os->fh, "%s%s", unit, csv_sep); else - fprintf(os->fh, "%*s ", METRIC_ONLY_LEN, unit); + fprintf(os->fh, "%*s ", metric_only_len, unit); } static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) From a5cfa6217c94a1f1cfad4481fc14f5fc399abde3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 7 Jun 2018 00:15:10 +0200 Subject: [PATCH 030/572] perf stat: Add event parsing error handling to add_default_attributes Add missing error handling for parse_events calls in add_default_attributes functions. The error handler displays error details, like for transactions (-T): Before: $ perf stat -T Cannot set up transaction events After: $ perf stat -T Cannot set up transaction events event syntax error: '..cycles,cpu/cycles-t/,cpu/tx-start/,cpu/el-start/,cpu/cycles-ct/}' \___ unknown term Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Frederic Weisbecker Cc: Milian Wolff Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20180606221513.11302-8-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 3fc1f5286d50..22547a490e1f 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -2442,14 +2442,13 @@ static int add_default_attributes(void) (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, }; + struct parse_events_error errinfo; /* Set attrs if no event is selected and !null_run: */ if (null_run) return 0; if (transaction_run) { - struct parse_events_error errinfo; - if (pmu_have_event("cpu", "cycles-ct") && pmu_have_event("cpu", "el-start")) err = parse_events(evsel_list, transaction_attrs, @@ -2460,6 +2459,7 @@ static int add_default_attributes(void) &errinfo); if (err) { fprintf(stderr, "Cannot set up transaction events\n"); + parse_events_print_error(&errinfo, transaction_attrs); return -1; } return 0; @@ -2485,10 +2485,11 @@ static int add_default_attributes(void) pmu_have_event("msr", "smi")) { if (!force_metric_only) metric_only = true; - err = parse_events(evsel_list, smi_cost_attrs, NULL); + err = parse_events(evsel_list, smi_cost_attrs, &errinfo); } else { fprintf(stderr, "To measure SMI cost, it needs " "msr/aperf/, msr/smi/ and cpu/cycles/ support\n"); + parse_events_print_error(&errinfo, smi_cost_attrs); return -1; } if (err) { @@ -2523,12 +2524,13 @@ static int add_default_attributes(void) if (topdown_attrs[0] && str) { if (warn) arch_topdown_group_warn(); - err = parse_events(evsel_list, str, NULL); + err = parse_events(evsel_list, str, &errinfo); if (err) { fprintf(stderr, "Cannot set up top down events %s: %d\n", str, err); free(str); + parse_events_print_error(&errinfo, str); return -1; } } else { From c7d606f560e4c698884697fef503e4abacdd8c25 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 25 May 2018 14:41:39 -0700 Subject: [PATCH 031/572] x86/mce: Improve error message when kernel cannot recover Since we added support to add recovery from some errors inside the kernel in: commit b2f9d678e28c ("x86/mce: Check for faults tagged in EXTABLE_CLASS_FAULT exception table entries") we have done a less than stellar job at reporting the cause of recoverable machine checks that occur in other parts of the kernel. The user just gets the unhelpful message: mce: [Hardware Error]: Machine check: Action required: unknown MCACOD doubly unhelpful when they check the manual for the reported IA32_MSR_STATUS.MCACOD and see that it is listed as one of the standard recoverable values. Add an extra rule to the MCE severity table to catch this case and report it as: mce: [Hardware Error]: Machine check: Data load in unrecoverable area of kernel Fixes: b2f9d678e28c ("x86/mce: Check for faults tagged in EXTABLE_CLASS_FAULT exception table entries") Signed-off-by: Tony Luck Signed-off-by: Thomas Gleixner Cc: Qiuxu Zhuo Cc: Ashok Raj Cc: stable@vger.kernel.org # 4.6+ Cc: Dan Williams Cc: Borislav Petkov Link: https://lkml.kernel.org/r/4cc7c465150a9a48b8b9f45d0b840278e77eb9b5.1527283897.git.tony.luck@intel.com --- arch/x86/kernel/cpu/mcheck/mce-severity.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c index 5bbd06f38ff6..f34d89c01edc 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c @@ -160,6 +160,11 @@ static struct severity { SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR), USER ), + MCESEV( + PANIC, "Data load in unrecoverable area of kernel", + SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA), + KERNEL + ), #endif MCESEV( PANIC, "Action required: unknown MCACOD", From 4c5717da1d021cf368eabb3cb1adcaead56c0d1e Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 25 May 2018 14:42:09 -0700 Subject: [PATCH 032/572] x86/mce: Check for alternate indication of machine check recovery on Skylake Currently we just check the "CAPID0" register to see whether the CPU can recover from machine checks. But there are also some special SKUs which do not have all advanced RAS features, but do enable machine check recovery for use with NVDIMMs. Add a check for any of bits {8:5} in the "CAPID5" register (each reports some NVDIMM mode available, if any of them are set, then the system supports memory machine check recovery). Signed-off-by: Tony Luck Signed-off-by: Thomas Gleixner Cc: Qiuxu Zhuo Cc: Ashok Raj Cc: stable@vger.kernel.org # 4.9 Cc: Dan Williams Cc: Borislav Petkov Link: https://lkml.kernel.org/r/03cbed6e99ddafb51c2eadf9a3b7c8d7a0cc204e.1527283897.git.tony.luck@intel.com --- arch/x86/kernel/quirks.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 697a4ce04308..736348ead421 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -645,12 +645,19 @@ static void quirk_intel_brickland_xeon_ras_cap(struct pci_dev *pdev) /* Skylake */ static void quirk_intel_purley_xeon_ras_cap(struct pci_dev *pdev) { - u32 capid0; + u32 capid0, capid5; pci_read_config_dword(pdev, 0x84, &capid0); + pci_read_config_dword(pdev, 0x98, &capid5); - if ((capid0 & 0xc0) == 0xc0) + /* + * CAPID0{7:6} indicate whether this is an advanced RAS SKU + * CAPID5{8:5} indicate that various NVDIMM usage modes are + * enabled, so memory machine check recovery is also enabled. + */ + if ((capid0 & 0xc0) == 0xc0 || (capid5 & 0x1e0)) static_branch_inc(&mcsafe_key); + } DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0ec3, quirk_intel_brickland_xeon_ras_cap); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, quirk_intel_brickland_xeon_ras_cap); From 977d5ba4507dfe5b1346597ee57750262d8d2b19 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 5 Jun 2018 17:05:15 +0200 Subject: [PATCH 033/572] soc: renesas: rcar-sysc: Make PM domain initialization more robust The quirk for R-Car E3 ES1.0 added in commit 086b399965a7ee7e ("soc: renesas: r8a77990-sysc: Add workaround for 3DG-{A,B}") makes the 3DG-A PM domain a subdomain of the 3DG-B PM domain. However, registering 3DG-A with its parent fails silently, as the 3DG-B PM domain hasn't been registered yet, and such failures are never reported. Fix this by: 1. Splitting PM Domain initialization in two steps, so all PM domains are registered before any child-parent links are established, 2. Reporting any failures in establishing child-parent relations. Check for and report pm_genpd_init() failures, too, as that function gained a return value in commit 7eb231c337e00735 ("PM / Domains: Convert pm_genpd_init() to return an error code"). Fixes: 086b399965a7ee7e ("soc: renesas: r8a77990-sysc: Add workaround for 3DG-{A,B}") Signed-off-by: Geert Uytterhoeven Tested-by: Yoshihiro Shimoda Reviewed-by: Ulf Hansson Signed-off-by: Simon Horman --- drivers/soc/renesas/rcar-sysc.c | 35 +++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/drivers/soc/renesas/rcar-sysc.c b/drivers/soc/renesas/rcar-sysc.c index 95120acc4d80..50d03d8b4f9a 100644 --- a/drivers/soc/renesas/rcar-sysc.c +++ b/drivers/soc/renesas/rcar-sysc.c @@ -194,11 +194,12 @@ static int rcar_sysc_pd_power_on(struct generic_pm_domain *genpd) static bool has_cpg_mstp; -static void __init rcar_sysc_pd_setup(struct rcar_sysc_pd *pd) +static int __init rcar_sysc_pd_setup(struct rcar_sysc_pd *pd) { struct generic_pm_domain *genpd = &pd->genpd; const char *name = pd->genpd.name; struct dev_power_governor *gov = &simple_qos_governor; + int error; if (pd->flags & PD_CPU) { /* @@ -251,7 +252,11 @@ static void __init rcar_sysc_pd_setup(struct rcar_sysc_pd *pd) rcar_sysc_power_up(&pd->ch); finalize: - pm_genpd_init(genpd, gov, false); + error = pm_genpd_init(genpd, gov, false); + if (error) + pr_err("Failed to init PM domain %s: %d\n", name, error); + + return error; } static const struct of_device_id rcar_sysc_matches[] __initconst = { @@ -375,6 +380,9 @@ static int __init rcar_sysc_pd_init(void) pr_debug("%pOF: syscier = 0x%08x\n", np, syscier); iowrite32(syscier, base + SYSCIER); + /* + * First, create all PM domains + */ for (i = 0; i < info->num_areas; i++) { const struct rcar_sysc_area *area = &info->areas[i]; struct rcar_sysc_pd *pd; @@ -397,14 +405,29 @@ static int __init rcar_sysc_pd_init(void) pd->ch.isr_bit = area->isr_bit; pd->flags = area->flags; - rcar_sysc_pd_setup(pd); - if (area->parent >= 0) - pm_genpd_add_subdomain(domains->domains[area->parent], - &pd->genpd); + error = rcar_sysc_pd_setup(pd); + if (error) + goto out_put; domains->domains[area->isr_bit] = &pd->genpd; } + /* + * Second, link all PM domains to their parents + */ + for (i = 0; i < info->num_areas; i++) { + const struct rcar_sysc_area *area = &info->areas[i]; + + if (!area->name || area->parent < 0) + continue; + + error = pm_genpd_add_subdomain(domains->domains[area->parent], + domains->domains[area->isr_bit]); + if (error) + pr_warn("Failed to add PM subdomain %s to parent %u\n", + area->name, area->parent); + } + error = of_genpd_add_provider_onecell(np, &domains->onecell_data); out_put: From 4c8205273626f27b9e5a64bdc194ab483a8cce66 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 8 Jun 2018 02:22:11 +0200 Subject: [PATCH 034/572] perf c2c: Keep struct hist_entry at the end of struct c2c_hist_entry Exactly as the comment just before 'struct c2c_hist_entry" says, i.e. the last entry in struct hist_entry is a zero length array, that when allocating space for hist_entry gets extra space if callchains are in use, which, if hist_entry is not at the end of c2c_hist_entry, the members after it gets corrupted when callchains get added to the rb trees collecting them, etc. Signed-off-by: Jiri Olsa Reported-by: Arnaldo Carvalho de Melo Cc: Jin Yao Fixes: 7f834c2e84bb ("perf c2c report: Display node for cacheline address") Link: http://lkml.kernel.org/n/tip-bh0ke4fh2ygpj3yowna7o1di@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 307b3594525f..6a8738f7ead3 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -56,16 +56,16 @@ struct c2c_hist_entry { struct compute_stats cstats; + unsigned long paddr; + unsigned long paddr_cnt; + bool paddr_zero; + char *nodestr; + /* * must be at the end, * because of its callchain dynamic entry */ struct hist_entry he; - - unsigned long paddr; - unsigned long paddr_cnt; - bool paddr_zero; - char *nodestr; }; static char const *coalesce_default = "pid,iaddr"; From fad76d4333fe73cf3f73704aa34d4ce523b1c458 Mon Sep 17 00:00:00 2001 From: Seeteena Thoufeek Date: Fri, 8 Jun 2018 16:32:28 +0530 Subject: [PATCH 035/572] perf script: Show hw-cache events 'perf script' fails to report hardware cache events (PERF_TYPE_HW_CACHE) where as 'perf report' shows the samples. Fix it. Ex, # perf record -e L1-dcache-loads ./a.out [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.008 MB perf.data (11 samples)] Before patch: # perf script | wc -l 0 After patch: # perf script | wc -l 11 Committer testing: [root@jouet ~]# perf script | head -30 | tail Timer 9803 [2] 8.963330: 1554 L1-dcache-loads: 7ffef89baae4 __vdso_clock_gettime+0xf4 ([vdso]) swapper 0 [2] 8.963343: 5626 L1-dcache-loads: ffffffffa66f4f6b cpuidle_not_av+0xb (/lib/modules/4.17.0-rc5/build/vmlinux) firefox 4853 [2] 8.964070: 18935 L1-dcache-loads: 7f0b9a00dc30 xcb_poll_for_event+0x0 (/usr/lib64/libxcb.so.1.1.0) Softwar~cTh 4928 [2] 8.964548: 15928 L1-dcache-loads: ffffffffa60d795c update_curr+0x10c (/lib/modules/4.17.0-rc5/build/vmlinux) firefox 4853 [2] 8.964675: 14978 L1-dcache-loads: ffffffffa6897018 mutex_unlock+0x18 (/lib/modules/4.17.0-rc5/build/vmlinux) gnome-shell 2026 [3] 8.964693: 50670 L1-dcache-loads: 7fa08854de6d g_source_iter_next+0x6d (/usr/lib64/libglib-2.0.so.0.5400.3) Compositor 4929 [1] 8.964784: 71772 L1-dcache-loads: 7f0b936bf078 [unknown] (/usr/lib64/firefox/libxul.so) Xwayland 2096 [2] 8.964919: 16799 L1-dcache-loads: 7f68ce2fcb8a glXGetCurrentContext+0x1a (/usr/lib64/libGLX.so.0.0.0) gnome-shell 2026 [3] 8.964997: 50670 L1-dcache-loads: 7fa08854de6d g_source_iter_next+0x6d (/usr/lib64/libglib-2.0.so.0.5400.3) [root@jouet ~]# Signed-off-by: Seeteena Thoufeek Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1528455748-20087-1-git-send-email-s1seetee@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index b3bf35512d21..a31d7082188e 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -180,6 +180,18 @@ static struct { PERF_OUTPUT_EVNAME | PERF_OUTPUT_TRACE }, + [PERF_TYPE_HW_CACHE] = { + .user_set = false, + + .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID | + PERF_OUTPUT_CPU | PERF_OUTPUT_TIME | + PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP | + PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET | + PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD, + + .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, + }, + [PERF_TYPE_RAW] = { .user_set = false, From 3e84c7651dde7cca43c5cfd7385086599cce5a5d Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Fri, 8 Jun 2018 08:14:50 +0800 Subject: [PATCH 036/572] HID: google: Add support for whiskers Another device in the hammer class, with USB id 0x5030. Signed-off-by: Nicolas Boichat Acked-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-google-hammer.c | 2 ++ drivers/hid/hid-ids.h | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/hid/hid-google-hammer.c b/drivers/hid/hid-google-hammer.c index 7b8e17b03cb8..6bf4da7ad63a 100644 --- a/drivers/hid/hid-google-hammer.c +++ b/drivers/hid/hid-google-hammer.c @@ -124,6 +124,8 @@ static const struct hid_device_id hammer_devices[] = { USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_STAFF) }, { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_WAND) }, + { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, + USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_WHISKERS) }, { } }; MODULE_DEVICE_TABLE(hid, hammer_devices); diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index a85634fe033f..c7981ddd8776 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -452,6 +452,7 @@ #define USB_DEVICE_ID_GOOGLE_TOUCH_ROSE 0x5028 #define USB_DEVICE_ID_GOOGLE_STAFF 0x502b #define USB_DEVICE_ID_GOOGLE_WAND 0x502d +#define USB_DEVICE_ID_GOOGLE_WHISKERS 0x5030 #define USB_VENDOR_ID_GOTOP 0x08f2 #define USB_DEVICE_ID_SUPER_Q2 0x007f From 828d810550abc1fffff9b20545fec4bc150d5e82 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Thu, 7 Jun 2018 02:32:52 -0400 Subject: [PATCH 037/572] IB/rxe: avoid double kfree skb In rxe_send, when network_type is not RDMA_NETWORK_IPV4 or RDMA_NETWORK_IPV6, skb is freed and -EINVAL is returned. Then rxe_xmit_packet will return -EINVAL, too. In rxe_requester, this skb is double freed. In rxe_requester, kfree_skb is needed only after fill_packet fails. So kfree_skb is moved from label err to test fill_packet. Fixes: 5793b4652155 ("IB/rxe: remove unnecessary skb_clone in xmit") Reported-by: Dan Carpenter Signed-off-by: Zhu Yanjun Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_req.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index f30eeba3f772..829ecb93661f 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -709,6 +709,7 @@ next_wqe: if (fill_packet(qp, wqe, &pkt, skb, payload)) { pr_debug("qp#%d Error during fill packet\n", qp_num(qp)); + kfree_skb(skb); goto err; } @@ -740,7 +741,6 @@ next_wqe: goto next_wqe; err: - kfree_skb(skb); wqe->status = IB_WC_LOC_PROT_ERR; wqe->state = wqe_state_error; __rxe_do_task(&qp->comp.task); From 299eafee39a22a9d9a7c19ae592b230bd199f259 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 7 Jun 2018 14:19:15 -0500 Subject: [PATCH 038/572] IB/mlx5: Fix memory leak in mlx5_ib_create_flow In case memory resources for *ucmd* were allocated, release them before return. Addresses-Coverity-ID: 1469857 ("Resource leak") Fixes: 3b3233fbf02e ("IB/mlx5: Add flow counters binding support") Signed-off-by: Gustavo A. R. Silva Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 32 ++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 3544150f3469..f93b30060ca7 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3546,29 +3546,35 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, return ERR_PTR(-ENOMEM); err = ib_copy_from_udata(ucmd, udata, required_ucmd_sz); - if (err) { - kfree(ucmd); - return ERR_PTR(err); - } + if (err) + goto free_ucmd; } - if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) - return ERR_PTR(-ENOMEM); + if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) { + err = -ENOMEM; + goto free_ucmd; + } if (domain != IB_FLOW_DOMAIN_USER || flow_attr->port > dev->num_ports || (flow_attr->flags & ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP | - IB_FLOW_ATTR_FLAGS_EGRESS))) - return ERR_PTR(-EINVAL); + IB_FLOW_ATTR_FLAGS_EGRESS))) { + err = -EINVAL; + goto free_ucmd; + } if (is_egress && (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || - flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) - return ERR_PTR(-EINVAL); + flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) { + err = -EINVAL; + goto free_ucmd; + } dst = kzalloc(sizeof(*dst), GFP_KERNEL); - if (!dst) - return ERR_PTR(-ENOMEM); + if (!dst) { + err = -ENOMEM; + goto free_ucmd; + } mutex_lock(&dev->flow_db->lock); @@ -3637,8 +3643,8 @@ destroy_ft: unlock: mutex_unlock(&dev->flow_db->lock); kfree(dst); +free_ucmd: kfree(ucmd); - kfree(handler); return ERR_PTR(err); } From e31abf76f4d4d3202ca16b9668b11178df23d473 Mon Sep 17 00:00:00 2001 From: "weiyongjun (A)" Date: Thu, 7 Jun 2018 01:47:41 +0000 Subject: [PATCH 039/572] IB/mlx5: Fix return value check in flow_counters_set_data() In case of error, the function mlx5_fc_create() returns ERR_PTR() and never returns NULL. The NULL test in the return value check should be replaced with IS_ERR(). Fixes: 3b3233fbf02e ("IB/mlx5: Add flow counters binding support") Signed-off-by: Wei Yongjun Acked-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index f93b30060ca7..645fc69997bc 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3199,8 +3199,8 @@ static int flow_counters_set_data(struct ib_counters *ibcounters, if (!mcounters->hw_cntrs_hndl) { mcounters->hw_cntrs_hndl = mlx5_fc_create( to_mdev(ibcounters->device)->mdev, false); - if (!mcounters->hw_cntrs_hndl) { - ret = -ENOMEM; + if (IS_ERR(mcounters->hw_cntrs_hndl)) { + ret = PTR_ERR(mcounters->hw_cntrs_hndl); goto free; } hw_hndl = true; From 425cf5c1350a98b81f3ddda160b99c3be613a213 Mon Sep 17 00:00:00 2001 From: "Kalderon, Michal" Date: Mon, 11 Jun 2018 10:20:20 +0300 Subject: [PATCH 040/572] RDMA/qedr: Fix NULL pointer dereference when running over iWARP without RDMA-CM Some RoCE specific code in qedr_modify_qp was run over an iWARP device when running perftest benchmarks without the -R option. The commit 3e44e0ee0893 ("IB/providers: Avoid null netdev check for RoCE") exposed this. Dropping the check for NULL pointer on ndev in qedr_modify_qp lead to a null pointer dereference when running over iWARP. Before the code would identify ndev as being NULL and return an error. Fixes: 3e44e0ee0893 ("IB/providers: Avoid null netdev check for RoCE") Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Reviewed-by: Parav Pandit Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/qedr/verbs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 614a954d0757..f9b198455fc9 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -1957,6 +1957,9 @@ int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, } if (attr_mask & (IB_QP_AV | IB_QP_PATH_MTU)) { + if (rdma_protocol_iwarp(&dev->ibdev, 1)) + return -EINVAL; + if (attr_mask & IB_QP_PATH_MTU) { if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) { From fd629294759b1e6e5dfc0c9bf892e63553dcc6fa Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 11 Jun 2018 10:09:15 -0700 Subject: [PATCH 041/572] Input: synaptics-rmi4 - fix the error return code in rmi_probe_interrupts() The error return code PTR_ERR(data->irqdomain) is always 0 since data->irqdomain is equal to NULL in this error handling case. Fixes: 24d28e4f1271 ("Input: synaptics-rmi4 - convert irq distribution to irq_domain") Signed-off-by: Wei Yongjun Reviewed-by: Lyude Paul Reviewed-by: Nick Desaulniers Signed-off-by: Dmitry Torokhov --- drivers/input/rmi4/rmi_driver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c index 2fb0ae0bdfe3..cb6d983e8033 100644 --- a/drivers/input/rmi4/rmi_driver.c +++ b/drivers/input/rmi4/rmi_driver.c @@ -1043,7 +1043,7 @@ int rmi_probe_interrupts(struct rmi_driver_data *data) data); if (!data->irqdomain) { dev_err(&rmi_dev->dev, "Failed to create IRQ domain\n"); - return PTR_ERR(data->irqdomain); + return -ENOMEM; } data->irq_count = irq_count; From 645a397d325db6e1bb36588095ae637738b37693 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Tue, 5 Jun 2018 10:34:54 -0700 Subject: [PATCH 042/572] Input: synaptics-rmi4 - fix axis-swap behavior The documentation for the touchscreen-swapped-x-y property states that swapping is done after inverting if both are used. RMI4 did it the other way around, leading to inconsistent behavior with regard to other touchscreens. Signed-off-by: Lucas Stach Tested-by: Nick Dyer Signed-off-by: Dmitry Torokhov --- drivers/input/rmi4/rmi_2d_sensor.c | 34 ++++++++++++++---------------- 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/drivers/input/rmi4/rmi_2d_sensor.c b/drivers/input/rmi4/rmi_2d_sensor.c index 8bb866c7b985..8eeffa066022 100644 --- a/drivers/input/rmi4/rmi_2d_sensor.c +++ b/drivers/input/rmi4/rmi_2d_sensor.c @@ -32,15 +32,15 @@ void rmi_2d_sensor_abs_process(struct rmi_2d_sensor *sensor, if (obj->type == RMI_2D_OBJECT_NONE) return; - if (axis_align->swap_axes) - swap(obj->x, obj->y); - if (axis_align->flip_x) obj->x = sensor->max_x - obj->x; if (axis_align->flip_y) obj->y = sensor->max_y - obj->y; + if (axis_align->swap_axes) + swap(obj->x, obj->y); + /* * Here checking if X offset or y offset are specified is * redundant. We just add the offsets or clip the values. @@ -120,15 +120,15 @@ void rmi_2d_sensor_rel_report(struct rmi_2d_sensor *sensor, int x, int y) x = min(RMI_2D_REL_POS_MAX, max(RMI_2D_REL_POS_MIN, (int)x)); y = min(RMI_2D_REL_POS_MAX, max(RMI_2D_REL_POS_MIN, (int)y)); - if (axis_align->swap_axes) - swap(x, y); - if (axis_align->flip_x) x = min(RMI_2D_REL_POS_MAX, -x); if (axis_align->flip_y) y = min(RMI_2D_REL_POS_MAX, -y); + if (axis_align->swap_axes) + swap(x, y); + if (x || y) { input_report_rel(sensor->input, REL_X, x); input_report_rel(sensor->input, REL_Y, y); @@ -141,17 +141,10 @@ static void rmi_2d_sensor_set_input_params(struct rmi_2d_sensor *sensor) struct input_dev *input = sensor->input; int res_x; int res_y; + int max_x, max_y; int input_flags = 0; if (sensor->report_abs) { - if (sensor->axis_align.swap_axes) { - swap(sensor->max_x, sensor->max_y); - swap(sensor->axis_align.clip_x_low, - sensor->axis_align.clip_y_low); - swap(sensor->axis_align.clip_x_high, - sensor->axis_align.clip_y_high); - } - sensor->min_x = sensor->axis_align.clip_x_low; if (sensor->axis_align.clip_x_high) sensor->max_x = min(sensor->max_x, @@ -163,14 +156,19 @@ static void rmi_2d_sensor_set_input_params(struct rmi_2d_sensor *sensor) sensor->axis_align.clip_y_high); set_bit(EV_ABS, input->evbit); - input_set_abs_params(input, ABS_MT_POSITION_X, 0, sensor->max_x, - 0, 0); - input_set_abs_params(input, ABS_MT_POSITION_Y, 0, sensor->max_y, - 0, 0); + + max_x = sensor->max_x; + max_y = sensor->max_y; + if (sensor->axis_align.swap_axes) + swap(max_x, max_y); + input_set_abs_params(input, ABS_MT_POSITION_X, 0, max_x, 0, 0); + input_set_abs_params(input, ABS_MT_POSITION_Y, 0, max_y, 0, 0); if (sensor->x_mm && sensor->y_mm) { res_x = (sensor->max_x - sensor->min_x) / sensor->x_mm; res_y = (sensor->max_y - sensor->min_y) / sensor->y_mm; + if (sensor->axis_align.swap_axes) + swap(res_x, res_y); input_abs_set_res(input, ABS_X, res_x); input_abs_set_res(input, ABS_Y, res_y); From bf6247a70f2b7569180304041b63b59ab14217bc Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 5 Jun 2018 10:08:44 -0700 Subject: [PATCH 043/572] Input: make input_report_slot_state() return boolean Let's make input_report_slot_state() return boolean representing whether the contact is active or not. This will allow writing code like: if (input_mt_report_slot_state(input, obj->mt_tool, obj->type != RMI_2D_OBJECT_NONE) { input_event(sensor->input, EV_ABS, ABS_MT_POSITION_X, obj->x); input_event(sensor->input, EV_ABS, ABS_MT_POSITION_Y, obj->y); ... } instead of: input_mt_report_slot_state(input, obj->mt_tool, obj->type != RMI_2D_OBJECT_NONE); if (obj->type != RMI_2D_OBJECT_NONE) { input_event(sensor->input, EV_ABS, ABS_MT_POSITION_X, obj->x); input_event(sensor->input, EV_ABS, ABS_MT_POSITION_Y, obj->y); ... } Reviewed-by: Henrik Rydberg Acked-by: Benjamin Tissoires Signed-off-by: Dmitry Torokhov --- drivers/input/input-mt.c | 10 +++++++--- include/linux/input/mt.h | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/input/input-mt.c b/drivers/input/input-mt.c index a1bbec9cda8d..8de52e3c00c0 100644 --- a/drivers/input/input-mt.c +++ b/drivers/input/input-mt.c @@ -131,8 +131,10 @@ EXPORT_SYMBOL(input_mt_destroy_slots); * inactive, or if the tool type is changed, a new tracking id is * assigned to the slot. The tool type is only reported if the * corresponding absbit field is set. + * + * Returns true if contact is active. */ -void input_mt_report_slot_state(struct input_dev *dev, +bool input_mt_report_slot_state(struct input_dev *dev, unsigned int tool_type, bool active) { struct input_mt *mt = dev->mt; @@ -140,14 +142,14 @@ void input_mt_report_slot_state(struct input_dev *dev, int id; if (!mt) - return; + return false; slot = &mt->slots[mt->slot]; slot->frame = mt->frame; if (!active) { input_event(dev, EV_ABS, ABS_MT_TRACKING_ID, -1); - return; + return false; } id = input_mt_get_value(slot, ABS_MT_TRACKING_ID); @@ -156,6 +158,8 @@ void input_mt_report_slot_state(struct input_dev *dev, input_event(dev, EV_ABS, ABS_MT_TRACKING_ID, id); input_event(dev, EV_ABS, ABS_MT_TOOL_TYPE, tool_type); + + return true; } EXPORT_SYMBOL(input_mt_report_slot_state); diff --git a/include/linux/input/mt.h b/include/linux/input/mt.h index d7188de4db96..3f4bf60b0bb5 100644 --- a/include/linux/input/mt.h +++ b/include/linux/input/mt.h @@ -100,7 +100,7 @@ static inline bool input_is_mt_axis(int axis) return axis == ABS_MT_SLOT || input_is_mt_value(axis); } -void input_mt_report_slot_state(struct input_dev *dev, +bool input_mt_report_slot_state(struct input_dev *dev, unsigned int tool_type, bool active); void input_mt_report_finger_count(struct input_dev *dev, int count); From c258e84b180d38f76ab9962cf5bfc10720af1360 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 15 Aug 2017 16:00:42 -0700 Subject: [PATCH 044/572] Input: do not assign new tracking ID when changing tool type We allow changing tool type (from MT_TOOL_FINGER to MT_TOOL_PALM) so we should not be forcing new tracking ID for the slot. Acked-by: Benjamin Tissoires Reviewed-by: Peter Hutterer Signed-off-by: Dmitry Torokhov --- drivers/input/input-mt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/input-mt.c b/drivers/input/input-mt.c index 8de52e3c00c0..4a69716e5461 100644 --- a/drivers/input/input-mt.c +++ b/drivers/input/input-mt.c @@ -153,7 +153,7 @@ bool input_mt_report_slot_state(struct input_dev *dev, } id = input_mt_get_value(slot, ABS_MT_TRACKING_ID); - if (id < 0 || input_mt_get_value(slot, ABS_MT_TOOL_TYPE) != tool_type) + if (id < 0) id = input_mt_new_trkid(mt); input_event(dev, EV_ABS, ABS_MT_TRACKING_ID, id); From 3dc7c7badb7502ec3e3aa817a8bdd9e53aa54c52 Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Mon, 11 Jun 2018 20:15:11 +0200 Subject: [PATCH 045/572] IB/mlx4: Fix an error handling path in 'mlx4_ib_rereg_user_mr()' Before returning -EPERM we should release some resources, as already done in the other error handling path of the function. Fixes: d8f9cc328c88 ("IB/mlx4: Mark user MR as writable if actual virtual memory is writable") Signed-off-by: Christophe JAILLET Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx4/mr.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index ed1f253faf97..c7c85c22e4e3 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -486,8 +486,11 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, } if (flags & IB_MR_REREG_ACCESS) { - if (ib_access_writable(mr_access_flags) && !mmr->umem->writable) - return -EPERM; + if (ib_access_writable(mr_access_flags) && + !mmr->umem->writable) { + err = -EPERM; + goto release_mpt_entry; + } err = mlx4_mr_hw_change_access(dev->dev, *pmpt_entry, convert_access(mr_access_flags)); From 5d902372ba5f416261c79123f02e49c664c7118f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Tue, 12 Jun 2018 12:02:56 +0200 Subject: [PATCH 046/572] xsk: re-add queue id check for XDP_SKB path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 173d3adb6f43 ("xsk: add zero-copy support for Rx") introduced a regression on the XDP_SKB receive path, when the queue id checks were removed. Now, they are back again. Fixes: 173d3adb6f43 ("xsk: add zero-copy support for Rx") Reported-by: Qi Zhang Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann --- net/xdp/xsk.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 36919a254ba3..3b3410ada097 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -118,6 +118,9 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) u64 addr; int err; + if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index) + return -EINVAL; + if (!xskq_peek_addr(xs->umem->fq, &addr) || len > xs->umem->chunk_size_nohr) { xs->rx_dropped++; From 71ab91157e9d849a5a0c5fcd1ce68d236c3ca703 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 11 Jun 2018 08:58:36 -0600 Subject: [PATCH 047/572] microblaze: heartbeat: fix missing prom.h include Since commit 96f0e6fcc9ad ("microblaze: remove redundant early_printk support") prom.h was removed and one instance in heartbeat.c remained. Include of.h as it is the actual header needed. Fixes: 96f0e6fcc9ad ("microblaze: remove redundant early_printk support") Reported-by: kbuild test robot Cc: Michal Simek Signed-off-by: Rob Herring Signed-off-by: Michal Simek --- arch/microblaze/kernel/heartbeat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/microblaze/kernel/heartbeat.c b/arch/microblaze/kernel/heartbeat.c index 2022130139d2..87aa942eee8c 100644 --- a/arch/microblaze/kernel/heartbeat.c +++ b/arch/microblaze/kernel/heartbeat.c @@ -11,10 +11,10 @@ #include #include #include +#include #include #include -#include static unsigned int base_addr; From 5cf8da7bc03570bc15ef1b59e584dfb9e0f47e51 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Tue, 12 Jun 2018 12:46:17 +0200 Subject: [PATCH 048/572] microblaze: Remove architecture heart beat code There is no reason to keep this gpio based code in architecture. Use ledtrig-heartbeat.c instead which is much more flexible then this ancient code. Signed-off-by: Michal Simek Reviewed-by: Linus Walleij --- arch/microblaze/Kconfig.debug | 7 --- arch/microblaze/include/asm/setup.h | 3 -- arch/microblaze/kernel/Makefile | 2 - arch/microblaze/kernel/heartbeat.c | 72 ----------------------------- arch/microblaze/kernel/timer.c | 7 --- 5 files changed, 91 deletions(-) delete mode 100644 arch/microblaze/kernel/heartbeat.c diff --git a/arch/microblaze/Kconfig.debug b/arch/microblaze/Kconfig.debug index 331a3bb66297..93a737c8d1a6 100644 --- a/arch/microblaze/Kconfig.debug +++ b/arch/microblaze/Kconfig.debug @@ -8,11 +8,4 @@ config TRACE_IRQFLAGS_SUPPORT source "lib/Kconfig.debug" -config HEART_BEAT - bool "Heart beat function for kernel" - default n - help - This option turns on/off heart beat kernel functionality. - First GPIO node is taken. - endmenu diff --git a/arch/microblaze/include/asm/setup.h b/arch/microblaze/include/asm/setup.h index d5384f6f36f7..a38e4a56e3c6 100644 --- a/arch/microblaze/include/asm/setup.h +++ b/arch/microblaze/include/asm/setup.h @@ -19,9 +19,6 @@ extern char cmd_line[COMMAND_LINE_SIZE]; extern char *klimit; -void microblaze_heartbeat(void); -void microblaze_setup_heartbeat(void); - # ifdef CONFIG_MMU extern void mmu_reset(void); # endif /* CONFIG_MMU */ diff --git a/arch/microblaze/kernel/Makefile b/arch/microblaze/kernel/Makefile index 7e99cf6984a1..3a53378d66d9 100644 --- a/arch/microblaze/kernel/Makefile +++ b/arch/microblaze/kernel/Makefile @@ -8,7 +8,6 @@ ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_timer.o = -pg CFLAGS_REMOVE_intc.o = -pg CFLAGS_REMOVE_early_printk.o = -pg -CFLAGS_REMOVE_heartbeat.o = -pg CFLAGS_REMOVE_ftrace.o = -pg CFLAGS_REMOVE_process.o = -pg endif @@ -22,7 +21,6 @@ obj-y += dma.o exceptions.o \ obj-y += cpu/ -obj-$(CONFIG_HEART_BEAT) += heartbeat.o obj-$(CONFIG_MODULES) += microblaze_ksyms.o module.o obj-$(CONFIG_MMU) += misc.o obj-$(CONFIG_STACKTRACE) += stacktrace.o diff --git a/arch/microblaze/kernel/heartbeat.c b/arch/microblaze/kernel/heartbeat.c deleted file mode 100644 index 87aa942eee8c..000000000000 --- a/arch/microblaze/kernel/heartbeat.c +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (C) 2007-2009 Michal Simek - * Copyright (C) 2007-2009 PetaLogix - * Copyright (C) 2006 Atmark Techno, Inc. - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - */ - -#include -#include -#include -#include - -#include -#include - -static unsigned int base_addr; - -void microblaze_heartbeat(void) -{ - static unsigned int cnt, period, dist; - - if (base_addr) { - if (cnt == 0 || cnt == dist) - out_be32(base_addr, 1); - else if (cnt == 7 || cnt == dist + 7) - out_be32(base_addr, 0); - - if (++cnt > period) { - cnt = 0; - /* - * The hyperbolic function below modifies the heartbeat - * period length in dependency of the current (5min) - * load. It goes through the points f(0)=126, f(1)=86, - * f(5)=51, f(inf)->30. - */ - period = ((672 << FSHIFT) / (5 * avenrun[0] + - (7 << FSHIFT))) + 30; - dist = period / 4; - } - } -} - -void microblaze_setup_heartbeat(void) -{ - struct device_node *gpio = NULL; - int *prop; - int j; - const char * const gpio_list[] = { - "xlnx,xps-gpio-1.00.a", - NULL - }; - - for (j = 0; gpio_list[j] != NULL; j++) { - gpio = of_find_compatible_node(NULL, NULL, gpio_list[j]); - if (gpio) - break; - } - - if (gpio) { - base_addr = be32_to_cpup(of_get_property(gpio, "reg", NULL)); - base_addr = (unsigned long) ioremap(base_addr, PAGE_SIZE); - pr_notice("Heartbeat GPIO at 0x%x\n", base_addr); - - /* GPIO is configured as output */ - prop = (int *) of_get_property(gpio, "xlnx,is-bidir", NULL); - if (prop) - out_be32(base_addr + 4, 0); - } -} diff --git a/arch/microblaze/kernel/timer.c b/arch/microblaze/kernel/timer.c index 7de941cbbd94..a6683484b3a1 100644 --- a/arch/microblaze/kernel/timer.c +++ b/arch/microblaze/kernel/timer.c @@ -156,9 +156,6 @@ static inline void timer_ack(void) static irqreturn_t timer_interrupt(int irq, void *dev_id) { struct clock_event_device *evt = &clockevent_xilinx_timer; -#ifdef CONFIG_HEART_BEAT - microblaze_heartbeat(); -#endif timer_ack(); evt->event_handler(evt); return IRQ_HANDLED; @@ -318,10 +315,6 @@ static int __init xilinx_timer_init(struct device_node *timer) return ret; } -#ifdef CONFIG_HEART_BEAT - microblaze_setup_heartbeat(); -#endif - ret = xilinx_clocksource_init(); if (ret) return ret; From 7352c5469307395875f4b62f366a369ae1c46e83 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Wed, 23 May 2018 16:47:36 +0200 Subject: [PATCH 049/572] s390/cio: sanitize css_general_characteristics definition Change css_general_characteristics such that the bitfields don't straddle storage-unit boundaries of the base types. This does not change the offsets of the structs members but now we do as documented and also fix the following sparse complaint: drivers/s390/cio/chsc.c:926:56: warning: invalid access past the end of 'css_general_characteristics' (16 18) Signed-off-by: Sebastian Ott Reviewed-by: Peter Oberparleiter Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/css_chars.h | 62 ++++++++++++++++--------------- 1 file changed, 32 insertions(+), 30 deletions(-) diff --git a/arch/s390/include/asm/css_chars.h b/arch/s390/include/asm/css_chars.h index 0563fd3e8458..480bb02ccacd 100644 --- a/arch/s390/include/asm/css_chars.h +++ b/arch/s390/include/asm/css_chars.h @@ -6,36 +6,38 @@ struct css_general_char { u64 : 12; - u32 dynio : 1; /* bit 12 */ - u32 : 4; - u32 eadm : 1; /* bit 17 */ - u32 : 23; - u32 aif : 1; /* bit 41 */ - u32 : 3; - u32 mcss : 1; /* bit 45 */ - u32 fcs : 1; /* bit 46 */ - u32 : 1; - u32 ext_mb : 1; /* bit 48 */ - u32 : 7; - u32 aif_tdd : 1; /* bit 56 */ - u32 : 1; - u32 qebsm : 1; /* bit 58 */ - u32 : 2; - u32 aiv : 1; /* bit 61 */ - u32 : 5; - u32 aif_osa : 1; /* bit 67 */ - u32 : 12; - u32 eadm_rf : 1; /* bit 80 */ - u32 : 1; - u32 cib : 1; /* bit 82 */ - u32 : 5; - u32 fcx : 1; /* bit 88 */ - u32 : 19; - u32 alt_ssi : 1; /* bit 108 */ - u32 : 1; - u32 narf : 1; /* bit 110 */ - u32 : 12; - u32 util_str : 1;/* bit 123 */ + u64 dynio : 1; /* bit 12 */ + u64 : 4; + u64 eadm : 1; /* bit 17 */ + u64 : 23; + u64 aif : 1; /* bit 41 */ + u64 : 3; + u64 mcss : 1; /* bit 45 */ + u64 fcs : 1; /* bit 46 */ + u64 : 1; + u64 ext_mb : 1; /* bit 48 */ + u64 : 7; + u64 aif_tdd : 1; /* bit 56 */ + u64 : 1; + u64 qebsm : 1; /* bit 58 */ + u64 : 2; + u64 aiv : 1; /* bit 61 */ + u64 : 2; + + u64 : 3; + u64 aif_osa : 1; /* bit 67 */ + u64 : 12; + u64 eadm_rf : 1; /* bit 80 */ + u64 : 1; + u64 cib : 1; /* bit 82 */ + u64 : 5; + u64 fcx : 1; /* bit 88 */ + u64 : 19; + u64 alt_ssi : 1; /* bit 108 */ + u64 : 1; + u64 narf : 1; /* bit 110 */ + u64 : 12; + u64 util_str : 1;/* bit 123 */ } __packed; extern struct css_general_char css_general_characteristics; From 5c618c0cf451f1d9746296b0d30c84af1bce3604 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Thu, 24 May 2018 12:18:58 +0200 Subject: [PATCH 050/572] s390/dasd: simplify locking in process_final_queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Simplify locking in __dasd_device_process_final_queue to fix the following sparse warning: drivers/s390/block/dasd.c:1902:9: warning: context imbalance in '__dasd_device_process_final_queue' - different lock contexts for basic block Signed-off-by: Sebastian Ott Reviewed-by: Stefan Haberland Reviewed-by: Jan Höppner Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd.c | 59 +++++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 27 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 73cce3ecb97f..790b1fa3fec0 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -1885,6 +1885,33 @@ static void __dasd_device_process_ccw_queue(struct dasd_device *device, } } +static void __dasd_process_cqr(struct dasd_device *device, + struct dasd_ccw_req *cqr) +{ + char errorstring[ERRORLENGTH]; + + switch (cqr->status) { + case DASD_CQR_SUCCESS: + cqr->status = DASD_CQR_DONE; + break; + case DASD_CQR_ERROR: + cqr->status = DASD_CQR_NEED_ERP; + break; + case DASD_CQR_CLEARED: + cqr->status = DASD_CQR_TERMINATED; + break; + default: + /* internal error 12 - wrong cqr status*/ + snprintf(errorstring, ERRORLENGTH, "12 %p %x02", cqr, cqr->status); + dev_err(&device->cdev->dev, + "An error occurred in the DASD device driver, " + "reason=%s\n", errorstring); + BUG(); + } + if (cqr->callback) + cqr->callback(cqr, cqr->callback_data); +} + /* * the cqrs from the final queue are returned to the upper layer * by setting a dasd_block state and calling the callback function @@ -1895,40 +1922,18 @@ static void __dasd_device_process_final_queue(struct dasd_device *device, struct list_head *l, *n; struct dasd_ccw_req *cqr; struct dasd_block *block; - void (*callback)(struct dasd_ccw_req *, void *data); - void *callback_data; - char errorstring[ERRORLENGTH]; list_for_each_safe(l, n, final_queue) { cqr = list_entry(l, struct dasd_ccw_req, devlist); list_del_init(&cqr->devlist); block = cqr->block; - callback = cqr->callback; - callback_data = cqr->callback_data; - if (block) + if (!block) { + __dasd_process_cqr(device, cqr); + } else { spin_lock_bh(&block->queue_lock); - switch (cqr->status) { - case DASD_CQR_SUCCESS: - cqr->status = DASD_CQR_DONE; - break; - case DASD_CQR_ERROR: - cqr->status = DASD_CQR_NEED_ERP; - break; - case DASD_CQR_CLEARED: - cqr->status = DASD_CQR_TERMINATED; - break; - default: - /* internal error 12 - wrong cqr status*/ - snprintf(errorstring, ERRORLENGTH, "12 %p %x02", cqr, cqr->status); - dev_err(&device->cdev->dev, - "An error occurred in the DASD device driver, " - "reason=%s\n", errorstring); - BUG(); - } - if (cqr->callback != NULL) - (callback)(cqr, callback_data); - if (block) + __dasd_process_cqr(device, cqr); spin_unlock_bh(&block->queue_lock); + } } } From c5205f2ff2bec6acf398211aed66b3e6ac44eee6 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Mon, 4 Jun 2018 19:07:39 +0200 Subject: [PATCH 051/572] s390/dasd: move dasd_ccw_req to per request data Let the block layer allocate per request data to store struct dasd_ccw_req. We still need extra preallocated memory for usage by ccw programs (which vary in length) and for requests which don't originate from the block layer. Link: https://lkml.kernel.org/r/20180530074130.GA6927@infradead.org Signed-off-by: Sebastian Ott Reviewed-by: Stefan Haberland Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd.c | 48 ++++++++++++++++++---------------- drivers/s390/block/dasd_diag.c | 3 ++- drivers/s390/block/dasd_eckd.c | 46 +++++++++++++++++--------------- drivers/s390/block/dasd_fba.c | 6 +++-- drivers/s390/block/dasd_int.h | 3 ++- 5 files changed, 58 insertions(+), 48 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 790b1fa3fec0..01a1d1dabb43 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -1267,35 +1267,37 @@ struct dasd_ccw_req *dasd_kmalloc_request(int magic, int cplength, } EXPORT_SYMBOL(dasd_kmalloc_request); -struct dasd_ccw_req *dasd_smalloc_request(int magic, int cplength, - int datasize, - struct dasd_device *device) +struct dasd_ccw_req *dasd_smalloc_request(int magic, int cplength, int datasize, + struct dasd_device *device, + struct dasd_ccw_req *cqr) { unsigned long flags; - struct dasd_ccw_req *cqr; - char *data; - int size; + char *data, *chunk; + int size = 0; - size = (sizeof(struct dasd_ccw_req) + 7L) & -8L; if (cplength > 0) size += cplength * sizeof(struct ccw1); if (datasize > 0) size += datasize; + if (!cqr) + size += (sizeof(*cqr) + 7L) & -8L; + spin_lock_irqsave(&device->mem_lock, flags); - cqr = (struct dasd_ccw_req *) - dasd_alloc_chunk(&device->ccw_chunks, size); + data = chunk = dasd_alloc_chunk(&device->ccw_chunks, size); spin_unlock_irqrestore(&device->mem_lock, flags); - if (cqr == NULL) + if (!chunk) return ERR_PTR(-ENOMEM); - memset(cqr, 0, sizeof(struct dasd_ccw_req)); - data = (char *) cqr + ((sizeof(struct dasd_ccw_req) + 7L) & -8L); - cqr->cpaddr = NULL; - if (cplength > 0) { - cqr->cpaddr = (struct ccw1 *) data; - data += cplength*sizeof(struct ccw1); - memset(cqr->cpaddr, 0, cplength*sizeof(struct ccw1)); + if (!cqr) { + cqr = (void *) data; + data += (sizeof(*cqr) + 7L) & -8L; + } + memset(cqr, 0, sizeof(*cqr)); + cqr->mem_chunk = chunk; + if (cplength > 0) { + cqr->cpaddr = data; + data += cplength * sizeof(struct ccw1); + memset(cqr->cpaddr, 0, cplength * sizeof(struct ccw1)); } - cqr->data = NULL; if (datasize > 0) { cqr->data = data; memset(cqr->data, 0, datasize); @@ -1333,7 +1335,7 @@ void dasd_sfree_request(struct dasd_ccw_req *cqr, struct dasd_device *device) unsigned long flags; spin_lock_irqsave(&device->mem_lock, flags); - dasd_free_chunk(&device->ccw_chunks, cqr); + dasd_free_chunk(&device->ccw_chunks, cqr->mem_chunk); spin_unlock_irqrestore(&device->mem_lock, flags); dasd_put_device(device); } @@ -3046,7 +3048,6 @@ static blk_status_t do_dasd_request(struct blk_mq_hw_ctx *hctx, cqr->callback_data = req; cqr->status = DASD_CQR_FILLED; cqr->dq = dq; - *((struct dasd_ccw_req **) blk_mq_rq_to_pdu(req)) = cqr; blk_mq_start_request(req); spin_lock(&block->queue_lock); @@ -3077,7 +3078,7 @@ enum blk_eh_timer_return dasd_times_out(struct request *req, bool reserved) unsigned long flags; int rc = 0; - cqr = *((struct dasd_ccw_req **) blk_mq_rq_to_pdu(req)); + cqr = blk_mq_rq_to_pdu(req); if (!cqr) return BLK_EH_DONE; @@ -3179,7 +3180,7 @@ static int dasd_alloc_queue(struct dasd_block *block) int rc; block->tag_set.ops = &dasd_mq_ops; - block->tag_set.cmd_size = sizeof(struct dasd_ccw_req *); + block->tag_set.cmd_size = sizeof(struct dasd_ccw_req); block->tag_set.nr_hw_queues = DASD_NR_HW_QUEUES; block->tag_set.queue_depth = DASD_MAX_LCU_DEV * DASD_REQ_PER_DEV; block->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; @@ -4043,7 +4044,8 @@ static struct dasd_ccw_req *dasd_generic_build_rdc(struct dasd_device *device, struct ccw1 *ccw; unsigned long *idaw; - cqr = dasd_smalloc_request(magic, 1 /* RDC */, rdc_buffer_size, device); + cqr = dasd_smalloc_request(magic, 1 /* RDC */, rdc_buffer_size, device, + NULL); if (IS_ERR(cqr)) { /* internal error 13 - Allocating the RDC request failed*/ diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c index 131f1989f6f3..e1fe02477ea8 100644 --- a/drivers/s390/block/dasd_diag.c +++ b/drivers/s390/block/dasd_diag.c @@ -536,7 +536,8 @@ static struct dasd_ccw_req *dasd_diag_build_cp(struct dasd_device *memdev, /* Build the request */ datasize = sizeof(struct dasd_diag_req) + count*sizeof(struct dasd_diag_bio); - cqr = dasd_smalloc_request(DASD_DIAG_MAGIC, 0, datasize, memdev); + cqr = dasd_smalloc_request(DASD_DIAG_MAGIC, 0, datasize, memdev, + blk_mq_rq_to_pdu(req)); if (IS_ERR(cqr)) return cqr; diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index be208e7adcb4..bbf95b78ef5d 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -886,7 +886,7 @@ static int dasd_eckd_read_conf_lpm(struct dasd_device *device, } cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1 /* RCD */, 0, /* use rcd_buf as data ara */ - device); + device, NULL); if (IS_ERR(cqr)) { DBF_DEV_EVENT(DBF_WARNING, device, "%s", "Could not allocate RCD request"); @@ -1442,7 +1442,7 @@ static int dasd_eckd_read_features(struct dasd_device *device) cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1 /* PSF */ + 1 /* RSSD */, (sizeof(struct dasd_psf_prssd_data) + sizeof(struct dasd_rssd_features)), - device); + device, NULL); if (IS_ERR(cqr)) { DBF_EVENT_DEVID(DBF_WARNING, device->cdev, "%s", "Could not " "allocate initialization request"); @@ -1504,7 +1504,7 @@ static struct dasd_ccw_req *dasd_eckd_build_psf_ssc(struct dasd_device *device, cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1 /* PSF */ , sizeof(struct dasd_psf_ssc_data), - device); + device, NULL); if (IS_ERR(cqr)) { DBF_DEV_EVENT(DBF_WARNING, device, "%s", @@ -1815,7 +1815,8 @@ dasd_eckd_analysis_ccw(struct dasd_device *device) cplength = 8; datasize = sizeof(struct DE_eckd_data) + 2*sizeof(struct LO_eckd_data); - cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, cplength, datasize, device); + cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, cplength, datasize, device, + NULL); if (IS_ERR(cqr)) return cqr; ccw = cqr->cpaddr; @@ -2092,7 +2093,8 @@ dasd_eckd_build_check_tcw(struct dasd_device *base, struct format_data_t *fdata, */ itcw_size = itcw_calc_size(0, count, 0); - cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 0, itcw_size, startdev); + cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 0, itcw_size, startdev, + NULL); if (IS_ERR(cqr)) return cqr; @@ -2186,7 +2188,7 @@ dasd_eckd_build_check(struct dasd_device *base, struct format_data_t *fdata, cplength += count; cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, cplength, datasize, - startdev); + startdev, NULL); if (IS_ERR(cqr)) return cqr; @@ -2332,7 +2334,7 @@ dasd_eckd_build_format(struct dasd_device *base, } /* Allocate the format ccw request. */ fcp = dasd_smalloc_request(DASD_ECKD_MAGIC, cplength, - datasize, startdev); + datasize, startdev, NULL); if (IS_ERR(fcp)) return fcp; @@ -3103,7 +3105,7 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_single( } /* Allocate the ccw request. */ cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, cplength, datasize, - startdev); + startdev, blk_mq_rq_to_pdu(req)); if (IS_ERR(cqr)) return cqr; ccw = cqr->cpaddr; @@ -3262,7 +3264,7 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_track( /* Allocate the ccw request. */ cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, cplength, datasize, - startdev); + startdev, blk_mq_rq_to_pdu(req)); if (IS_ERR(cqr)) return cqr; ccw = cqr->cpaddr; @@ -3595,7 +3597,8 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_tpm_track( /* Allocate the ccw request. */ itcw_size = itcw_calc_size(0, ctidaw, 0); - cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 0, itcw_size, startdev); + cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 0, itcw_size, startdev, + blk_mq_rq_to_pdu(req)); if (IS_ERR(cqr)) return cqr; @@ -3862,7 +3865,7 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_raw(struct dasd_device *startdev, /* Allocate the ccw request. */ cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, cplength, - datasize, startdev); + datasize, startdev, blk_mq_rq_to_pdu(req)); if (IS_ERR(cqr)) return cqr; @@ -4102,7 +4105,7 @@ dasd_eckd_release(struct dasd_device *device) return -EACCES; useglobal = 0; - cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1, 32, device); + cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1, 32, device, NULL); if (IS_ERR(cqr)) { mutex_lock(&dasd_reserve_mutex); useglobal = 1; @@ -4157,7 +4160,7 @@ dasd_eckd_reserve(struct dasd_device *device) return -EACCES; useglobal = 0; - cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1, 32, device); + cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1, 32, device, NULL); if (IS_ERR(cqr)) { mutex_lock(&dasd_reserve_mutex); useglobal = 1; @@ -4211,7 +4214,7 @@ dasd_eckd_steal_lock(struct dasd_device *device) return -EACCES; useglobal = 0; - cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1, 32, device); + cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1, 32, device, NULL); if (IS_ERR(cqr)) { mutex_lock(&dasd_reserve_mutex); useglobal = 1; @@ -4271,7 +4274,8 @@ static int dasd_eckd_snid(struct dasd_device *device, useglobal = 0; cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1, - sizeof(struct dasd_snid_data), device); + sizeof(struct dasd_snid_data), device, + NULL); if (IS_ERR(cqr)) { mutex_lock(&dasd_reserve_mutex); useglobal = 1; @@ -4331,7 +4335,7 @@ dasd_eckd_performance(struct dasd_device *device, void __user *argp) cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1 /* PSF */ + 1 /* RSSD */, (sizeof(struct dasd_psf_prssd_data) + sizeof(struct dasd_rssd_perf_stats_t)), - device); + device, NULL); if (IS_ERR(cqr)) { DBF_DEV_EVENT(DBF_WARNING, device, "%s", "Could not allocate initialization request"); @@ -4477,7 +4481,7 @@ static int dasd_symm_io(struct dasd_device *device, void __user *argp) psf1 = psf_data[1]; /* setup CCWs for PSF + RSSD */ - cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 2 , 0, device); + cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 2, 0, device, NULL); if (IS_ERR(cqr)) { DBF_DEV_EVENT(DBF_WARNING, device, "%s", "Could not allocate initialization request"); @@ -5037,7 +5041,7 @@ static int dasd_eckd_read_message_buffer(struct dasd_device *device, cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1 /* PSF */ + 1 /* RSSD */, (sizeof(struct dasd_psf_prssd_data) + sizeof(struct dasd_rssd_messages)), - device); + device, NULL); if (IS_ERR(cqr)) { DBF_EVENT_DEVID(DBF_WARNING, device->cdev, "%s", "Could not allocate read message buffer request"); @@ -5126,7 +5130,7 @@ static int dasd_eckd_query_host_access(struct dasd_device *device, cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1 /* PSF */ + 1 /* RSSD */, sizeof(struct dasd_psf_prssd_data) + 1, - device); + device, NULL); if (IS_ERR(cqr)) { DBF_EVENT_DEVID(DBF_WARNING, device->cdev, "%s", "Could not allocate read message buffer request"); @@ -5284,8 +5288,8 @@ dasd_eckd_psf_cuir_response(struct dasd_device *device, int response, int rc; cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1 /* PSF */ , - sizeof(struct dasd_psf_cuir_response), - device); + sizeof(struct dasd_psf_cuir_response), + device, NULL); if (IS_ERR(cqr)) { DBF_DEV_EVENT(DBF_WARNING, device, "%s", diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c index a6b132f7e869..56007a3e7f11 100644 --- a/drivers/s390/block/dasd_fba.c +++ b/drivers/s390/block/dasd_fba.c @@ -356,7 +356,8 @@ static struct dasd_ccw_req *dasd_fba_build_cp_discard( datasize = sizeof(struct DE_fba_data) + nr_ccws * (sizeof(struct LO_fba_data) + sizeof(struct ccw1)); - cqr = dasd_smalloc_request(DASD_FBA_MAGIC, cplength, datasize, memdev); + cqr = dasd_smalloc_request(DASD_FBA_MAGIC, cplength, datasize, memdev, + blk_mq_rq_to_pdu(req)); if (IS_ERR(cqr)) return cqr; @@ -490,7 +491,8 @@ static struct dasd_ccw_req *dasd_fba_build_cp_regular( datasize += (count - 1)*sizeof(struct LO_fba_data); } /* Allocate the ccw request. */ - cqr = dasd_smalloc_request(DASD_FBA_MAGIC, cplength, datasize, memdev); + cqr = dasd_smalloc_request(DASD_FBA_MAGIC, cplength, datasize, memdev, + blk_mq_rq_to_pdu(req)); if (IS_ERR(cqr)) return cqr; ccw = cqr->cpaddr; diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 96709b1a7bf8..0844e5e2f566 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -184,6 +184,7 @@ struct dasd_ccw_req { struct irb irb; /* device status in case of an error */ struct dasd_ccw_req *refers; /* ERP-chain queueing. */ void *function; /* originating ERP action */ + void *mem_chunk; /* these are for statistics only */ unsigned long buildclk; /* TOD-clock of request generation */ @@ -716,7 +717,7 @@ extern struct kmem_cache *dasd_page_cache; struct dasd_ccw_req * dasd_kmalloc_request(int , int, int, struct dasd_device *); struct dasd_ccw_req * -dasd_smalloc_request(int , int, int, struct dasd_device *); +dasd_smalloc_request(int, int, int, struct dasd_device *, struct dasd_ccw_req *); void dasd_kfree_request(struct dasd_ccw_req *, struct dasd_device *); void dasd_sfree_request(struct dasd_ccw_req *, struct dasd_device *); void dasd_wakeup_cb(struct dasd_ccw_req *, void *); From d8a72d414baf217a2eea9c73f3aac11052161015 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Tue, 5 Jun 2018 13:34:42 +0200 Subject: [PATCH 052/572] s390/dasd: remove dasd_kmalloc_set_cda There is no user of this function. Just remove it. Signed-off-by: Sebastian Ott Reviewed-by: Stefan Haberland Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd_int.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 0844e5e2f566..885e7416c368 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -722,12 +722,6 @@ void dasd_kfree_request(struct dasd_ccw_req *, struct dasd_device *); void dasd_sfree_request(struct dasd_ccw_req *, struct dasd_device *); void dasd_wakeup_cb(struct dasd_ccw_req *, void *); -static inline int -dasd_kmalloc_set_cda(struct ccw1 *ccw, void *cda, struct dasd_device *device) -{ - return set_normalized_cda(ccw, cda); -} - struct dasd_device *dasd_alloc_device(void); void dasd_free_device(struct dasd_device *); From 61d388321032be9097935bbc5efdd6ac42691ed4 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Mon, 4 Jun 2018 19:39:38 +0200 Subject: [PATCH 053/572] s390/dasd: reshuffle struct dasd_ccw_req Move some members of struct dasd_ccw_req to get rid of padding bytes. This saves 16 bytes per dasd request. Signed-off-by: Sebastian Ott Reviewed-by: Stefan Haberland Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd_int.h | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 885e7416c368..55bcbbed1b1d 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -158,41 +158,33 @@ do { \ struct dasd_ccw_req { unsigned int magic; /* Eye catcher */ + int intrc; /* internal error, e.g. from start_IO */ struct list_head devlist; /* for dasd_device request queue */ struct list_head blocklist; /* for dasd_block request queue */ - - /* Where to execute what... */ struct dasd_block *block; /* the originating block device */ struct dasd_device *memdev; /* the device used to allocate this */ struct dasd_device *startdev; /* device the request is started on */ struct dasd_device *basedev; /* base device if no block->base */ void *cpaddr; /* address of ccw or tcw */ + short retries; /* A retry counter */ unsigned char cpmode; /* 0 = cmd mode, 1 = itcw */ char status; /* status of this request */ - short retries; /* A retry counter */ + char lpm; /* logical path mask */ unsigned long flags; /* flags of this request */ struct dasd_queue *dq; - - /* ... and how */ unsigned long starttime; /* jiffies time of request start */ unsigned long expires; /* expiration period in jiffies */ - char lpm; /* logical path mask */ void *data; /* pointer to data area */ - - /* these are important for recovering erroneous requests */ - int intrc; /* internal error, e.g. from start_IO */ struct irb irb; /* device status in case of an error */ struct dasd_ccw_req *refers; /* ERP-chain queueing. */ void *function; /* originating ERP action */ void *mem_chunk; - /* these are for statistics only */ unsigned long buildclk; /* TOD-clock of request generation */ unsigned long startclk; /* TOD-clock of request start */ unsigned long stopclk; /* TOD-clock of request interrupt */ unsigned long endclk; /* TOD-clock of request termination */ - /* Callback that is called after reaching final status. */ void (*callback)(struct dasd_ccw_req *, void *data); void *callback_data; }; From ec530174c43798099d305fbd6511e5d7fc7616d4 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Mon, 4 Jun 2018 19:18:03 +0200 Subject: [PATCH 054/572] s390/dasd: only use preallocated requests Change the remaining users of dasd_kmalloc_request to use preallocated memory and remove this function. Signed-off-by: Sebastian Ott Reviewed-by: Stefan Haberland Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd.c | 66 --------------------------------- drivers/s390/block/dasd_alias.c | 6 +-- drivers/s390/block/dasd_eer.c | 10 ++--- drivers/s390/block/dasd_int.h | 3 -- 4 files changed, 8 insertions(+), 77 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 01a1d1dabb43..d3a38c421503 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -1222,51 +1222,6 @@ static void dasd_hosts_init(struct dentry *base_dentry, device->hosts_dentry = pde; } -/* - * Allocate memory for a channel program with 'cplength' channel - * command words and 'datasize' additional space. There are two - * variantes: 1) dasd_kmalloc_request uses kmalloc to get the needed - * memory and 2) dasd_smalloc_request uses the static ccw memory - * that gets allocated for each device. - */ -struct dasd_ccw_req *dasd_kmalloc_request(int magic, int cplength, - int datasize, - struct dasd_device *device) -{ - struct dasd_ccw_req *cqr; - - /* Sanity checks */ - BUG_ON(datasize > PAGE_SIZE || - (cplength*sizeof(struct ccw1)) > PAGE_SIZE); - - cqr = kzalloc(sizeof(struct dasd_ccw_req), GFP_ATOMIC); - if (cqr == NULL) - return ERR_PTR(-ENOMEM); - cqr->cpaddr = NULL; - if (cplength > 0) { - cqr->cpaddr = kcalloc(cplength, sizeof(struct ccw1), - GFP_ATOMIC | GFP_DMA); - if (cqr->cpaddr == NULL) { - kfree(cqr); - return ERR_PTR(-ENOMEM); - } - } - cqr->data = NULL; - if (datasize > 0) { - cqr->data = kzalloc(datasize, GFP_ATOMIC | GFP_DMA); - if (cqr->data == NULL) { - kfree(cqr->cpaddr); - kfree(cqr); - return ERR_PTR(-ENOMEM); - } - } - cqr->magic = magic; - set_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags); - dasd_get_device(device); - return cqr; -} -EXPORT_SYMBOL(dasd_kmalloc_request); - struct dasd_ccw_req *dasd_smalloc_request(int magic, int cplength, int datasize, struct dasd_device *device, struct dasd_ccw_req *cqr) @@ -1309,27 +1264,6 @@ struct dasd_ccw_req *dasd_smalloc_request(int magic, int cplength, int datasize, } EXPORT_SYMBOL(dasd_smalloc_request); -/* - * Free memory of a channel program. This function needs to free all the - * idal lists that might have been created by dasd_set_cda and the - * struct dasd_ccw_req itself. - */ -void dasd_kfree_request(struct dasd_ccw_req *cqr, struct dasd_device *device) -{ - struct ccw1 *ccw; - - /* Clear any idals used for the request. */ - ccw = cqr->cpaddr; - do { - clear_normalized_cda(ccw); - } while (ccw++->flags & (CCW_FLAG_CC | CCW_FLAG_DC)); - kfree(cqr->cpaddr); - kfree(cqr->data); - kfree(cqr); - dasd_put_device(device); -} -EXPORT_SYMBOL(dasd_kfree_request); - void dasd_sfree_request(struct dasd_ccw_req *cqr, struct dasd_device *device) { unsigned long flags; diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c index 5e963fe0e38d..e36a114354fc 100644 --- a/drivers/s390/block/dasd_alias.c +++ b/drivers/s390/block/dasd_alias.c @@ -407,9 +407,9 @@ static int read_unit_address_configuration(struct dasd_device *device, int rc; unsigned long flags; - cqr = dasd_kmalloc_request(DASD_ECKD_MAGIC, 1 /* PSF */ + 1 /* RSSD */, + cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1 /* PSF */ + 1 /* RSSD */, (sizeof(struct dasd_psf_prssd_data)), - device); + device, NULL); if (IS_ERR(cqr)) return PTR_ERR(cqr); cqr->startdev = device; @@ -457,7 +457,7 @@ static int read_unit_address_configuration(struct dasd_device *device, lcu->flags |= NEED_UAC_UPDATE; spin_unlock_irqrestore(&lcu->lock, flags); } - dasd_kfree_request(cqr, cqr->memdev); + dasd_sfree_request(cqr, cqr->memdev); return rc; } diff --git a/drivers/s390/block/dasd_eer.c b/drivers/s390/block/dasd_eer.c index fb2c3599d95c..6545342bd43f 100644 --- a/drivers/s390/block/dasd_eer.c +++ b/drivers/s390/block/dasd_eer.c @@ -447,7 +447,7 @@ static void dasd_eer_snss_cb(struct dasd_ccw_req *cqr, void *data) * is a new ccw in device->eer_cqr. Free the "old" * snss request now. */ - dasd_kfree_request(cqr, device); + dasd_sfree_request(cqr, device); } /* @@ -472,8 +472,8 @@ int dasd_eer_enable(struct dasd_device *device) if (rc) goto out; - cqr = dasd_kmalloc_request(DASD_ECKD_MAGIC, 1 /* SNSS */, - SNSS_DATA_SIZE, device); + cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1 /* SNSS */, + SNSS_DATA_SIZE, device, NULL); if (IS_ERR(cqr)) { rc = -ENOMEM; cqr = NULL; @@ -505,7 +505,7 @@ out: spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); if (cqr) - dasd_kfree_request(cqr, device); + dasd_sfree_request(cqr, device); return rc; } @@ -528,7 +528,7 @@ void dasd_eer_disable(struct dasd_device *device) in_use = test_and_clear_bit(DASD_FLAG_EER_IN_USE, &device->flags); spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); if (cqr && !in_use) - dasd_kfree_request(cqr, device); + dasd_sfree_request(cqr, device); } /* diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 55bcbbed1b1d..976b6bd4fb05 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -706,11 +706,8 @@ extern const struct block_device_operations dasd_device_operations; extern struct kmem_cache *dasd_page_cache; -struct dasd_ccw_req * -dasd_kmalloc_request(int , int, int, struct dasd_device *); struct dasd_ccw_req * dasd_smalloc_request(int, int, int, struct dasd_device *, struct dasd_ccw_req *); -void dasd_kfree_request(struct dasd_ccw_req *, struct dasd_device *); void dasd_sfree_request(struct dasd_ccw_req *, struct dasd_device *); void dasd_wakeup_cb(struct dasd_ccw_req *, void *); From c60c32a5775615d5456a09527aaa12e4a109c3da Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 4 May 2018 17:25:48 +0200 Subject: [PATCH 055/572] posix-cpu-timers: Remove lockdep_assert_irqs_disabled() The lockdep_assert_irqs_disabled() was a BUG_ON() statement in the beginning and it was added just before the "spin_lock(siglock)" statement to ensure this lock was taken with disabled interrupts. This is no longer the case: the siglock is acquired via lock_task_sighand() and this function already disables the interrupts. The lock is also acquired before this "lockdep_assert_irqs_disabled" so it is best to remove it. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Cc: Frederic Weisbecker Link: https://lkml.kernel.org/r20180504152548.7166-1-bigeasy@linutronix.de --- kernel/time/posix-cpu-timers.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 5a6251ac6f7a..9cdf54b04ca8 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -604,7 +604,6 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags, /* * Disarm any old timer after extracting its expiry time. */ - lockdep_assert_irqs_disabled(); ret = 0; old_incr = timer->it.cpu.incr; @@ -1049,7 +1048,6 @@ static void posix_cpu_timer_rearm(struct k_itimer *timer) /* * Now re-arm for the new expiry time. */ - lockdep_assert_irqs_disabled(); arm_timer(timer); unlock: unlock_task_sighand(p, &flags); From 1eb9364ce81d9445ad6f9d44921a91d2a6597156 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 12 Jun 2018 09:40:23 -0600 Subject: [PATCH 056/572] IB/uverbs: Fix ordering of ucontext check in ib_uverbs_write During disassociation the ucontext will become NULL, however due to how the SRCU locking works the ucontext must only be examined after looking at the ib_dev, which governs the RCU control flow. With the wrong ordering userspace will see EINVAL instead of EIO for a disassociated uverbs FD, which breaks rdma-core. Cc: stable@vger.kernel.org Fixes: 491d5c6a3023 ("RDMA/uverbs: Move uncontext check before SRCU read lock") Reported-by: Mark Bloch Signed-off-by: Jason Gunthorpe Reviewed-by: Leon Romanovsky --- drivers/infiniband/core/uverbs_main.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 3ae2339dd27a..2094d136513d 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -736,10 +736,6 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, if (ret) return ret; - if (!file->ucontext && - (command != IB_USER_VERBS_CMD_GET_CONTEXT || extended)) - return -EINVAL; - if (extended) { if (count < (sizeof(hdr) + sizeof(ex_hdr))) return -EINVAL; @@ -759,6 +755,16 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, goto out; } + /* + * Must be after the ib_dev check, as once the RCU clears ib_dev == + * NULL means ucontext == NULL + */ + if (!file->ucontext && + (command != IB_USER_VERBS_CMD_GET_CONTEXT || extended)) { + ret = -EINVAL; + goto out; + } + if (!verify_command_mask(ib_dev, command, extended)) { ret = -EOPNOTSUPP; goto out; From f2ae67941138a1e53cb1bc6a1b5878a8bdc74d26 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 12 Jun 2018 18:16:19 +0200 Subject: [PATCH 057/572] alpha: Remove custom dec_and_lock() implementation Alpha provides a custom implementation of dec_and_lock(). The functions is split into two parts: - atomic_add_unless() + return 0 (fast path in assembly) - remaining part including locking (slow path in C) Comparing the result of the alpha implementation with the generic implementation compiled by gcc it looks like the fast path is optimized by avoiding a stack frame (and reloading the GP), register store and all this. This is only done in the slowpath. After marking the slowpath (atomic_dec_and_lock_1()) as "noinline" and doing the slowpath in C (the atomic_add_unless(atomic, -1, 1) part) I noticed differences in the resulting assembly: - the GP is still reloaded - atomic_add_unless() adds more memory barriers compared to the custom assembly - the custom assembly here does "load, sub, beq" while atomic_add_unless() does "load, cmpeq, add, bne". This is okay because it compares against zero after subtraction while the generic code compares against 1 before. I'm not sure if avoiding the stack frame (and GP reloading) brings a lot in terms of performance. Regarding the different barriers, Peter Zijlstra says: |refcount decrement needs to be a RELEASE operation, such that all the |load/stores to the object happen before we decrement the refcount. | |Otherwise things like: | | obj->foo = 5; | refcnt_dec(&obj->ref); | |can be re-ordered, which then allows fun scenarios like: | | CPU0 CPU1 | | refcnt_dec(&obj->ref); | if (dec_and_test(&obj->ref)) | free(obj); | obj->foo = 5; // oops UaF | | |This means (for alpha) that there should be a memory barrier _before_ |the decrement, however the dec_and_lock asm thing only has one _after_, |which, per the above, is too late. | |The generic version using add_unless will result in memory barrier |before and after (because that is the rule for atomic ops with a return |value) which is strictly too many barriers for the refcount story, but |who knows what other ordering requirements code has. Remove the custom alpha implementation of dec_and_lock() and if it is an issue (performance wise) then the fast path could still be inlined. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Matt Turner Cc: linux-alpha@vger.kernel.org Link: https://lkml.kernel.org/r/20180606115918.GG12198@hirez.programming.kicks-ass.net Link: https://lkml.kernel.org/r20180612161621.22645-2-bigeasy@linutronix.de --- arch/alpha/Kconfig | 5 ---- arch/alpha/lib/Makefile | 2 -- arch/alpha/lib/dec_and_lock.c | 44 ----------------------------------- lib/Makefile | 6 +---- 4 files changed, 1 insertion(+), 56 deletions(-) delete mode 100644 arch/alpha/lib/dec_and_lock.c diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 0c4805a572c8..04a4a138ed13 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -555,11 +555,6 @@ config SMP If you don't know what to do here, say N. -config HAVE_DEC_LOCK - bool - depends on SMP - default y - config NR_CPUS int "Maximum number of CPUs (2-32)" range 2 32 diff --git a/arch/alpha/lib/Makefile b/arch/alpha/lib/Makefile index 04f9729de57c..854d5e79979e 100644 --- a/arch/alpha/lib/Makefile +++ b/arch/alpha/lib/Makefile @@ -35,8 +35,6 @@ lib-y = __divqu.o __remqu.o __divlu.o __remlu.o \ callback_srm.o srm_puts.o srm_printk.o \ fls.o -lib-$(CONFIG_SMP) += dec_and_lock.o - # The division routines are built from single source, with different defines. AFLAGS___divqu.o = -DDIV AFLAGS___remqu.o = -DREM diff --git a/arch/alpha/lib/dec_and_lock.c b/arch/alpha/lib/dec_and_lock.c deleted file mode 100644 index a117707f57fe..000000000000 --- a/arch/alpha/lib/dec_and_lock.c +++ /dev/null @@ -1,44 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * arch/alpha/lib/dec_and_lock.c - * - * ll/sc version of atomic_dec_and_lock() - * - */ - -#include -#include -#include - - asm (".text \n\ - .global _atomic_dec_and_lock \n\ - .ent _atomic_dec_and_lock \n\ - .align 4 \n\ -_atomic_dec_and_lock: \n\ - .prologue 0 \n\ -1: ldl_l $1, 0($16) \n\ - subl $1, 1, $1 \n\ - beq $1, 2f \n\ - stl_c $1, 0($16) \n\ - beq $1, 4f \n\ - mb \n\ - clr $0 \n\ - ret \n\ -2: br $29, 3f \n\ -3: ldgp $29, 0($29) \n\ - br $atomic_dec_and_lock_1..ng \n\ - .subsection 2 \n\ -4: br 1b \n\ - .previous \n\ - .end _atomic_dec_and_lock"); - -static int __used atomic_dec_and_lock_1(atomic_t *atomic, spinlock_t *lock) -{ - /* Slow path */ - spin_lock(lock); - if (atomic_dec_and_test(atomic)) - return 1; - spin_unlock(lock); - return 0; -} -EXPORT_SYMBOL(_atomic_dec_and_lock); diff --git a/lib/Makefile b/lib/Makefile index 84c6dcb31fbb..8b59f4a7c0e2 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -23,7 +23,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ sha1.o chacha20.o irq_regs.o argv_split.o \ flex_proportions.o ratelimit.o show_mem.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ - earlycpio.o seq_buf.o siphash.o \ + earlycpio.o seq_buf.o siphash.o dec_and_lock.o \ nmi_backtrace.o nodemask.o win_minmax.o lib-$(CONFIG_PRINTK) += dump_stack.o @@ -98,10 +98,6 @@ obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o obj-$(CONFIG_DEBUG_LIST) += list_debug.o obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o -ifneq ($(CONFIG_HAVE_DEC_LOCK),y) - lib-y += dec_and_lock.o -endif - obj-$(CONFIG_BITREVERSE) += bitrev.o obj-$(CONFIG_RATIONAL) += rational.o obj-$(CONFIG_CRC_CCITT) += crc-ccitt.o From ccfbb5bed407053b27492a9adc06064d949a9aa6 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Tue, 12 Jun 2018 18:16:20 +0200 Subject: [PATCH 058/572] atomic: Add irqsave variant of atomic_dec_and_lock() There are in-tree users of atomic_dec_and_lock() which must acquire the spin lock with interrupts disabled. To workaround the lack of an irqsave variant of atomic_dec_and_lock() they use local_irq_save() at the call site. This causes extra code and creates in some places unneeded long interrupt disabled times. These places need also extra treatment for PREEMPT_RT due to the disconnect of the irq disabling and the lock function. Implement the missing irqsave variant of the function. Signed-off-by: Anna-Maria Gleixner Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r20180612161621.22645-3-bigeasy@linutronix.de --- include/linux/spinlock.h | 5 +++++ lib/dec_and_lock.c | 16 ++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 1e8a46435838..fd57888d4942 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -427,6 +427,11 @@ extern int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock); #define atomic_dec_and_lock(atomic, lock) \ __cond_lock(lock, _atomic_dec_and_lock(atomic, lock)) +extern int _atomic_dec_and_lock_irqsave(atomic_t *atomic, spinlock_t *lock, + unsigned long *flags); +#define atomic_dec_and_lock_irqsave(atomic, lock, flags) \ + __cond_lock(lock, _atomic_dec_and_lock_irqsave(atomic, lock, &(flags))) + int alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *lock_mask, size_t max_size, unsigned int cpu_mult, gfp_t gfp); diff --git a/lib/dec_and_lock.c b/lib/dec_and_lock.c index 347fa7ac2e8a..9555b68bb774 100644 --- a/lib/dec_and_lock.c +++ b/lib/dec_and_lock.c @@ -33,3 +33,19 @@ int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) } EXPORT_SYMBOL(_atomic_dec_and_lock); + +int _atomic_dec_and_lock_irqsave(atomic_t *atomic, spinlock_t *lock, + unsigned long *flags) +{ + /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */ + if (atomic_add_unless(atomic, -1, 1)) + return 0; + + /* Otherwise do it the slow way */ + spin_lock_irqsave(lock, *flags); + if (atomic_dec_and_test(atomic)) + return 1; + spin_unlock_irqrestore(lock, *flags); + return 0; +} +EXPORT_SYMBOL(_atomic_dec_and_lock_irqsave); From 7ea959c45769612aa92557fb6464679f5fec7d9e Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Tue, 12 Jun 2018 18:16:21 +0200 Subject: [PATCH 059/572] locking/refcounts: Implement refcount_dec_and_lock_irqsave() There are in-tree users of refcount_dec_and_lock() which must acquire the spin lock with interrupts disabled. To workaround the lack of an irqsave variant of refcount_dec_and_lock() they use local_irq_save() at the call site. This causes extra code and creates in some places unneeded long interrupt disabled times. These places need also extra treatment for PREEMPT_RT due to the disconnect of the irq disabling and the lock function. Implement the missing irqsave variant of the function. Signed-off-by: Anna-Maria Gleixner Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r20180612161621.22645-4-bigeasy@linutronix.de [bigeasy: s@atomic_dec_and_lock@refcount_dec_and_lock@g] --- include/linux/refcount.h | 4 +++- lib/refcount.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/include/linux/refcount.h b/include/linux/refcount.h index 4193c41e383a..a685da2c4522 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -98,5 +98,7 @@ extern __must_check bool refcount_dec_if_one(refcount_t *r); extern __must_check bool refcount_dec_not_one(refcount_t *r); extern __must_check bool refcount_dec_and_mutex_lock(refcount_t *r, struct mutex *lock); extern __must_check bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock); - +extern __must_check bool refcount_dec_and_lock_irqsave(refcount_t *r, + spinlock_t *lock, + unsigned long *flags); #endif /* _LINUX_REFCOUNT_H */ diff --git a/lib/refcount.c b/lib/refcount.c index 0eb48353abe3..d3b81cefce91 100644 --- a/lib/refcount.c +++ b/lib/refcount.c @@ -350,3 +350,31 @@ bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock) } EXPORT_SYMBOL(refcount_dec_and_lock); +/** + * refcount_dec_and_lock_irqsave - return holding spinlock with disabled + * interrupts if able to decrement refcount to 0 + * @r: the refcount + * @lock: the spinlock to be locked + * @flags: saved IRQ-flags if the is acquired + * + * Same as refcount_dec_and_lock() above except that the spinlock is acquired + * with disabled interupts. + * + * Return: true and hold spinlock if able to decrement refcount to 0, false + * otherwise + */ +bool refcount_dec_and_lock_irqsave(refcount_t *r, spinlock_t *lock, + unsigned long *flags) +{ + if (refcount_dec_not_one(r)) + return false; + + spin_lock_irqsave(lock, *flags); + if (!refcount_dec_and_test(r)) { + spin_unlock_irqrestore(lock, *flags); + return false; + } + + return true; +} +EXPORT_SYMBOL(refcount_dec_and_lock_irqsave); From a26ed66c20f080c510fcf5bd448bce204f2c19d7 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 10 Jun 2018 16:24:15 +0200 Subject: [PATCH 060/572] clocksource/drivers/stm32: Fix error return code Return an error code on failure. Problem found using Coccinelle. Signed-off-by: Julia Lawall Signed-off-by: Thomas Gleixner Cc: Daniel Lezcano Cc: kernel-janitors@vger.kernel.org Cc: Maxime Coquelin Cc: Alexandre Torgue Cc: linux-arm-kernel@lists.infradead.org Link: https://lkml.kernel.org/r1528640655-18948-3-git-send-email-Julia.Lawall@lip6.fr --- drivers/clocksource/timer-stm32.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/clocksource/timer-stm32.c b/drivers/clocksource/timer-stm32.c index e5cdc3af684c..2717f88c7904 100644 --- a/drivers/clocksource/timer-stm32.c +++ b/drivers/clocksource/timer-stm32.c @@ -304,8 +304,10 @@ static int __init stm32_timer_init(struct device_node *node) to->private_data = kzalloc(sizeof(struct stm32_timer_private), GFP_KERNEL); - if (!to->private_data) + if (!to->private_data) { + ret = -ENOMEM; goto deinit; + } rstc = of_reset_control_get(node, NULL); if (!IS_ERR(rstc)) { From 73df93c57c0b18195a2fe5429747e00018b3e863 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 11 Jun 2018 22:35:48 -0700 Subject: [PATCH 061/572] tools/bpftool: fix a bug in bpftool perf Commit b04df400c302 ("tools/bpftool: add perf subcommand") introduced bpftool subcommand perf to query bpf program kuprobe and tracepoint attachments. The perf subcommand will first test whether bpf subcommand BPF_TASK_FD_QUERY is supported in kernel or not. It does it by opening a file with argv[0] and feeds the file descriptor and current task pid to the kernel for querying. Such an approach won't work if the argv[0] cannot be opened successfully in the current directory. This is especially true when bpftool is accessible through PATH env variable. The error below reflects the open failure for file argv[0] at home directory. [yhs@localhost ~]$ which bpftool /usr/local/sbin/bpftool [yhs@localhost ~]$ bpftool perf Error: perf_query_support: No such file or directory To fix the issue, let us open root directory ("/") which exists in every linux system. With the fix, the error message will correctly reflect the permission issue. [yhs@localhost ~]$ which bpftool /usr/local/sbin/bpftool [yhs@localhost ~]$ bpftool perf Error: perf_query_support: Operation not permitted HINT: non root or kernel doesn't support TASK_FD_QUERY Fixes: b04df400c302 ("tools/bpftool: add perf subcommand") Reported-by: Alexei Starovoitov Signed-off-by: Yonghong Song Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/perf.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/bpf/bpftool/perf.c b/tools/bpf/bpftool/perf.c index ac6b1a12c9b7..b76b77dcfd1f 100644 --- a/tools/bpf/bpftool/perf.c +++ b/tools/bpf/bpftool/perf.c @@ -29,9 +29,10 @@ static bool has_perf_query_support(void) if (perf_query_supported) goto out; - fd = open(bin_name, O_RDONLY); + fd = open("/", O_RDONLY); if (fd < 0) { - p_err("perf_query_support: %s", strerror(errno)); + p_err("perf_query_support: cannot open directory \"/\" (%s)", + strerror(errno)); goto out; } From 3bce593ac06b4f18710274cfb084369b3d7909eb Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Tue, 12 Jun 2018 13:05:10 +0200 Subject: [PATCH 062/572] selftests: bpf: config: add config fragments Tests test_tunnel.sh fails due to config fragments ins't enabled. Fixes: 933a741e3b82 ("selftests/bpf: bpf tunnel test.") Signed-off-by: Anders Roxell Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/config | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 1eefe211a4a8..7eb613ffef55 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -7,3 +7,13 @@ CONFIG_CGROUP_BPF=y CONFIG_NETDEVSIM=m CONFIG_NET_CLS_ACT=y CONFIG_NET_SCH_INGRESS=y +CONFIG_NET_IPIP=y +CONFIG_IPV6=y +CONFIG_NET_IPGRE_DEMUX=y +CONFIG_NET_IPGRE=y +CONFIG_IPV6_GRE=y +CONFIG_CRYPTO_USER_API_HASH=m +CONFIG_CRYPTO_HMAC=m +CONFIG_CRYPTO_SHA256=m +CONFIG_VXLAN=y +CONFIG_GENEVE=y From 8efaac07d7e6694f39521f9fb8a5c848b712ecee Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Fri, 8 Jun 2018 08:04:57 +0200 Subject: [PATCH 063/572] drm/bridge/sii8620: simplify hardware reset procedure There is no need to flip reset pin twice. Also delays can be changed to values present in vendor's code. Signed-off-by: Andrzej Hajda Tested-by: Marek Szyprowski Reviewed-by: Maciej Purski Link: https://patchwork.freedesktop.org/patch/msgid/20180608060457.18357-1-a.hajda@samsung.com --- drivers/gpu/drm/bridge/sil-sii8620.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/bridge/sil-sii8620.c b/drivers/gpu/drm/bridge/sil-sii8620.c index 7ab36042a822..d1e780fba4b6 100644 --- a/drivers/gpu/drm/bridge/sil-sii8620.c +++ b/drivers/gpu/drm/bridge/sil-sii8620.c @@ -971,8 +971,17 @@ static int sii8620_hw_on(struct sii8620 *ctx) ret = regulator_bulk_enable(ARRAY_SIZE(ctx->supplies), ctx->supplies); if (ret) return ret; + usleep_range(10000, 20000); - return clk_prepare_enable(ctx->clk_xtal); + ret = clk_prepare_enable(ctx->clk_xtal); + if (ret) + return ret; + + msleep(100); + gpiod_set_value(ctx->gpio_reset, 0); + msleep(100); + + return 0; } static int sii8620_hw_off(struct sii8620 *ctx) @@ -982,17 +991,6 @@ static int sii8620_hw_off(struct sii8620 *ctx) return regulator_bulk_disable(ARRAY_SIZE(ctx->supplies), ctx->supplies); } -static void sii8620_hw_reset(struct sii8620 *ctx) -{ - usleep_range(10000, 20000); - gpiod_set_value(ctx->gpio_reset, 0); - usleep_range(5000, 20000); - gpiod_set_value(ctx->gpio_reset, 1); - usleep_range(10000, 20000); - gpiod_set_value(ctx->gpio_reset, 0); - msleep(300); -} - static void sii8620_cbus_reset(struct sii8620 *ctx) { sii8620_write(ctx, REG_PWD_SRST, BIT_PWD_SRST_CBUS_RST @@ -2112,7 +2110,6 @@ static void sii8620_cable_in(struct sii8620 *ctx) dev_err(dev, "Error powering on, %d.\n", ret); return; } - sii8620_hw_reset(ctx); sii8620_read_buf(ctx, REG_VND_IDL, ver, ARRAY_SIZE(ver)); ret = sii8620_clear_error(ctx); From 8e627a1b1ce8feb3e1da4428b71b9b4905f04888 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Mon, 15 Jan 2018 18:33:57 +0100 Subject: [PATCH 064/572] drm/bridge/sii8620: fix loops in EDID fetch logic Function should constantly check if cable is connected and finish in finite time. Signed-off-by: Andrzej Hajda Tested-by: Marek Szyprowski Reviewed-by: Maciej Purski Link: https://patchwork.freedesktop.org/patch/msgid/20180115173357.31067-4-a.hajda@samsung.com --- drivers/gpu/drm/bridge/sil-sii8620.c | 31 ++++++++++++++++++---------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/bridge/sil-sii8620.c b/drivers/gpu/drm/bridge/sil-sii8620.c index d1e780fba4b6..720bc7c325e0 100644 --- a/drivers/gpu/drm/bridge/sil-sii8620.c +++ b/drivers/gpu/drm/bridge/sil-sii8620.c @@ -807,6 +807,7 @@ static void sii8620_burst_rx_all(struct sii8620 *ctx) static void sii8620_fetch_edid(struct sii8620 *ctx) { u8 lm_ddc, ddc_cmd, int3, cbus; + unsigned long timeout; int fetched, i; int edid_len = EDID_LENGTH; u8 *edid; @@ -856,23 +857,31 @@ static void sii8620_fetch_edid(struct sii8620 *ctx) REG_DDC_CMD, ddc_cmd | VAL_DDC_CMD_ENH_DDC_READ_NO_ACK ); - do { - int3 = sii8620_readb(ctx, REG_INTR3); + int3 = 0; + timeout = jiffies + msecs_to_jiffies(200); + for (;;) { cbus = sii8620_readb(ctx, REG_CBUS_STATUS); - - if (int3 & BIT_DDC_CMD_DONE) - break; - - if (!(cbus & BIT_CBUS_STATUS_CBUS_CONNECTED)) { + if (~cbus & BIT_CBUS_STATUS_CBUS_CONNECTED) { + kfree(edid); + edid = NULL; + goto end; + } + if (int3 & BIT_DDC_CMD_DONE) { + if (sii8620_readb(ctx, REG_DDC_DOUT_CNT) + >= FETCH_SIZE) + break; + } else { + int3 = sii8620_readb(ctx, REG_INTR3); + } + if (time_is_before_jiffies(timeout)) { + ctx->error = -ETIMEDOUT; + dev_err(ctx->dev, "timeout during EDID read\n"); kfree(edid); edid = NULL; goto end; } - } while (1); - - sii8620_readb(ctx, REG_DDC_STATUS); - while (sii8620_readb(ctx, REG_DDC_DOUT_CNT) < FETCH_SIZE) usleep_range(10, 20); + } sii8620_read_buf(ctx, REG_DDC_DATA, edid + fetched, FETCH_SIZE); if (fetched + FETCH_SIZE == EDID_LENGTH) { From ecba7cfa3afbe489288f2c819158b7402afd7ee9 Mon Sep 17 00:00:00 2001 From: Maciej Purski Date: Fri, 2 Feb 2018 11:54:25 +0100 Subject: [PATCH 065/572] drm/bridge/sii8620: fix display modes validation Current implementation of mode_valid() and mode_fixup() callbacks handle packed pixel modes improperly. Fix it by using proper maximum clock values from the documentation. Signed-off-by: Maciej Purski Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1517568865-25219-1-git-send-email-m.purski@samsung.com --- drivers/gpu/drm/bridge/sil-sii8620.c | 78 ++++++++++++++-------------- 1 file changed, 40 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/bridge/sil-sii8620.c b/drivers/gpu/drm/bridge/sil-sii8620.c index 720bc7c325e0..5267dc6551af 100644 --- a/drivers/gpu/drm/bridge/sil-sii8620.c +++ b/drivers/gpu/drm/bridge/sil-sii8620.c @@ -36,8 +36,11 @@ #define SII8620_BURST_BUF_LEN 288 #define VAL_RX_HDMI_CTRL2_DEFVAL VAL_RX_HDMI_CTRL2_IDLE_CNT(3) -#define MHL1_MAX_LCLK 225000 -#define MHL3_MAX_LCLK 600000 + +#define MHL1_MAX_PCLK 75000 +#define MHL1_MAX_PCLK_PP_MODE 150000 +#define MHL3_MAX_PCLK 200000 +#define MHL3_MAX_PCLK_PP_MODE 300000 enum sii8620_mode { CM_DISCONNECTED, @@ -2274,17 +2277,43 @@ static void sii8620_detach(struct drm_bridge *bridge) rc_unregister_device(ctx->rc_dev); } +static int sii8620_is_packing_required(struct sii8620 *ctx, + const struct drm_display_mode *mode) +{ + int max_pclk, max_pclk_pp_mode; + + if (sii8620_is_mhl3(ctx)) { + max_pclk = MHL3_MAX_PCLK; + max_pclk_pp_mode = MHL3_MAX_PCLK_PP_MODE; + } else { + max_pclk = MHL1_MAX_PCLK; + max_pclk_pp_mode = MHL1_MAX_PCLK_PP_MODE; + } + + if (mode->clock < max_pclk) + return 0; + else if (mode->clock < max_pclk_pp_mode) + return 1; + else + return -1; +} + static enum drm_mode_status sii8620_mode_valid(struct drm_bridge *bridge, const struct drm_display_mode *mode) { struct sii8620 *ctx = bridge_to_sii8620(bridge); + int pack_required = sii8620_is_packing_required(ctx, mode); bool can_pack = ctx->devcap[MHL_DCAP_VID_LINK_MODE] & MHL_DCAP_VID_LINK_PPIXEL; - unsigned int max_pclk = sii8620_is_mhl3(ctx) ? MHL3_MAX_LCLK : - MHL1_MAX_LCLK; - max_pclk /= can_pack ? 2 : 3; - return (mode->clock > max_pclk) ? MODE_CLOCK_HIGH : MODE_OK; + switch (pack_required) { + case 0: + return MODE_OK; + case 1: + return (can_pack) ? MODE_OK : MODE_CLOCK_HIGH; + default: + return MODE_CLOCK_HIGH; + } } static bool sii8620_mode_fixup(struct drm_bridge *bridge, @@ -2292,43 +2321,16 @@ static bool sii8620_mode_fixup(struct drm_bridge *bridge, struct drm_display_mode *adjusted_mode) { struct sii8620 *ctx = bridge_to_sii8620(bridge); - int max_lclk; - bool ret = true; mutex_lock(&ctx->lock); - max_lclk = sii8620_is_mhl3(ctx) ? MHL3_MAX_LCLK : MHL1_MAX_LCLK; - if (max_lclk > 3 * adjusted_mode->clock) { - ctx->use_packed_pixel = 0; - goto end; - } - if ((ctx->devcap[MHL_DCAP_VID_LINK_MODE] & MHL_DCAP_VID_LINK_PPIXEL) && - max_lclk > 2 * adjusted_mode->clock) { - ctx->use_packed_pixel = 1; - goto end; - } - ret = false; -end: - if (ret) { - u8 vic = drm_match_cea_mode(adjusted_mode); + ctx->use_packed_pixel = sii8620_is_packing_required(ctx, adjusted_mode); + ctx->video_code = drm_match_cea_mode(adjusted_mode); + ctx->pixel_clock = adjusted_mode->clock; - if (!vic) { - union hdmi_infoframe frm; - u8 mhl_vic[] = { 0, 95, 94, 93, 98 }; - - /* FIXME: We need the connector here */ - drm_hdmi_vendor_infoframe_from_display_mode( - &frm.vendor.hdmi, NULL, adjusted_mode); - vic = frm.vendor.hdmi.vic; - if (vic >= ARRAY_SIZE(mhl_vic)) - vic = 0; - vic = mhl_vic[vic]; - } - ctx->video_code = vic; - ctx->pixel_clock = adjusted_mode->clock; - } mutex_unlock(&ctx->lock); - return ret; + + return true; } static const struct drm_bridge_funcs sii8620_bridge_funcs = { From 9378cecb1ce5d618b8aff4d65113ddcf72fc1011 Mon Sep 17 00:00:00 2001 From: Maciej Purski Date: Wed, 22 Nov 2017 10:08:38 +0100 Subject: [PATCH 066/572] drm/bridge/sii8620: fix potential buffer overflow Buffer overflow error should not occur, as mode_fixup() callback filters pixel clock value and it should never exceed 600000. However, current implementation is not obviously safe and relies on implementation of mode_fixup(). Make 'i' variable never reach unsafe value in order to avoid buffer overflow error. Reported-by: Dan Carpenter Fixes: bf1722ca ("drm/bridge/sii8620: rewrite hdmi start sequence") Signed-off-by: Maciej Purski Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1511341718-6974-1-git-send-email-m.purski@samsung.com --- drivers/gpu/drm/bridge/sil-sii8620.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/bridge/sil-sii8620.c b/drivers/gpu/drm/bridge/sil-sii8620.c index 5267dc6551af..61fd3e0a4ba6 100644 --- a/drivers/gpu/drm/bridge/sil-sii8620.c +++ b/drivers/gpu/drm/bridge/sil-sii8620.c @@ -1226,7 +1226,7 @@ static void sii8620_start_video(struct sii8620 *ctx) int clk = ctx->pixel_clock * (ctx->use_packed_pixel ? 2 : 3); int i; - for (i = 0; i < ARRAY_SIZE(clk_spec); ++i) + for (i = 0; i < ARRAY_SIZE(clk_spec) - 1; ++i) if (clk < clk_spec[i].max_clk) break; From bbc05e172fad9affa388be35b78b9e5e5da76648 Mon Sep 17 00:00:00 2001 From: Maciej Purski Date: Wed, 29 Nov 2017 12:48:50 +0100 Subject: [PATCH 067/572] drm/bridge/sii8620: start MHL transmission after HDMI signal detection The vendor code waits for infoframe to detect video mode set by source. We do not need to follow this pattern, because video mode information is provided by drm core. As a result most of the infoframe handling code can be removed. Start transmission immediately after detecting stream on HDMI lines in irq_scdt() function without waiting for infoframe interrupt. Signed-off-by: Maciej Purski Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1511956130-24482-1-git-send-email-m.purski@samsung.com --- drivers/gpu/drm/bridge/sil-sii8620.c | 53 ++-------------------------- 1 file changed, 2 insertions(+), 51 deletions(-) diff --git a/drivers/gpu/drm/bridge/sil-sii8620.c b/drivers/gpu/drm/bridge/sil-sii8620.c index 61fd3e0a4ba6..853c4f97c7c9 100644 --- a/drivers/gpu/drm/bridge/sil-sii8620.c +++ b/drivers/gpu/drm/bridge/sil-sii8620.c @@ -1941,14 +1941,6 @@ static void sii8620_irq_edid(struct sii8620 *ctx) ctx->mt_state = MT_STATE_DONE; } -static void sii8620_scdt_high(struct sii8620 *ctx) -{ - sii8620_write_seq_static(ctx, - REG_INTR8_MASK, BIT_CEA_NEW_AVI | BIT_CEA_NEW_VSI, - REG_TPI_SC, BIT_TPI_SC_TPI_OUTPUT_MODE_0_HDMI, - ); -} - static void sii8620_irq_scdt(struct sii8620 *ctx) { u8 stat = sii8620_readb(ctx, REG_INTR5); @@ -1956,53 +1948,13 @@ static void sii8620_irq_scdt(struct sii8620 *ctx) if (stat & BIT_INTR_SCDT_CHANGE) { u8 cstat = sii8620_readb(ctx, REG_TMDS_CSTAT_P3); - if (cstat & BIT_TMDS_CSTAT_P3_SCDT) { - if (ctx->sink_type == SINK_HDMI) - /* enable infoframe interrupt */ - sii8620_scdt_high(ctx); - else - sii8620_start_video(ctx); - } + if (cstat & BIT_TMDS_CSTAT_P3_SCDT) + sii8620_start_video(ctx); } sii8620_write(ctx, REG_INTR5, stat); } -static void sii8620_new_vsi(struct sii8620 *ctx) -{ - u8 vsif[11]; - - sii8620_write(ctx, REG_RX_HDMI_CTRL2, - VAL_RX_HDMI_CTRL2_DEFVAL | - BIT_RX_HDMI_CTRL2_VSI_MON_SEL_VSI); - sii8620_read_buf(ctx, REG_RX_HDMI_MON_PKT_HEADER1, vsif, - ARRAY_SIZE(vsif)); -} - -static void sii8620_new_avi(struct sii8620 *ctx) -{ - sii8620_write(ctx, REG_RX_HDMI_CTRL2, VAL_RX_HDMI_CTRL2_DEFVAL); - sii8620_read_buf(ctx, REG_RX_HDMI_MON_PKT_HEADER1, ctx->avif, - ARRAY_SIZE(ctx->avif)); -} - -static void sii8620_irq_infr(struct sii8620 *ctx) -{ - u8 stat = sii8620_readb(ctx, REG_INTR8) - & (BIT_CEA_NEW_VSI | BIT_CEA_NEW_AVI); - - sii8620_write(ctx, REG_INTR8, stat); - - if (stat & BIT_CEA_NEW_VSI) - sii8620_new_vsi(ctx); - - if (stat & BIT_CEA_NEW_AVI) - sii8620_new_avi(ctx); - - if (stat & (BIT_CEA_NEW_VSI | BIT_CEA_NEW_AVI)) - sii8620_start_video(ctx); -} - static void sii8620_got_xdevcap(struct sii8620 *ctx, int ret) { if (ret < 0) @@ -2084,7 +2036,6 @@ static irqreturn_t sii8620_irq_thread(int irq, void *data) { BIT_FAST_INTR_STAT_EDID, sii8620_irq_edid }, { BIT_FAST_INTR_STAT_DDC, sii8620_irq_ddc }, { BIT_FAST_INTR_STAT_SCDT, sii8620_irq_scdt }, - { BIT_FAST_INTR_STAT_INFR, sii8620_irq_infr }, }; struct sii8620 *ctx = data; u8 stats[LEN_FAST_INTR_STAT]; From 95e8522588c81fdef1b10777a874c4a1fe14bf88 Mon Sep 17 00:00:00 2001 From: Maciej Purski Date: Wed, 29 Nov 2017 16:12:47 +0100 Subject: [PATCH 068/572] drm/bridge/sii8620: remove HSIC initialization HSIC initialization was taken from the vendor code. HSIC in MHL circuit is not connected, so it is not possible to test it. Tests prove that without HSIC the device works well. Therefore it can be removed. Signed-off-by: Maciej Purski Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1511968368-30884-1-git-send-email-m.purski@samsung.com --- drivers/gpu/drm/bridge/sil-sii8620.c | 38 ---------------------------- 1 file changed, 38 deletions(-) diff --git a/drivers/gpu/drm/bridge/sil-sii8620.c b/drivers/gpu/drm/bridge/sil-sii8620.c index 853c4f97c7c9..ff041bec0cf4 100644 --- a/drivers/gpu/drm/bridge/sil-sii8620.c +++ b/drivers/gpu/drm/bridge/sil-sii8620.c @@ -511,50 +511,12 @@ static void sii8620_sink_detected(struct sii8620 *ctx, int ret) sink_str[ctx->sink_type], sink_name); } -static void sii8620_hsic_init(struct sii8620 *ctx) -{ - if (!sii8620_is_mhl3(ctx)) - return; - - sii8620_write(ctx, REG_FCGC, - BIT_FCGC_HSIC_HOSTMODE | BIT_FCGC_HSIC_ENABLE); - sii8620_setbits(ctx, REG_HRXCTRL3, - BIT_HRXCTRL3_HRX_STAY_RESET | BIT_HRXCTRL3_STATUS_EN, ~0); - sii8620_setbits(ctx, REG_TTXNUMB, MSK_TTXNUMB_TTX_NUMBPS, 4); - sii8620_setbits(ctx, REG_TRXCTRL, BIT_TRXCTRL_TRX_FROM_SE_COC, ~0); - sii8620_setbits(ctx, REG_HTXCTRL, BIT_HTXCTRL_HTX_DRVCONN1, 0); - sii8620_setbits(ctx, REG_KEEPER, MSK_KEEPER_MODE, VAL_KEEPER_MODE_HOST); - sii8620_write_seq_static(ctx, - REG_TDMLLCTL, 0, - REG_UTSRST, BIT_UTSRST_HRX_SRST | BIT_UTSRST_HTX_SRST | - BIT_UTSRST_KEEPER_SRST | BIT_UTSRST_FC_SRST, - REG_UTSRST, BIT_UTSRST_HRX_SRST | BIT_UTSRST_HTX_SRST, - REG_HRXINTL, 0xff, - REG_HRXINTH, 0xff, - REG_TTXINTL, 0xff, - REG_TTXINTH, 0xff, - REG_TRXINTL, 0xff, - REG_TRXINTH, 0xff, - REG_HTXINTL, 0xff, - REG_HTXINTH, 0xff, - REG_FCINTR0, 0xff, - REG_FCINTR1, 0xff, - REG_FCINTR2, 0xff, - REG_FCINTR3, 0xff, - REG_FCINTR4, 0xff, - REG_FCINTR5, 0xff, - REG_FCINTR6, 0xff, - REG_FCINTR7, 0xff - ); -} - static void sii8620_edid_read(struct sii8620 *ctx, int ret) { if (ret < 0) return; sii8620_set_upstream_edid(ctx); - sii8620_hsic_init(ctx); sii8620_enable_hpd(ctx); } From c7d6d511eb56495f065600c90336da0f0fe1b174 Mon Sep 17 00:00:00 2001 From: Maciej Purski Date: Tue, 23 Jan 2018 12:13:16 +0100 Subject: [PATCH 069/572] drm/bridge/sii8620: fix HDMI cable connection to dongle MHL bridge is usually connected to TV via MHL dongle. Currently plugging HDMI cable to dongle is handled improperly. Fix it by splitting connecting of a dongle and a HDMI cable. The driver should now handle unplugging a sink from a dongle and plugging a different sink with new edid. Tested on MHL1, MHL2 and MHL3 using various vendors' dongles both in DVI and HDMI mode. Signed-off-by: Maciej Purski Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1516705996-8928-1-git-send-email-m.purski@samsung.com --- drivers/gpu/drm/bridge/sil-sii8620.c | 65 ++++++++++++++++++---------- 1 file changed, 42 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/bridge/sil-sii8620.c b/drivers/gpu/drm/bridge/sil-sii8620.c index ff041bec0cf4..4a3deeda065c 100644 --- a/drivers/gpu/drm/bridge/sil-sii8620.c +++ b/drivers/gpu/drm/bridge/sil-sii8620.c @@ -83,6 +83,9 @@ struct sii8620 { u8 devcap[MHL_DCAP_SIZE]; u8 xdevcap[MHL_XDC_SIZE]; u8 avif[HDMI_INFOFRAME_SIZE(AVI)]; + bool feature_complete; + bool devcap_read; + bool sink_detected; struct edid *edid; unsigned int gen2_write_burst:1; enum sii8620_mt_state mt_state; @@ -479,7 +482,7 @@ static void sii8620_update_array(u8 *dst, u8 *src, int count) } } -static void sii8620_sink_detected(struct sii8620 *ctx, int ret) +static void sii8620_identify_sink(struct sii8620 *ctx) { static const char * const sink_str[] = { [SINK_NONE] = "NONE", @@ -490,7 +493,7 @@ static void sii8620_sink_detected(struct sii8620 *ctx, int ret) char sink_name[20]; struct device *dev = ctx->dev; - if (ret < 0) + if (!ctx->sink_detected || !ctx->devcap_read) return; sii8620_fetch_edid(ctx); @@ -499,6 +502,7 @@ static void sii8620_sink_detected(struct sii8620 *ctx, int ret) sii8620_mhl_disconnected(ctx); return; } + sii8620_set_upstream_edid(ctx); if (drm_detect_hdmi_monitor(ctx->edid)) ctx->sink_type = SINK_HDMI; @@ -511,15 +515,6 @@ static void sii8620_sink_detected(struct sii8620 *ctx, int ret) sink_str[ctx->sink_type], sink_name); } -static void sii8620_edid_read(struct sii8620 *ctx, int ret) -{ - if (ret < 0) - return; - - sii8620_set_upstream_edid(ctx); - sii8620_enable_hpd(ctx); -} - static void sii8620_mr_devcap(struct sii8620 *ctx) { u8 dcap[MHL_DCAP_SIZE]; @@ -535,6 +530,8 @@ static void sii8620_mr_devcap(struct sii8620 *ctx) dcap[MHL_DCAP_ADOPTER_ID_H], dcap[MHL_DCAP_ADOPTER_ID_L], dcap[MHL_DCAP_DEVICE_ID_H], dcap[MHL_DCAP_DEVICE_ID_L]); sii8620_update_array(ctx->devcap, dcap, MHL_DCAP_SIZE); + ctx->devcap_read = true; + sii8620_identify_sink(ctx); } static void sii8620_mr_xdevcap(struct sii8620 *ctx) @@ -1506,6 +1503,16 @@ static void sii8620_set_mode(struct sii8620 *ctx, enum sii8620_mode mode) ); } +static void sii8620_hpd_unplugged(struct sii8620 *ctx) +{ + sii8620_disable_hpd(ctx); + ctx->sink_type = SINK_NONE; + ctx->sink_detected = false; + ctx->feature_complete = false; + kfree(ctx->edid); + ctx->edid = NULL; +} + static void sii8620_disconnect(struct sii8620 *ctx) { sii8620_disable_gen2_write_burst(ctx); @@ -1533,7 +1540,7 @@ static void sii8620_disconnect(struct sii8620 *ctx) REG_MHL_DP_CTL6, 0x2A, REG_MHL_DP_CTL7, 0x03 ); - sii8620_disable_hpd(ctx); + sii8620_hpd_unplugged(ctx); sii8620_write_seq_static(ctx, REG_M3_CTRL, VAL_M3_CTRL_MHL3_VALUE, REG_MHL_COC_CTL1, 0x07, @@ -1581,10 +1588,8 @@ static void sii8620_disconnect(struct sii8620 *ctx) memset(ctx->xstat, 0, sizeof(ctx->xstat)); memset(ctx->devcap, 0, sizeof(ctx->devcap)); memset(ctx->xdevcap, 0, sizeof(ctx->xdevcap)); + ctx->devcap_read = false; ctx->cbus_status = 0; - ctx->sink_type = SINK_NONE; - kfree(ctx->edid); - ctx->edid = NULL; sii8620_mt_cleanup(ctx); } @@ -1675,9 +1680,6 @@ static void sii8620_status_changed_path(struct sii8620 *ctx) sii8620_mt_write_stat(ctx, MHL_DST_REG(LINK_MODE), MHL_DST_LM_CLK_MODE_NORMAL | MHL_DST_LM_PATH_ENABLED); - if (!sii8620_is_mhl3(ctx)) - sii8620_mt_read_devcap(ctx, false); - sii8620_mt_set_cont(ctx, sii8620_sink_detected); } else { sii8620_mt_write_stat(ctx, MHL_DST_REG(LINK_MODE), MHL_DST_LM_CLK_MODE_NORMAL); @@ -1694,9 +1696,14 @@ static void sii8620_msc_mr_write_stat(struct sii8620 *ctx) sii8620_update_array(ctx->stat, st, MHL_DST_SIZE); sii8620_update_array(ctx->xstat, xst, MHL_XDS_SIZE); - if (ctx->stat[MHL_DST_CONNECTED_RDY] & MHL_DST_CONN_DCAP_RDY) + if (ctx->stat[MHL_DST_CONNECTED_RDY] & st[MHL_DST_CONNECTED_RDY] & + MHL_DST_CONN_DCAP_RDY) { sii8620_status_dcap_ready(ctx); + if (!sii8620_is_mhl3(ctx)) + sii8620_mt_read_devcap(ctx, false); + } + if (st[MHL_DST_LINK_MODE] & MHL_DST_LM_PATH_ENABLED) sii8620_status_changed_path(ctx); } @@ -1780,8 +1787,11 @@ static void sii8620_msc_mr_set_int(struct sii8620 *ctx) } if (ints[MHL_INT_RCHANGE] & MHL_INT_RC_FEAT_REQ) sii8620_send_features(ctx); - if (ints[MHL_INT_RCHANGE] & MHL_INT_RC_FEAT_COMPLETE) - sii8620_edid_read(ctx, 0); + if (ints[MHL_INT_RCHANGE] & MHL_INT_RC_FEAT_COMPLETE) { + ctx->feature_complete = true; + if (ctx->edid) + sii8620_enable_hpd(ctx); + } } static struct sii8620_mt_msg *sii8620_msc_msg_first(struct sii8620 *ctx) @@ -1856,6 +1866,15 @@ static void sii8620_irq_msc(struct sii8620 *ctx) if (stat & BIT_CBUS_MSC_MR_WRITE_STAT) sii8620_msc_mr_write_stat(ctx); + if (stat & BIT_CBUS_HPD_CHG) { + if (ctx->cbus_status & BIT_CBUS_STATUS_CBUS_HPD) { + ctx->sink_detected = true; + sii8620_identify_sink(ctx); + } else { + sii8620_hpd_unplugged(ctx); + } + } + if (stat & BIT_CBUS_MSC_MR_SET_INT) sii8620_msc_mr_set_int(ctx); @@ -1967,11 +1986,11 @@ static void sii8620_irq_ddc(struct sii8620 *ctx) if (stat & BIT_DDC_CMD_DONE) { sii8620_write(ctx, REG_INTR3_MASK, 0); - if (sii8620_is_mhl3(ctx)) + if (sii8620_is_mhl3(ctx) && !ctx->feature_complete) sii8620_mt_set_int(ctx, MHL_INT_REG(RCHANGE), MHL_INT_RC_FEAT_REQ); else - sii8620_edid_read(ctx, 0); + sii8620_enable_hpd(ctx); } sii8620_write(ctx, REG_INTR3, stat); } From e37460c1ca08cf9d3b82eb3b6f205888d8d01182 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 14 Jun 2018 08:49:34 +0200 Subject: [PATCH 070/572] dma-mapping: use obj-y instead of lib-y for generic dma ops We already have exact config symbols to select the direct, non-coherent, or virt dma ops. So use the normal obj- scheme to select them. Signed-off-by: Christoph Hellwig --- lib/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/Makefile b/lib/Makefile index 956b320292fe..5e0e160c9242 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -29,9 +29,9 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ lib-$(CONFIG_PRINTK) += dump_stack.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o -lib-$(CONFIG_DMA_DIRECT_OPS) += dma-direct.o -lib-$(CONFIG_DMA_NONCOHERENT_OPS) += dma-noncoherent.o -lib-$(CONFIG_DMA_VIRT_OPS) += dma-virt.o +obj-$(CONFIG_DMA_DIRECT_OPS) += dma-direct.o +obj-$(CONFIG_DMA_NONCOHERENT_OPS) += dma-noncoherent.o +obj-$(CONFIG_DMA_VIRT_OPS) += dma-virt.o lib-y += kobject.o klist.o obj-y += lockref.o From cf65a0f6f6ff7631ba0ac0513a14ca5b65320d80 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 12 Jun 2018 19:01:45 +0200 Subject: [PATCH 071/572] dma-mapping: move all DMA mapping code to kernel/dma Currently the code is split over various files with dma- prefixes in the lib/ and drives/base directories, and the number of files keeps growing. Move them into a single directory to keep the code together and remove the file name prefixes. To match the irq infrastructure this directory is placed under the kernel/ directory. Signed-off-by: Christoph Hellwig --- Documentation/driver-api/infrastructure.rst | 4 +- MAINTAINERS | 9 +--- drivers/base/Makefile | 3 -- include/linux/dma-contiguous.h | 2 +- init/Kconfig | 4 -- kernel/Makefile | 1 + kernel/dma/Kconfig | 50 +++++++++++++++++++ kernel/dma/Makefile | 11 ++++ .../dma-coherent.c => kernel/dma/coherent.c | 0 .../dma/contiguous.c | 0 lib/dma-debug.c => kernel/dma/debug.c | 0 lib/dma-direct.c => kernel/dma/direct.c | 0 .../dma-mapping.c => kernel/dma/mapping.c | 2 +- .../dma/noncoherent.c | 0 {lib => kernel/dma}/swiotlb.c | 0 lib/dma-virt.c => kernel/dma/virt.c | 2 - lib/Kconfig | 47 +---------------- lib/Makefile | 6 --- 18 files changed, 69 insertions(+), 72 deletions(-) create mode 100644 kernel/dma/Kconfig create mode 100644 kernel/dma/Makefile rename drivers/base/dma-coherent.c => kernel/dma/coherent.c (100%) rename drivers/base/dma-contiguous.c => kernel/dma/contiguous.c (100%) rename lib/dma-debug.c => kernel/dma/debug.c (100%) rename lib/dma-direct.c => kernel/dma/direct.c (100%) rename drivers/base/dma-mapping.c => kernel/dma/mapping.c (99%) rename lib/dma-noncoherent.c => kernel/dma/noncoherent.c (100%) rename {lib => kernel/dma}/swiotlb.c (100%) rename lib/dma-virt.c => kernel/dma/virt.c (98%) diff --git a/Documentation/driver-api/infrastructure.rst b/Documentation/driver-api/infrastructure.rst index bee1b9a1702f..6172f3cc3d0b 100644 --- a/Documentation/driver-api/infrastructure.rst +++ b/Documentation/driver-api/infrastructure.rst @@ -49,10 +49,10 @@ Device Drivers Base Device Drivers DMA Management ----------------------------- -.. kernel-doc:: drivers/base/dma-coherent.c +.. kernel-doc:: kernel/dma/coherent.c :export: -.. kernel-doc:: drivers/base/dma-mapping.c +.. kernel-doc:: kernel/dma/mapping.c :export: Device drivers PnP support diff --git a/MAINTAINERS b/MAINTAINERS index c13b9fb3be0b..a6844a9e2f64 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4359,12 +4359,7 @@ L: iommu@lists.linux-foundation.org T: git git://git.infradead.org/users/hch/dma-mapping.git W: http://git.infradead.org/users/hch/dma-mapping.git S: Supported -F: lib/dma-debug.c -F: lib/dma-direct.c -F: lib/dma-noncoherent.c -F: lib/dma-virt.c -F: drivers/base/dma-mapping.c -F: drivers/base/dma-coherent.c +F: kernel/dma/ F: include/asm-generic/dma-mapping.h F: include/linux/dma-direct.h F: include/linux/dma-mapping.h @@ -13642,7 +13637,7 @@ M: Konrad Rzeszutek Wilk L: iommu@lists.linux-foundation.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/konrad/swiotlb.git S: Supported -F: lib/swiotlb.c +F: kernel/dma/swiotlb.c F: arch/*/kernel/pci-swiotlb.c F: include/linux/swiotlb.h diff --git a/drivers/base/Makefile b/drivers/base/Makefile index b074f242a435..704f44295810 100644 --- a/drivers/base/Makefile +++ b/drivers/base/Makefile @@ -8,10 +8,7 @@ obj-y := component.o core.o bus.o dd.o syscore.o \ topology.o container.o property.o cacheinfo.o \ devcon.o obj-$(CONFIG_DEVTMPFS) += devtmpfs.o -obj-$(CONFIG_DMA_CMA) += dma-contiguous.o obj-y += power/ -obj-$(CONFIG_HAS_DMA) += dma-mapping.o -obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o obj-$(CONFIG_ISA_BUS_API) += isa.o obj-y += firmware_loader/ obj-$(CONFIG_NUMA) += node.o diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h index b67bf6ac907d..3c5a4cb3eb95 100644 --- a/include/linux/dma-contiguous.h +++ b/include/linux/dma-contiguous.h @@ -48,7 +48,7 @@ * CMA should not be used by the device drivers directly. It is * only a helper framework for dma-mapping subsystem. * - * For more information, see kernel-docs in drivers/base/dma-contiguous.c + * For more information, see kernel-docs in kernel/dma/contiguous.c */ #ifdef __KERNEL__ diff --git a/init/Kconfig b/init/Kconfig index 5a52f07259a2..fde3d09e8b27 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1719,10 +1719,6 @@ source "arch/Kconfig" endmenu # General setup -config HAVE_GENERIC_DMA_COHERENT - bool - default n - config RT_MUTEXES bool diff --git a/kernel/Makefile b/kernel/Makefile index d2001624fe7a..04bc07c2b42a 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -41,6 +41,7 @@ obj-y += printk/ obj-y += irq/ obj-y += rcu/ obj-y += livepatch/ +obj-y += dma/ obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o obj-$(CONFIG_FREEZER) += freezer.o diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig new file mode 100644 index 000000000000..9bd54304446f --- /dev/null +++ b/kernel/dma/Kconfig @@ -0,0 +1,50 @@ + +config HAS_DMA + bool + depends on !NO_DMA + default y + +config NEED_SG_DMA_LENGTH + bool + +config NEED_DMA_MAP_STATE + bool + +config ARCH_DMA_ADDR_T_64BIT + def_bool 64BIT || PHYS_ADDR_T_64BIT + +config HAVE_GENERIC_DMA_COHERENT + bool + +config ARCH_HAS_SYNC_DMA_FOR_DEVICE + bool + +config ARCH_HAS_SYNC_DMA_FOR_CPU + bool + select NEED_DMA_MAP_STATE + +config DMA_DIRECT_OPS + bool + depends on HAS_DMA + +config DMA_NONCOHERENT_OPS + bool + depends on HAS_DMA + select DMA_DIRECT_OPS + +config DMA_NONCOHERENT_MMAP + bool + depends on DMA_NONCOHERENT_OPS + +config DMA_NONCOHERENT_CACHE_SYNC + bool + depends on DMA_NONCOHERENT_OPS + +config DMA_VIRT_OPS + bool + depends on HAS_DMA + +config SWIOTLB + bool + select DMA_DIRECT_OPS + select NEED_DMA_MAP_STATE diff --git a/kernel/dma/Makefile b/kernel/dma/Makefile new file mode 100644 index 000000000000..6de44e4eb454 --- /dev/null +++ b/kernel/dma/Makefile @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_HAS_DMA) += mapping.o +obj-$(CONFIG_DMA_CMA) += contiguous.o +obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += coherent.o +obj-$(CONFIG_DMA_DIRECT_OPS) += direct.o +obj-$(CONFIG_DMA_NONCOHERENT_OPS) += noncoherent.o +obj-$(CONFIG_DMA_VIRT_OPS) += virt.o +obj-$(CONFIG_DMA_API_DEBUG) += debug.o +obj-$(CONFIG_SWIOTLB) += swiotlb.o + diff --git a/drivers/base/dma-coherent.c b/kernel/dma/coherent.c similarity index 100% rename from drivers/base/dma-coherent.c rename to kernel/dma/coherent.c diff --git a/drivers/base/dma-contiguous.c b/kernel/dma/contiguous.c similarity index 100% rename from drivers/base/dma-contiguous.c rename to kernel/dma/contiguous.c diff --git a/lib/dma-debug.c b/kernel/dma/debug.c similarity index 100% rename from lib/dma-debug.c rename to kernel/dma/debug.c diff --git a/lib/dma-direct.c b/kernel/dma/direct.c similarity index 100% rename from lib/dma-direct.c rename to kernel/dma/direct.c diff --git a/drivers/base/dma-mapping.c b/kernel/dma/mapping.c similarity index 99% rename from drivers/base/dma-mapping.c rename to kernel/dma/mapping.c index f831a582209c..d2a92ddaac4d 100644 --- a/drivers/base/dma-mapping.c +++ b/kernel/dma/mapping.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * drivers/base/dma-mapping.c - arch-independent dma-mapping routines + * arch-independent dma-mapping routines * * Copyright (c) 2006 SUSE Linux Products GmbH * Copyright (c) 2006 Tejun Heo diff --git a/lib/dma-noncoherent.c b/kernel/dma/noncoherent.c similarity index 100% rename from lib/dma-noncoherent.c rename to kernel/dma/noncoherent.c diff --git a/lib/swiotlb.c b/kernel/dma/swiotlb.c similarity index 100% rename from lib/swiotlb.c rename to kernel/dma/swiotlb.c diff --git a/lib/dma-virt.c b/kernel/dma/virt.c similarity index 98% rename from lib/dma-virt.c rename to kernel/dma/virt.c index 8e61a02ef9ca..631ddec4b60a 100644 --- a/lib/dma-virt.c +++ b/kernel/dma/virt.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 /* - * lib/dma-virt.c - * * DMA operations that map to virtual addresses without flushing memory. */ #include diff --git a/lib/Kconfig b/lib/Kconfig index 809fdd155739..803fcbced729 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -420,60 +420,15 @@ config HAS_IOPORT_MAP depends on HAS_IOMEM && !NO_IOPORT_MAP default y -config HAS_DMA - bool - depends on !NO_DMA - default y +source "kernel/dma/Kconfig" config SGL_ALLOC bool default n -config NEED_SG_DMA_LENGTH - bool - -config NEED_DMA_MAP_STATE - bool - -config ARCH_DMA_ADDR_T_64BIT - def_bool 64BIT || PHYS_ADDR_T_64BIT - config IOMMU_HELPER bool -config ARCH_HAS_SYNC_DMA_FOR_DEVICE - bool - -config ARCH_HAS_SYNC_DMA_FOR_CPU - bool - select NEED_DMA_MAP_STATE - -config DMA_DIRECT_OPS - bool - depends on HAS_DMA - -config DMA_NONCOHERENT_OPS - bool - depends on HAS_DMA - select DMA_DIRECT_OPS - -config DMA_NONCOHERENT_MMAP - bool - depends on DMA_NONCOHERENT_OPS - -config DMA_NONCOHERENT_CACHE_SYNC - bool - depends on DMA_NONCOHERENT_OPS - -config DMA_VIRT_OPS - bool - depends on HAS_DMA - -config SWIOTLB - bool - select DMA_DIRECT_OPS - select NEED_DMA_MAP_STATE - config CHECK_SIGNATURE bool diff --git a/lib/Makefile b/lib/Makefile index 5e0e160c9242..8153fdab287f 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -29,9 +29,6 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ lib-$(CONFIG_PRINTK) += dump_stack.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o -obj-$(CONFIG_DMA_DIRECT_OPS) += dma-direct.o -obj-$(CONFIG_DMA_NONCOHERENT_OPS) += dma-noncoherent.o -obj-$(CONFIG_DMA_VIRT_OPS) += dma-virt.o lib-y += kobject.o klist.o obj-y += lockref.o @@ -148,7 +145,6 @@ obj-$(CONFIG_SMP) += percpu_counter.o obj-$(CONFIG_AUDIT_GENERIC) += audit.o obj-$(CONFIG_AUDIT_COMPAT_GENERIC) += compat_audit.o -obj-$(CONFIG_SWIOTLB) += swiotlb.o obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o @@ -169,8 +165,6 @@ obj-$(CONFIG_NLATTR) += nlattr.o obj-$(CONFIG_LRU_CACHE) += lru_cache.o -obj-$(CONFIG_DMA_API_DEBUG) += dma-debug.o - obj-$(CONFIG_GENERIC_CSUM) += checksum.o obj-$(CONFIG_GENERIC_ATOMIC64) += atomic64.o From a09c591306881dfb04387c6ee7b7e2e4683fa531 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 13 Jun 2018 13:17:26 +0200 Subject: [PATCH 072/572] ACPI / LPSS: Avoid PM quirks on suspend and resume from S3 It is reported that commit a192aa923b66a (ACPI / LPSS: Consolidate runtime PM and system sleep handling) introduced a system suspend regression on some machines, but the only functional change made by it was to cause the PM quirks in the LPSS to also be used during system suspend and resume. While that should always work for suspend-to-idle, it turns out to be problematic for S3 (suspend-to-RAM). To address that issue restore the previous S3 suspend and resume behavior of the LPSS to avoid applying PM quirks then. Fixes: a192aa923b66a (ACPI / LPSS: Consolidate runtime PM and system sleep handling) Link: https://bugs.launchpad.net/bugs/1774950 Reported-by: Kai-Heng Feng Tested-by: Kai-Heng Feng Signed-off-by: Rafael J. Wysocki Reviewed-by: Ulf Hansson Reviewed-by: Andy Shevchenko Acked-by: Mika Westerberg Cc: 4.15+ # 4.15+ --- drivers/acpi/acpi_lpss.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c index cb6ac5c65c2e..55e4577b504c 100644 --- a/drivers/acpi/acpi_lpss.c +++ b/drivers/acpi/acpi_lpss.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include "internal.h" @@ -944,9 +945,10 @@ static void lpss_iosf_exit_d3_state(void) mutex_unlock(&lpss_iosf_mutex); } -static int acpi_lpss_suspend(struct device *dev, bool wakeup) +static int acpi_lpss_suspend(struct device *dev, bool runtime) { struct lpss_private_data *pdata = acpi_driver_data(ACPI_COMPANION(dev)); + bool wakeup = runtime || device_may_wakeup(dev); int ret; if (pdata->dev_desc->flags & LPSS_SAVE_CTX) @@ -959,13 +961,14 @@ static int acpi_lpss_suspend(struct device *dev, bool wakeup) * wrong status for devices being about to be powered off. See * lpss_iosf_enter_d3_state() for further information. */ - if (lpss_quirks & LPSS_QUIRK_ALWAYS_POWER_ON && iosf_mbi_available()) + if ((runtime || !pm_suspend_via_firmware()) && + lpss_quirks & LPSS_QUIRK_ALWAYS_POWER_ON && iosf_mbi_available()) lpss_iosf_enter_d3_state(); return ret; } -static int acpi_lpss_resume(struct device *dev) +static int acpi_lpss_resume(struct device *dev, bool runtime) { struct lpss_private_data *pdata = acpi_driver_data(ACPI_COMPANION(dev)); int ret; @@ -974,7 +977,8 @@ static int acpi_lpss_resume(struct device *dev) * This call is kept first to be in symmetry with * acpi_lpss_runtime_suspend() one. */ - if (lpss_quirks & LPSS_QUIRK_ALWAYS_POWER_ON && iosf_mbi_available()) + if ((runtime || !pm_resume_via_firmware()) && + lpss_quirks & LPSS_QUIRK_ALWAYS_POWER_ON && iosf_mbi_available()) lpss_iosf_exit_d3_state(); ret = acpi_dev_resume(dev); @@ -998,12 +1002,12 @@ static int acpi_lpss_suspend_late(struct device *dev) return 0; ret = pm_generic_suspend_late(dev); - return ret ? ret : acpi_lpss_suspend(dev, device_may_wakeup(dev)); + return ret ? ret : acpi_lpss_suspend(dev, false); } static int acpi_lpss_resume_early(struct device *dev) { - int ret = acpi_lpss_resume(dev); + int ret = acpi_lpss_resume(dev, false); return ret ? ret : pm_generic_resume_early(dev); } @@ -1018,7 +1022,7 @@ static int acpi_lpss_runtime_suspend(struct device *dev) static int acpi_lpss_runtime_resume(struct device *dev) { - int ret = acpi_lpss_resume(dev); + int ret = acpi_lpss_resume(dev, true); return ret ? ret : pm_generic_runtime_resume(dev); } From 47e5abfb546a3ace23a77453dc2e9db92704c5ac Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 14 Jun 2018 10:01:52 +0200 Subject: [PATCH 073/572] PM / core: Fix supplier device runtime PM usage counter imbalance If a device link is added via device_link_add() by the driver of the link's consumer device, the supplier's runtime PM usage counter is going to be dropped by the pm_runtime_put_suppliers() call in driver_probe_device(). However, in that case it is not incremented unless the supplier driver is already present and the link is not stateless. That leads to a runtime PM usage counter imbalance for the supplier device in a few cases. To prevent that from happening, bump up the supplier runtime PM usage counter in device_link_add() for all links with the DL_FLAG_PM_RUNTIME flag set that are added at the consumer probe time. Use pm_runtime_get_noresume() for that as the callers of device_link_add() who want the supplier to be resumed by it are expected to pass DL_FLAG_RPM_ACTIVE in flags to it anyway, but additionally resume the supplier if the link is added during consumer driver probe to retain the existing behavior for the callers depending on it. Fixes: 21d5c57b3726 (PM / runtime: Use device links) Reported-by: Ulf Hansson Reviewed-by: Ulf Hansson Tested-by: Marek Szyprowski Cc: 4.10+ # 4.10+ Signed-off-by: Rafael J. Wysocki --- drivers/base/core.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 36622b52e419..df3e1a44707a 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -236,6 +236,13 @@ struct device_link *device_link_add(struct device *consumer, link->rpm_active = true; } pm_runtime_new_link(consumer); + /* + * If the link is being added by the consumer driver at probe + * time, balance the decrementation of the supplier's runtime PM + * usage counter after consumer probe in driver_probe_device(). + */ + if (consumer->links.status == DL_DEV_PROBING) + pm_runtime_get_noresume(supplier); } get_device(supplier); link->supplier = supplier; @@ -255,12 +262,12 @@ struct device_link *device_link_add(struct device *consumer, switch (consumer->links.status) { case DL_DEV_PROBING: /* - * Balance the decrementation of the supplier's - * runtime PM usage counter after consumer probe - * in driver_probe_device(). + * Some callers expect the link creation during + * consumer driver probe to resume the supplier + * even without DL_FLAG_RPM_ACTIVE. */ if (flags & DL_FLAG_PM_RUNTIME) - pm_runtime_get_sync(supplier); + pm_runtime_resume(supplier); link->status = DL_STATE_CONSUMER_PROBE; break; From d5681f59ee3d4a2e60b9234e94c163cbbf559d0a Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Thu, 14 Jun 2018 09:39:17 -0400 Subject: [PATCH 074/572] NFS: Fix an rcu deadlock in nfs_delegation_find_inode() I was able to reproduce this pretty regularily using xfstests generic/013 on NFS v4.0. Reported-by: Ross Zwisler Fixes: 6c342655022d (NFSv4: Return NFS4ERR_DELAY when a delegation recall fails due to igrab()) Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/delegation.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index bbd0465535eb..f033f3a69a3b 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -883,8 +883,10 @@ struct inode *nfs_delegation_find_inode(struct nfs_client *clp, rcu_read_lock(); list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { res = nfs_delegation_find_inode_server(server, fhandle); - if (res != ERR_PTR(-ENOENT)) + if (res != ERR_PTR(-ENOENT)) { + rcu_read_unlock(); return res; + } } rcu_read_unlock(); return ERR_PTR(-ENOENT); From 07480cbc05ef1ff7301cb11afb7d894ad3d0916a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 14 Jun 2018 11:06:55 -0700 Subject: [PATCH 075/572] tools: bpftool: improve accuracy of load time BPF program load time is reported from the kernel relative to boot time. If conversion to wall clock does not take nanosecond parts into account, the load time reported by bpftool may differ by one second from run to run. This means JSON object reported by bpftool for a program will randomly change. Fixes: 71bb428fe2c1 ("tools: bpf: add bpftool") Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/prog.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index a4f435203fef..05f42a46d6ed 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -90,7 +90,9 @@ static void print_boot_time(__u64 nsecs, char *buf, unsigned int size) } wallclock_secs = (real_time_ts.tv_sec - boot_time_ts.tv_sec) + - nsecs / 1000000000; + (real_time_ts.tv_nsec - boot_time_ts.tv_nsec + nsecs) / + 1000000000; + if (!localtime_r(&wallclock_secs, &load_tm)) { snprintf(buf, size, "%llu", nsecs / 1000000000); From 47cf52a246e526e2092d60ac01c54af9bd45dcc9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 14 Jun 2018 11:06:56 -0700 Subject: [PATCH 076/572] selftests/bpf: test offloads even with BPF programs present Modern distroes increasingly make use of BPF programs. Default Ubuntu 18.04 installation boots with a number of cgroup_skb programs loaded. test_offloads.py tries to check if programs and maps are not leaked on error paths by confirming the list of programs on the system is empty between tests. Since we can no longer expect the system to have no BPF objects at boot try to remember the programs and maps present at the start, and skip those when scanning the system. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_offload.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index e78aad0a68bb..be800d0e7a84 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -163,6 +163,10 @@ def bpftool(args, JSON=True, ns="", fail=True): def bpftool_prog_list(expected=None, ns=""): _, progs = bpftool("prog show", JSON=True, ns=ns, fail=True) + # Remove the base progs + for p in base_progs: + if p in progs: + progs.remove(p) if expected is not None: if len(progs) != expected: fail(True, "%d BPF programs loaded, expected %d" % @@ -171,6 +175,10 @@ def bpftool_prog_list(expected=None, ns=""): def bpftool_map_list(expected=None, ns=""): _, maps = bpftool("map show", JSON=True, ns=ns, fail=True) + # Remove the base maps + for m in base_maps: + if m in maps: + maps.remove(m) if expected is not None: if len(maps) != expected: fail(True, "%d BPF maps loaded, expected %d" % @@ -585,8 +593,8 @@ skip(os.getuid() != 0, "test must be run as root") # Check tools ret, progs = bpftool("prog", fail=False) skip(ret != 0, "bpftool not installed") -# Check no BPF programs are loaded -skip(len(progs) != 0, "BPF programs already loaded on the system") +base_progs = progs +_, base_maps = bpftool("map") # Check netdevsim ret, out = cmd("modprobe netdevsim", fail=False) From 36ffdbc0a2d9f7280e49dbe5ea53c289ad112a8c Mon Sep 17 00:00:00 2001 From: Jian Wang Date: Fri, 15 Jun 2018 03:22:17 +0200 Subject: [PATCH 077/572] bpf, selftest: check tunnel type more accurately Grep tunnel type directly to make sure 'ip' command supports it. Signed-off-by: Jian Wang Acked-by: Yonghong Song Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_tunnel.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh index aeb2901f21f4..c4b5fbbaa760 100755 --- a/tools/testing/selftests/bpf/test_tunnel.sh +++ b/tools/testing/selftests/bpf/test_tunnel.sh @@ -668,7 +668,7 @@ cleanup_exit() check() { - ip link help $1 2>&1 | grep -q "^Usage:" + ip link help 2>&1 | grep -q "\s$1\s" if [ $? -ne 0 ];then echo "SKIP $1: iproute2 not support" cleanup From 26bf8a89d887c0686acef0f44eaadd49abfcab03 Mon Sep 17 00:00:00 2001 From: William Tu Date: Thu, 14 Jun 2018 05:01:06 -0700 Subject: [PATCH 078/572] bpf, selftests: delete xfrm tunnel when test exits. Make the printting of bpf xfrm tunnel better and cleanup xfrm state and policy when xfrm test finishes. Signed-off-by: William Tu Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_tunnel.sh | 24 ++++++++++++---------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh index c4b5fbbaa760..546aee3e9fb4 100755 --- a/tools/testing/selftests/bpf/test_tunnel.sh +++ b/tools/testing/selftests/bpf/test_tunnel.sh @@ -608,28 +608,26 @@ setup_xfrm_tunnel() test_xfrm_tunnel() { config_device - #tcpdump -nei veth1 ip & - output=$(mktemp) - cat /sys/kernel/debug/tracing/trace_pipe | tee $output & - setup_xfrm_tunnel + > /sys/kernel/debug/tracing/trace + setup_xfrm_tunnel tc qdisc add dev veth1 clsact tc filter add dev veth1 proto ip ingress bpf da obj test_tunnel_kern.o \ sec xfrm_get_state ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 sleep 1 - grep "reqid 1" $output + grep "reqid 1" /sys/kernel/debug/tracing/trace check_err $? - grep "spi 0x1" $output + grep "spi 0x1" /sys/kernel/debug/tracing/trace check_err $? - grep "remote ip 0xac100164" $output + grep "remote ip 0xac100164" /sys/kernel/debug/tracing/trace check_err $? cleanup if [ $ret -ne 0 ]; then - echo -e ${RED}"FAIL: xfrm tunnel"${NC} - return 1 - fi - echo -e ${GREEN}"PASS: xfrm tunnel"${NC} + echo -e ${RED}"FAIL: xfrm tunnel"${NC} + return 1 + fi + echo -e ${GREEN}"PASS: xfrm tunnel"${NC} } attach_bpf() @@ -657,6 +655,10 @@ cleanup() ip link del ip6geneve11 2> /dev/null ip link del erspan11 2> /dev/null ip link del ip6erspan11 2> /dev/null + ip xfrm policy delete dir out src 10.1.1.200/32 dst 10.1.1.100/32 2> /dev/null + ip xfrm policy delete dir in src 10.1.1.100/32 dst 10.1.1.200/32 2> /dev/null + ip xfrm state delete src 172.16.1.100 dst 172.16.1.200 proto esp spi 0x1 2> /dev/null + ip xfrm state delete src 172.16.1.200 dst 172.16.1.100 proto esp spi 0x2 2> /dev/null } cleanup_exit() From f70b359b3830b7a5b72c78136edc740839b67acd Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 1 Jun 2018 10:59:25 +0300 Subject: [PATCH 079/572] crypto: chtls - use after free in chtls_pt_recvmsg() We call chtls_free_skb() but then we dereference it on the next lines. Also "skb" can't be NULL, we just dereferenced it on the line before. I have moved the free down a couple lines to fix this issue. Fixes: 17a7d24aa89d ("crypto: chtls - generic handling of data and hdr") Signed-off-by: Dan Carpenter Signed-off-by: Herbert Xu --- drivers/crypto/chelsio/chtls/chtls_io.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/chelsio/chtls/chtls_io.c b/drivers/crypto/chelsio/chtls/chtls_io.c index 51fc6821cbbf..708e232e3cdf 100644 --- a/drivers/crypto/chelsio/chtls/chtls_io.c +++ b/drivers/crypto/chelsio/chtls/chtls_io.c @@ -1548,15 +1548,14 @@ skip_copy: tp->urg_data = 0; if ((avail + offset) >= skb->len) { - if (likely(skb)) - chtls_free_skb(sk, skb); - buffers_freed++; if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_TLS_HDR) { tp->copied_seq += skb->len; hws->rcvpld = skb->hdr_len; } else { tp->copied_seq += hws->rcvpld; } + chtls_free_skb(sk, skb); + buffers_freed++; hws->copied_seq = 0; if (copied >= target && !skb_peek(&sk->sk_receive_queue)) From 6e88f01206edab0e5bc105d8f35fac10f4ee14c5 Mon Sep 17 00:00:00 2001 From: Jia He Date: Fri, 8 Jun 2018 15:41:44 +0800 Subject: [PATCH 080/572] crypto: arm64/aes-blk - fix and move skcipher_walk_done out of kernel_neon_begin, _end In a arm64 server(QDF2400),I met a similar might-sleep warning as [1]: [ 7.019116] BUG: sleeping function called from invalid context at ./include/crypto/algapi.h:416 [ 7.027863] in_atomic(): 1, irqs_disabled(): 0, pid: 410, name: cryptomgr_test [ 7.035106] 1 lock held by cryptomgr_test/410: [ 7.039549] #0: (ptrval) (&drbg->drbg_mutex){+.+.}, at: drbg_instantiate+0x34/0x398 [ 7.048038] CPU: 9 PID: 410 Comm: cryptomgr_test Not tainted 4.17.0-rc6+ #27 [ 7.068228] dump_backtrace+0x0/0x1c0 [ 7.071890] show_stack+0x24/0x30 [ 7.075208] dump_stack+0xb0/0xec [ 7.078523] ___might_sleep+0x160/0x238 [ 7.082360] skcipher_walk_done+0x118/0x2c8 [ 7.086545] ctr_encrypt+0x98/0x130 [ 7.090035] simd_skcipher_encrypt+0x68/0xc0 [ 7.094304] drbg_kcapi_sym_ctr+0xd4/0x1f8 [ 7.098400] drbg_ctr_update+0x98/0x330 [ 7.102236] drbg_seed+0x1b8/0x2f0 [ 7.105637] drbg_instantiate+0x2ac/0x398 [ 7.109646] drbg_kcapi_seed+0xbc/0x188 [ 7.113482] crypto_rng_reset+0x4c/0xb0 [ 7.117319] alg_test_drbg+0xec/0x330 [ 7.120981] alg_test.part.6+0x1c8/0x3c8 [ 7.124903] alg_test+0x58/0xa0 [ 7.128044] cryptomgr_test+0x50/0x58 [ 7.131708] kthread+0x134/0x138 [ 7.134936] ret_from_fork+0x10/0x1c Seems there is a bug in Ard Biesheuvel's commit. Fixes: 683381747270 ("crypto: arm64/aes-blk - move kernel mode neon en/disable into loop") [1] https://www.spinics.net/lists/linux-crypto/msg33103.html Signed-off-by: jia.he@hxt-semitech.com Acked-by: Ard Biesheuvel Cc: # 4.17 Acked-by: Will Deacon Signed-off-by: Herbert Xu --- arch/arm64/crypto/aes-glue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index 253188fb8cb0..e3e50950a863 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -223,8 +223,8 @@ static int ctr_encrypt(struct skcipher_request *req) kernel_neon_begin(); aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr, (u8 *)ctx->key_enc, rounds, blocks, walk.iv); - err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); kernel_neon_end(); + err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } if (walk.nbytes) { u8 __aligned(8) tail[AES_BLOCK_SIZE]; From f044a84e040b85cd609851ac88ae8b54b2cc0b75 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Fri, 8 Jun 2018 11:53:41 +0200 Subject: [PATCH 081/572] crypto: don't optimize keccakf() keccakf() is the only function in kernel that uses __optimize() macro. __optimize() breaks frame pointer unwinder as optimized code uses RBP, and amusingly this always lead to degraded performance as gcc does not inline across different optimizations levels, so keccakf() wasn't inlined into its callers and keccakf_round() wasn't inlined into keccakf(). Drop __optimize() to resolve both problems. Signed-off-by: Dmitry Vyukov Fixes: 83dee2ce1ae7 ("crypto: sha3-generic - rewrite KECCAK transform to help the compiler optimize") Reported-by: syzbot+37035ccfa9a0a017ffcf@syzkaller.appspotmail.com Reported-by: syzbot+e073e4740cfbb3ae200b@syzkaller.appspotmail.com Cc: linux-crypto@vger.kernel.org Cc: "David S. Miller" Cc: Herbert Xu Cc: Ard Biesheuvel Acked-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- crypto/sha3_generic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/sha3_generic.c b/crypto/sha3_generic.c index 264ec12c0b9c..7f6735d9003f 100644 --- a/crypto/sha3_generic.c +++ b/crypto/sha3_generic.c @@ -152,7 +152,7 @@ static SHA3_INLINE void keccakf_round(u64 st[25]) st[24] ^= bc[ 4]; } -static void __optimize("O3") keccakf(u64 st[25]) +static void keccakf(u64 st[25]) { int round; From a81ae8095712d1513fe8d58527c92c439b43233e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ondrej=20Mosn=C3=A1=C4=8Dek?= Date: Wed, 13 Jun 2018 16:44:17 +0200 Subject: [PATCH 082/572] crypto: morus640 - Fix out-of-bounds access We must load the block from the temporary variable here, not directly from the input. Also add forgotten zeroing-out of the uninitialized part of the temporary block (as is done correctly in morus1280.c). Fixes: 396be41f16fd ("crypto: morus - Add generic MORUS AEAD implementations") Reported-by: syzbot+1fafa9c4cf42df33f716@syzkaller.appspotmail.com Reported-by: syzbot+d82643ba80bf6937cd44@syzkaller.appspotmail.com Signed-off-by: Ondrej Mosnacek Signed-off-by: Herbert Xu --- crypto/morus640.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crypto/morus640.c b/crypto/morus640.c index 9fbcde307daf..5eede3749e64 100644 --- a/crypto/morus640.c +++ b/crypto/morus640.c @@ -274,8 +274,9 @@ static void crypto_morus640_decrypt_chunk(struct morus640_state *state, u8 *dst, union morus640_block_in tail; memcpy(tail.bytes, src, size); + memset(tail.bytes + size, 0, MORUS640_BLOCK_SIZE - size); - crypto_morus640_load_a(&m, src); + crypto_morus640_load_a(&m, tail.bytes); crypto_morus640_core(state, &m); crypto_morus640_store_a(tail.bytes, &m); memset(tail.bytes + size, 0, MORUS640_BLOCK_SIZE - size); From 837bf7cc3b7504385ae0e829c72e470dfc27cf6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20B=C3=BCsch?= Date: Thu, 14 Jun 2018 20:08:11 +0200 Subject: [PATCH 083/572] hwrng: core - Always drop the RNG in hwrng_unregister() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit enable_best_rng() is used in hwrng_unregister() to switch away from the currently active RNG, if that is the one currently being removed. However enable_best_rng() might fail, if the next RNG's init routine fails. In that case enable_best_rng() will return an error code and the currently active RNG will remain active. After unregistering this might lead to crashes due to use-after-free. Fix this by dropping the currently active RNG, if enable_best_rng() failed. This will result in no RNG to be active, if the next-best one failed to initialize. This problem was introduced by 142a27f0a731ddcf467546960a5585970ca98e21 Fixes: 142a27f0a731 ("hwrng: core - Reset user selected rng by...") Reported-by: Wirz Tested-by: Wirz Signed-off-by: Michael Büsch Cc: stable@vger.kernel.org Signed-off-by: Herbert Xu --- drivers/char/hw_random/core.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index 91bb98c42a1c..aaf9e5afaad4 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -516,11 +516,18 @@ EXPORT_SYMBOL_GPL(hwrng_register); void hwrng_unregister(struct hwrng *rng) { + int err; + mutex_lock(&rng_mutex); list_del(&rng->list); - if (current_rng == rng) - enable_best_rng(); + if (current_rng == rng) { + err = enable_best_rng(); + if (err) { + drop_current_rng(); + cur_rng_set_by_user = 0; + } + } if (list_empty(&rng_list)) { mutex_unlock(&rng_mutex); From 7d1982b4e335c1b184406b7566f6041bfe313c35 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 15 Jun 2018 02:30:47 +0200 Subject: [PATCH 084/572] bpf: fix panic in prog load calls cleanup While testing I found that when hitting error path in bpf_prog_load() where we jump to free_used_maps and prog contained BPF to BPF calls that were JITed earlier, then we never clean up the bpf_prog_kallsyms_add() done under jit_subprogs(). Add proper API to make BPF kallsyms deletion more clear and fix that. Fixes: 1c2a088a6626 ("bpf: x64: add JIT support for multi-function programs") Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- include/linux/filter.h | 3 +++ kernel/bpf/core.c | 14 ++++++++++++++ kernel/bpf/syscall.c | 8 ++------ 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index 45fc0f5000d8..297c56fa9cee 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -961,6 +961,9 @@ static inline void bpf_prog_kallsyms_del(struct bpf_prog *fp) } #endif /* CONFIG_BPF_JIT */ +void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp); +void bpf_prog_kallsyms_del_all(struct bpf_prog *fp); + #define BPF_ANC BIT(15) static inline bool bpf_needs_clear_a(const struct sock_filter *first) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 9f1493705f40..1061968adcc1 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -350,6 +350,20 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, return prog_adj; } +void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp) +{ + int i; + + for (i = 0; i < fp->aux->func_cnt; i++) + bpf_prog_kallsyms_del(fp->aux->func[i]); +} + +void bpf_prog_kallsyms_del_all(struct bpf_prog *fp) +{ + bpf_prog_kallsyms_del_subprogs(fp); + bpf_prog_kallsyms_del(fp); +} + #ifdef CONFIG_BPF_JIT /* All BPF JIT sysctl knobs here. */ int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 0fa20624707f..0f62692fe635 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1034,14 +1034,9 @@ static void __bpf_prog_put_rcu(struct rcu_head *rcu) static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock) { if (atomic_dec_and_test(&prog->aux->refcnt)) { - int i; - /* bpf_prog_free_id() must be called first */ bpf_prog_free_id(prog, do_idr_lock); - - for (i = 0; i < prog->aux->func_cnt; i++) - bpf_prog_kallsyms_del(prog->aux->func[i]); - bpf_prog_kallsyms_del(prog); + bpf_prog_kallsyms_del_all(prog); call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu); } @@ -1384,6 +1379,7 @@ static int bpf_prog_load(union bpf_attr *attr) return err; free_used_maps: + bpf_prog_kallsyms_del_subprogs(prog); free_used_maps(prog->aux); free_prog: bpf_prog_uncharge_memlock(prog); From 9facc336876f7ecf9edba4c67b90426fde4ec898 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 15 Jun 2018 02:30:48 +0200 Subject: [PATCH 085/572] bpf: reject any prog that failed read-only lock We currently lock any JITed image as read-only via bpf_jit_binary_lock_ro() as well as the BPF image as read-only through bpf_prog_lock_ro(). In the case any of these would fail we throw a WARN_ON_ONCE() in order to yell loudly to the log. Perhaps, to some extend, this may be comparable to an allocation where __GFP_NOWARN is explicitly not set. Added via 65869a47f348 ("bpf: improve read-only handling"), this behavior is slightly different compared to any of the other in-kernel set_memory_ro() users who do not check the return code of set_memory_ro() and friends /at all/ (e.g. in the case of module_enable_ro() / module_disable_ro()). Given in BPF this is mandatory hardening step, we want to know whether there are any issues that would leave both BPF data writable. So it happens that syzkaller enabled fault injection and it triggered memory allocation failure deep inside x86's change_page_attr_set_clr() which was triggered from set_memory_ro(). Now, there are two options: i) leaving everything as is, and ii) reworking the image locking code in order to have a final checkpoint out of the central bpf_prog_select_runtime() which probes whether any of the calls during prog setup weren't successful, and then bailing out with an error. Option ii) is a better approach since this additional paranoia avoids altogether leaving any potential W+X pages from BPF side in the system. Therefore, lets be strict about it, and reject programs in such unlikely occasion. While testing I noticed also that one bpf_prog_lock_ro() call was missing on the outer dummy prog in case of calls, e.g. in the destructor we call bpf_prog_free_deferred() on the main prog where we try to bpf_prog_unlock_free() the program, and since we go via bpf_prog_select_runtime() do that as well. Reported-by: syzbot+3b889862e65a98317058@syzkaller.appspotmail.com Reported-by: syzbot+9e762b52dd17e616a7a5@syzkaller.appspotmail.com Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- include/linux/filter.h | 60 ++++++++++++++++++++++++++---------------- kernel/bpf/core.c | 53 ++++++++++++++++++++++++++++++++----- kernel/bpf/syscall.c | 4 +-- 3 files changed, 86 insertions(+), 31 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index 297c56fa9cee..108f9812e196 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -469,7 +469,8 @@ struct sock_fprog_kern { }; struct bpf_binary_header { - unsigned int pages; + u16 pages; + u16 locked:1; u8 image[]; }; @@ -671,15 +672,18 @@ bpf_ctx_narrow_access_ok(u32 off, u32 size, u32 size_default) #define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0])) -#ifdef CONFIG_ARCH_HAS_SET_MEMORY static inline void bpf_prog_lock_ro(struct bpf_prog *fp) { +#ifdef CONFIG_ARCH_HAS_SET_MEMORY fp->locked = 1; - WARN_ON_ONCE(set_memory_ro((unsigned long)fp, fp->pages)); + if (set_memory_ro((unsigned long)fp, fp->pages)) + fp->locked = 0; +#endif } static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) { +#ifdef CONFIG_ARCH_HAS_SET_MEMORY if (fp->locked) { WARN_ON_ONCE(set_memory_rw((unsigned long)fp, fp->pages)); /* In case set_memory_rw() fails, we want to be the first @@ -687,34 +691,30 @@ static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) */ fp->locked = 0; } +#endif } static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr) { - WARN_ON_ONCE(set_memory_ro((unsigned long)hdr, hdr->pages)); +#ifdef CONFIG_ARCH_HAS_SET_MEMORY + hdr->locked = 1; + if (set_memory_ro((unsigned long)hdr, hdr->pages)) + hdr->locked = 0; +#endif } static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr) { - WARN_ON_ONCE(set_memory_rw((unsigned long)hdr, hdr->pages)); +#ifdef CONFIG_ARCH_HAS_SET_MEMORY + if (hdr->locked) { + WARN_ON_ONCE(set_memory_rw((unsigned long)hdr, hdr->pages)); + /* In case set_memory_rw() fails, we want to be the first + * to crash here instead of some random place later on. + */ + hdr->locked = 0; + } +#endif } -#else -static inline void bpf_prog_lock_ro(struct bpf_prog *fp) -{ -} - -static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) -{ -} - -static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr) -{ -} - -static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr) -{ -} -#endif /* CONFIG_ARCH_HAS_SET_MEMORY */ static inline struct bpf_binary_header * bpf_jit_binary_hdr(const struct bpf_prog *fp) @@ -725,6 +725,22 @@ bpf_jit_binary_hdr(const struct bpf_prog *fp) return (void *)addr; } +#ifdef CONFIG_ARCH_HAS_SET_MEMORY +static inline int bpf_prog_check_pages_ro_single(const struct bpf_prog *fp) +{ + if (!fp->locked) + return -ENOLCK; + if (fp->jited) { + const struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp); + + if (!hdr->locked) + return -ENOLCK; + } + + return 0; +} +#endif + int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap); static inline int sk_filter(struct sock *sk, struct sk_buff *skb) { diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 1061968adcc1..a9e6c04d0f4a 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -598,6 +598,8 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, bpf_fill_ill_insns(hdr, size); hdr->pages = size / PAGE_SIZE; + hdr->locked = 0; + hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)), PAGE_SIZE - sizeof(*hdr)); start = (get_random_int() % hole) & ~(alignment - 1); @@ -1448,6 +1450,33 @@ static int bpf_check_tail_call(const struct bpf_prog *fp) return 0; } +static int bpf_prog_check_pages_ro_locked(const struct bpf_prog *fp) +{ +#ifdef CONFIG_ARCH_HAS_SET_MEMORY + int i, err; + + for (i = 0; i < fp->aux->func_cnt; i++) { + err = bpf_prog_check_pages_ro_single(fp->aux->func[i]); + if (err) + return err; + } + + return bpf_prog_check_pages_ro_single(fp); +#endif + return 0; +} + +static void bpf_prog_select_func(struct bpf_prog *fp) +{ +#ifndef CONFIG_BPF_JIT_ALWAYS_ON + u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1); + + fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1]; +#else + fp->bpf_func = __bpf_prog_ret0_warn; +#endif +} + /** * bpf_prog_select_runtime - select exec runtime for BPF program * @fp: bpf_prog populated with internal BPF program @@ -1458,13 +1487,13 @@ static int bpf_check_tail_call(const struct bpf_prog *fp) */ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) { -#ifndef CONFIG_BPF_JIT_ALWAYS_ON - u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1); + /* In case of BPF to BPF calls, verifier did all the prep + * work with regards to JITing, etc. + */ + if (fp->bpf_func) + goto finalize; - fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1]; -#else - fp->bpf_func = __bpf_prog_ret0_warn; -#endif + bpf_prog_select_func(fp); /* eBPF JITs can rewrite the program in case constant * blinding is active. However, in case of error during @@ -1485,6 +1514,8 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) if (*err) return fp; } + +finalize: bpf_prog_lock_ro(fp); /* The tail call compatibility check can only be done at @@ -1493,7 +1524,17 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) * all eBPF JITs might immediately support all features. */ *err = bpf_check_tail_call(fp); + if (*err) + return fp; + /* Checkpoint: at this point onwards any cBPF -> eBPF or + * native eBPF program is read-only. If we failed to change + * the page attributes (e.g. allocation failure from + * splitting large pages), then reject the whole program + * in order to guarantee not ending up with any W+X pages + * from BPF side in kernel. + */ + *err = bpf_prog_check_pages_ro_locked(fp); return fp; } EXPORT_SYMBOL_GPL(bpf_prog_select_runtime); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 0f62692fe635..35dc466641f2 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1353,9 +1353,7 @@ static int bpf_prog_load(union bpf_attr *attr) if (err < 0) goto free_used_maps; - /* eBPF program is ready to be JITed */ - if (!prog->bpf_func) - prog = bpf_prog_select_runtime(prog, &err); + prog = bpf_prog_select_runtime(prog, &err); if (err < 0) goto free_used_maps; From 6d5fc1957989266006db6ef3dfb9159b42cf0189 Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Thu, 14 Jun 2018 11:07:42 +0900 Subject: [PATCH 086/572] xdp: Fix handling of devmap in generic XDP Commit 67f29e07e131 ("bpf: devmap introduce dev_map_enqueue") changed the return value type of __devmap_lookup_elem() from struct net_device * to struct bpf_dtab_netdev * but forgot to modify generic XDP code accordingly. Thus generic XDP incorrectly used struct bpf_dtab_netdev where struct net_device is expected, then skb->dev was set to invalid value. v2: - Fix compiler warning without CONFIG_BPF_SYSCALL. Fixes: 67f29e07e131 ("bpf: devmap introduce dev_map_enqueue") Signed-off-by: Toshiaki Makita Acked-by: Yonghong Song Acked-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 12 ++++++++++++ include/linux/filter.h | 16 ++++++++++++++++ kernel/bpf/devmap.c | 14 ++++++++++++++ net/core/filter.c | 21 ++++----------------- 4 files changed, 46 insertions(+), 17 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 995c3b1e59bf..7df32a3200f7 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -488,12 +488,15 @@ void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth); /* Map specifics */ struct xdp_buff; +struct sk_buff; struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key); void __dev_map_insert_ctx(struct bpf_map *map, u32 index); void __dev_map_flush(struct bpf_map *map); int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, struct net_device *dev_rx); +int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, + struct bpf_prog *xdp_prog); struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key); void __cpu_map_insert_ctx(struct bpf_map *map, u32 index); @@ -586,6 +589,15 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, return 0; } +struct sk_buff; + +static inline int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, + struct sk_buff *skb, + struct bpf_prog *xdp_prog) +{ + return 0; +} + static inline struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key) { diff --git a/include/linux/filter.h b/include/linux/filter.h index 108f9812e196..b615df57b7d5 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -802,6 +803,21 @@ static inline bool bpf_dump_raw_ok(void) struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len); +static inline int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, + struct net_device *fwd) +{ + unsigned int len; + + if (unlikely(!(fwd->flags & IFF_UP))) + return -ENETDOWN; + + len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN; + if (skb->len > len) + return -EMSGSIZE; + + return 0; +} + /* The pair of xdp_do_redirect and xdp_do_flush_map MUST be called in the * same cpu context. Further for best results no more than a single map * for the do_redirect/do_flush pair should be used. This limitation is diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index a7cc7b3494a9..642c97f6d1b8 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -345,6 +345,20 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, return bq_enqueue(dst, xdpf, dev_rx); } +int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, + struct bpf_prog *xdp_prog) +{ + int err; + + err = __xdp_generic_ok_fwd_dev(skb, dst->dev); + if (unlikely(err)) + return err; + skb->dev = dst->dev; + generic_xdp_tx(skb, xdp_prog); + + return 0; +} + static void *dev_map_lookup_elem(struct bpf_map *map, void *key) { struct bpf_dtab_netdev *obj = __dev_map_lookup_elem(map, *(u32 *)key); diff --git a/net/core/filter.c b/net/core/filter.c index 3d9ba7e5965a..e7f12e9f598c 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3214,20 +3214,6 @@ err: } EXPORT_SYMBOL_GPL(xdp_do_redirect); -static int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, struct net_device *fwd) -{ - unsigned int len; - - if (unlikely(!(fwd->flags & IFF_UP))) - return -ENETDOWN; - - len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN; - if (skb->len > len) - return -EMSGSIZE; - - return 0; -} - static int xdp_do_generic_redirect_map(struct net_device *dev, struct sk_buff *skb, struct xdp_buff *xdp, @@ -3256,10 +3242,11 @@ static int xdp_do_generic_redirect_map(struct net_device *dev, } if (map->map_type == BPF_MAP_TYPE_DEVMAP) { - if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd)))) + struct bpf_dtab_netdev *dst = fwd; + + err = dev_map_generic_redirect(dst, skb, xdp_prog); + if (unlikely(err)) goto err; - skb->dev = fwd; - generic_xdp_tx(skb, xdp_prog); } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) { struct xdp_sock *xs = fwd; From 9bbe60a67be5a1c6f79b3c9be5003481a50529ff Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 16 Jun 2018 11:55:44 +0100 Subject: [PATCH 087/572] atm: Preserve value of skb->truesize when accounting to vcc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ATM accounts for in-flight TX packets in sk_wmem_alloc of the VCC on which they are to be sent. But it doesn't take ownership of those packets from the sock (if any) which originally owned them. They should remain owned by their actual sender until they've left the box. There's a hack in pskb_expand_head() to avoid adjusting skb->truesize for certain skbs, precisely to avoid messing up sk_wmem_alloc accounting. Ideally that hack would cover the ATM use case too, but it doesn't — skbs which aren't owned by any sock, for example PPP control frames, still get their truesize adjusted when the low-level ATM driver adds headroom. This has always been an issue, it seems. The truesize of a packet increases, and sk_wmem_alloc on the VCC goes negative. But this wasn't for normal traffic, only for control frames. So I think we just got away with it, and we probably needed to send 2GiB of LCP echo frames before the misaccounting would ever have caused a problem and caused atm_may_send() to start refusing packets. Commit 14afee4b609 ("net: convert sock.sk_wmem_alloc from atomic_t to refcount_t") did exactly what it was intended to do, and turned this mostly-theoretical problem into a real one, causing PPPoATM to fail immediately as sk_wmem_alloc underflows and atm_may_send() *immediately* starts refusing to allow new packets. The least intrusive solution to this problem is to stash the value of skb->truesize that was accounted to the VCC, in a new member of the ATM_SKB(skb) structure. Then in atm_pop_raw() subtract precisely that value instead of the then-current value of skb->truesize. Fixes: 158f323b9868 ("net: adjust skb->truesize in pskb_expand_head()") Signed-off-by: David Woodhouse Tested-by: Kevin Darbyshire-Bryant Signed-off-by: David S. Miller --- include/linux/atmdev.h | 15 +++++++++++++++ net/atm/br2684.c | 3 +-- net/atm/clip.c | 3 +-- net/atm/common.c | 3 +-- net/atm/lec.c | 3 +-- net/atm/mpc.c | 3 +-- net/atm/pppoatm.c | 3 +-- net/atm/raw.c | 4 ++-- 8 files changed, 23 insertions(+), 14 deletions(-) diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index 0c27515d2cf6..8124815eb121 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -214,6 +214,7 @@ struct atmphy_ops { struct atm_skb_data { struct atm_vcc *vcc; /* ATM VCC */ unsigned long atm_options; /* ATM layer options */ + unsigned int acct_truesize; /* truesize accounted to vcc */ }; #define VCC_HTABLE_SIZE 32 @@ -241,6 +242,20 @@ void vcc_insert_socket(struct sock *sk); void atm_dev_release_vccs(struct atm_dev *dev); +static inline void atm_account_tx(struct atm_vcc *vcc, struct sk_buff *skb) +{ + /* + * Because ATM skbs may not belong to a sock (and we don't + * necessarily want to), skb->truesize may be adjusted, + * escaping the hack in pskb_expand_head() which avoids + * doing so for some cases. So stash the value of truesize + * at the time we accounted it, and atm_pop_raw() can use + * that value later, in case it changes. + */ + refcount_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); + ATM_SKB(skb)->acct_truesize = skb->truesize; + ATM_SKB(skb)->atm_options = vcc->atm_options; +} static inline void atm_force_charge(struct atm_vcc *vcc,int truesize) { diff --git a/net/atm/br2684.c b/net/atm/br2684.c index 36b3adacc0dd..10462de734ea 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -252,8 +252,7 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct net_device *dev, ATM_SKB(skb)->vcc = atmvcc = brvcc->atmvcc; pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, atmvcc, atmvcc->dev); - refcount_add(skb->truesize, &sk_atm(atmvcc)->sk_wmem_alloc); - ATM_SKB(skb)->atm_options = atmvcc->atm_options; + atm_account_tx(atmvcc, skb); dev->stats.tx_packets++; dev->stats.tx_bytes += skb->len; diff --git a/net/atm/clip.c b/net/atm/clip.c index 66caa48a27c2..d795b9c5aea4 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -381,8 +381,7 @@ static netdev_tx_t clip_start_xmit(struct sk_buff *skb, memcpy(here, llc_oui, sizeof(llc_oui)); ((__be16 *) here)[3] = skb->protocol; } - refcount_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); - ATM_SKB(skb)->atm_options = vcc->atm_options; + atm_account_tx(vcc, skb); entry->vccs->last_use = jiffies; pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, vcc, vcc->dev); old = xchg(&entry->vccs->xoff, 1); /* assume XOFF ... */ diff --git a/net/atm/common.c b/net/atm/common.c index 1f2af59935db..ff5748b2190f 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -630,10 +630,9 @@ int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size) goto out; } pr_debug("%d += %d\n", sk_wmem_alloc_get(sk), skb->truesize); - refcount_add(skb->truesize, &sk->sk_wmem_alloc); + atm_account_tx(vcc, skb); skb->dev = NULL; /* for paths shared with net_device interfaces */ - ATM_SKB(skb)->atm_options = vcc->atm_options; if (!copy_from_iter_full(skb_put(skb, size), size, &m->msg_iter)) { kfree_skb(skb); error = -EFAULT; diff --git a/net/atm/lec.c b/net/atm/lec.c index 5a95fcf6f9b6..d7f5cf5b7594 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -182,9 +182,8 @@ lec_send(struct atm_vcc *vcc, struct sk_buff *skb) struct net_device *dev = skb->dev; ATM_SKB(skb)->vcc = vcc; - ATM_SKB(skb)->atm_options = vcc->atm_options; + atm_account_tx(vcc, skb); - refcount_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); if (vcc->send(vcc, skb) < 0) { dev->stats.tx_dropped++; return; diff --git a/net/atm/mpc.c b/net/atm/mpc.c index 75620c2f2617..24b53c4c39c6 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -555,8 +555,7 @@ static int send_via_shortcut(struct sk_buff *skb, struct mpoa_client *mpc) sizeof(struct llc_snap_hdr)); } - refcount_add(skb->truesize, &sk_atm(entry->shortcut)->sk_wmem_alloc); - ATM_SKB(skb)->atm_options = entry->shortcut->atm_options; + atm_account_tx(entry->shortcut, skb); entry->shortcut->send(entry->shortcut, skb); entry->packets_fwded++; mpc->in_ops->put(entry); diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c index 21d9d341a619..af8c4b38b746 100644 --- a/net/atm/pppoatm.c +++ b/net/atm/pppoatm.c @@ -350,8 +350,7 @@ static int pppoatm_send(struct ppp_channel *chan, struct sk_buff *skb) return 1; } - refcount_add(skb->truesize, &sk_atm(ATM_SKB(skb)->vcc)->sk_wmem_alloc); - ATM_SKB(skb)->atm_options = ATM_SKB(skb)->vcc->atm_options; + atm_account_tx(vcc, skb); pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, ATM_SKB(skb)->vcc, ATM_SKB(skb)->vcc->dev); ret = ATM_SKB(skb)->vcc->send(ATM_SKB(skb)->vcc, skb) diff --git a/net/atm/raw.c b/net/atm/raw.c index ee10e8d46185..b3ba44aab0ee 100644 --- a/net/atm/raw.c +++ b/net/atm/raw.c @@ -35,8 +35,8 @@ static void atm_pop_raw(struct atm_vcc *vcc, struct sk_buff *skb) struct sock *sk = sk_atm(vcc); pr_debug("(%d) %d -= %d\n", - vcc->vci, sk_wmem_alloc_get(sk), skb->truesize); - WARN_ON(refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc)); + vcc->vci, sk_wmem_alloc_get(sk), ATM_SKB(skb)->acct_truesize); + WARN_ON(refcount_sub_and_test(ATM_SKB(skb)->acct_truesize, &sk->sk_wmem_alloc)); dev_kfree_skb_any(skb); sk->sk_write_space(sk); } From a9122886d9848d00a01888116a58624b9ba95cdc Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 17 Jun 2018 08:38:55 +0900 Subject: [PATCH 088/572] bluetooth: hci_nokia: Don't include linux/unaligned/le_struct.h directly. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This breaks the build as this header is not meant to be used in this way. ./include/linux/unaligned/access_ok.h:8:28: error: redefinition of ‘get_unaligned_le16’ static __always_inline u16 get_unaligned_le16(const void *p) ^~~~~~~~~~~~~~~~~~ In file included from drivers/bluetooth/hci_nokia.c:32: ./include/linux/unaligned/le_struct.h:7:19: note: previous definition of ‘get_unaligned_le16’ was here static inline u16 get_unaligned_le16(const void *p) Use asm/unaligned.h instead. Signed-off-by: David S. Miller --- drivers/bluetooth/hci_nokia.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/hci_nokia.c b/drivers/bluetooth/hci_nokia.c index 14d159e2042d..2dc33e65d2d0 100644 --- a/drivers/bluetooth/hci_nokia.c +++ b/drivers/bluetooth/hci_nokia.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include From 536e0019b7da4eb3badb4da5acbb70ae29e1b5ef Mon Sep 17 00:00:00 2001 From: Helge Eichelberg Date: Tue, 5 Jun 2018 19:38:32 +0200 Subject: [PATCH 089/572] hwmon: (dell-smm) Disable fan support for Dell XPS13 9333 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Calling fan related SMM functions implemented by Dell BIOS firmware on Dell XPS13 9333 freeze kernel for about 500ms. Until Dell fixes it we need to disable fan support for Dell XPS13 9333. Via "force" module param fan support can be enabled. Link: https://bugzilla.kernel.org/show_bug.cgi?id=195751 Signed-off-by: Helge Eichelberg Reviewed-by: Pali Rohár Signed-off-by: Guenter Roeck --- drivers/hwmon/dell-smm-hwmon.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c index bf3bb7e1adab..9d3ef879dc51 100644 --- a/drivers/hwmon/dell-smm-hwmon.c +++ b/drivers/hwmon/dell-smm-hwmon.c @@ -1074,6 +1074,13 @@ static struct dmi_system_id i8k_blacklist_fan_support_dmi_table[] __initdata = { DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Vostro 3360"), }, }, + { + .ident = "Dell XPS13 9333", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "XPS13 9333"), + }, + }, { } }; From 91bb8f45f73f19a0150c233c0f11cdeb6d71d1e9 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 12 Jun 2018 15:19:35 -0700 Subject: [PATCH 090/572] hwmon: (nct6775) Fix loop limit Commit cc66b3038254 ("hwmon: (nct6775) Rework temperature source and label handling") changed a loop limit from "data->temp_label_num - 1" to "32", as part of moving from a string array to a bit mask. This results in the following error, reported by UBSAN. UBSAN: Undefined behaviour in drivers/hwmon/nct6775.c:4179:27 shift exponent 32 is too large for 32-bit type 'long unsigned int' Similar to the original loop, the limit has to be one less than the number of bits. Fixes: cc66b3038254 ("hwmon: (nct6775) Rework temperature source and label handling") Reported-by: Paul Menzel Cc: Paul Menzel Tested-by: Paul Menzel Signed-off-by: Guenter Roeck --- drivers/hwmon/nct6775.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c index 155d4d1d1585..f9d1349c3286 100644 --- a/drivers/hwmon/nct6775.c +++ b/drivers/hwmon/nct6775.c @@ -4175,7 +4175,7 @@ static int nct6775_probe(struct platform_device *pdev) * The temperature is already monitored if the respective bit in * is set. */ - for (i = 0; i < 32; i++) { + for (i = 0; i < 31; i++) { if (!(data->temp_mask & BIT(i + 1))) continue; if (!reg_temp_alternate[i]) From 7e85dc8cb35abf16455f1511f0670b57c1a84608 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Fri, 15 Jun 2018 13:27:31 +0300 Subject: [PATCH 091/572] net_sched: blackhole: tell upper qdisc about dropped packets When blackhole is used on top of classful qdisc like hfsc it breaks qlen and backlog counters because packets are disappear without notice. In HFSC non-zero qlen while all classes are inactive triggers warning: WARNING: ... at net/sched/sch_hfsc.c:1393 hfsc_dequeue+0xba4/0xe90 [sch_hfsc] and schedules watchdog work endlessly. This patch return __NET_XMIT_BYPASS in addition to NET_XMIT_SUCCESS, this flag tells upper layer: this packet is gone and isn't queued. Signed-off-by: Konstantin Khlebnikov Signed-off-by: David S. Miller --- net/sched/sch_blackhole.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/sch_blackhole.c b/net/sched/sch_blackhole.c index c98a61e980ba..9c4c2bb547d7 100644 --- a/net/sched/sch_blackhole.c +++ b/net/sched/sch_blackhole.c @@ -21,7 +21,7 @@ static int blackhole_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { qdisc_drop(skb, sch, to_free); - return NET_XMIT_SUCCESS; + return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; } static struct sk_buff *blackhole_dequeue(struct Qdisc *sch) From df07101e1c4a29e820df02f9989a066988b160e6 Mon Sep 17 00:00:00 2001 From: Sean Nyekjaer Date: Tue, 22 May 2018 19:45:09 +0200 Subject: [PATCH 092/572] ARM: dts: imx6q: Use correct SDMA script for SPI5 core According to the reference manual the shp_2_mcu / mcu_2_shp scripts must be used for devices connected through the SPBA. This fixes an issue we saw with DMA transfers. Sometimes the SPI controller RX FIFO was not empty after a DMA transfer and the driver got stuck in the next PIO transfer when it read one word more than expected. commit dd4b487b32a35 ("ARM: dts: imx6: Use correct SDMA script for SPI cores") is fixing the same issue but only for SPI1 - 4. Fixes: 677940258dd8e ("ARM: dts: imx6q: enable dma for ecspi5") Signed-off-by: Sean Nyekjaer Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6q.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx6q.dtsi b/arch/arm/boot/dts/imx6q.dtsi index 70483ce72ba6..77f8f030dd07 100644 --- a/arch/arm/boot/dts/imx6q.dtsi +++ b/arch/arm/boot/dts/imx6q.dtsi @@ -90,7 +90,7 @@ clocks = <&clks IMX6Q_CLK_ECSPI5>, <&clks IMX6Q_CLK_ECSPI5>; clock-names = "ipg", "per"; - dmas = <&sdma 11 7 1>, <&sdma 12 7 2>; + dmas = <&sdma 11 8 1>, <&sdma 12 8 2>; dma-names = "rx", "tx"; status = "disabled"; }; From d5a6cabf02210b896a60eee7c04c670ee9ba6dca Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 13 Jun 2018 12:43:10 +0200 Subject: [PATCH 093/572] ALSA: hda/realtek - Fix pop noise on Lenovo P50 & co Some Lenovo laptops, e.g. Lenovo P50, showed the pop noise at resume or runtime resume. It turned out to be reduced by applying alc_no_shutup() just like TPT440 quirk does. Since there are many Lenovo models showing the same behavior, put this workaround in ALC269_FIXUP_THINKPAD_ACPI entry so that it's applied commonly to all such Lenovo machines. Reported-by: Hans de Goede Tested-by: Benjamin Berg Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index e9bd33ea538f..487ceb9fd038 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4995,7 +4995,6 @@ static void alc_fixup_tpt440_dock(struct hda_codec *codec, struct alc_spec *spec = codec->spec; if (action == HDA_FIXUP_ACT_PRE_PROBE) { - spec->shutup = alc_no_shutup; /* reduce click noise */ spec->reboot_notify = alc_d3_at_reboot; /* reduce noise */ spec->parse_flags = HDA_PINCFG_NO_HP_FIXUP; codec->power_save_node = 0; /* avoid click noises */ @@ -5394,6 +5393,13 @@ static void alc274_fixup_bind_dacs(struct hda_codec *codec, /* for hda_fixup_thinkpad_acpi() */ #include "thinkpad_helper.c" +static void alc_fixup_thinkpad_acpi(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + alc_fixup_no_shutup(codec, fix, action); /* reduce click noise */ + hda_fixup_thinkpad_acpi(codec, fix, action); +} + /* for dell wmi mic mute led */ #include "dell_wmi_helper.c" @@ -5946,7 +5952,7 @@ static const struct hda_fixup alc269_fixups[] = { }, [ALC269_FIXUP_THINKPAD_ACPI] = { .type = HDA_FIXUP_FUNC, - .v.func = hda_fixup_thinkpad_acpi, + .v.func = alc_fixup_thinkpad_acpi, .chained = true, .chain_id = ALC269_FIXUP_SKU_IGNORE, }, From 7919cd82b9f9be97111be386cbaac59958800ef9 Mon Sep 17 00:00:00 2001 From: Alastair Bridgewater Date: Fri, 15 Jun 2018 21:56:15 -0400 Subject: [PATCH 094/572] ALSA: hda/ca0132: Delete pointless assignments to struct auto_pin_cfg fields ca0132_config() was setting some values in the auto_pin_cfg for the codec... but it is called prior to snd_hda_parse_pin_defcfg(), which does a memset() to clear the entire structure as one of its first actions, making the entire exercise pointless. Kill all use of struct auto_pin_cfg from ca0132_config(). Signed-off-by: Alastair Bridgewater Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_ca0132.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c index 04e949aa01ad..c66834a9bac6 100644 --- a/sound/pci/hda/patch_ca0132.c +++ b/sound/pci/hda/patch_ca0132.c @@ -7343,7 +7343,6 @@ static const struct hda_codec_ops ca0132_patch_ops = { static void ca0132_config(struct hda_codec *codec) { struct ca0132_spec *spec = codec->spec; - struct auto_pin_cfg *cfg = &spec->autocfg; spec->dacs[0] = 0x2; spec->dacs[1] = 0x3; @@ -7405,12 +7404,7 @@ static void ca0132_config(struct hda_codec *codec) /* SPDIF I/O */ spec->dig_out = 0x05; spec->multiout.dig_out_nid = spec->dig_out; - cfg->dig_out_pins[0] = 0x0c; - cfg->dig_outs = 1; - cfg->dig_out_type[0] = HDA_PCM_TYPE_SPDIF; spec->dig_in = 0x09; - cfg->dig_in_pin = 0x0e; - cfg->dig_in_type = HDA_PCM_TYPE_SPDIF; break; case QUIRK_R3DI: codec_dbg(codec, "%s: QUIRK_R3DI applied.\n", __func__); @@ -7438,9 +7432,6 @@ static void ca0132_config(struct hda_codec *codec) /* SPDIF I/O */ spec->dig_out = 0x05; spec->multiout.dig_out_nid = spec->dig_out; - cfg->dig_out_pins[0] = 0x0c; - cfg->dig_outs = 1; - cfg->dig_out_type[0] = HDA_PCM_TYPE_SPDIF; break; default: spec->num_outputs = 2; @@ -7463,12 +7454,7 @@ static void ca0132_config(struct hda_codec *codec) /* SPDIF I/O */ spec->dig_out = 0x05; spec->multiout.dig_out_nid = spec->dig_out; - cfg->dig_out_pins[0] = 0x0c; - cfg->dig_outs = 1; - cfg->dig_out_type[0] = HDA_PCM_TYPE_SPDIF; spec->dig_in = 0x09; - cfg->dig_in_pin = 0x0e; - cfg->dig_in_type = HDA_PCM_TYPE_SPDIF; break; } } From a3d90d6775e2066a100b9124e40880bd317d2e02 Mon Sep 17 00:00:00 2001 From: Alastair Bridgewater Date: Fri, 15 Jun 2018 21:56:16 -0400 Subject: [PATCH 095/572] ALSA: hda/ca0132: Delete redundant UNSOL event requests During ca0132_init(), ca0132_init_unsol() is run before the spec->spec_init_verbs are written. ca0132_init_unsol() calls snd_hda_jack_detect_enable_callback(), which requests UNSOL events for three or four nodes, two of which were also (redundantly) requested by spec_init_verbs. Kill the redundant AC_VERB_SET_UNSOLICITED_ENABLE verbs. Signed-off-by: Alastair Bridgewater Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_ca0132.c | 32 +++++++++++--------------------- 1 file changed, 11 insertions(+), 21 deletions(-) diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c index c66834a9bac6..dba731c0bf53 100644 --- a/sound/pci/hda/patch_ca0132.c +++ b/sound/pci/hda/patch_ca0132.c @@ -7462,7 +7462,7 @@ static void ca0132_config(struct hda_codec *codec) static int ca0132_prepare_verbs(struct hda_codec *codec) { /* Verbs + terminator (an empty element) */ -#define NUM_SPEC_VERBS 4 +#define NUM_SPEC_VERBS 2 struct ca0132_spec *spec = codec->spec; spec->chip_init_verbs = ca0132_init_verbs0; @@ -7474,34 +7474,24 @@ static int ca0132_prepare_verbs(struct hda_codec *codec) if (!spec->spec_init_verbs) return -ENOMEM; - /* HP jack autodetection */ - spec->spec_init_verbs[0].nid = spec->unsol_tag_hp; - spec->spec_init_verbs[0].param = AC_VERB_SET_UNSOLICITED_ENABLE; - spec->spec_init_verbs[0].verb = AC_USRSP_EN | spec->unsol_tag_hp; - - /* MIC1 jack autodetection */ - spec->spec_init_verbs[1].nid = spec->unsol_tag_amic1; - spec->spec_init_verbs[1].param = AC_VERB_SET_UNSOLICITED_ENABLE; - spec->spec_init_verbs[1].verb = AC_USRSP_EN | spec->unsol_tag_amic1; - /* config EAPD */ - spec->spec_init_verbs[2].nid = 0x0b; - spec->spec_init_verbs[2].param = 0x78D; - spec->spec_init_verbs[2].verb = 0x00; + spec->spec_init_verbs[0].nid = 0x0b; + spec->spec_init_verbs[0].param = 0x78D; + spec->spec_init_verbs[0].verb = 0x00; /* Previously commented configuration */ /* - spec->spec_init_verbs[3].nid = 0x0b; - spec->spec_init_verbs[3].param = AC_VERB_SET_EAPD_BTLENABLE; + spec->spec_init_verbs[2].nid = 0x0b; + spec->spec_init_verbs[2].param = AC_VERB_SET_EAPD_BTLENABLE; + spec->spec_init_verbs[2].verb = 0x02; + + spec->spec_init_verbs[3].nid = 0x10; + spec->spec_init_verbs[3].param = 0x78D; spec->spec_init_verbs[3].verb = 0x02; spec->spec_init_verbs[4].nid = 0x10; - spec->spec_init_verbs[4].param = 0x78D; + spec->spec_init_verbs[4].param = AC_VERB_SET_EAPD_BTLENABLE; spec->spec_init_verbs[4].verb = 0x02; - - spec->spec_init_verbs[5].nid = 0x10; - spec->spec_init_verbs[5].param = AC_VERB_SET_EAPD_BTLENABLE; - spec->spec_init_verbs[5].verb = 0x02; */ /* Terminator: spec->spec_init_verbs[NUM_SPEC_VERBS-1] */ From 365c7f25cd0a72b16979ba948e6c389a9a89818b Mon Sep 17 00:00:00 2001 From: Alastair Bridgewater Date: Fri, 15 Jun 2018 21:56:17 -0400 Subject: [PATCH 096/572] ALSA: hda/ca0132: Restore behavior of QUIRK_ALIENWARE Commit e93ac30a32a6ba7ac3b4b2a4379af1dadb91e505 (ALSA: HDA/ca0132: add extra init functions for r3di + sbz) introduced an extra initialization function that was improperly guarded, taking effect on systems with QUIRK_ALIENWARE, even though such systems were supposedly not affected. It may be that this piece of initialization should be done for all systems, but that's not a call that I can make. Fixes: e93ac30a32a6 ("ALSA: HDA/ca0132: add extra init functions for r3di + sbz") Signed-off-by: Alastair Bridgewater Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_ca0132.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c index dba731c0bf53..25fccce1eff1 100644 --- a/sound/pci/hda/patch_ca0132.c +++ b/sound/pci/hda/patch_ca0132.c @@ -7223,7 +7223,7 @@ static int ca0132_init(struct hda_codec *codec) snd_hda_sequence_write(codec, spec->base_init_verbs); - if (spec->quirk != QUIRK_NONE) + if (spec->use_alt_functions) ca0132_alt_init(codec); ca0132_download_dsp(codec); From 126b75e03894f3053b723130cb9b824e6f15d8e6 Mon Sep 17 00:00:00 2001 From: Alastair Bridgewater Date: Fri, 15 Jun 2018 21:56:18 -0400 Subject: [PATCH 097/572] ALSA: hda/ca0132: Don't test for QUIRK_NONE QUIRK_NONE is, quite explicitly, the default case. The entire point of a quirks system is to allow "programming by difference" from a given base case, which requires that merely defining a new quirk for some piece of hardware should not change the behavior of the driver for that hardware. In turn, this means that testing for QUIRK_NONE explicitly is a violation of that implicit contract. Change a test for QUIRK_NONE and QUIRK_ALIENWARE to default, and add a test for QUIRK_SBZ to disable the default behavior in that instance. Signed-off-by: Alastair Bridgewater Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_ca0132.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c index 25fccce1eff1..49635d1c9c39 100644 --- a/sound/pci/hda/patch_ca0132.c +++ b/sound/pci/hda/patch_ca0132.c @@ -7237,8 +7237,9 @@ static int ca0132_init(struct hda_codec *codec) case QUIRK_R3DI: r3di_setup_defaults(codec); break; - case QUIRK_NONE: - case QUIRK_ALIENWARE: + case QUIRK_SBZ: + break; + default: ca0132_setup_defaults(codec); ca0132_init_analog_mic2(codec); ca0132_init_dmic(codec); From 5f8ddc6ee63a9057522923a6b2eb8c51993e1db0 Mon Sep 17 00:00:00 2001 From: Alastair Bridgewater Date: Fri, 15 Jun 2018 21:56:19 -0400 Subject: [PATCH 098/572] ALSA: hda/ca0132: Restore PCM Analog Mic-In2 Commit 009b8f979bf8cb5f7ec6d3dd7683585122ed10f8 conditionalized adding the "CA0132 Analog Mic-In2" PCM with a comment to the effect that, "desktops don't use this ADC", but the test was set up such that the ADC was only created for desktops. Invert the test. Fixes: 009b8f979bf8 ("ALSA: hda/ca0132: update core functions for sbz + r3di") Signed-off-by: Alastair Bridgewater Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_ca0132.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c index 49635d1c9c39..09ff85c622bb 100644 --- a/sound/pci/hda/patch_ca0132.c +++ b/sound/pci/hda/patch_ca0132.c @@ -5966,7 +5966,7 @@ static int ca0132_build_pcms(struct hda_codec *codec) info->stream[SNDRV_PCM_STREAM_CAPTURE].nid = spec->adcs[0]; /* With the DSP enabled, desktops don't use this ADC. */ - if (spec->use_alt_functions) { + if (!spec->use_alt_functions) { info = snd_hda_codec_pcm_new(codec, "CA0132 Analog Mic-In2"); if (!info) return -ENOMEM; From a57a46b93244a0a916b894d8b2ca2a6cfe1904d3 Mon Sep 17 00:00:00 2001 From: Alastair Bridgewater Date: Fri, 15 Jun 2018 21:56:20 -0400 Subject: [PATCH 099/572] ALSA: hda/ca0132: Fix DMic data rate for Alienware M17x R4 The commentary says to use various parameters, and lays out what the mapping is... The code used a 32KHz rate when the comment says that it needs to use a 48KHz rate. And this has been the case since day one. On the Alienware M17x R4, the DMic used to have exceptionally quiet pickup and a lot of noise. Changing the data rate fixes both of these issues. Searching the kernel bug tracker for ca0132-related issues shows no mention of this being an issue for other hardware, and I have no other hardware to test with, so a quirk is used to limit the effect to just the M17x R4. Signed-off-by: Alastair Bridgewater Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_ca0132.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c index 09ff85c622bb..d62c56feaf7d 100644 --- a/sound/pci/hda/patch_ca0132.c +++ b/sound/pci/hda/patch_ca0132.c @@ -991,6 +991,7 @@ struct ca0132_spec { enum { QUIRK_NONE, QUIRK_ALIENWARE, + QUIRK_ALIENWARE_M17XR4, QUIRK_SBZ, QUIRK_R3DI, }; @@ -1040,6 +1041,7 @@ static const struct hda_pintbl r3di_pincfgs[] = { }; static const struct snd_pci_quirk ca0132_quirks[] = { + SND_PCI_QUIRK(0x1028, 0x057b, "Alienware M17x R4", QUIRK_ALIENWARE_M17XR4), SND_PCI_QUIRK(0x1028, 0x0685, "Alienware 15 2015", QUIRK_ALIENWARE), SND_PCI_QUIRK(0x1028, 0x0688, "Alienware 17 2015", QUIRK_ALIENWARE), SND_PCI_QUIRK(0x1028, 0x0708, "Alienware 15 R2 2016", QUIRK_ALIENWARE), @@ -6130,7 +6132,10 @@ static void ca0132_init_dmic(struct hda_codec *codec) * Bit 6: set to select Data2, clear for Data1 * Bit 7: set to enable DMic, clear for AMic */ - val = 0x23; + if (spec->quirk == QUIRK_ALIENWARE_M17XR4) + val = 0x33; + else + val = 0x23; /* keep a copy of dmic ctl val for enable/disable dmic purpuse */ spec->dmic_ctl = val; snd_hda_codec_write(codec, spec->input_pins[0], 0, From 3637f12faf507b0a4b8ac1e7115fc99583ab1db3 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Wed, 30 May 2018 09:30:42 +0800 Subject: [PATCH 100/572] soc: imx: gpcv2: correct PGC offset Correct MIPI/PCIe/USB_HSIC's PGC offset based on design RTL, the values in the Reference Manual (Rev. 1, 01/2018 and the older ones) are incorrect. The correct offset values should be as below: 0x800 ~ 0x83F: PGC for core0 of A7 platform; 0x840 ~ 0x87F: PGC for core1 of A7 platform; 0x880 ~ 0x8BF: PGC for SCU of A7 platform; 0xA00 ~ 0xA3F: PGC for fastmix/megamix; 0xC00 ~ 0xC3F: PGC for MIPI PHY; 0xC40 ~ 0xC7F: PGC for PCIe_PHY; 0xC80 ~ 0xCBF: PGC for USB OTG1 PHY; 0xCC0 ~ 0xCFF: PGC for USB OTG2 PHY; 0xD00 ~ 0xD3F: PGC for USB HSIC PHY; Signed-off-by: Anson Huang Fixes: 03aa12629fc4 ("soc: imx: Add GPCv2 power gating driver") Acked-by: Andrey Smirnov Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo --- drivers/soc/imx/gpcv2.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/soc/imx/gpcv2.c b/drivers/soc/imx/gpcv2.c index f4e3bd40c72e..6ef18cf8f243 100644 --- a/drivers/soc/imx/gpcv2.c +++ b/drivers/soc/imx/gpcv2.c @@ -39,10 +39,15 @@ #define GPC_M4_PU_PDN_FLG 0x1bc - -#define PGC_MIPI 4 -#define PGC_PCIE 5 -#define PGC_USB_HSIC 8 +/* + * The PGC offset values in Reference Manual + * (Rev. 1, 01/2018 and the older ones) GPC chapter's + * GPC_PGC memory map are incorrect, below offset + * values are from design RTL. + */ +#define PGC_MIPI 16 +#define PGC_PCIE 17 +#define PGC_USB_HSIC 20 #define GPC_PGC_CTRL(n) (0x800 + (n) * 0x40) #define GPC_PGC_SR(n) (GPC_PGC_CTRL(n) + 0xc) From 9fcf2b3c1c0276650fea537c71b513d27d929b05 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Sun, 17 Jun 2018 10:48:22 +0200 Subject: [PATCH 101/572] drm/atmel-hlcdc: check stride values in the first plane The statement always evaluates to true since the struct fields are arrays. This has shown up as a warning when compiling with clang: warning: address of array 'desc->layout.xstride' will always evaluate to 'true' [-Wpointer-bool-conversion] Check for values in the first plane instead. Fixes: 1a396789f65a ("drm: add Atmel HLCDC Display Controller support") Cc: Signed-off-by: Stefan Agner Signed-off-by: Boris Brezillon Link: https://patchwork.freedesktop.org/patch/msgid/20180617084826.31885-1-stefan@agner.ch --- drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c index e18800ed7cd1..7b8191eae68a 100644 --- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c +++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c @@ -875,7 +875,7 @@ static int atmel_hlcdc_plane_init_properties(struct atmel_hlcdc_plane *plane, drm_object_attach_property(&plane->base.base, props->alpha, 255); - if (desc->layout.xstride && desc->layout.pstride) { + if (desc->layout.xstride[0] && desc->layout.pstride[0]) { int ret; ret = drm_plane_create_rotation_property(&plane->base, From fb7298e1669ee84aa76f3483c91f1de4814aac11 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 31 May 2018 11:39:42 +0000 Subject: [PATCH 102/572] pinctrl: mediatek: remove redundant return value check of platform_get_resource() Remove unneeded error handling on the result of a call to platform_get_resource() when the value is passed to devm_ioremap_resource(). Signed-off-by: Wei Yongjun Signed-off-by: Linus Walleij --- drivers/pinctrl/mediatek/pinctrl-mtk-common.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c index b3799695d8db..16ff56f93501 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common.c +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common.c @@ -1000,11 +1000,6 @@ static int mtk_eint_init(struct mtk_pinctrl *pctl, struct platform_device *pdev) return -ENOMEM; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) { - dev_err(&pdev->dev, "Unable to get eint resource\n"); - return -ENODEV; - } - pctl->eint->base = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(pctl->eint->base)) return PTR_ERR(pctl->eint->base); From bc3322bc166a2905bc91f774d7b22773dc7c063a Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Thu, 7 Jun 2018 13:51:33 -0300 Subject: [PATCH 103/572] pinctrl: devicetree: Fix pctldev pointer overwrite MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit b89405b6102f ("pinctrl: devicetree: Fix dt_to_map_one_config handling of hogs") causes the pinctrl hog pins to not get initialized on i.MX platforms leaving them with the IOMUX settings untouched. This causes several regressions on i.MX such as: - OV5640 camera driver can not be probed anymore on imx6qdl-sabresd because the camera clock pin is in a pinctrl_hog group and since its pinctrl initialization is skipped, the camera clock is kept in GPIO functionality instead of CLK_CKO function. - Audio stopped working on imx6qdl-wandboard and imx53-qsb for the same reason. Richard Fitzgerald explains the problem: "I see the bug. If the hog node isn't a 1st level child of the pinctrl parent node it will go around the for(;;) loop again but on the first pass I overwrite pctldev with the result of get_pinctrl_dev_from_of_node() so it doesn't point to the pinctrl driver any more." Fix the issue by stashing the original pctldev so it doesn't get overwritten. Fixes: b89405b6102f ("pinctrl: devicetree: Fix dt_to_map_one_config handling of hogs") Cc: Reported-by: Mika Penttilä Reported-by: Steve Longerbeam Suggested-by: Richard Fitzgerald Signed-off-by: Fabio Estevam Reviewed-by: Dong Aisheng Reviewed-by: Richard Fitzgerald Signed-off-by: Linus Walleij --- drivers/pinctrl/devicetree.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/devicetree.c b/drivers/pinctrl/devicetree.c index b601039d6c69..c4aa411f5935 100644 --- a/drivers/pinctrl/devicetree.c +++ b/drivers/pinctrl/devicetree.c @@ -101,10 +101,11 @@ struct pinctrl_dev *of_pinctrl_get(struct device_node *np) } static int dt_to_map_one_config(struct pinctrl *p, - struct pinctrl_dev *pctldev, + struct pinctrl_dev *hog_pctldev, const char *statename, struct device_node *np_config) { + struct pinctrl_dev *pctldev = NULL; struct device_node *np_pctldev; const struct pinctrl_ops *ops; int ret; @@ -123,8 +124,10 @@ static int dt_to_map_one_config(struct pinctrl *p, return -EPROBE_DEFER; } /* If we're creating a hog we can use the passed pctldev */ - if (pctldev && (np_pctldev == p->dev->of_node)) + if (hog_pctldev && (np_pctldev == p->dev->of_node)) { + pctldev = hog_pctldev; break; + } pctldev = get_pinctrl_dev_from_of_node(np_pctldev); if (pctldev) break; From 7f57871f39912978e95db920ddbbfb2304a4bfbf Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 6 Jun 2018 14:43:38 +0100 Subject: [PATCH 104/572] pinctrl: single: Add allocation failure checking of saved_vals Currently saved_vals is being allocated and there is no check for failed allocation (which is more likely than normal when using GFP_ATOMIC). Fix this by checking for a failed allocation and propagating this error return down the the caller chain. Detected by CoverityScan, CID#1469841 ("Dereference null return value") Fixes: 88a1dbdec682 ("pinctrl: pinctrl-single: Add functions to save and restore pinctrl context") Signed-off-by: Colin Ian King Reviewed-by: Johan Hovold Acked-by: Tony Lindgren Signed-off-by: Linus Walleij --- drivers/pinctrl/pinctrl-single.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c index b3153c095199..e5647dac0818 100644 --- a/drivers/pinctrl/pinctrl-single.c +++ b/drivers/pinctrl/pinctrl-single.c @@ -1590,8 +1590,11 @@ static int pcs_save_context(struct pcs_device *pcs) mux_bytes = pcs->width / BITS_PER_BYTE; - if (!pcs->saved_vals) + if (!pcs->saved_vals) { pcs->saved_vals = devm_kzalloc(pcs->dev, pcs->size, GFP_ATOMIC); + if (!pcs->saved_vals) + return -ENOMEM; + } switch (pcs->width) { case 64: @@ -1651,8 +1654,13 @@ static int pinctrl_single_suspend(struct platform_device *pdev, if (!pcs) return -EINVAL; - if (pcs->flags & PCS_CONTEXT_LOSS_OFF) - pcs_save_context(pcs); + if (pcs->flags & PCS_CONTEXT_LOSS_OFF) { + int ret; + + ret = pcs_save_context(pcs); + if (ret < 0) + return ret; + } return pinctrl_force_sleep(pcs->pctl); } From 71fd5d07b791b90587f695f048ad82e8d4c1c67e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 8 Jun 2018 12:05:47 +0200 Subject: [PATCH 105/572] pinctrl: actions: Fix uninitialized error in owl_pin_config_set() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With gcc 4.1.2: drivers/pinctrl/actions/pinctrl-owl.c: In function ‘owl_pin_config_set’: drivers/pinctrl/actions/pinctrl-owl.c:336: warning: ‘ret’ may be used uninitialized in this function Indeed, if num_configs is zero, the uninitialized value will be returned as an error code. Fix this by preinitializing it to zero. Fixes: 2242ddfbf4d699b5 ("pinctrl: actions: Add Actions S900 pinctrl driver") Signed-off-by: Geert Uytterhoeven Acked-by: Manivannan Sadhasivam Signed-off-by: Linus Walleij --- drivers/pinctrl/actions/pinctrl-owl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/actions/pinctrl-owl.c b/drivers/pinctrl/actions/pinctrl-owl.c index 76243caa08c6..b5c880b50bb3 100644 --- a/drivers/pinctrl/actions/pinctrl-owl.c +++ b/drivers/pinctrl/actions/pinctrl-owl.c @@ -333,7 +333,7 @@ static int owl_pin_config_set(struct pinctrl_dev *pctrldev, unsigned long flags; unsigned int param; u32 reg, bit, width, arg; - int ret, i; + int ret = 0, i; info = &pctrl->soc->padinfo[pin]; From 5f591543a937310e48baf0ee23680be09cdedfb8 Mon Sep 17 00:00:00 2001 From: Sean Wang Date: Thu, 14 Jun 2018 16:55:49 +0800 Subject: [PATCH 106/572] pinctrl: mt7622: fix a kernel panic when pio don't work as EINT controller The function, external interrupt controller, is made as an optional to mt7622 pinctrl. But if we don't want pio behaves as an external interrupt controller, it would lead to hw->eint not be created properly and then will cause 'kernel NULL pointer' issue when gpiochip try to call .to_irq or .set_config. To fix it, check hw->eint before accessing the member. [ 1.339494] Unable to handle kernel NULL pointer dereference at virtual address 00000010 [ 1.347857] Mem abort info: [ 1.350742] ESR = 0x96000005 [ 1.353905] Exception class = DABT (current EL), IL = 32 bits [ 1.360024] SET = 0, FnV = 0 [ 1.363185] EA = 0, S1PTW = 0 [ 1.366431] Data abort info: [ 1.369405] ISV = 0, ISS = 0x00000005 [ 1.373363] CM = 0, WnR = 0 [ 1.376437] [0000000000000010] user address but active_mm is swapper [ 1.383005] Internal error: Oops: 96000005 [#1] PREEMPT SMP [ 1.388748] Modules linked in: [ 1.391897] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.16.0-rc1+ #344 [ 1.398625] Hardware name: MediaTek MT7622 RFB1 board (DT) [ 1.404279] pstate: 80000005 (Nzcv daif -PAN -UAO) [ 1.409221] pc : mtk_eint_find_irq+0x8/0x24 [ 1.413532] lr : mtk_gpio_to_irq+0x20/0x28 [ 1.417749] sp : ffffff800801baf0 [ 1.421161] x29: ffffff800801baf0 x28: ffffff8008792f40 [ 1.426637] x27: ffffff800886b000 x26: ffffff8008615620 [ 1.432113] x25: ffffffc00e4dbdc8 x24: ffffff80087b8000 [ 1.437589] x23: ffffffc00325a000 x22: ffffffc00325a010 [ 1.443066] x21: ffffffc0033dec18 x20: 00000000ffffffea [ 1.448542] x19: ffffffc00e4db800 x18: 0000000000000130 [ 1.454018] x17: 000000000000000e x16: 0000000000000007 [ 1.459494] x15: ffffff80085ee000 x14: 0000000000000001 [ 1.464970] x13: 0000000000000001 x12: 0000000000000010 [ 1.470446] x11: 0101010101010101 x10: 0000000000000880 [ 1.475922] x9 : ffffff800801b990 x8 : ffffffc0030688e0 [ 1.481399] x7 : ffffff80080c0660 x6 : ffffffc00e4dbbb0 [ 1.486875] x5 : 0000000000000000 x4 : 0000000000000000 [ 1.492351] x3 : ffffff80082a92f4 x2 : 00000000fffffffa [ 1.497826] x1 : 0000000000000051 x0 : 0000000000000000 [ 1.503305] Process swapper/0 (pid: 1, stack limit = 0x0000000054e053bd) [ 1.510210] Call trace: [ 1.512727] mtk_eint_find_irq+0x8/0x24 [ 1.516677] mtk_gpio_to_irq+0x20/0x28 [ 1.520539] gpiod_to_irq+0x48/0x60 [ 1.524135] mmc_gpiod_request_cd_irq+0x3c/0xc4 [ 1.528804] mmc_start_host+0x6c/0x8c [ 1.532575] mmc_add_host+0x58/0x7c [ 1.536168] msdc_drv_probe+0x4fc/0x67c [ 1.540121] platform_drv_probe+0x58/0xa4 [ 1.544251] driver_probe_device+0x204/0x44c [ 1.548649] __driver_attach+0x84/0xf8 [ 1.552512] bus_for_each_dev+0x68/0xa0 [ 1.556461] driver_attach+0x20/0x28 [ 1.560142] bus_add_driver+0xec/0x240 [ 1.564002] driver_register+0x98/0xe4 [ 1.567863] __platform_driver_register+0x48/0x50 [ 1.572711] mt_msdc_driver_init+0x18/0x20 [ 1.576932] do_one_initcall+0x98/0x130 [ 1.580886] kernel_init_freeable+0x13c/0x1d4 [ 1.585375] kernel_init+0x10/0xf8 [ 1.588879] ret_from_fork+0x10/0x18 [ 1.592564] Code: a8c67bfd d65f03c0 a9bf7bfd 910003fd (f9400800) [ 1.598849] ---[ end trace 4bbcb7bc30e98492 ]--- [ 1.603677] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b [ 1.603677] cc: Kevin Hilman Cc: stable@vger.kernel.org Fixes: e6dabd38d8e7 ("pinctrl: mediatek: add EINT support to MT7622 SoC") Signed-off-by: Sean Wang Signed-off-by: Linus Walleij --- drivers/pinctrl/mediatek/pinctrl-mt7622.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/mediatek/pinctrl-mt7622.c b/drivers/pinctrl/mediatek/pinctrl-mt7622.c index ad6da1184c9f..e3f1ab2290fc 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mt7622.c +++ b/drivers/pinctrl/mediatek/pinctrl-mt7622.c @@ -1459,6 +1459,9 @@ static int mtk_gpio_to_irq(struct gpio_chip *chip, unsigned int offset) struct mtk_pinctrl *hw = gpiochip_get_data(chip); unsigned long eint_n; + if (!hw->eint) + return -ENOTSUPP; + eint_n = offset; return mtk_eint_find_irq(hw->eint, eint_n); @@ -1471,7 +1474,8 @@ static int mtk_gpio_set_config(struct gpio_chip *chip, unsigned int offset, unsigned long eint_n; u32 debounce; - if (pinconf_to_config_param(config) != PIN_CONFIG_INPUT_DEBOUNCE) + if (!hw->eint || + pinconf_to_config_param(config) != PIN_CONFIG_INPUT_DEBOUNCE) return -ENOTSUPP; debounce = pinconf_to_config_argument(config); From 2f839823382748664b643daa73f41ee0cc01ced6 Mon Sep 17 00:00:00 2001 From: Karoly Pados Date: Sat, 9 Jun 2018 13:26:08 +0200 Subject: [PATCH 107/572] USB: serial: cp210x: add Silicon Labs IDs for Windows Update Silicon Labs defines alternative VID/PID pairs for some chips that when used will automatically install drivers for Windows users without manual intervention. Unfortunately, these IDs are not recognized by the Linux module, so using these IDs improves user experience on one platform but degrades it on Linux. This patch addresses this problem. Signed-off-by: Karoly Pados Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/cp210x.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index eb6c26cbe579..14cf657247b6 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -143,8 +143,11 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x8B34) }, /* Qivicon ZigBee USB Radio Stick */ { USB_DEVICE(0x10C4, 0xEA60) }, /* Silicon Labs factory default */ { USB_DEVICE(0x10C4, 0xEA61) }, /* Silicon Labs factory default */ + { USB_DEVICE(0x10C4, 0xEA63) }, /* Silicon Labs Windows Update (CP2101-4/CP2102N) */ { USB_DEVICE(0x10C4, 0xEA70) }, /* Silicon Labs factory default */ { USB_DEVICE(0x10C4, 0xEA71) }, /* Infinity GPS-MIC-1 Radio Monophone */ + { USB_DEVICE(0x10C4, 0xEA7A) }, /* Silicon Labs Windows Update (CP2105) */ + { USB_DEVICE(0x10C4, 0xEA7B) }, /* Silicon Labs Windows Update (CP2108) */ { USB_DEVICE(0x10C4, 0xF001) }, /* Elan Digital Systems USBscope50 */ { USB_DEVICE(0x10C4, 0xF002) }, /* Elan Digital Systems USBwave12 */ { USB_DEVICE(0x10C4, 0xF003) }, /* Elan Digital Systems USBpulse100 */ From 58b3d02f066e5b1480d80bd308c545526eea3250 Mon Sep 17 00:00:00 2001 From: Paul Kocialkowski Date: Wed, 13 Jun 2018 10:16:47 +0200 Subject: [PATCH 108/572] Revert "drm/sun4i: Handle DRM_BUS_FLAG_PIXDATA_*EDGE" This reverts commit 2c17a4368aad2b88b68e4390c819e226cf320f70. The offending commit triggers a run-time fault when accessing the panel element of the sun4i_tcon structure when no such panel is attached. It was apparently assumed in said commit that a panel is always used with the TCON. Although it is often the case, this is not always true. For instance a bridge might be used instead of a panel. This issue was discovered using an A13-OLinuXino, that uses the TCON in RGB mode for a simple DAC-based VGA bridge. Cc: stable@vger.kernel.org Signed-off-by: Paul Kocialkowski Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20180613081647.31183-1-paul.kocialkowski@bootlin.com --- drivers/gpu/drm/sun4i/sun4i_tcon.c | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c index c3d92d537240..8045871335b5 100644 --- a/drivers/gpu/drm/sun4i/sun4i_tcon.c +++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c @@ -17,7 +17,6 @@ #include #include #include -#include #include @@ -350,9 +349,6 @@ static void sun4i_tcon0_mode_set_lvds(struct sun4i_tcon *tcon, static void sun4i_tcon0_mode_set_rgb(struct sun4i_tcon *tcon, const struct drm_display_mode *mode) { - struct drm_panel *panel = tcon->panel; - struct drm_connector *connector = panel->connector; - struct drm_display_info display_info = connector->display_info; unsigned int bp, hsync, vsync; u8 clk_delay; u32 val = 0; @@ -410,27 +406,6 @@ static void sun4i_tcon0_mode_set_rgb(struct sun4i_tcon *tcon, if (mode->flags & DRM_MODE_FLAG_PVSYNC) val |= SUN4I_TCON0_IO_POL_VSYNC_POSITIVE; - /* - * On A20 and similar SoCs, the only way to achieve Positive Edge - * (Rising Edge), is setting dclk clock phase to 2/3(240°). - * By default TCON works in Negative Edge(Falling Edge), - * this is why phase is set to 0 in that case. - * Unfortunately there's no way to logically invert dclk through - * IO_POL register. - * The only acceptable way to work, triple checked with scope, - * is using clock phase set to 0° for Negative Edge and set to 240° - * for Positive Edge. - * On A33 and similar SoCs there would be a 90° phase option, - * but it divides also dclk by 2. - * Following code is a way to avoid quirks all around TCON - * and DOTCLOCK drivers. - */ - if (display_info.bus_flags & DRM_BUS_FLAG_PIXDATA_POSEDGE) - clk_set_phase(tcon->dclk, 240); - - if (display_info.bus_flags & DRM_BUS_FLAG_PIXDATA_NEGEDGE) - clk_set_phase(tcon->dclk, 0); - regmap_update_bits(tcon->regs, SUN4I_TCON0_IO_POL_REG, SUN4I_TCON0_IO_POL_HSYNC_POSITIVE | SUN4I_TCON0_IO_POL_VSYNC_POSITIVE, val); From 24160628a34af962ac99f2f58e547ac3c4cbd26f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Mon, 18 Jun 2018 10:24:03 +0200 Subject: [PATCH 109/572] USB: serial: cp210x: add CESINEL device ids Add device ids for CESINEL products. Reported-by: Carlos Barcala Lara Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/cp210x.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 14cf657247b6..ee0cc1d90b51 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -95,6 +95,9 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x8156) }, /* B&G H3000 link cable */ { USB_DEVICE(0x10C4, 0x815E) }, /* Helicomm IP-Link 1220-DVM */ { USB_DEVICE(0x10C4, 0x815F) }, /* Timewave HamLinkUSB */ + { USB_DEVICE(0x10C4, 0x817C) }, /* CESINEL MEDCAL N Power Quality Monitor */ + { USB_DEVICE(0x10C4, 0x817D) }, /* CESINEL MEDCAL NT Power Quality Monitor */ + { USB_DEVICE(0x10C4, 0x817E) }, /* CESINEL MEDCAL S Power Quality Monitor */ { USB_DEVICE(0x10C4, 0x818B) }, /* AVIT Research USB to TTL */ { USB_DEVICE(0x10C4, 0x819F) }, /* MJS USB Toslink Switcher */ { USB_DEVICE(0x10C4, 0x81A6) }, /* ThinkOptics WavIt */ @@ -112,6 +115,9 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x826B) }, /* Cygnal Integrated Products, Inc., Fasttrax GPS demonstration module */ { USB_DEVICE(0x10C4, 0x8281) }, /* Nanotec Plug & Drive */ { USB_DEVICE(0x10C4, 0x8293) }, /* Telegesis ETRX2USB */ + { USB_DEVICE(0x10C4, 0x82EF) }, /* CESINEL FALCO 6105 AC Power Supply */ + { USB_DEVICE(0x10C4, 0x82F1) }, /* CESINEL MEDCAL EFD Earth Fault Detector */ + { USB_DEVICE(0x10C4, 0x82F2) }, /* CESINEL MEDCAL ST Network Analyzer */ { USB_DEVICE(0x10C4, 0x82F4) }, /* Starizona MicroTouch */ { USB_DEVICE(0x10C4, 0x82F9) }, /* Procyon AVS */ { USB_DEVICE(0x10C4, 0x8341) }, /* Siemens MC35PU GPRS Modem */ @@ -124,7 +130,9 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x8470) }, /* Juniper Networks BX Series System Console */ { USB_DEVICE(0x10C4, 0x8477) }, /* Balluff RFID */ { USB_DEVICE(0x10C4, 0x84B6) }, /* Starizona Hyperion */ + { USB_DEVICE(0x10C4, 0x851E) }, /* CESINEL MEDCAL PT Network Analyzer */ { USB_DEVICE(0x10C4, 0x85A7) }, /* LifeScan OneTouch Verio IQ */ + { USB_DEVICE(0x10C4, 0x85B8) }, /* CESINEL ReCon T Energy Logger */ { USB_DEVICE(0x10C4, 0x85EA) }, /* AC-Services IBUS-IF */ { USB_DEVICE(0x10C4, 0x85EB) }, /* AC-Services CIS-IBUS */ { USB_DEVICE(0x10C4, 0x85F8) }, /* Virtenio Preon32 */ @@ -134,10 +142,13 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x8857) }, /* CEL EM357 ZigBee USB Stick */ { USB_DEVICE(0x10C4, 0x88A4) }, /* MMB Networks ZigBee USB Device */ { USB_DEVICE(0x10C4, 0x88A5) }, /* Planet Innovation Ingeni ZigBee USB Device */ + { USB_DEVICE(0x10C4, 0x88FB) }, /* CESINEL MEDCAL STII Network Analyzer */ + { USB_DEVICE(0x10C4, 0x8938) }, /* CESINEL MEDCAL S II Network Analyzer */ { USB_DEVICE(0x10C4, 0x8946) }, /* Ketra N1 Wireless Interface */ { USB_DEVICE(0x10C4, 0x8962) }, /* Brim Brothers charging dock */ { USB_DEVICE(0x10C4, 0x8977) }, /* CEL MeshWorks DevKit Device */ { USB_DEVICE(0x10C4, 0x8998) }, /* KCF Technologies PRN */ + { USB_DEVICE(0x10C4, 0x89A4) }, /* CESINEL FTBC Flexible Thyristor Bridge Controller */ { USB_DEVICE(0x10C4, 0x8A2A) }, /* HubZ dual ZigBee and Z-Wave dongle */ { USB_DEVICE(0x10C4, 0x8A5E) }, /* CEL EM3588 ZigBee USB Stick Long Range */ { USB_DEVICE(0x10C4, 0x8B34) }, /* Qivicon ZigBee USB Radio Stick */ From 00908693c481f7298adf8cf4d2ff3dfbea8c375f Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Thu, 15 Jun 2017 12:57:30 +0300 Subject: [PATCH 110/572] usb: dwc3: pci: add support for Intel IceLake PCI IDs for Intel IceLake. Signed-off-by: Heikki Krogerus Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/dwc3-pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index c961a94d136b..f57e7c94b8e5 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -34,6 +34,7 @@ #define PCI_DEVICE_ID_INTEL_GLK 0x31aa #define PCI_DEVICE_ID_INTEL_CNPLP 0x9dee #define PCI_DEVICE_ID_INTEL_CNPH 0xa36e +#define PCI_DEVICE_ID_INTEL_ICLLP 0x34ee #define PCI_INTEL_BXT_DSM_GUID "732b85d5-b7a7-4a1b-9ba0-4bbd00ffd511" #define PCI_INTEL_BXT_FUNC_PMU_PWR 4 @@ -289,6 +290,7 @@ static const struct pci_device_id dwc3_pci_id_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_GLK), }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CNPLP), }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CNPH), }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICLLP), }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_NL_USB), }, { } /* Terminating Entry */ }; From d52e4d0c0c428bf2ba35074a7495cdb28e2efbae Mon Sep 17 00:00:00 2001 From: Vincent Pelletier Date: Wed, 13 Jun 2018 11:05:06 +0000 Subject: [PATCH 111/572] usb: gadget: ffs: Fix BUG when userland exits with submitted AIO transfers This bug happens only when the UDC needs to sleep during usb_ep_dequeue, as is the case for (at least) dwc3. [ 382.200896] BUG: scheduling while atomic: screen/1808/0x00000100 [ 382.207124] 4 locks held by screen/1808: [ 382.211266] #0: (rcu_callback){....}, at: [] rcu_process_callbacks+0x260/0x440 [ 382.219949] #1: (rcu_read_lock_sched){....}, at: [] percpu_ref_switch_to_atomic_rcu+0xb0/0x130 [ 382.230034] #2: (&(&ctx->ctx_lock)->rlock){....}, at: [] free_ioctx_users+0x23/0xd0 [ 382.230096] #3: (&(&ffs->eps_lock)->rlock){....}, at: [] ffs_aio_cancel+0x20/0x60 [usb_f_fs] [ 382.230160] Modules linked in: usb_f_fs libcomposite configfs bnep btsdio bluetooth ecdh_generic brcmfmac brcmutil intel_powerclamp coretemp dwc3 kvm_intel ulpi udc_core kvm irqbypass crc32_pclmul crc32c_intel pcbc dwc3_pci aesni_intel aes_i586 crypto_simd cryptd ehci_pci ehci_hcd gpio_keys usbcore basincove_gpadc industrialio usb_common [ 382.230407] CPU: 1 PID: 1808 Comm: screen Not tainted 4.14.0-edison+ #117 [ 382.230416] Hardware name: Intel Corporation Merrifield/BODEGA BAY, BIOS 542 2015.01.21:18.19.48 [ 382.230425] Call Trace: [ 382.230438] [ 382.230466] dump_stack+0x47/0x62 [ 382.230498] __schedule_bug+0x61/0x80 [ 382.230522] __schedule+0x43/0x7a0 [ 382.230587] schedule+0x5f/0x70 [ 382.230625] dwc3_gadget_ep_dequeue+0x14c/0x270 [dwc3] [ 382.230669] ? do_wait_intr_irq+0x70/0x70 [ 382.230724] usb_ep_dequeue+0x19/0x90 [udc_core] [ 382.230770] ffs_aio_cancel+0x37/0x60 [usb_f_fs] [ 382.230798] kiocb_cancel+0x31/0x40 [ 382.230822] free_ioctx_users+0x4d/0xd0 [ 382.230858] percpu_ref_switch_to_atomic_rcu+0x10a/0x130 [ 382.230881] ? percpu_ref_exit+0x40/0x40 [ 382.230904] rcu_process_callbacks+0x2b3/0x440 [ 382.230965] __do_softirq+0xf8/0x26b [ 382.231011] ? __softirqentry_text_start+0x8/0x8 [ 382.231033] do_softirq_own_stack+0x22/0x30 [ 382.231042] [ 382.231071] irq_exit+0x45/0xc0 [ 382.231089] smp_apic_timer_interrupt+0x13c/0x150 [ 382.231118] apic_timer_interrupt+0x35/0x3c [ 382.231132] EIP: __copy_user_ll+0xe2/0xf0 [ 382.231142] EFLAGS: 00210293 CPU: 1 [ 382.231154] EAX: bfd4508c EBX: 00000004 ECX: 00000003 EDX: f3d8fe50 [ 382.231165] ESI: f3d8fe51 EDI: bfd4508d EBP: f3d8fe14 ESP: f3d8fe08 [ 382.231176] DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 [ 382.231265] core_sys_select+0x25f/0x320 [ 382.231346] ? __wake_up_common_lock+0x62/0x80 [ 382.231399] ? tty_ldisc_deref+0x13/0x20 [ 382.231438] ? ldsem_up_read+0x1b/0x40 [ 382.231459] ? tty_ldisc_deref+0x13/0x20 [ 382.231479] ? tty_write+0x29f/0x2e0 [ 382.231514] ? n_tty_ioctl+0xe0/0xe0 [ 382.231541] ? tty_write_unlock+0x30/0x30 [ 382.231566] ? __vfs_write+0x22/0x110 [ 382.231604] ? security_file_permission+0x2f/0xd0 [ 382.231635] ? rw_verify_area+0xac/0x120 [ 382.231677] ? vfs_write+0x103/0x180 [ 382.231711] SyS_select+0x87/0xc0 [ 382.231739] ? SyS_write+0x42/0x90 [ 382.231781] do_fast_syscall_32+0xd6/0x1a0 [ 382.231836] entry_SYSENTER_32+0x47/0x71 [ 382.231848] EIP: 0xb7f75b05 [ 382.231857] EFLAGS: 00000246 CPU: 1 [ 382.231868] EAX: ffffffda EBX: 00000400 ECX: bfd4508c EDX: bfd4510c [ 382.231878] ESI: 00000000 EDI: 00000000 EBP: 00000000 ESP: bfd45020 [ 382.231889] DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 007b [ 382.232281] softirq: huh, entered softirq 9 RCU c10b4d90 with preempt_count 00000100, exited with 00000000? Tested-by: Sam Protsenko Signed-off-by: Vincent Pelletier Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_fs.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index dce9d12c7981..33e2030503fa 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -215,6 +215,7 @@ struct ffs_io_data { struct mm_struct *mm; struct work_struct work; + struct work_struct cancellation_work; struct usb_ep *ep; struct usb_request *req; @@ -1072,22 +1073,31 @@ ffs_epfile_open(struct inode *inode, struct file *file) return 0; } +static void ffs_aio_cancel_worker(struct work_struct *work) +{ + struct ffs_io_data *io_data = container_of(work, struct ffs_io_data, + cancellation_work); + + ENTER(); + + usb_ep_dequeue(io_data->ep, io_data->req); +} + static int ffs_aio_cancel(struct kiocb *kiocb) { struct ffs_io_data *io_data = kiocb->private; - struct ffs_epfile *epfile = kiocb->ki_filp->private_data; + struct ffs_data *ffs = io_data->ffs; int value; ENTER(); - spin_lock_irq(&epfile->ffs->eps_lock); - - if (likely(io_data && io_data->ep && io_data->req)) - value = usb_ep_dequeue(io_data->ep, io_data->req); - else + if (likely(io_data && io_data->ep && io_data->req)) { + INIT_WORK(&io_data->cancellation_work, ffs_aio_cancel_worker); + queue_work(ffs->io_completion_wq, &io_data->cancellation_work); + value = -EINPROGRESS; + } else { value = -EINVAL; - - spin_unlock_irq(&epfile->ffs->eps_lock); + } return value; } From 3ec148ebe3112b40c9a2c0c543bcb0cd1a3abd43 Mon Sep 17 00:00:00 2001 From: Jaejoong Kim Date: Thu, 14 Jun 2018 18:56:31 +0900 Subject: [PATCH 112/572] doc: usb: Fix typo in gadget_configfs documentation Fix the directory name from 'configfs' to 'configs'. Signed-off-by: Jaejoong Kim Signed-off-by: Felipe Balbi --- Documentation/usb/gadget_configfs.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/usb/gadget_configfs.txt b/Documentation/usb/gadget_configfs.txt index 635e57493709..b8cb38a98c19 100644 --- a/Documentation/usb/gadget_configfs.txt +++ b/Documentation/usb/gadget_configfs.txt @@ -226,7 +226,7 @@ $ rm configs/./ where . specify the configuration and is a symlink to a function being removed from the configuration, e.g.: -$ rm configfs/c.1/ncm.usb0 +$ rm configs/c.1/ncm.usb0 ... ... From 1ffba9058737af2ddeebc813faa8ea9b16bc892a Mon Sep 17 00:00:00 2001 From: Minas Harutyunyan Date: Tue, 12 Jun 2018 12:37:29 +0400 Subject: [PATCH 113/572] usb: dwc2: gadget: Fix issue in dwc2_gadget_start_isoc() In case of requests queue is empty reset EP target_frame to initial value. This allow restarting ISOC traffic in case when function driver queued requests with interruptions. Tested-by: Zeng Tao Signed-off-by: Minas Harutyunyan Signed-off-by: Felipe Balbi --- drivers/usb/dwc2/gadget.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index f0d9ccf1d665..bb499fea6100 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -884,6 +884,7 @@ static void dwc2_gadget_start_isoc_ddma(struct dwc2_hsotg_ep *hs_ep) struct dwc2_dma_desc *desc; if (list_empty(&hs_ep->queue)) { + hs_ep->target_frame = TARGET_FRAME_INITIAL; dev_dbg(hsotg->dev, "%s: No requests in queue\n", __func__); return; } From 896e518883f18e601335908192e33426c1f599a4 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 31 May 2018 16:45:52 +0200 Subject: [PATCH 114/572] usb: dwc3: of-simple: fix use-after-free on remove The clocks have already been explicitly disabled and put as part of remove() so the runtime suspend callback must not be run when balancing the runtime PM usage count before returning. Fixes: 16adc674d0d6 ("usb: dwc3: add generic OF glue layer") Signed-off-by: Johan Hovold Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/dwc3-of-simple.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/dwc3-of-simple.c b/drivers/usb/dwc3/dwc3-of-simple.c index 6b3ccd542bd7..dbeff5e6ad14 100644 --- a/drivers/usb/dwc3/dwc3-of-simple.c +++ b/drivers/usb/dwc3/dwc3-of-simple.c @@ -165,8 +165,9 @@ static int dwc3_of_simple_remove(struct platform_device *pdev) reset_control_put(simple->resets); - pm_runtime_put_sync(dev); pm_runtime_disable(dev); + pm_runtime_put_noidle(dev); + pm_runtime_set_suspended(dev); return 0; } From 6e967d7e2c4822eba4847ec09037119a0418aaef Mon Sep 17 00:00:00 2001 From: Zeng Tao Date: Tue, 12 Jun 2018 22:49:06 +0800 Subject: [PATCH 115/572] usb: dwc2: gadget: fix packet drop issue in dwc2_gadget_handle_nak In ISOC transfer, when the NAK interrupt happens, we shouldn't complete a usb request, the current flow will complete one usb request with no hardware transfer, this will lead to a packet drop on the usb bus. Acked-by: Minas Harutyunyan Signed-off-by: Zeng Tao Signed-off-by: Felipe Balbi --- drivers/usb/dwc2/gadget.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index bb499fea6100..bb5eb3c8132d 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -2818,9 +2818,6 @@ static void dwc2_gadget_handle_nak(struct dwc2_hsotg_ep *hs_ep) tmp = dwc2_hsotg_read_frameno(hsotg); if (using_desc_dma(hsotg)) { - dwc2_hsotg_complete_request(hsotg, hs_ep, - get_ep_head(hs_ep), 0); - hs_ep->target_frame = tmp; dwc2_gadget_incr_frame_num(hs_ep); dwc2_gadget_start_isoc_ddma(hs_ep); From 615277779f41a753e68f531613e344c54fdc95bf Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 12 Jun 2018 10:24:48 +0200 Subject: [PATCH 116/572] usb: dwc3: Only call clk_bulk_get() on devicetree instantiated devices Commit fe8abf332b8f ("usb: dwc3: support clocks and resets for DWC3 core") adds support for handling clocks and resets in the DWC3 core, so that for platforms following the standard devicetree bindings this does not need to be duplicated in all the different glue layers. These changes intended for devicetree based platforms introduce an uncoditional clk_bulk_get() in the core probe path. This leads to the following error being logged on x86/ACPI systems: [ 26.276783] dwc3 dwc3.3.auto: Failed to get clk 'ref': -2 This commits wraps the clk_bulk_get() in an if (dev->of_node) check so that it only is done on devicetree instantiated devices, fixing this error. Cc: Masahiro Yamada Reviewed-by: Masahiro Yamada Signed-off-by: Hans de Goede Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/core.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index ea91310113b9..103807587dc6 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -1272,7 +1272,6 @@ static int dwc3_probe(struct platform_device *pdev) if (!dwc->clks) return -ENOMEM; - dwc->num_clks = ARRAY_SIZE(dwc3_core_clks); dwc->dev = dev; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -1307,15 +1306,19 @@ static int dwc3_probe(struct platform_device *pdev) if (IS_ERR(dwc->reset)) return PTR_ERR(dwc->reset); - ret = clk_bulk_get(dev, dwc->num_clks, dwc->clks); - if (ret == -EPROBE_DEFER) - return ret; - /* - * Clocks are optional, but new DT platforms should support all clocks - * as required by the DT-binding. - */ - if (ret) - dwc->num_clks = 0; + if (dev->of_node) { + dwc->num_clks = ARRAY_SIZE(dwc3_core_clks); + + ret = clk_bulk_get(dev, dwc->num_clks, dwc->clks); + if (ret == -EPROBE_DEFER) + return ret; + /* + * Clocks are optional, but new DT platforms should support all + * clocks as required by the DT-binding. + */ + if (ret) + dwc->num_clks = 0; + } ret = reset_control_deassert(dwc->reset); if (ret) From 21cbbc6bb7b4ba6aee303a3b4aef1578253724d8 Mon Sep 17 00:00:00 2001 From: Minas Harutyunyan Date: Tue, 12 Jun 2018 12:27:36 +0400 Subject: [PATCH 117/572] usb: dwc2: gadget: fix packet drop issue for ISOC OUT transfers In ISOC OUT transfer, when the OUT token received while EP disabled, we shouldn't complete a usb request. The current flow completed one usb request, this will lead to a packet drop to function driver. Signed-off-by: Minas Harutyunyan Signed-off-by: Felipe Balbi --- drivers/usb/dwc2/gadget.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index bb5eb3c8132d..fa3b6f361074 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -2756,8 +2756,6 @@ static void dwc2_gadget_handle_out_token_ep_disabled(struct dwc2_hsotg_ep *ep) */ tmp = dwc2_hsotg_read_frameno(hsotg); - dwc2_hsotg_complete_request(hsotg, ep, get_ep_head(ep), 0); - if (using_desc_dma(hsotg)) { if (ep->target_frame == TARGET_FRAME_INITIAL) { /* Start first ISO Out */ From 8195a655e5ce09550aff81b2573d9b015d520cb9 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 18 Jun 2018 14:17:16 +0300 Subject: [PATCH 118/572] ACPI / EC: Use ec_no_wakeup on Thinkpad X1 Carbon 6th On this system EC interrupt triggers constantly kicking devices out of low power states and thus blocking power management. The system also has a PCIe root port hosting Alpine Ridge Thunderbolt controller and it never gets a chance to go to D3cold because of this. Since the power button works the same regardless if EC interrupt is enabled or not during s2idle, add a quirk for this machine that sets ec_no_wakeup=true preventing spurious wakeups. Signed-off-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- drivers/acpi/ec.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index bb94cf0731fe..442a9e24f439 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -2037,6 +2037,17 @@ static inline void acpi_ec_query_exit(void) } } +static const struct dmi_system_id acpi_ec_no_wakeup[] = { + { + .ident = "Thinkpad X1 Carbon 6th", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "20KGS3JF01"), + }, + }, + { }, +}; + int __init acpi_ec_init(void) { int result; @@ -2047,6 +2058,15 @@ int __init acpi_ec_init(void) if (result) return result; + /* + * Disable EC wakeup on following systems to prevent periodic + * wakeup from EC GPE. + */ + if (dmi_check_system(acpi_ec_no_wakeup)) { + ec_no_wakeup = true; + pr_debug("Disabling EC wakeup on suspend-to-idle\n"); + } + /* Drivers must be started after acpi_ec_query_init() */ dsdt_fail = acpi_bus_register_driver(&acpi_ec_driver); /* From 856e7c4b619af622d56b3b454f7bec32a170ac99 Mon Sep 17 00:00:00 2001 From: "Shuah Khan (Samsung OSG)" Date: Tue, 12 Jun 2018 16:46:03 -0600 Subject: [PATCH 119/572] selftests: pstore: return Kselftest Skip code for skipped tests When pstore_post_reboot test gets skipped because of unmet dependencies and/or unsupported configuration, it returns 0 which is treated as a pass by the Kselftest framework. This leads to false positive result even when the test could not be run. Change it to return kselftest skip code when a test gets skipped to clearly report that the test could not be run. Kselftest framework SKIP code is 4 and the framework prints appropriate messages to indicate that the test is skipped. Signed-off-by: Shuah Khan (Samsung OSG) Reviewed-by: Kees Cook Signed-off-by: Shuah Khan (Samsung OSG) --- tools/testing/selftests/pstore/pstore_post_reboot_tests | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/pstore/pstore_post_reboot_tests b/tools/testing/selftests/pstore/pstore_post_reboot_tests index 6ccb154cb4aa..22f8df1ad7d4 100755 --- a/tools/testing/selftests/pstore/pstore_post_reboot_tests +++ b/tools/testing/selftests/pstore/pstore_post_reboot_tests @@ -7,13 +7,16 @@ # # Released under the terms of the GPL v2. +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + . ./common_tests if [ -e $REBOOT_FLAG ]; then rm $REBOOT_FLAG else prlog "pstore_crash_test has not been executed yet. we skip further tests." - exit 0 + exit $ksft_skip fi prlog -n "Mounting pstore filesystem ... " From 8781578087b8fb8829558bac96c3c24e5ba26f82 Mon Sep 17 00:00:00 2001 From: "Shuah Khan (Samsung OSG)" Date: Tue, 12 Jun 2018 17:40:31 -0600 Subject: [PATCH 120/572] selftests: static_keys: return Kselftest Skip code for skipped tests When static_keys test is skipped because of unmet dependencies and/or unsupported configuration, it exits with error which is treated as a fail by the Kselftest framework. This leads to false negative result even when the test could not be run. Change it to return kselftest skip code when a test gets skipped to clearly report that the test could not be run. Added an explicit searches for test_static_key_base and test_static_keys modules and return skip code if they aren't found to differentiate between the failure to load the module condition and module not found condition. Kselftest framework SKIP code is 4 and the framework prints appropriate messages to indicate that the test is skipped. Signed-off-by: Shuah Khan (Samsung OSG) --- .../selftests/static_keys/test_static_keys.sh | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tools/testing/selftests/static_keys/test_static_keys.sh b/tools/testing/selftests/static_keys/test_static_keys.sh index 24cff498b31a..fc9f8cde7d42 100755 --- a/tools/testing/selftests/static_keys/test_static_keys.sh +++ b/tools/testing/selftests/static_keys/test_static_keys.sh @@ -2,6 +2,19 @@ # SPDX-License-Identifier: GPL-2.0 # Runs static keys kernel module tests +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +if ! /sbin/modprobe -q -n test_static_key_base; then + echo "static_key: module test_static_key_base is not found [SKIP]" + exit $ksft_skip +fi + +if ! /sbin/modprobe -q -n test_static_keys; then + echo "static_key: module test_static_keys is not found [SKIP]" + exit $ksft_skip +fi + if /sbin/modprobe -q test_static_key_base; then if /sbin/modprobe -q test_static_keys; then echo "static_key: ok" From c7db6ffb831fd36a03485a0d88b1e505378975ad Mon Sep 17 00:00:00 2001 From: "Shuah Khan (Samsung OSG)" Date: Tue, 12 Jun 2018 18:11:37 -0600 Subject: [PATCH 121/572] selftests: sysctl: return Kselftest Skip code for skipped tests When sysctl test is skipped because of unmet dependencies and/or unsupported configuration, it exits with error which is treated as a fail by the Kselftest framework. This leads to false negative result even when the test could not be run. Change it to return kselftest skip code when a test gets skipped to clearly report that the test could not be run. Changed return code to kselftest skip code in skip error legs that check requirements and module probe test error leg. Kselftest framework SKIP code is 4 and the framework prints appropriate messages to indicate that the test is skipped. Signed-off-by: Shuah Khan (Samsung OSG) Reviewed-by: Kees Cook Signed-off-by: Shuah Khan (Samsung OSG) --- tools/testing/selftests/sysctl/sysctl.sh | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/sysctl/sysctl.sh b/tools/testing/selftests/sysctl/sysctl.sh index ec232c3cfcaa..584eb8ea780a 100755 --- a/tools/testing/selftests/sysctl/sysctl.sh +++ b/tools/testing/selftests/sysctl/sysctl.sh @@ -14,6 +14,9 @@ # This performs a series tests against the proc sysctl interface. +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + TEST_NAME="sysctl" TEST_DRIVER="test_${TEST_NAME}" TEST_DIR=$(dirname $0) @@ -41,7 +44,7 @@ test_modprobe() echo "$0: $DIR not present" >&2 echo "You must have the following enabled in your kernel:" >&2 cat $TEST_DIR/config >&2 - exit 1 + exit $ksft_skip fi } @@ -98,28 +101,30 @@ test_reqs() uid=$(id -u) if [ $uid -ne 0 ]; then echo $msg must be run as root >&2 - exit 0 + exit $ksft_skip fi if ! which perl 2> /dev/null > /dev/null; then echo "$0: You need perl installed" - exit 1 + exit $ksft_skip fi if ! which getconf 2> /dev/null > /dev/null; then echo "$0: You need getconf installed" - exit 1 + exit $ksft_skip fi if ! which diff 2> /dev/null > /dev/null; then echo "$0: You need diff installed" - exit 1 + exit $ksft_skip fi } function load_req_mod() { - trap "test_modprobe" EXIT - if [ ! -d $DIR ]; then + if ! modprobe -q -n $TEST_DRIVER; then + echo "$0: module $TEST_DRIVER not found [SKIP]" + exit $ksft_skip + fi modprobe $TEST_DRIVER if [ $? -ne 0 ]; then exit @@ -765,6 +770,7 @@ function parse_args() test_reqs allow_user_defaults check_production_sysctl_writes_strict +test_modprobe load_req_mod trap "test_finish" EXIT From d7d5311d4aa9611fe1a5a851e6f75733237a668a Mon Sep 17 00:00:00 2001 From: "Shuah Khan (Samsung OSG)" Date: Wed, 13 Jun 2018 21:10:48 -0600 Subject: [PATCH 122/572] selftests: user: return Kselftest Skip code for skipped tests When user test is skipped because of unmet dependencies and/or unsupported configuration, it exits with error which is treated as a fail by the Kselftest framework. This leads to false negative result even when the test could not be run. Change it to return kselftest skip code when a test gets skipped to clearly report that the test could not be run. Add an explicit check for module presence and return skip code if module isn't present. Kselftest framework SKIP code is 4 and the framework prints appropriate messages to indicate that the test is skipped. Signed-off-by: Shuah Khan (Samsung OSG) --- tools/testing/selftests/user/test_user_copy.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/testing/selftests/user/test_user_copy.sh b/tools/testing/selftests/user/test_user_copy.sh index d60506fc77f8..f9b31a57439b 100755 --- a/tools/testing/selftests/user/test_user_copy.sh +++ b/tools/testing/selftests/user/test_user_copy.sh @@ -2,6 +2,13 @@ # SPDX-License-Identifier: GPL-2.0 # Runs copy_to/from_user infrastructure using test_user_copy kernel module +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +if ! /sbin/modprobe -q -n test_user_copy; then + echo "user: module test_user_copy is not found [SKIP]" + exit $ksft_skip +fi if /sbin/modprobe -q test_user_copy; then /sbin/modprobe -q -r test_user_copy echo "user_copy: ok" From 685814466bf8398192cf855415a0bb2cefc1930e Mon Sep 17 00:00:00 2001 From: "Shuah Khan (Samsung OSG)" Date: Thu, 14 Jun 2018 16:56:13 -0600 Subject: [PATCH 123/572] selftests: zram: return Kselftest Skip code for skipped tests When zram test is skipped because of unmet dependencies and/or unsupported configuration, it exits with error which is treated as a fail by the Kselftest framework. This leads to false negative result even when the test could not be run. Change it to return kselftest skip code when a test gets skipped to clearly report that the test could not be run. Kselftest framework SKIP code is 4 and the framework prints appropriate messages to indicate that the test is skipped. Signed-off-by: Shuah Khan (Samsung OSG) --- tools/testing/selftests/zram/zram.sh | 5 ++++- tools/testing/selftests/zram/zram_lib.sh | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/zram/zram.sh b/tools/testing/selftests/zram/zram.sh index 754de7da426a..232e958ec454 100755 --- a/tools/testing/selftests/zram/zram.sh +++ b/tools/testing/selftests/zram/zram.sh @@ -2,6 +2,9 @@ # SPDX-License-Identifier: GPL-2.0 TCID="zram.sh" +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + . ./zram_lib.sh run_zram () { @@ -24,5 +27,5 @@ elif [ -b /dev/zram0 ]; then else echo "$TCID : No zram.ko module or /dev/zram0 device file not found" echo "$TCID : CONFIG_ZRAM is not set" - exit 1 + exit $ksft_skip fi diff --git a/tools/testing/selftests/zram/zram_lib.sh b/tools/testing/selftests/zram/zram_lib.sh index f6a9c73e7a44..9e73a4fb9b0a 100755 --- a/tools/testing/selftests/zram/zram_lib.sh +++ b/tools/testing/selftests/zram/zram_lib.sh @@ -18,6 +18,9 @@ MODULE=0 dev_makeswap=-1 dev_mounted=-1 +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + trap INT check_prereqs() @@ -27,7 +30,7 @@ check_prereqs() if [ $uid -ne 0 ]; then echo $msg must be run as root >&2 - exit 0 + exit $ksft_skip fi } From a4d7537789724985cafbc9260a31ca4f2b7cf123 Mon Sep 17 00:00:00 2001 From: "Shuah Khan (Samsung OSG)" Date: Wed, 13 Jun 2018 21:31:43 -0600 Subject: [PATCH 124/572] selftests: vm: return Kselftest Skip code for skipped tests When vm test is skipped because of unmet dependencies and/or unsupported configuration, it exits with error which is treated as a fail by the Kselftest framework. This leads to false negative result even when the test could not be run. Change it to return kselftest skip code when a test gets skipped to clearly report that the test could not be run. Kselftest framework SKIP code is 4 and the framework prints appropriate messages to indicate that the test is skipped. Signed-off-by: Shuah Khan (Samsung OSG) Acked-by: Mike Rapoport Signed-off-by: Shuah Khan (Samsung OSG) --- tools/testing/selftests/vm/compaction_test.c | 4 +++- tools/testing/selftests/vm/mlock2-tests.c | 12 +++++++----- tools/testing/selftests/vm/run_vmtests | 5 ++++- tools/testing/selftests/vm/userfaultfd.c | 4 +++- 4 files changed, 17 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/vm/compaction_test.c b/tools/testing/selftests/vm/compaction_test.c index 1097f04e4d80..bcec71250873 100644 --- a/tools/testing/selftests/vm/compaction_test.c +++ b/tools/testing/selftests/vm/compaction_test.c @@ -16,6 +16,8 @@ #include #include +#include "../kselftest.h" + #define MAP_SIZE 1048576 struct map_list { @@ -169,7 +171,7 @@ int main(int argc, char **argv) printf("Either the sysctl compact_unevictable_allowed is not\n" "set to 1 or couldn't read the proc file.\n" "Skipping the test\n"); - return 0; + return KSFT_SKIP; } lim.rlim_cur = RLIM_INFINITY; diff --git a/tools/testing/selftests/vm/mlock2-tests.c b/tools/testing/selftests/vm/mlock2-tests.c index 4997b9222cfa..637b6d0ac0d0 100644 --- a/tools/testing/selftests/vm/mlock2-tests.c +++ b/tools/testing/selftests/vm/mlock2-tests.c @@ -9,6 +9,8 @@ #include #include "mlock2.h" +#include "../kselftest.h" + struct vm_boundaries { unsigned long start; unsigned long end; @@ -303,7 +305,7 @@ static int test_mlock_lock() if (mlock2_(map, 2 * page_size, 0)) { if (errno == ENOSYS) { printf("Cannot call new mlock family, skipping test\n"); - _exit(0); + _exit(KSFT_SKIP); } perror("mlock2(0)"); goto unmap; @@ -412,7 +414,7 @@ static int test_mlock_onfault() if (mlock2_(map, 2 * page_size, MLOCK_ONFAULT)) { if (errno == ENOSYS) { printf("Cannot call new mlock family, skipping test\n"); - _exit(0); + _exit(KSFT_SKIP); } perror("mlock2(MLOCK_ONFAULT)"); goto unmap; @@ -425,7 +427,7 @@ static int test_mlock_onfault() if (munlock(map, 2 * page_size)) { if (errno == ENOSYS) { printf("Cannot call new mlock family, skipping test\n"); - _exit(0); + _exit(KSFT_SKIP); } perror("munlock()"); goto unmap; @@ -457,7 +459,7 @@ static int test_lock_onfault_of_present() if (mlock2_(map, 2 * page_size, MLOCK_ONFAULT)) { if (errno == ENOSYS) { printf("Cannot call new mlock family, skipping test\n"); - _exit(0); + _exit(KSFT_SKIP); } perror("mlock2(MLOCK_ONFAULT)"); goto unmap; @@ -583,7 +585,7 @@ static int test_vma_management(bool call_mlock) if (call_mlock && mlock2_(map, 3 * page_size, MLOCK_ONFAULT)) { if (errno == ENOSYS) { printf("Cannot call new mlock family, skipping test\n"); - _exit(0); + _exit(KSFT_SKIP); } perror("mlock(ONFAULT)\n"); goto out; diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests index 22d564673830..88cbe5575f0c 100755 --- a/tools/testing/selftests/vm/run_vmtests +++ b/tools/testing/selftests/vm/run_vmtests @@ -2,6 +2,9 @@ # SPDX-License-Identifier: GPL-2.0 #please run as root +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + mnt=./huge exitcode=0 @@ -36,7 +39,7 @@ if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then echo $(( $lackpgs + $nr_hugepgs )) > /proc/sys/vm/nr_hugepages if [ $? -ne 0 ]; then echo "Please run this test as root" - exit 1 + exit $ksft_skip fi while read name size unit; do if [ "$name" = "HugePages_Free:" ]; then diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index de2f9ec8a87f..7b8171e3128a 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -69,6 +69,8 @@ #include #include +#include "../kselftest.h" + #ifdef __NR_userfaultfd static unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size; @@ -1322,7 +1324,7 @@ int main(int argc, char **argv) int main(void) { printf("skip: Skipping userfaultfd test (missing __NR_userfaultfd)\n"); - return 0; + return KSFT_SKIP; } #endif /* __NR_userfaultfd */ From d6a3e55131fcb1e5ca1753f4b6f297a177b2fc91 Mon Sep 17 00:00:00 2001 From: Fathi Boudra Date: Thu, 14 Jun 2018 11:57:08 +0200 Subject: [PATCH 125/572] selftests: sync: add config fragment for testing sync framework Unless the software synchronization objects (CONFIG_SW_SYNC) is enabled, the sync test will be skipped: TAP version 13 1..0 # Skipped: Sync framework not supported by kernel Add a config fragment file to be able to run "make kselftest-merge" to enable relevant configuration required in order to run the sync test. Signed-off-by: Fathi Boudra Link: https://lkml.org/lkml/2017/5/5/14 Signed-off-by: Anders Roxell Signed-off-by: Shuah Khan (Samsung OSG) --- tools/testing/selftests/sync/config | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 tools/testing/selftests/sync/config diff --git a/tools/testing/selftests/sync/config b/tools/testing/selftests/sync/config new file mode 100644 index 000000000000..1ab7e8130db2 --- /dev/null +++ b/tools/testing/selftests/sync/config @@ -0,0 +1,4 @@ +CONFIG_STAGING=y +CONFIG_ANDROID=y +CONFIG_SYNC=y +CONFIG_SW_SYNC=y From 3c62c91a3635d6c4ef23e00b4fc84fa77bbf99cc Mon Sep 17 00:00:00 2001 From: "Shuah Khan (Samsung OSG)" Date: Tue, 12 Jun 2018 16:50:37 -0600 Subject: [PATCH 126/572] selftests: sparc64: Fix to do nothing on non-sparc64 sparc64 test fails with the following errors on non-sparc64 systems. Fix the Makefile to do nothing on non-sparc64 systems to suppress the errors: make run_tests adi-test.c: Assembler messages: adi-test.c:302: Error: no such instruction: `rd %tick,%r13' adi-test.c:304: Error: no such instruction: `rd %tick,%rbp' adi-test.c:190: Error: no such instruction: `rd %tick,%rbp' adi-test.c:192: Error: no such instruction: `rd %tick,%rdx' adi-test.c:273: Error: no such instruction: `rd %tick,%rbx' adi-test.c:276: Error: no such instruction: `rd %tick,%rdx' adi-test.c:217: Error: no such instruction: `rd %tick,%rbp' adi-test.c:220: Error: no such instruction: `rd %tick,%rdx' adi-test.c:79: Error: no such instruction: `rd %tick,%rax' adi-test.c:79: Error: no such instruction: `rd %tick,%rax' adi-test.c:79: Error: no such instruction: `rd %tick,%rax' adi-test.c:79: Error: no such instruction: `rd %tick,%rax' adi-test.c:246: Error: no such instruction: `rd %tick,%rbp' adi-test.c:248: Error: no such instruction: `rd %tick,%rdx' adi-test.c:79: Error: no such instruction: `rd %tick,%rax' adi-test.c:79: Error: no such instruction: `rd %tick,%rax' adi-test.c:79: Error: no such instruction: `rd %tick,%rax' : recipe for target 'adi-test' failed make[1]: *** [adi-test] Error 1 adi: [FAIL] ./drivers_test.sh: 24: ./drivers_test.sh: ./adi-test: not found ../lib.mk:73: recipe for target 'run_tests' failed make: *** [run_tests] Error 1 Signed-off-by: Shuah Khan (Samsung OSG) Reviewed-by: Tom Hromatka Acked-by: David S. Miller Signed-off-by: Shuah Khan (Samsung OSG) --- tools/testing/selftests/sparc64/Makefile | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tools/testing/selftests/sparc64/Makefile b/tools/testing/selftests/sparc64/Makefile index 2082eeffd779..442f0ca45441 100644 --- a/tools/testing/selftests/sparc64/Makefile +++ b/tools/testing/selftests/sparc64/Makefile @@ -1,7 +1,17 @@ +uname_M := $(shell uname -m 2>/dev/null || echo not) +ARCH ?= $(shell echo $(uname_M) | sed -e s/x86_64/x86/) + +ifneq ($(ARCH),sparc64) +nothing: +.PHONY: all clean run_tests install +.SILENT: +else + SUBDIRS := drivers TEST_PROGS := run.sh + .PHONY: all clean include ../lib.mk @@ -44,3 +54,4 @@ override define CLEAN make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\ done endef +endif From 953c9d28d2c9bca15a17b11a022fbc657cc5631a Mon Sep 17 00:00:00 2001 From: "Shuah Khan (Samsung OSG)" Date: Tue, 12 Jun 2018 17:12:24 -0600 Subject: [PATCH 127/572] selftests: sparc64: delete RUN_TESTS and EMIT_TESTS overrides Delete RUN_TESTS and EMIT_TESTS overrides and use common defines in lib.mk. Common defines work just fine and there is no need to define custom overrides. Signed-off-by: Shuah Khan (Samsung OSG) Reviewed-by: Tom Hromatka Signed-off-by: Shuah Khan (Samsung OSG) --- tools/testing/selftests/sparc64/Makefile | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tools/testing/selftests/sparc64/Makefile b/tools/testing/selftests/sparc64/Makefile index 442f0ca45441..76b2206932c3 100644 --- a/tools/testing/selftests/sparc64/Makefile +++ b/tools/testing/selftests/sparc64/Makefile @@ -28,10 +28,6 @@ all: fi \ done -override define RUN_TESTS - @cd $(OUTPUT); ./run.sh -endef - override define INSTALL_RULE mkdir -p $(INSTALL_PATH) install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) @@ -43,10 +39,6 @@ override define INSTALL_RULE done; endef -override define EMIT_TESTS - echo "./run.sh" -endef - override define CLEAN @for DIR in $(SUBDIRS); do \ BUILD_TARGET=$(OUTPUT)/$$DIR; \ From eb83d5f7d07ed913d62ca4ad1e14fb6ca4937bab Mon Sep 17 00:00:00 2001 From: "Shuah Khan (Samsung OSG)" Date: Wed, 13 Jun 2018 16:20:52 -0600 Subject: [PATCH 128/572] selftests: sparc64: Add missing SPDX License Identifiers Add missing SPDX License Identifiers to Makefile(s). Signed-off-by: Shuah Khan (Samsung OSG) Reviewed-by: Tom Hromatka Signed-off-by: Shuah Khan (Samsung OSG) --- tools/testing/selftests/sparc64/Makefile | 1 + tools/testing/selftests/sparc64/drivers/Makefile | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/sparc64/Makefile b/tools/testing/selftests/sparc64/Makefile index 76b2206932c3..a19531dba4dc 100644 --- a/tools/testing/selftests/sparc64/Makefile +++ b/tools/testing/selftests/sparc64/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 uname_M := $(shell uname -m 2>/dev/null || echo not) ARCH ?= $(shell echo $(uname_M) | sed -e s/x86_64/x86/) diff --git a/tools/testing/selftests/sparc64/drivers/Makefile b/tools/testing/selftests/sparc64/drivers/Makefile index 6264f40bbdbc..deb0df415565 100644 --- a/tools/testing/selftests/sparc64/drivers/Makefile +++ b/tools/testing/selftests/sparc64/drivers/Makefile @@ -1,4 +1,4 @@ - +# SPDX-License-Identifier: GPL-2.0 INCLUDEDIR := -I. CFLAGS := $(CFLAGS) $(INCLUDEDIR) -Wall -O2 -g From 9f3cb9b71bc3166f27ceffe0b7295c464c638151 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 5 Jun 2018 09:22:37 -0700 Subject: [PATCH 129/572] MAINAINTERS: Corrected Broadcom Northstar2 entry While moving the Northstar 2 DTS into a dedicated directory, the corresponding MAINTAINERS file entry was not updated accordingly, fix that. Fixes: 63a913c157f5 ("arm64: dts: move ns2 into northstar2 directory") Signed-off-by: Florian Fainelli --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 9d5eeff51b5f..67b014ea81f2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2971,7 +2971,7 @@ N: bcm585* N: bcm586* N: bcm88312 N: hr2 -F: arch/arm64/boot/dts/broadcom/ns2* +F: arch/arm64/boot/dts/broadcom/northstar2/* F: drivers/clk/bcm/clk-ns* F: drivers/pinctrl/bcm/pinctrl-ns* From d64324acdb7052b08b13ffc8c12af232c6a29afd Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 5 Jun 2018 09:24:55 -0700 Subject: [PATCH 130/572] MAINTAINERS: Update Broadcom iProc entry with Stingray Update the MAINTAINERS file to cover the "stingray" pattern and a few files under arch/arm64/boot/dts/broadcom/* as well as the clock driver and binding. Signed-off-by: Florian Fainelli --- MAINTAINERS | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 67b014ea81f2..c78feb02a8c9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2971,9 +2971,13 @@ N: bcm585* N: bcm586* N: bcm88312 N: hr2 +N: stingray F: arch/arm64/boot/dts/broadcom/northstar2/* +F: arch/arm64/boot/dts/broadcom/stingray/* F: drivers/clk/bcm/clk-ns* +F: drivers/clk/bcm/clk-sr* F: drivers/pinctrl/bcm/pinctrl-ns* +F: include/dt-bindings/clock/bcm-sr* BROADCOM KONA GPIO DRIVER M: Ray Jui From a3e32e78a40017756c71ef6dad429ffe3301126a Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 11 Jun 2018 15:47:12 -0700 Subject: [PATCH 131/572] ARM: dts: NSP: Fix i2c controller interrupt type The i2c controller should use IRQ_TYPE_LEVEL_HIGH instead of IRQ_TYPE_NONE. Fixes: 0f9f27a36d09 ("ARM: dts: NSP: Add I2C support to the DT") Signed-off-by: Florian Fainelli --- arch/arm/boot/dts/bcm-nsp.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/bcm-nsp.dtsi b/arch/arm/boot/dts/bcm-nsp.dtsi index dcc55aa84583..c6aa62386941 100644 --- a/arch/arm/boot/dts/bcm-nsp.dtsi +++ b/arch/arm/boot/dts/bcm-nsp.dtsi @@ -391,7 +391,7 @@ reg = <0x38000 0x50>; #address-cells = <1>; #size-cells = <0>; - interrupts = ; + interrupts = ; clock-frequency = <100000>; dma-coherent; status = "disabled"; From 403fde644855bc71318c8db65646383e22653b13 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 11 Jun 2018 15:47:13 -0700 Subject: [PATCH 132/572] ARM: dts: NSP: Fix PCIe controllers interrupt types The interrupts for the PCIe controllers should all be of type IRQ_TYPE_LEVEL_HIGH instead of IRQ_TYPE_NONE. Fixes: d71eb9412088 ("ARM: dts: NSP: Add MSI support on PCI") Fixes: 522199029fdc ("ARM: dts: NSP: Fix PCIE DT issue") Signed-off-by: Florian Fainelli --- arch/arm/boot/dts/bcm-nsp.dtsi | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/arch/arm/boot/dts/bcm-nsp.dtsi b/arch/arm/boot/dts/bcm-nsp.dtsi index c6aa62386941..09ba85046322 100644 --- a/arch/arm/boot/dts/bcm-nsp.dtsi +++ b/arch/arm/boot/dts/bcm-nsp.dtsi @@ -496,7 +496,7 @@ #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0>; - interrupt-map = <0 0 0 0 &gic GIC_SPI 131 IRQ_TYPE_NONE>; + interrupt-map = <0 0 0 0 &gic GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>; linux,pci-domain = <0>; @@ -519,10 +519,10 @@ compatible = "brcm,iproc-msi"; msi-controller; interrupt-parent = <&gic>; - interrupts = , - , - , - ; + interrupts = , + , + , + ; brcm,pcie-msi-inten; }; }; @@ -533,7 +533,7 @@ #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0>; - interrupt-map = <0 0 0 0 &gic GIC_SPI 137 IRQ_TYPE_NONE>; + interrupt-map = <0 0 0 0 &gic GIC_SPI 137 IRQ_TYPE_LEVEL_HIGH>; linux,pci-domain = <1>; @@ -556,10 +556,10 @@ compatible = "brcm,iproc-msi"; msi-controller; interrupt-parent = <&gic>; - interrupts = , - , - , - ; + interrupts = , + , + , + ; brcm,pcie-msi-inten; }; }; @@ -570,7 +570,7 @@ #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0>; - interrupt-map = <0 0 0 0 &gic GIC_SPI 143 IRQ_TYPE_NONE>; + interrupt-map = <0 0 0 0 &gic GIC_SPI 143 IRQ_TYPE_LEVEL_HIGH>; linux,pci-domain = <2>; @@ -593,10 +593,10 @@ compatible = "brcm,iproc-msi"; msi-controller; interrupt-parent = <&gic>; - interrupts = , - , - , - ; + interrupts = , + , + , + ; brcm,pcie-msi-inten; }; }; From dbe4a39331b7aa8bcac8ef2da780724e1af1619a Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 11 Jun 2018 15:47:14 -0700 Subject: [PATCH 133/572] ARM: dts: HR2: Fix interrupt types for i2c and PCIe The i2c and PCIe controllers had an incorrect type which should have been set to IRQ_TYPE_LEVEL_HIGH, fix that. Fixes: b9099ec754b5 ("ARM: dts: Add Broadcom Hurricane 2 DTS include file") Signed-off-by: Florian Fainelli --- arch/arm/boot/dts/bcm-hr2.dtsi | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/arm/boot/dts/bcm-hr2.dtsi b/arch/arm/boot/dts/bcm-hr2.dtsi index 3f9cedd8011f..3084a7c95733 100644 --- a/arch/arm/boot/dts/bcm-hr2.dtsi +++ b/arch/arm/boot/dts/bcm-hr2.dtsi @@ -264,7 +264,7 @@ reg = <0x38000 0x50>; #address-cells = <1>; #size-cells = <0>; - interrupts = ; + interrupts = ; clock-frequency = <100000>; }; @@ -279,7 +279,7 @@ reg = <0x3b000 0x50>; #address-cells = <1>; #size-cells = <0>; - interrupts = ; + interrupts = ; clock-frequency = <100000>; }; }; @@ -300,7 +300,7 @@ #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0>; - interrupt-map = <0 0 0 0 &gic GIC_SPI 186 IRQ_TYPE_NONE>; + interrupt-map = <0 0 0 0 &gic GIC_SPI 186 IRQ_TYPE_LEVEL_HIGH>; linux,pci-domain = <0>; @@ -322,10 +322,10 @@ compatible = "brcm,iproc-msi"; msi-controller; interrupt-parent = <&gic>; - interrupts = , - , - , - ; + interrupts = , + , + , + ; brcm,pcie-msi-inten; }; }; @@ -336,7 +336,7 @@ #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0>; - interrupt-map = <0 0 0 0 &gic GIC_SPI 192 IRQ_TYPE_NONE>; + interrupt-map = <0 0 0 0 &gic GIC_SPI 192 IRQ_TYPE_LEVEL_HIGH>; linux,pci-domain = <1>; @@ -358,10 +358,10 @@ compatible = "brcm,iproc-msi"; msi-controller; interrupt-parent = <&gic>; - interrupts = , - , - , - ; + interrupts = , + , + , + ; brcm,pcie-msi-inten; }; }; From a0a8338e905734518ab9b10b06e7fd0201228f8b Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 11 Jun 2018 15:53:40 -0700 Subject: [PATCH 134/572] ARM: dts: BCM5301x: Fix i2c controller interrupt type The i2c controller should be using IRQ_TYPE_LEVEL_HIGH, fix that. Fixes: bb097e3e0045 ("ARM: dts: BCM5301X: Add I2C support to the DT") Signed-off-by: Florian Fainelli --- arch/arm/boot/dts/bcm5301x.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/bcm5301x.dtsi b/arch/arm/boot/dts/bcm5301x.dtsi index 9a076c409f4e..ef995e50ee12 100644 --- a/arch/arm/boot/dts/bcm5301x.dtsi +++ b/arch/arm/boot/dts/bcm5301x.dtsi @@ -365,7 +365,7 @@ i2c0: i2c@18009000 { compatible = "brcm,iproc-i2c"; reg = <0x18009000 0x50>; - interrupts = ; + interrupts = ; #address-cells = <1>; #size-cells = <0>; clock-frequency = <100000>; From 71ca3409703b62b6a092d0d9d13f366c121bc5d3 Mon Sep 17 00:00:00 2001 From: Ray Jui Date: Tue, 12 Jun 2018 13:21:27 -0700 Subject: [PATCH 135/572] ARM: dts: Cygnus: Fix I2C controller interrupt type Fix I2C controller interrupt to use IRQ_TYPE_LEVEL_HIGH for Broadcom Cygnus SoC. Fixes: b51c05a331ff ("ARM: dts: add I2C device nodes for Broadcom Cygnus") Signed-off-by: Ray Jui Signed-off-by: Florian Fainelli --- arch/arm/boot/dts/bcm-cygnus.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/bcm-cygnus.dtsi b/arch/arm/boot/dts/bcm-cygnus.dtsi index 9fe4f5a6379e..835a6f736da4 100644 --- a/arch/arm/boot/dts/bcm-cygnus.dtsi +++ b/arch/arm/boot/dts/bcm-cygnus.dtsi @@ -216,7 +216,7 @@ reg = <0x18008000 0x100>; #address-cells = <1>; #size-cells = <0>; - interrupts = ; + interrupts = ; clock-frequency = <100000>; status = "disabled"; }; @@ -245,7 +245,7 @@ reg = <0x1800b000 0x100>; #address-cells = <1>; #size-cells = <0>; - interrupts = ; + interrupts = ; clock-frequency = <100000>; status = "disabled"; }; From 6cb1628ad3506b315cdddd7676db0ff2af378d28 Mon Sep 17 00:00:00 2001 From: Ray Jui Date: Tue, 12 Jun 2018 13:21:28 -0700 Subject: [PATCH 136/572] ARM: dts: Cygnus: Fix PCIe controller interrupt type Fix PCIe controller interrupt to use IRQ_TYPE_LEVEL_HIGH for Broadcom Cygnus SoC Fixes: cd590b50a936 ("ARM: dts: enable PCIe support for Cygnus") Fixes: f6b889358a82 ("ARM: dts: Enable MSI support for Broadcom Cygnus") Signed-off-by: Ray Jui Signed-off-by: Florian Fainelli --- arch/arm/boot/dts/bcm-cygnus.dtsi | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/arm/boot/dts/bcm-cygnus.dtsi b/arch/arm/boot/dts/bcm-cygnus.dtsi index 835a6f736da4..2c4df2d2d4a6 100644 --- a/arch/arm/boot/dts/bcm-cygnus.dtsi +++ b/arch/arm/boot/dts/bcm-cygnus.dtsi @@ -256,7 +256,7 @@ #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0>; - interrupt-map = <0 0 0 0 &gic GIC_SPI 100 IRQ_TYPE_NONE>; + interrupt-map = <0 0 0 0 &gic GIC_SPI 100 IRQ_TYPE_LEVEL_HIGH>; linux,pci-domain = <0>; @@ -278,10 +278,10 @@ compatible = "brcm,iproc-msi"; msi-controller; interrupt-parent = <&gic>; - interrupts = , - , - , - ; + interrupts = , + , + , + ; }; }; @@ -291,7 +291,7 @@ #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0>; - interrupt-map = <0 0 0 0 &gic GIC_SPI 106 IRQ_TYPE_NONE>; + interrupt-map = <0 0 0 0 &gic GIC_SPI 106 IRQ_TYPE_LEVEL_HIGH>; linux,pci-domain = <1>; @@ -313,10 +313,10 @@ compatible = "brcm,iproc-msi"; msi-controller; interrupt-parent = <&gic>; - interrupts = , - , - , - ; + interrupts = , + , + , + ; }; }; From eba92503e980c08ac353d0d669d0bb143979abcd Mon Sep 17 00:00:00 2001 From: Scott Branden Date: Fri, 18 May 2018 08:21:20 -0700 Subject: [PATCH 137/572] arm64: dts: specify 1.8V EMMC capabilities for bcm958742k Specify 1.8V EMMC capabilities for bcm958742k board to indicate support for UHS mode. Fixes: d4b4aba6be8a ("arm64: dts: Initial DTS files for Broadcom Stingray SOC") Signed-off-by: Scott Branden Signed-off-by: Florian Fainelli --- arch/arm64/boot/dts/broadcom/stingray/bcm958742k.dts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/boot/dts/broadcom/stingray/bcm958742k.dts b/arch/arm64/boot/dts/broadcom/stingray/bcm958742k.dts index eb6f08cdbd79..77efa28c4dd5 100644 --- a/arch/arm64/boot/dts/broadcom/stingray/bcm958742k.dts +++ b/arch/arm64/boot/dts/broadcom/stingray/bcm958742k.dts @@ -43,6 +43,10 @@ enet-phy-lane-swap; }; +&sdio0 { + mmc-ddr-1_8v; +}; + &uart2 { status = "okay"; }; From 37c2bd81a86ebb1cc934bf52a29c33d6f9abff7f Mon Sep 17 00:00:00 2001 From: Scott Branden Date: Tue, 22 May 2018 10:01:39 -0700 Subject: [PATCH 138/572] arm64: dts: specify 1.8V EMMC capabilities for bcm958742t Specify 1.8V EMMC capabilities for bcm958742t board to indicate support for UHS mode. Fixes: d4b4aba6be8a ("arm64: dts: Initial DTS files for Broadcom Stingray SOC") Signed-off-by: Scott Branden Signed-off-by: Florian Fainelli --- arch/arm64/boot/dts/broadcom/stingray/bcm958742t.dts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/boot/dts/broadcom/stingray/bcm958742t.dts b/arch/arm64/boot/dts/broadcom/stingray/bcm958742t.dts index 5084b037320f..55ba495ef56e 100644 --- a/arch/arm64/boot/dts/broadcom/stingray/bcm958742t.dts +++ b/arch/arm64/boot/dts/broadcom/stingray/bcm958742t.dts @@ -42,3 +42,7 @@ &gphy0 { enet-phy-lane-swap; }; + +&sdio0 { + mmc-ddr-1_8v; +}; From e605c287deed45624e8d35a15e3f0b4faab1a62d Mon Sep 17 00:00:00 2001 From: Ray Jui Date: Tue, 12 Jun 2018 13:21:29 -0700 Subject: [PATCH 139/572] arm64: dts: ns2: Fix I2C controller interrupt type Fix I2C controller interrupt to use IRQ_TYPE_LEVEL_HIGH for Broadcom NS2 SoC. Fixes: 7ac674e8df7a ("arm64: dts: Add I2C nodes for NS2") Signed-off-by: Ray Jui Signed-off-by: Florian Fainelli --- arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi b/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi index 4a2a6af8e752..c0e48966a5e2 100644 --- a/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi +++ b/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi @@ -566,7 +566,7 @@ reg = <0x66080000 0x100>; #address-cells = <1>; #size-cells = <0>; - interrupts = ; + interrupts = ; clock-frequency = <100000>; status = "disabled"; }; @@ -594,7 +594,7 @@ reg = <0x660b0000 0x100>; #address-cells = <1>; #size-cells = <0>; - interrupts = ; + interrupts = ; clock-frequency = <100000>; status = "disabled"; }; From d0b8aed9e80ab526dbb04020bfc94ecea7bddb44 Mon Sep 17 00:00:00 2001 From: Ray Jui Date: Tue, 12 Jun 2018 13:21:30 -0700 Subject: [PATCH 140/572] arm64: dts: ns2: Fix PCIe controller interrupt type Fix PCIe controller interrupt to use IRQ_TYPE_LEVEL_HIGH for Broadcom NS2 SoC. Fixes: fd5e5dd56a2f ("arm64: dts: Add PCIe0 and PCIe4 DT nodes for NS2") Signed-off-by: Ray Jui Signed-off-by: Florian Fainelli --- arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi b/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi index c0e48966a5e2..4057197048dc 100644 --- a/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi +++ b/arch/arm64/boot/dts/broadcom/northstar2/ns2.dtsi @@ -118,7 +118,7 @@ #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0>; - interrupt-map = <0 0 0 0 &gic 0 GIC_SPI 281 IRQ_TYPE_NONE>; + interrupt-map = <0 0 0 0 &gic 0 GIC_SPI 281 IRQ_TYPE_LEVEL_HIGH>; linux,pci-domain = <0>; @@ -149,7 +149,7 @@ #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0>; - interrupt-map = <0 0 0 0 &gic 0 GIC_SPI 305 IRQ_TYPE_NONE>; + interrupt-map = <0 0 0 0 &gic 0 GIC_SPI 305 IRQ_TYPE_LEVEL_HIGH>; linux,pci-domain = <4>; From 75af23c4736c5633894ea0baf9bca1cf6b248ca4 Mon Sep 17 00:00:00 2001 From: Ray Jui Date: Tue, 12 Jun 2018 13:21:31 -0700 Subject: [PATCH 141/572] arm64: dts: Stingray: Fix I2C controller interrupt type Fix I2C controller interrupt to use IRQ_TYPE_LEVEL_HIGH for Broadcom Stingray SoC. Fixes: 1256ea18875d ("arm64: dts: Add I2C DT nodes for Stingray SoC") Signed-off-by: Ray Jui Signed-off-by: Florian Fainelli --- arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi b/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi index 99aaff0b6d72..b203152ad67c 100644 --- a/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi +++ b/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi @@ -409,7 +409,7 @@ reg = <0x000b0000 0x100>; #address-cells = <1>; #size-cells = <0>; - interrupts = ; + interrupts = ; clock-frequency = <100000>; status = "disabled"; }; @@ -453,7 +453,7 @@ reg = <0x000e0000 0x100>; #address-cells = <1>; #size-cells = <0>; - interrupts = ; + interrupts = ; clock-frequency = <100000>; status = "disabled"; }; From a45fc268db20ecd859bb61e25045912b3194b5e6 Mon Sep 17 00:00:00 2001 From: Hoan Tran Date: Thu, 7 Jun 2018 14:35:01 +0100 Subject: [PATCH 142/572] drivers/perf: xgene_pmu: Fix IOB SLOW PMU parser error This patch fixes the below parser error of the IOB SLOW PMU. # perf stat -a -e iob-slow0/cycle-count/ sleep 1 evenf syntax error: 'iob-slow0/cycle-count/' \___ parser error It replaces the "-" character by "_" character inside the PMU name. Signed-off-by: Hoan Tran Signed-off-by: Will Deacon --- drivers/perf/xgene_pmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c index 6bdb1dad805f..0e31f1392a53 100644 --- a/drivers/perf/xgene_pmu.c +++ b/drivers/perf/xgene_pmu.c @@ -1463,7 +1463,7 @@ static char *xgene_pmu_dev_name(struct device *dev, u32 type, int id) case PMU_TYPE_IOB: return devm_kasprintf(dev, GFP_KERNEL, "iob%d", id); case PMU_TYPE_IOB_SLOW: - return devm_kasprintf(dev, GFP_KERNEL, "iob-slow%d", id); + return devm_kasprintf(dev, GFP_KERNEL, "iob_slow%d", id); case PMU_TYPE_MCB: return devm_kasprintf(dev, GFP_KERNEL, "mcb%d", id); case PMU_TYPE_MC: From 109c4d18e57445afcaf728b8716a69375a3daab2 Mon Sep 17 00:00:00 2001 From: Ayan Kumar Halder Date: Fri, 20 Apr 2018 15:44:57 +0100 Subject: [PATCH 143/572] drm/arm/malidp: Ensure that the crtcs are shutdown before removing any encoder/connector One needs to ensure that the crtcs are shutdown so that the drm_crtc_state->connector_mask reflects that no connectors are currently active. Further, it reduces the reference count for each connector. This ensures that the connectors and encoders can be cleanly removed either when _unbind is called for the corresponding drivers or by drm_mode_config_cleanup(). We need drm_atomic_helper_shutdown() to be called before component_unbind_all() otherwise the connectors attached to the component device will have the wrong reference count value and will not be cleanly removed. Signed-off-by: Ayan Kumar Halder Acked-by: Liviu Dudau Signed-off-by: Liviu Dudau --- drivers/gpu/drm/arm/malidp_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/arm/malidp_drv.c b/drivers/gpu/drm/arm/malidp_drv.c index 8d20faa198cf..0a788d76ed5f 100644 --- a/drivers/gpu/drm/arm/malidp_drv.c +++ b/drivers/gpu/drm/arm/malidp_drv.c @@ -278,7 +278,6 @@ static int malidp_init(struct drm_device *drm) static void malidp_fini(struct drm_device *drm) { - drm_atomic_helper_shutdown(drm); drm_mode_config_cleanup(drm); } @@ -646,6 +645,7 @@ vblank_fail: malidp_de_irq_fini(drm); drm->irq_enabled = false; irq_init_fail: + drm_atomic_helper_shutdown(drm); component_unbind_all(dev, drm); bind_fail: of_node_put(malidp->crtc.port); @@ -681,6 +681,7 @@ static void malidp_unbind(struct device *dev) malidp_se_irq_fini(drm); malidp_de_irq_fini(drm); drm->irq_enabled = false; + drm_atomic_helper_shutdown(drm); component_unbind_all(dev, drm); of_node_put(malidp->crtc.port); malidp->crtc.port = NULL; From 89610dc2c235e7b02bb9fba0ce247e12d4dde7cd Mon Sep 17 00:00:00 2001 From: Alison Wang Date: Tue, 24 Apr 2018 10:42:32 +0800 Subject: [PATCH 144/572] drm: mali-dp: Enable Global SE interrupts mask for DP500 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the situation that DE and SE aren’t shared the same interrupt number, the Global SE interrupts mask bit MASK_IRQ_EN in MASKIRQ must be set, or else other mask bits will not work and no SE interrupt will occur. This patch enables MASK_IRQ_EN for SE to fix this problem. Signed-off-by: Alison Wang Acked-by: Liviu Dudau Signed-off-by: Liviu Dudau --- drivers/gpu/drm/arm/malidp_hw.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/arm/malidp_hw.c b/drivers/gpu/drm/arm/malidp_hw.c index d789b46dc817..069783e715f1 100644 --- a/drivers/gpu/drm/arm/malidp_hw.c +++ b/drivers/gpu/drm/arm/malidp_hw.c @@ -634,7 +634,8 @@ const struct malidp_hw malidp_device[MALIDP_MAX_DEVICES] = { .vsync_irq = MALIDP500_DE_IRQ_VSYNC, }, .se_irq_map = { - .irq_mask = MALIDP500_SE_IRQ_CONF_MODE, + .irq_mask = MALIDP500_SE_IRQ_CONF_MODE | + MALIDP500_SE_IRQ_GLOBAL, .vsync_irq = 0, }, .dc_irq_map = { From ad7fda2e378f4356df621a39655f7c200b495d81 Mon Sep 17 00:00:00 2001 From: Ayan Kumar Halder Date: Tue, 10 Apr 2018 19:25:03 +0100 Subject: [PATCH 145/572] drm/arm/malidp: Preserve LAYER_FORMAT contents when setting format On some Mali-DP processors, the LAYER_FORMAT register contains fields other than the format. These bits were unconditionally cleared when setting the pixel format, whereas they should be preserved at their reset values. Reported-by: Brian Starkey Reported-by: Liviu Dudau Signed-off-by: Ayan Kumar halder Acked-by: Liviu Dudau Signed-off-by: Liviu Dudau --- drivers/gpu/drm/arm/malidp_planes.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/arm/malidp_planes.c b/drivers/gpu/drm/arm/malidp_planes.c index 7a44897c50fe..4af3c1fabb23 100644 --- a/drivers/gpu/drm/arm/malidp_planes.c +++ b/drivers/gpu/drm/arm/malidp_planes.c @@ -23,6 +23,7 @@ /* Layer specific register offsets */ #define MALIDP_LAYER_FORMAT 0x000 +#define LAYER_FORMAT_MASK 0x3f #define MALIDP_LAYER_CONTROL 0x004 #define LAYER_ENABLE (1 << 0) #define LAYER_FLOWCFG_MASK 7 @@ -337,7 +338,9 @@ static void malidp_de_plane_update(struct drm_plane *plane, dest_w = plane->state->crtc_w; dest_h = plane->state->crtc_h; - malidp_hw_write(mp->hwdev, ms->format, mp->layer->base); + val = malidp_hw_read(mp->hwdev, mp->layer->base); + val = (val & ~LAYER_FORMAT_MASK) | ms->format; + malidp_hw_write(mp->hwdev, val, mp->layer->base); for (i = 0; i < ms->n_planes; i++) { /* calculate the offset for the layer's plane registers */ From c6cf387ec56c19028333274747bbb4ae145a2d13 Mon Sep 17 00:00:00 2001 From: Ayan Kumar Halder Date: Mon, 18 Jun 2018 18:08:43 +0100 Subject: [PATCH 146/572] drm/mali-dp: Rectify the width and height passed to rotmem_required() The width and height needs to be swapped Signed-off-by: Ayan Kumar halder Reviewed-by: Brian Starkey Reviewed-by: Alexandru Gheorghe Acked-by: Liviu Dudau [rebased on top of v4.18-rc1] Signed-off-by: Liviu Dudau --- drivers/gpu/drm/arm/malidp_planes.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/arm/malidp_planes.c b/drivers/gpu/drm/arm/malidp_planes.c index 4af3c1fabb23..29409a65d864 100644 --- a/drivers/gpu/drm/arm/malidp_planes.c +++ b/drivers/gpu/drm/arm/malidp_planes.c @@ -236,8 +236,8 @@ static int malidp_de_plane_check(struct drm_plane *plane, if (state->rotation & MALIDP_ROTATED_MASK) { int val; - val = mp->hwdev->hw->rotmem_required(mp->hwdev, state->crtc_h, - state->crtc_w, + val = mp->hwdev->hw->rotmem_required(mp->hwdev, state->crtc_w, + state->crtc_h, fb->format->format); if (val < 0) return val; From 7350cdd0257e73a37df57253fb9decd8effacd37 Mon Sep 17 00:00:00 2001 From: Bharat Potnuri Date: Fri, 15 Jun 2018 20:52:33 +0530 Subject: [PATCH 147/572] RDMA/core: Save kernel caller name when creating CQ using ib_create_cq() Few kernel applications like SCST-iSER create CQ using ib_create_cq(), where accessing CQ structures using rdma restrack tool leads to below NULL pointer dereference. This patch saves caller kernel module name similar to ib_alloc_cq(). BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] skip_spaces+0x30/0x30 PGD 738bac067 PUD 8533f0067 PMD 0 Oops: 0000 [#1] SMP R10: ffff88017fc03300 R11: 0000000000000246 R12: 0000000000000000 R13: ffff88082fa5a668 R14: ffff88017475a000 R15: 0000000000000000 FS: 00002b32726582c0(0000) GS:ffff88087fc40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000008491a1000 CR4: 00000000003607e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: [] ? fill_res_name_pid+0x7c/0x90 [ib_core] [] fill_res_cq_entry+0xef/0x170 [ib_core] [] res_get_common_dumpit+0x3c4/0x480 [ib_core] [] nldev_res_get_cq_dumpit+0x13/0x20 [ib_core] [] netlink_dump+0x117/0x2e0 [] __netlink_dump_start+0x1ab/0x230 [] ibnl_rcv_msg+0x11d/0x1f0 [ib_core] [] ? nldev_res_get_mr_dumpit+0x20/0x20 [ib_core] [] ? rdma_nl_multicast+0x30/0x30 [ib_core] [] netlink_rcv_skb+0xa9/0xc0 [] ibnl_rcv+0x98/0xb0 [ib_core] [] netlink_unicast+0xf2/0x1b0 [] netlink_sendmsg+0x31f/0x6a0 [] sock_sendmsg+0xb0/0xf0 [] ? _raw_spin_unlock_bh+0x1e/0x20 [] ? release_sock+0x118/0x170 [] SYSC_sendto+0x121/0x1c0 [] ? sock_alloc_file+0xa0/0x140 [] ? __fd_install+0x25/0x60 [] SyS_sendto+0xe/0x10 [] system_call_fastpath+0x16/0x1b RIP [] skip_spaces+0x30/0x30 RSP CR2: 0000000000000000 Cc: Fixes: f66c8ba4c9fa ("RDMA/core: Save kernel caller name when creating PD and CQ objects") Reviewed-by: Steve Wise Signed-off-by: Potnuri Bharat Teja Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/verbs.c | 14 ++++++++------ include/rdma/ib_verbs.h | 13 ++++++++----- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 0b56828c1319..9d6beb948535 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1562,11 +1562,12 @@ EXPORT_SYMBOL(ib_destroy_qp); /* Completion queues */ -struct ib_cq *ib_create_cq(struct ib_device *device, - ib_comp_handler comp_handler, - void (*event_handler)(struct ib_event *, void *), - void *cq_context, - const struct ib_cq_init_attr *cq_attr) +struct ib_cq *__ib_create_cq(struct ib_device *device, + ib_comp_handler comp_handler, + void (*event_handler)(struct ib_event *, void *), + void *cq_context, + const struct ib_cq_init_attr *cq_attr, + const char *caller) { struct ib_cq *cq; @@ -1580,12 +1581,13 @@ struct ib_cq *ib_create_cq(struct ib_device *device, cq->cq_context = cq_context; atomic_set(&cq->usecnt, 0); cq->res.type = RDMA_RESTRACK_CQ; + cq->res.kern_name = caller; rdma_restrack_add(&cq->res); } return cq; } -EXPORT_SYMBOL(ib_create_cq); +EXPORT_SYMBOL(__ib_create_cq); int rdma_set_cq_moderation(struct ib_cq *cq, u16 cq_count, u16 cq_period) { diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 2043e1a8f851..4f71d6a073ba 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -3394,11 +3394,14 @@ int ib_process_cq_direct(struct ib_cq *cq, int budget); * * Users can examine the cq structure to determine the actual CQ size. */ -struct ib_cq *ib_create_cq(struct ib_device *device, - ib_comp_handler comp_handler, - void (*event_handler)(struct ib_event *, void *), - void *cq_context, - const struct ib_cq_init_attr *cq_attr); +struct ib_cq *__ib_create_cq(struct ib_device *device, + ib_comp_handler comp_handler, + void (*event_handler)(struct ib_event *, void *), + void *cq_context, + const struct ib_cq_init_attr *cq_attr, + const char *caller); +#define ib_create_cq(device, cmp_hndlr, evt_hndlr, cq_ctxt, cq_attr) \ + __ib_create_cq((device), (cmp_hndlr), (evt_hndlr), (cq_ctxt), (cq_attr), KBUILD_MODNAME) /** * ib_resize_cq - Modifies the capacity of the CQ. From 375dc53d032fc11e98036b5f228ad13f7c5933f5 Mon Sep 17 00:00:00 2001 From: Vijay Immanuel Date: Tue, 12 Jun 2018 18:16:05 -0700 Subject: [PATCH 148/572] IB/rxe: Fix missing completion for mem_reg work requests Run the completer task to post a work completion after processing a memory registration or invalidate work request. This covers the case where the memory registration or invalidate was the last work request posted to the qp. Signed-off-by: Vijay Immanuel Reviewed-by: Yonatan Cohen Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_req.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 829ecb93661f..8be27238a86e 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -645,6 +645,9 @@ next_wqe: } else { goto exit; } + if ((wqe->wr.send_flags & IB_SEND_SIGNALED) || + qp->sq_sig_type == IB_SIGNAL_ALL_WR) + rxe_run_task(&qp->comp.task, 1); qp->req.wqe_index = next_index(qp->sq.queue, qp->req.wqe_index); goto next_wqe; From 326345f995a83e326fa2e01d54bfa9a6a307bd4d Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sat, 26 May 2018 19:12:51 +0200 Subject: [PATCH 149/572] MIPS: pb44: Fix i2c-gpio GPIO descriptor table I used bad names in my clumsiness when rewriting many board files to use GPIO descriptors instead of platform data. A few had the platform_device ID set to -1 which would indeed give the device name "i2c-gpio". But several had it set to >=0 which gives the names "i2c-gpio.0", "i2c-gpio.1" ... Fix the one affected board in the MIPS tree. Sorry. Fixes: b2e63555592f ("i2c: gpio: Convert to use descriptors") Reported-by: Simon Guinot Signed-off-by: Linus Walleij Reviewed-by: Paul Burton Cc: Ralf Baechle Cc: Wolfram Sang Cc: Simon Guinot Cc: linux-mips@linux-mips.org Cc: # 4.15+ Patchwork: https://patchwork.linux-mips.org/patch/19387/ Signed-off-by: James Hogan --- arch/mips/ath79/mach-pb44.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/ath79/mach-pb44.c b/arch/mips/ath79/mach-pb44.c index 6b2c6f3baefa..75fb96ca61db 100644 --- a/arch/mips/ath79/mach-pb44.c +++ b/arch/mips/ath79/mach-pb44.c @@ -34,7 +34,7 @@ #define PB44_KEYS_DEBOUNCE_INTERVAL (3 * PB44_KEYS_POLL_INTERVAL) static struct gpiod_lookup_table pb44_i2c_gpiod_table = { - .dev_id = "i2c-gpio", + .dev_id = "i2c-gpio.0", .table = { GPIO_LOOKUP_IDX("ath79-gpio", PB44_GPIO_I2C_SDA, NULL, 0, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), From 2a027b47dba6b77ab8c8e47b589ae9bbc5ac6175 Mon Sep 17 00:00:00 2001 From: Tokunori Ikegami Date: Sun, 3 Jun 2018 23:02:01 +0900 Subject: [PATCH 150/572] MIPS: BCM47XX: Enable 74K Core ExternalSync for PCIe erratum MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The erratum and workaround are described by BCM5300X-ES300-RDS.pdf as below. R10: PCIe Transactions Periodically Fail Description: The BCM5300X PCIe does not maintain transaction ordering. This may cause PCIe transaction failure. Fix Comment: Add a dummy PCIe configuration read after a PCIe configuration write to ensure PCIe configuration access ordering. Set ES bit of CP0 configu7 register to enable sync function so that the sync instruction is functional. Resolution: hndpci.c: extpci_write_config() hndmips.c: si_mips_init() mipsinc.h CONF7_ES This is fixed by the CFE MIPS bcmsi chipset driver also for BCM47XX. Also the dummy PCIe configuration read is already implemented in the Linux BCMA driver. Enable ExternalSync in Config7 when CONFIG_BCMA_DRIVER_PCI_HOSTMODE=y too so that the sync instruction is externalised. Signed-off-by: Tokunori Ikegami Reviewed-by: Paul Burton Acked-by: Hauke Mehrtens Cc: Chris Packham Cc: Rafał Miłecki Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/19461/ Signed-off-by: James Hogan --- arch/mips/bcm47xx/setup.c | 6 ++++++ arch/mips/include/asm/mipsregs.h | 3 +++ 2 files changed, 9 insertions(+) diff --git a/arch/mips/bcm47xx/setup.c b/arch/mips/bcm47xx/setup.c index 6054d49e608e..8c9cbf13d32a 100644 --- a/arch/mips/bcm47xx/setup.c +++ b/arch/mips/bcm47xx/setup.c @@ -212,6 +212,12 @@ static int __init bcm47xx_cpu_fixes(void) */ if (bcm47xx_bus.bcma.bus.chipinfo.id == BCMA_CHIP_ID_BCM4706) cpu_wait = NULL; + + /* + * BCM47XX Erratum "R10: PCIe Transactions Periodically Fail" + * Enable ExternalSync for sync instruction to take effect + */ + set_c0_config7(MIPS_CONF7_ES); break; #endif } diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h index ae461d91cd1f..0bc270806ec5 100644 --- a/arch/mips/include/asm/mipsregs.h +++ b/arch/mips/include/asm/mipsregs.h @@ -681,6 +681,8 @@ #define MIPS_CONF7_WII (_ULCAST_(1) << 31) #define MIPS_CONF7_RPS (_ULCAST_(1) << 2) +/* ExternalSync */ +#define MIPS_CONF7_ES (_ULCAST_(1) << 8) #define MIPS_CONF7_IAR (_ULCAST_(1) << 10) #define MIPS_CONF7_AR (_ULCAST_(1) << 16) @@ -2765,6 +2767,7 @@ __BUILD_SET_C0(status) __BUILD_SET_C0(cause) __BUILD_SET_C0(config) __BUILD_SET_C0(config5) +__BUILD_SET_C0(config7) __BUILD_SET_C0(intcontrol) __BUILD_SET_C0(intctl) __BUILD_SET_C0(srsmap) From 73c4b15eff163f633a86589c5baf071f41af26b1 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 18 Jun 2018 14:41:36 -0700 Subject: [PATCH 151/572] MAINTAINERS: Add me as an x86 entry code maintainer And update my email address. Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Andy Lutomirski Signed-off-by: Linus Torvalds --- MAINTAINERS | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 9d5eeff51b5f..624c3fd11d04 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15575,6 +15575,13 @@ S: Maintained F: Documentation/x86/ F: arch/x86/ +X86 ENTRY CODE +M: Andy Lutomirski +L: linux-kernel@vger.kernel.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/asm +S: Maintained +F: arch/x86/entry/ + X86 MCE INFRASTRUCTURE M: Tony Luck M: Borislav Petkov @@ -15597,7 +15604,7 @@ F: drivers/platform/x86/ F: drivers/platform/olpc/ X86 VDSO -M: Andy Lutomirski +M: Andy Lutomirski L: linux-kernel@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/vdso S: Maintained From 6cc22dc08a247b7b4a173e4561e39705a557d300 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 18 Jun 2018 14:15:30 -0700 Subject: [PATCH 152/572] revert "mm/memblock: add missing include " The patch fixed a W=1 warning but broke the ia64 build: CC mm/memblock.o mm/memblock.c:1340: error: redefinition of `memblock_virt_alloc_try_nid_raw' ./include/linux/bootmem.h:335: error: previous definition of `memblock_virt_alloc_try_nid_raw' was here Because inlcude/linux/bootmem.h says #if defined(CONFIG_HAVE_MEMBLOCK) && defined(CONFIG_NO_BOOTMEM) whereas mm/Makefile says obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o So revert 26f09e9b3a06 ("mm/memblock: add missing include ") while a full fix can be worked on. Fixes: 26f09e9b3a06 ("mm/memblock: add missing include ") Reported-by: Tony Luck Cc: Mathieu Malaterre Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memblock.c | 1 - 1 file changed, 1 deletion(-) diff --git a/mm/memblock.c b/mm/memblock.c index cc16d70b8333..03d48d8835ba 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include From 1264f8325e9b8c004f36f1ae7bacd2a46a7ed771 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 18 Jun 2018 18:06:13 +1000 Subject: [PATCH 153/572] drm/nouveau/kms/nv50-: cursors always use core channel vram ctxdma Ctxdmas for cursors from all heads are setup in the core channel, and due to us tracking allocated handles per-window, we were failing with -EEXIST on multiple-head setups trying to allocate duplicate handles. The cursor code is hardcoded to use the core channel vram ctxdma already, so just skip ctxdma allocation for cursor fbs to fix the issue. Fixes: 5bca1621c07 ("drm/nouveau/kms/nv50-: move fb ctxdma tracking into windows") Reported-by: Adam Borowski Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/dispnv50/curs507a.c | 2 +- drivers/gpu/drm/nouveau/dispnv50/wndw.c | 13 ++++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/nouveau/dispnv50/curs507a.c b/drivers/gpu/drm/nouveau/dispnv50/curs507a.c index 291c08117ab6..397143b639c6 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/curs507a.c +++ b/drivers/gpu/drm/nouveau/dispnv50/curs507a.c @@ -132,7 +132,7 @@ curs507a_new_(const struct nv50_wimm_func *func, struct nouveau_drm *drm, nvif_object_map(&wndw->wimm.base.user, NULL, 0); wndw->immd = func; - wndw->ctxdma.parent = &disp->core->chan.base.user; + wndw->ctxdma.parent = NULL; return 0; } diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c index 224963b533a6..c5a9bc1af5af 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c +++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c @@ -444,14 +444,17 @@ nv50_wndw_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state) if (ret) return ret; - ctxdma = nv50_wndw_ctxdma_new(wndw, fb); - if (IS_ERR(ctxdma)) { - nouveau_bo_unpin(fb->nvbo); - return PTR_ERR(ctxdma); + if (wndw->ctxdma.parent) { + ctxdma = nv50_wndw_ctxdma_new(wndw, fb); + if (IS_ERR(ctxdma)) { + nouveau_bo_unpin(fb->nvbo); + return PTR_ERR(ctxdma); + } + + asyw->image.handle[0] = ctxdma->object.handle; } asyw->state.fence = reservation_object_get_excl_rcu(fb->nvbo->bo.resv); - asyw->image.handle[0] = ctxdma->object.handle; asyw->image.offset[0] = fb->nvbo->bo.offset; if (wndw->func->prepare) { From 6c3796d130ed2860489885a934dcb7bb334d5eb0 Mon Sep 17 00:00:00 2001 From: "bstroesser@ts.fujitsu.com" Date: Thu, 24 May 2018 18:49:41 +0200 Subject: [PATCH 154/572] scsi: target: tcmu: add read length support Generally target core and TCMUser seem to work fine for tape devices and media changers. But there is at least one situation where TCMUser is not able to support sequential access device emulation correctly. The situation is when an initiator sends a SCSI READ CDB with a length that is greater than the length of the tape block to read. We can distinguish two subcases: A) The initiator sent the READ CDB with the SILI bit being set. In this case the sequential access device has to transfer the data from the tape block (only the length of the tape block) and transmit a good status. The current interface between TCMUser and the userspace does not support reduction of the read data size by the userspace program. The patch below fixes this subcase by allowing the userspace program to specify a reduced data size in read direction. B) The initiator sent the READ CDB with the SILI bit not being set. In this case the sequential access device has to transfer the data from the tape block as in A), but additionally has to transmit CHECK CONDITION with the ILI bit set and NO SENSE in the sensebytes. The information field in the sensebytes must contain the residual count. With the below patch a user space program can specify the real read data length and appropriate sensebytes. TCMUser then uses the se_cmd flag SCF_TREAT_READ_AS_NORMAL, to force target core to transmit the real data size and the sensebytes. Note: the flag SCF_TREAT_READ_AS_NORMAL is introduced by Lee Duncan's patch "[PATCH v4] target: transport should handle st FM/EOM/ILI reads" from Tue, 15 May 2018 18:25:24 -0700. Signed-off-by: Bodo Stroesser Acked-by: Mike Christie Reviewed-by: Lee Duncan Signed-off-by: Martin K. Petersen --- drivers/target/target_core_user.c | 44 ++++++++++++++++++++++----- include/uapi/linux/target_core_user.h | 4 ++- 2 files changed, 39 insertions(+), 9 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 7f96dfa32b9c..d8dc3d22051f 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -656,7 +656,7 @@ static void scatter_data_area(struct tcmu_dev *udev, } static void gather_data_area(struct tcmu_dev *udev, struct tcmu_cmd *cmd, - bool bidi) + bool bidi, uint32_t read_len) { struct se_cmd *se_cmd = cmd->se_cmd; int i, dbi; @@ -689,7 +689,7 @@ static void gather_data_area(struct tcmu_dev *udev, struct tcmu_cmd *cmd, for_each_sg(data_sg, sg, data_nents, i) { int sg_remaining = sg->length; to = kmap_atomic(sg_page(sg)) + sg->offset; - while (sg_remaining > 0) { + while (sg_remaining > 0 && read_len > 0) { if (block_remaining == 0) { if (from) kunmap_atomic(from); @@ -701,6 +701,8 @@ static void gather_data_area(struct tcmu_dev *udev, struct tcmu_cmd *cmd, } copy_bytes = min_t(size_t, sg_remaining, block_remaining); + if (read_len < copy_bytes) + copy_bytes = read_len; offset = DATA_BLOCK_SIZE - block_remaining; tcmu_flush_dcache_range(from, copy_bytes); memcpy(to + sg->length - sg_remaining, from + offset, @@ -708,8 +710,11 @@ static void gather_data_area(struct tcmu_dev *udev, struct tcmu_cmd *cmd, sg_remaining -= copy_bytes; block_remaining -= copy_bytes; + read_len -= copy_bytes; } kunmap_atomic(to - sg->offset); + if (read_len == 0) + break; } if (from) kunmap_atomic(from); @@ -1042,6 +1047,8 @@ static void tcmu_handle_completion(struct tcmu_cmd *cmd, struct tcmu_cmd_entry * { struct se_cmd *se_cmd = cmd->se_cmd; struct tcmu_dev *udev = cmd->tcmu_dev; + bool read_len_valid = false; + uint32_t read_len = se_cmd->data_length; /* * cmd has been completed already from timeout, just reclaim @@ -1056,13 +1063,28 @@ static void tcmu_handle_completion(struct tcmu_cmd *cmd, struct tcmu_cmd_entry * pr_warn("TCMU: Userspace set UNKNOWN_OP flag on se_cmd %p\n", cmd->se_cmd); entry->rsp.scsi_status = SAM_STAT_CHECK_CONDITION; - } else if (entry->rsp.scsi_status == SAM_STAT_CHECK_CONDITION) { + goto done; + } + + if (se_cmd->data_direction == DMA_FROM_DEVICE && + (entry->hdr.uflags & TCMU_UFLAG_READ_LEN) && entry->rsp.read_len) { + read_len_valid = true; + if (entry->rsp.read_len < read_len) + read_len = entry->rsp.read_len; + } + + if (entry->rsp.scsi_status == SAM_STAT_CHECK_CONDITION) { transport_copy_sense_to_cmd(se_cmd, entry->rsp.sense_buffer); - } else if (se_cmd->se_cmd_flags & SCF_BIDI) { + if (!read_len_valid ) + goto done; + else + se_cmd->se_cmd_flags |= SCF_TREAT_READ_AS_NORMAL; + } + if (se_cmd->se_cmd_flags & SCF_BIDI) { /* Get Data-In buffer before clean up */ - gather_data_area(udev, cmd, true); + gather_data_area(udev, cmd, true, read_len); } else if (se_cmd->data_direction == DMA_FROM_DEVICE) { - gather_data_area(udev, cmd, false); + gather_data_area(udev, cmd, false, read_len); } else if (se_cmd->data_direction == DMA_TO_DEVICE) { /* TODO: */ } else if (se_cmd->data_direction != DMA_NONE) { @@ -1070,7 +1092,13 @@ static void tcmu_handle_completion(struct tcmu_cmd *cmd, struct tcmu_cmd_entry * se_cmd->data_direction); } - target_complete_cmd(cmd->se_cmd, entry->rsp.scsi_status); +done: + if (read_len_valid) { + pr_debug("read_len = %d\n", read_len); + target_complete_cmd_with_length(cmd->se_cmd, + entry->rsp.scsi_status, read_len); + } else + target_complete_cmd(cmd->se_cmd, entry->rsp.scsi_status); out: cmd->se_cmd = NULL; @@ -1740,7 +1768,7 @@ static int tcmu_configure_device(struct se_device *dev) /* Initialise the mailbox of the ring buffer */ mb = udev->mb_addr; mb->version = TCMU_MAILBOX_VERSION; - mb->flags = TCMU_MAILBOX_FLAG_CAP_OOOC; + mb->flags = TCMU_MAILBOX_FLAG_CAP_OOOC | TCMU_MAILBOX_FLAG_CAP_READ_LEN; mb->cmdr_off = CMDR_OFF; mb->cmdr_size = udev->cmdr_size; diff --git a/include/uapi/linux/target_core_user.h b/include/uapi/linux/target_core_user.h index 6e299349b158..b7b57967d90f 100644 --- a/include/uapi/linux/target_core_user.h +++ b/include/uapi/linux/target_core_user.h @@ -44,6 +44,7 @@ #define TCMU_MAILBOX_VERSION 2 #define ALIGN_SIZE 64 /* Should be enough for most CPUs */ #define TCMU_MAILBOX_FLAG_CAP_OOOC (1 << 0) /* Out-of-order completions */ +#define TCMU_MAILBOX_FLAG_CAP_READ_LEN (1 << 1) /* Read data length */ struct tcmu_mailbox { __u16 version; @@ -71,6 +72,7 @@ struct tcmu_cmd_entry_hdr { __u16 cmd_id; __u8 kflags; #define TCMU_UFLAG_UNKNOWN_OP 0x1 +#define TCMU_UFLAG_READ_LEN 0x2 __u8 uflags; } __packed; @@ -119,7 +121,7 @@ struct tcmu_cmd_entry { __u8 scsi_status; __u8 __pad1; __u16 __pad2; - __u32 __pad3; + __u32 read_len; char sense_buffer[TCMU_SENSE_BUFFERSIZE]; } rsp; }; From f2233a33dc1fef4aa30dc11e4c676637bf358c3d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 19 Mar 2018 22:50:05 -0400 Subject: [PATCH 155/572] scsi: ipr: Eliminate duplicate barriers Driver does both wmb() and writel(). The latter already has a barrier on some architectures like arm64. This ends up with CPU observing two barriers back to back before executing the register write. Drivers should generally assume that the barrier implied by writel() is sufficient for ordering DMA. Remove the extraneous wmb() before it. [mkp: Squashed Arnd's and Sinan's patches] Signed-off-by: Arnd Bergmann Reported-by: Sinan Kaya Acked-by: Brian King Signed-off-by: Martin K. Petersen --- drivers/scsi/ipr.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 0a9b8b387bd2..02d65dce74e5 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -760,7 +760,6 @@ static void ipr_mask_and_clear_interrupts(struct ipr_ioa_cfg *ioa_cfg, ioa_cfg->hrrq[i].allow_interrupts = 0; spin_unlock(&ioa_cfg->hrrq[i]._lock); } - wmb(); /* Set interrupt mask to stop all new interrupts */ if (ioa_cfg->sis64) @@ -8403,7 +8402,6 @@ static int ipr_reset_enable_ioa(struct ipr_cmnd *ipr_cmd) ioa_cfg->hrrq[i].allow_interrupts = 1; spin_unlock(&ioa_cfg->hrrq[i]._lock); } - wmb(); if (ioa_cfg->sis64) { /* Set the adapter to the correct endian mode. */ writel(IPR_ENDIAN_SWAP_KEY, ioa_cfg->regs.endian_swap_reg); From 49d7bd36813ea8e6b4c97b640d24e7fbd44c84f0 Mon Sep 17 00:00:00 2001 From: Mikhail Malygin Date: Wed, 13 Jun 2018 13:05:57 +0000 Subject: [PATCH 156/572] scsi: qla2xxx: Spinlock recursion in qla_target The patch reverts changes done in qlt_schedule_sess_for_deletion() to avoid spinlock recursion sess->vha->work_lock should be used instead of ha->tgt.sess_lock, that can be locked in callers: qlt_reset() or qlt_handle_login() [mkp: roll in build warning reported by sfr] Fixes: 1c6cacf4ea6c04 ("scsi: qla2xxx: Fixup locking for session deletion") Cc: #v4.17 Signed-off-by: Mikhail Malygin Reported-by: Mikhail Malygin Tested-by: Mikhail Malygin Acked-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_target.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 0fea2e2326be..1027b0cb7fa3 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -1224,7 +1224,6 @@ static void qla24xx_chk_fcp_state(struct fc_port *sess) void qlt_schedule_sess_for_deletion(struct fc_port *sess) { struct qla_tgt *tgt = sess->tgt; - struct qla_hw_data *ha = sess->vha->hw; unsigned long flags; if (sess->disc_state == DSC_DELETE_PEND) @@ -1241,16 +1240,16 @@ void qlt_schedule_sess_for_deletion(struct fc_port *sess) return; } - spin_lock_irqsave(&ha->tgt.sess_lock, flags); if (sess->deleted == QLA_SESS_DELETED) sess->logout_on_delete = 0; + spin_lock_irqsave(&sess->vha->work_lock, flags); if (sess->deleted == QLA_SESS_DELETION_IN_PROGRESS) { - spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); + spin_unlock_irqrestore(&sess->vha->work_lock, flags); return; } sess->deleted = QLA_SESS_DELETION_IN_PROGRESS; - spin_unlock_irqrestore(&ha->tgt.sess_lock, flags); + spin_unlock_irqrestore(&sess->vha->work_lock, flags); sess->disc_state = DSC_DELETE_PEND; From 52ab9768f723823a71dc659f0fad803a90f80236 Mon Sep 17 00:00:00 2001 From: Luis Henriques Date: Mon, 18 Jun 2018 17:08:03 +0100 Subject: [PATCH 157/572] scsi: scsi_debug: Fix memory leak on module unload Since commit 80c49563e250 ("scsi: scsi_debug: implement IMMED bit") there are long delays in F_SYNC_DELAY and F_SSU_DELAY. This can cause a memory leak in schedule_resp(), which can be invoked while unloading the scsi_debug module: free_all_queued() had already freed all sd_dp and schedule_resp will alloc a new one, which will never get freed. Here's the kmemleak report while running xfstests generic/350: unreferenced object 0xffff88007d752b00 (size 128): comm "rmmod", pid 26940, jiffies 4295816945 (age 7.588s) hex dump (first 32 bytes): 00 2b 75 7d 00 88 ff ff 00 00 00 00 00 00 00 00 .+u}............ 00 00 00 00 00 00 00 00 8e 31 a2 34 5f 03 00 00 .........1.4_... backtrace: [<000000002abd83d0>] 0xffffffffa000705e [<000000004c063fda>] scsi_dispatch_cmd+0xc7/0x1a0 [<000000000c119a00>] scsi_request_fn+0x251/0x550 [<000000009de0c736>] __blk_run_queue+0x3f/0x60 [<000000001c4453c8>] blk_execute_rq_nowait+0x98/0xd0 [<00000000d17ec79f>] blk_execute_rq+0x3a/0x50 [<00000000a7654b6e>] scsi_execute+0x113/0x250 [<00000000fd78f7cd>] sd_sync_cache+0x95/0x160 [<0000000024dacb14>] sd_shutdown+0x9b/0xd0 [<00000000e9101710>] sd_remove+0x5f/0xb0 [<00000000c43f0d63>] device_release_driver_internal+0x13c/0x1f0 [<00000000e8ad57b6>] bus_remove_device+0xe9/0x160 [<00000000713a7b8a>] device_del+0x120/0x320 [<00000000e5db670c>] __scsi_remove_device+0x115/0x150 [<00000000eccbef30>] scsi_forget_host+0x20/0x60 [<00000000cd5a0738>] scsi_remove_host+0x6d/0x120 Cc: stable@vger.kernel.org # v4.17+ Signed-off-by: Luis Henriques Acked-by: Douglas Gilbert Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 24d7496cd9e2..364e71861bfd 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -5507,9 +5507,9 @@ static void __exit scsi_debug_exit(void) int k = sdebug_add_host; stop_all_queued(); - free_all_queued(); for (; k; k--) sdebug_remove_adapter(); + free_all_queued(); driver_unregister(&sdebug_driverfs_driver); bus_unregister(&pseudo_lld_bus); root_device_unregister(pseudo_primary); From 1bcfe0564044be578841744faea1c2f46adc8178 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 15 Jun 2018 09:41:29 +0200 Subject: [PATCH 158/572] ARM: dts: imx6sx: fix irq for pcie bridge Use the correct IRQ line for the MSI controller in the PCIe host controller. Apparently a different IRQ line is used compared to other i.MX6 variants. Without this change MSI IRQs aren't properly propagated to the upstream interrupt controller. Signed-off-by: Oleksij Rempel Reviewed-by: Lucas Stach Fixes: b1d17f68e5c5 ("ARM: dts: imx: add initial imx6sx device tree source") Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6sx.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx6sx.dtsi b/arch/arm/boot/dts/imx6sx.dtsi index d8b94f47498b..4e4a55aad5c9 100644 --- a/arch/arm/boot/dts/imx6sx.dtsi +++ b/arch/arm/boot/dts/imx6sx.dtsi @@ -1344,7 +1344,7 @@ ranges = <0x81000000 0 0 0x08f80000 0 0x00010000 /* downstream I/O */ 0x82000000 0 0x08000000 0x08000000 0 0x00f00000>; /* non-prefetchable memory */ num-lanes = <1>; - interrupts = ; + interrupts = ; interrupt-names = "msi"; #interrupt-cells = <1>; interrupt-map-mask = <0 0 0 0x7>; From 0fe2795516b9e1c59b58b02bdf8658698117ec4e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 18 Jun 2018 16:07:59 +0200 Subject: [PATCH 159/572] posix-timers: Fix nanosleep_copyout() for CONFIG_COMPAT_32BIT_TIME Commit b5793b0d92c9 added support for building the nanosleep compat system call on 32-bit architectures, but missed one change in nanosleep_copyout(), which would trigger a BUG() as soon as any architecture is switched over to use it. Use the proper config symbol to enable the code path. Fixes: Commit b5793b0d92c9 ("posix-timers: Make compat syscalls depend on CONFIG_COMPAT_32BIT_TIME") Signed-off-by: Arnd Bergmann Signed-off-by: Thomas Gleixner Cc: y2038@lists.linaro.org Cc: Anna-Maria Gleixner Cc: Deepa Dinamani Cc: "Rafael J. Wysocki" Link: https://lkml.kernel.org/r/20180618140811.2998503-1-arnd@arndb.de --- kernel/time/hrtimer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 055a4a728c00..3e93c54bd3a1 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -1659,7 +1659,7 @@ EXPORT_SYMBOL_GPL(hrtimer_init_sleeper); int nanosleep_copyout(struct restart_block *restart, struct timespec64 *ts) { switch(restart->nanosleep.type) { -#ifdef CONFIG_COMPAT +#ifdef CONFIG_COMPAT_32BIT_TIME case TT_COMPAT: if (compat_put_timespec64(ts, restart->nanosleep.compat_rmtp)) return -EFAULT; From ee3dbcf963c17775417d2fdf74e60f0c1b563dda Mon Sep 17 00:00:00 2001 From: Ilia Lin Date: Sun, 17 Jun 2018 21:58:42 +0200 Subject: [PATCH 160/572] cpufreq: kryo: Fix possible error code dereference In event of error returned by the nvmem_cell_read() non-pointer value may be dereferenced. Fix this with error handling. Additionally free the allocated speedbin buffer, as per the API. Fixes: 9ce36edd1a52 (cpufreq: Add Kryo CPU scaling driver) Signed-off-by: Ilia Lin Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/qcom-cpufreq-kryo.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/cpufreq/qcom-cpufreq-kryo.c b/drivers/cpufreq/qcom-cpufreq-kryo.c index d049fe4b80c4..74b9b93d511b 100644 --- a/drivers/cpufreq/qcom-cpufreq-kryo.c +++ b/drivers/cpufreq/qcom-cpufreq-kryo.c @@ -115,6 +115,8 @@ static int qcom_cpufreq_kryo_probe(struct platform_device *pdev) speedbin = nvmem_cell_read(speedbin_nvmem, &len); nvmem_cell_put(speedbin_nvmem); + if (IS_ERR(speedbin)) + return PTR_ERR(speedbin); switch (msm8996_version) { case MSM8996_V3: @@ -127,6 +129,7 @@ static int qcom_cpufreq_kryo_probe(struct platform_device *pdev) BUG(); break; } + kfree(speedbin); for_each_possible_cpu(cpu) { cpu_dev = get_cpu_device(cpu); From 5ad7346b4ae2d59cfee106ec8e40ee2561476d47 Mon Sep 17 00:00:00 2001 From: Ilia Lin Date: Sun, 17 Jun 2018 22:01:46 +0200 Subject: [PATCH 161/572] cpufreq: kryo: Add module remove and exit Add device remove and module exit code to make the driver functioning as a loadable module. Fixes: ac28927659be (cpufreq: kryo: allow building as a loadable module) Signed-off-by: Ilia Lin Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/qcom-cpufreq-kryo.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/qcom-cpufreq-kryo.c b/drivers/cpufreq/qcom-cpufreq-kryo.c index 74b9b93d511b..01bddacf5c3b 100644 --- a/drivers/cpufreq/qcom-cpufreq-kryo.c +++ b/drivers/cpufreq/qcom-cpufreq-kryo.c @@ -42,6 +42,8 @@ enum _msm8996_version { NUM_OF_MSM8996_VERSIONS, }; +struct platform_device *cpufreq_dt_pdev, *kryo_cpufreq_pdev; + static enum _msm8996_version __init qcom_cpufreq_kryo_get_msm_id(void) { size_t len; @@ -74,7 +76,6 @@ static enum _msm8996_version __init qcom_cpufreq_kryo_get_msm_id(void) static int qcom_cpufreq_kryo_probe(struct platform_device *pdev) { struct opp_table *opp_tables[NR_CPUS] = {0}; - struct platform_device *cpufreq_dt_pdev; enum _msm8996_version msm8996_version; struct nvmem_cell *speedbin_nvmem; struct device_node *np; @@ -165,8 +166,15 @@ free_opp: return ret; } +static int qcom_cpufreq_kryo_remove(struct platform_device *pdev) +{ + platform_device_unregister(cpufreq_dt_pdev); + return 0; +} + static struct platform_driver qcom_cpufreq_kryo_driver = { .probe = qcom_cpufreq_kryo_probe, + .remove = qcom_cpufreq_kryo_remove, .driver = { .name = "qcom-cpufreq-kryo", }, @@ -201,8 +209,9 @@ static int __init qcom_cpufreq_kryo_init(void) if (unlikely(ret < 0)) return ret; - ret = PTR_ERR_OR_ZERO(platform_device_register_simple( - "qcom-cpufreq-kryo", -1, NULL, 0)); + kryo_cpufreq_pdev = platform_device_register_simple( + "qcom-cpufreq-kryo", -1, NULL, 0); + ret = PTR_ERR_OR_ZERO(kryo_cpufreq_pdev); if (0 == ret) return 0; @@ -211,5 +220,12 @@ static int __init qcom_cpufreq_kryo_init(void) } module_init(qcom_cpufreq_kryo_init); +static void __init qcom_cpufreq_kryo_exit(void) +{ + platform_device_unregister(kryo_cpufreq_pdev); + platform_driver_unregister(&qcom_cpufreq_kryo_driver); +} +module_exit(qcom_cpufreq_kryo_exit); + MODULE_DESCRIPTION("Qualcomm Technologies, Inc. Kryo CPUfreq driver"); MODULE_LICENSE("GPL v2"); From ff7c9917143b3a6cf2fa61212a32d67cf259bf9c Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Mon, 18 Jun 2018 12:47:45 -0700 Subject: [PATCH 162/572] cpufreq: intel_pstate: Fix scaling max/min limits with Turbo 3.0 When scaling max/min settings are changed, internally they are converted to a ratio using the max turbo 1 core turbo frequency. This works fine when 1 core max is same irrespective of the core. But under Turbo 3.0, this will not be the case. For example: Core 0: max turbo pstate: 43 (4.3GHz) Core 1: max turbo pstate: 45 (4.5GHz) In this case 1 core turbo ratio will be maximum of all, so it will be 45 (4.5GHz). Suppose scaling max is set to 4GHz (ratio 40) for all cores ,then on core one it will be = max_state * policy->max / max_freq; = 43 * (4000000/4500000) = 38 (3.8GHz) = 38 which is 200MHz less than the desired. On core2, it will be correctly set to ratio 40 (4GHz). Same holds true for scaling min frequency limit. So this requires usage of correct turbo max frequency for core one, which in this case is 4.3GHz. So we need to adjust per CPU cpu->pstate.turbo_freq using the maximum HWP ratio of that core. This change uses the HWP capability of a core to adjust max turbo frequency. But since Broadwell HWP doesn't use ratios in the HWP capabilities, we have to use legacy max 1 core turbo ratio. This is not a problem as the HWP capabilities don't differ among cores in Broadwell. We need to check for non Broadwell CPU model for applying this change, though. Signed-off-by: Srinivas Pandruvada Cc: 4.6+ # 4.6+ Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 1de5ec8d5ea3..ece120da3353 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -294,6 +294,7 @@ struct pstate_funcs { static struct pstate_funcs pstate_funcs __read_mostly; static int hwp_active __read_mostly; +static int hwp_mode_bdw __read_mostly; static bool per_cpu_limits __read_mostly; static bool hwp_boost __read_mostly; @@ -1413,7 +1414,15 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) cpu->pstate.turbo_pstate = pstate_funcs.get_turbo(); cpu->pstate.scaling = pstate_funcs.get_scaling(); cpu->pstate.max_freq = cpu->pstate.max_pstate * cpu->pstate.scaling; - cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling; + + if (hwp_active && !hwp_mode_bdw) { + unsigned int phy_max, current_max; + + intel_pstate_get_hwp_max(cpu->cpu, &phy_max, ¤t_max); + cpu->pstate.turbo_freq = phy_max * cpu->pstate.scaling; + } else { + cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling; + } if (pstate_funcs.get_aperf_mperf_shift) cpu->aperf_mperf_shift = pstate_funcs.get_aperf_mperf_shift(); @@ -2467,28 +2476,36 @@ static inline bool intel_pstate_has_acpi_ppc(void) { return false; } static inline void intel_pstate_request_control_from_smm(void) {} #endif /* CONFIG_ACPI */ +#define INTEL_PSTATE_HWP_BROADWELL 0x01 + +#define ICPU_HWP(model, hwp_mode) \ + { X86_VENDOR_INTEL, 6, model, X86_FEATURE_HWP, hwp_mode } + static const struct x86_cpu_id hwp_support_ids[] __initconst = { - { X86_VENDOR_INTEL, 6, X86_MODEL_ANY, X86_FEATURE_HWP }, + ICPU_HWP(INTEL_FAM6_BROADWELL_X, INTEL_PSTATE_HWP_BROADWELL), + ICPU_HWP(INTEL_FAM6_BROADWELL_XEON_D, INTEL_PSTATE_HWP_BROADWELL), + ICPU_HWP(X86_MODEL_ANY, 0), {} }; static int __init intel_pstate_init(void) { + const struct x86_cpu_id *id; int rc; if (no_load) return -ENODEV; - if (x86_match_cpu(hwp_support_ids)) { + id = x86_match_cpu(hwp_support_ids); + if (id) { copy_cpu_funcs(&core_funcs); if (!no_hwp) { hwp_active++; + hwp_mode_bdw = id->driver_data; intel_pstate.attr = hwp_cpufreq_attrs; goto hwp_cpu_matched; } } else { - const struct x86_cpu_id *id; - id = x86_match_cpu(intel_pstate_cpu_ids); if (!id) return -ENODEV; From 12baf7721143c83150fa973484b7b5fcd86b23f0 Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Tue, 19 Jun 2018 17:31:24 +1200 Subject: [PATCH 163/572] mtd: rawnand: micron: add ONFI_FEATURE_ON_DIE_ECC to supported features Add ONFI_FEATURE_ON_DIE_ECC to the set/get features list for Micron NAND flash. Fixes: 789157e41a06 ("mtd: rawnand: allow vendors to declare (un)supported features") Cc: Signed-off-by: Chris Packham Reviewed-by: Miquel Raynal Signed-off-by: Boris Brezillon --- drivers/mtd/nand/raw/nand_micron.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mtd/nand/raw/nand_micron.c b/drivers/mtd/nand/raw/nand_micron.c index 0af45b134c0c..5ec4c90a637d 100644 --- a/drivers/mtd/nand/raw/nand_micron.c +++ b/drivers/mtd/nand/raw/nand_micron.c @@ -66,7 +66,9 @@ static int micron_nand_onfi_init(struct nand_chip *chip) if (p->supports_set_get_features) { set_bit(ONFI_FEATURE_ADDR_READ_RETRY, p->set_feature_list); + set_bit(ONFI_FEATURE_ON_DIE_ECC, p->set_feature_list); set_bit(ONFI_FEATURE_ADDR_READ_RETRY, p->get_feature_list); + set_bit(ONFI_FEATURE_ON_DIE_ECC, p->get_feature_list); } return 0; From 93ef2dc0c4f7acf4253882a03c11f2ca8e41f387 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 6 Jun 2018 14:22:24 +0000 Subject: [PATCH 164/572] usb: dwc3: Fix error return code in dwc3_qcom_probe() Fix to return error code -ENODEV from the get device failed error handling case instead of 0, as done elsewhere in this function. Fixes: a4333c3a6ba9 ("usb: dwc3: Add Qualcomm DWC3 glue driver") Signed-off-by: Wei Yongjun Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/dwc3-qcom.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/dwc3/dwc3-qcom.c b/drivers/usb/dwc3/dwc3-qcom.c index b0e67ab2f98c..8854e428ae08 100644 --- a/drivers/usb/dwc3/dwc3-qcom.c +++ b/drivers/usb/dwc3/dwc3-qcom.c @@ -490,6 +490,7 @@ static int dwc3_qcom_probe(struct platform_device *pdev) qcom->dwc3 = of_find_device_by_node(dwc3_np); if (!qcom->dwc3) { dev_err(&pdev->dev, "failed to get dwc3 platform device\n"); + ret = -ENODEV; goto depopulate; } From 9925e6ebe5c2da9601037cca73462782900b9190 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 25 May 2018 18:06:20 +0200 Subject: [PATCH 165/572] usb: dwc3: qcom: mark PM functions as __maybe_unused The #ifdef guards around these are wrong, resulting in warnings in certain configurations: drivers/usb/dwc3/dwc3-qcom.c:244:12: error: 'dwc3_qcom_resume' defined but not used [-Werror=unused-function] static int dwc3_qcom_resume(struct dwc3_qcom *qcom) ^~~~~~~~~~~~~~~~ drivers/usb/dwc3/dwc3-qcom.c:223:12: error: 'dwc3_qcom_suspend' defined but not used [-Werror=unused-function] static int dwc3_qcom_suspend(struct dwc3_qcom *qcom) This replaces the guards with __maybe_unused annotations to shut up the warnings and give better compile time coverage. Fixes: a4333c3a6ba9 ("usb: dwc3: Add Qualcomm DWC3 glue driver") Reviewed-by: Douglas Anderson Acked-by: Geert Uytterhoeven Signed-off-by: Arnd Bergmann Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/dwc3-qcom.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/usb/dwc3/dwc3-qcom.c b/drivers/usb/dwc3/dwc3-qcom.c index 8854e428ae08..a6d0203e40b6 100644 --- a/drivers/usb/dwc3/dwc3-qcom.c +++ b/drivers/usb/dwc3/dwc3-qcom.c @@ -548,8 +548,7 @@ static int dwc3_qcom_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM_SLEEP -static int dwc3_qcom_pm_suspend(struct device *dev) +static int __maybe_unused dwc3_qcom_pm_suspend(struct device *dev) { struct dwc3_qcom *qcom = dev_get_drvdata(dev); int ret = 0; @@ -561,7 +560,7 @@ static int dwc3_qcom_pm_suspend(struct device *dev) return ret; } -static int dwc3_qcom_pm_resume(struct device *dev) +static int __maybe_unused dwc3_qcom_pm_resume(struct device *dev) { struct dwc3_qcom *qcom = dev_get_drvdata(dev); int ret; @@ -572,23 +571,20 @@ static int dwc3_qcom_pm_resume(struct device *dev) return ret; } -#endif -#ifdef CONFIG_PM -static int dwc3_qcom_runtime_suspend(struct device *dev) +static int __maybe_unused dwc3_qcom_runtime_suspend(struct device *dev) { struct dwc3_qcom *qcom = dev_get_drvdata(dev); return dwc3_qcom_suspend(qcom); } -static int dwc3_qcom_runtime_resume(struct device *dev) +static int __maybe_unused dwc3_qcom_runtime_resume(struct device *dev) { struct dwc3_qcom *qcom = dev_get_drvdata(dev); return dwc3_qcom_resume(qcom); } -#endif static const struct dev_pm_ops dwc3_qcom_dev_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(dwc3_qcom_pm_suspend, dwc3_qcom_pm_resume) From 22bb5cfdf13ae70c6a34508a16cfeee48f162443 Mon Sep 17 00:00:00 2001 From: Artur Petrosyan Date: Wed, 23 May 2018 09:26:08 -0400 Subject: [PATCH 166/572] usb: dwc2: Fix host exit from hibernation flow. In case when a hub is connected to DWC2 host auto suspend occurs and host goes to hibernation. When any device connected to hub host hibernation exiting incorrectly. - Added dwc2_hcd_rem_wakeup() function call to exit from suspend state by remote wakeup. - Increase timeout value for port suspend bit to be set. Acked-by: Minas Harutyunyan Signed-off-by: Artur Petrosyan Signed-off-by: Felipe Balbi --- drivers/usb/dwc2/hcd.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c index edaf0b6af4f0..bae08a7d7abd 100644 --- a/drivers/usb/dwc2/hcd.c +++ b/drivers/usb/dwc2/hcd.c @@ -5435,7 +5435,7 @@ int dwc2_host_enter_hibernation(struct dwc2_hsotg *hsotg) dwc2_writel(hprt0, hsotg->regs + HPRT0); /* Wait for the HPRT0.PrtSusp register field to be set */ - if (dwc2_hsotg_wait_bit_set(hsotg, HPRT0, HPRT0_SUSP, 300)) + if (dwc2_hsotg_wait_bit_set(hsotg, HPRT0, HPRT0_SUSP, 3000)) dev_warn(hsotg->dev, "Suspend wasn't generated\n"); /* @@ -5616,6 +5616,8 @@ int dwc2_host_exit_hibernation(struct dwc2_hsotg *hsotg, int rem_wakeup, return ret; } + dwc2_hcd_rem_wakeup(hsotg); + hsotg->hibernated = 0; hsotg->bus_suspended = 0; hsotg->lx_state = DWC2_L0; From 8760675932ddb614e83702117d36ea644050c609 Mon Sep 17 00:00:00 2001 From: William Wu Date: Mon, 21 May 2018 18:12:00 +0800 Subject: [PATCH 167/572] usb: dwc2: fix the incorrect bitmaps for the ports of multi_tt hub The dwc2_get_ls_map() use ttport to reference into the bitmap if we're on a multi_tt hub. But the bitmaps index from 0 to (hub->maxchild - 1), while the ttport index from 1 to hub->maxchild. This will cause invalid memory access when the number of ttport is hub->maxchild. Without this patch, I can easily meet a Kernel panic issue if connect a low-speed USB mouse with the max port of FE2.1 multi-tt hub (1a40:0201) on rk3288 platform. Fixes: 9f9f09b048f5 ("usb: dwc2: host: Totally redo the microframe scheduler") Cc: Reviewed-by: Douglas Anderson Acked-by: Minas Harutyunyan hminas@synopsys.com> Signed-off-by: William Wu Signed-off-by: Felipe Balbi --- drivers/usb/dwc2/hcd_queue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/dwc2/hcd_queue.c b/drivers/usb/dwc2/hcd_queue.c index d7c3d6c776d8..9c55d1addba9 100644 --- a/drivers/usb/dwc2/hcd_queue.c +++ b/drivers/usb/dwc2/hcd_queue.c @@ -383,7 +383,7 @@ static unsigned long *dwc2_get_ls_map(struct dwc2_hsotg *hsotg, /* Get the map and adjust if this is a multi_tt hub */ map = qh->dwc_tt->periodic_bitmaps; if (qh->dwc_tt->usb_tt->multi) - map += DWC2_ELEMENTS_PER_LS_BITMAP * qh->ttport; + map += DWC2_ELEMENTS_PER_LS_BITMAP * (qh->ttport - 1); return map; } From af424a410749ed7e0c2bffd3cedbc7c274d0ff6f Mon Sep 17 00:00:00 2001 From: William Wu Date: Fri, 11 May 2018 17:46:31 +0800 Subject: [PATCH 168/572] usb: dwc2: alloc dma aligned buffer for isoc split in The commit 3bc04e28a030 ("usb: dwc2: host: Get aligned DMA in a more supported way") rips out a lot of code to simply the allocation of aligned DMA. However, it also introduces a new issue when use isoc split in transfer. In my test case, I connect the dwc2 controller with an usb hs Hub (GL852G-12), and plug an usb fs audio device (Plantronics headset) into the downstream port of Hub. Then use the usb mic to record, we can find noise when playback. It's because that the usb Hub uses an MDATA for the first transaction and a DATA0 for the second transaction for the isoc split in transaction. An typical isoc split in transaction sequence like this: - SSPLIT IN transaction - CSPLIT IN transaction - MDATA packet - CSPLIT IN transaction - DATA0 packet The DMA address of MDATA (urb->dma) is always DWORD-aligned, but the DMA address of DATA0 (urb->dma + qtd->isoc_split_offset) may not be DWORD-aligned, it depends on the qtd->isoc_split_offset (the length of MDATA). In my test case, the length of MDATA is usually unaligned, this cause DATA0 packet transmission error. This patch use kmem_cache to allocate aligned DMA buf for isoc split in transaction. Note that according to usb 2.0 spec, the maximum data payload size is 1023 bytes for each fs isoc ep, and the maximum allowable interrupt data payload size is 64 bytes or less for fs interrupt ep. So we set the size of object to be 1024 bytes in the kmem cache. Tested-by: Gevorg Sahakyan Tested-by: Heiko Stuebner Acked-by: Minas Harutyunyan hminas@synopsys.com> Signed-off-by: William Wu Reviewed-by: Douglas Anderson Signed-off-by: Felipe Balbi --- drivers/usb/dwc2/core.h | 3 ++ drivers/usb/dwc2/hcd.c | 89 ++++++++++++++++++++++++++++++++++-- drivers/usb/dwc2/hcd.h | 8 ++++ drivers/usb/dwc2/hcd_intr.c | 8 ++++ drivers/usb/dwc2/hcd_queue.c | 3 ++ 5 files changed, 106 insertions(+), 5 deletions(-) diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h index 4a56ac772a3c..71b3b08ad516 100644 --- a/drivers/usb/dwc2/core.h +++ b/drivers/usb/dwc2/core.h @@ -1004,6 +1004,7 @@ struct dwc2_hregs_backup { * @frame_list_sz: Frame list size * @desc_gen_cache: Kmem cache for generic descriptors * @desc_hsisoc_cache: Kmem cache for hs isochronous descriptors + * @unaligned_cache: Kmem cache for DMA mode to handle non-aligned buf * * These are for peripheral mode: * @@ -1177,6 +1178,8 @@ struct dwc2_hsotg { u32 frame_list_sz; struct kmem_cache *desc_gen_cache; struct kmem_cache *desc_hsisoc_cache; + struct kmem_cache *unaligned_cache; +#define DWC2_KMEM_UNALIGNED_BUF_SIZE 1024 #endif /* CONFIG_USB_DWC2_HOST || CONFIG_USB_DWC2_DUAL_ROLE */ diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c index bae08a7d7abd..b1104be3429c 100644 --- a/drivers/usb/dwc2/hcd.c +++ b/drivers/usb/dwc2/hcd.c @@ -1567,11 +1567,20 @@ static void dwc2_hc_start_transfer(struct dwc2_hsotg *hsotg, } if (hsotg->params.host_dma) { - dwc2_writel((u32)chan->xfer_dma, - hsotg->regs + HCDMA(chan->hc_num)); + dma_addr_t dma_addr; + + if (chan->align_buf) { + if (dbg_hc(chan)) + dev_vdbg(hsotg->dev, "align_buf\n"); + dma_addr = chan->align_buf; + } else { + dma_addr = chan->xfer_dma; + } + dwc2_writel((u32)dma_addr, hsotg->regs + HCDMA(chan->hc_num)); + if (dbg_hc(chan)) dev_vdbg(hsotg->dev, "Wrote %08lx to HCDMA(%d)\n", - (unsigned long)chan->xfer_dma, chan->hc_num); + (unsigned long)dma_addr, chan->hc_num); } /* Start the split */ @@ -2625,6 +2634,35 @@ static void dwc2_hc_init_xfer(struct dwc2_hsotg *hsotg, } } +static int dwc2_alloc_split_dma_aligned_buf(struct dwc2_hsotg *hsotg, + struct dwc2_qh *qh, + struct dwc2_host_chan *chan) +{ + if (!hsotg->unaligned_cache || + chan->max_packet > DWC2_KMEM_UNALIGNED_BUF_SIZE) + return -ENOMEM; + + if (!qh->dw_align_buf) { + qh->dw_align_buf = kmem_cache_alloc(hsotg->unaligned_cache, + GFP_ATOMIC | GFP_DMA); + if (!qh->dw_align_buf) + return -ENOMEM; + } + + qh->dw_align_buf_dma = dma_map_single(hsotg->dev, qh->dw_align_buf, + DWC2_KMEM_UNALIGNED_BUF_SIZE, + DMA_FROM_DEVICE); + + if (dma_mapping_error(hsotg->dev, qh->dw_align_buf_dma)) { + dev_err(hsotg->dev, "can't map align_buf\n"); + chan->align_buf = 0; + return -EINVAL; + } + + chan->align_buf = qh->dw_align_buf_dma; + return 0; +} + #define DWC2_USB_DMA_ALIGN 4 struct dma_aligned_buffer { @@ -2802,6 +2840,32 @@ static int dwc2_assign_and_init_hc(struct dwc2_hsotg *hsotg, struct dwc2_qh *qh) /* Set the transfer attributes */ dwc2_hc_init_xfer(hsotg, chan, qtd); + /* For non-dword aligned buffers */ + if (hsotg->params.host_dma && qh->do_split && + chan->ep_is_in && (chan->xfer_dma & 0x3)) { + dev_vdbg(hsotg->dev, "Non-aligned buffer\n"); + if (dwc2_alloc_split_dma_aligned_buf(hsotg, qh, chan)) { + dev_err(hsotg->dev, + "Failed to allocate memory to handle non-aligned buffer\n"); + /* Add channel back to free list */ + chan->align_buf = 0; + chan->multi_count = 0; + list_add_tail(&chan->hc_list_entry, + &hsotg->free_hc_list); + qtd->in_process = 0; + qh->channel = NULL; + return -ENOMEM; + } + } else { + /* + * We assume that DMA is always aligned in non-split + * case or split out case. Warn if not. + */ + WARN_ON_ONCE(hsotg->params.host_dma && + (chan->xfer_dma & 0x3)); + chan->align_buf = 0; + } + if (chan->ep_type == USB_ENDPOINT_XFER_INT || chan->ep_type == USB_ENDPOINT_XFER_ISOC) /* @@ -5246,6 +5310,19 @@ int dwc2_hcd_init(struct dwc2_hsotg *hsotg) } } + if (hsotg->params.host_dma) { + /* + * Create kmem caches to handle non-aligned buffer + * in Buffer DMA mode. + */ + hsotg->unaligned_cache = kmem_cache_create("dwc2-unaligned-dma", + DWC2_KMEM_UNALIGNED_BUF_SIZE, 4, + SLAB_CACHE_DMA, NULL); + if (!hsotg->unaligned_cache) + dev_err(hsotg->dev, + "unable to create dwc2 unaligned cache\n"); + } + hsotg->otg_port = 1; hsotg->frame_list = NULL; hsotg->frame_list_dma = 0; @@ -5280,8 +5357,9 @@ int dwc2_hcd_init(struct dwc2_hsotg *hsotg) return 0; error4: - kmem_cache_destroy(hsotg->desc_gen_cache); + kmem_cache_destroy(hsotg->unaligned_cache); kmem_cache_destroy(hsotg->desc_hsisoc_cache); + kmem_cache_destroy(hsotg->desc_gen_cache); error3: dwc2_hcd_release(hsotg); error2: @@ -5322,8 +5400,9 @@ void dwc2_hcd_remove(struct dwc2_hsotg *hsotg) usb_remove_hcd(hcd); hsotg->priv = NULL; - kmem_cache_destroy(hsotg->desc_gen_cache); + kmem_cache_destroy(hsotg->unaligned_cache); kmem_cache_destroy(hsotg->desc_hsisoc_cache); + kmem_cache_destroy(hsotg->desc_gen_cache); dwc2_hcd_release(hsotg); usb_put_hcd(hcd); diff --git a/drivers/usb/dwc2/hcd.h b/drivers/usb/dwc2/hcd.h index 7db1ee7e7a77..5502a501f516 100644 --- a/drivers/usb/dwc2/hcd.h +++ b/drivers/usb/dwc2/hcd.h @@ -76,6 +76,8 @@ struct dwc2_qh; * (micro)frame * @xfer_buf: Pointer to current transfer buffer position * @xfer_dma: DMA address of xfer_buf + * @align_buf: In Buffer DMA mode this will be used if xfer_buf is not + * DWORD aligned * @xfer_len: Total number of bytes to transfer * @xfer_count: Number of bytes transferred so far * @start_pkt_count: Packet count at start of transfer @@ -133,6 +135,7 @@ struct dwc2_host_chan { u8 *xfer_buf; dma_addr_t xfer_dma; + dma_addr_t align_buf; u32 xfer_len; u32 xfer_count; u16 start_pkt_count; @@ -302,6 +305,9 @@ struct dwc2_hs_transfer_time { * speed. Note that this is in "schedule slice" which * is tightly packed. * @ntd: Actual number of transfer descriptors in a list + * @dw_align_buf: Used instead of original buffer if its physical address + * is not dword-aligned + * @dw_align_buf_dma: DMA address for dw_align_buf * @qtd_list: List of QTDs for this QH * @channel: Host channel currently processing transfers for this QH * @qh_list_entry: Entry for QH in either the periodic or non-periodic @@ -350,6 +356,8 @@ struct dwc2_qh { struct dwc2_hs_transfer_time hs_transfers[DWC2_HS_SCHEDULE_UFRAMES]; u32 ls_start_schedule_slice; u16 ntd; + u8 *dw_align_buf; + dma_addr_t dw_align_buf_dma; struct list_head qtd_list; struct dwc2_host_chan *channel; struct list_head qh_list_entry; diff --git a/drivers/usb/dwc2/hcd_intr.c b/drivers/usb/dwc2/hcd_intr.c index fbea5e3fb947..f3429ac4a6bb 100644 --- a/drivers/usb/dwc2/hcd_intr.c +++ b/drivers/usb/dwc2/hcd_intr.c @@ -950,6 +950,14 @@ static int dwc2_xfercomp_isoc_split_in(struct dwc2_hsotg *hsotg, frame_desc->actual_length += len; + if (chan->align_buf) { + dev_vdbg(hsotg->dev, "non-aligned buffer\n"); + dma_unmap_single(hsotg->dev, chan->qh->dw_align_buf_dma, + DWC2_KMEM_UNALIGNED_BUF_SIZE, DMA_FROM_DEVICE); + memcpy(qtd->urb->buf + (chan->xfer_dma - qtd->urb->dma), + chan->qh->dw_align_buf, len); + } + qtd->isoc_split_offset += len; hctsiz = dwc2_readl(hsotg->regs + HCTSIZ(chnum)); diff --git a/drivers/usb/dwc2/hcd_queue.c b/drivers/usb/dwc2/hcd_queue.c index 9c55d1addba9..301ced1618f8 100644 --- a/drivers/usb/dwc2/hcd_queue.c +++ b/drivers/usb/dwc2/hcd_queue.c @@ -1696,6 +1696,9 @@ void dwc2_hcd_qh_free(struct dwc2_hsotg *hsotg, struct dwc2_qh *qh) if (qh->desc_list) dwc2_hcd_qh_free_ddma(hsotg, qh); + else if (hsotg->unaligned_cache && qh->dw_align_buf) + kmem_cache_free(hsotg->unaligned_cache, qh->dw_align_buf); + kfree(qh); } From 70c3c8cb83856758025c2a211dd022bc0478922a Mon Sep 17 00:00:00 2001 From: William Wu Date: Fri, 11 May 2018 17:46:32 +0800 Subject: [PATCH 169/572] usb: dwc2: fix isoc split in transfer with no data If isoc split in transfer with no data (the length of DATA0 packet is zero), we can't simply return immediately. Because the DATA0 can be the first transaction or the second transaction for the isoc split in transaction. If the DATA0 packet with no data is in the first transaction, we can return immediately. But if the DATA0 packet with no data is in the second transaction of isoc split in transaction sequence, we need to increase the qtd->isoc_frame_index and giveback urb to device driver if needed, otherwise, the MDATA packet will be lost. A typical test case is that connect the dwc2 controller with an usb hs Hub (GL852G-12), and plug an usb fs audio device (Plantronics headset) into the downstream port of Hub. Then use the usb mic to record, we can find noise when playback. In the case, the isoc split in transaction sequence like this: - SSPLIT IN transaction - CSPLIT IN transaction - MDATA packet (176 bytes) - CSPLIT IN transaction - DATA0 packet (0 byte) This patch use both the length of DATA0 and qtd->isoc_split_offset to check if the DATA0 is in the second transaction. Tested-by: Gevorg Sahakyan Tested-by: Heiko Stuebner Acked-by: Minas Harutyunyan hminas@synopsys.com> Signed-off-by: William Wu Signed-off-by: Felipe Balbi --- drivers/usb/dwc2/hcd_intr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/usb/dwc2/hcd_intr.c b/drivers/usb/dwc2/hcd_intr.c index f3429ac4a6bb..ed7f05cf4906 100644 --- a/drivers/usb/dwc2/hcd_intr.c +++ b/drivers/usb/dwc2/hcd_intr.c @@ -942,9 +942,8 @@ static int dwc2_xfercomp_isoc_split_in(struct dwc2_hsotg *hsotg, frame_desc = &qtd->urb->iso_descs[qtd->isoc_frame_index]; len = dwc2_get_actual_xfer_length(hsotg, chan, chnum, qtd, DWC2_HC_XFER_COMPLETE, NULL); - if (!len) { + if (!len && !qtd->isoc_split_offset) { qtd->complete_split = 0; - qtd->isoc_split_offset = 0; return 0; } From 980900d6318066b9f8314bfb87329a20fd0d1ca4 Mon Sep 17 00:00:00 2001 From: Chunfeng Yun Date: Fri, 25 May 2018 17:24:57 +0800 Subject: [PATCH 170/572] usb: gadget: composite: fix delayed_status race condition when set_interface It happens when enable debug log, if set_alt() returns USB_GADGET_DELAYED_STATUS and usb_composite_setup_continue() is called before increasing count of @delayed_status, so fix it by using spinlock of @cdev->lock. Signed-off-by: Chunfeng Yun Tested-by: Jay Hsu Signed-off-by: Felipe Balbi --- drivers/usb/gadget/composite.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index f242c2bcea81..d2fa071c21b1 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -1719,6 +1719,8 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) */ if (w_value && !f->get_alt) break; + + spin_lock(&cdev->lock); value = f->set_alt(f, w_index, w_value); if (value == USB_GADGET_DELAYED_STATUS) { DBG(cdev, @@ -1728,6 +1730,7 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) DBG(cdev, "delayed_status count %d\n", cdev->delayed_status); } + spin_unlock(&cdev->lock); break; case USB_REQ_GET_INTERFACE: if (ctrl->bRequestType != (USB_DIR_IN|USB_RECIP_INTERFACE)) From 9bb073a053f0464ea74a4d4c331fdb7da58568d6 Mon Sep 17 00:00:00 2001 From: Grigor Tovmasyan Date: Thu, 24 May 2018 18:22:30 +0400 Subject: [PATCH 171/572] usb: gadget: dwc2: fix memory leak in gadget_init() Freed allocated request for ep0 to prevent memory leak in case when dwc2_driver_probe() failed. Cc: Stefan Wahren Cc: Marek Szyprowski Tested-by: Stefan Wahren Tested-by: Marek Szyprowski Acked-by: Minas Harutyunyan Signed-off-by: Grigor Tovmasyan Signed-off-by: Felipe Balbi --- drivers/usb/dwc2/gadget.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index fa3b6f361074..76e4f2e7d947 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -4735,9 +4735,11 @@ int dwc2_gadget_init(struct dwc2_hsotg *hsotg) } ret = usb_add_gadget_udc(dev, &hsotg->gadget); - if (ret) + if (ret) { + dwc2_hsotg_ep_free_request(&hsotg->eps_out[0]->ep, + hsotg->ctrl_req); return ret; - + } dwc2_hsotg_dump(hsotg); return 0; @@ -4751,6 +4753,7 @@ int dwc2_gadget_init(struct dwc2_hsotg *hsotg) int dwc2_hsotg_remove(struct dwc2_hsotg *hsotg) { usb_del_gadget_udc(&hsotg->gadget); + dwc2_hsotg_ep_free_request(&hsotg->eps_out[0]->ep, hsotg->ctrl_req); return 0; } From 1d8e5c00275825fc42aaa5597dab1d0b5b26bb64 Mon Sep 17 00:00:00 2001 From: Minas Harutyunyan Date: Wed, 23 May 2018 16:24:44 +0400 Subject: [PATCH 172/572] dwc2: gadget: Fix ISOC IN DDMA PID bitfield value calculation PID bitfield in descriptor should be set based on particular request length, not based on EP's mc value. PID value can't be set to 0 even request length is 0. Signed-off-by: Minas Harutyunyan Signed-off-by: Felipe Balbi --- drivers/usb/dwc2/gadget.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index 76e4f2e7d947..a0f82cca2d9a 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -812,6 +812,7 @@ static int dwc2_gadget_fill_isoc_desc(struct dwc2_hsotg_ep *hs_ep, u32 index; u32 maxsize = 0; u32 mask = 0; + u8 pid = 0; maxsize = dwc2_gadget_get_desc_params(hs_ep, &mask); @@ -840,7 +841,11 @@ static int dwc2_gadget_fill_isoc_desc(struct dwc2_hsotg_ep *hs_ep, ((len << DEV_DMA_NBYTES_SHIFT) & mask)); if (hs_ep->dir_in) { - desc->status |= ((hs_ep->mc << DEV_DMA_ISOC_PID_SHIFT) & + if (len) + pid = DIV_ROUND_UP(len, hs_ep->ep.maxpacket); + else + pid = 1; + desc->status |= ((pid << DEV_DMA_ISOC_PID_SHIFT) & DEV_DMA_ISOC_PID_MASK) | ((len % hs_ep->ep.maxpacket) ? DEV_DMA_SHORT : 0) | From c5c2a97b3ac7d1ec19e7cff9e38caca6afefc3de Mon Sep 17 00:00:00 2001 From: Waldemar Rymarkiewicz Date: Thu, 14 Jun 2018 15:56:08 +0200 Subject: [PATCH 173/572] PM / OPP: Update voltage in case freq == old_freq This commit fixes a rare but possible case when the clk rate is updated without update of the regulator voltage. At boot up, CPUfreq checks if the system is running at the right freq. This is a sanity check in case a bootloader set clk rate that is outside of freq table present with cpufreq core. In such cases system can be unstable so better to change it to a freq that is preset in freq-table. The CPUfreq takes next freq that is >= policy->cur and this is our target_freq that needs to be set now. dev_pm_opp_set_rate(dev, target_freq) checks the target_freq and the old_freq (a current rate). If these are equal it returns early. If not, it searches for OPP (old_opp) that fits best to old_freq (not listed in the table) and updates old_freq (!). Here, we can end up with old_freq = old_opp.rate = target_freq, which is not handled in _generic_set_opp_regulator(). It's supposed to update voltage only when freq > old_freq || freq > old_freq. if (freq > old_freq) { ret = _set_opp_voltage(dev, reg, new_supply); [...] if (freq < old_freq) { ret = _set_opp_voltage(dev, reg, new_supply); if (ret) It results in, no voltage update while clk rate is updated. Example: freq-table = { 1000MHz 1.15V 666MHZ 1.10V 333MHz 1.05V } boot-up-freq = 800MHz # not listed in freq-table freq = target_freq = 1GHz old_freq = 800Mhz old_opp = _find_freq_ceil(opp_table, &old_freq); #(old_freq is modified!) old_freq = 1GHz Fixes: 6a0712f6f199 ("PM / OPP: Add dev_pm_opp_set_rate()") Cc: 4.6+ # v4.6+ Signed-off-by: Waldemar Rymarkiewicz Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/opp/core.c b/drivers/opp/core.c index ab2f3fead6b1..31ff03dbeb83 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -598,7 +598,7 @@ static int _generic_set_opp_regulator(const struct opp_table *opp_table, } /* Scaling up? Scale voltage before frequency */ - if (freq > old_freq) { + if (freq >= old_freq) { ret = _set_opp_voltage(dev, reg, new_supply); if (ret) goto restore_voltage; From 3f77f244d8ec28e3a0a81240ffac7d626390060c Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Mon, 18 Jun 2018 22:41:03 +0200 Subject: [PATCH 174/572] mtd: rawnand: mxc: set spare area size register explicitly The v21 version of the NAND flash controller contains a Spare Area Size Register (SPAS) at offset 0x10. Its setting defaults to the maximum spare area size of 218 bytes. The size that is set in this register is used by the controller when it calculates the ECC bytes internally in hardware. Usually, this register is updated from settings in the IIM fuses when the system is booting from NAND flash. For other boot media, however, the SPAS register remains at the default setting, which may not work for the particular flash chip on the board. The same goes for flash chips whose configuration cannot be set in the IIM fuses (e.g. chips with 2k sector size and 128 bytes spare area size can't be configured in the IIM fuses on imx25 systems). Set the SPAS register explicitly during the preset operation. Derive the register value from mtd->oobsize that was detected during probe by decoding the flash chip's ID bytes. While at it, rename the define for the spare area register's offset to NFC_V21_RSLTSPARE_AREA. The register at offset 0x10 on v1 controllers is different from the register on v21 controllers. Fixes: d484018 ("mtd: mxc_nand: set NFC registers after reset") Cc: stable@vger.kernel.org Signed-off-by: Martin Kaiser Reviewed-by: Sascha Hauer Reviewed-by: Miquel Raynal Signed-off-by: Boris Brezillon --- drivers/mtd/nand/raw/mxc_nand.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/mxc_nand.c b/drivers/mtd/nand/raw/mxc_nand.c index 45786e707b7b..26cef218bb43 100644 --- a/drivers/mtd/nand/raw/mxc_nand.c +++ b/drivers/mtd/nand/raw/mxc_nand.c @@ -48,7 +48,7 @@ #define NFC_V1_V2_CONFIG (host->regs + 0x0a) #define NFC_V1_V2_ECC_STATUS_RESULT (host->regs + 0x0c) #define NFC_V1_V2_RSLTMAIN_AREA (host->regs + 0x0e) -#define NFC_V1_V2_RSLTSPARE_AREA (host->regs + 0x10) +#define NFC_V21_RSLTSPARE_AREA (host->regs + 0x10) #define NFC_V1_V2_WRPROT (host->regs + 0x12) #define NFC_V1_UNLOCKSTART_BLKADDR (host->regs + 0x14) #define NFC_V1_UNLOCKEND_BLKADDR (host->regs + 0x16) @@ -1274,6 +1274,9 @@ static void preset_v2(struct mtd_info *mtd) writew(config1, NFC_V1_V2_CONFIG1); /* preset operation */ + /* spare area size in 16-bit half-words */ + writew(mtd->oobsize / 2, NFC_V21_RSLTSPARE_AREA); + /* Unlock the internal RAM Buffer */ writew(0x2, NFC_V1_V2_CONFIG); From 69a8405999aa1c489de4b8d349468f0c2b83f093 Mon Sep 17 00:00:00 2001 From: Michael Jeanson Date: Thu, 14 Jun 2018 11:27:42 -0400 Subject: [PATCH 175/572] powerpc/e500mc: Set assembler machine type to e500mc In binutils 2.26 a new opcode for the "wait" instruction was added for the POWER9 and has precedence over the one specific to the e500mc. Commit ebf714ff3756 ("powerpc/e500mc: Add support for the wait instruction in e500_idle") uses this instruction specifically on the e500mc to work around an erratum. This results in an invalid instruction in idle_e500 when we build for the e500mc on bintutils >= 2.26 with the default assembler machine type. Since multiplatform between e500 and non-e500 is not supported, set the assembler machine type globaly when CONFIG_PPC_E500MC=y. Signed-off-by: Michael Jeanson Reviewed-by: Mathieu Desnoyers CC: Benjamin Herrenschmidt CC: Paul Mackerras CC: Michael Ellerman CC: Kumar Gala CC: Vakul Garg CC: Scott Wood CC: Mathieu Desnoyers CC: linuxppc-dev@lists.ozlabs.org CC: linux-kernel@vger.kernel.org CC: stable@vger.kernel.org Signed-off-by: Michael Ellerman --- arch/powerpc/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index bd06a3ccda31..2ea575cb3401 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -244,6 +244,7 @@ cpu-as-$(CONFIG_4xx) += -Wa,-m405 cpu-as-$(CONFIG_ALTIVEC) += $(call as-option,-Wa$(comma)-maltivec) cpu-as-$(CONFIG_E200) += -Wa,-me200 cpu-as-$(CONFIG_PPC_BOOK3S_64) += -Wa,-mpower4 +cpu-as-$(CONFIG_PPC_E500MC) += $(call as-option,-Wa$(comma)-me500mc) KBUILD_AFLAGS += $(cpu-as-y) KBUILD_CFLAGS += $(cpu-as-y) From 02390f66bd2362df114a0a0770d80ec33061f6d1 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 15 Jun 2018 11:38:37 +1000 Subject: [PATCH 176/572] powerpc/64s/radix: Fix MADV_[FREE|DONTNEED] TLB flush miss problem with THP The patch 99baac21e4 ("mm: fix MADV_[FREE|DONTNEED] TLB flush miss problem") added a force flush mode to the mmu_gather flush, which unconditionally flushes the entire address range being invalidated (even if actual ptes only covered a smaller range), to solve a problem with concurrent threads invalidating the same PTEs causing them to miss TLBs that need flushing. This does not work with powerpc that invalidates mmu_gather batches according to page size. Have powerpc flush all possible page sizes in the range if it encounters this concurrency condition. Patch 4647706ebe ("mm: always flush VMA ranges affected by zap_page_range") does add a TLB flush for all page sizes on powerpc for the zap_page_range case, but that is to be removed and replaced with the mmu_gather flush to avoid redundant flushing. It is also thought to not cover other obscure race conditions: https://lkml.kernel.org/r/BD3A0EBE-ECF4-41D4-87FA-C755EA9AB6BD@gmail.com Hash does not have a problem because it invalidates TLBs inside the page table locks. Reported-by: Aneesh Kumar K.V Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/mm/tlb-radix.c | 96 +++++++++++++++++++++++++++++-------- 1 file changed, 75 insertions(+), 21 deletions(-) diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 67a6e86d3e7e..a734e486664d 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -689,22 +689,17 @@ EXPORT_SYMBOL(radix__flush_tlb_kernel_range); static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33; static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2; -void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start, - unsigned long end) +static inline void __radix__flush_tlb_range(struct mm_struct *mm, + unsigned long start, unsigned long end, + bool flush_all_sizes) { - struct mm_struct *mm = vma->vm_mm; unsigned long pid; unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift; unsigned long page_size = 1UL << page_shift; unsigned long nr_pages = (end - start) >> page_shift; bool local, full; -#ifdef CONFIG_HUGETLB_PAGE - if (is_vm_hugetlb_page(vma)) - return radix__flush_hugetlb_tlb_range(vma, start, end); -#endif - pid = mm->context.id; if (unlikely(pid == MMU_NO_CONTEXT)) return; @@ -738,37 +733,64 @@ is_local: _tlbie_pid(pid, RIC_FLUSH_TLB); } } else { - bool hflush = false; + bool hflush = flush_all_sizes; + bool gflush = flush_all_sizes; unsigned long hstart, hend; + unsigned long gstart, gend; -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - hstart = (start + HPAGE_PMD_SIZE - 1) >> HPAGE_PMD_SHIFT; - hend = end >> HPAGE_PMD_SHIFT; - if (hstart < hend) { - hstart <<= HPAGE_PMD_SHIFT; - hend <<= HPAGE_PMD_SHIFT; + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) hflush = true; + + if (hflush) { + hstart = (start + PMD_SIZE - 1) & PMD_MASK; + hend = end & PMD_MASK; + if (hstart == hend) + hflush = false; + } + + if (gflush) { + gstart = (start + PUD_SIZE - 1) & PUD_MASK; + gend = end & PUD_MASK; + if (gstart == gend) + gflush = false; } -#endif asm volatile("ptesync": : :"memory"); if (local) { __tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize); if (hflush) __tlbiel_va_range(hstart, hend, pid, - HPAGE_PMD_SIZE, MMU_PAGE_2M); + PMD_SIZE, MMU_PAGE_2M); + if (gflush) + __tlbiel_va_range(gstart, gend, pid, + PUD_SIZE, MMU_PAGE_1G); asm volatile("ptesync": : :"memory"); } else { __tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize); if (hflush) __tlbie_va_range(hstart, hend, pid, - HPAGE_PMD_SIZE, MMU_PAGE_2M); + PMD_SIZE, MMU_PAGE_2M); + if (gflush) + __tlbie_va_range(gstart, gend, pid, + PUD_SIZE, MMU_PAGE_1G); fixup_tlbie(); asm volatile("eieio; tlbsync; ptesync": : :"memory"); } } preempt_enable(); } + +void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) + +{ +#ifdef CONFIG_HUGETLB_PAGE + if (is_vm_hugetlb_page(vma)) + return radix__flush_hugetlb_tlb_range(vma, start, end); +#endif + + __radix__flush_tlb_range(vma->vm_mm, start, end, false); +} EXPORT_SYMBOL(radix__flush_tlb_range); static int radix_get_mmu_psize(int page_size) @@ -837,6 +859,8 @@ void radix__tlb_flush(struct mmu_gather *tlb) int psize = 0; struct mm_struct *mm = tlb->mm; int page_size = tlb->page_size; + unsigned long start = tlb->start; + unsigned long end = tlb->end; /* * if page size is not something we understand, do a full mm flush @@ -847,15 +871,45 @@ void radix__tlb_flush(struct mmu_gather *tlb) */ if (tlb->fullmm) { __flush_all_mm(mm, true); +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE) + } else if (mm_tlb_flush_nested(mm)) { + /* + * If there is a concurrent invalidation that is clearing ptes, + * then it's possible this invalidation will miss one of those + * cleared ptes and miss flushing the TLB. If this invalidate + * returns before the other one flushes TLBs, that can result + * in it returning while there are still valid TLBs inside the + * range to be invalidated. + * + * See mm/memory.c:tlb_finish_mmu() for more details. + * + * The solution to this is ensure the entire range is always + * flushed here. The problem for powerpc is that the flushes + * are page size specific, so this "forced flush" would not + * do the right thing if there are a mix of page sizes in + * the range to be invalidated. So use __flush_tlb_range + * which invalidates all possible page sizes in the range. + * + * PWC flush probably is not be required because the core code + * shouldn't free page tables in this path, but accounting + * for the possibility makes us a bit more robust. + * + * need_flush_all is an uncommon case because page table + * teardown should be done with exclusive locks held (but + * after locks are dropped another invalidate could come + * in), it could be optimized further if necessary. + */ + if (!tlb->need_flush_all) + __radix__flush_tlb_range(mm, start, end, true); + else + radix__flush_all_mm(mm); +#endif } else if ( (psize = radix_get_mmu_psize(page_size)) == -1) { if (!tlb->need_flush_all) radix__flush_tlb_mm(mm); else radix__flush_all_mm(mm); } else { - unsigned long start = tlb->start; - unsigned long end = tlb->end; - if (!tlb->need_flush_all) radix__flush_tlb_range_psize(mm, start, end, psize); else From 749a0278c2177b2d16da5d8b135ba7f940bb4199 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 13 Jun 2018 23:23:56 +1000 Subject: [PATCH 177/572] powerpc/64s: Fix DT CPU features Power9 DD2.1 logic In the device tree CPU features quirk code we want to set CPU_FTR_POWER9_DD2_1 on all Power9s that aren't DD2.0 or earlier. But we got the logic wrong and instead set it on all CPUs that aren't Power9 DD2.0 or earlier, ie. including Power8. Fix it by making sure we're on a Power9. This isn't a bug in practice because the only code that checks the feature is Power9 only to begin with. But we'll backport it anyway to avoid confusion. Fixes: 9e9626ed3a4a ("powerpc/64s: Fix POWER9 DD2.2 and above in DT CPU features") Cc: stable@vger.kernel.org # v4.17+ Reported-by: Paul Mackerras Signed-off-by: Michael Ellerman Acked-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/dt_cpu_ftrs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 4be1c0de9406..96dd3d871986 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -711,7 +711,8 @@ static __init void cpufeatures_cpu_quirks(void) cur_cpu_spec->cpu_features |= CPU_FTR_P9_TM_HV_ASSIST; cur_cpu_spec->cpu_features |= CPU_FTR_P9_TM_XER_SO_BUG; cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1; - } else /* DD2.1 and up have DD2_1 */ + } else if ((version & 0xffff0000) == 0x004e0000) + /* DD2.1 and up have DD2_1 */ cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1; if ((version & 0xffff0000) == 0x004e0000) { From 8c1aef6a682f87a059f10ab606cc1e2cdd663d5a Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 19 May 2018 14:35:52 +1000 Subject: [PATCH 178/572] powerpc/64: hard disable irqs in panic_smp_self_stop Similarly to commit 855bfe0de1 ("powerpc: hard disable irqs in smp_send_stop loop"), irqs should be hard disabled by panic_smp_self_stop. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup_64.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 7a7ce8ad455e..225bc5f91049 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -387,6 +387,14 @@ void early_setup_secondary(void) #endif /* CONFIG_SMP */ +void panic_smp_self_stop(void) +{ + hard_irq_disable(); + spin_begin(); + while (1) + spin_cpu_relax(); +} + #if defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE) static bool use_spinloop(void) { From de6e5d38417e6cdb005843db420a2974993d36ff Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 19 May 2018 14:35:53 +1000 Subject: [PATCH 179/572] powerpc: smp_send_stop do not offline stopped CPUs Marking CPUs stopped by smp_send_stop as offline can cause warnings due to cross-CPU wakeups. This trace was noticed on a busy system running a sysrq+c crash test, after the injected crash: WARNING: CPU: 51 PID: 1546 at kernel/sched/core.c:1179 set_task_cpu+0x22c/0x240 CPU: 51 PID: 1546 Comm: kworker/u352:1 Tainted: G D Workqueue: mlx5e mlx5e_update_stats_work [mlx5_core] [...] NIP [c00000000017c21c] set_task_cpu+0x22c/0x240 LR [c00000000017d580] try_to_wake_up+0x230/0x720 Call Trace: [c000000001017700] runqueues+0x0/0xb00 (unreliable) [c00000000017d580] try_to_wake_up+0x230/0x720 [c00000000015a214] insert_work+0x104/0x140 [c00000000015adb0] __queue_work+0x230/0x690 [c000003fc5007910] [c00000000015b26c] queue_work_on+0x5c/0x90 [c0080000135fc8f8] mlx5_cmd_exec+0x538/0xcb0 [mlx5_core] [c008000013608fd0] mlx5_core_access_reg+0x140/0x1d0 [mlx5_core] [c00800001362777c] mlx5e_update_pport_counters.constprop.59+0x6c/0x90 [mlx5_core] [c008000013628868] mlx5e_update_ndo_stats+0x28/0x90 [mlx5_core] [c008000013625558] mlx5e_update_stats_work+0x68/0xb0 [mlx5_core] [c00000000015bcec] process_one_work+0x1bc/0x5f0 [c00000000015ecac] worker_thread+0xac/0x6b0 [c000000000168338] kthread+0x168/0x1b0 [c00000000000b628] ret_from_kernel_thread+0x5c/0xb4 This happens because firstly the CPU is not really offline in the usual sense, processes and interrupts have not been migrated away. Secondly smp_send_stop does not happen atomically on all CPUs, so one CPU can have marked itself offline, while another CPU is still running processes or interrupts which can affect the first CPU. Fix this by just not marking the CPU as offline. It's more like frozen in time, so offline does not really reflect its state properly anyway. There should be nothing in the crash/panic path that walks online CPUs and synchronously waits for them, so this change should not introduce new hangs. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/smp.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 5eadfffabe35..4794d6b4f4d2 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -600,9 +600,6 @@ static void nmi_stop_this_cpu(struct pt_regs *regs) nmi_ipi_busy_count--; nmi_ipi_unlock(); - /* Remove this CPU */ - set_cpu_online(smp_processor_id(), false); - spin_begin(); while (1) spin_cpu_relax(); @@ -617,9 +614,6 @@ void smp_send_stop(void) static void stop_this_cpu(void *dummy) { - /* Remove this CPU */ - set_cpu_online(smp_processor_id(), false); - hard_irq_disable(); spin_begin(); while (1) From 855b6232dda2b6941ecd22979893e8a1d25642db Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 19 May 2018 14:35:54 +1000 Subject: [PATCH 180/572] powerpc/64: hard disable irqs on the panic()ing CPU Similar to previous patches, hard disable interrupts when a CPU is in panic. This reduces the chance the watchdog has to interfere with the panic, and avoids any other type of masked interrupt being executed when crashing which minimises the length of the crash path. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup-common.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 62b1a40d8957..40b44bb53a4e 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -700,12 +700,19 @@ EXPORT_SYMBOL(check_legacy_ioport); static int ppc_panic_event(struct notifier_block *this, unsigned long event, void *ptr) { + /* + * panic does a local_irq_disable, but we really + * want interrupts to be hard disabled. + */ + hard_irq_disable(); + /* * If firmware-assisted dump has been registered then trigger * firmware-assisted dump and let firmware handle everything else. */ crash_fadump(NULL, ptr); - ppc_md.panic(ptr); /* May not return */ + if (ppc_md.panic) + ppc_md.panic(ptr); /* May not return */ return NOTIFY_DONE; } @@ -716,7 +723,8 @@ static struct notifier_block ppc_panic_block = { void __init setup_panic(void) { - if (!ppc_md.panic) + /* PPC64 always does a hard irq disable in its panic handler */ + if (!IS_ENABLED(CONFIG_PPC64) && !ppc_md.panic) return; atomic_notifier_chain_register(&panic_notifier_list, &ppc_panic_block); } From 1fe83888a2b776c204cb06629700adfb8e9cc123 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Fri, 8 Jun 2018 10:40:38 +0200 Subject: [PATCH 181/572] xen: share start flags between PV and PVH MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use a global variable to store the start flags for both PV and PVH. This allows the xen_initial_domain macro to work properly on PVH. Note that ARM is also switched to use the new variable. Signed-off-by: Boris Ostrovsky Signed-off-by: Roger Pau Monné Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross --- arch/arm/xen/enlighten.c | 7 ++++--- arch/x86/xen/enlighten.c | 7 +++++++ arch/x86/xen/enlighten_pv.c | 1 + arch/x86/xen/enlighten_pvh.c | 1 + include/xen/xen.h | 6 +++++- 5 files changed, 18 insertions(+), 4 deletions(-) diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 8073625371f5..07060e5b5864 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -59,6 +59,9 @@ struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata; static __read_mostly unsigned int xen_events_irq; +uint32_t xen_start_flags; +EXPORT_SYMBOL(xen_start_flags); + int xen_remap_domain_gfn_array(struct vm_area_struct *vma, unsigned long addr, xen_pfn_t *gfn, int nr, @@ -293,9 +296,7 @@ void __init xen_early_init(void) xen_setup_features(); if (xen_feature(XENFEAT_dom0)) - xen_start_info->flags |= SIF_INITDOMAIN|SIF_PRIVILEGED; - else - xen_start_info->flags &= ~(SIF_INITDOMAIN|SIF_PRIVILEGED); + xen_start_flags |= SIF_INITDOMAIN|SIF_PRIVILEGED; if (!console_set_on_cmdline && !xen_initial_domain()) add_preferred_console("hvc", 0, NULL); diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index c9081c6671f0..3b5318505c69 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -64,6 +64,13 @@ struct shared_info xen_dummy_shared_info; __read_mostly int xen_have_vector_callback; EXPORT_SYMBOL_GPL(xen_have_vector_callback); +/* + * NB: needs to live in .data because it's used by xen_prepare_pvh which runs + * before clearing the bss. + */ +uint32_t xen_start_flags __attribute__((section(".data"))) = 0; +EXPORT_SYMBOL(xen_start_flags); + /* * Point at some empty memory to start with. We map the real shared_info * page as soon as fixmap is up and running. diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 357969a3697c..8d4e2e1ae60b 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1203,6 +1203,7 @@ asmlinkage __visible void __init xen_start_kernel(void) return; xen_domain_type = XEN_PV_DOMAIN; + xen_start_flags = xen_start_info->flags; xen_setup_features(); diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c index aa1c6a6831a9..c85d1a88f476 100644 --- a/arch/x86/xen/enlighten_pvh.c +++ b/arch/x86/xen/enlighten_pvh.c @@ -97,6 +97,7 @@ void __init xen_prepare_pvh(void) } xen_pvh = 1; + xen_start_flags = pvh_start_info.flags; msr = cpuid_ebx(xen_cpuid_base() + 2); pfn = __pa(hypercall_page); diff --git a/include/xen/xen.h b/include/xen/xen.h index 9d4340c907d1..1e1d9bd0bd37 100644 --- a/include/xen/xen.h +++ b/include/xen/xen.h @@ -25,12 +25,16 @@ extern bool xen_pvh; #define xen_hvm_domain() (xen_domain_type == XEN_HVM_DOMAIN) #define xen_pvh_domain() (xen_pvh) +#include + +extern uint32_t xen_start_flags; + #ifdef CONFIG_XEN_DOM0 #include #include #define xen_initial_domain() (xen_domain() && \ - xen_start_info && xen_start_info->flags & SIF_INITDOMAIN) + (xen_start_flags & SIF_INITDOMAIN)) #else /* !CONFIG_XEN_DOM0 */ #define xen_initial_domain() (0) #endif /* CONFIG_XEN_DOM0 */ From 84c029a73327cef571eaa61c7d6e67e8031b52ec Mon Sep 17 00:00:00 2001 From: Zhouyang Jia Date: Fri, 15 Jun 2018 07:34:52 +0800 Subject: [PATCH 182/572] xen: add error handling for xenbus_printf When xenbus_printf fails, the lack of error-handling code may cause unexpected results. This patch adds error-handling code after calling xenbus_printf. Signed-off-by: Zhouyang Jia Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross --- drivers/xen/manage.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index 8835065029d3..c93d8ef8df34 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -289,8 +289,15 @@ static void sysrq_handler(struct xenbus_watch *watch, const char *path, return; } - if (sysrq_key != '\0') - xenbus_printf(xbt, "control", "sysrq", "%c", '\0'); + if (sysrq_key != '\0') { + err = xenbus_printf(xbt, "control", "sysrq", "%c", '\0'); + if (err) { + pr_err("%s: Error %d writing sysrq in control/sysrq\n", + __func__, err); + xenbus_transaction_end(xbt, 1); + return; + } + } err = xenbus_transaction_end(xbt, 0); if (err == -EAGAIN) @@ -342,7 +349,12 @@ static int setup_shutdown_watcher(void) continue; snprintf(node, FEATURE_PATH_SIZE, "feature-%s", shutdown_handlers[idx].command); - xenbus_printf(XBT_NIL, "control", node, "%u", 1); + err = xenbus_printf(XBT_NIL, "control", node, "%u", 1); + if (err) { + pr_err("%s: Error %d writing %s\n", __func__, + err, node); + return err; + } } return 0; From 6e3cc2a6e259f49f5817d1561109832eff90f8e4 Mon Sep 17 00:00:00 2001 From: Oleksandr Andrushchenko Date: Fri, 1 Jun 2018 14:41:24 +0300 Subject: [PATCH 183/572] xen/grant-table: Export gnttab_{alloc|free}_pages as GPL Only gnttab_{alloc|free}_pages are exported as EXPORT_SYMBOL while all the rest are exported as EXPORT_SYMBOL_GPL, thus effectively making it not possible for non-GPL driver modules to use grant table module. Export gnttab_{alloc|free}_pages as EXPORT_SYMBOL_GPL so all the exports are aligned. Signed-off-by: Oleksandr Andrushchenko Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross --- drivers/xen/grant-table.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 27be107d6480..ba36ff3e4903 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -799,7 +799,7 @@ int gnttab_alloc_pages(int nr_pages, struct page **pages) return 0; } -EXPORT_SYMBOL(gnttab_alloc_pages); +EXPORT_SYMBOL_GPL(gnttab_alloc_pages); /** * gnttab_free_pages - free pages allocated by gnttab_alloc_pages() @@ -820,7 +820,7 @@ void gnttab_free_pages(int nr_pages, struct page **pages) } free_xenballooned_pages(nr_pages, pages); } -EXPORT_SYMBOL(gnttab_free_pages); +EXPORT_SYMBOL_GPL(gnttab_free_pages); /* Handling of paged out grant targets (GNTST_eagain) */ #define MAX_DELAY 256 From 7ba33e1c9d1e03f442b161c701d1f811ea13c75e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 10 Jun 2018 20:43:09 +0100 Subject: [PATCH 184/572] drm/i915: Apply batch location restrictions before pinning We special case the position of the batch within the GTT to prevent negative self-relocation deltas from underflowing. However, that restriction is being applied after a trial pin of the batch in its current position. Thus we are not rejecting an invalid location if the batch has been used before, leading to an assertion if we happen to need to rearrange the entire payload. In the worst case, this may cause a GPU hang on gen7 or perhaps missing state. References: https://bugs.freedesktop.org/show_bug.cgi?id=105720 Fixes: 2889caa92321 ("drm/i915: Eliminate lots of iterations over the execobjects array") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Martin Peres Link: https://patchwork.freedesktop.org/patch/msgid/20180610194325.13467-2-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen (cherry picked from commit 746c8f143afad7aaa66c484485fc39888d437a3f) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 49 ++++++++++++---------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index f627a8c47c58..22df17c8ca9b 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -489,7 +489,9 @@ eb_validate_vma(struct i915_execbuffer *eb, } static int -eb_add_vma(struct i915_execbuffer *eb, unsigned int i, struct i915_vma *vma) +eb_add_vma(struct i915_execbuffer *eb, + unsigned int i, unsigned batch_idx, + struct i915_vma *vma) { struct drm_i915_gem_exec_object2 *entry = &eb->exec[i]; int err; @@ -522,6 +524,24 @@ eb_add_vma(struct i915_execbuffer *eb, unsigned int i, struct i915_vma *vma) eb->flags[i] = entry->flags; vma->exec_flags = &eb->flags[i]; + /* + * SNA is doing fancy tricks with compressing batch buffers, which leads + * to negative relocation deltas. Usually that works out ok since the + * relocate address is still positive, except when the batch is placed + * very low in the GTT. Ensure this doesn't happen. + * + * Note that actual hangs have only been observed on gen7, but for + * paranoia do it everywhere. + */ + if (i == batch_idx) { + if (!(eb->flags[i] & EXEC_OBJECT_PINNED)) + eb->flags[i] |= __EXEC_OBJECT_NEEDS_BIAS; + if (eb->reloc_cache.has_fence) + eb->flags[i] |= EXEC_OBJECT_NEEDS_FENCE; + + eb->batch = vma; + } + err = 0; if (eb_pin_vma(eb, entry, vma)) { if (entry->offset != vma->node.start) { @@ -716,7 +736,7 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb) { struct radix_tree_root *handles_vma = &eb->ctx->handles_vma; struct drm_i915_gem_object *obj; - unsigned int i; + unsigned int i, batch; int err; if (unlikely(i915_gem_context_is_closed(eb->ctx))) @@ -728,6 +748,8 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb) INIT_LIST_HEAD(&eb->relocs); INIT_LIST_HEAD(&eb->unbound); + batch = eb_batch_index(eb); + for (i = 0; i < eb->buffer_count; i++) { u32 handle = eb->exec[i].handle; struct i915_lut_handle *lut; @@ -770,33 +792,16 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb) lut->handle = handle; add_vma: - err = eb_add_vma(eb, i, vma); + err = eb_add_vma(eb, i, batch, vma); if (unlikely(err)) goto err_vma; GEM_BUG_ON(vma != eb->vma[i]); GEM_BUG_ON(vma->exec_flags != &eb->flags[i]); + GEM_BUG_ON(drm_mm_node_allocated(&vma->node) && + eb_vma_misplaced(&eb->exec[i], vma, eb->flags[i])); } - /* take note of the batch buffer before we might reorder the lists */ - i = eb_batch_index(eb); - eb->batch = eb->vma[i]; - GEM_BUG_ON(eb->batch->exec_flags != &eb->flags[i]); - - /* - * SNA is doing fancy tricks with compressing batch buffers, which leads - * to negative relocation deltas. Usually that works out ok since the - * relocate address is still positive, except when the batch is placed - * very low in the GTT. Ensure this doesn't happen. - * - * Note that actual hangs have only been observed on gen7, but for - * paranoia do it everywhere. - */ - if (!(eb->flags[i] & EXEC_OBJECT_PINNED)) - eb->flags[i] |= __EXEC_OBJECT_NEEDS_BIAS; - if (eb->reloc_cache.has_fence) - eb->flags[i] |= EXEC_OBJECT_NEEDS_FENCE; - eb->args->flags |= __EXEC_VALIDATED; return eb_reserve(eb); From a5bfcdf0e16b33c1690ded31f863466136480ddc Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 11 Jun 2018 16:33:32 +0100 Subject: [PATCH 185/572] drm/i915/execlists: Avoid putting the error pointer On allocation error, do not jump to the unwind handler that tries to free the error pointer. Reported-by: Lionel Landwerlin Fixes: a89d1f921c15 ("drm/i915: Split i915_gem_timeline into individual timelines") Signed-off-by: Chris Wilson Cc: Lionel Landwerlin Cc: Tvrtko Ursulin Reviewed-by: Lionel Landwerlin Link: https://patchwork.freedesktop.org/patch/msgid/20180611153332.14824-1-chris@chris-wilson.co.uk (cherry picked from commit 467d35789e5a4f47428b65ef711b30fdabbb0fd4) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_lrc.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 15434cad5430..f3968580e5e2 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -2641,10 +2641,8 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, context_size += LRC_HEADER_PAGES * PAGE_SIZE; ctx_obj = i915_gem_object_create(ctx->i915, context_size); - if (IS_ERR(ctx_obj)) { - ret = PTR_ERR(ctx_obj); - goto error_deref_obj; - } + if (IS_ERR(ctx_obj)) + return PTR_ERR(ctx_obj); vma = i915_vma_instance(ctx_obj, &ctx->i915->ggtt.base, NULL); if (IS_ERR(vma)) { From 541ab84d2b6ea79021d5df0b54d81600334fa2a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Thu, 24 May 2018 15:54:03 +0300 Subject: [PATCH 186/572] drm/i915: Allow DBLSCAN user modes with eDP/LVDS/DSI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When encountering a connector with the scaling mode property both intel and modesetting ddxs sometimes add tons of DBLSCAN modes to the output's mode list. The idea presumably being that since the output will be going through the panel fitter anyway we can pretend to use any kind of mode. Sadly that means we can't reject user modes with the DBLSCAN flag until we know whether we're going to be using the panel's native mode or the user mode directly. Doing otherwise means X clients using xf86vidmode/xrandr will get a protocol error (and often self terminate as a result) when the kernel refuses to use the requested mode with the DBLSCAN flag. To undo the regression we'll move the DBLSCAN checks into the connector->mode_valid() and encoder->compute_config() hooks. Cc: stable@vger.kernel.org Cc: Vito Caputo Reported-by: Vito Caputo Fixes: e995ca0b8139 ("drm/i915: Provide a device level .mode_valid() hook") References: https://lkml.org/lkml/2018/5/21/715 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180524125403.23445-1-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106804 Tested-by: Arkadiusz Miskiewicz (cherry picked from commit e4dd27aadd205417a2e9ea9902b698a0252ec3a0) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_crt.c | 20 ++++++++++++++++++++ drivers/gpu/drm/i915/intel_display.c | 16 +++++++++++++--- drivers/gpu/drm/i915/intel_dp.c | 6 ++++++ drivers/gpu/drm/i915/intel_dp_mst.c | 6 ++++++ drivers/gpu/drm/i915/intel_dsi.c | 6 ++++++ drivers/gpu/drm/i915/intel_dvo.c | 6 ++++++ drivers/gpu/drm/i915/intel_hdmi.c | 6 ++++++ drivers/gpu/drm/i915/intel_lvds.c | 5 +++++ drivers/gpu/drm/i915/intel_sdvo.c | 6 ++++++ drivers/gpu/drm/i915/intel_tv.c | 12 ++++++++++-- 10 files changed, 84 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index de0e22322c76..072b326d5ee0 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -304,6 +304,9 @@ intel_crt_mode_valid(struct drm_connector *connector, int max_dotclk = dev_priv->max_dotclk_freq; int max_clock; + if (mode->flags & DRM_MODE_FLAG_DBLSCAN) + return MODE_NO_DBLESCAN; + if (mode->clock < 25000) return MODE_CLOCK_LOW; @@ -337,6 +340,12 @@ static bool intel_crt_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) { + struct drm_display_mode *adjusted_mode = + &pipe_config->base.adjusted_mode; + + if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN) + return false; + return true; } @@ -344,6 +353,12 @@ static bool pch_crt_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) { + struct drm_display_mode *adjusted_mode = + &pipe_config->base.adjusted_mode; + + if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN) + return false; + pipe_config->has_pch_encoder = true; return true; @@ -354,6 +369,11 @@ static bool hsw_crt_compute_config(struct intel_encoder *encoder, struct drm_connector_state *conn_state) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct drm_display_mode *adjusted_mode = + &pipe_config->base.adjusted_mode; + + if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN) + return false; pipe_config->has_pch_encoder = true; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index dee3a8e659f1..2cc6faa1daa8 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14469,12 +14469,22 @@ static enum drm_mode_status intel_mode_valid(struct drm_device *dev, const struct drm_display_mode *mode) { + /* + * Can't reject DBLSCAN here because Xorg ddxen can add piles + * of DBLSCAN modes to the output's mode list when they detect + * the scaling mode property on the connector. And they don't + * ask the kernel to validate those modes in any way until + * modeset time at which point the client gets a protocol error. + * So in order to not upset those clients we silently ignore the + * DBLSCAN flag on such connectors. For other connectors we will + * reject modes with the DBLSCAN flag in encoder->compute_config(). + * And we always reject DBLSCAN modes in connector->mode_valid() + * as we never want such modes on the connector's mode list. + */ + if (mode->vscan > 1) return MODE_NO_VSCAN; - if (mode->flags & DRM_MODE_FLAG_DBLSCAN) - return MODE_NO_DBLESCAN; - if (mode->flags & DRM_MODE_FLAG_HSKEW) return MODE_H_ILLEGAL; diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 8320f0e8e3be..6cce43a6eaad 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -420,6 +420,9 @@ intel_dp_mode_valid(struct drm_connector *connector, int max_rate, mode_rate, max_lanes, max_link_clock; int max_dotclk; + if (mode->flags & DRM_MODE_FLAG_DBLSCAN) + return MODE_NO_DBLESCAN; + max_dotclk = intel_dp_downstream_max_dotclock(intel_dp); if (intel_dp_is_edp(intel_dp) && fixed_mode) { @@ -1862,6 +1865,9 @@ intel_dp_compute_config(struct intel_encoder *encoder, conn_state->scaling_mode); } + if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN) + return false; + if ((IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) && adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE) return false; diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 9e6956c08688..5890500a3a8b 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -48,6 +48,9 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder, bool reduce_m_n = drm_dp_has_quirk(&intel_dp->desc, DP_DPCD_QUIRK_LIMITED_M_N); + if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN) + return false; + pipe_config->has_pch_encoder = false; bpp = 24; if (intel_dp->compliance.test_data.bpc) { @@ -366,6 +369,9 @@ intel_dp_mst_mode_valid(struct drm_connector *connector, if (!intel_dp) return MODE_ERROR; + if (mode->flags & DRM_MODE_FLAG_DBLSCAN) + return MODE_NO_DBLESCAN; + max_link_clock = intel_dp_max_link_rate(intel_dp); max_lanes = intel_dp_max_lane_count(intel_dp); diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index cf39ca90d887..f349b3920199 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -326,6 +326,9 @@ static bool intel_dsi_compute_config(struct intel_encoder *encoder, conn_state->scaling_mode); } + if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN) + return false; + /* DSI uses short packets for sync events, so clear mode flags for DSI */ adjusted_mode->flags = 0; @@ -1266,6 +1269,9 @@ intel_dsi_mode_valid(struct drm_connector *connector, DRM_DEBUG_KMS("\n"); + if (mode->flags & DRM_MODE_FLAG_DBLSCAN) + return MODE_NO_DBLESCAN; + if (fixed_mode) { if (mode->hdisplay > fixed_mode->hdisplay) return MODE_PANEL; diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c index a70d767313aa..61d908e0df0e 100644 --- a/drivers/gpu/drm/i915/intel_dvo.c +++ b/drivers/gpu/drm/i915/intel_dvo.c @@ -219,6 +219,9 @@ intel_dvo_mode_valid(struct drm_connector *connector, int max_dotclk = to_i915(connector->dev)->max_dotclk_freq; int target_clock = mode->clock; + if (mode->flags & DRM_MODE_FLAG_DBLSCAN) + return MODE_NO_DBLESCAN; + /* XXX: Validate clock range */ if (fixed_mode) { @@ -254,6 +257,9 @@ static bool intel_dvo_compute_config(struct intel_encoder *encoder, if (fixed_mode) intel_fixed_panel_mode(fixed_mode, adjusted_mode); + if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN) + return false; + return true; } diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index ee929f31f7db..d8cb53ef4351 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -1557,6 +1557,9 @@ intel_hdmi_mode_valid(struct drm_connector *connector, bool force_dvi = READ_ONCE(to_intel_digital_connector_state(connector->state)->force_audio) == HDMI_AUDIO_OFF_DVI; + if (mode->flags & DRM_MODE_FLAG_DBLSCAN) + return MODE_NO_DBLESCAN; + clock = mode->clock; if ((mode->flags & DRM_MODE_FLAG_3D_MASK) == DRM_MODE_FLAG_3D_FRAME_PACKING) @@ -1677,6 +1680,9 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder, int desired_bpp; bool force_dvi = intel_conn_state->force_audio == HDMI_AUDIO_OFF_DVI; + if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN) + return false; + pipe_config->has_hdmi_sink = !force_dvi && intel_hdmi->has_hdmi_sink; if (pipe_config->has_hdmi_sink) diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index d278f24ba6ae..48f618dc9abb 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -380,6 +380,8 @@ intel_lvds_mode_valid(struct drm_connector *connector, struct drm_display_mode *fixed_mode = intel_connector->panel.fixed_mode; int max_pixclk = to_i915(connector->dev)->max_dotclk_freq; + if (mode->flags & DRM_MODE_FLAG_DBLSCAN) + return MODE_NO_DBLESCAN; if (mode->hdisplay > fixed_mode->hdisplay) return MODE_PANEL; if (mode->vdisplay > fixed_mode->vdisplay) @@ -429,6 +431,9 @@ static bool intel_lvds_compute_config(struct intel_encoder *intel_encoder, intel_fixed_panel_mode(intel_connector->panel.fixed_mode, adjusted_mode); + if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN) + return false; + if (HAS_PCH_SPLIT(dev_priv)) { pipe_config->has_pch_encoder = true; diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index 25005023c243..26975df4e593 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -1160,6 +1160,9 @@ static bool intel_sdvo_compute_config(struct intel_encoder *encoder, adjusted_mode); } + if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN) + return false; + /* * Make the CRTC code factor in the SDVO pixel multiplier. The * SDVO device will factor out the multiplier during mode_set. @@ -1621,6 +1624,9 @@ intel_sdvo_mode_valid(struct drm_connector *connector, struct intel_sdvo *intel_sdvo = intel_attached_sdvo(connector); int max_dotclk = to_i915(connector->dev)->max_dotclk_freq; + if (mode->flags & DRM_MODE_FLAG_DBLSCAN) + return MODE_NO_DBLESCAN; + if (intel_sdvo->pixel_clock_min > mode->clock) return MODE_CLOCK_LOW; diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index 885fc3809f7f..b55b5c157e38 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -850,6 +850,9 @@ intel_tv_mode_valid(struct drm_connector *connector, const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state); int max_dotclk = to_i915(connector->dev)->max_dotclk_freq; + if (mode->flags & DRM_MODE_FLAG_DBLSCAN) + return MODE_NO_DBLESCAN; + if (mode->clock > max_dotclk) return MODE_CLOCK_HIGH; @@ -877,16 +880,21 @@ intel_tv_compute_config(struct intel_encoder *encoder, struct drm_connector_state *conn_state) { const struct tv_mode *tv_mode = intel_tv_mode_find(conn_state); + struct drm_display_mode *adjusted_mode = + &pipe_config->base.adjusted_mode; if (!tv_mode) return false; - pipe_config->base.adjusted_mode.crtc_clock = tv_mode->clock; + if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN) + return false; + + adjusted_mode->crtc_clock = tv_mode->clock; DRM_DEBUG_KMS("forcing bpc to 8 for TV\n"); pipe_config->pipe_bpp = 8*3; /* TV has it's own notion of sync and other mode flags, so clear them. */ - pipe_config->base.adjusted_mode.flags = 0; + adjusted_mode->flags = 0; /* * FIXME: We don't check whether the input mode is actually what we want From 4dc055c9cc8b3dac966b54d3cd5cf463a988299b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 11 Jun 2018 23:02:55 +0300 Subject: [PATCH 187/572] drm/i915: Fix PIPESTAT irq ack on i965/g4x MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On i965/g4x IIR is edge triggered. So in order for IIR to notice that there is still a pending interrupt we have to force and edge in ISR. For the ISR/IIR pipe event bits we can do that by temporarily clearing all the PIPESTAT enable bits when we ack the status bits. This will force the ISR pipe event bit low, and it can then go back high when we restore the PIPESTAT enable bits. This avoids the following race: 1. stat = read(PIPESTAT) 2. an enabled PIPESTAT status bit goes high 3. write(PIPESTAT, enable|stat); 4. write(IIR, PIPE_EVENT) The end result is IIR==0 and ISR!=0. This can lead to nasty vblank wait/flip_done timeouts if another interrupt source doesn't trick us into looking at the PIPESTAT status bits despite the IIR PIPE_EVENT bit being low. Before i965 IIR was level triggered so this problem can't actually happen there. And curiously VLV/CHV went back to the level triggered scheme as well. But for simplicity we'll use the same i965/g4x compatible code for all platforms. Cc: stable@vger.kernel.org Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106033 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105225 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106030 Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180611200258.27121-1-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson (cherry picked from commit 132c27c97cb958f637dc05adc35a61b47779bcd8) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_irq.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index f9bc3aaa90d0..4a02747ac658 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1893,9 +1893,17 @@ static void i9xx_pipestat_irq_ack(struct drm_i915_private *dev_priv, /* * Clear the PIPE*STAT regs before the IIR + * + * Toggle the enable bits to make sure we get an + * edge in the ISR pipe event bit if we don't clear + * all the enabled status bits. Otherwise the edge + * triggered IIR on i965/g4x wouldn't notice that + * an interrupt is still pending. */ - if (pipe_stats[pipe]) - I915_WRITE(reg, enable_mask | pipe_stats[pipe]); + if (pipe_stats[pipe]) { + I915_WRITE(reg, pipe_stats[pipe]); + I915_WRITE(reg, enable_mask); + } } spin_unlock(&dev_priv->irq_lock); } From 1e34f1d36804be1a446212a33ca5397bf0e5acdd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 13 Jun 2018 19:05:52 +0300 Subject: [PATCH 188/572] drm/i915: Disallow interlaced modes on g4x DP outputs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Looks like interlaced DP output doesn't work on g4x either. Not all that surprising considering we already established that interlaced DP output is busted on VLV/CHV. Cc: stable@vger.kernel.org Signed-off-by: Ville Syrjälä Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20180613160553.11664-1-ville.syrjala@linux.intel.com (cherry picked from commit 929168c5f3df5d9ea0ef426c33e971157d045eab) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_dp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 6cce43a6eaad..2bdfe4b2d6dc 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1868,7 +1868,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN) return false; - if ((IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) && + if (HAS_GMCH_DISPLAY(dev_priv) && adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE) return false; @@ -6343,7 +6343,7 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, drm_connector_init(dev, connector, &intel_dp_connector_funcs, type); drm_connector_helper_add(connector, &intel_dp_connector_helper_funcs); - if (!IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv)) + if (!HAS_GMCH_DISPLAY(dev_priv)) connector->interlace_allowed = true; connector->doublescan_allowed = 0; From 4dccc4d517481282e84335c7acbfd7a1481004b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 13 Jun 2018 19:05:53 +0300 Subject: [PATCH 189/572] drm/i915: Turn off g4x DP port in .post_disable() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While Bspec doesn't list a specific sequence for turning off the DP port on g4x we are getting an underrun if the port is disabled in the .disable() hook. Looks like the pipe stops when the port stops, and by that time the plane disable may not have completed yet. Also the plane(s) seem to end up in some wonky state when this happens as they also signal another underrun immediately after we turn them back on during the next enable sequence. We could add a vblank wait in .disable() to avoid wedging the planes, but I assume we're still tripping up the pipe in some way. So it seems better to me to just follow the ILK+ sequence and turn off the DP port in .post_disable() instead. This sequence doesn't seem to suffer from this problem. Could be it was always the intended sequence for DP and the gen4 bspec was just never updated to include it. Originally we used the bad sequence even on ilk+, but I changed that in commit 08aff3fe26ae ("drm/i915: Move DP port disable to post_disable for pch platforms") as it was causing issues on those platforms as well. I left out g4x then only because I didn't have the hardware to test it. Now that I do it's fairly clear that the ilk+ sequence is also the right choice for g4x. v2: Fix whitespace fail (Jani) Mention the ilk+ commit (Jani) Cc: stable@vger.kernel.org Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180613160553.11664-2-ville.syrjala@linux.intel.com Reviewed-by: Jani Nikula (cherry picked from commit 51a9f6dfc00d35f927ecfaf6f0ae8ebaba39b3fe) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_dp.c | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 2bdfe4b2d6dc..16faea30114a 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -2788,16 +2788,6 @@ static void intel_disable_dp(struct intel_encoder *encoder, static void g4x_disable_dp(struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state, const struct drm_connector_state *old_conn_state) -{ - intel_disable_dp(encoder, old_crtc_state, old_conn_state); - - /* disable the port before the pipe on g4x */ - intel_dp_link_down(encoder, old_crtc_state); -} - -static void ilk_disable_dp(struct intel_encoder *encoder, - const struct intel_crtc_state *old_crtc_state, - const struct drm_connector_state *old_conn_state) { intel_disable_dp(encoder, old_crtc_state, old_conn_state); } @@ -2813,13 +2803,19 @@ static void vlv_disable_dp(struct intel_encoder *encoder, intel_disable_dp(encoder, old_crtc_state, old_conn_state); } -static void ilk_post_disable_dp(struct intel_encoder *encoder, +static void g4x_post_disable_dp(struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state, const struct drm_connector_state *old_conn_state) { struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); enum port port = encoder->port; + /* + * Bspec does not list a specific disable sequence for g4x DP. + * Follow the ilk+ sequence (disable pipe before the port) for + * g4x DP as it does not suffer from underruns like the normal + * g4x modeset sequence (disable pipe after the port). + */ intel_dp_link_down(encoder, old_crtc_state); /* Only ilk+ has port A */ @@ -6442,15 +6438,11 @@ bool intel_dp_init(struct drm_i915_private *dev_priv, intel_encoder->enable = vlv_enable_dp; intel_encoder->disable = vlv_disable_dp; intel_encoder->post_disable = vlv_post_disable_dp; - } else if (INTEL_GEN(dev_priv) >= 5) { - intel_encoder->pre_enable = g4x_pre_enable_dp; - intel_encoder->enable = g4x_enable_dp; - intel_encoder->disable = ilk_disable_dp; - intel_encoder->post_disable = ilk_post_disable_dp; } else { intel_encoder->pre_enable = g4x_pre_enable_dp; intel_encoder->enable = g4x_enable_dp; intel_encoder->disable = g4x_disable_dp; + intel_encoder->post_disable = g4x_post_disable_dp; } intel_dig_port->dp.output_reg = output_reg; From bc64e05408cafe3668e7460834935ea3f1764f31 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Fri, 15 Jun 2018 13:44:29 +0300 Subject: [PATCH 190/572] drm/i915: Fix context ban and hang accounting for client If client is smart or lucky enough to create a new context after each hang, our context banning mechanism will never catch up, and as a result of that it will be saved from client banning. This can result in a never ending streak of gpu hangs caused by bad or malicious client, preventing access from other legit gpu clients. Fix this by always incrementing per client ban score if it hangs in short successions regardless of context ban scoring. The exception are non bannable contexts. They remain detached from client ban scoring mechanism. v2: xchg timestamp, tidyup (Chris) v3: comment, bannable & banned together (Chris) Fixes: b083a0870c79 ("drm/i915: Add per client max context ban limit") Cc: Chris Wilson Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20180615104429.31477-1-mika.kuoppala@linux.intel.com (cherry picked from commit 14921f3cef85b0167a9145e5f29b9dfc3b2a84dc) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_drv.h | 21 ++++++--- drivers/gpu/drm/i915/i915_gem.c | 57 +++++++++++++++++-------- drivers/gpu/drm/i915/i915_gem_context.c | 2 +- 3 files changed, 55 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 34c125e2d90c..7014a96546f4 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -340,14 +340,21 @@ struct drm_i915_file_private { unsigned int bsd_engine; -/* Client can have a maximum of 3 contexts banned before - * it is denied of creating new contexts. As one context - * ban needs 4 consecutive hangs, and more if there is - * progress in between, this is a last resort stop gap measure - * to limit the badly behaving clients access to gpu. +/* + * Every context ban increments per client ban score. Also + * hangs in short succession increments ban score. If ban threshold + * is reached, client is considered banned and submitting more work + * will fail. This is a stop gap measure to limit the badly behaving + * clients access to gpu. Note that unbannable contexts never increment + * the client ban score. */ -#define I915_MAX_CLIENT_CONTEXT_BANS 3 - atomic_t context_bans; +#define I915_CLIENT_SCORE_HANG_FAST 1 +#define I915_CLIENT_FAST_HANG_JIFFIES (60 * HZ) +#define I915_CLIENT_SCORE_CONTEXT_BAN 3 +#define I915_CLIENT_SCORE_BANNED 9 + /** ban_score: Accumulated score of all ctx bans and fast hangs. */ + atomic_t ban_score; + unsigned long hang_timestamp; }; /* Interface history: diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3704f4c0c2c9..d44ad7bc1e94 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2933,32 +2933,54 @@ i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj, return 0; } +static void i915_gem_client_mark_guilty(struct drm_i915_file_private *file_priv, + const struct i915_gem_context *ctx) +{ + unsigned int score; + unsigned long prev_hang; + + if (i915_gem_context_is_banned(ctx)) + score = I915_CLIENT_SCORE_CONTEXT_BAN; + else + score = 0; + + prev_hang = xchg(&file_priv->hang_timestamp, jiffies); + if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES)) + score += I915_CLIENT_SCORE_HANG_FAST; + + if (score) { + atomic_add(score, &file_priv->ban_score); + + DRM_DEBUG_DRIVER("client %s: gained %u ban score, now %u\n", + ctx->name, score, + atomic_read(&file_priv->ban_score)); + } +} + static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx) { - bool banned; + unsigned int score; + bool banned, bannable; atomic_inc(&ctx->guilty_count); - banned = false; - if (i915_gem_context_is_bannable(ctx)) { - unsigned int score; + bannable = i915_gem_context_is_bannable(ctx); + score = atomic_add_return(CONTEXT_SCORE_GUILTY, &ctx->ban_score); + banned = score >= CONTEXT_SCORE_BAN_THRESHOLD; - score = atomic_add_return(CONTEXT_SCORE_GUILTY, - &ctx->ban_score); - banned = score >= CONTEXT_SCORE_BAN_THRESHOLD; + DRM_DEBUG_DRIVER("context %s: guilty %d, score %u, ban %s\n", + ctx->name, atomic_read(&ctx->guilty_count), + score, yesno(banned && bannable)); - DRM_DEBUG_DRIVER("context %s marked guilty (score %d) banned? %s\n", - ctx->name, score, yesno(banned)); - } - if (!banned) + /* Cool contexts don't accumulate client ban score */ + if (!bannable) return; - i915_gem_context_set_banned(ctx); - if (!IS_ERR_OR_NULL(ctx->file_priv)) { - atomic_inc(&ctx->file_priv->context_bans); - DRM_DEBUG_DRIVER("client %s has had %d context banned\n", - ctx->name, atomic_read(&ctx->file_priv->context_bans)); - } + if (banned) + i915_gem_context_set_banned(ctx); + + if (!IS_ERR_OR_NULL(ctx->file_priv)) + i915_gem_client_mark_guilty(ctx->file_priv, ctx); } static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx) @@ -5736,6 +5758,7 @@ int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file) INIT_LIST_HEAD(&file_priv->mm.request_list); file_priv->bsd_engine = -1; + file_priv->hang_timestamp = jiffies; ret = i915_gem_context_open(i915, file); if (ret) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 33f8a4b3c981..060335d3d9e0 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -652,7 +652,7 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv) static bool client_is_banned(struct drm_i915_file_private *file_priv) { - return atomic_read(&file_priv->context_bans) > I915_MAX_CLIENT_CONTEXT_BANS; + return atomic_read(&file_priv->ban_score) >= I915_CLIENT_SCORE_BANNED; } int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, From 7a3727f385dc64773db1c144f6b15c1e9d4735bb Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Fri, 15 Jun 2018 20:06:05 +0100 Subject: [PATCH 191/572] drm/i915: Enable provoking vertex fix on Gen9 systems. The SF and clipper units mishandle the provoking vertex in some cases, which can cause misrendering with shaders that use flat shaded inputs. There are chicken bits in 3D_CHICKEN3 (for SF) and FF_SLICE_CHICKEN (for the clipper) that work around the issue. These registers are unfortunately not part of the logical context (even the power context), and so we must reload them every time we start executing in a context. Bugzilla: https://bugs.freedesktop.org/103047 Signed-off-by: Kenneth Graunke Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20180615190605.16238-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen Cc: stable@vger.kernel.org (cherry picked from commit b77422f80337d363eed60c8c48db9cb6e33085c9) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_reg.h | 5 +++++ drivers/gpu/drm/i915/intel_lrc.c | 12 +++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index f11bb213ec07..7720569f2024 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2425,12 +2425,17 @@ enum i915_power_well_id { #define _3D_CHICKEN _MMIO(0x2084) #define _3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB (1 << 10) #define _3D_CHICKEN2 _MMIO(0x208c) + +#define FF_SLICE_CHICKEN _MMIO(0x2088) +#define FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX (1 << 1) + /* Disables pipelining of read flushes past the SF-WIZ interface. * Required on all Ironlake steppings according to the B-Spec, but the * particular danger of not doing so is not specified. */ # define _3D_CHICKEN2_WM_READ_PIPELINED (1 << 14) #define _3D_CHICKEN3 _MMIO(0x2090) +#define _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX (1 << 12) #define _3D_CHICKEN_SF_DISABLE_OBJEND_CULL (1 << 10) #define _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE (1 << 5) #define _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL (1 << 5) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index f3968580e5e2..7c4c8fb1dae4 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1545,11 +1545,21 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */ batch = gen8_emit_flush_coherentl3_wa(engine, batch); + *batch++ = MI_LOAD_REGISTER_IMM(3); + /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */ - *batch++ = MI_LOAD_REGISTER_IMM(1); *batch++ = i915_mmio_reg_offset(COMMON_SLICE_CHICKEN2); *batch++ = _MASKED_BIT_DISABLE( GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE); + + /* BSpec: 11391 */ + *batch++ = i915_mmio_reg_offset(FF_SLICE_CHICKEN); + *batch++ = _MASKED_BIT_ENABLE(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX); + + /* BSpec: 11299 */ + *batch++ = i915_mmio_reg_offset(_3D_CHICKEN3); + *batch++ = _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX); + *batch++ = MI_NOOP; /* WaClearSlmSpaceAtContextSwitch:kbl */ From 2dbf8dffbf35fd8f611083b9d9fe74fdccf912a3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 15 Jun 2018 15:58:45 -0400 Subject: [PATCH 192/572] pNFS: Always free the session slot on error in nfs4_layoutget_handle_exception Right now, we can call nfs_commit_inode() while holding the session slot, which could lead to NFSv4 deadlocks. Ensure we only keep the slot if the server returned a layout that we have to process. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ed45090e4df6..2c8c2696415e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8650,6 +8650,8 @@ nfs4_layoutget_handle_exception(struct rpc_task *task, dprintk("--> %s tk_status => %d\n", __func__, -task->tk_status); + nfs4_sequence_free_slot(&lgp->res.seq_res); + switch (nfs4err) { case 0: goto out; @@ -8714,7 +8716,6 @@ nfs4_layoutget_handle_exception(struct rpc_task *task, goto out; } - nfs4_sequence_free_slot(&lgp->res.seq_res); err = nfs4_handle_exception(server, nfs4err, exception); if (!status) { if (exception->retry) @@ -8786,20 +8787,22 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout) if (IS_ERR(task)) return ERR_CAST(task); status = rpc_wait_for_completion_task(task); - if (status == 0) { + if (status != 0) + goto out; + + /* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */ + if (task->tk_status < 0 || lgp->res.layoutp->len == 0) { status = nfs4_layoutget_handle_exception(task, lgp, &exception); *timeout = exception.timeout; - } - + } else + lseg = pnfs_layout_process(lgp); +out: trace_nfs4_layoutget(lgp->args.ctx, &lgp->args.range, &lgp->res.range, &lgp->res.stateid, status); - /* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */ - if (status == 0 && lgp->res.layoutp->len) - lseg = pnfs_layout_process(lgp); rpc_put_task(task); dprintk("<-- %s status=%d\n", __func__, status); if (status) From c8bf70735382401a161d9c818e8ea89500812d0c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 15 Jun 2018 16:31:02 -0400 Subject: [PATCH 193/572] pNFS: Don't send layoutreturn if the layout is already invalid If the layout was invalidated due to a reboot, then don't try to send a layoutreturn for it. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 16 ++++++++++++++++ fs/nfs/pnfs.h | 5 +++++ 2 files changed, 21 insertions(+) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 2c8c2696415e..6dd146885da9 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3294,6 +3294,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) struct nfs4_closedata *calldata = data; struct nfs4_state *state = calldata->state; struct inode *inode = calldata->inode; + struct pnfs_layout_hdr *lo; bool is_rdonly, is_wronly, is_rdwr; int call_close = 0; @@ -3337,6 +3338,12 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data) goto out_wait; } + lo = calldata->arg.lr_args ? calldata->arg.lr_args->layout : NULL; + if (lo && !pnfs_layout_is_valid(lo)) { + calldata->arg.lr_args = NULL; + calldata->res.lr_res = NULL; + } + if (calldata->arg.fmode == 0) task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE]; @@ -5972,12 +5979,19 @@ static void nfs4_delegreturn_release(void *calldata) static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data) { struct nfs4_delegreturndata *d_data; + struct pnfs_layout_hdr *lo; d_data = (struct nfs4_delegreturndata *)data; if (!d_data->lr.roc && nfs4_wait_on_layoutreturn(d_data->inode, task)) return; + lo = d_data->args.lr_args ? d_data->args.lr_args->layout : NULL; + if (lo && !pnfs_layout_is_valid(lo)) { + d_data->args.lr_args = NULL; + d_data->res.lr_res = NULL; + } + nfs4_setup_sequence(d_data->res.server->nfs_client, &d_data->args.seq_args, &d_data->res.seq_res, @@ -8820,6 +8834,8 @@ nfs4_layoutreturn_prepare(struct rpc_task *task, void *calldata) &lrp->args.seq_args, &lrp->res.seq_res, task); + if (!pnfs_layout_is_valid(lrp->args.layout)) + rpc_exit(task, 0); } static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index a8f5e6b16749..3fe81424337d 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -801,6 +801,11 @@ static inline void nfs4_lgopen_release(struct nfs4_layoutget *lgp) { } +static inline bool pnfs_layout_is_valid(const struct pnfs_layout_hdr *lo) +{ + return false; +} + #endif /* CONFIG_NFS_V4_1 */ #if IS_ENABLED(CONFIG_NFS_V4_2) From 0dae72d581dfe795aedaf5523c1faeb18958b1a7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 18 Jun 2018 15:55:43 -0400 Subject: [PATCH 194/572] sunrpc: Prevent duplicate XID allocation Krzysztof Kozlowski reports that a heavy NFSv4 WRITE workload against a slow NFS server causes his Raspberry Pi clients to stall. Krzysztof bisected it to commit 37ac86c3a76c ("SUNRPC: Initialize rpc_rqst outside of xprt->reserve_lock") . I was able to reproduce similar behavior and it appears that rarely the RPC client layer is re-allocating an XID for an RPC that it has already partially sent. This results in the client ignoring the subsequent reply, which carries the original XID. For various reasons, checking !req->rq_xmit_bytes_sent in xprt_prepare_transmit is not a 100% reliable mechanism for determining when a fresh XID is needed. Trond's preference is to allocate the XID at the time each rpc_rqst slot is initialized. This patch should also address a gcc 4.1.2 complaint reported by Geert Uytterhoeven . Reported-by: Krzysztof Kozlowski Fixes: 37ac86c3a76c ("SUNRPC: Initialize rpc_rqst outside of ... ") Signed-off-by: Chuck Lever Tested-by: Krzysztof Kozlowski Signed-off-by: Trond Myklebust --- net/sunrpc/xprt.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 3c85af058227..3fabf9f6a0f9 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -987,8 +987,6 @@ bool xprt_prepare_transmit(struct rpc_task *task) task->tk_status = -EAGAIN; goto out_unlock; } - if (!bc_prealloc(req) && !req->rq_xmit_bytes_sent) - req->rq_xid = xprt_alloc_xid(xprt); ret = true; out_unlock: spin_unlock_bh(&xprt->transport_lock); @@ -1298,7 +1296,12 @@ void xprt_retry_reserve(struct rpc_task *task) static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt) { - return (__force __be32)xprt->xid++; + __be32 xid; + + spin_lock(&xprt->reserve_lock); + xid = (__force __be32)xprt->xid++; + spin_unlock(&xprt->reserve_lock); + return xid; } static inline void xprt_init_xid(struct rpc_xprt *xprt) @@ -1316,6 +1319,7 @@ void xprt_request_init(struct rpc_task *task) req->rq_task = task; req->rq_xprt = xprt; req->rq_buffer = NULL; + req->rq_xid = xprt_alloc_xid(xprt); req->rq_connect_cookie = xprt->connect_cookie - 1; req->rq_bytes_sent = 0; req->rq_snd_buf.len = 0; From 93efbd39870474cc536b9caf4a6efeb03b0bc56f Mon Sep 17 00:00:00 2001 From: Zhouyang Jia Date: Sat, 16 Jun 2018 01:05:01 +0800 Subject: [PATCH 195/572] scsi: xen-scsifront: add error handling for xenbus_printf When xenbus_printf fails, the lack of error-handling code may cause unexpected results. This patch adds error-handling code after calling xenbus_printf. Signed-off-by: Zhouyang Jia Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross --- drivers/scsi/xen-scsifront.c | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/xen-scsifront.c b/drivers/scsi/xen-scsifront.c index 36f59a1be7e9..61389bdc7926 100644 --- a/drivers/scsi/xen-scsifront.c +++ b/drivers/scsi/xen-scsifront.c @@ -654,10 +654,17 @@ static int scsifront_dev_reset_handler(struct scsi_cmnd *sc) static int scsifront_sdev_configure(struct scsi_device *sdev) { struct vscsifrnt_info *info = shost_priv(sdev->host); + int err; - if (info && current == info->curr) - xenbus_printf(XBT_NIL, info->dev->nodename, + if (info && current == info->curr) { + err = xenbus_printf(XBT_NIL, info->dev->nodename, info->dev_state_path, "%d", XenbusStateConnected); + if (err) { + xenbus_dev_error(info->dev, err, + "%s: writing dev_state_path", __func__); + return err; + } + } return 0; } @@ -665,10 +672,15 @@ static int scsifront_sdev_configure(struct scsi_device *sdev) static void scsifront_sdev_destroy(struct scsi_device *sdev) { struct vscsifrnt_info *info = shost_priv(sdev->host); + int err; - if (info && current == info->curr) - xenbus_printf(XBT_NIL, info->dev->nodename, + if (info && current == info->curr) { + err = xenbus_printf(XBT_NIL, info->dev->nodename, info->dev_state_path, "%d", XenbusStateClosed); + if (err) + xenbus_dev_error(info->dev, err, + "%s: writing dev_state_path", __func__); + } } static struct scsi_host_template scsifront_sht = { @@ -1003,9 +1015,12 @@ static void scsifront_do_lun_hotplug(struct vscsifrnt_info *info, int op) if (scsi_add_device(info->host, chn, tgt, lun)) { dev_err(&dev->dev, "scsi_add_device\n"); - xenbus_printf(XBT_NIL, dev->nodename, + err = xenbus_printf(XBT_NIL, dev->nodename, info->dev_state_path, "%d", XenbusStateClosed); + if (err) + xenbus_dev_error(dev, err, + "%s: writing dev_state_path", __func__); } break; case VSCSIFRONT_OP_DEL_LUN: @@ -1019,10 +1034,14 @@ static void scsifront_do_lun_hotplug(struct vscsifrnt_info *info, int op) } break; case VSCSIFRONT_OP_READD_LUN: - if (device_state == XenbusStateConnected) - xenbus_printf(XBT_NIL, dev->nodename, + if (device_state == XenbusStateConnected) { + err = xenbus_printf(XBT_NIL, dev->nodename, info->dev_state_path, "%d", XenbusStateConnected); + if (err) + xenbus_dev_error(dev, err, + "%s: writing dev_state_path", __func__); + } break; default: break; From 7c63ca24c878e0051c91904b72174029320ef4bd Mon Sep 17 00:00:00 2001 From: Zhouyang Jia Date: Sat, 16 Jun 2018 08:14:37 +0800 Subject: [PATCH 196/572] xen/scsiback: add error handling for xenbus_printf When xenbus_printf fails, the lack of error-handling code may cause unexpected results. This patch adds error-handling code after calling xenbus_printf. Signed-off-by: Zhouyang Jia Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross --- drivers/xen/xen-scsiback.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c index 7bc88fd43cfc..e2f3e8b0fba9 100644 --- a/drivers/xen/xen-scsiback.c +++ b/drivers/xen/xen-scsiback.c @@ -1012,6 +1012,7 @@ static void scsiback_do_add_lun(struct vscsibk_info *info, const char *state, { struct v2p_entry *entry; unsigned long flags; + int err; if (try) { spin_lock_irqsave(&info->v2p_lock, flags); @@ -1027,8 +1028,11 @@ static void scsiback_do_add_lun(struct vscsibk_info *info, const char *state, scsiback_del_translation_entry(info, vir); } } else if (!try) { - xenbus_printf(XBT_NIL, info->dev->nodename, state, + err = xenbus_printf(XBT_NIL, info->dev->nodename, state, "%d", XenbusStateClosed); + if (err) + xenbus_dev_error(info->dev, err, + "%s: writing %s", __func__, state); } } @@ -1067,8 +1071,11 @@ static void scsiback_do_1lun_hotplug(struct vscsibk_info *info, int op, snprintf(str, sizeof(str), "vscsi-devs/%s/p-dev", ent); val = xenbus_read(XBT_NIL, dev->nodename, str, NULL); if (IS_ERR(val)) { - xenbus_printf(XBT_NIL, dev->nodename, state, + err = xenbus_printf(XBT_NIL, dev->nodename, state, "%d", XenbusStateClosed); + if (err) + xenbus_dev_error(info->dev, err, + "%s: writing %s", __func__, state); return; } strlcpy(phy, val, VSCSI_NAMELEN); @@ -1079,8 +1086,11 @@ static void scsiback_do_1lun_hotplug(struct vscsibk_info *info, int op, err = xenbus_scanf(XBT_NIL, dev->nodename, str, "%u:%u:%u:%u", &vir.hst, &vir.chn, &vir.tgt, &vir.lun); if (XENBUS_EXIST_ERR(err)) { - xenbus_printf(XBT_NIL, dev->nodename, state, + err = xenbus_printf(XBT_NIL, dev->nodename, state, "%d", XenbusStateClosed); + if (err) + xenbus_dev_error(info->dev, err, + "%s: writing %s", __func__, state); return; } From e08ecba17b72aeb01859601bc242a5bc48620109 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 19 Jun 2018 21:51:55 +1000 Subject: [PATCH 197/572] powerpc/64s: Fix build failures with CONFIG_NMI_IPI=n I broke the build when CONFIG_NMI_IPI=n with my recent commit to add arch_trigger_cpumask_backtrace(), eg: stacktrace.c:(.text+0x1b0): undefined reference to `.smp_send_safe_nmi_ipi' We should rework the CONFIG symbols here in future to avoid these double barrelled ifdefs but for now they fix the build. Fixes: 5cc05910f26e ("powerpc/64s: Wire up arch_trigger_cpumask_backtrace()") Reported-by: Christophe LEROY Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/nmi.h | 2 +- arch/powerpc/kernel/stacktrace.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/nmi.h b/arch/powerpc/include/asm/nmi.h index 0f571e0ebca1..bd9ba8defd72 100644 --- a/arch/powerpc/include/asm/nmi.h +++ b/arch/powerpc/include/asm/nmi.h @@ -8,7 +8,7 @@ extern void arch_touch_nmi_watchdog(void); static inline void arch_touch_nmi_watchdog(void) {} #endif -#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_STACKTRACE) +#if defined(CONFIG_NMI_IPI) && defined(CONFIG_STACKTRACE) extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self); #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index 07e97f289c52..e2c50b55138f 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -196,7 +196,7 @@ save_stack_trace_tsk_reliable(struct task_struct *tsk, EXPORT_SYMBOL_GPL(save_stack_trace_tsk_reliable); #endif /* CONFIG_HAVE_RELIABLE_STACKTRACE */ -#ifdef CONFIG_PPC_BOOK3S_64 +#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI) static void handle_backtrace_ipi(struct pt_regs *regs) { nmi_cpu_backtrace(regs); @@ -242,4 +242,4 @@ void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self) { nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace_ipi); } -#endif /* CONFIG_PPC64 */ +#endif /* defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI) */ From dd65a941f6ba473a5cb9d013d57fa43b48450a04 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Tue, 12 Jun 2018 13:08:40 +0200 Subject: [PATCH 198/572] arm64: dma-mapping: clear buffers allocated with FORCE_CONTIGUOUS flag dma_alloc_*() buffers might be exposed to userspace via mmap() call, so they should be cleared on allocation. In case of IOMMU-based dma-mapping implementation such buffer clearing was missing in the code path for DMA_ATTR_FORCE_CONTIGUOUS flag handling, because dma_alloc_from_contiguous() doesn't honor __GFP_ZERO flag. This patch fixes this issue. For more information on clearing buffers allocated by dma_alloc_* functions, see commit 6829e274a623 ("arm64: dma-mapping: always clear allocated buffers"). Fixes: 44176bb38fa4 ("arm64: Add support for DMA_ATTR_FORCE_CONTIGUOUS to IOMMU") Signed-off-by: Marek Szyprowski Signed-off-by: Catalin Marinas --- arch/arm64/mm/dma-mapping.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 49e217ac7e1e..61e93f0b5482 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -583,13 +583,14 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size, size >> PAGE_SHIFT); return NULL; } - if (!coherent) - __dma_flush_area(page_to_virt(page), iosize); - addr = dma_common_contiguous_remap(page, size, VM_USERMAP, prot, __builtin_return_address(0)); - if (!addr) { + if (addr) { + memset(addr, 0, size); + if (!coherent) + __dma_flush_area(page_to_virt(page), iosize); + } else { iommu_dma_unmap_page(dev, *handle, iosize, 0, attrs); dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT); From b154886f7892499d0d3054026e19dfb9a731df61 Mon Sep 17 00:00:00 2001 From: Zhizhou Zhang Date: Tue, 12 Jun 2018 17:07:37 +0800 Subject: [PATCH 199/572] arm64: make secondary_start_kernel() notrace We can't call function trace hook before setup percpu offset. When entering secondary_start_kernel(), percpu offset has not been initialized. So this lead hotplug malfunction. Here is the flow to reproduce this bug: echo 0 > /sys/devices/system/cpu/cpu1/online echo function > /sys/kernel/debug/tracing/current_tracer echo 1 > /sys/kernel/debug/tracing/tracing_on echo 1 > /sys/devices/system/cpu/cpu1/online Acked-by: Mark Rutland Tested-by: Suzuki K Poulose Signed-off-by: Zhizhou Zhang Signed-off-by: Catalin Marinas --- arch/arm64/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index f3e2e3aec0b0..2faa9863d2e5 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -179,7 +179,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) * This is the secondary CPU boot entry. We're using this CPUs * idle thread stack, but a set of temporary page tables. */ -asmlinkage void secondary_start_kernel(void) +asmlinkage notrace void secondary_start_kernel(void) { u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK; struct mm_struct *mm = &init_mm; From 42f86b44a4d356edba626171dfe0be061fc695af Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 18 Jun 2018 19:07:24 -0400 Subject: [PATCH 200/572] pNFS/flexfiles: Don't tie up all the rpciod threads in resends We do not want to have rpciod threads perform recursive calls into the RPC layer since that can deadlock. In particular, having to wait for a layoutget can be nasty... We want rather to defer scheduling those retries until we're in the rpc_release() callback, since that is called from the nfsiod workqueue. Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 11 ++++++++--- include/linux/nfs_xdr.h | 2 ++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 3ae038d9c292..336b4d560e2c 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1243,17 +1243,18 @@ static int ff_layout_read_done_cb(struct rpc_task *task, hdr->ds_clp, hdr->lseg, hdr->pgio_mirror_idx); + clear_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags); + clear_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags); switch (err) { case -NFS4ERR_RESET_TO_PNFS: if (ff_layout_choose_best_ds_for_read(hdr->lseg, hdr->pgio_mirror_idx + 1, &hdr->pgio_mirror_idx)) goto out_eagain; - ff_layout_read_record_layoutstats_done(task, hdr); - pnfs_read_resend_pnfs(hdr); + set_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags); return task->tk_status; case -NFS4ERR_RESET_TO_MDS: - ff_layout_reset_read(hdr); + set_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags); return task->tk_status; case -EAGAIN: goto out_eagain; @@ -1403,6 +1404,10 @@ static void ff_layout_read_release(void *data) struct nfs_pgio_header *hdr = data; ff_layout_read_record_layoutstats_done(&hdr->task, hdr); + if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags)) + pnfs_read_resend_pnfs(hdr); + else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags)) + ff_layout_reset_read(hdr); pnfs_generic_rw_release(data); } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 9dee3c23895d..712eed156d09 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1438,6 +1438,8 @@ enum { NFS_IOHDR_EOF, NFS_IOHDR_REDO, NFS_IOHDR_STAT, + NFS_IOHDR_RESEND_PNFS, + NFS_IOHDR_RESEND_MDS, }; struct nfs_io_completion; From 7b0df92ac12148098391bf53f3494af17812f264 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 18 Jun 2018 19:23:50 -0400 Subject: [PATCH 201/572] pNFS/flexfiles: Process writeback resends from nfsiod context as well Although the writeback resends are more robust than the reads, since they are not immediately rescheduled by the same thread, we are better off processing them in the same place as the reads. Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 336b4d560e2c..1386b774ec95 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1428,12 +1428,14 @@ static int ff_layout_write_done_cb(struct rpc_task *task, hdr->ds_clp, hdr->lseg, hdr->pgio_mirror_idx); + clear_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags); + clear_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags); switch (err) { case -NFS4ERR_RESET_TO_PNFS: - ff_layout_reset_write(hdr, true); + set_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags); return task->tk_status; case -NFS4ERR_RESET_TO_MDS: - ff_layout_reset_write(hdr, false); + set_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags); return task->tk_status; case -EAGAIN: return -EAGAIN; @@ -1580,6 +1582,10 @@ static void ff_layout_write_release(void *data) struct nfs_pgio_header *hdr = data; ff_layout_write_record_layoutstats_done(&hdr->task, hdr); + if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags)) + ff_layout_reset_write(hdr, true); + else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags)) + ff_layout_reset_write(hdr, false); pnfs_generic_rw_release(data); } From 0cc61e64e21cfc24fa0d938fd148aba4a595163b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 19 Jun 2018 18:40:14 +0200 Subject: [PATCH 202/572] block: fix timeout changes for legacy request drivers blk_mq_complete_request can only be called for blk-mq drivers, but when removing the BLK_EH_HANDLED return value, two legacy request timeout methods incorrectly got switched to call blk_mq_complete_request. Call __blk_complete_request instead to reinstance the previous behavior. For that __blk_complete_request needs to be exported. Fixes: 1fc2b62e ("scsi_transport_fc: complete requests from ->timeout") Fixes: 0df0bb08 ("null_blk: complete requests from ->timeout") Reported-by: Jianchao Wang Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-softirq.c | 1 + drivers/block/null_blk.c | 2 +- drivers/scsi/scsi_transport_fc.c | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/block/blk-softirq.c b/block/blk-softirq.c index 01e2b353a2b9..15c1f5e12eb8 100644 --- a/block/blk-softirq.c +++ b/block/blk-softirq.c @@ -144,6 +144,7 @@ do_local: local_irq_restore(flags); } +EXPORT_SYMBOL(__blk_complete_request); /** * blk_complete_request - end I/O on a request diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 2bdadd7f1454..3d8bdbe9bd35 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -1365,7 +1365,7 @@ static blk_qc_t null_queue_bio(struct request_queue *q, struct bio *bio) static enum blk_eh_timer_return null_rq_timed_out_fn(struct request *rq) { pr_info("null: rq %p timed out\n", rq); - blk_mq_complete_request(rq); + __blk_complete_request(rq); return BLK_EH_DONE; } diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index 1da3d71e9f61..13948102ca29 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -3592,7 +3592,7 @@ fc_bsg_job_timeout(struct request *req) /* the blk_end_sync_io() doesn't check the error */ if (inflight) - blk_mq_complete_request(req); + __blk_complete_request(req); return BLK_EH_DONE; } From 91c822c33066b7c4f8cc47d7532f47e3bb89979b Mon Sep 17 00:00:00 2001 From: Rajan Vaja Date: Mon, 18 Jun 2018 13:01:02 +0530 Subject: [PATCH 203/572] drm/amd/pp: Fix uninitialized variable Initialize variable to 0 before performing logical OR operation. Reviewed-by: Rex Zhu Signed-off-by: Rajan Vaja Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c index dbe4b1f66784..22364875a943 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c @@ -1090,7 +1090,7 @@ static int vega10_disable_se_edc_config(struct pp_hwmgr *hwmgr) static int vega10_enable_psm_gc_edc_config(struct pp_hwmgr *hwmgr) { struct amdgpu_device *adev = hwmgr->adev; - int result; + int result = 0; uint32_t num_se = 0; uint32_t count, data; From 6fa39bc1e01dab8b4f54b23e95a181a2ed5a2d38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Fri, 8 Jun 2018 12:58:15 +0200 Subject: [PATCH 204/572] drm/amdgpu: Use kvmalloc_array for allocating VRAM manager nodes array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It can be quite big, and there's no need for it to be physically contiguous. This is less likely to fail under memory pressure (has actually happened while running piglit). Cc: stable@vger.kernel.org Signed-off-by: Michel Dänzer Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 9aca653bec07..9c47e860e5e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -135,7 +135,8 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node); } - nodes = kcalloc(num_nodes, sizeof(*nodes), GFP_KERNEL); + nodes = kvmalloc_array(num_nodes, sizeof(*nodes), + GFP_KERNEL | __GFP_ZERO); if (!nodes) return -ENOMEM; @@ -190,7 +191,7 @@ error: drm_mm_remove_node(&nodes[i]); spin_unlock(&mgr->lock); - kfree(nodes); + kvfree(nodes); return r == -ENOSPC ? 0 : r; } @@ -229,7 +230,7 @@ static void amdgpu_vram_mgr_del(struct ttm_mem_type_manager *man, atomic64_sub(usage, &mgr->usage); atomic64_sub(vis_usage, &mgr->vis_usage); - kfree(mem->mm_node); + kvfree(mem->mm_node); mem->mm_node = NULL; } From d9fda248046ac035f18a6e663f2f9245b4bf9470 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Tue, 8 May 2018 11:33:42 -0400 Subject: [PATCH 205/572] drm/amdgpu: Don't default to DC support for Kaveri and older We've had a number of users report failures to detect and light up display with DC with LVDS and VGA. These connector types are not currently supported with DC. I'd like to add support but unfortunately don't have a system with LVDS or VGA available. In order not to cause regressions we should probably fallback to the non-DC driver for ASICs that support VGA and LVDS. These ASICs are: * Bonaire * Kabini * Kaveri * Mullins ASIC support can always be force enabled with amdgpu.dc=1 v2: Keep Hawaii on DC v3: Added Mullins to the list Cc: stable@vger.kernel.org Signed-off-by: Harry Wentland Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 3317d1536f4f..6e5284e6c028 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2158,10 +2158,18 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) switch (asic_type) { #if defined(CONFIG_DRM_AMD_DC) case CHIP_BONAIRE: - case CHIP_HAWAII: case CHIP_KAVERI: case CHIP_KABINI: case CHIP_MULLINS: + /* + * We have systems in the wild with these ASICs that require + * LVDS and VGA support which is not supported with DC. + * + * Fallback to the non-DC driver here by default so as not to + * cause regressions. + */ + return amdgpu_dc > 0; + case CHIP_HAWAII: case CHIP_CARRIZO: case CHIP_STONEY: case CHIP_POLARIS10: From 9c24c10a2c1e1bb478b6bb70612d9e885aee044f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 19 Jun 2018 10:26:40 -0700 Subject: [PATCH 206/572] Revert "block: Add warning for bi_next not NULL in bio_endio()" Commit 0ba99ca4838b ("block: Add warning for bi_next not NULL in bio_endio()") breaks the dm driver. end_clone_bio() detects whether or not a bio is the last bio associated with a request by checking the .bi_next field. Commit 0ba99ca4838b clears that field before end_clone_bio() has had a chance to inspect that field. Hence revert commit 0ba99ca4838b. This patch avoids that KASAN reports the following complaint when running the srp-test software (srp-test/run_tests -c -d -r 10 -t 02-mq): ================================================================== BUG: KASAN: use-after-free in bio_advance+0x11b/0x1d0 Read of size 4 at addr ffff8801300e06d0 by task ksoftirqd/0/9 CPU: 0 PID: 9 Comm: ksoftirqd/0 Not tainted 4.18.0-rc1-dbg+ #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.0.0-prebuilt.qemu-project.org 04/01/2014 Call Trace: dump_stack+0xa4/0xf5 print_address_description+0x6f/0x270 kasan_report+0x241/0x360 __asan_load4+0x78/0x80 bio_advance+0x11b/0x1d0 blk_update_request+0xa7/0x5b0 scsi_end_request+0x56/0x320 [scsi_mod] scsi_io_completion+0x7d6/0xb20 [scsi_mod] scsi_finish_command+0x1c0/0x280 [scsi_mod] scsi_softirq_done+0x19a/0x230 [scsi_mod] blk_mq_complete_request+0x160/0x240 scsi_mq_done+0x50/0x1a0 [scsi_mod] srp_recv_done+0x515/0x1330 [ib_srp] __ib_process_cq+0xa0/0xf0 [ib_core] ib_poll_handler+0x38/0xa0 [ib_core] irq_poll_softirq+0xe8/0x1f0 __do_softirq+0x128/0x60d run_ksoftirqd+0x3f/0x60 smpboot_thread_fn+0x352/0x460 kthread+0x1c1/0x1e0 ret_from_fork+0x24/0x30 Allocated by task 1918: save_stack+0x43/0xd0 kasan_kmalloc+0xad/0xe0 kasan_slab_alloc+0x11/0x20 kmem_cache_alloc+0xfe/0x350 mempool_alloc_slab+0x15/0x20 mempool_alloc+0xfb/0x270 bio_alloc_bioset+0x244/0x350 submit_bh_wbc+0x9c/0x2f0 __block_write_full_page+0x299/0x5a0 block_write_full_page+0x16b/0x180 blkdev_writepage+0x18/0x20 __writepage+0x42/0x80 write_cache_pages+0x376/0x8a0 generic_writepages+0xbe/0x110 blkdev_writepages+0xe/0x10 do_writepages+0x9b/0x180 __filemap_fdatawrite_range+0x178/0x1c0 file_write_and_wait_range+0x59/0xc0 blkdev_fsync+0x46/0x80 vfs_fsync_range+0x66/0x100 do_fsync+0x3d/0x70 __x64_sys_fsync+0x21/0x30 do_syscall_64+0x77/0x230 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 9: save_stack+0x43/0xd0 __kasan_slab_free+0x137/0x190 kasan_slab_free+0xe/0x10 kmem_cache_free+0xd3/0x380 mempool_free_slab+0x17/0x20 mempool_free+0x63/0x160 bio_free+0x81/0xa0 bio_put+0x59/0x60 end_bio_bh_io_sync+0x5d/0x70 bio_endio+0x1a7/0x360 blk_update_request+0xd0/0x5b0 end_clone_bio+0xa3/0xd0 [dm_mod] bio_endio+0x1a7/0x360 blk_update_request+0xd0/0x5b0 scsi_end_request+0x56/0x320 [scsi_mod] scsi_io_completion+0x7d6/0xb20 [scsi_mod] scsi_finish_command+0x1c0/0x280 [scsi_mod] scsi_softirq_done+0x19a/0x230 [scsi_mod] blk_mq_complete_request+0x160/0x240 scsi_mq_done+0x50/0x1a0 [scsi_mod] srp_recv_done+0x515/0x1330 [ib_srp] __ib_process_cq+0xa0/0xf0 [ib_core] ib_poll_handler+0x38/0xa0 [ib_core] irq_poll_softirq+0xe8/0x1f0 __do_softirq+0x128/0x60d The buggy address belongs to the object at ffff8801300e0640 which belongs to the cache bio-0 of size 200 The buggy address is located 144 bytes inside of 200-byte region [ffff8801300e0640, ffff8801300e0708) The buggy address belongs to the page: page:ffffea0004c03800 count:1 mapcount:0 mapping:ffff88015a563a00 index:0x0 compound_mapcount: 0 flags: 0x8000000000008100(slab|head) raw: 8000000000008100 dead000000000100 dead000000000200 ffff88015a563a00 raw: 0000000000000000 0000000000330033 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8801300e0580: fb fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc ffff8801300e0600: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb >ffff8801300e0680: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff8801300e0700: fb fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff8801300e0780: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ================================================================== Cc: Kent Overstreet Fixes: 0ba99ca4838b ("block: Add warning for bi_next not NULL in bio_endio()") Acked-by: Mike Snitzer Signed-off-by: Bart Van Assche Signed-off-by: Jens Axboe --- block/bio.c | 3 --- block/blk-core.c | 8 +------- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/block/bio.c b/block/bio.c index db9a40e9a136..f7e3d88bd0b6 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1807,9 +1807,6 @@ again: if (!bio_integrity_endio(bio)) return; - if (WARN_ONCE(bio->bi_next, "driver left bi_next not NULL")) - bio->bi_next = NULL; - /* * Need to have a real endio function for chained bios, otherwise * various corner cases will break (like stacking block devices that diff --git a/block/blk-core.c b/block/blk-core.c index cf0ee764b908..afd2596ea3d3 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -273,10 +273,6 @@ static void req_bio_endio(struct request *rq, struct bio *bio, bio_advance(bio, nbytes); /* don't actually finish bio if it's part of flush sequence */ - /* - * XXX this code looks suspicious - it's not consistent with advancing - * req->bio in caller - */ if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ)) bio_endio(bio); } @@ -3081,10 +3077,8 @@ bool blk_update_request(struct request *req, blk_status_t error, struct bio *bio = req->bio; unsigned bio_bytes = min(bio->bi_iter.bi_size, nr_bytes); - if (bio_bytes == bio->bi_iter.bi_size) { + if (bio_bytes == bio->bi_iter.bi_size) req->bio = bio->bi_next; - bio->bi_next = NULL; - } /* Completion has already been traced */ bio_clear_flag(bio, BIO_TRACE_COMPLETION); From 5c53d19b76dccbaf340b11acb837d40c0789049d Mon Sep 17 00:00:00 2001 From: James Zhu Date: Mon, 18 Jun 2018 13:46:16 -0400 Subject: [PATCH 207/572] drm/amdgpu:All UVD instances share one idle_work handle MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All UVD instanses have only one dpm control, so it is better to share one idle_work handle. Signed-off-by: James Zhu Reviewed-by: Alex Deucher Reviewed-by: Christian König Tested-by: Stefan Agner Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 14 +++++++------- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index bcf68f80bbf0..3ff08e326838 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -130,7 +130,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) unsigned version_major, version_minor, family_id; int i, j, r; - INIT_DELAYED_WORK(&adev->uvd.inst->idle_work, amdgpu_uvd_idle_work_handler); + INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler); switch (adev->asic_type) { #ifdef CONFIG_DRM_AMDGPU_CIK @@ -314,12 +314,12 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev) void *ptr; int i, j; + cancel_delayed_work_sync(&adev->uvd.idle_work); + for (j = 0; j < adev->uvd.num_uvd_inst; ++j) { if (adev->uvd.inst[j].vcpu_bo == NULL) continue; - cancel_delayed_work_sync(&adev->uvd.inst[j].idle_work); - /* only valid for physical mode */ if (adev->asic_type < CHIP_POLARIS10) { for (i = 0; i < adev->uvd.max_handles; ++i) @@ -1145,7 +1145,7 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, static void amdgpu_uvd_idle_work_handler(struct work_struct *work) { struct amdgpu_device *adev = - container_of(work, struct amdgpu_device, uvd.inst->idle_work.work); + container_of(work, struct amdgpu_device, uvd.idle_work.work); unsigned fences = 0, i, j; for (i = 0; i < adev->uvd.num_uvd_inst; ++i) { @@ -1167,7 +1167,7 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work) AMD_CG_STATE_GATE); } } else { - schedule_delayed_work(&adev->uvd.inst->idle_work, UVD_IDLE_TIMEOUT); + schedule_delayed_work(&adev->uvd.idle_work, UVD_IDLE_TIMEOUT); } } @@ -1179,7 +1179,7 @@ void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring) if (amdgpu_sriov_vf(adev)) return; - set_clocks = !cancel_delayed_work_sync(&adev->uvd.inst->idle_work); + set_clocks = !cancel_delayed_work_sync(&adev->uvd.idle_work); if (set_clocks) { if (adev->pm.dpm_enabled) { amdgpu_dpm_enable_uvd(adev, true); @@ -1196,7 +1196,7 @@ void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring) void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring) { if (!amdgpu_sriov_vf(ring->adev)) - schedule_delayed_work(&ring->adev->uvd.inst->idle_work, UVD_IDLE_TIMEOUT); + schedule_delayed_work(&ring->adev->uvd.idle_work, UVD_IDLE_TIMEOUT); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h index b1579fba134c..8b23a1b00c76 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h @@ -44,7 +44,6 @@ struct amdgpu_uvd_inst { void *saved_bo; atomic_t handles[AMDGPU_MAX_UVD_HANDLES]; struct drm_file *filp[AMDGPU_MAX_UVD_HANDLES]; - struct delayed_work idle_work; struct amdgpu_ring ring; struct amdgpu_ring ring_enc[AMDGPU_MAX_UVD_ENC_RINGS]; struct amdgpu_irq_src irq; @@ -62,6 +61,7 @@ struct amdgpu_uvd { bool address_64_bit; bool use_ctx_buf; struct amdgpu_uvd_inst inst[AMDGPU_MAX_UVD_INSTANCES]; + struct delayed_work idle_work; }; int amdgpu_uvd_sw_init(struct amdgpu_device *adev); From 34d6d59986abb1d2cb5415a49b6c50f51ba1d2e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Fri, 15 Jun 2018 11:06:56 +0200 Subject: [PATCH 208/572] drm/amdgpu: Update pin_size values before unpinning BO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At least in theory, ttm_bo_validate may move the BO, in which case the pin_size accounting would be inconsistent with when the BO was pinned. Cc: stable@vger.kernel.org Signed-off-by: Michel Dänzer Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 5e4e1bd90383..026140f08ee9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -790,15 +790,6 @@ int amdgpu_bo_unpin(struct amdgpu_bo *bo) bo->pin_count--; if (bo->pin_count) return 0; - for (i = 0; i < bo->placement.num_placement; i++) { - bo->placements[i].lpfn = 0; - bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT; - } - r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); - if (unlikely(r)) { - dev_err(adev->dev, "%p validate failed for unpin\n", bo); - goto error; - } if (bo->tbo.mem.mem_type == TTM_PL_VRAM) { adev->vram_pin_size -= amdgpu_bo_size(bo); @@ -808,7 +799,14 @@ int amdgpu_bo_unpin(struct amdgpu_bo *bo) adev->gart_pin_size -= amdgpu_bo_size(bo); } -error: + for (i = 0; i < bo->placement.num_placement; i++) { + bo->placements[i].lpfn = 0; + bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT; + } + r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (unlikely(r)) + dev_err(adev->dev, "%p validate failed for unpin\n", bo); + return r; } From 5e9244ff585239630f15f8ad8e676bc91a94ca9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Tue, 12 Jun 2018 12:07:33 +0200 Subject: [PATCH 209/572] drm/amdgpu: Refactor amdgpu_vram_mgr_bo_invisible_size helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Preparation for the following fix, no functional change intended. Cc: stable@vger.kernel.org Signed-off-by: Michel Dänzer Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 6 ++---- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 16 ++++++++++++++++ 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 026140f08ee9..3526efa8960e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -762,8 +762,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type); if (domain == AMDGPU_GEM_DOMAIN_VRAM) { adev->vram_pin_size += amdgpu_bo_size(bo); - if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) - adev->invisible_pin_size += amdgpu_bo_size(bo); + adev->invisible_pin_size += amdgpu_vram_mgr_bo_invisible_size(bo); } else if (domain == AMDGPU_GEM_DOMAIN_GTT) { adev->gart_pin_size += amdgpu_bo_size(bo); } @@ -793,8 +792,7 @@ int amdgpu_bo_unpin(struct amdgpu_bo *bo) if (bo->tbo.mem.mem_type == TTM_PL_VRAM) { adev->vram_pin_size -= amdgpu_bo_size(bo); - if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) - adev->invisible_pin_size -= amdgpu_bo_size(bo); + adev->invisible_pin_size -= amdgpu_vram_mgr_bo_invisible_size(bo); } else if (bo->tbo.mem.mem_type == TTM_PL_TT) { adev->gart_pin_size -= amdgpu_bo_size(bo); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index e969c879d87e..e5da4654b630 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -73,6 +73,7 @@ bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_mem_reg *mem); uint64_t amdgpu_gtt_mgr_usage(struct ttm_mem_type_manager *man); int amdgpu_gtt_mgr_recover(struct ttm_mem_type_manager *man); +u64 amdgpu_vram_mgr_bo_invisible_size(struct amdgpu_bo *bo); uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man); uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 9c47e860e5e6..ae0049c6c52c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -96,6 +96,22 @@ static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev, adev->gmc.visible_vram_size : end) - start; } +/** + * amdgpu_vram_mgr_bo_invisible_size - CPU invisible BO size + * + * @bo: &amdgpu_bo buffer object (must be in VRAM) + * + * Returns: + * How much of the given &amdgpu_bo buffer object lies in CPU invisible VRAM. + */ +u64 amdgpu_vram_mgr_bo_invisible_size(struct amdgpu_bo *bo) +{ + if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) + return amdgpu_bo_size(bo); + + return 0; +} + /** * amdgpu_vram_mgr_new - allocate new ranges * From 7303b39e46b2f523334591f05fd9566cf929eb26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Thu, 14 Jun 2018 13:02:07 +0200 Subject: [PATCH 210/572] drm/amdgpu: Make amdgpu_vram_mgr_bo_invisible_size always accurate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Even BOs with AMDGPU_GEM_CREATE_NO_CPU_ACCESS may end up at least partially in CPU visible VRAM, in particular when all VRAM is visible. v2: * Don't take VRAM mgr spinlock, not needed (Christian König) * Make loop logic simpler and clearer. Cc: stable@vger.kernel.org Signed-off-by: Michel Dänzer Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index ae0049c6c52c..b6333f92ba45 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -106,10 +106,26 @@ static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev, */ u64 amdgpu_vram_mgr_bo_invisible_size(struct amdgpu_bo *bo) { - if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + struct ttm_mem_reg *mem = &bo->tbo.mem; + struct drm_mm_node *nodes = mem->mm_node; + unsigned pages = mem->num_pages; + u64 usage = 0; + + if (adev->gmc.visible_vram_size == adev->gmc.real_vram_size) + return 0; + + if (mem->start >= adev->gmc.visible_vram_size >> PAGE_SHIFT) return amdgpu_bo_size(bo); - return 0; + while (nodes && pages) { + usage += nodes->size << PAGE_SHIFT; + usage -= amdgpu_vram_mgr_vis_size(adev, nodes); + pages -= nodes->size; + ++nodes; + } + + return usage; } /** From 6fb8656646f996d1eef42e6d56203c4915cb9e08 Mon Sep 17 00:00:00 2001 From: Matthias Schiffer Date: Sat, 24 Mar 2018 17:57:49 +0100 Subject: [PATCH 211/572] mips: ftrace: fix static function graph tracing ftrace_graph_caller was never run after calling ftrace_trace_function, breaking the function graph tracer. Fix this, bringing it in line with the x86 implementation. While we're at it, also streamline the control flow of _mcount a bit to reduce the number of branches. This issue was reported before: https://www.linux-mips.org/archives/linux-mips/2014-11/msg00295.html Signed-off-by: Matthias Schiffer Tested-by: Matt Redfearn Patchwork: https://patchwork.linux-mips.org/patch/18929/ Signed-off-by: Paul Burton Cc: stable@vger.kernel.org # v3.17+ --- arch/mips/kernel/mcount.S | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/arch/mips/kernel/mcount.S b/arch/mips/kernel/mcount.S index f2ee7e1e3342..cff52b283e03 100644 --- a/arch/mips/kernel/mcount.S +++ b/arch/mips/kernel/mcount.S @@ -119,10 +119,20 @@ NESTED(_mcount, PT_SIZE, ra) EXPORT_SYMBOL(_mcount) PTR_LA t1, ftrace_stub PTR_L t2, ftrace_trace_function /* Prepare t2 for (1) */ - bne t1, t2, static_trace + beq t1, t2, fgraph_trace nop + MCOUNT_SAVE_REGS + + move a0, ra /* arg1: self return address */ + jalr t2 /* (1) call *ftrace_trace_function */ + move a1, AT /* arg2: parent's return address */ + + MCOUNT_RESTORE_REGS + +fgraph_trace: #ifdef CONFIG_FUNCTION_GRAPH_TRACER + PTR_LA t1, ftrace_stub PTR_L t3, ftrace_graph_return bne t1, t3, ftrace_graph_caller nop @@ -131,24 +141,11 @@ EXPORT_SYMBOL(_mcount) bne t1, t3, ftrace_graph_caller nop #endif - b ftrace_stub -#ifdef CONFIG_32BIT - addiu sp, sp, 8 -#else - nop -#endif -static_trace: - MCOUNT_SAVE_REGS - - move a0, ra /* arg1: self return address */ - jalr t2 /* (1) call *ftrace_trace_function */ - move a1, AT /* arg2: parent's return address */ - - MCOUNT_RESTORE_REGS #ifdef CONFIG_32BIT addiu sp, sp, 8 #endif + .globl ftrace_stub ftrace_stub: RETURN_BACK From 4f9de4df901fb84709fe3a864dfa4eaf35700f68 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Mon, 18 Jun 2018 21:58:00 -0700 Subject: [PATCH 212/572] qed: Fix possible memory leak in Rx error path handling. Memory for packet buffers need to be freed in the error paths as there is no consumer (e.g., upper layer) for such packets and that memory will never get freed. The issue was uncovered when port was attacked with flood of isatap packets, these are multicast packets hence were directed at all the PFs. For foce PF, this meant they were routed to the ll2 module which in turn drops such packets. Fixes: 0a7fb11c ("qed: Add Light L2 support") Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_ll2.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index c97ebd681c47..012973d75ad0 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -201,8 +201,9 @@ void qed_ll2b_complete_rx_packet(void *cxt, struct qed_ll2_comp_rx_data *data) skb = build_skb(buffer->data, 0); if (!skb) { - rc = -ENOMEM; - goto out_post; + DP_INFO(cdev, "Failed to build SKB\n"); + kfree(buffer->data); + goto out_post1; } data->u.placement_offset += NET_SKB_PAD; @@ -224,8 +225,14 @@ void qed_ll2b_complete_rx_packet(void *cxt, struct qed_ll2_comp_rx_data *data) cdev->ll2->cbs->rx_cb(cdev->ll2->cb_cookie, skb, data->opaque_data_0, data->opaque_data_1); + } else { + DP_VERBOSE(p_hwfn, (NETIF_MSG_RX_STATUS | NETIF_MSG_PKTDATA | + QED_MSG_LL2 | QED_MSG_STORAGE), + "Dropping the packet\n"); + kfree(buffer->data); } +out_post1: /* Update Buffer information and update FW producer */ buffer->data = new_data; buffer->phys_addr = new_phys_addr; From 3935a70968820c3994db4de7e6e1c7e814bff875 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Mon, 18 Jun 2018 21:58:01 -0700 Subject: [PATCH 213/572] qed: Add sanity check for SIMD fastpath handler. Avoid calling a SIMD fastpath handler if it is NULL. The check is needed to handle an unlikely scenario where unsolicited interrupt is destined to a PF in INTa mode. Fixes: fe56b9e6a ("qed: Add module with basic common support") Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_main.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index b04d57ca5176..5c10fd7210c3 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -567,8 +567,16 @@ static irqreturn_t qed_single_int(int irq, void *dev_instance) /* Fastpath interrupts */ for (j = 0; j < 64; j++) { if ((0x2ULL << j) & status) { - hwfn->simd_proto_handler[j].func( - hwfn->simd_proto_handler[j].token); + struct qed_simd_fp_handler *p_handler = + &hwfn->simd_proto_handler[j]; + + if (p_handler->func) + p_handler->func(p_handler->token); + else + DP_NOTICE(hwfn, + "Not calling fastpath handler as it is NULL [handler #%d, status 0x%llx]\n", + j, status); + status &= ~(0x2ULL << j); rc = IRQ_HANDLED; } From ff54d5cd9ec15546abc870452dd0b66eef4b4606 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Mon, 18 Jun 2018 21:58:02 -0700 Subject: [PATCH 214/572] qed: Do not advertise DCBX_LLD_MANAGED capability. Do not advertise DCBX_LLD_MANAGED capability i.e., do not allow external agent to manage the dcbx/lldp negotiation. MFW acts as lldp agent for qed* devices, and no other lldp agent is allowed to coexist with mfw. Also updated a debug print, to not to display the redundant info. Fixes: a1d8d8a51 ("qed: Add dcbnl support.") Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Ariel Elior Signed-off-by: Michal Kalderon Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_dcbx.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c index 8f31406ec894..f0b01385d5cb 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c @@ -255,9 +255,8 @@ qed_dcbx_get_app_protocol_type(struct qed_hwfn *p_hwfn, *type = DCBX_PROTOCOL_ROCE_V2; } else { *type = DCBX_MAX_PROTOCOL_TYPE; - DP_ERR(p_hwfn, - "No action required, App TLV id = 0x%x app_prio_bitmap = 0x%x\n", - id, app_prio_bitmap); + DP_ERR(p_hwfn, "No action required, App TLV entry = 0x%x\n", + app_prio_bitmap); return false; } @@ -1479,8 +1478,8 @@ static u8 qed_dcbnl_getcap(struct qed_dev *cdev, int capid, u8 *cap) *cap = 0x80; break; case DCB_CAP_ATTR_DCBX: - *cap = (DCB_CAP_DCBX_LLD_MANAGED | DCB_CAP_DCBX_VER_CEE | - DCB_CAP_DCBX_VER_IEEE | DCB_CAP_DCBX_STATIC); + *cap = (DCB_CAP_DCBX_VER_CEE | DCB_CAP_DCBX_VER_IEEE | + DCB_CAP_DCBX_STATIC); break; default: *cap = false; @@ -1548,8 +1547,6 @@ static u8 qed_dcbnl_getdcbx(struct qed_dev *cdev) if (!dcbx_info) return 0; - if (dcbx_info->operational.enabled) - mode |= DCB_CAP_DCBX_LLD_MANAGED; if (dcbx_info->operational.ieee) mode |= DCB_CAP_DCBX_VER_IEEE; if (dcbx_info->operational.cee) From c51818d5b793302b0923ade9856574ac28b9333b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 19 Jun 2018 14:33:54 -0700 Subject: [PATCH 215/572] bpf, xdp, i40e: fix i40e_build_skb skb reserve and truesize MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using skb_reserve(skb, I40E_SKB_PAD + (xdp->data - xdp->data_hard_start)) is clearly wrong since I40E_SKB_PAD already points to the offset where the original xdp->data was sitting since xdp->data_hard_start is defined as xdp->data - i40e_rx_offset(rx_ring) where latter offsets to I40E_SKB_PAD when build skb is used. However, also before cc5b114dcf98 ("bpf, i40e: add meta data support") this seems broken since bpf_xdp_adjust_head() helper could have been used to alter headroom and enlarge / shrink the frame and with that the assumption that the xdp->data remains unchanged does not hold and would push a bogus packet to upper stack. ixgbe got this right in 924708081629 ("ixgbe: add XDP support for pass and drop actions"). In any case, fix it by removing the I40E_SKB_PAD from both skb_reserve() and truesize calculation. Fixes: cc5b114dcf98 ("bpf, i40e: add meta data support") Fixes: 0c8493d90b6b ("i40e: add XDP support for pass and drop actions") Reported-by: Keith Busch Reported-by: Toshiaki Makita Signed-off-by: Daniel Borkmann Cc: Björn Töpel Cc: John Fastabend Tested-by: Keith Busch Acked-by: John Fastabend Acked-by: Alexander Duyck Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 8ffb7454e67c..ed6dbcfd4e96 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -2103,9 +2103,8 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring, unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2; #else unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + - SKB_DATA_ALIGN(I40E_SKB_PAD + - (xdp->data_end - - xdp->data_hard_start)); + SKB_DATA_ALIGN(xdp->data_end - + xdp->data_hard_start); #endif struct sk_buff *skb; @@ -2124,7 +2123,7 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring, return NULL; /* update pointers within the skb to store the data */ - skb_reserve(skb, I40E_SKB_PAD + (xdp->data - xdp->data_hard_start)); + skb_reserve(skb, xdp->data - xdp->data_hard_start); __skb_put(skb, xdp->data_end - xdp->data); if (metasize) skb_metadata_set(skb, metasize); From 87975a0117815b9b63527e8b8d9a9dffa6913132 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Tue, 19 Jun 2018 15:08:31 +0930 Subject: [PATCH 216/572] net/ncsi: Silence debug messages In normal operation we see this series of messages as the host drives the network device: ftgmac100 1e660000.ethernet eth0: NCSI: LSC AEN - channel 0 state down ftgmac100 1e660000.ethernet eth0: NCSI: suspending channel 0 ftgmac100 1e660000.ethernet eth0: NCSI: configuring channel 0 ftgmac100 1e660000.ethernet eth0: NCSI: channel 0 link down after config ftgmac100 1e660000.ethernet eth0: NCSI interface down ftgmac100 1e660000.ethernet eth0: NCSI: LSC AEN - channel 0 state up ftgmac100 1e660000.ethernet eth0: NCSI: configuring channel 0 ftgmac100 1e660000.ethernet eth0: NCSI interface up ftgmac100 1e660000.ethernet eth0: NCSI: LSC AEN - channel 0 state down ftgmac100 1e660000.ethernet eth0: NCSI: suspending channel 0 ftgmac100 1e660000.ethernet eth0: NCSI: configuring channel 0 ftgmac100 1e660000.ethernet eth0: NCSI: channel 0 link down after config ftgmac100 1e660000.ethernet eth0: NCSI interface down ftgmac100 1e660000.ethernet eth0: NCSI: LSC AEN - channel 0 state up ftgmac100 1e660000.ethernet eth0: NCSI: configuring channel 0 ftgmac100 1e660000.ethernet eth0: NCSI interface up This makes all of these messages netdev_dbg. They are still useful to debug eg. misbehaving network device firmware, but we do not need them filling up the kernel logs in normal operation. Acked-by: Samuel Mendoza-Jonas Signed-off-by: Joel Stanley Signed-off-by: David S. Miller --- drivers/net/ethernet/faraday/ftgmac100.c | 4 ++-- net/ncsi/ncsi-aen.c | 4 ++-- net/ncsi/ncsi-manage.c | 14 +++++++------- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 78db8e62a83f..ed6c76d20b45 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1735,8 +1735,8 @@ static void ftgmac100_ncsi_handler(struct ncsi_dev *nd) if (unlikely(nd->state != ncsi_dev_state_functional)) return; - netdev_info(nd->dev, "NCSI interface %s\n", - nd->link_up ? "up" : "down"); + netdev_dbg(nd->dev, "NCSI interface %s\n", + nd->link_up ? "up" : "down"); } static void ftgmac100_setup_clk(struct ftgmac100 *priv) diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c index e7b05de1e6d1..f899ed61bb57 100644 --- a/net/ncsi/ncsi-aen.c +++ b/net/ncsi/ncsi-aen.c @@ -73,8 +73,8 @@ static int ncsi_aen_handler_lsc(struct ncsi_dev_priv *ndp, ncm->data[2] = data; ncm->data[4] = ntohl(lsc->oem_status); - netdev_info(ndp->ndev.dev, "NCSI: LSC AEN - channel %u state %s\n", - nc->id, data & 0x1 ? "up" : "down"); + netdev_dbg(ndp->ndev.dev, "NCSI: LSC AEN - channel %u state %s\n", + nc->id, data & 0x1 ? "up" : "down"); chained = !list_empty(&nc->link); state = nc->state; diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 5561e221b71f..616441c2b54f 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -816,9 +816,9 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) } else { hot_nc = NULL; nc->state = NCSI_CHANNEL_INACTIVE; - netdev_warn(ndp->ndev.dev, - "NCSI: channel %u link down after config\n", - nc->id); + netdev_dbg(ndp->ndev.dev, + "NCSI: channel %u link down after config\n", + nc->id); } spin_unlock_irqrestore(&nc->lock, flags); @@ -1199,14 +1199,14 @@ int ncsi_process_next_channel(struct ncsi_dev_priv *ndp) switch (old_state) { case NCSI_CHANNEL_INACTIVE: ndp->ndev.state = ncsi_dev_state_config; - netdev_info(ndp->ndev.dev, "NCSI: configuring channel %u\n", - nc->id); + netdev_dbg(ndp->ndev.dev, "NCSI: configuring channel %u\n", + nc->id); ncsi_configure_channel(ndp); break; case NCSI_CHANNEL_ACTIVE: ndp->ndev.state = ncsi_dev_state_suspend; - netdev_info(ndp->ndev.dev, "NCSI: suspending channel %u\n", - nc->id); + netdev_dbg(ndp->ndev.dev, "NCSI: suspending channel %u\n", + nc->id); ncsi_suspend_channel(ndp); break; default: From 5d3b146736d5f47d1c806e3043ebc8b627c6277e Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Tue, 19 Jun 2018 15:08:32 +0930 Subject: [PATCH 217/572] net/ncsi: Drop no more channels message This does not provide useful information. As the ncsi maintainer said: > either we get a channel or broadcom has gone out to lunch Acked-by: Samuel Mendoza-Jonas Signed-off-by: Joel Stanley Signed-off-by: David S. Miller --- net/ncsi/ncsi-manage.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 616441c2b54f..716493a61ba6 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -1226,8 +1226,6 @@ out: return ncsi_choose_active_channel(ndp); } - netdev_printk(KERN_DEBUG, ndp->ndev.dev, - "NCSI: No more channels to process\n"); ncsi_report_link(ndp, false); return -ENODEV; } From 6e42a3f5cdb60e2641472a8d668cce13736e0443 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Tue, 19 Jun 2018 15:08:33 +0930 Subject: [PATCH 218/572] net/ncsi: Use netdev_dbg for debug messages This moves all of the netdev_printk(KERN_DEBUG, ...) messages over to netdev_dbg. As Joe explains: > netdev_dbg is not included in object code unless > DEBUG is defined or CONFIG_DYNAMIC_DEBUG is set. > And then, it is not emitted into the log unless > DEBUG is set or this specific netdev_dbg is enabled > via the dynamic debug control file. Which is what we're after in this case. Acked-by: Samuel Mendoza-Jonas Signed-off-by: Joel Stanley Signed-off-by: David S. Miller --- net/ncsi/ncsi-aen.c | 6 +++--- net/ncsi/ncsi-manage.c | 33 +++++++++++++++------------------ 2 files changed, 18 insertions(+), 21 deletions(-) diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c index f899ed61bb57..25e483e8278b 100644 --- a/net/ncsi/ncsi-aen.c +++ b/net/ncsi/ncsi-aen.c @@ -148,9 +148,9 @@ static int ncsi_aen_handler_hncdsc(struct ncsi_dev_priv *ndp, hncdsc = (struct ncsi_aen_hncdsc_pkt *)h; ncm->data[3] = ntohl(hncdsc->status); spin_unlock_irqrestore(&nc->lock, flags); - netdev_printk(KERN_DEBUG, ndp->ndev.dev, - "NCSI: host driver %srunning on channel %u\n", - ncm->data[3] & 0x1 ? "" : "not ", nc->id); + netdev_dbg(ndp->ndev.dev, + "NCSI: host driver %srunning on channel %u\n", + ncm->data[3] & 0x1 ? "" : "not ", nc->id); return 0; } diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 716493a61ba6..091284760d21 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -788,8 +788,8 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) } break; case ncsi_dev_state_config_done: - netdev_printk(KERN_DEBUG, ndp->ndev.dev, - "NCSI: channel %u config done\n", nc->id); + netdev_dbg(ndp->ndev.dev, "NCSI: channel %u config done\n", + nc->id); spin_lock_irqsave(&nc->lock, flags); if (nc->reconfigure_needed) { /* This channel's configuration has been updated @@ -804,8 +804,7 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) list_add_tail_rcu(&nc->link, &ndp->channel_queue); spin_unlock_irqrestore(&ndp->lock, flags); - netdev_printk(KERN_DEBUG, dev, - "Dirty NCSI channel state reset\n"); + netdev_dbg(dev, "Dirty NCSI channel state reset\n"); ncsi_process_next_channel(ndp); break; } @@ -908,9 +907,9 @@ static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp) } ncm = &found->modes[NCSI_MODE_LINK]; - netdev_printk(KERN_DEBUG, ndp->ndev.dev, - "NCSI: Channel %u added to queue (link %s)\n", - found->id, ncm->data[2] & 0x1 ? "up" : "down"); + netdev_dbg(ndp->ndev.dev, + "NCSI: Channel %u added to queue (link %s)\n", + found->id, ncm->data[2] & 0x1 ? "up" : "down"); out: spin_lock_irqsave(&ndp->lock, flags); @@ -1316,9 +1315,9 @@ static int ncsi_kick_channels(struct ncsi_dev_priv *ndp) if ((ndp->ndev.state & 0xff00) == ncsi_dev_state_config || !list_empty(&nc->link)) { - netdev_printk(KERN_DEBUG, nd->dev, - "NCSI: channel %p marked dirty\n", - nc); + netdev_dbg(nd->dev, + "NCSI: channel %p marked dirty\n", + nc); nc->reconfigure_needed = true; } spin_unlock_irqrestore(&nc->lock, flags); @@ -1336,8 +1335,7 @@ static int ncsi_kick_channels(struct ncsi_dev_priv *ndp) list_add_tail_rcu(&nc->link, &ndp->channel_queue); spin_unlock_irqrestore(&ndp->lock, flags); - netdev_printk(KERN_DEBUG, nd->dev, - "NCSI: kicked channel %p\n", nc); + netdev_dbg(nd->dev, "NCSI: kicked channel %p\n", nc); n++; } } @@ -1368,8 +1366,8 @@ int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) list_for_each_entry_rcu(vlan, &ndp->vlan_vids, list) { n_vids++; if (vlan->vid == vid) { - netdev_printk(KERN_DEBUG, dev, - "NCSI: vid %u already registered\n", vid); + netdev_dbg(dev, "NCSI: vid %u already registered\n", + vid); return 0; } } @@ -1388,7 +1386,7 @@ int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) vlan->vid = vid; list_add_rcu(&vlan->list, &ndp->vlan_vids); - netdev_printk(KERN_DEBUG, dev, "NCSI: Added new vid %u\n", vid); + netdev_dbg(dev, "NCSI: Added new vid %u\n", vid); found = ncsi_kick_channels(ndp) != 0; @@ -1417,8 +1415,7 @@ int ncsi_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) /* Remove the VLAN id from our internal list */ list_for_each_entry_safe(vlan, tmp, &ndp->vlan_vids, list) if (vlan->vid == vid) { - netdev_printk(KERN_DEBUG, dev, - "NCSI: vid %u found, removing\n", vid); + netdev_dbg(dev, "NCSI: vid %u found, removing\n", vid); list_del_rcu(&vlan->list); found = true; kfree(vlan); @@ -1545,7 +1542,7 @@ void ncsi_stop_dev(struct ncsi_dev *nd) } } - netdev_printk(KERN_DEBUG, ndp->ndev.dev, "NCSI: Stopping device\n"); + netdev_dbg(ndp->ndev.dev, "NCSI: Stopping device\n"); ncsi_report_link(ndp, true); } EXPORT_SYMBOL_GPL(ncsi_stop_dev); From 01a21986f8ed52911eafdc728f595d2252b71451 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Tue, 19 Jun 2018 15:08:34 +0930 Subject: [PATCH 219/572] MAINTAINERS: Add Sam as the maintainer for NCSI Sam has been handing the maintenance of NCSI for a number release cycles now. Acked-by: Samuel Mendoza-Jonas Signed-off-by: Joel Stanley Signed-off-by: David S. Miller --- MAINTAINERS | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index a5f04264ad10..ebb3168fd9e0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9756,6 +9756,11 @@ L: linux-scsi@vger.kernel.org S: Maintained F: drivers/scsi/NCR_D700.* +NCSI LIBRARY: +M: Samuel Mendoza-Jonas +S: Maintained +F: net/ncsi/ + NCT6775 HARDWARE MONITOR DRIVER M: Guenter Roeck L: linux-hwmon@vger.kernel.org From 3256d29fc7aecdf99feb1cb9475ed2252769a8a7 Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan Date: Tue, 19 Jun 2018 08:15:24 -0700 Subject: [PATCH 220/572] enic: initialize enic->rfs_h.lock in enic_probe lockdep spotted that we are using rfs_h.lock in enic_get_rxnfc() without initializing. rfs_h.lock is initialized in enic_open(). But ethtool_ops can be called when interface is down. Move enic_rfs_flw_tbl_init to enic_probe. INFO: trying to register non-static key. the code is fine but needs lockdep annotation. turning off the locking correctness validator. CPU: 18 PID: 1189 Comm: ethtool Not tainted 4.17.0-rc7-devel+ #27 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-20171110_100015-anatol 04/01/2014 Call Trace: dump_stack+0x85/0xc0 register_lock_class+0x550/0x560 ? __handle_mm_fault+0xa8b/0x1100 __lock_acquire+0x81/0x670 lock_acquire+0xb9/0x1e0 ? enic_get_rxnfc+0x139/0x2b0 [enic] _raw_spin_lock_bh+0x38/0x80 ? enic_get_rxnfc+0x139/0x2b0 [enic] enic_get_rxnfc+0x139/0x2b0 [enic] ethtool_get_rxnfc+0x8d/0x1c0 dev_ethtool+0x16c8/0x2400 ? __mutex_lock+0x64d/0xa00 ? dev_load+0x6a/0x150 dev_ioctl+0x253/0x4b0 sock_do_ioctl+0x9a/0x130 sock_ioctl+0x1af/0x350 do_vfs_ioctl+0x8e/0x670 ? syscall_trace_enter+0x1e2/0x380 ksys_ioctl+0x60/0x90 __x64_sys_ioctl+0x16/0x20 do_syscall_64+0x5a/0x170 entry_SYSCALL_64_after_hwframe+0x49/0xbe Signed-off-by: Govindarajulu Varadarajan Signed-off-by: David S. Miller --- drivers/net/ethernet/cisco/enic/enic_clsf.c | 3 +-- drivers/net/ethernet/cisco/enic/enic_main.c | 3 ++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/enic_clsf.c b/drivers/net/ethernet/cisco/enic/enic_clsf.c index 973c1fb70d09..99038dfc7fbe 100644 --- a/drivers/net/ethernet/cisco/enic/enic_clsf.c +++ b/drivers/net/ethernet/cisco/enic/enic_clsf.c @@ -79,7 +79,6 @@ void enic_rfs_flw_tbl_init(struct enic *enic) enic->rfs_h.max = enic->config.num_arfs; enic->rfs_h.free = enic->rfs_h.max; enic->rfs_h.toclean = 0; - enic_rfs_timer_start(enic); } void enic_rfs_flw_tbl_free(struct enic *enic) @@ -88,7 +87,6 @@ void enic_rfs_flw_tbl_free(struct enic *enic) enic_rfs_timer_stop(enic); spin_lock_bh(&enic->rfs_h.lock); - enic->rfs_h.free = 0; for (i = 0; i < (1 << ENIC_RFS_FLW_BITSHIFT); i++) { struct hlist_head *hhead; struct hlist_node *tmp; @@ -99,6 +97,7 @@ void enic_rfs_flw_tbl_free(struct enic *enic) enic_delfltr(enic, n->fltr_id); hlist_del(&n->node); kfree(n); + enic->rfs_h.free++; } } spin_unlock_bh(&enic->rfs_h.lock); diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 30d2eaa18c04..e6ad581eadd8 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -1971,7 +1971,7 @@ static int enic_open(struct net_device *netdev) vnic_intr_unmask(&enic->intr[i]); enic_notify_timer_start(enic); - enic_rfs_flw_tbl_init(enic); + enic_rfs_timer_start(enic); return 0; @@ -2904,6 +2904,7 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) timer_setup(&enic->notify_timer, enic_notify_timer, 0); + enic_rfs_flw_tbl_init(enic); enic_set_rx_coal_setting(enic); INIT_WORK(&enic->reset, enic_reset); INIT_WORK(&enic->tx_hang_reset, enic_tx_hang_reset); From 4e8439aa34802deab11cee68b0ecb18f887fb153 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Sun, 17 Jun 2018 23:40:53 +0200 Subject: [PATCH 221/572] net: hamradio: use eth_broadcast_addr The array bpq_eth_addr is only used to get the size of an address, whereas the bcast_addr is used to set the broadcast address. This leads to a warning when using clang: drivers/net/hamradio/bpqether.c:94:13: warning: variable 'bpq_eth_addr' is not needed and will not be emitted [-Wunneeded-internal-declaration] static char bpq_eth_addr[6]; ^ Remove both variables and use the common eth_broadcast_addr to set the broadcast address. Signed-off-by: Stefan Agner Signed-off-by: David S. Miller --- drivers/net/hamradio/bpqether.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c index f347fd9c5b28..777fa59f5e0c 100644 --- a/drivers/net/hamradio/bpqether.c +++ b/drivers/net/hamradio/bpqether.c @@ -89,10 +89,6 @@ static const char banner[] __initconst = KERN_INFO \ "AX.25: bpqether driver version 004\n"; -static char bcast_addr[6]={0xFF,0xFF,0xFF,0xFF,0xFF,0xFF}; - -static char bpq_eth_addr[6]; - static int bpq_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); static int bpq_device_event(struct notifier_block *, unsigned long, void *); @@ -501,8 +497,8 @@ static int bpq_new_device(struct net_device *edev) bpq->ethdev = edev; bpq->axdev = ndev; - memcpy(bpq->dest_addr, bcast_addr, sizeof(bpq_eth_addr)); - memcpy(bpq->acpt_addr, bcast_addr, sizeof(bpq_eth_addr)); + eth_broadcast_addr(bpq->dest_addr); + eth_broadcast_addr(bpq->acpt_addr); err = register_netdevice(ndev); if (err) From 548feb33c598dfaf9f8e066b842441ac49b84a8a Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 18 Jun 2018 16:15:57 +0800 Subject: [PATCH 222/572] ipvlan: use ETH_MAX_MTU as max mtu Similar to the fixes on team and bonding, this restores the ability to set an ipvlan device's mtu to anything higher than 1500. Fixes: 91572088e3fd ("net: use core MTU range checking in core net infra") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- drivers/net/ipvlan/ipvlan_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 4377c26f714d..d02f0a7c534e 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -693,6 +693,7 @@ void ipvlan_link_setup(struct net_device *dev) { ether_setup(dev); + dev->max_mtu = ETH_MAX_MTU; dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING); dev->priv_flags |= IFF_UNICAST_FLT | IFF_NO_QUEUE; dev->netdev_ops = &ipvlan_netdev_ops; From e5223438280d76ef782592cf643e09441140d14c Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Mon, 18 Jun 2018 15:04:05 +0300 Subject: [PATCH 223/572] net: net_failover: fix typo in net_failover_slave_register() Sync both unicast and multicast lists instead of unicast twice. Fixes: cfc80d9a116 ("net: Introduce net_failover driver") Reviewed-by: Joao Martins Signed-off-by: Liran Alon Signed-off-by: David S. Miller --- drivers/net/net_failover.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/net_failover.c b/drivers/net/net_failover.c index 83f7420ddea5..4f390fa557e4 100644 --- a/drivers/net/net_failover.c +++ b/drivers/net/net_failover.c @@ -527,7 +527,7 @@ static int net_failover_slave_register(struct net_device *slave_dev, netif_addr_lock_bh(failover_dev); dev_uc_sync_multiple(slave_dev, failover_dev); - dev_uc_sync_multiple(slave_dev, failover_dev); + dev_mc_sync_multiple(slave_dev, failover_dev); netif_addr_unlock_bh(failover_dev); err = vlan_vids_add_by_dev(slave_dev, failover_dev); From 9b0a8da8c4c6e91012ab03a801acc5d8011c7c2f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 18 Jun 2018 05:24:31 -0700 Subject: [PATCH 224/572] net/ipv6: respect rcu grace period before freeing fib6_info syzbot reported use after free that is caused by fib6_info being freed without a proper RCU grace period. CPU: 0 PID: 1407 Comm: udevd Not tainted 4.17.0+ #39 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1b9/0x294 lib/dump_stack.c:113 print_address_description+0x6c/0x20b mm/kasan/report.c:256 kasan_report_error mm/kasan/report.c:354 [inline] kasan_report.cold.7+0x242/0x2fe mm/kasan/report.c:412 __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:433 __read_once_size include/linux/compiler.h:188 [inline] find_rr_leaf net/ipv6/route.c:705 [inline] rt6_select net/ipv6/route.c:761 [inline] fib6_table_lookup+0x12b7/0x14d0 net/ipv6/route.c:1823 ip6_pol_route+0x1c2/0x1020 net/ipv6/route.c:1856 ip6_pol_route_output+0x54/0x70 net/ipv6/route.c:2082 fib6_rule_lookup+0x211/0x6d0 net/ipv6/fib6_rules.c:122 ip6_route_output_flags+0x2c5/0x350 net/ipv6/route.c:2110 ip6_route_output include/net/ip6_route.h:82 [inline] icmpv6_xrlim_allow net/ipv6/icmp.c:211 [inline] icmp6_send+0x147c/0x2da0 net/ipv6/icmp.c:535 icmpv6_send+0x17a/0x300 net/ipv6/ip6_icmp.c:43 ip6_link_failure+0xa5/0x790 net/ipv6/route.c:2244 dst_link_failure include/net/dst.h:427 [inline] ndisc_error_report+0xd1/0x1c0 net/ipv6/ndisc.c:695 neigh_invalidate+0x246/0x550 net/core/neighbour.c:892 neigh_timer_handler+0xaf9/0xde0 net/core/neighbour.c:978 call_timer_fn+0x230/0x940 kernel/time/timer.c:1326 expire_timers kernel/time/timer.c:1363 [inline] __run_timers+0x79e/0xc50 kernel/time/timer.c:1666 run_timer_softirq+0x4c/0x70 kernel/time/timer.c:1692 __do_softirq+0x2e0/0xaf5 kernel/softirq.c:284 invoke_softirq kernel/softirq.c:364 [inline] irq_exit+0x1d1/0x200 kernel/softirq.c:404 exiting_irq arch/x86/include/asm/apic.h:527 [inline] smp_apic_timer_interrupt+0x17e/0x710 arch/x86/kernel/apic/apic.c:1052 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:863 RIP: 0010:strlen+0x5e/0xa0 lib/string.c:482 Code: 24 00 74 3b 48 bb 00 00 00 00 00 fc ff df 4c 89 e0 48 83 c0 01 48 89 c2 48 89 c1 48 c1 ea 03 83 e1 07 0f b6 14 1a 38 ca 7f 04 <84> d2 75 23 80 38 00 75 de 48 83 c4 08 4c 29 e0 5b 41 5c 5d c3 48 RSP: 0018:ffff8801af117850 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff13 RAX: ffff880197f53bd0 RBX: dffffc0000000000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffffffff81c5b06c RDI: ffff880197f53bc0 RBP: ffff8801af117868 R08: ffff88019a976540 R09: 0000000000000000 R10: ffff88019a976540 R11: 0000000000000000 R12: ffff880197f53bc0 R13: ffff880197f53bc0 R14: ffffffff899e4e90 R15: ffff8801d91c6a00 strlen include/linux/string.h:267 [inline] getname_kernel+0x24/0x370 fs/namei.c:218 open_exec+0x17/0x70 fs/exec.c:882 load_elf_binary+0x968/0x5610 fs/binfmt_elf.c:780 search_binary_handler+0x17d/0x570 fs/exec.c:1653 exec_binprm fs/exec.c:1695 [inline] __do_execve_file.isra.35+0x16fe/0x2710 fs/exec.c:1819 do_execveat_common fs/exec.c:1866 [inline] do_execve fs/exec.c:1883 [inline] __do_sys_execve fs/exec.c:1964 [inline] __se_sys_execve fs/exec.c:1959 [inline] __x64_sys_execve+0x8f/0xc0 fs/exec.c:1959 do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x7f1576a46207 Code: 77 19 f4 48 89 d7 44 89 c0 0f 05 48 3d 00 f0 ff ff 76 e0 f7 d8 64 41 89 01 eb d8 f7 d8 64 41 89 01 eb df b8 3b 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 02 f3 c3 48 8b 15 00 8c 2d 00 f7 d8 64 89 02 RSP: 002b:00007ffff2784568 EFLAGS: 00000202 ORIG_RAX: 000000000000003b RAX: ffffffffffffffda RBX: 00000000ffffffff RCX: 00007f1576a46207 RDX: 0000000001215b10 RSI: 00007ffff2784660 RDI: 00007ffff2785670 RBP: 0000000000625500 R08: 000000000000589c R09: 000000000000589c R10: 0000000000000000 R11: 0000000000000202 R12: 0000000001215b10 R13: 0000000000000007 R14: 0000000001204250 R15: 0000000000000005 Allocated by task 12188: save_stack+0x43/0xd0 mm/kasan/kasan.c:448 set_track mm/kasan/kasan.c:460 [inline] kasan_kmalloc+0xc4/0xe0 mm/kasan/kasan.c:553 kmem_cache_alloc_trace+0x152/0x780 mm/slab.c:3620 kmalloc include/linux/slab.h:513 [inline] kzalloc include/linux/slab.h:706 [inline] fib6_info_alloc+0xbb/0x280 net/ipv6/ip6_fib.c:152 ip6_route_info_create+0x782/0x2b50 net/ipv6/route.c:3013 ip6_route_add+0x23/0xb0 net/ipv6/route.c:3154 ipv6_route_ioctl+0x5a5/0x760 net/ipv6/route.c:3660 inet6_ioctl+0x100/0x1f0 net/ipv6/af_inet6.c:546 sock_do_ioctl+0xe4/0x3e0 net/socket.c:973 sock_ioctl+0x30d/0x680 net/socket.c:1097 vfs_ioctl fs/ioctl.c:46 [inline] file_ioctl fs/ioctl.c:500 [inline] do_vfs_ioctl+0x1cf/0x16f0 fs/ioctl.c:684 ksys_ioctl+0xa9/0xd0 fs/ioctl.c:701 __do_sys_ioctl fs/ioctl.c:708 [inline] __se_sys_ioctl fs/ioctl.c:706 [inline] __x64_sys_ioctl+0x73/0xb0 fs/ioctl.c:706 do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 1402: save_stack+0x43/0xd0 mm/kasan/kasan.c:448 set_track mm/kasan/kasan.c:460 [inline] __kasan_slab_free+0x11a/0x170 mm/kasan/kasan.c:521 kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528 __cache_free mm/slab.c:3498 [inline] kfree+0xd9/0x260 mm/slab.c:3813 fib6_info_destroy+0x29b/0x350 net/ipv6/ip6_fib.c:207 fib6_info_release include/net/ip6_fib.h:286 [inline] __ip6_del_rt_siblings net/ipv6/route.c:3235 [inline] ip6_route_del+0x11c4/0x13b0 net/ipv6/route.c:3316 ipv6_route_ioctl+0x616/0x760 net/ipv6/route.c:3663 inet6_ioctl+0x100/0x1f0 net/ipv6/af_inet6.c:546 sock_do_ioctl+0xe4/0x3e0 net/socket.c:973 sock_ioctl+0x30d/0x680 net/socket.c:1097 vfs_ioctl fs/ioctl.c:46 [inline] file_ioctl fs/ioctl.c:500 [inline] do_vfs_ioctl+0x1cf/0x16f0 fs/ioctl.c:684 ksys_ioctl+0xa9/0xd0 fs/ioctl.c:701 __do_sys_ioctl fs/ioctl.c:708 [inline] __se_sys_ioctl fs/ioctl.c:706 [inline] __x64_sys_ioctl+0x73/0xb0 fs/ioctl.c:706 do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe The buggy address belongs to the object at ffff8801b5df2580 which belongs to the cache kmalloc-256 of size 256 The buggy address is located 8 bytes inside of 256-byte region [ffff8801b5df2580, ffff8801b5df2680) The buggy address belongs to the page: page:ffffea0006d77c80 count:1 mapcount:0 mapping:ffff8801da8007c0 index:0xffff8801b5df2e40 flags: 0x2fffc0000000100(slab) raw: 02fffc0000000100 ffffea0006c5cc48 ffffea0007363308 ffff8801da8007c0 raw: ffff8801b5df2e40 ffff8801b5df2080 0000000100000006 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8801b5df2480: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8801b5df2500: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc > ffff8801b5df2580: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff8801b5df2600: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8801b5df2680: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb Fixes: a64efe142f5e ("net/ipv6: introduce fib6_info struct and helpers") Signed-off-by: Eric Dumazet Cc: David Ahern Reported-by: syzbot+9e6d75e3edef427ee888@syzkaller.appspotmail.com Acked-by: David Ahern Tested-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 5 +++-- net/ipv6/ip6_fib.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 5cba71d2dc44..71b9043aa0e7 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -170,6 +170,7 @@ struct fib6_info { unused:3; struct fib6_nh fib6_nh; + struct rcu_head rcu; }; struct rt6_info { @@ -273,7 +274,7 @@ static inline void ip6_rt_put(struct rt6_info *rt) } struct fib6_info *fib6_info_alloc(gfp_t gfp_flags); -void fib6_info_destroy(struct fib6_info *f6i); +void fib6_info_destroy_rcu(struct rcu_head *head); static inline void fib6_info_hold(struct fib6_info *f6i) { @@ -283,7 +284,7 @@ static inline void fib6_info_hold(struct fib6_info *f6i) static inline void fib6_info_release(struct fib6_info *f6i) { if (f6i && atomic_dec_and_test(&f6i->fib6_ref)) - fib6_info_destroy(f6i); + call_rcu(&f6i->rcu, fib6_info_destroy_rcu); } enum fib6_walk_state { diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 39d1d487eca2..1fb2f3118d60 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -167,8 +167,9 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags) return f6i; } -void fib6_info_destroy(struct fib6_info *f6i) +void fib6_info_destroy_rcu(struct rcu_head *head) { + struct fib6_info *f6i = container_of(head, struct fib6_info, rcu); struct rt6_exception_bucket *bucket; struct dst_metrics *m; @@ -206,7 +207,7 @@ void fib6_info_destroy(struct fib6_info *f6i) kfree(f6i); } -EXPORT_SYMBOL_GPL(fib6_info_destroy); +EXPORT_SYMBOL_GPL(fib6_info_destroy_rcu); static struct fib6_node *node_alloc(struct net *net) { From f696a21c229ac3e85bc239efc52f4530b43002c5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 18 Jun 2018 16:20:39 +0200 Subject: [PATCH 225/572] ptp: replace getnstimeofday64() with ktime_get_real_ts64() getnstimeofday64() is deprecated and getting replaced throughout the kernel with ktime_get_*() based helpers for a more consistent interface. The two functions do the exact same thing, so this is just a cosmetic change. Signed-off-by: Arnd Bergmann Acked-by: Richard Cochran Signed-off-by: David S. Miller --- drivers/ptp/ptp_chardev.c | 4 ++-- drivers/ptp/ptp_qoriq.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c index 767c485af59b..547dbdac9d54 100644 --- a/drivers/ptp/ptp_chardev.c +++ b/drivers/ptp/ptp_chardev.c @@ -221,7 +221,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) } pct = &sysoff->ts[0]; for (i = 0; i < sysoff->n_samples; i++) { - getnstimeofday64(&ts); + ktime_get_real_ts64(&ts); pct->sec = ts.tv_sec; pct->nsec = ts.tv_nsec; pct++; @@ -230,7 +230,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) pct->nsec = ts.tv_nsec; pct++; } - getnstimeofday64(&ts); + ktime_get_real_ts64(&ts); pct->sec = ts.tv_sec; pct->nsec = ts.tv_nsec; if (copy_to_user((void __user *)arg, sysoff, sizeof(*sysoff))) diff --git a/drivers/ptp/ptp_qoriq.c b/drivers/ptp/ptp_qoriq.c index 1468a1642b49..e8652c148c52 100644 --- a/drivers/ptp/ptp_qoriq.c +++ b/drivers/ptp/ptp_qoriq.c @@ -374,7 +374,7 @@ static int qoriq_ptp_probe(struct platform_device *dev) pr_err("ioremap ptp registers failed\n"); goto no_ioremap; } - getnstimeofday64(&now); + ktime_get_real_ts64(&now); ptp_qoriq_settime(&qoriq_ptp->caps, &now); tmr_ctrl = From 8c43bd1706885ba1acfa88da02bc60a2ec16f68c Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 18 Jun 2018 12:30:37 -0700 Subject: [PATCH 226/572] net/tcp: Fix socket lookups with SO_BINDTODEVICE Similar to 69678bcd4d2d ("udp: fix SO_BINDTODEVICE"), TCP socket lookups need to fail if dev_match is not true. Currently, a packet to a given port can match a socket bound to device when it should not. In the VRF case, this causes the lookup to hit a VRF socket and not a global socket resulting in a response trying to go through the VRF when it should not. Fixes: 3fa6f616a7a4d ("net: ipv4: add second dif to inet socket lookups") Fixes: 4297a0ef08572 ("net: ipv6: add second dif to inet6 socket lookups") Reported-by: Lou Berger Diagnosed-by: Renato Westphal Tested-by: Renato Westphal Signed-off-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/inet_hashtables.c | 4 ++-- net/ipv6/inet6_hashtables.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 31ff46daae97..3647167c8fa3 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -243,9 +243,9 @@ static inline int compute_score(struct sock *sk, struct net *net, bool dev_match = (sk->sk_bound_dev_if == dif || sk->sk_bound_dev_if == sdif); - if (exact_dif && !dev_match) + if (!dev_match) return -1; - if (sk->sk_bound_dev_if && dev_match) + if (sk->sk_bound_dev_if) score += 4; } if (sk->sk_incoming_cpu == raw_smp_processor_id()) diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 2febe26de6a1..595ad408dba0 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -113,9 +113,9 @@ static inline int compute_score(struct sock *sk, struct net *net, bool dev_match = (sk->sk_bound_dev_if == dif || sk->sk_bound_dev_if == sdif); - if (exact_dif && !dev_match) + if (!dev_match) return -1; - if (sk->sk_bound_dev_if && dev_match) + if (sk->sk_bound_dev_if) score++; } if (sk->sk_incoming_cpu == raw_smp_processor_id()) From 56f772279a762984f6e9ebbf24a7c829faba5712 Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan Date: Mon, 18 Jun 2018 10:01:05 -0700 Subject: [PATCH 227/572] enic: do not overwrite error code In failure path, we overwrite err to what vnic_rq_disable() returns. In case it returns 0, enic_open() returns success in case of error. Reported-by: Ben Hutchings Fixes: e8588e268509 ("enic: enable rq before updating rq descriptors") Signed-off-by: Govindarajulu Varadarajan Signed-off-by: David S. Miller --- drivers/net/ethernet/cisco/enic/enic_main.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index e6ad581eadd8..90c645b8538e 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -1920,7 +1920,7 @@ static int enic_open(struct net_device *netdev) { struct enic *enic = netdev_priv(netdev); unsigned int i; - int err; + int err, ret; err = enic_request_intr(enic); if (err) { @@ -1977,10 +1977,9 @@ static int enic_open(struct net_device *netdev) err_out_free_rq: for (i = 0; i < enic->rq_count; i++) { - err = vnic_rq_disable(&enic->rq[i]); - if (err) - return err; - vnic_rq_clean(&enic->rq[i], enic_free_rq_buf); + ret = vnic_rq_disable(&enic->rq[i]); + if (!ret) + vnic_rq_clean(&enic->rq[i], enic_free_rq_buf); } enic_dev_notify_unset(enic); err_out_free_intr: From 7892bd081045222b9e4027fec279a28d6fe7aa66 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Tue, 19 Jun 2018 17:23:17 +0800 Subject: [PATCH 228/572] net: propagate dev_get_valid_name return code if dev_get_valid_name failed, propagate its return code and remove the setting err to ENODEV, it will be set to 0 again before dev_change_net_namespace exits. Signed-off-by: Li RongQing Signed-off-by: David S. Miller --- net/core/dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 57b7bab5f70b..a5aa1c7444e6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8643,7 +8643,8 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char /* We get here if we can't use the current device name */ if (!pat) goto out; - if (dev_get_valid_name(net, dev, pat) < 0) + err = dev_get_valid_name(net, dev, pat); + if (err < 0) goto out; } @@ -8655,7 +8656,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char dev_close(dev); /* And unlink it from device chain */ - err = -ENODEV; unlist_netdevice(dev); synchronize_net(); From 758380b8155f69b4e2f77f27562f8a7a466749d6 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 12 Jun 2018 19:38:08 +1000 Subject: [PATCH 229/572] powerpc/64s/radix: Fix radix_kvm_prefetch_workaround paca access of not possible CPU If possible CPUs are limited (e.g., by kexec), then the kvm prefetch workaround function can access the paca pointer for a !possible CPU. Fixes: d2e60075a3d44 ("powerpc/64: Use array of paca pointers and allocate pacas individually") Cc: stable@kernel.org Reported-by: Pridhiviraj Paidipeddi Tested-by: Pridhiviraj Paidipeddi Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/mm/tlb-radix.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index a734e486664d..1135b43a597c 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -1097,6 +1097,8 @@ extern void radix_kvm_prefetch_workaround(struct mm_struct *mm) for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) { if (sib == cpu) continue; + if (!cpu_possible(sib)) + continue; if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu) flush = true; } From fadd03c615922d8521a2e76d4ba2335891cb2790 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 14 Jun 2018 16:01:52 +0530 Subject: [PATCH 230/572] powerpc/mm/hash/4k: Free hugetlb page table caches correctly. With 4k page size for hugetlb we allocate hugepage directories from its on slab cache. With patch 0c4d26802 ("powerpc/book3s64/mm: Simplify the rcu callback for page table free") we missed to free these allocated hugepd tables. Update pgtable_free to handle hugetlb hugepd directory table. Fixes: 0c4d268029bf ("powerpc/book3s64/mm: Simplify the rcu callback for page table free") Signed-off-by: Aneesh Kumar K.V [mpe: Add CONFIG_HUGETLB_PAGE guard to fix build break] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/32/pgalloc.h | 1 + .../include/asm/book3s/64/pgtable-4k.h | 21 +++++++++++++++++++ .../include/asm/book3s/64/pgtable-64k.h | 9 ++++++++ arch/powerpc/include/asm/book3s/64/pgtable.h | 5 +++++ arch/powerpc/include/asm/nohash/32/pgalloc.h | 1 + arch/powerpc/include/asm/nohash/64/pgalloc.h | 1 + arch/powerpc/mm/hugetlbpage.c | 3 ++- arch/powerpc/mm/pgtable-book3s64.c | 12 +++++++++++ 8 files changed, 52 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h index 6a6673907e45..e4633803fe43 100644 --- a/arch/powerpc/include/asm/book3s/32/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h @@ -108,6 +108,7 @@ static inline void pgtable_free(void *table, unsigned index_size) } #define check_pgt_cache() do { } while (0) +#define get_hugepd_cache_index(x) (x) #ifdef CONFIG_SMP static inline void pgtable_free_tlb(struct mmu_gather *tlb, diff --git a/arch/powerpc/include/asm/book3s/64/pgtable-4k.h b/arch/powerpc/include/asm/book3s/64/pgtable-4k.h index af5f2baac80f..a069dfcac9a9 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable-4k.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable-4k.h @@ -49,6 +49,27 @@ static inline int hugepd_ok(hugepd_t hpd) } #define is_hugepd(hpd) (hugepd_ok(hpd)) +/* + * 16M and 16G huge page directory tables are allocated from slab cache + * + */ +#define H_16M_CACHE_INDEX (PAGE_SHIFT + H_PTE_INDEX_SIZE + H_PMD_INDEX_SIZE - 24) +#define H_16G_CACHE_INDEX \ + (PAGE_SHIFT + H_PTE_INDEX_SIZE + H_PMD_INDEX_SIZE + H_PUD_INDEX_SIZE - 34) + +static inline int get_hugepd_cache_index(int index) +{ + switch (index) { + case H_16M_CACHE_INDEX: + return HTLB_16M_INDEX; + case H_16G_CACHE_INDEX: + return HTLB_16G_INDEX; + default: + BUG(); + } + /* should not reach */ +} + #else /* !CONFIG_HUGETLB_PAGE */ static inline int pmd_huge(pmd_t pmd) { return 0; } static inline int pud_huge(pud_t pud) { return 0; } diff --git a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h index fb4b3ba52339..d7ee249d6890 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h @@ -45,8 +45,17 @@ static inline int hugepd_ok(hugepd_t hpd) { return 0; } + #define is_hugepd(pdep) 0 +/* + * This should never get called + */ +static inline int get_hugepd_cache_index(int index) +{ + BUG(); +} + #else /* !CONFIG_HUGETLB_PAGE */ static inline int pmd_huge(pmd_t pmd) { return 0; } static inline int pud_huge(pud_t pud) { return 0; } diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 63cee159022b..42aafba7a308 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -287,6 +287,11 @@ enum pgtable_index { PMD_INDEX, PUD_INDEX, PGD_INDEX, + /* + * Below are used with 4k page size and hugetlb + */ + HTLB_16M_INDEX, + HTLB_16G_INDEX, }; extern unsigned long __vmalloc_start; diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h index 1707781d2f20..9de40eb614da 100644 --- a/arch/powerpc/include/asm/nohash/32/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h @@ -109,6 +109,7 @@ static inline void pgtable_free(void *table, unsigned index_size) } #define check_pgt_cache() do { } while (0) +#define get_hugepd_cache_index(x) (x) #ifdef CONFIG_SMP static inline void pgtable_free_tlb(struct mmu_gather *tlb, diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h index 0e693f322cb2..e2d62d033708 100644 --- a/arch/powerpc/include/asm/nohash/64/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h @@ -141,6 +141,7 @@ static inline void pgtable_free(void *table, int shift) } } +#define get_hugepd_cache_index(x) (x) #ifdef CONFIG_SMP static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift) { diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 7c5f479c5c00..8a9a49c13865 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -337,7 +337,8 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif if (shift >= pdshift) hugepd_free(tlb, hugepte); else - pgtable_free_tlb(tlb, hugepte, pdshift - shift); + pgtable_free_tlb(tlb, hugepte, + get_hugepd_cache_index(pdshift - shift)); } static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c index c1f4ca45c93a..4afbfbb64bfd 100644 --- a/arch/powerpc/mm/pgtable-book3s64.c +++ b/arch/powerpc/mm/pgtable-book3s64.c @@ -409,6 +409,18 @@ static inline void pgtable_free(void *table, int index) case PUD_INDEX: kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), table); break; +#if defined(CONFIG_PPC_4K_PAGES) && defined(CONFIG_HUGETLB_PAGE) + /* 16M hugepd directory at pud level */ + case HTLB_16M_INDEX: + BUILD_BUG_ON(H_16M_CACHE_INDEX <= 0); + kmem_cache_free(PGT_CACHE(H_16M_CACHE_INDEX), table); + break; + /* 16G hugepd directory at the pgd level */ + case HTLB_16G_INDEX: + BUILD_BUG_ON(H_16G_CACHE_INDEX <= 0); + kmem_cache_free(PGT_CACHE(H_16G_CACHE_INDEX), table); + break; +#endif /* We don't free pgd table via RCU callback */ default: BUG(); From dc45519eb181b5687ac8382361a8aa085acd1fe1 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 19 Jun 2018 14:44:00 +0200 Subject: [PATCH 231/572] net: ethernet: fix suspend/resume in davinci_emac This patch reverts commit 3243ff2a05ec ("net: ethernet: davinci_emac: Deduplicate bus_find_device() by name matching") and adds a comment which should stop anyone from reintroducing the same "fix" in the future. We can't use bus_find_device_by_name() here because the device name is not guaranteed to be 'davinci_mdio'. On some systems it can be 'davinci_mdio.0' so we need to use strncmp() against the first part of the string to correctly match it. Fixes: 3243ff2a05ec ("net: ethernet: davinci_emac: Deduplicate bus_find_device() by name matching") Cc: stable@vger.kernel.org Signed-off-by: Bartosz Golaszewski Acked-by: Lukas Wunner Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/davinci_emac.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index 06d7c9e4dcda..a1a6445b5a7e 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -1385,6 +1385,11 @@ static int emac_devioctl(struct net_device *ndev, struct ifreq *ifrq, int cmd) return -EOPNOTSUPP; } +static int match_first_device(struct device *dev, void *data) +{ + return !strncmp(dev_name(dev), "davinci_mdio", 12); +} + /** * emac_dev_open - EMAC device open * @ndev: The DaVinci EMAC network adapter @@ -1484,8 +1489,14 @@ static int emac_dev_open(struct net_device *ndev) /* use the first phy on the bus if pdata did not give us a phy id */ if (!phydev && !priv->phy_id) { - phy = bus_find_device_by_name(&mdio_bus_type, NULL, - "davinci_mdio"); + /* NOTE: we can't use bus_find_device_by_name() here because + * the device name is not guaranteed to be 'davinci_mdio'. On + * some systems it can be 'davinci_mdio.0' so we need to use + * strncmp() against the first part of the string to correctly + * match it. + */ + phy = bus_find_device(&mdio_bus_type, NULL, NULL, + match_first_device); if (phy) { priv->phy_id = dev_name(phy); if (!priv->phy_id || !*priv->phy_id) From 0a889b9404c084c6fd145020c939a8f688b3e058 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Tue, 19 Jun 2018 15:39:46 +0200 Subject: [PATCH 232/572] net/sched: act_ife: fix recursive lock and idr leak a recursive lock warning [1] can be observed with the following script, # $TC actions add action ife encode allow prio pass index 42 IFE type 0xED3E # $TC actions replace action ife encode allow tcindex pass index 42 in case the kernel was unable to run the last command (e.g. because of the impossibility to load 'act_meta_skbtcindex'). For a similar reason, the kernel can leak idr in the error path of tcf_ife_init(), because tcf_idr_release() is not called after successful idr reservation: # $TC actions add action ife encode allow tcindex index 47 IFE type 0xED3E RTNETLINK answers: No such file or directory We have an error talking to the kernel # $TC actions add action ife encode allow tcindex index 47 IFE type 0xED3E RTNETLINK answers: No space left on device We have an error talking to the kernel # $TC actions add action ife encode use mark 7 type 0xfefe pass index 47 IFE type 0xFEFE RTNETLINK answers: No space left on device We have an error talking to the kernel Since tcfa_lock is already taken when the action is being edited, a call to tcf_idr_release() wrongly makes tcf_idr_cleanup() take the same lock again. On the other hand, tcf_idr_release() needs to be called in the error path of tcf_ife_init(), to undo the last tcf_idr_create() invocation. Fix both problems in tcf_ife_init(). Since the cleanup() routine can now be called when ife->params is NULL, also add a NULL pointer check to avoid calling kfree_rcu(NULL, rcu). [1] ============================================ WARNING: possible recursive locking detected 4.17.0-rc4.kasan+ #417 Tainted: G E -------------------------------------------- tc/3932 is trying to acquire lock: 000000005097c9a6 (&(&p->tcfa_lock)->rlock){+...}, at: tcf_ife_cleanup+0x19/0x80 [act_ife] but task is already holding lock: 000000005097c9a6 (&(&p->tcfa_lock)->rlock){+...}, at: tcf_ife_init+0xf6d/0x13c0 [act_ife] other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&(&p->tcfa_lock)->rlock); lock(&(&p->tcfa_lock)->rlock); *** DEADLOCK *** May be due to missing lock nesting notation 2 locks held by tc/3932: #0: 000000007ca8e990 (rtnl_mutex){+.+.}, at: tcf_ife_init+0xf61/0x13c0 [act_ife] #1: 000000005097c9a6 (&(&p->tcfa_lock)->rlock){+...}, at: tcf_ife_init+0xf6d/0x13c0 [act_ife] stack backtrace: CPU: 3 PID: 3932 Comm: tc Tainted: G E 4.17.0-rc4.kasan+ #417 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 Call Trace: dump_stack+0x9a/0xeb __lock_acquire+0xf43/0x34a0 ? debug_check_no_locks_freed+0x2b0/0x2b0 ? debug_check_no_locks_freed+0x2b0/0x2b0 ? debug_check_no_locks_freed+0x2b0/0x2b0 ? __mutex_lock+0x62f/0x1240 ? kvm_sched_clock_read+0x1a/0x30 ? sched_clock+0x5/0x10 ? sched_clock_cpu+0x18/0x170 ? find_held_lock+0x39/0x1d0 ? lock_acquire+0x10b/0x330 lock_acquire+0x10b/0x330 ? tcf_ife_cleanup+0x19/0x80 [act_ife] _raw_spin_lock_bh+0x38/0x70 ? tcf_ife_cleanup+0x19/0x80 [act_ife] tcf_ife_cleanup+0x19/0x80 [act_ife] __tcf_idr_release+0xff/0x350 tcf_ife_init+0xdde/0x13c0 [act_ife] ? ife_exit_net+0x290/0x290 [act_ife] ? __lock_is_held+0xb4/0x140 tcf_action_init_1+0x67b/0xad0 ? tcf_action_dump_old+0xa0/0xa0 ? sched_clock+0x5/0x10 ? sched_clock_cpu+0x18/0x170 ? kvm_sched_clock_read+0x1a/0x30 ? sched_clock+0x5/0x10 ? sched_clock_cpu+0x18/0x170 ? memset+0x1f/0x40 tcf_action_init+0x30f/0x590 ? tcf_action_init_1+0xad0/0xad0 ? memset+0x1f/0x40 tc_ctl_action+0x48e/0x5e0 ? mutex_lock_io_nested+0x1160/0x1160 ? tca_action_gd+0x990/0x990 ? sched_clock+0x5/0x10 ? find_held_lock+0x39/0x1d0 rtnetlink_rcv_msg+0x4da/0x990 ? validate_linkmsg+0x680/0x680 ? sched_clock_cpu+0x18/0x170 ? find_held_lock+0x39/0x1d0 netlink_rcv_skb+0x127/0x350 ? validate_linkmsg+0x680/0x680 ? netlink_ack+0x970/0x970 ? __kmalloc_node_track_caller+0x304/0x3a0 netlink_unicast+0x40f/0x5d0 ? netlink_attachskb+0x580/0x580 ? _copy_from_iter_full+0x187/0x760 ? import_iovec+0x90/0x390 netlink_sendmsg+0x67f/0xb50 ? netlink_unicast+0x5d0/0x5d0 ? copy_msghdr_from_user+0x206/0x340 ? netlink_unicast+0x5d0/0x5d0 sock_sendmsg+0xb3/0xf0 ___sys_sendmsg+0x60a/0x8b0 ? copy_msghdr_from_user+0x340/0x340 ? lock_downgrade+0x5e0/0x5e0 ? tty_write_lock+0x18/0x50 ? kvm_sched_clock_read+0x1a/0x30 ? sched_clock+0x5/0x10 ? sched_clock_cpu+0x18/0x170 ? find_held_lock+0x39/0x1d0 ? lock_downgrade+0x5e0/0x5e0 ? lock_acquire+0x10b/0x330 ? __audit_syscall_entry+0x316/0x690 ? current_kernel_time64+0x6b/0xd0 ? __fget_light+0x55/0x1f0 ? __sys_sendmsg+0xd2/0x170 __sys_sendmsg+0xd2/0x170 ? __ia32_sys_shutdown+0x70/0x70 ? syscall_trace_enter+0x57a/0xd60 ? rcu_read_lock_sched_held+0xdc/0x110 ? __bpf_trace_sys_enter+0x10/0x10 ? do_syscall_64+0x22/0x480 do_syscall_64+0xa5/0x480 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x7fd646988ba0 RSP: 002b:00007fffc9fab3c8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007fffc9fab4f0 RCX: 00007fd646988ba0 RDX: 0000000000000000 RSI: 00007fffc9fab440 RDI: 0000000000000003 RBP: 000000005b28c8b3 R08: 0000000000000002 R09: 0000000000000000 R10: 00007fffc9faae20 R11: 0000000000000246 R12: 0000000000000000 R13: 00007fffc9fab504 R14: 0000000000000001 R15: 000000000066c100 Fixes: 4e8c86155010 ("net sched: net sched: ife action fix late binding") Fixes: ef6980b6becb ("introduce IFE action") Signed-off-by: Davide Caratti Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_ife.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 8527cfdc446d..078d52212172 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -415,7 +415,8 @@ static void tcf_ife_cleanup(struct tc_action *a) spin_unlock_bh(&ife->tcf_lock); p = rcu_dereference_protected(ife->params, 1); - kfree_rcu(p, rcu); + if (p) + kfree_rcu(p, rcu); } /* under ife->tcf_lock for existing action */ @@ -543,10 +544,8 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, NULL, NULL); if (err) { metadata_parse_err: - if (exists) - tcf_idr_release(*a, bind); if (ret == ACT_P_CREATED) - _tcf_ife_cleanup(*a); + tcf_idr_release(*a, bind); if (exists) spin_unlock_bh(&ife->tcf_lock); @@ -567,7 +566,7 @@ metadata_parse_err: err = use_all_metadata(ife); if (err) { if (ret == ACT_P_CREATED) - _tcf_ife_cleanup(*a); + tcf_idr_release(*a, bind); if (exists) spin_unlock_bh(&ife->tcf_lock); From cbf56c29624fa056a0c1c3d177e67aa51a7fd8d6 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Tue, 19 Jun 2018 15:45:50 +0200 Subject: [PATCH 233/572] net/sched: act_ife: preserve the action control in case of error in the following script # tc actions add action ife encode allow prio pass index 42 # tc actions replace action ife encode allow tcindex drop index 42 the action control should remain equal to 'pass', if the kernel failed to replace the TC action. Pospone the assignment of the action control, to ensure it is not overwritten in the error path of tcf_ife_init(). Fixes: ef6980b6becb ("introduce IFE action") Signed-off-by: Davide Caratti Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_ife.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 078d52212172..20d7d36b2fc9 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -517,8 +517,6 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, saddr = nla_data(tb[TCA_IFE_SMAC]); } - ife->tcf_action = parm->action; - if (parm->flags & IFE_ENCODE) { if (daddr) ether_addr_copy(p->eth_dst, daddr); @@ -575,6 +573,7 @@ metadata_parse_err: } } + ife->tcf_action = parm->action; if (exists) spin_unlock_bh(&ife->tcf_lock); From 2aee167c3675b088c86f648f834e793a0085e04d Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 19 Jun 2018 16:14:30 +0200 Subject: [PATCH 234/572] net/usb/drivers: Remove useless hrtimer_active check The code does: if (hrtimer_active(&t)) hrtimer_cancel(&t); However, hrtimer_cancel() checks if the timer is active, so the test above is pointless. Signed-off-by: Daniel Lezcano Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ncm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index b0e8b9613054..1eaec648bd1f 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -967,8 +967,7 @@ void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf) atomic_set(&ctx->stop, 1); - if (hrtimer_active(&ctx->tx_timer)) - hrtimer_cancel(&ctx->tx_timer); + hrtimer_cancel(&ctx->tx_timer); tasklet_kill(&ctx->bh); From 421780fd498399235b044638e85b352d6da20b6a Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Tue, 19 Jun 2018 17:16:20 +0200 Subject: [PATCH 235/572] bpfilter: fix build error bpfilter Makefile assumes that the system locale is en_US, and the parsing of objdump output fails. Set LC_ALL=C and, while at it, rewrite the objdump parsing so it spawns only 2 processes instead of 7. Fixes: d2ba09c17a064 ("net: add skeleton of bpfilter kernel module") Signed-off-by: Matteo Croce Signed-off-by: David S. Miller --- net/bpfilter/Makefile | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/bpfilter/Makefile b/net/bpfilter/Makefile index e0bbe7583e58..dd86b022eff0 100644 --- a/net/bpfilter/Makefile +++ b/net/bpfilter/Makefile @@ -21,8 +21,10 @@ endif # which bpfilter_kern.c passes further into umh blob loader at run-time quiet_cmd_copy_umh = GEN $@ cmd_copy_umh = echo ':' > $(obj)/.bpfilter_umh.o.cmd; \ - $(OBJCOPY) -I binary -O `$(OBJDUMP) -f $<|grep format|cut -d' ' -f8` \ - -B `$(OBJDUMP) -f $<|grep architecture|cut -d, -f1|cut -d' ' -f2` \ + $(OBJCOPY) -I binary \ + `LC_ALL=C objdump -f net/bpfilter/bpfilter_umh \ + |awk -F' |,' '/file format/{print "-O",$$NF} \ + /^architecture:/{print "-B",$$2}'` \ --rename-section .data=.init.rodata $< $@ $(obj)/bpfilter_umh.o: $(obj)/bpfilter_umh From 8b26a06ad4f2a12425f1f63a0ee57f42961dfd1e Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Tue, 19 Jun 2018 17:21:36 +0200 Subject: [PATCH 236/572] bpfilter: ignore binary files net/bpfilter/bpfilter_umh is a binary file generated when bpfilter is enabled, add it to .gitignore to avoid committing it. Fixes: d2ba09c17a064 ("net: add skeleton of bpfilter kernel module") Signed-off-by: Matteo Croce Signed-off-by: David S. Miller --- net/bpfilter/.gitignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 net/bpfilter/.gitignore diff --git a/net/bpfilter/.gitignore b/net/bpfilter/.gitignore new file mode 100644 index 000000000000..e97084e3eea2 --- /dev/null +++ b/net/bpfilter/.gitignore @@ -0,0 +1 @@ +bpfilter_umh From 684fb246578b9e81fc7b4ca5c71eae22edb650b2 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 19 Jun 2018 10:47:50 -0500 Subject: [PATCH 237/572] objtool: Add machine_real_restart() to the noreturn list machine_real_restart() is annotated as '__noreturn", so add it to the objtool noreturn list. This fixes the following warning with clang and CONFIG_CC_OPTIMIZE_FOR_SIZE=y: arch/x86/kernel/reboot.o: warning: objtool: native_machine_emergency_restart() falls through to next function machine_power_off() Reported-by: Matthias Kaehlcke Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Tested-by: Matthias Kaehlcke Reviewed-by: Matthias Kaehlcke Link: https://lkml.kernel.org/r/791712792aa4431bdd55bf1beb33a169ddf3b4a2.1529423255.git.jpoimboe@redhat.com --- tools/objtool/check.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 38047c6aa575..f4a25bd1871f 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -164,6 +164,7 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func, "lbug_with_loc", "fortify_panic", "usercopy_abort", + "machine_real_restart", }; if (func->bind == STB_WEAK) From 18f3e95b90b28318ef35910d21c39908de672331 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 12 Jun 2018 17:54:42 +0800 Subject: [PATCH 238/572] MIPS: io: Add barrier after register read in inX() While a barrier is present in the outX() functions before the register write, a similar barrier is missing in the inX() functions after the register read. This could allow memory accesses following inX() to observe stale data. This patch is very similar to commit a1cc7034e33d12dc1 ("MIPS: io: Add barrier after register read in readX()"). Because war_io_reorder_wmb() is both used by writeX() and outX(), if readX() need a barrier then so does inX(). Cc: stable@vger.kernel.org Signed-off-by: Huacai Chen Patchwork: https://patchwork.linux-mips.org/patch/19516/ Signed-off-by: Paul Burton Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: Fuxin Zhang Cc: Zhangjin Wu Cc: Huacai Chen --- arch/mips/include/asm/io.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h index a7d0b836f2f7..cea8ad864b3f 100644 --- a/arch/mips/include/asm/io.h +++ b/arch/mips/include/asm/io.h @@ -414,6 +414,8 @@ static inline type pfx##in##bwlq##p(unsigned long port) \ __val = *__addr; \ slow; \ \ + /* prevent prefetching of coherent DMA data prematurely */ \ + rmb(); \ return pfx##ioswab##bwlq(__addr, __val); \ } From 9ea141ad54716d48e79d0093052c12ed67debf09 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Thu, 14 Jun 2018 10:13:53 -0700 Subject: [PATCH 239/572] MIPS: Add support for restartable sequences Implement support for restartable sequences on MIPS, which requires 3 simple things: - Call rseq_handle_notify_resume() on return to userspace if TIF_NOTIFY_RESUME is set. - Call rseq_signal_deliver() to fixup the pre-signal stack frame when a signal is delivered whilst executing a restartable sequence critical section. - Select CONFIG_HAVE_RSEQ. Signed-off-by: Paul Burton Reviewed-by: James Hogan Patchwork: https://patchwork.linux-mips.org/patch/19523/ Cc: Ralf Baechle Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: Paul E. McKenney Cc: Boqun Feng Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org --- arch/mips/Kconfig | 1 + arch/mips/kernel/signal.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 3f9deec70b92..08c10c518f83 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -65,6 +65,7 @@ config MIPS select HAVE_OPROFILE select HAVE_PERF_EVENTS select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_RSEQ select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS select HAVE_VIRT_CPU_ACCOUNTING_GEN if 64BIT || !SMP diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c index 9e224469c788..00f2535d2226 100644 --- a/arch/mips/kernel/signal.c +++ b/arch/mips/kernel/signal.c @@ -801,6 +801,8 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) regs->regs[0] = 0; /* Don't deal with this again. */ } + rseq_signal_deliver(regs); + if (sig_uses_siginfo(&ksig->ka, abi)) ret = abi->setup_rt_frame(vdso + abi->vdso->off_rt_sigreturn, ksig, regs, oldset); @@ -868,6 +870,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, void *unused, if (thread_info_flags & _TIF_NOTIFY_RESUME) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); + rseq_handle_notify_resume(regs); } user_enter(); From 9bcf53598dfe1bd8caaf8e03738d3cc51d45904e Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Thu, 14 Jun 2018 10:20:54 -0700 Subject: [PATCH 240/572] MIPS: Add syscall detection for restartable sequences Syscalls are not allowed inside restartable sequences, so add a call to rseq_syscall() at the very beginning of the system call exit path when CONFIG_DEBUG_RSEQ=y. This will help us to detect whether there is a syscall issued erroneously inside a restartable sequence. Signed-off-by: Paul Burton Reviewed-by: James Hogan Patchwork: https://patchwork.linux-mips.org/patch/19522/ Cc: Ralf Baechle Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: Paul E. McKenney Cc: Boqun Feng Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org --- arch/mips/kernel/entry.S | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S index 38a302919e6b..d7de8adcfcc8 100644 --- a/arch/mips/kernel/entry.S +++ b/arch/mips/kernel/entry.S @@ -79,6 +79,10 @@ FEXPORT(ret_from_fork) jal schedule_tail # a0 = struct task_struct *prev FEXPORT(syscall_exit) +#ifdef CONFIG_DEBUG_RSEQ + move a0, sp + jal rseq_syscall +#endif local_irq_disable # make sure need_resched and # signals dont change between # sampling and return @@ -141,6 +145,10 @@ work_notifysig: # deal with pending signals and j resume_userspace_check FEXPORT(syscall_exit_partial) +#ifdef CONFIG_DEBUG_RSEQ + move a0, sp + jal rseq_syscall +#endif local_irq_disable # make sure need_resched doesn't # change between and return LONG_L a2, TI_FLAGS($28) # current->work From e426b3754a2cb8bb45b71283fdac0cfc6d247db7 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Thu, 14 Jun 2018 10:22:44 -0700 Subject: [PATCH 241/572] MIPS: Wire up the restartable sequences (rseq) syscall Wire up the restartable sequences (rseq) syscall for MIPS. This was introduced by commit d7822b1e24f2 ("rseq: Introduce restartable sequences system call") & MIPS now supports the prerequisites. Signed-off-by: Paul Burton Reviewed-by: James Hogan Patchwork: https://patchwork.linux-mips.org/patch/19525/ Cc: Ralf Baechle Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: Paul E. McKenney Cc: Boqun Feng Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org --- arch/mips/include/uapi/asm/unistd.h | 15 +++++++++------ arch/mips/kernel/scall32-o32.S | 1 + arch/mips/kernel/scall64-64.S | 1 + arch/mips/kernel/scall64-n32.S | 1 + arch/mips/kernel/scall64-o32.S | 1 + 5 files changed, 13 insertions(+), 6 deletions(-) diff --git a/arch/mips/include/uapi/asm/unistd.h b/arch/mips/include/uapi/asm/unistd.h index bb05e9916a5f..170bf0b5b250 100644 --- a/arch/mips/include/uapi/asm/unistd.h +++ b/arch/mips/include/uapi/asm/unistd.h @@ -388,17 +388,18 @@ #define __NR_pkey_alloc (__NR_Linux + 364) #define __NR_pkey_free (__NR_Linux + 365) #define __NR_statx (__NR_Linux + 366) +#define __NR_rseq (__NR_Linux + 367) /* * Offset of the last Linux o32 flavoured syscall */ -#define __NR_Linux_syscalls 366 +#define __NR_Linux_syscalls 367 #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */ #define __NR_O32_Linux 4000 -#define __NR_O32_Linux_syscalls 366 +#define __NR_O32_Linux_syscalls 367 #if _MIPS_SIM == _MIPS_SIM_ABI64 @@ -733,16 +734,17 @@ #define __NR_pkey_alloc (__NR_Linux + 324) #define __NR_pkey_free (__NR_Linux + 325) #define __NR_statx (__NR_Linux + 326) +#define __NR_rseq (__NR_Linux + 327) /* * Offset of the last Linux 64-bit flavoured syscall */ -#define __NR_Linux_syscalls 326 +#define __NR_Linux_syscalls 327 #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */ #define __NR_64_Linux 5000 -#define __NR_64_Linux_syscalls 326 +#define __NR_64_Linux_syscalls 327 #if _MIPS_SIM == _MIPS_SIM_NABI32 @@ -1081,15 +1083,16 @@ #define __NR_pkey_alloc (__NR_Linux + 328) #define __NR_pkey_free (__NR_Linux + 329) #define __NR_statx (__NR_Linux + 330) +#define __NR_rseq (__NR_Linux + 331) /* * Offset of the last N32 flavoured syscall */ -#define __NR_Linux_syscalls 330 +#define __NR_Linux_syscalls 331 #endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */ #define __NR_N32_Linux 6000 -#define __NR_N32_Linux_syscalls 330 +#define __NR_N32_Linux_syscalls 331 #endif /* _UAPI_ASM_UNISTD_H */ diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index a9a7d78803cd..842ff1612893 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -590,3 +590,4 @@ EXPORT(sys_call_table) PTR sys_pkey_alloc PTR sys_pkey_free /* 4365 */ PTR sys_statx + PTR sys_rseq diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S index 65d5aeeb9bdb..558830d1e5ba 100644 --- a/arch/mips/kernel/scall64-64.S +++ b/arch/mips/kernel/scall64-64.S @@ -439,4 +439,5 @@ EXPORT(sys_call_table) PTR sys_pkey_alloc PTR sys_pkey_free /* 5325 */ PTR sys_statx + PTR sys_rseq .size sys_call_table,.-sys_call_table diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index cbf190ef9e8a..293f0b0119f3 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -434,4 +434,5 @@ EXPORT(sysn32_call_table) PTR sys_pkey_alloc PTR sys_pkey_free PTR sys_statx /* 6330 */ + PTR sys_rseq .size sysn32_call_table,.-sysn32_call_table diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index 9ebe3e2403b1..f13a08de8078 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -583,4 +583,5 @@ EXPORT(sys32_call_table) PTR sys_pkey_alloc PTR sys_pkey_free /* 4365 */ PTR sys_statx + PTR sys_rseq .size sys32_call_table,.-sys32_call_table From 744f4be542d705a39dac9810350e96f37474eda3 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Thu, 14 Jun 2018 11:06:22 -0700 Subject: [PATCH 242/572] rseq/selftests: Implement MIPS support Implement support for both MIPS32 & MIPS64 in the rseq selftests, in order to sanity check the recently enabled rseq syscall. The tests all pass on a MIPS Boston development board running either a MIPS32r2 interAptiv CPU & a MIPS64r6 I6500 CPU, both of which were configured with 2 cores each of which have 2 hardware threads (VP(E)s) - ie. 4 CPUs. Signed-off-by: Paul Burton Reviewed-by: James Hogan Patchwork: https://patchwork.linux-mips.org/patch/19524/ Cc: Ralf Baechle Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: Paul E. McKenney Cc: Boqun Feng Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org --- tools/testing/selftests/rseq/param_test.c | 24 + tools/testing/selftests/rseq/rseq-mips.h | 725 ++++++++++++++++++++++ tools/testing/selftests/rseq/rseq.h | 2 + 3 files changed, 751 insertions(+) create mode 100644 tools/testing/selftests/rseq/rseq-mips.h diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c index 6a9f602a8718..615252331813 100644 --- a/tools/testing/selftests/rseq/param_test.c +++ b/tools/testing/selftests/rseq/param_test.c @@ -137,6 +137,30 @@ unsigned int yield_mod_cnt, nr_abort; "subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \ "bne 222b\n\t" \ "333:\n\t" + +#elif defined(__mips__) + +#define RSEQ_INJECT_INPUT \ + , [loop_cnt_1]"m"(loop_cnt[1]) \ + , [loop_cnt_2]"m"(loop_cnt[2]) \ + , [loop_cnt_3]"m"(loop_cnt[3]) \ + , [loop_cnt_4]"m"(loop_cnt[4]) \ + , [loop_cnt_5]"m"(loop_cnt[5]) \ + , [loop_cnt_6]"m"(loop_cnt[6]) + +#define INJECT_ASM_REG "$5" + +#define RSEQ_INJECT_CLOBBER \ + , INJECT_ASM_REG + +#define RSEQ_INJECT_ASM(n) \ + "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \ + "beqz " INJECT_ASM_REG ", 333f\n\t" \ + "222:\n\t" \ + "addiu " INJECT_ASM_REG ", -1\n\t" \ + "bnez " INJECT_ASM_REG ", 222b\n\t" \ + "333:\n\t" + #else #error unsupported target #endif diff --git a/tools/testing/selftests/rseq/rseq-mips.h b/tools/testing/selftests/rseq/rseq-mips.h new file mode 100644 index 000000000000..7f48ecf46994 --- /dev/null +++ b/tools/testing/selftests/rseq/rseq-mips.h @@ -0,0 +1,725 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Author: Paul Burton + * (C) Copyright 2018 MIPS Tech LLC + * + * Based on rseq-arm.h: + * (C) Copyright 2016-2018 - Mathieu Desnoyers + */ + +#define RSEQ_SIG 0x53053053 + +#define rseq_smp_mb() __asm__ __volatile__ ("sync" ::: "memory") +#define rseq_smp_rmb() rseq_smp_mb() +#define rseq_smp_wmb() rseq_smp_mb() + +#define rseq_smp_load_acquire(p) \ +__extension__ ({ \ + __typeof(*p) ____p1 = RSEQ_READ_ONCE(*p); \ + rseq_smp_mb(); \ + ____p1; \ +}) + +#define rseq_smp_acquire__after_ctrl_dep() rseq_smp_rmb() + +#define rseq_smp_store_release(p, v) \ +do { \ + rseq_smp_mb(); \ + RSEQ_WRITE_ONCE(*p, v); \ +} while (0) + +#ifdef RSEQ_SKIP_FASTPATH +#include "rseq-skip.h" +#else /* !RSEQ_SKIP_FASTPATH */ + +#if _MIPS_SZLONG == 64 +# define LONG ".dword" +# define LONG_LA "dla" +# define LONG_L "ld" +# define LONG_S "sd" +# define LONG_ADDI "daddiu" +# define U32_U64_PAD(x) x +#elif _MIPS_SZLONG == 32 +# define LONG ".word" +# define LONG_LA "la" +# define LONG_L "lw" +# define LONG_S "sw" +# define LONG_ADDI "addiu" +# ifdef __BIG_ENDIAN +# define U32_U64_PAD(x) "0x0, " x +# else +# define U32_U64_PAD(x) x ", 0x0" +# endif +#else +# error unsupported _MIPS_SZLONG +#endif + +#define __RSEQ_ASM_DEFINE_TABLE(version, flags, start_ip, \ + post_commit_offset, abort_ip) \ + ".pushsection __rseq_table, \"aw\"\n\t" \ + ".balign 32\n\t" \ + ".word " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \ + LONG " " U32_U64_PAD(__rseq_str(start_ip)) "\n\t" \ + LONG " " U32_U64_PAD(__rseq_str(post_commit_offset)) "\n\t" \ + LONG " " U32_U64_PAD(__rseq_str(abort_ip)) "\n\t" \ + ".popsection\n\t" + +#define RSEQ_ASM_DEFINE_TABLE(start_ip, post_commit_ip, abort_ip) \ + __RSEQ_ASM_DEFINE_TABLE(0x0, 0x0, start_ip, \ + (post_commit_ip - start_ip), abort_ip) + +#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \ + RSEQ_INJECT_ASM(1) \ + LONG_LA " $4, " __rseq_str(cs_label) "\n\t" \ + LONG_S " $4, %[" __rseq_str(rseq_cs) "]\n\t" \ + __rseq_str(label) ":\n\t" + +#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \ + RSEQ_INJECT_ASM(2) \ + "lw $4, %[" __rseq_str(current_cpu_id) "]\n\t" \ + "bne $4, %[" __rseq_str(cpu_id) "], " __rseq_str(label) "\n\t" + +#define __RSEQ_ASM_DEFINE_ABORT(table_label, label, teardown, \ + abort_label, version, flags, \ + start_ip, post_commit_offset, abort_ip) \ + ".balign 32\n\t" \ + __rseq_str(table_label) ":\n\t" \ + ".word " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \ + LONG " " U32_U64_PAD(__rseq_str(start_ip)) "\n\t" \ + LONG " " U32_U64_PAD(__rseq_str(post_commit_offset)) "\n\t" \ + LONG " " U32_U64_PAD(__rseq_str(abort_ip)) "\n\t" \ + ".word " __rseq_str(RSEQ_SIG) "\n\t" \ + __rseq_str(label) ":\n\t" \ + teardown \ + "b %l[" __rseq_str(abort_label) "]\n\t" + +#define RSEQ_ASM_DEFINE_ABORT(table_label, label, teardown, abort_label, \ + start_ip, post_commit_ip, abort_ip) \ + __RSEQ_ASM_DEFINE_ABORT(table_label, label, teardown, \ + abort_label, 0x0, 0x0, start_ip, \ + (post_commit_ip - start_ip), abort_ip) + +#define RSEQ_ASM_DEFINE_CMPFAIL(label, teardown, cmpfail_label) \ + __rseq_str(label) ":\n\t" \ + teardown \ + "b %l[" __rseq_str(cmpfail_label) "]\n\t" + +#define rseq_workaround_gcc_asm_size_guess() __asm__ __volatile__("") + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + rseq_workaround_gcc_asm_size_guess(); + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */ + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + LONG_L " $4, %[v]\n\t" + "bne $4, %[expect], %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + LONG_L " $4, %[v]\n\t" + "bne $4, %[expect], %l[error2]\n\t" +#endif + /* final store */ + LONG_S " %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(5) + "b 5f\n\t" + RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f) + "5:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "$4", "memory" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_workaround_gcc_asm_size_guess(); + return 0; +abort: + rseq_workaround_gcc_asm_size_guess(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_workaround_gcc_asm_size_guess(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, + off_t voffp, intptr_t *load, int cpu) +{ + RSEQ_INJECT_C(9) + + rseq_workaround_gcc_asm_size_guess(); + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */ + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + LONG_L " $4, %[v]\n\t" + "beq $4, %[expectnot], %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + LONG_L " $4, %[v]\n\t" + "beq $4, %[expectnot], %l[error2]\n\t" +#endif + LONG_S " $4, %[load]\n\t" + LONG_ADDI " $4, %[voffp]\n\t" + LONG_L " $4, 0($4)\n\t" + /* final store */ + LONG_S " $4, %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(5) + "b 5f\n\t" + RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f) + "5:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + /* final store input */ + [v] "m" (*v), + [expectnot] "r" (expectnot), + [voffp] "Ir" (voffp), + [load] "m" (*load) + RSEQ_INJECT_INPUT + : "$4", "memory" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_workaround_gcc_asm_size_guess(); + return 0; +abort: + rseq_workaround_gcc_asm_size_guess(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_workaround_gcc_asm_size_guess(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_addv(intptr_t *v, intptr_t count, int cpu) +{ + RSEQ_INJECT_C(9) + + rseq_workaround_gcc_asm_size_guess(); + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */ + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) +#endif + LONG_L " $4, %[v]\n\t" + LONG_ADDI " $4, %[count]\n\t" + /* final store */ + LONG_S " $4, %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(4) + "b 5f\n\t" + RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f) + "5:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + [v] "m" (*v), + [count] "Ir" (count) + RSEQ_INJECT_INPUT + : "$4", "memory" + RSEQ_INJECT_CLOBBER + : abort +#ifdef RSEQ_COMPARE_TWICE + , error1 +#endif + ); + rseq_workaround_gcc_asm_size_guess(); + return 0; +abort: + rseq_workaround_gcc_asm_size_guess(); + RSEQ_INJECT_FAILED + return -1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t newv2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + rseq_workaround_gcc_asm_size_guess(); + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */ + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + LONG_L " $4, %[v]\n\t" + "bne $4, %[expect], %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + LONG_L " $4, %[v]\n\t" + "bne $4, %[expect], %l[error2]\n\t" +#endif + /* try store */ + LONG_S " %[newv2], %[v2]\n\t" + RSEQ_INJECT_ASM(5) + /* final store */ + LONG_S " %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + "b 5f\n\t" + RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f) + "5:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + /* try store input */ + [v2] "m" (*v2), + [newv2] "r" (newv2), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "$4", "memory" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_workaround_gcc_asm_size_guess(); + return 0; +abort: + rseq_workaround_gcc_asm_size_guess(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_workaround_gcc_asm_size_guess(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t newv2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + rseq_workaround_gcc_asm_size_guess(); + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */ + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + LONG_L " $4, %[v]\n\t" + "bne $4, %[expect], %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + LONG_L " $4, %[v]\n\t" + "bne $4, %[expect], %l[error2]\n\t" +#endif + /* try store */ + LONG_S " %[newv2], %[v2]\n\t" + RSEQ_INJECT_ASM(5) + "sync\n\t" /* full sync provides store-release */ + /* final store */ + LONG_S " %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + "b 5f\n\t" + RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f) + "5:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + /* try store input */ + [v2] "m" (*v2), + [newv2] "r" (newv2), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "$4", "memory" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_workaround_gcc_asm_size_guess(); + return 0; +abort: + rseq_workaround_gcc_asm_size_guess(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_workaround_gcc_asm_size_guess(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, + intptr_t *v2, intptr_t expect2, + intptr_t newv, int cpu) +{ + RSEQ_INJECT_C(9) + + rseq_workaround_gcc_asm_size_guess(); + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */ + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + LONG_L " $4, %[v]\n\t" + "bne $4, %[expect], %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(4) + LONG_L " $4, %[v2]\n\t" + "bne $4, %[expect2], %l[cmpfail]\n\t" + RSEQ_INJECT_ASM(5) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1]) + LONG_L " $4, %[v]\n\t" + "bne $4, %[expect], %l[error2]\n\t" + LONG_L " $4, %[v2]\n\t" + "bne $4, %[expect2], %l[error3]\n\t" +#endif + /* final store */ + LONG_S " %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + "b 5f\n\t" + RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f) + "5:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + /* cmp2 input */ + [v2] "m" (*v2), + [expect2] "r" (expect2), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv) + RSEQ_INJECT_INPUT + : "$4", "memory" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2, error3 +#endif + ); + rseq_workaround_gcc_asm_size_guess(); + return 0; +abort: + rseq_workaround_gcc_asm_size_guess(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_workaround_gcc_asm_size_guess(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +error2: + rseq_bug("1st expected value comparison failed"); +error3: + rseq_bug("2nd expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, + void *dst, void *src, size_t len, + intptr_t newv, int cpu) +{ + uintptr_t rseq_scratch[3]; + + RSEQ_INJECT_C(9) + + rseq_workaround_gcc_asm_size_guess(); + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */ + LONG_S " %[src], %[rseq_scratch0]\n\t" + LONG_S " %[dst], %[rseq_scratch1]\n\t" + LONG_S " %[len], %[rseq_scratch2]\n\t" + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + LONG_L " $4, %[v]\n\t" + "bne $4, %[expect], 5f\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f) + LONG_L " $4, %[v]\n\t" + "bne $4, %[expect], 7f\n\t" +#endif + /* try memcpy */ + "beqz %[len], 333f\n\t" \ + "222:\n\t" \ + "lb $4, 0(%[src])\n\t" \ + "sb $4, 0(%[dst])\n\t" \ + LONG_ADDI " %[src], 1\n\t" \ + LONG_ADDI " %[dst], 1\n\t" \ + LONG_ADDI " %[len], -1\n\t" \ + "bnez %[len], 222b\n\t" \ + "333:\n\t" \ + RSEQ_INJECT_ASM(5) + /* final store */ + LONG_S " %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + /* teardown */ + LONG_L " %[len], %[rseq_scratch2]\n\t" + LONG_L " %[dst], %[rseq_scratch1]\n\t" + LONG_L " %[src], %[rseq_scratch0]\n\t" + "b 8f\n\t" + RSEQ_ASM_DEFINE_ABORT(3, 4, + /* teardown */ + LONG_L " %[len], %[rseq_scratch2]\n\t" + LONG_L " %[dst], %[rseq_scratch1]\n\t" + LONG_L " %[src], %[rseq_scratch0]\n\t", + abort, 1b, 2b, 4f) + RSEQ_ASM_DEFINE_CMPFAIL(5, + /* teardown */ + LONG_L " %[len], %[rseq_scratch2]\n\t" + LONG_L " %[dst], %[rseq_scratch1]\n\t" + LONG_L " %[src], %[rseq_scratch0]\n\t", + cmpfail) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_CMPFAIL(6, + /* teardown */ + LONG_L " %[len], %[rseq_scratch2]\n\t" + LONG_L " %[dst], %[rseq_scratch1]\n\t" + LONG_L " %[src], %[rseq_scratch0]\n\t", + error1) + RSEQ_ASM_DEFINE_CMPFAIL(7, + /* teardown */ + LONG_L " %[len], %[rseq_scratch2]\n\t" + LONG_L " %[dst], %[rseq_scratch1]\n\t" + LONG_L " %[src], %[rseq_scratch0]\n\t", + error2) +#endif + "8:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv), + /* try memcpy input */ + [dst] "r" (dst), + [src] "r" (src), + [len] "r" (len), + [rseq_scratch0] "m" (rseq_scratch[0]), + [rseq_scratch1] "m" (rseq_scratch[1]), + [rseq_scratch2] "m" (rseq_scratch[2]) + RSEQ_INJECT_INPUT + : "$4", "memory" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_workaround_gcc_asm_size_guess(); + return 0; +abort: + rseq_workaround_gcc_asm_size_guess(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_workaround_gcc_asm_size_guess(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_workaround_gcc_asm_size_guess(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_workaround_gcc_asm_size_guess(); + rseq_bug("expected value comparison failed"); +#endif +} + +static inline __attribute__((always_inline)) +int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, + void *dst, void *src, size_t len, + intptr_t newv, int cpu) +{ + uintptr_t rseq_scratch[3]; + + RSEQ_INJECT_C(9) + + rseq_workaround_gcc_asm_size_guess(); + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */ + LONG_S " %[src], %[rseq_scratch0]\n\t" + LONG_S " %[dst], %[rseq_scratch1]\n\t" + LONG_S " %[len], %[rseq_scratch2]\n\t" + /* Start rseq by storing table entry pointer into rseq_cs. */ + RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs) + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f) + RSEQ_INJECT_ASM(3) + LONG_L " $4, %[v]\n\t" + "bne $4, %[expect], 5f\n\t" + RSEQ_INJECT_ASM(4) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f) + LONG_L " $4, %[v]\n\t" + "bne $4, %[expect], 7f\n\t" +#endif + /* try memcpy */ + "beqz %[len], 333f\n\t" \ + "222:\n\t" \ + "lb $4, 0(%[src])\n\t" \ + "sb $4, 0(%[dst])\n\t" \ + LONG_ADDI " %[src], 1\n\t" \ + LONG_ADDI " %[dst], 1\n\t" \ + LONG_ADDI " %[len], -1\n\t" \ + "bnez %[len], 222b\n\t" \ + "333:\n\t" \ + RSEQ_INJECT_ASM(5) + "sync\n\t" /* full sync provides store-release */ + /* final store */ + LONG_S " %[newv], %[v]\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(6) + /* teardown */ + LONG_L " %[len], %[rseq_scratch2]\n\t" + LONG_L " %[dst], %[rseq_scratch1]\n\t" + LONG_L " %[src], %[rseq_scratch0]\n\t" + "b 8f\n\t" + RSEQ_ASM_DEFINE_ABORT(3, 4, + /* teardown */ + LONG_L " %[len], %[rseq_scratch2]\n\t" + LONG_L " %[dst], %[rseq_scratch1]\n\t" + LONG_L " %[src], %[rseq_scratch0]\n\t", + abort, 1b, 2b, 4f) + RSEQ_ASM_DEFINE_CMPFAIL(5, + /* teardown */ + LONG_L " %[len], %[rseq_scratch2]\n\t" + LONG_L " %[dst], %[rseq_scratch1]\n\t" + LONG_L " %[src], %[rseq_scratch0]\n\t", + cmpfail) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_CMPFAIL(6, + /* teardown */ + LONG_L " %[len], %[rseq_scratch2]\n\t" + LONG_L " %[dst], %[rseq_scratch1]\n\t" + LONG_L " %[src], %[rseq_scratch0]\n\t", + error1) + RSEQ_ASM_DEFINE_CMPFAIL(7, + /* teardown */ + LONG_L " %[len], %[rseq_scratch2]\n\t" + LONG_L " %[dst], %[rseq_scratch1]\n\t" + LONG_L " %[src], %[rseq_scratch0]\n\t", + error2) +#endif + "8:\n\t" + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [current_cpu_id] "m" (__rseq_abi.cpu_id), + [rseq_cs] "m" (__rseq_abi.rseq_cs), + /* final store input */ + [v] "m" (*v), + [expect] "r" (expect), + [newv] "r" (newv), + /* try memcpy input */ + [dst] "r" (dst), + [src] "r" (src), + [len] "r" (len), + [rseq_scratch0] "m" (rseq_scratch[0]), + [rseq_scratch1] "m" (rseq_scratch[1]), + [rseq_scratch2] "m" (rseq_scratch[2]) + RSEQ_INJECT_INPUT + : "$4", "memory" + RSEQ_INJECT_CLOBBER + : abort, cmpfail +#ifdef RSEQ_COMPARE_TWICE + , error1, error2 +#endif + ); + rseq_workaround_gcc_asm_size_guess(); + return 0; +abort: + rseq_workaround_gcc_asm_size_guess(); + RSEQ_INJECT_FAILED + return -1; +cmpfail: + rseq_workaround_gcc_asm_size_guess(); + return 1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_workaround_gcc_asm_size_guess(); + rseq_bug("cpu_id comparison failed"); +error2: + rseq_workaround_gcc_asm_size_guess(); + rseq_bug("expected value comparison failed"); +#endif +} + +#endif /* !RSEQ_SKIP_FASTPATH */ diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h index 0a808575cbc4..a4684112676c 100644 --- a/tools/testing/selftests/rseq/rseq.h +++ b/tools/testing/selftests/rseq/rseq.h @@ -73,6 +73,8 @@ extern __thread volatile struct rseq __rseq_abi; #include #elif defined(__PPC__) #include +#elif defined(__mips__) +#include #else #error unsupported target #endif From 4337aac1e1c97cfda56fbec4077fbc0e37b867c0 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Thu, 14 Jun 2018 17:24:07 -0700 Subject: [PATCH 243/572] MIPS: Wire up io_pgetevents syscall Wire up the io_pgetevents syscall that was introduced by commit 7a074e96dee6 ("aio: implement io_pgetevents"). Signed-off-by: Paul Burton Patchwork: https://patchwork.linux-mips.org/patch/19593/ Cc: James Hogan Cc: Ralf Baechle Cc: linux-mips@linux-mips.org --- arch/mips/include/uapi/asm/unistd.h | 15 +++++++++------ arch/mips/kernel/scall32-o32.S | 1 + arch/mips/kernel/scall64-64.S | 1 + arch/mips/kernel/scall64-n32.S | 1 + arch/mips/kernel/scall64-o32.S | 1 + 5 files changed, 13 insertions(+), 6 deletions(-) diff --git a/arch/mips/include/uapi/asm/unistd.h b/arch/mips/include/uapi/asm/unistd.h index 170bf0b5b250..f25dd1d83fb7 100644 --- a/arch/mips/include/uapi/asm/unistd.h +++ b/arch/mips/include/uapi/asm/unistd.h @@ -389,17 +389,18 @@ #define __NR_pkey_free (__NR_Linux + 365) #define __NR_statx (__NR_Linux + 366) #define __NR_rseq (__NR_Linux + 367) +#define __NR_io_pgetevents (__NR_Linux + 368) /* * Offset of the last Linux o32 flavoured syscall */ -#define __NR_Linux_syscalls 367 +#define __NR_Linux_syscalls 368 #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */ #define __NR_O32_Linux 4000 -#define __NR_O32_Linux_syscalls 367 +#define __NR_O32_Linux_syscalls 368 #if _MIPS_SIM == _MIPS_SIM_ABI64 @@ -735,16 +736,17 @@ #define __NR_pkey_free (__NR_Linux + 325) #define __NR_statx (__NR_Linux + 326) #define __NR_rseq (__NR_Linux + 327) +#define __NR_io_pgetevents (__NR_Linux + 328) /* * Offset of the last Linux 64-bit flavoured syscall */ -#define __NR_Linux_syscalls 327 +#define __NR_Linux_syscalls 328 #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */ #define __NR_64_Linux 5000 -#define __NR_64_Linux_syscalls 327 +#define __NR_64_Linux_syscalls 328 #if _MIPS_SIM == _MIPS_SIM_NABI32 @@ -1084,15 +1086,16 @@ #define __NR_pkey_free (__NR_Linux + 329) #define __NR_statx (__NR_Linux + 330) #define __NR_rseq (__NR_Linux + 331) +#define __NR_io_pgetevents (__NR_Linux + 332) /* * Offset of the last N32 flavoured syscall */ -#define __NR_Linux_syscalls 331 +#define __NR_Linux_syscalls 332 #endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */ #define __NR_N32_Linux 6000 -#define __NR_N32_Linux_syscalls 331 +#define __NR_N32_Linux_syscalls 332 #endif /* _UAPI_ASM_UNISTD_H */ diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index 842ff1612893..91d3c8c46097 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -591,3 +591,4 @@ EXPORT(sys_call_table) PTR sys_pkey_free /* 4365 */ PTR sys_statx PTR sys_rseq + PTR sys_io_pgetevents diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S index 558830d1e5ba..358d9599983d 100644 --- a/arch/mips/kernel/scall64-64.S +++ b/arch/mips/kernel/scall64-64.S @@ -440,4 +440,5 @@ EXPORT(sys_call_table) PTR sys_pkey_free /* 5325 */ PTR sys_statx PTR sys_rseq + PTR sys_io_pgetevents .size sys_call_table,.-sys_call_table diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 293f0b0119f3..c65eaacc1abf 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -435,4 +435,5 @@ EXPORT(sysn32_call_table) PTR sys_pkey_free PTR sys_statx /* 6330 */ PTR sys_rseq + PTR compat_sys_io_pgetevents .size sysn32_call_table,.-sysn32_call_table diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index f13a08de8078..73913f072e39 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -584,4 +584,5 @@ EXPORT(sys32_call_table) PTR sys_pkey_free /* 4365 */ PTR sys_statx PTR sys_rseq + PTR compat_sys_io_pgetevents .size sys32_call_table,.-sys32_call_table From 12b03558cef6d655d0d394f5e98a6fd07c1f6c0f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 19 Jun 2018 19:18:50 -0700 Subject: [PATCH 244/572] net: sungem: fix rx checksum support After commit 88078d98d1bb ("net: pskb_trim_rcsum() and CHECKSUM_COMPLETE are friends"), sungem owners reported the infamous "eth0: hw csum failure" message. CHECKSUM_COMPLETE has in fact never worked for this driver, but this was masked by the fact that upper stacks had to strip the FCS, and therefore skb->ip_summed was set back to CHECKSUM_NONE before my recent change. Driver configures a number of bytes to skip when the chip computes the checksum, and for some reason only half of the Ethernet header was skipped. Then a second problem is that we should strip the FCS by default, unless the driver is updated to eventually support NETIF_F_RXFCS in the future. Finally, a driver should check if NETIF_F_RXCSUM feature is enabled or not, so that the admin can turn off rx checksum if wanted. Many thanks to Andreas Schwab and Mathieu Malaterre for their help in debugging this issue. Signed-off-by: Eric Dumazet Reported-by: Meelis Roos Reported-by: Mathieu Malaterre Reported-by: Andreas Schwab Tested-by: Andreas Schwab Signed-off-by: David S. Miller --- drivers/net/ethernet/sun/sungem.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/sun/sungem.c b/drivers/net/ethernet/sun/sungem.c index 7a16d40a72d1..b9221fc1674d 100644 --- a/drivers/net/ethernet/sun/sungem.c +++ b/drivers/net/ethernet/sun/sungem.c @@ -60,8 +60,7 @@ #include #include "sungem.h" -/* Stripping FCS is causing problems, disabled for now */ -#undef STRIP_FCS +#define STRIP_FCS #define DEFAULT_MSG (NETIF_MSG_DRV | \ NETIF_MSG_PROBE | \ @@ -435,7 +434,7 @@ static int gem_rxmac_reset(struct gem *gp) writel(desc_dma & 0xffffffff, gp->regs + RXDMA_DBLOW); writel(RX_RING_SIZE - 4, gp->regs + RXDMA_KICK); val = (RXDMA_CFG_BASE | (RX_OFFSET << 10) | - ((14 / 2) << 13) | RXDMA_CFG_FTHRESH_128); + (ETH_HLEN << 13) | RXDMA_CFG_FTHRESH_128); writel(val, gp->regs + RXDMA_CFG); if (readl(gp->regs + GREG_BIFCFG) & GREG_BIFCFG_M66EN) writel(((5 & RXDMA_BLANK_IPKTS) | @@ -760,7 +759,6 @@ static int gem_rx(struct gem *gp, int work_to_do) struct net_device *dev = gp->dev; int entry, drops, work_done = 0; u32 done; - __sum16 csum; if (netif_msg_rx_status(gp)) printk(KERN_DEBUG "%s: rx interrupt, done: %d, rx_new: %d\n", @@ -855,9 +853,13 @@ static int gem_rx(struct gem *gp, int work_to_do) skb = copy_skb; } - csum = (__force __sum16)htons((status & RXDCTRL_TCPCSUM) ^ 0xffff); - skb->csum = csum_unfold(csum); - skb->ip_summed = CHECKSUM_COMPLETE; + if (likely(dev->features & NETIF_F_RXCSUM)) { + __sum16 csum; + + csum = (__force __sum16)htons((status & RXDCTRL_TCPCSUM) ^ 0xffff); + skb->csum = csum_unfold(csum); + skb->ip_summed = CHECKSUM_COMPLETE; + } skb->protocol = eth_type_trans(skb, gp->dev); napi_gro_receive(&gp->napi, skb); @@ -1761,7 +1763,7 @@ static void gem_init_dma(struct gem *gp) writel(0, gp->regs + TXDMA_KICK); val = (RXDMA_CFG_BASE | (RX_OFFSET << 10) | - ((14 / 2) << 13) | RXDMA_CFG_FTHRESH_128); + (ETH_HLEN << 13) | RXDMA_CFG_FTHRESH_128); writel(val, gp->regs + RXDMA_CFG); writel(desc_dma >> 32, gp->regs + RXDMA_DBHI); @@ -2985,8 +2987,8 @@ static int gem_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_drvdata(pdev, dev); /* We can do scatter/gather and HW checksum */ - dev->hw_features = NETIF_F_SG | NETIF_F_HW_CSUM; - dev->features |= dev->hw_features | NETIF_F_RXCSUM; + dev->hw_features = NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM; + dev->features = dev->hw_features; if (pci_using_dac) dev->features |= NETIF_F_HIGHDMA; From bc8a2d9bcbf1ca548b1deb315d14e1da81945bea Mon Sep 17 00:00:00 2001 From: Dinh Nguyen Date: Tue, 19 Jun 2018 10:35:38 -0500 Subject: [PATCH 245/572] net: stmmac: socfpga: add additional ocp reset line for Stratix10 The Stratix10 platform has an additional reset line, OCP(Open Core Protocol), that also needs to get deasserted for the stmmac ethernet controller to work. Thus we need to update the Kconfig to include ARCH_STRATIX10 in order to build dwmac-socfpga. Also, remove the redundant check for the reset controller pointer. The reset driver already checks for the pointer and returns 0 if the pointer is NULL. Signed-off-by: Dinh Nguyen Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/Kconfig | 2 +- .../ethernet/stmicro/stmmac/dwmac-socfpga.c | 18 ++++++++++++++---- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index cb5b0f58c395..edf20361ea5f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -111,7 +111,7 @@ config DWMAC_ROCKCHIP config DWMAC_SOCFPGA tristate "SOCFPGA dwmac support" default ARCH_SOCFPGA - depends on OF && (ARCH_SOCFPGA || COMPILE_TEST) + depends on OF && (ARCH_SOCFPGA || ARCH_STRATIX10 || COMPILE_TEST) select MFD_SYSCON help Support for ethernet controller on Altera SOCFPGA diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index 6e359572b9f0..5b3b06a0a3bf 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -55,6 +55,7 @@ struct socfpga_dwmac { struct device *dev; struct regmap *sys_mgr_base_addr; struct reset_control *stmmac_rst; + struct reset_control *stmmac_ocp_rst; void __iomem *splitter_base; bool f2h_ptp_ref_clk; struct tse_pcs pcs; @@ -262,8 +263,8 @@ static int socfpga_dwmac_set_phy_mode(struct socfpga_dwmac *dwmac) val = SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_GMII_MII; /* Assert reset to the enet controller before changing the phy mode */ - if (dwmac->stmmac_rst) - reset_control_assert(dwmac->stmmac_rst); + reset_control_assert(dwmac->stmmac_ocp_rst); + reset_control_assert(dwmac->stmmac_rst); regmap_read(sys_mgr_base_addr, reg_offset, &ctrl); ctrl &= ~(SYSMGR_EMACGRP_CTRL_PHYSEL_MASK << reg_shift); @@ -288,8 +289,8 @@ static int socfpga_dwmac_set_phy_mode(struct socfpga_dwmac *dwmac) /* Deassert reset for the phy configuration to be sampled by * the enet controller, and operation to start in requested mode */ - if (dwmac->stmmac_rst) - reset_control_deassert(dwmac->stmmac_rst); + reset_control_deassert(dwmac->stmmac_ocp_rst); + reset_control_deassert(dwmac->stmmac_rst); if (phymode == PHY_INTERFACE_MODE_SGMII) { if (tse_pcs_init(dwmac->pcs.tse_pcs_base, &dwmac->pcs) != 0) { dev_err(dwmac->dev, "Unable to initialize TSE PCS"); @@ -324,6 +325,15 @@ static int socfpga_dwmac_probe(struct platform_device *pdev) goto err_remove_config_dt; } + dwmac->stmmac_ocp_rst = devm_reset_control_get_optional(dev, "stmmaceth-ocp"); + if (IS_ERR(dwmac->stmmac_ocp_rst)) { + ret = PTR_ERR(dwmac->stmmac_ocp_rst); + dev_err(dev, "error getting reset control of ocp %d\n", ret); + goto err_remove_config_dt; + } + + reset_control_deassert(dwmac->stmmac_ocp_rst); + ret = socfpga_dwmac_parse_data(dwmac, dev); if (ret) { dev_err(dev, "Unable to parse OF data\n"); From b6cfffa7ad923c73f317ea50fd4ebcb3b4b6669c Mon Sep 17 00:00:00 2001 From: Bhadram Varka Date: Sun, 17 Jun 2018 20:02:05 +0530 Subject: [PATCH 246/572] stmmac: fix DMA channel hang in half-duplex mode HW does not support Half-duplex mode in multi-queue scenario. Fix it by not advertising the Half-Duplex mode if multi-queue enabled. Signed-off-by: Bhadram Varka Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index e79b0d7b388a..cba46b62a1cd 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -928,6 +928,7 @@ static void stmmac_check_pcs_mode(struct stmmac_priv *priv) static int stmmac_init_phy(struct net_device *dev) { struct stmmac_priv *priv = netdev_priv(dev); + u32 tx_cnt = priv->plat->tx_queues_to_use; struct phy_device *phydev; char phy_id_fmt[MII_BUS_ID_SIZE + 3]; char bus_id[MII_BUS_ID_SIZE]; @@ -968,6 +969,15 @@ static int stmmac_init_phy(struct net_device *dev) phydev->advertising &= ~(SUPPORTED_1000baseT_Half | SUPPORTED_1000baseT_Full); + /* + * Half-duplex mode not supported with multiqueue + * half-duplex can only works with single queue + */ + if (tx_cnt > 1) + phydev->supported &= ~(SUPPORTED_1000baseT_Half | + SUPPORTED_100baseT_Half | + SUPPORTED_10baseT_Half); + /* * Broken HW is sometimes missing the pull-up resistor on the * MDIO line, which results in reads to non-existent devices returning From 9887cba19978a5f288100ef90a37684cc8d5e0a6 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 19 Jun 2018 12:47:52 -0400 Subject: [PATCH 247/572] ip: limit use of gso_size to udp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ipcm(6)_cookie field gso_size is set only in the udp path. The ip layer copies this to cork only if sk_type is SOCK_DGRAM. This check proved too permissive. Ping and l2tp sockets have the same type. Limit to sockets of type SOCK_DGRAM and protocol IPPROTO_UDP to exclude ping sockets. v1 -> v2 - remove irrelevant whitespace changes Fixes: bec1f6f69736 ("udp: generate gso with UDP_SEGMENT") Reported-by: Maciej Żenczykowski Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 3 ++- net/ipv6/ip6_output.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index af5a830ff6ad..b3308e9d9762 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1145,7 +1145,8 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, cork->fragsize = ip_sk_use_pmtu(sk) ? dst_mtu(&rt->dst) : rt->dst.dev->mtu; - cork->gso_size = sk->sk_type == SOCK_DGRAM ? ipc->gso_size : 0; + cork->gso_size = sk->sk_type == SOCK_DGRAM && + sk->sk_protocol == IPPROTO_UDP ? ipc->gso_size : 0; cork->dst = &rt->dst; cork->length = 0; cork->ttl = ipc->ttl; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 021e5aef6ba3..a14fb4fcdf18 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1219,7 +1219,8 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, if (mtu < IPV6_MIN_MTU) return -EINVAL; cork->base.fragsize = mtu; - cork->base.gso_size = sk->sk_type == SOCK_DGRAM ? ipc6->gso_size : 0; + cork->base.gso_size = sk->sk_type == SOCK_DGRAM && + sk->sk_protocol == IPPROTO_UDP ? ipc6->gso_size : 0; if (dst_allfrag(xfrm_dst_path(&rt->dst))) cork->base.flags |= IPCORK_ALLFRAG; From f5b65348fd77839b50e79bc0a5e536832ea52d8d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 20 Jun 2018 09:47:20 +0900 Subject: [PATCH 248/572] proc: fix missing final NUL in get_mm_cmdline() rewrite The rewrite of the cmdline fetching missed the fact that we used to also return the final terminating NUL character of the last argument. I hadn't noticed, and none of the tools I tested cared, but something obviously must care, because Michal Kubecek noticed the change in behavior. Tweak the "find the end" logic to actually include the NUL character, and once past the eend of argv, always start the strnlen() at the expected (original) argument end. This whole "allow people to rewrite their arguments in place" is a nasty hack and requires that odd slop handling at the end of the argv array, but it's our traditional model, so we continue to support it. Repored-and-bisected-by: Michal Kubecek Reviewed-and-tested-by: Michal Kubecek Cc: Alexey Dobriyan Signed-off-by: Linus Torvalds --- fs/proc/base.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index b6572944efc3..aaffc0c30216 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -235,6 +235,10 @@ static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf, if (env_start != arg_end || env_start >= env_end) env_start = env_end = arg_end; + /* .. and limit it to a maximum of one page of slop */ + if (env_end >= arg_end + PAGE_SIZE) + env_end = arg_end + PAGE_SIZE - 1; + /* We're not going to care if "*ppos" has high bits set */ pos = arg_start + *ppos; @@ -254,10 +258,19 @@ static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf, while (count) { int got; size_t size = min_t(size_t, PAGE_SIZE, count); + long offset; - got = access_remote_vm(mm, pos, page, size, FOLL_ANON); - if (got <= 0) + /* + * Are we already starting past the official end? + * We always include the last byte that is *supposed* + * to be NUL + */ + offset = (pos >= arg_end) ? pos - arg_end + 1 : 0; + + got = access_remote_vm(mm, pos - offset, page, size + offset, FOLL_ANON); + if (got <= offset) break; + got -= offset; /* Don't walk past a NUL character once you hit arg_end */ if (pos + got >= arg_end) { @@ -276,12 +289,17 @@ static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf, n = arg_end - pos - 1; /* Cut off at first NUL after 'n' */ - got = n + strnlen(page+n, got-n); - if (!got) + got = n + strnlen(page+n, offset+got-n); + if (got < offset) break; + got -= offset; + + /* Include the NUL if it existed */ + if (got < size) + got++; } - got -= copy_to_user(buf, page, got); + got -= copy_to_user(buf, page+offset, got); if (unlikely(!got)) { if (!len) len = -EFAULT; From 4bff980f920693693d7a529c06a1bd1e7f77603a Mon Sep 17 00:00:00 2001 From: Rodrigo Rivas Costa Date: Tue, 22 May 2018 22:10:06 +0200 Subject: [PATCH 249/572] HID: steam: use hid_device.driver_data instead of hid_set_drvdata() When creating the low-level hidraw device, the reference to steam_device was stored using hid_set_drvdata(). But this value is not guaranteed to be kept when set before calling probe. If this pointer is reset, it crashes when opening the emulated hidraw device. It looks like hid_set_drvdata() is for users "avobe" this hid_device, while hid_device.driver_data it for users "below" this one. In this case, we are creating a virtual hidraw device, so we must use hid_device.driver_data. Signed-off-by: Rodrigo Rivas Costa Tested-by: Mariusz Ceier Signed-off-by: Jiri Kosina --- drivers/hid/hid-steam.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c index cb86cc834201..0422ec2b13d2 100644 --- a/drivers/hid/hid-steam.c +++ b/drivers/hid/hid-steam.c @@ -573,7 +573,7 @@ static bool steam_is_valve_interface(struct hid_device *hdev) static int steam_client_ll_parse(struct hid_device *hdev) { - struct steam_device *steam = hid_get_drvdata(hdev); + struct steam_device *steam = hdev->driver_data; return hid_parse_report(hdev, steam->hdev->dev_rdesc, steam->hdev->dev_rsize); @@ -590,7 +590,7 @@ static void steam_client_ll_stop(struct hid_device *hdev) static int steam_client_ll_open(struct hid_device *hdev) { - struct steam_device *steam = hid_get_drvdata(hdev); + struct steam_device *steam = hdev->driver_data; int ret; ret = hid_hw_open(steam->hdev); @@ -605,7 +605,7 @@ static int steam_client_ll_open(struct hid_device *hdev) static void steam_client_ll_close(struct hid_device *hdev) { - struct steam_device *steam = hid_get_drvdata(hdev); + struct steam_device *steam = hdev->driver_data; mutex_lock(&steam->mutex); steam->client_opened = false; @@ -623,7 +623,7 @@ static int steam_client_ll_raw_request(struct hid_device *hdev, size_t count, unsigned char report_type, int reqtype) { - struct steam_device *steam = hid_get_drvdata(hdev); + struct steam_device *steam = hdev->driver_data; return hid_hw_raw_request(steam->hdev, reportnum, buf, count, report_type, reqtype); @@ -710,7 +710,7 @@ static int steam_probe(struct hid_device *hdev, ret = PTR_ERR(steam->client_hdev); goto client_hdev_fail; } - hid_set_drvdata(steam->client_hdev, steam); + steam->client_hdev->driver_data = steam; /* * With the real steam controller interface, do not connect hidraw. From ebeaa367548e9e92dd9374b9464ff6e7d157117b Mon Sep 17 00:00:00 2001 From: Even Xu Date: Fri, 12 Feb 2016 04:11:34 +0800 Subject: [PATCH 250/572] HID: intel_ish-hid: ipc: register more pm callbacks to support hibernation Current ISH driver only registers suspend/resume PM callbacks which don't support hibernation (suspend to disk). Basically after hiberation, the ISH can't resume properly and user may not see sensor events (for example: screen rotation may not work). User will not see a crash or panic or anything except the following message in log: hid-sensor-hub 001F:8086:22D8.0001: timeout waiting for response from ISHTP device So this patch adds support for S4/hiberbation to ISH by using the SIMPLE_DEV_PM_OPS() MACRO instead of struct dev_pm_ops directly. The suspend and resume functions will now be used for both suspend to RAM and hibernation. If power management is disabled, SIMPLE_DEV_PM_OPS will do nothing, the suspend and resume related functions won't be used, so mark them as __maybe_unused to clarify that this is the intended behavior, and remove #ifdefs for power management. Cc: stable@vger.kernel.org Signed-off-by: Even Xu Acked-by: Srinivas Pandruvada Signed-off-by: Jiri Kosina --- drivers/hid/intel-ish-hid/ipc/pci-ish.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/drivers/hid/intel-ish-hid/ipc/pci-ish.c b/drivers/hid/intel-ish-hid/ipc/pci-ish.c index 582e449be9fe..a2c53ea3b5ed 100644 --- a/drivers/hid/intel-ish-hid/ipc/pci-ish.c +++ b/drivers/hid/intel-ish-hid/ipc/pci-ish.c @@ -205,8 +205,7 @@ static void ish_remove(struct pci_dev *pdev) kfree(ishtp_dev); } -#ifdef CONFIG_PM -static struct device *ish_resume_device; +static struct device __maybe_unused *ish_resume_device; /* 50ms to get resume response */ #define WAIT_FOR_RESUME_ACK_MS 50 @@ -220,7 +219,7 @@ static struct device *ish_resume_device; * in that case a simple resume message is enough, others we need * a reset sequence. */ -static void ish_resume_handler(struct work_struct *work) +static void __maybe_unused ish_resume_handler(struct work_struct *work) { struct pci_dev *pdev = to_pci_dev(ish_resume_device); struct ishtp_device *dev = pci_get_drvdata(pdev); @@ -262,7 +261,7 @@ static void ish_resume_handler(struct work_struct *work) * * Return: 0 to the pm core */ -static int ish_suspend(struct device *device) +static int __maybe_unused ish_suspend(struct device *device) { struct pci_dev *pdev = to_pci_dev(device); struct ishtp_device *dev = pci_get_drvdata(pdev); @@ -288,7 +287,7 @@ static int ish_suspend(struct device *device) return 0; } -static DECLARE_WORK(resume_work, ish_resume_handler); +static __maybe_unused DECLARE_WORK(resume_work, ish_resume_handler); /** * ish_resume() - ISH resume callback * @device: device pointer @@ -297,7 +296,7 @@ static DECLARE_WORK(resume_work, ish_resume_handler); * * Return: 0 to the pm core */ -static int ish_resume(struct device *device) +static int __maybe_unused ish_resume(struct device *device) { struct pci_dev *pdev = to_pci_dev(device); struct ishtp_device *dev = pci_get_drvdata(pdev); @@ -311,21 +310,14 @@ static int ish_resume(struct device *device) return 0; } -static const struct dev_pm_ops ish_pm_ops = { - .suspend = ish_suspend, - .resume = ish_resume, -}; -#define ISHTP_ISH_PM_OPS (&ish_pm_ops) -#else -#define ISHTP_ISH_PM_OPS NULL -#endif /* CONFIG_PM */ +static SIMPLE_DEV_PM_OPS(ish_pm_ops, ish_suspend, ish_resume); static struct pci_driver ish_driver = { .name = KBUILD_MODNAME, .id_table = ish_pci_tbl, .probe = ish_probe, .remove = ish_remove, - .driver.pm = ISHTP_ISH_PM_OPS, + .driver.pm = &ish_pm_ops, }; module_pci_driver(ish_driver); From d471b6b22d37bf9928c6d0202bdaaf76583b8b61 Mon Sep 17 00:00:00 2001 From: Jason Gerecke Date: Tue, 12 Jun 2018 13:42:46 -0700 Subject: [PATCH 251/572] HID: wacom: Correct logical maximum Y for 2nd-gen Intuos Pro large The HID descriptor for the 2nd-gen Intuos Pro large (PTH-860) contains a typo which defines an incorrect logical maximum Y value. This causes a small portion of the bottom of the tablet to become unusable (both because the area is below the "bottom" of the tablet and because 'wacom_wac_event' ignores out-of-range values). It also results in a skewed aspect ratio. To fix this, we add a quirk to 'wacom_usage_mapping' which overwrites the data with the correct value. Signed-off-by: Jason Gerecke CC: stable@vger.kernel.org # v4.10+ Signed-off-by: Jiri Kosina --- drivers/hid/wacom_sys.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index ee7a37eb159a..545986cfb978 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -395,6 +395,14 @@ static void wacom_usage_mapping(struct hid_device *hdev, } } + /* 2nd-generation Intuos Pro Large has incorrect Y maximum */ + if (hdev->vendor == USB_VENDOR_ID_WACOM && + hdev->product == 0x0358 && + WACOM_PEN_FIELD(field) && + wacom_equivalent_usage(usage->hid) == HID_GD_Y) { + field->logical_maximum = 43200; + } + switch (usage->hid) { case HID_GD_X: features->x_max = field->logical_maximum; From a507a3065c09d69677c502905e597750da3a9815 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Tue, 19 Jun 2018 10:02:01 -0700 Subject: [PATCH 252/572] ACPI / processor: Finish making acpi_processor_ppc_has_changed() void Commit bca5f557dcea "ACPI / processor: Make acpi_processor_ppc_has_changed() void" changed one of the declarations of acpi_processor_ppc_has_changed() to return void, but the !CPU_FREQ version still returns int. Let's return void to be consistent. Fixes: bca5f557dcea "ACPI / processor: Make acpi_processor_ppc_has_changed() void" Signed-off-by: Brian Norris Signed-off-by: Rafael J. Wysocki --- include/acpi/processor.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 40a916efd7c0..1194a4c78d55 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -309,7 +309,7 @@ static inline void acpi_processor_ppc_exit(void) { return; } -static inline int acpi_processor_ppc_has_changed(struct acpi_processor *pr, +static inline void acpi_processor_ppc_has_changed(struct acpi_processor *pr, int event_flag) { static unsigned int printout = 1; @@ -320,7 +320,6 @@ static inline int acpi_processor_ppc_has_changed(struct acpi_processor *pr, "Consider compiling CPUfreq support into your kernel.\n"); printout = 0; } - return 0; } static inline int acpi_processor_get_bios_limit(int cpu, unsigned int *limit) { From 9560ba306df3e46b4b1037d101e2e4ca68610f55 Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Thu, 7 Jun 2018 18:37:59 -0700 Subject: [PATCH 253/572] quota: reclaim least recently used dquots The dquots in the free_dquots list are not reclaimed in LRU way. put_dquot_last() puts entries to the tail and dqcache_shrink_scan() frees from the tail. Free unreferenced dquots in LRU order because it seems more reasonable than freeing most recently used. Signed-off-by: Greg Thelen Signed-off-by: Shakeel Butt Signed-off-by: Jan Kara --- fs/quota/dquot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index d88231e3b2be..241b00f835b9 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -716,7 +716,7 @@ dqcache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) unsigned long freed = 0; spin_lock(&dq_list_lock); - head = free_dquots.prev; + head = free_dquots.next; while (head != &free_dquots && sc->nr_to_scan) { dquot = list_entry(head, struct dquot, dq_free); remove_dquot_hash(dquot); @@ -725,7 +725,7 @@ dqcache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) do_destroy_dquot(dquot); sc->nr_to_scan--; freed++; - head = free_dquots.prev; + head = free_dquots.next; } spin_unlock(&dq_list_lock); return freed; From 1822193b5d4fd5d9800e6a7ed141375b8e8e68eb Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 11 Jun 2018 12:14:45 +0200 Subject: [PATCH 254/572] quota: Cleanup list iteration in dqcache_shrink_scan() Use list_first_entry() and list_empty() instead of opencoded variants. Reviewed-by: Matthew Wilcox Signed-off-by: Jan Kara --- fs/quota/dquot.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 241b00f835b9..fc20e06c56ba 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -711,21 +711,18 @@ EXPORT_SYMBOL(dquot_quota_sync); static unsigned long dqcache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) { - struct list_head *head; struct dquot *dquot; unsigned long freed = 0; spin_lock(&dq_list_lock); - head = free_dquots.next; - while (head != &free_dquots && sc->nr_to_scan) { - dquot = list_entry(head, struct dquot, dq_free); + while (!list_empty(&free_dquots) && sc->nr_to_scan) { + dquot = list_first_entry(&free_dquots, struct dquot, dq_free); remove_dquot_hash(dquot); remove_free_dquot(dquot); remove_inuse(dquot); do_destroy_dquot(dquot); sc->nr_to_scan--; freed++; - head = free_dquots.next; } spin_unlock(&dq_list_lock); return freed; From 27e6ed54a30a00d6520ddb4518214df8ff99daf1 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Fri, 8 Jun 2018 10:53:40 +0800 Subject: [PATCH 255/572] ext2: add warning when specifying nocheck option The option nocheck(nocheck/check=none) is useless but considering backwards compatibility it's better to print warning for a while before completely remove from the code. This patch add proper warning message for option 'nocheck' and remove unnecessary comment/function declaration which is used for removed option 'check'. Signed-off-by: Chengguang Xu Signed-off-by: Jan Kara --- fs/ext2/ext2.h | 2 -- fs/ext2/super.c | 6 +++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index cc40802ddfa8..00e759f05161 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -748,7 +748,6 @@ extern void ext2_free_blocks (struct inode *, unsigned long, unsigned long); extern unsigned long ext2_count_free_blocks (struct super_block *); extern unsigned long ext2_count_dirs (struct super_block *); -extern void ext2_check_blocks_bitmap (struct super_block *); extern struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb, unsigned int block_group, struct buffer_head ** bh); @@ -771,7 +770,6 @@ extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page extern struct inode * ext2_new_inode (struct inode *, umode_t, const struct qstr *); extern void ext2_free_inode (struct inode *); extern unsigned long ext2_count_free_inodes (struct super_block *); -extern void ext2_check_inodes_bitmap (struct super_block *); extern unsigned long ext2_count_free (struct buffer_head *, unsigned); /* inode.c */ diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 25ab1274090f..8ff53f8da3bc 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -557,6 +557,9 @@ static int parse_options(char *options, struct super_block *sb, set_opt (opts->s_mount_opt, NO_UID32); break; case Opt_nocheck: + ext2_msg(sb, KERN_WARNING, + "Option nocheck/check=none is deprecated and" + " will be removed in June 2020."); clear_opt (opts->s_mount_opt, CHECK); break; case Opt_debug: @@ -1335,9 +1338,6 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) new_opts.s_resgid = sbi->s_resgid; spin_unlock(&sbi->s_lock); - /* - * Allow the "check" option to be passed as a remount option. - */ if (!parse_options(data, sb, &new_opts)) return -EINVAL; From fa65653e575fbd958bdf5fb9c4a71a324e39510d Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 13 Jun 2018 12:09:22 +0200 Subject: [PATCH 256/572] udf: Detect incorrect directory size Detect when a directory entry is (possibly partially) beyond directory size and return EIO in that case since it means the filesystem is corrupted. Otherwise directory operations can further corrupt the directory and possibly also oops the kernel. CC: Anatoly Trosinenko CC: stable@vger.kernel.org Reported-and-tested-by: Anatoly Trosinenko Signed-off-by: Jan Kara --- fs/udf/directory.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/udf/directory.c b/fs/udf/directory.c index 0a98a2369738..3835f983cc99 100644 --- a/fs/udf/directory.c +++ b/fs/udf/directory.c @@ -152,6 +152,9 @@ struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos, sizeof(struct fileIdentDesc)); } } + /* Got last entry outside of dir size - fs is corrupted! */ + if (*nf_pos > dir->i_size) + return NULL; return fi; } From f2e83347119acc0412941c5a23d895624c9300e2 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 13 Jun 2018 17:30:14 +0200 Subject: [PATCH 257/572] udf: Provide function for calculating dir entry length Provide function for calculating directory entry length and use to reduce code duplication. Signed-off-by: Jan Kara --- fs/udf/directory.c | 5 +---- fs/udf/namei.c | 14 +++----------- fs/udf/udfdecl.h | 6 ++++++ 3 files changed, 10 insertions(+), 15 deletions(-) diff --git a/fs/udf/directory.c b/fs/udf/directory.c index 3835f983cc99..d9523013096f 100644 --- a/fs/udf/directory.c +++ b/fs/udf/directory.c @@ -141,10 +141,7 @@ struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos, fibh->ebh->b_data, sizeof(struct fileIdentDesc) + fibh->soffset); - fi_len = (sizeof(struct fileIdentDesc) + - cfi->lengthFileIdent + - le16_to_cpu(cfi->lengthOfImpUse) + 3) & ~3; - + fi_len = udf_dir_entry_len(cfi); *nf_pos += fi_len - (fibh->eoffset - fibh->soffset); fibh->eoffset = fibh->soffset + fi_len; } else { diff --git a/fs/udf/namei.c b/fs/udf/namei.c index c586026508db..06f37ddd2997 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -351,8 +351,6 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir, loff_t f_pos; loff_t size = udf_ext0_offset(dir) + dir->i_size; int nfidlen; - uint8_t lfi; - uint16_t liu; udf_pblk_t block; struct kernel_lb_addr eloc; uint32_t elen = 0; @@ -383,7 +381,7 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir, namelen = 0; } - nfidlen = (sizeof(struct fileIdentDesc) + namelen + 3) & ~3; + nfidlen = ALIGN(sizeof(struct fileIdentDesc) + namelen, UDF_NAME_PAD); f_pos = udf_ext0_offset(dir); @@ -424,12 +422,8 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir, goto out_err; } - liu = le16_to_cpu(cfi->lengthOfImpUse); - lfi = cfi->lengthFileIdent; - if ((cfi->fileCharacteristics & FID_FILE_CHAR_DELETED) != 0) { - if (((sizeof(struct fileIdentDesc) + - liu + lfi + 3) & ~3) == nfidlen) { + if (udf_dir_entry_len(cfi) == nfidlen) { cfi->descTag.tagSerialNum = cpu_to_le16(1); cfi->fileVersionNum = cpu_to_le16(1); cfi->fileCharacteristics = 0; @@ -1201,9 +1195,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry, if (dir_fi) { dir_fi->icb.extLocation = cpu_to_lelb(UDF_I(new_dir)->i_location); - udf_update_tag((char *)dir_fi, - (sizeof(struct fileIdentDesc) + - le16_to_cpu(dir_fi->lengthOfImpUse) + 3) & ~3); + udf_update_tag((char *)dir_fi, udf_dir_entry_len(dir_fi)); if (old_iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) mark_inode_dirty(old_inode); else diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index bae311b59400..ed890bc4416d 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h @@ -132,6 +132,12 @@ struct inode *udf_find_metadata_inode_efe(struct super_block *sb, extern int udf_write_fi(struct inode *inode, struct fileIdentDesc *, struct fileIdentDesc *, struct udf_fileident_bh *, uint8_t *, uint8_t *); +static inline unsigned int udf_dir_entry_len(struct fileIdentDesc *cfi) +{ + return ALIGN(sizeof(struct fileIdentDesc) + + le16_to_cpu(cfi->lengthOfImpUse) + cfi->lengthFileIdent, + UDF_NAME_PAD); +} /* file.c */ extern long udf_ioctl(struct file *, unsigned int, unsigned long); From 6c1e4d06a3808dc67dbce2d631f4c12574567dd5 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 13 Jun 2018 18:04:24 +0200 Subject: [PATCH 258/572] udf: Drop unused arguments of udf_delete_aext() udf_delete_aext() uses its last two arguments only as local variables. Drop them. Signed-off-by: Jan Kara --- fs/udf/balloc.c | 5 ++--- fs/udf/inode.c | 8 ++++---- fs/udf/udfdecl.h | 3 +-- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c index 1b961b1d9699..fcda0fc97b90 100644 --- a/fs/udf/balloc.c +++ b/fs/udf/balloc.c @@ -533,8 +533,7 @@ static int udf_table_prealloc_blocks(struct super_block *sb, udf_write_aext(table, &epos, &eloc, (etype << 30) | elen, 1); } else - udf_delete_aext(table, epos, eloc, - (etype << 30) | elen); + udf_delete_aext(table, epos); } else { alloc_count = 0; } @@ -630,7 +629,7 @@ static udf_pblk_t udf_table_new_block(struct super_block *sb, if (goal_elen) udf_write_aext(table, &goal_epos, &goal_eloc, goal_elen, 1); else - udf_delete_aext(table, goal_epos, goal_eloc, goal_elen); + udf_delete_aext(table, goal_epos); brelse(goal_epos.bh); udf_add_free_space(sb, partition, -1); diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 7f39d17352c9..9915a58fbabd 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -1147,8 +1147,7 @@ static void udf_update_extents(struct inode *inode, struct kernel_long_ad *laarr if (startnum > endnum) { for (i = 0; i < (startnum - endnum); i++) - udf_delete_aext(inode, *epos, laarr[i].extLocation, - laarr[i].extLength); + udf_delete_aext(inode, *epos); } else if (startnum < endnum) { for (i = 0; i < (endnum - startnum); i++) { udf_insert_aext(inode, *epos, laarr[i].extLocation, @@ -2176,14 +2175,15 @@ static int8_t udf_insert_aext(struct inode *inode, struct extent_position epos, return (nelen >> 30); } -int8_t udf_delete_aext(struct inode *inode, struct extent_position epos, - struct kernel_lb_addr eloc, uint32_t elen) +int8_t udf_delete_aext(struct inode *inode, struct extent_position epos) { struct extent_position oepos; int adsize; int8_t etype; struct allocExtDesc *aed; struct udf_inode_info *iinfo; + struct kernel_lb_addr eloc; + uint32_t elen; if (epos.bh) { get_bh(epos.bh); diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index ed890bc4416d..84c47dde4d26 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h @@ -173,8 +173,7 @@ extern int udf_add_aext(struct inode *, struct extent_position *, struct kernel_lb_addr *, uint32_t, int); extern void udf_write_aext(struct inode *, struct extent_position *, struct kernel_lb_addr *, uint32_t, int); -extern int8_t udf_delete_aext(struct inode *, struct extent_position, - struct kernel_lb_addr, uint32_t); +extern int8_t udf_delete_aext(struct inode *, struct extent_position); extern int8_t udf_next_aext(struct inode *, struct extent_position *, struct kernel_lb_addr *, uint32_t *, int); extern int8_t udf_current_aext(struct inode *, struct extent_position *, From 03eeafdd9ab06a770d42c2b264d50dff7e2f4eee Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 24 May 2018 09:26:38 -0400 Subject: [PATCH 259/572] locking/rwsem: Fix up_read_non_owner() warning with DEBUG_RWSEMS It was found that the use of up_read_non_owner() in NFS was causing the following warning when DEBUG_RWSEMS was configured. DEBUG_LOCKS_WARN_ON(sem->owner != ((struct task_struct *)(1UL << 0))) Looking into the rwsem.c file, it was discovered that the corresponding down_read_non_owner() function was not setting the owner field properly. This is fixed now, and the warning should be gone. Fixes: 5149cbac4235 ("locking/rwsem: Add DEBUG_RWSEMS to look for lock/unlock mismatches") Signed-off-by: Waiman Long Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Tested-by: Gavin Schenk Cc: Davidlohr Bueso Cc: Dan Williams Cc: Arnd Bergmann Cc: linux-nfs@vger.kernel.org Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1527168398-4291-1-git-send-email-longman@redhat.com --- kernel/locking/rwsem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index bc1e507be9ff..776308d2fa9e 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -181,6 +181,7 @@ void down_read_non_owner(struct rw_semaphore *sem) might_sleep(); __down_read(sem); + rwsem_set_reader_owned(sem); } EXPORT_SYMBOL(down_read_non_owner); From 3ae6295ccb7cf6d344908209701badbbbb503e40 Mon Sep 17 00:00:00 2001 From: Siarhei Liakh Date: Thu, 14 Jun 2018 19:36:07 +0000 Subject: [PATCH 260/572] x86: Call fixup_exception() before notify_die() in math_error() fpu__drop() has an explicit fwait which under some conditions can trigger a fixable FPU exception while in kernel. Thus, we should attempt to fixup the exception first, and only call notify_die() if the fixup failed just like in do_general_protection(). The original call sequence incorrectly triggers KDB entry on debug kernels under particular FPU-intensive workloads. Andy noted, that this makes the whole conditional irq enable thing even more inconsistent, but fixing that it outside the scope of this. Signed-off-by: Siarhei Liakh Signed-off-by: Thomas Gleixner Reviewed-by: Andy Lutomirski Cc: "H. Peter Anvin" Cc: "Borislav Petkov" Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/DM5PR11MB201156F1CAB2592B07C79A03B17D0@DM5PR11MB2011.namprd11.prod.outlook.com --- arch/x86/kernel/traps.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 03f3d7695dac..162a31d80ad5 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -834,16 +834,18 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr) char *str = (trapnr == X86_TRAP_MF) ? "fpu exception" : "simd exception"; - if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, SIGFPE) == NOTIFY_STOP) - return; cond_local_irq_enable(regs); if (!user_mode(regs)) { - if (!fixup_exception(regs, trapnr)) { - task->thread.error_code = error_code; - task->thread.trap_nr = trapnr; + if (fixup_exception(regs, trapnr)) + return; + + task->thread.error_code = error_code; + task->thread.trap_nr = trapnr; + + if (notify_die(DIE_TRAP, str, regs, error_code, + trapnr, SIGFPE) != NOTIFY_STOP) die(str, regs, error_code); - } return; } From 3d0641015bf73aaa1cb54c936674959e7805070f Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 19 Jun 2018 15:34:09 +0300 Subject: [PATCH 261/572] nvme-rdma: fix possible double free condition when failing to create a controller Failures after nvme_init_ctrl will defer resource cleanups to .free_ctrl when the reference is released, hence we should not free the controller queues for these failures. Fix that by moving controller queues allocation before controller initialization and correctly freeing them for failures before initialization and skip them for failures after initialization. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index c9424da0d23e..bcb0e5d6343d 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -888,9 +888,9 @@ static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl) list_del(&ctrl->list); mutex_unlock(&nvme_rdma_ctrl_mutex); - kfree(ctrl->queues); nvmf_free_options(nctrl->opts); free_ctrl: + kfree(ctrl->queues); kfree(ctrl); } @@ -1932,11 +1932,6 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, goto out_free_ctrl; } - ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_rdma_ctrl_ops, - 0 /* no quirks, we're perfect! */); - if (ret) - goto out_free_ctrl; - INIT_DELAYED_WORK(&ctrl->reconnect_work, nvme_rdma_reconnect_ctrl_work); INIT_WORK(&ctrl->err_work, nvme_rdma_error_recovery_work); @@ -1950,14 +1945,19 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, ctrl->queues = kcalloc(ctrl->ctrl.queue_count, sizeof(*ctrl->queues), GFP_KERNEL); if (!ctrl->queues) - goto out_uninit_ctrl; + goto out_free_ctrl; + + ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_rdma_ctrl_ops, + 0 /* no quirks, we're perfect! */); + if (ret) + goto out_kfree_queues; changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING); WARN_ON_ONCE(!changed); ret = nvme_rdma_configure_admin_queue(ctrl, true); if (ret) - goto out_kfree_queues; + goto out_uninit_ctrl; /* sanity check icdoff */ if (ctrl->ctrl.icdoff) { @@ -2014,14 +2014,14 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, out_remove_admin_queue: nvme_rdma_destroy_admin_queue(ctrl, true); -out_kfree_queues: - kfree(ctrl->queues); out_uninit_ctrl: nvme_uninit_ctrl(&ctrl->ctrl); nvme_put_ctrl(&ctrl->ctrl); if (ret > 0) ret = -EIO; return ERR_PTR(ret); +out_kfree_queues: + kfree(ctrl->queues); out_free_ctrl: kfree(ctrl); return ERR_PTR(ret); From 94e42213cc1ae41c57819539c0130f8dfc69d718 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 19 Jun 2018 15:34:10 +0300 Subject: [PATCH 262/572] nvme-rdma: fix possible free of a non-allocated async event buffer If nvme_rdma_configure_admin_queue fails before we allocated the async event buffer, we will falsly free it because nvme_rdma_free_queue is freeing it. Fix it by allocating the buffer right after nvme_rdma_alloc_queue and free it right before nvme_rdma_queue_free to maintain orderly reverse cleanup sequence. Reported-by: Israel Rukshin Signed-off-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index bcb0e5d6343d..f9affb71ac85 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -560,12 +560,6 @@ static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue) if (!test_and_clear_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags)) return; - if (nvme_rdma_queue_idx(queue) == 0) { - nvme_rdma_free_qe(queue->device->dev, - &queue->ctrl->async_event_sqe, - sizeof(struct nvme_command), DMA_TO_DEVICE); - } - nvme_rdma_destroy_queue_ib(queue); rdma_destroy_id(queue->cm_id); } @@ -739,6 +733,8 @@ static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl, blk_cleanup_queue(ctrl->ctrl.admin_q); nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset); } + nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe, + sizeof(struct nvme_command), DMA_TO_DEVICE); nvme_rdma_free_queue(&ctrl->queues[0]); } @@ -755,11 +751,16 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev); + error = nvme_rdma_alloc_qe(ctrl->device->dev, &ctrl->async_event_sqe, + sizeof(struct nvme_command), DMA_TO_DEVICE); + if (error) + goto out_free_queue; + if (new) { ctrl->ctrl.admin_tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, true); if (IS_ERR(ctrl->ctrl.admin_tagset)) { error = PTR_ERR(ctrl->ctrl.admin_tagset); - goto out_free_queue; + goto out_free_async_qe; } ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); @@ -795,12 +796,6 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, if (error) goto out_stop_queue; - error = nvme_rdma_alloc_qe(ctrl->queues[0].device->dev, - &ctrl->async_event_sqe, sizeof(struct nvme_command), - DMA_TO_DEVICE); - if (error) - goto out_stop_queue; - return 0; out_stop_queue: @@ -811,6 +806,9 @@ out_cleanup_queue: out_free_tagset: if (new) nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset); +out_free_async_qe: + nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe, + sizeof(struct nvme_command), DMA_TO_DEVICE); out_free_queue: nvme_rdma_free_queue(&ctrl->queues[0]); return error; From c947657b15379505a9bba36a02005882b66abe57 Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Tue, 19 Jun 2018 15:34:11 +0300 Subject: [PATCH 263/572] nvme-rdma: Fix command completion race at error recovery The race is between completing the request at error recovery work and rdma completions. If we cancel the request before getting the good rdma completion we get a NULL deref of the request MR at nvme_rdma_process_nvme_rsp(). When Canceling the request we return its mr to the mr pool (set mr to NULL) and also unmap its data. Canceling the requests while the rdma queues are active is not safe. Because rdma queues are active and we get good rdma completions that can use the mr pointer which may be NULL. Completing the request too soon may lead also to performing DMA to/from user buffers which might have been already unmapped. The commit fixes the race by draining the QP before starting the abort commands mechanism. Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index f9affb71ac85..2815749f4dfb 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -728,7 +728,6 @@ out: static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl, bool remove) { - nvme_rdma_stop_queue(&ctrl->queues[0]); if (remove) { blk_cleanup_queue(ctrl->ctrl.admin_q); nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset); @@ -817,7 +816,6 @@ out_free_queue: static void nvme_rdma_destroy_io_queues(struct nvme_rdma_ctrl *ctrl, bool remove) { - nvme_rdma_stop_io_queues(ctrl); if (remove) { blk_cleanup_queue(ctrl->ctrl.connect_q); nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.tagset); @@ -947,6 +945,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) return; destroy_admin: + nvme_rdma_stop_queue(&ctrl->queues[0]); nvme_rdma_destroy_admin_queue(ctrl, false); requeue: dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n", @@ -963,12 +962,14 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work) if (ctrl->ctrl.queue_count > 1) { nvme_stop_queues(&ctrl->ctrl); + nvme_rdma_stop_io_queues(ctrl); blk_mq_tagset_busy_iter(&ctrl->tag_set, nvme_cancel_request, &ctrl->ctrl); nvme_rdma_destroy_io_queues(ctrl, false); } blk_mq_quiesce_queue(ctrl->ctrl.admin_q); + nvme_rdma_stop_queue(&ctrl->queues[0]); blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_cancel_request, &ctrl->ctrl); nvme_rdma_destroy_admin_queue(ctrl, false); @@ -1734,6 +1735,7 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown) { if (ctrl->ctrl.queue_count > 1) { nvme_stop_queues(&ctrl->ctrl); + nvme_rdma_stop_io_queues(ctrl); blk_mq_tagset_busy_iter(&ctrl->tag_set, nvme_cancel_request, &ctrl->ctrl); nvme_rdma_destroy_io_queues(ctrl, shutdown); @@ -1745,6 +1747,7 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown) nvme_disable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap); blk_mq_quiesce_queue(ctrl->ctrl.admin_q); + nvme_rdma_stop_queue(&ctrl->queues[0]); blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_cancel_request, &ctrl->ctrl); blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); @@ -2011,6 +2014,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, return &ctrl->ctrl; out_remove_admin_queue: + nvme_rdma_stop_queue(&ctrl->queues[0]); nvme_rdma_destroy_admin_queue(ctrl, true); out_uninit_ctrl: nvme_uninit_ctrl(&ctrl->ctrl); From 5e77d61cbc7e766778037127dab69e6410a8fc48 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 19 Jun 2018 15:34:13 +0300 Subject: [PATCH 264/572] nvme-rdma: don't override opts->queue_size That is user argument, and theoretically controller limits can change over time (over reconnects/resets). Instead, use the sqsize controller attribute to check queue depth boundaries and use it to the tagset allocation. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 2815749f4dfb..9544625c0b7d 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -692,7 +692,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl, set = &ctrl->tag_set; memset(set, 0, sizeof(*set)); set->ops = &nvme_rdma_mq_ops; - set->queue_depth = nctrl->opts->queue_size; + set->queue_depth = nctrl->sqsize + 1; set->reserved_tags = 1; /* fabric connect */ set->numa_node = NUMA_NO_NODE; set->flags = BLK_MQ_F_SHOULD_MERGE; @@ -1975,20 +1975,19 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, goto out_remove_admin_queue; } - if (opts->queue_size > ctrl->ctrl.maxcmd) { - /* warn if maxcmd is lower than queue_size */ - dev_warn(ctrl->ctrl.device, - "queue_size %zu > ctrl maxcmd %u, clamping down\n", - opts->queue_size, ctrl->ctrl.maxcmd); - opts->queue_size = ctrl->ctrl.maxcmd; - } - + /* only warn if argument is too large here, will clamp later */ if (opts->queue_size > ctrl->ctrl.sqsize + 1) { - /* warn if sqsize is lower than queue_size */ dev_warn(ctrl->ctrl.device, "queue_size %zu > ctrl sqsize %u, clamping down\n", opts->queue_size, ctrl->ctrl.sqsize + 1); - opts->queue_size = ctrl->ctrl.sqsize + 1; + } + + /* warn if maxcmd is lower than sqsize+1 */ + if (ctrl->ctrl.sqsize + 1 > ctrl->ctrl.maxcmd) { + dev_warn(ctrl->ctrl.device, + "sqsize %u > ctrl maxcmd %u, clamping down\n", + ctrl->ctrl.sqsize + 1, ctrl->ctrl.maxcmd); + ctrl->ctrl.sqsize = ctrl->ctrl.maxcmd - 1; } if (opts->nr_io_queues) { From d68a90e148f5a82aa67654c5012071e31c0e4baa Mon Sep 17 00:00:00 2001 From: Max Gurtuvoy Date: Tue, 19 Jun 2018 15:45:33 +0300 Subject: [PATCH 265/572] nvmet: reset keep alive timer in controller enable Controllers that are not yet enabled should not really enforce keep alive timeouts, but we still want to track a timeout and cleanup in case a host died before it enabled the controller. Hence, simply reset the keep alive timer when the controller is enabled. Suggested-by: Max Gurtovoy Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index a03da764ecae..74d4b785d2da 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -686,6 +686,14 @@ static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl) } ctrl->csts = NVME_CSTS_RDY; + + /* + * Controllers that are not yet enabled should not really enforce the + * keep alive timeout, but we still want to track a timeout and cleanup + * in case a host died before it enabled the controller. Hence, simply + * reset the keep alive timer when the controller is enabled. + */ + mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ); } static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl) From a1e79188628580465ac6d7a93a313336ee3364f1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 20 Jun 2018 13:45:05 +0300 Subject: [PATCH 266/572] blk-mq-debugfs: Off by one in blk_mq_rq_state_name() If rq_state == ARRAY_SIZE() then we read one element beyond the end of the blk_mq_rq_state_name_array[] array. Fixes: ec6dcf63c55c ("blk-mq-debugfs: Show more request state information") Reviewed-by: Bart Van Assche Signed-off-by: Dan Carpenter Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index ffa622366922..1c4532e92938 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -356,7 +356,7 @@ static const char *const blk_mq_rq_state_name_array[] = { static const char *blk_mq_rq_state_name(enum mq_rq_state rq_state) { - if (WARN_ON_ONCE((unsigned int)rq_state > + if (WARN_ON_ONCE((unsigned int)rq_state >= ARRAY_SIZE(blk_mq_rq_state_name_array))) return "(?)"; return blk_mq_rq_state_name_array[rq_state]; From bdd5ae3aa51939bb1fd26cd9fe7af07ca8c60397 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 6 Jun 2018 17:18:36 -0400 Subject: [PATCH 267/572] tools/power turbostat: fix show/hide issues resulting from mis-merge The --show and --hide options failed on "Node", which was listed as "Node%". The --show and --hide options were generally fouled-up do due to come content merges that scrambled the list of column name indexes. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 106 +++++++++++++------------- 1 file changed, 54 insertions(+), 52 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index d6cff3070ebd..f09a272941a1 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -381,19 +381,23 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr) } /* - * Each string in this array is compared in --show and --hide cmdline. - * Thus, strings that are proper sub-sets must follow their more specific peers. + * This list matches the column headers, except + * 1. built-in only, the sysfs counters are not here -- we learn of those at run-time + * 2. Core and CPU are moved to the end, we can't have strings that contain them + * matching on them for --show and --hide. */ struct msr_counter bic[] = { { 0x0, "usec" }, { 0x0, "Time_Of_Day_Seconds" }, { 0x0, "Package" }, + { 0x0, "Node" }, { 0x0, "Avg_MHz" }, + { 0x0, "Busy%" }, { 0x0, "Bzy_MHz" }, { 0x0, "TSC_MHz" }, { 0x0, "IRQ" }, { 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL}, - { 0x0, "Busy%" }, + { 0x0, "sysfs" }, { 0x0, "CPU%c1" }, { 0x0, "CPU%c3" }, { 0x0, "CPU%c6" }, @@ -424,15 +428,13 @@ struct msr_counter bic[] = { { 0x0, "Cor_J" }, { 0x0, "GFX_J" }, { 0x0, "RAM_J" }, - { 0x0, "Core" }, - { 0x0, "CPU" }, { 0x0, "Mod%c6" }, - { 0x0, "sysfs" }, { 0x0, "Totl%C0" }, { 0x0, "Any%C0" }, { 0x0, "GFX%C0" }, { 0x0, "CPUGFX%" }, - { 0x0, "Node%" }, + { 0x0, "Core" }, + { 0x0, "CPU" }, }; @@ -441,51 +443,51 @@ struct msr_counter bic[] = { #define BIC_USEC (1ULL << 0) #define BIC_TOD (1ULL << 1) #define BIC_Package (1ULL << 2) -#define BIC_Avg_MHz (1ULL << 3) -#define BIC_Bzy_MHz (1ULL << 4) -#define BIC_TSC_MHz (1ULL << 5) -#define BIC_IRQ (1ULL << 6) -#define BIC_SMI (1ULL << 7) -#define BIC_Busy (1ULL << 8) -#define BIC_CPU_c1 (1ULL << 9) -#define BIC_CPU_c3 (1ULL << 10) -#define BIC_CPU_c6 (1ULL << 11) -#define BIC_CPU_c7 (1ULL << 12) -#define BIC_ThreadC (1ULL << 13) -#define BIC_CoreTmp (1ULL << 14) -#define BIC_CoreCnt (1ULL << 15) -#define BIC_PkgTmp (1ULL << 16) -#define BIC_GFX_rc6 (1ULL << 17) -#define BIC_GFXMHz (1ULL << 18) -#define BIC_Pkgpc2 (1ULL << 19) -#define BIC_Pkgpc3 (1ULL << 20) -#define BIC_Pkgpc6 (1ULL << 21) -#define BIC_Pkgpc7 (1ULL << 22) -#define BIC_Pkgpc8 (1ULL << 23) -#define BIC_Pkgpc9 (1ULL << 24) -#define BIC_Pkgpc10 (1ULL << 25) -#define BIC_CPU_LPI (1ULL << 26) -#define BIC_SYS_LPI (1ULL << 27) -#define BIC_PkgWatt (1ULL << 26) -#define BIC_CorWatt (1ULL << 27) -#define BIC_GFXWatt (1ULL << 28) -#define BIC_PkgCnt (1ULL << 29) -#define BIC_RAMWatt (1ULL << 30) -#define BIC_PKG__ (1ULL << 31) -#define BIC_RAM__ (1ULL << 32) -#define BIC_Pkg_J (1ULL << 33) -#define BIC_Cor_J (1ULL << 34) -#define BIC_GFX_J (1ULL << 35) -#define BIC_RAM_J (1ULL << 36) -#define BIC_Core (1ULL << 37) -#define BIC_CPU (1ULL << 38) -#define BIC_Mod_c6 (1ULL << 39) -#define BIC_sysfs (1ULL << 40) -#define BIC_Totl_c0 (1ULL << 41) -#define BIC_Any_c0 (1ULL << 42) -#define BIC_GFX_c0 (1ULL << 43) -#define BIC_CPUGFX (1ULL << 44) -#define BIC_Node (1ULL << 45) +#define BIC_Node (1ULL << 3) +#define BIC_Avg_MHz (1ULL << 4) +#define BIC_Busy (1ULL << 5) +#define BIC_Bzy_MHz (1ULL << 6) +#define BIC_TSC_MHz (1ULL << 7) +#define BIC_IRQ (1ULL << 8) +#define BIC_SMI (1ULL << 9) +#define BIC_sysfs (1ULL << 10) +#define BIC_CPU_c1 (1ULL << 11) +#define BIC_CPU_c3 (1ULL << 12) +#define BIC_CPU_c6 (1ULL << 13) +#define BIC_CPU_c7 (1ULL << 14) +#define BIC_ThreadC (1ULL << 15) +#define BIC_CoreTmp (1ULL << 16) +#define BIC_CoreCnt (1ULL << 17) +#define BIC_PkgTmp (1ULL << 18) +#define BIC_GFX_rc6 (1ULL << 19) +#define BIC_GFXMHz (1ULL << 20) +#define BIC_Pkgpc2 (1ULL << 21) +#define BIC_Pkgpc3 (1ULL << 22) +#define BIC_Pkgpc6 (1ULL << 23) +#define BIC_Pkgpc7 (1ULL << 24) +#define BIC_Pkgpc8 (1ULL << 25) +#define BIC_Pkgpc9 (1ULL << 26) +#define BIC_Pkgpc10 (1ULL << 27) +#define BIC_CPU_LPI (1ULL << 28) +#define BIC_SYS_LPI (1ULL << 29) +#define BIC_PkgWatt (1ULL << 30) +#define BIC_CorWatt (1ULL << 31) +#define BIC_GFXWatt (1ULL << 32) +#define BIC_PkgCnt (1ULL << 33) +#define BIC_RAMWatt (1ULL << 34) +#define BIC_PKG__ (1ULL << 35) +#define BIC_RAM__ (1ULL << 36) +#define BIC_Pkg_J (1ULL << 37) +#define BIC_Cor_J (1ULL << 38) +#define BIC_GFX_J (1ULL << 39) +#define BIC_RAM_J (1ULL << 40) +#define BIC_Mod_c6 (1ULL << 41) +#define BIC_Totl_c0 (1ULL << 42) +#define BIC_Any_c0 (1ULL << 43) +#define BIC_GFX_c0 (1ULL << 44) +#define BIC_CPUGFX (1ULL << 45) +#define BIC_Core (1ULL << 46) +#define BIC_CPU (1ULL << 47) #define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD) From d9d226ffadbf4a8e60f5b8fc866aaa5028c7e479 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 6 Jun 2018 15:47:36 -0400 Subject: [PATCH 268/572] tools/power turbostat: decode cpuid.1.HT eg. the "HT" here: CPUID(1): SSE3 MONITOR - EIST TM2 TSC MSR ACPI-TM HT TM Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index f09a272941a1..b0f294bf89c3 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4394,7 +4394,7 @@ void process_cpuid() if (!quiet) { fprintf(outf, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n", max_level, family, model, stepping, family, model, stepping); - fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s\n", + fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n", ecx & (1 << 0) ? "SSE3" : "-", ecx & (1 << 3) ? "MONITOR" : "-", ecx & (1 << 6) ? "SMX" : "-", @@ -4403,6 +4403,7 @@ void process_cpuid() edx & (1 << 4) ? "TSC" : "-", edx & (1 << 5) ? "MSR" : "-", edx & (1 << 22) ? "ACPI-TM" : "-", + edx & (1 << 28) ? "HT" : "-", edx & (1 << 29) ? "TM" : "-"); } From 4c2122d42116ebaa1665ad0fbef2c558fdc0e3c6 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 6 Jun 2018 17:44:48 -0400 Subject: [PATCH 269/572] tools/power turbostat: add optional APIC X2APIC columns Add APIC and X2APIC columns to the topology section. They are disabled-by-default -- enable like so: --debug or --enable APIC,X2APIC Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 2 +- tools/power/x86/turbostat/turbostat.c | 75 ++++++++++++++++++++++++--- 2 files changed, 68 insertions(+), 9 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index ca9ef7017624..d39e4ff7d0bf 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -56,7 +56,7 @@ name as necessary to disambiguate it from others is necessary. Note that option .PP \fB--hide column\fP do not show the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--hide sysfs" to hide the sysfs statistics columns as a group. .PP -\fB--enable column\fP show the specified built-in columns, which are otherwise disabled, by default. Currently the only built-in counters disabled by default are "usec" and "Time_Of_Day_Seconds". +\fB--enable column\fP show the specified built-in columns, which are otherwise disabled, by default. Currently the only built-in counters disabled by default are "usec", "Time_Of_Day_Seconds", "APIC" and "X2APIC". The column name "all" can be used to enable all disabled-by-default built-in counters. .PP \fB--show column\fP show only the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--show sysfs" to show the sysfs statistics columns as a group. diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index b0f294bf89c3..3bc2c9d94739 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -109,6 +109,7 @@ unsigned int has_hwp_activity_window; /* IA32_HWP_REQUEST[bits 41:32] */ unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */ unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */ unsigned int has_misc_feature_control; +unsigned int first_counter_read = 1; #define RAPL_PKG (1 << 0) /* 0x610 MSR_PKG_POWER_LIMIT */ @@ -170,6 +171,8 @@ struct thread_data { unsigned long long irq_count; unsigned int smi_count; unsigned int cpu_id; + unsigned int apic_id; + unsigned int x2apic_id; unsigned int flags; #define CPU_IS_FIRST_THREAD_IN_CORE 0x2 #define CPU_IS_FIRST_CORE_IN_PACKAGE 0x4 @@ -435,10 +438,10 @@ struct msr_counter bic[] = { { 0x0, "CPUGFX%" }, { 0x0, "Core" }, { 0x0, "CPU" }, + { 0x0, "APIC" }, + { 0x0, "X2APIC" }, }; - - #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) #define BIC_USEC (1ULL << 0) #define BIC_TOD (1ULL << 1) @@ -488,11 +491,13 @@ struct msr_counter bic[] = { #define BIC_CPUGFX (1ULL << 45) #define BIC_Core (1ULL << 46) #define BIC_CPU (1ULL << 47) +#define BIC_APIC (1ULL << 48) +#define BIC_X2APIC (1ULL << 49) -#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD) +#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC) unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT); -unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs; +unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC; #define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME) #define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME) @@ -603,6 +608,10 @@ void print_header(char *delim) outp += sprintf(outp, "%sCore", (printed++ ? delim : "")); if (DO_BIC(BIC_CPU)) outp += sprintf(outp, "%sCPU", (printed++ ? delim : "")); + if (DO_BIC(BIC_APIC)) + outp += sprintf(outp, "%sAPIC", (printed++ ? delim : "")); + if (DO_BIC(BIC_X2APIC)) + outp += sprintf(outp, "%sX2APIC", (printed++ ? delim : "")); if (DO_BIC(BIC_Avg_MHz)) outp += sprintf(outp, "%sAvg_MHz", (printed++ ? delim : "")); if (DO_BIC(BIC_Busy)) @@ -882,6 +891,10 @@ int format_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "%s-", (printed++ ? delim : "")); if (DO_BIC(BIC_CPU)) outp += sprintf(outp, "%s-", (printed++ ? delim : "")); + if (DO_BIC(BIC_APIC)) + outp += sprintf(outp, "%s-", (printed++ ? delim : "")); + if (DO_BIC(BIC_X2APIC)) + outp += sprintf(outp, "%s-", (printed++ ? delim : "")); } else { if (DO_BIC(BIC_Package)) { if (p) @@ -906,6 +919,10 @@ int format_counters(struct thread_data *t, struct core_data *c, } if (DO_BIC(BIC_CPU)) outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->cpu_id); + if (DO_BIC(BIC_APIC)) + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->apic_id); + if (DO_BIC(BIC_X2APIC)) + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->x2apic_id); } if (DO_BIC(BIC_Avg_MHz)) @@ -1233,6 +1250,12 @@ delta_thread(struct thread_data *new, struct thread_data *old, int i; struct msr_counter *mp; + /* we run cpuid just the 1st time, copy the results */ + if (DO_BIC(BIC_APIC)) + new->apic_id = old->apic_id; + if (DO_BIC(BIC_X2APIC)) + new->x2apic_id = old->x2apic_id; + /* * the timestamps from start of measurement interval are in "old" * the timestamp from end of measurement interval are in "new" @@ -1395,6 +1418,12 @@ int sum_counters(struct thread_data *t, struct core_data *c, int i; struct msr_counter *mp; + /* copy un-changing apic_id's */ + if (DO_BIC(BIC_APIC)) + average.threads.apic_id = t->apic_id; + if (DO_BIC(BIC_X2APIC)) + average.threads.x2apic_id = t->x2apic_id; + /* remember first tv_begin */ if (average.threads.tv_begin.tv_sec == 0) average.threads.tv_begin = t->tv_begin; @@ -1621,6 +1650,34 @@ int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp) return 0; } +void get_apic_id(struct thread_data *t) +{ + unsigned int eax, ebx, ecx, edx, max_level; + + eax = ebx = ecx = edx = 0; + + if (!genuine_intel) + return; + + __cpuid(0, max_level, ebx, ecx, edx); + + __cpuid(1, eax, ebx, ecx, edx); + t->apic_id = (ebx >> 24) & 0xf; + + if (max_level < 0xb) + return; + + if (!DO_BIC(BIC_X2APIC)) + return; + + ecx = 0; + __cpuid(0xb, eax, ebx, ecx, edx); + t->x2apic_id = edx; + + if (debug && (t->apic_id != t->x2apic_id)) + fprintf(stderr, "cpu%d: apic 0x%x x2apic 0x%x\n", t->cpu_id, t->apic_id, t->x2apic_id); +} + /* * get_counters(...) * migrate to cpu @@ -1634,7 +1691,6 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) struct msr_counter *mp; int i; - gettimeofday(&t->tv_begin, (struct timezone *)NULL); if (cpu_migrate(cpu)) { @@ -1642,6 +1698,8 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) return -1; } + if (first_counter_read) + get_apic_id(t); retry: t->tsc = rdtsc(); /* we are running on local CPU of interest */ @@ -2881,6 +2939,7 @@ void do_sleep(void) } } + void turbostat_loop() { int retval; @@ -2894,6 +2953,7 @@ restart: snapshot_proc_sysfs_files(); retval = for_all_cpus(get_counters, EVEN_COUNTERS); + first_counter_read = 0; if (retval < -1) { exit(retval); } else if (retval == -1) { @@ -4655,7 +4715,6 @@ void process_cpuid() return; } - /* * in /dev/cpu/ return success for names that are numbers * ie. filter out ".", "..", "microcode". @@ -4949,6 +5008,7 @@ int fork_it(char **argv) snapshot_proc_sysfs_files(); status = for_all_cpus(get_counters, EVEN_COUNTERS); + first_counter_read = 0; if (status) exit(status); /* clear affinity side-effect of get_counters() */ @@ -5384,7 +5444,7 @@ void cmdline(int argc, char **argv) break; case 'e': /* --enable specified counter */ - bic_enabled |= bic_lookup(optarg, SHOW_LIST); + bic_enabled = bic_enabled | bic_lookup(optarg, SHOW_LIST); break; case 'd': debug++; @@ -5468,7 +5528,6 @@ void cmdline(int argc, char **argv) int main(int argc, char **argv) { outf = stderr; - cmdline(argc, argv); if (!quiet) From 42dd45209201edb222de5f9eadc1c8f93700ef28 Mon Sep 17 00:00:00 2001 From: Nathan Ciobanu Date: Fri, 8 Jun 2018 15:15:12 -0700 Subject: [PATCH 270/572] tools/power turbostat: fix segfault on 'no node' machines Running turbostat on machines that don't expose nodes in sysfs (no /sys/bus/node) causes a segfault or a -nan value diesplayed in the log. This is caused by physical_node_id being reported as -1 and logical_node_id being calculated as a negative number resulting in the new GET_THREAD/GET_CORE returning an incorrect address. Signed-off-by: Nathan Ciobanu Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 3bc2c9d94739..97cc00a9c763 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2492,6 +2492,12 @@ void set_node_data(void) if (pni[pkg].count > topo.nodes_per_pkg) topo.nodes_per_pkg = pni[0].count; + /* Fake 1 node per pkg for machines that don't + * expose nodes and thus avoid -nan results + */ + if (topo.nodes_per_pkg == 0) + topo.nodes_per_pkg = 1; + for (cpu = 0; cpu < topo.num_cpus; cpu++) { pkg = cpus[cpu].physical_package_id; node = cpus[cpu].physical_node_id; @@ -4904,6 +4910,13 @@ void init_counter(struct thread_data *thread_base, struct core_data *core_base, struct core_data *c; struct pkg_data *p; + + /* Workaround for systems where physical_node_id==-1 + * and logical_node_id==(-1 - topo.num_cpus) + */ + if (node_id < 0) + node_id = 0; + t = GET_THREAD(thread_base, thread_id, core_id, node_id, pkg_id); c = GET_CORE(core_base, core_id, node_id, pkg_id); p = GET_PKG(pkg_base, pkg_id); From 2ee19bdea1bbc04a06606b5c9681a07d005ecbaf Mon Sep 17 00:00:00 2001 From: Nathan Ciobanu Date: Wed, 13 Jun 2018 19:51:32 -0700 Subject: [PATCH 271/572] tools/power turbostat: alphabetize the help output Sort the command line arguments output of help() in alphabetical order in line with other linux tools. Signed-off-by: Nathan Ciobanu Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 97cc00a9c763..d33b655299ba 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -528,12 +528,12 @@ void help(void) " eg. --add msr0x10,u64,cpu,delta,MY_TSC\n" "--cpu cpu-set limit output to summary plus cpu-set:\n" " {core | package | j,k,l..m,n-p }\n" - "--quiet skip decoding system configuration header\n" "--interval sec.subsec Override default 5-second measurement interval\n" - "--help print this help message\n" "--list list column headers only\n" "--num_iterations num number of the measurement iterations\n" "--out file create or truncate \"file\" for all output\n" + "--quiet skip decoding system configuration header\n" + "--help print this help message\n" "--version print version information\n" "\n" "For more help, run \"man turbostat\"\n"); From cc4816503f835c7cea184776fe8ae5bb3f505083 Mon Sep 17 00:00:00 2001 From: Nathan Ciobanu Date: Wed, 13 Jun 2018 19:51:33 -0700 Subject: [PATCH 272/572] tools/power turbostat: add single character tokens to help Improve the help() output by adding the single character tokens (e.g -a). Signed-off-by: Nathan Ciobanu Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index d33b655299ba..2dcc05f3ee6f 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -524,17 +524,20 @@ void help(void) "when COMMAND completes.\n" "If no COMMAND is specified, turbostat wakes every 5-seconds\n" "to print statistics, until interrupted.\n" - "--add add a counter\n" - " eg. --add msr0x10,u64,cpu,delta,MY_TSC\n" - "--cpu cpu-set limit output to summary plus cpu-set:\n" - " {core | package | j,k,l..m,n-p }\n" - "--interval sec.subsec Override default 5-second measurement interval\n" - "--list list column headers only\n" - "--num_iterations num number of the measurement iterations\n" - "--out file create or truncate \"file\" for all output\n" - "--quiet skip decoding system configuration header\n" - "--help print this help message\n" - "--version print version information\n" + " -a, --add add a counter\n" + " eg. --add msr0x10,u64,cpu,delta,MY_TSC\n" + " -c, --cpu cpu-set limit output to summary plus cpu-set:\n" + " {core | package | j,k,l..m,n-p }\n" + " -i, --interval sec.subsec\n" + " Override default 5-second measurement interval\n" + " -l, --list list column headers only\n" + " -n, --num_iterations num\n" + " number of the measurement iterations\n" + " -o, --out file\n" + " create or truncate \"file\" for all output\n" + " -q, --quiet skip decoding system configuration header\n" + " -h, --help print this help message\n" + " -v, --version print version information\n" "\n" "For more help, run \"man turbostat\"\n"); } From 9ce80578d5f5279545c272563851d059a2359f3e Mon Sep 17 00:00:00 2001 From: Nathan Ciobanu Date: Wed, 13 Jun 2018 19:51:34 -0700 Subject: [PATCH 273/572] tools/power turbostat: add the missing command line switches Document the missing command line tokens in the help() function. Signed-off-by: Nathan Ciobanu Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 2dcc05f3ee6f..108c3bf2a67c 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -528,14 +528,28 @@ void help(void) " eg. --add msr0x10,u64,cpu,delta,MY_TSC\n" " -c, --cpu cpu-set limit output to summary plus cpu-set:\n" " {core | package | j,k,l..m,n-p }\n" + " -d, --debug displays usec, Time_Of_Day_Seconds and more debugging\n" + " -D, --Dump displays the raw counter values\n" + " -e, --enable [all | column]\n" + " shows all or the specified disabled column\n" + " -H, --hide [column|column,column,...]\n" + " hide the specified column(s)\n" " -i, --interval sec.subsec\n" " Override default 5-second measurement interval\n" + " -J, --Joules displays energy in Joules instead of Watts\n" " -l, --list list column headers only\n" " -n, --num_iterations num\n" " number of the measurement iterations\n" " -o, --out file\n" " create or truncate \"file\" for all output\n" " -q, --quiet skip decoding system configuration header\n" + " -s, --show [column|column,column,...]\n" + " show only the specified column(s)\n" + " -S, --Summary\n" + " limits output to 1-line system summary per interval\n" + " -T, --TCC temperature\n" + " sets the Thermal Control Circuit temperature in\n" + " degrees Celsius\n" " -h, --help print this help message\n" " -v, --version print version information\n" "\n" From 73780cd816e071b0fc0f74e204a9cb30fdb291c5 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 20 Jun 2018 13:55:29 -0400 Subject: [PATCH 274/572] tools/power turbostat: version 18.06.20 Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 108c3bf2a67c..4d14bbbf9b63 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -5102,7 +5102,7 @@ int get_and_dump_counters(void) } void print_version() { - fprintf(outf, "turbostat version 18.06.01" + fprintf(outf, "turbostat version 18.06.20" " - Len Brown \n"); } From ce042c183bcb94eb2919e8036473a1fc203420f9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 20 Jun 2018 13:41:51 +0300 Subject: [PATCH 275/572] block: sed-opal: Fix a couple off by one bugs resp->num is the number of tokens in resp->tok[]. It gets set in response_parse(). So if n == resp->num then we're reading beyond the end of the data. Fixes: 455a7b238cd6 ("block: Add Sed-opal library") Reviewed-by: Scott Bauer Tested-by: Scott Bauer Signed-off-by: Dan Carpenter Signed-off-by: Jens Axboe --- block/sed-opal.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/sed-opal.c b/block/sed-opal.c index 945f4b8610e0..e0de4dd448b3 100644 --- a/block/sed-opal.c +++ b/block/sed-opal.c @@ -877,7 +877,7 @@ static size_t response_get_string(const struct parsed_resp *resp, int n, return 0; } - if (n > resp->num) { + if (n >= resp->num) { pr_debug("Response has %d tokens. Can't access %d\n", resp->num, n); return 0; @@ -916,7 +916,7 @@ static u64 response_get_u64(const struct parsed_resp *resp, int n) return 0; } - if (n > resp->num) { + if (n >= resp->num) { pr_debug("Response has %d tokens. Can't access %d\n", resp->num, n); return 0; From 7a0f9d1eb51ff25d119b48fe7cc6aa0433cd6621 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 20 Jun 2018 10:42:07 +0200 Subject: [PATCH 276/572] Documentation: intel_pstate: Fix typo Fix a typo in the intel_pstate admin-guide documentation. Signed-off-by: Rafael J. Wysocki --- Documentation/admin-guide/pm/intel_pstate.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/admin-guide/pm/intel_pstate.rst b/Documentation/admin-guide/pm/intel_pstate.rst index ab2fe0eda1d7..8b9164990956 100644 --- a/Documentation/admin-guide/pm/intel_pstate.rst +++ b/Documentation/admin-guide/pm/intel_pstate.rst @@ -410,7 +410,7 @@ argument is passed to the kernel in the command line. That only is supported in some configurations, though (for example, if the `HWP feature is enabled in the processor `_, the operation mode of the driver cannot be changed), and if it is not - supported in the current configuration, writes to this attribute with + supported in the current configuration, writes to this attribute will fail with an appropriate error. Interpretation of Policy Attributes From c24fb5e68ebfd485ecaa8b5d40cb4721a1d137ee Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Wed, 20 Jun 2018 16:04:34 +0200 Subject: [PATCH 277/572] bpfilter: fix user mode helper cross compilation Use $(OBJDUMP) instead of literal 'objdump' to avoid using host toolchain when cross compiling. Fixes: 421780fd4983 ("bpfilter: fix build error") Signed-off-by: Matteo Croce Reported-by: Stefano Brivio Signed-off-by: David S. Miller --- net/bpfilter/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bpfilter/Makefile b/net/bpfilter/Makefile index dd86b022eff0..051dc18b8ccb 100644 --- a/net/bpfilter/Makefile +++ b/net/bpfilter/Makefile @@ -22,7 +22,7 @@ endif quiet_cmd_copy_umh = GEN $@ cmd_copy_umh = echo ':' > $(obj)/.bpfilter_umh.o.cmd; \ $(OBJCOPY) -I binary \ - `LC_ALL=C objdump -f net/bpfilter/bpfilter_umh \ + `LC_ALL=C $(OBJDUMP) -f net/bpfilter/bpfilter_umh \ |awk -F' |,' '/file format/{print "-O",$$NF} \ /^architecture:/{print "-B",$$2}'` \ --rename-section .data=.init.rodata $< $@ From 73f9c33beb9e85850a11a1597b35a13650352509 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Tue, 19 Jun 2018 18:41:11 +0200 Subject: [PATCH 278/572] selftests: net: add config fragments Add fragments to pass bridge and vlan tests. Fixes: 33b01b7b4f19 ("selftests: add rtnetlink test script") Signed-off-by: Anders Roxell Signed-off-by: David S. Miller --- tools/testing/selftests/net/config | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 7ba089b33e8b..cd3a2f1545b5 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -12,3 +12,5 @@ CONFIG_NET_IPVTI=y CONFIG_INET6_XFRM_MODE_TUNNEL=y CONFIG_IPV6_VTI=y CONFIG_DUMMY=y +CONFIG_BRIDGE=y +CONFIG_VLAN_8021Q=y From 456488cd952c23d451b0bdc5b5226b0707db80f7 Mon Sep 17 00:00:00 2001 From: Vakul Garg Date: Thu, 21 Jun 2018 03:29:49 +0530 Subject: [PATCH 279/572] strparser: Don't schedule in workqueue in paused state In function strp_data_ready(), it is useless to call queue_work if the state of strparser is already paused. The state checking should be done before calling queue_work. The change reduces the context switches and improves the ktls-rx throughput by approx 20% (measured on cortex-a53 based platform). Signed-off-by: Vakul Garg Acked-by: Dave Watson Signed-off-by: David S. Miller --- net/strparser/strparser.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index 1a9695183599..373836615c57 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -392,7 +392,7 @@ static int strp_read_sock(struct strparser *strp) /* Lower sock lock held */ void strp_data_ready(struct strparser *strp) { - if (unlikely(strp->stopped)) + if (unlikely(strp->stopped) || strp->paused) return; /* This check is needed to synchronize with do_strp_work. @@ -407,9 +407,6 @@ void strp_data_ready(struct strparser *strp) return; } - if (strp->paused) - return; - if (strp->need_bytes) { if (strp_peek_len(strp) < strp->need_bytes) return; From 6d8b834989f56066499cb25fd7d60169ed5e188b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 20 Jun 2018 15:01:53 +0300 Subject: [PATCH 280/572] r8169: Fix netpoll oops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass the correct thing to rtl8169_interrupt() from netpoll. Cc: Realtek linux nic maintainers Cc: netdev@vger.kernel.org Cc: Heiner Kallweit Cc: David S. Miller Fixes: ebcd5daa7ffd ("r8169: change interrupt handler argument type") Signed-off-by: Ville Syrjälä Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 75dfac0248f4..f4cae2be0fda 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -7148,7 +7148,7 @@ static void rtl8169_netpoll(struct net_device *dev) { struct rtl8169_private *tp = netdev_priv(dev); - rtl8169_interrupt(pci_irq_vector(tp->pci_dev, 0), dev); + rtl8169_interrupt(pci_irq_vector(tp->pci_dev, 0), tp); } #endif From 08ba91ee6e2c1c08d3f0648f978cbb5dbf3491d8 Mon Sep 17 00:00:00 2001 From: Doron Roberts-Kedes Date: Fri, 15 Jun 2018 14:05:32 -0700 Subject: [PATCH 281/572] nbd: Add the nbd NBD_DISCONNECT_ON_CLOSE config flag. If NBD_DISCONNECT_ON_CLOSE is set on a device, then the driver will issue a disconnect from nbd_release if the device has no remaining bdev->bd_openers. Fix ret val so reconfigure with only setting the flag succeeds. Reviewed-by: Josef Bacik Signed-off-by: Doron Roberts-Kedes Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 42 ++++++++++++++++++++++++++++++++-------- include/uapi/linux/nbd.h | 3 +++ 2 files changed, 37 insertions(+), 8 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 3b7083b8ecbb..74a05561b620 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -76,6 +76,7 @@ struct link_dead_args { #define NBD_HAS_CONFIG_REF 4 #define NBD_BOUND 5 #define NBD_DESTROY_ON_DISCONNECT 6 +#define NBD_DISCONNECT_ON_CLOSE 7 struct nbd_config { u32 flags; @@ -138,6 +139,7 @@ static void nbd_config_put(struct nbd_device *nbd); static void nbd_connect_reply(struct genl_info *info, int index); static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info); static void nbd_dead_link_work(struct work_struct *work); +static void nbd_disconnect_and_put(struct nbd_device *nbd); static inline struct device *nbd_to_dev(struct nbd_device *nbd) { @@ -1305,6 +1307,12 @@ out: static void nbd_release(struct gendisk *disk, fmode_t mode) { struct nbd_device *nbd = disk->private_data; + struct block_device *bdev = bdget_disk(disk, 0); + + if (test_bit(NBD_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) && + bdev->bd_openers == 0) + nbd_disconnect_and_put(nbd); + nbd_config_put(nbd); nbd_put(nbd); } @@ -1705,6 +1713,10 @@ again: &config->runtime_flags); put_dev = true; } + if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) { + set_bit(NBD_DISCONNECT_ON_CLOSE, + &config->runtime_flags); + } } if (info->attrs[NBD_ATTR_SOCKETS]) { @@ -1749,6 +1761,17 @@ out: return ret; } +static void nbd_disconnect_and_put(struct nbd_device *nbd) +{ + mutex_lock(&nbd->config_lock); + nbd_disconnect(nbd); + nbd_clear_sock(nbd); + mutex_unlock(&nbd->config_lock); + if (test_and_clear_bit(NBD_HAS_CONFIG_REF, + &nbd->config->runtime_flags)) + nbd_config_put(nbd); +} + static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info) { struct nbd_device *nbd; @@ -1781,13 +1804,7 @@ static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info) nbd_put(nbd); return 0; } - mutex_lock(&nbd->config_lock); - nbd_disconnect(nbd); - nbd_clear_sock(nbd); - mutex_unlock(&nbd->config_lock); - if (test_and_clear_bit(NBD_HAS_CONFIG_REF, - &nbd->config->runtime_flags)) - nbd_config_put(nbd); + nbd_disconnect_and_put(nbd); nbd_config_put(nbd); nbd_put(nbd); return 0; @@ -1798,7 +1815,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info) struct nbd_device *nbd = NULL; struct nbd_config *config; int index; - int ret = -EINVAL; + int ret = 0; bool put_dev = false; if (!netlink_capable(skb, CAP_SYS_ADMIN)) @@ -1838,6 +1855,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info) !nbd->task_recv) { dev_err(nbd_to_dev(nbd), "not configured, cannot reconfigure\n"); + ret = -EINVAL; goto out; } @@ -1862,6 +1880,14 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info) &config->runtime_flags)) refcount_inc(&nbd->refs); } + + if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) { + set_bit(NBD_DISCONNECT_ON_CLOSE, + &config->runtime_flags); + } else { + clear_bit(NBD_DISCONNECT_ON_CLOSE, + &config->runtime_flags); + } } if (info->attrs[NBD_ATTR_SOCKETS]) { diff --git a/include/uapi/linux/nbd.h b/include/uapi/linux/nbd.h index 85a3fb65e40a..20d6cc91435d 100644 --- a/include/uapi/linux/nbd.h +++ b/include/uapi/linux/nbd.h @@ -53,6 +53,9 @@ enum { /* These are client behavior specific flags. */ #define NBD_CFLAG_DESTROY_ON_DISCONNECT (1 << 0) /* delete the nbd device on disconnect. */ +#define NBD_CFLAG_DISCONNECT_ON_CLOSE (1 << 1) /* disconnect the nbd device on + * close by last opener. + */ /* userspace doesn't need the nbd_device structure */ From ea0820bb771175c7d4192fc6f5b5c56b3c6d5239 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 20 Jun 2018 10:03:56 +0200 Subject: [PATCH 282/572] net: davinci_emac: match the mdio device against its compatible if possible Device tree based systems without of_dev_auxdata will have the mdio device named differently than "davinci_mdio(.0)". In this case use the device's parent's compatible string for matching Signed-off-by: Bartosz Golaszewski Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/davinci_emac.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index a1a6445b5a7e..f270beebb428 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -1387,6 +1387,10 @@ static int emac_devioctl(struct net_device *ndev, struct ifreq *ifrq, int cmd) static int match_first_device(struct device *dev, void *data) { + if (dev->parent && dev->parent->of_node) + return of_device_is_compatible(dev->parent->of_node, + "ti,davinci_mdio"); + return !strncmp(dev_name(dev), "davinci_mdio", 12); } From 08d02364b12faa54d76dbfea2090321fd27996f2 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Wed, 20 Jun 2018 10:50:46 +0200 Subject: [PATCH 283/572] net: mscc: fix the injection header When injecting frames in the Ocelot switch driver an injection header (IFH) should be used to configure various parameters related to a given frame, such as the port onto which the frame should be departed or its vlan id. Other parameters in the switch configuration can led to an injected frame being sent without an IFH but this led to various issues as the per-frame parameters are then not used. This is especially true when using multiple ports for injection. The IFH was injected with the wrong endianness which led to the switch not taking it into account as the IFH_INJ_BYPASS bit was then unset. (The bit tells the switch to use the IFH over its internal configuration). This patch fixes it. In addition to the endianness fix, the IFH is also fixed. As it was (unwillingly) unused, some of its fields were not configured the right way. Fixes: a556c76adc05 ("net: mscc: Add initial Ocelot switch support") Signed-off-by: Antoine Tenart Reviewed-by: Alexandre Belloni Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index fb2c8f8071e6..52c57e0ff617 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -344,10 +344,9 @@ static int ocelot_port_stop(struct net_device *dev) static int ocelot_gen_ifh(u32 *ifh, struct frame_info *info) { ifh[0] = IFH_INJ_BYPASS; - ifh[1] = (0xff00 & info->port) >> 8; + ifh[1] = (0xf00 & info->port) >> 8; ifh[2] = (0xff & info->port) << 24; - ifh[3] = IFH_INJ_POP_CNT_DISABLE | (info->cpuq << 20) | - (info->tag_type << 16) | info->vid; + ifh[3] = (info->tag_type << 16) | info->vid; return 0; } @@ -370,11 +369,12 @@ static int ocelot_port_xmit(struct sk_buff *skb, struct net_device *dev) QS_INJ_CTRL_SOF, QS_INJ_CTRL, grp); info.port = BIT(port->chip_port); - info.cpuq = 0xff; + info.tag_type = IFH_TAG_TYPE_C; + info.vid = skb_vlan_tag_get(skb); ocelot_gen_ifh(ifh, &info); for (i = 0; i < IFH_LEN; i++) - ocelot_write_rix(ocelot, ifh[i], QS_INJ_WR, grp); + ocelot_write_rix(ocelot, cpu_to_be32(ifh[i]), QS_INJ_WR, grp); count = (skb->len + 3) / 4; last = skb->len % 4; From 9262478220eac908ae6e168c3df2c453c87e2da3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 20 Jun 2018 17:24:09 -0700 Subject: [PATCH 284/572] bpf: enforce correct alignment for instructions After commit 9facc336876f ("bpf: reject any prog that failed read-only lock") offsetof(struct bpf_binary_header, image) became 3 instead of 4, breaking powerpc BPF badly, since instructions need to be word aligned. Fixes: 9facc336876f ("bpf: reject any prog that failed read-only lock") Signed-off-by: Eric Dumazet Cc: Daniel Borkmann Cc: Martin KaFai Lau Cc: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index b615df57b7d5..20f2659dd829 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -472,7 +472,9 @@ struct sock_fprog_kern { struct bpf_binary_header { u16 pages; u16 locked:1; - u8 image[]; + + /* Some arches need word alignment for their instructions */ + u8 image[] __aligned(4); }; struct bpf_prog { From fedb1bd3d274b33c432cb83c80c6b3cf54d509c8 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Wed, 20 Jun 2018 12:47:52 -0300 Subject: [PATCH 285/572] sctp: fix erroneous inc of snmp SctpFragUsrMsgs Currently it is incrementing SctpFragUsrMsgs when the user message size is of the exactly same size as the maximum fragment size, which is wrong. The fix is to increment it only when user message is bigger than the maximum fragment size. Fixes: bfd2e4b8734d ("sctp: refactor sctp_datamsg_from_user") Signed-off-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/chunk.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 79daa98208c3..bfb9f812e2ef 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -237,7 +237,9 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, /* Account for a different sized first fragment */ if (msg_len >= first_len) { msg->can_delay = 0; - SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_FRAGUSRMSGS); + if (msg_len > first_len) + SCTP_INC_STATS(sock_net(asoc->base.sk), + SCTP_MIB_FRAGUSRMSGS); } else { /* Which may be the only one... */ first_len = msg_len; From 30877961b1cdd6fdca783c2e8c4f0f47e95dc58c Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 21 Jun 2018 12:56:04 +0800 Subject: [PATCH 286/572] ipvlan: fix IFLA_MTU ignored on NEWLINK Commit 296d48568042 ("ipvlan: inherit MTU from master device") adjusted the mtu from the master device when creating a ipvlan device, but it would also override the mtu value set in rtnl_create_link. It causes IFLA_MTU param not to take effect. So this patch is to not adjust the mtu if IFLA_MTU param is set when creating a ipvlan device. Fixes: 296d48568042 ("ipvlan: inherit MTU from master device") Reported-by: Jianlin Shi Signed-off-by: Xin Long Signed-off-by: David S. Miller --- drivers/net/ipvlan/ipvlan_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index d02f0a7c534e..23c1d6600241 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -594,7 +594,8 @@ int ipvlan_link_new(struct net *src_net, struct net_device *dev, ipvlan->phy_dev = phy_dev; ipvlan->dev = dev; ipvlan->sfeatures = IPVLAN_FEATURES; - ipvlan_adjust_mtu(ipvlan, phy_dev); + if (!tb[IFLA_MTU]) + ipvlan_adjust_mtu(ipvlan, phy_dev); INIT_LIST_HEAD(&ipvlan->addrs); spin_lock_init(&ipvlan->addrs_lock); From 64d7839af8c8f67daaf9bf387135052c55d85f90 Mon Sep 17 00:00:00 2001 From: Harini Katakam Date: Wed, 20 Jun 2018 17:04:20 +0530 Subject: [PATCH 287/572] net: macb: Fix ptp time adjustment for large negative delta When delta passed to gem_ptp_adjtime is negative, the sign is maintained in the ns_to_timespec64 conversion. Hence timespec_add should be used directly. timespec_sub will just subtract the negative value thus increasing the time difference. Signed-off-by: Harini Katakam Acked-by: Nicolas Ferre Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb_ptp.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_ptp.c b/drivers/net/ethernet/cadence/macb_ptp.c index 2220c771092b..678835136bf8 100644 --- a/drivers/net/ethernet/cadence/macb_ptp.c +++ b/drivers/net/ethernet/cadence/macb_ptp.c @@ -170,10 +170,7 @@ static int gem_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) if (delta > TSU_NSEC_MAX_VAL) { gem_tsu_get_time(&bp->ptp_clock_info, &now); - if (sign) - now = timespec64_sub(now, then); - else - now = timespec64_add(now, then); + now = timespec64_add(now, then); gem_tsu_set_time(&bp->ptp_clock_info, (const struct timespec64 *)&now); From 3739a21e0ef6ac06f46bd38e81daa95e8cb462bc Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Thu, 21 Jun 2018 00:43:44 +0200 Subject: [PATCH 288/572] selftests: net: add tcp_inq to gitignore sha: 702353b538f5 ("selftest: add test for TCP_INQ") forgot to add tcp_inq to .gitignore. Signed-off-by: Anders Roxell Signed-off-by: David S. Miller --- tools/testing/selftests/net/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 128e548aa377..1a0ac3a29ec5 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -12,3 +12,4 @@ tcp_mmap udpgso udpgso_bench_rx udpgso_bench_tx +tcp_inq From 02d62a8bc48e92171c46540722e2d52ce77d87af Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 20 Jun 2018 07:44:12 -0700 Subject: [PATCH 289/572] nvme-fc: release io queues to allow fast fail Rather than leaving io queues quiesced after tearing down an association, restart them. This allows ios to be replayed, with fastfail ios terminating and non-fastfail getting into loops of retry. This follows rdma's lead. Signed-off-by: James Smart Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index b528a2f5826c..41d45a1b5c62 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2790,6 +2790,9 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) /* re-enable the admin_q so anything new can fast fail */ blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); + /* resume the io queues so that things will fast fail */ + nvme_start_queues(&ctrl->ctrl); + nvme_fc_ctlr_inactive_on_rport(ctrl); } @@ -2804,9 +2807,6 @@ nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl) * waiting for io to terminate */ nvme_fc_delete_association(ctrl); - - /* resume the io queues so that things will fast fail */ - nvme_start_queues(nctrl); } static void From ba56bc3a0786992755e6804fbcbdc60ef6cfc24c Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 1 Jun 2018 17:06:28 +0200 Subject: [PATCH 290/572] KVM: arm/arm64: Drop resource size check for GICV window When booting a 64 KB pages kernel on a ACPI GICv3 system that implements support for v2 emulation, the following warning is produced GICV size 0x2000 not a multiple of page size 0x10000 and support for v2 emulation is disabled, preventing GICv2 VMs from being able to run on such hosts. The reason is that vgic_v3_probe() performs a sanity check on the size of the window (it should be a multiple of the page size), while the ACPI MADT parsing code hardcodes the size of the window to 8 KB. This makes sense, considering that ACPI does not bother to describe the size in the first place, under the assumption that platforms implementing ACPI will follow the architecture and not put anything else in the same 64 KB window. So let's just drop the sanity check altogether, and assume that the window is at least 64 KB in size. Fixes: 909777324588 ("KVM: arm/arm64: vgic-new: vgic_init: implement kvm_vgic_hyp_init") Signed-off-by: Ard Biesheuvel Signed-off-by: Marc Zyngier --- virt/kvm/arm/vgic/vgic-v3.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index ff7dc890941a..cdce653e3c47 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -617,11 +617,6 @@ int vgic_v3_probe(const struct gic_kvm_info *info) pr_warn("GICV physical address 0x%llx not page aligned\n", (unsigned long long)info->vcpu.start); kvm_vgic_global_state.vcpu_base = 0; - } else if (!PAGE_ALIGNED(resource_size(&info->vcpu))) { - pr_warn("GICV size 0x%llx not a multiple of page size 0x%lx\n", - (unsigned long long)resource_size(&info->vcpu), - PAGE_SIZE); - kvm_vgic_global_state.vcpu_base = 0; } else { kvm_vgic_global_state.vcpu_base = info->vcpu.start; kvm_vgic_global_state.can_emulate_gicv2 = true; From 6ebdf4db8fa564a150f46d32178af0873eb5abbb Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 15 Jun 2018 16:47:23 +0100 Subject: [PATCH 291/572] arm64: Introduce sysreg_clear_set() Currently we have a couple of helpers to manipulate bits in particular sysregs: * config_sctlr_el1(u32 clear, u32 set) * change_cpacr(u64 val, u64 mask) The parameters of these differ in naming convention, order, and size, which is unfortunate. They also differ slightly in behaviour, as change_cpacr() skips the sysreg write if the bits are unchanged, which is a useful optimization when sysreg writes are expensive. Before we gain yet another sysreg manipulation function, let's unify these with a common helper, providing a consistent order for clear/set operands, and the write skipping behaviour from change_cpacr(). Code will be migrated to the new helper in subsequent patches. Signed-off-by: Mark Rutland Reviewed-by: Dave Martin Acked-by: Catalin Marinas Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/sysreg.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 6171178075dc..a8f84812c6e8 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -728,6 +728,17 @@ asm( asm volatile("msr_s " __stringify(r) ", %x0" : : "rZ" (__val)); \ } while (0) +/* + * Modify bits in a sysreg. Bits in the clear mask are zeroed, then bits in the + * set mask are set. Other bits are left as-is. + */ +#define sysreg_clear_set(sysreg, clear, set) do { \ + u64 __scs_val = read_sysreg(sysreg); \ + u64 __scs_new = (__scs_val & ~(u64)(clear)) | (set); \ + if (__scs_new != __scs_val) \ + write_sysreg(__scs_new, sysreg); \ +} while (0) + static inline void config_sctlr_el1(u32 clear, u32 set) { u32 val; From b045e4d0f392cbdab2674b0aa78c8d2b187e4e27 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Fri, 15 Jun 2018 16:47:24 +0100 Subject: [PATCH 292/572] KVM: arm64: Don't mask softirq with IRQs disabled in vcpu_put() Commit e6b673b ("KVM: arm64: Optimise FPSIMD handling to reduce guest/host thrashing") introduces a specific helper kvm_arch_vcpu_put_fp() for saving the vcpu FPSIMD state during vcpu_put(). This function uses local_bh_disable()/_enable() to protect the FPSIMD context manipulation from interruption by softirqs. This approach is not correct, because vcpu_put() can be invoked either from the KVM host vcpu thread (when exiting the vcpu run loop), or via a preempt notifier. In the former case, only preemption is disabled. In the latter case, the function is called from inside __schedule(), which means that IRQs are disabled. Use of local_bh_disable()/_enable() with IRQs disabled is considerd an error, resulting in lockdep splats while running VMs if lockdep is enabled. This patch disables IRQs instead of attempting to disable softirqs, avoiding the problem of calling local_bh_enable() with IRQs disabled in the __schedule() path. This creates an additional interrupt blackout during vcpu run loop exit, but this is the rare case and the blackout latency is still less than that of __schedule(). Fixes: e6b673b741ea ("KVM: arm64: Optimise FPSIMD handling to reduce guest/host thrashing") Reported-by: Andre Przywara Signed-off-by: Dave Martin Signed-off-by: Marc Zyngier --- arch/arm64/kvm/fpsimd.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index dc6ecfa5a2d2..f9d09318b8db 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -5,7 +5,7 @@ * Copyright 2018 Arm Limited * Author: Dave Martin */ -#include +#include #include #include #include @@ -92,7 +92,9 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) */ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) { - local_bh_disable(); + unsigned long flags; + + local_irq_save(flags); update_thread_flag(TIF_SVE, vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE); @@ -106,5 +108,5 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) fpsimd_bind_task_to_cpu(); } - local_bh_enable(); + local_irq_restore(flags); } From b3eb56b629d1095dde56fa37f4d7bcd5f783c8b2 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Fri, 15 Jun 2018 16:47:25 +0100 Subject: [PATCH 293/572] KVM: arm64/sve: Fix SVE trap restoration for non-current tasks Commit e6b673b ("KVM: arm64: Optimise FPSIMD handling to reduce guest/host thrashing") attempts to restore the configuration of userspace SVE trapping via a call to fpsimd_bind_task_to_cpu(), but the logic for determining when to do this is not correct. The patch makes the errnoenous assumption that the only task that may try to enter userspace with the currently loaded FPSIMD/SVE register content is current. This may not be the case however: if some other user task T is scheduled on the CPU during the execution of the KVM run loop, and the vcpu does not try to use the registers in the meantime, then T's state may be left there intact. If T happens to be the next task to enter userspace on this CPU then the hooks for reloading the register state and configuring traps will be skipped. (Also, current never has SVE state at this point anyway and should always have the trap enabled, as a side-effect of the ioctl() syscall needed to reach the KVM run loop in the first place.) This patch instead restores the state of the EL0 trap from the state observed at the most recent vcpu_load(), ensuring that the trap is set correctly for the loaded context (if any). Fixes: e6b673b741ea ("KVM: arm64: Optimise FPSIMD handling to reduce guest/host thrashing") Signed-off-by: Dave Martin Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/kvm/fpsimd.c | 24 ++++++++++++++++++++---- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index fda9a8ca48be..fe8777b12f86 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -306,6 +306,7 @@ struct kvm_vcpu_arch { #define KVM_ARM64_FP_ENABLED (1 << 1) /* guest FP regs loaded */ #define KVM_ARM64_FP_HOST (1 << 2) /* host FP regs loaded */ #define KVM_ARM64_HOST_SVE_IN_USE (1 << 3) /* backup for host TIF_SVE */ +#define KVM_ARM64_HOST_SVE_ENABLED (1 << 4) /* SVE enabled for EL0 */ #define vcpu_gp_regs(v) (&(v)->arch.ctxt.gp_regs) diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index f9d09318b8db..98d19d1afa50 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -12,6 +12,7 @@ #include #include #include +#include /* * Called on entry to KVM_RUN unless this vcpu previously ran at least @@ -61,10 +62,16 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) { BUG_ON(!current->mm); - vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED | KVM_ARM64_HOST_SVE_IN_USE); + vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED | + KVM_ARM64_HOST_SVE_IN_USE | + KVM_ARM64_HOST_SVE_ENABLED); vcpu->arch.flags |= KVM_ARM64_FP_HOST; + if (test_thread_flag(TIF_SVE)) vcpu->arch.flags |= KVM_ARM64_HOST_SVE_IN_USE; + + if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN) + vcpu->arch.flags |= KVM_ARM64_HOST_SVE_ENABLED; } /* @@ -103,9 +110,18 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) /* Clean guest FP state to memory and invalidate cpu view */ fpsimd_save(); fpsimd_flush_cpu_state(); - } else if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) { - /* Ensure user trap controls are correctly restored */ - fpsimd_bind_task_to_cpu(); + } else if (system_supports_sve()) { + /* + * The FPSIMD/SVE state in the CPU has not been touched, and we + * have SVE (and VHE): CPACR_EL1 (alias CPTR_EL2) has been + * reset to CPACR_EL1_DEFAULT by the Hyp code, disabling SVE + * for EL0. To avoid spurious traps, restore the trap state + * seen by kvm_arch_vcpu_load_fp(): + */ + if (vcpu->arch.flags & KVM_ARM64_HOST_SVE_ENABLED) + sysreg_clear_set(CPACR_EL1, 0, CPACR_EL1_ZEN_EL0EN); + else + sysreg_clear_set(CPACR_EL1, CPACR_EL1_ZEN_EL0EN, 0); } local_irq_restore(flags); From 2955bcc8c309bb8f2c773db4798649aa802a491f Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Fri, 15 Jun 2018 16:47:26 +0100 Subject: [PATCH 294/572] KVM: arm64: Avoid mistaken attempts to save SVE state for vcpus Commit e6b673b ("KVM: arm64: Optimise FPSIMD handling to reduce guest/host thrashing") uses fpsimd_save() to save the FPSIMD state for a vcpu when scheduling the vcpu out. However, currently current's value of TIF_SVE is restored before calling fpsimd_save() which means that fpsimd_save() may erroneously attempt to save SVE state from the vcpu. This enables current's vector state to be polluted with guest data. current->thread.sve_state may be unallocated or not large enough, so this can also trigger a NULL dereference or buffer overrun. Instead of this, TIF_SVE should be configured properly for the guest when calling fpsimd_save() with the vcpu context loaded. This patch ensures this by delaying restoration of current's TIF_SVE until after the call to fpsimd_save(). Fixes: e6b673b741ea ("KVM: arm64: Optimise FPSIMD handling to reduce guest/host thrashing") Signed-off-by: Dave Martin Signed-off-by: Marc Zyngier --- arch/arm64/kvm/fpsimd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 98d19d1afa50..aac7808ce216 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -103,9 +103,6 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) local_irq_save(flags); - update_thread_flag(TIF_SVE, - vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE); - if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) { /* Clean guest FP state to memory and invalidate cpu view */ fpsimd_save(); @@ -124,5 +121,8 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) sysreg_clear_set(CPACR_EL1, CPACR_EL1_ZEN_EL0EN, 0); } + update_thread_flag(TIF_SVE, + vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE); + local_irq_restore(flags); } From e8b92efa629dac0e70ea4145c5e70616de5f89c8 Mon Sep 17 00:00:00 2001 From: Maciej Purski Date: Tue, 23 Jan 2018 12:17:19 +0100 Subject: [PATCH 295/572] drm/bridge/sii8620: fix display of packed pixel modes in MHL2 Currently packed pixel modes in MHL2 can't be displayed. The device automatically recognizes output format, so setting format other than RGB causes failure. Fix it by writing proper values to registers. Tested on MHL1 and MHL2 using various vendors' dongles both in DVI and HDMI mode. Signed-off-by: Maciej Purski Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/1516706239-9104-1-git-send-email-m.purski@samsung.com --- drivers/gpu/drm/bridge/sil-sii8620.c | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/drivers/gpu/drm/bridge/sil-sii8620.c b/drivers/gpu/drm/bridge/sil-sii8620.c index 4a3deeda065c..250effa0e6b8 100644 --- a/drivers/gpu/drm/bridge/sil-sii8620.c +++ b/drivers/gpu/drm/bridge/sil-sii8620.c @@ -1017,20 +1017,11 @@ static void sii8620_stop_video(struct sii8620 *ctx) static void sii8620_set_format(struct sii8620 *ctx) { - u8 out_fmt; - if (sii8620_is_mhl3(ctx)) { sii8620_setbits(ctx, REG_M3_P0CTRL, BIT_M3_P0CTRL_MHL3_P0_PIXEL_MODE_PACKED, ctx->use_packed_pixel ? ~0 : 0); } else { - if (ctx->use_packed_pixel) - sii8620_write_seq_static(ctx, - REG_VID_MODE, BIT_VID_MODE_M1080P, - REG_MHL_TOP_CTL, BIT_MHL_TOP_CTL_MHL_PP_SEL | 1, - REG_MHLTX_CTL6, 0x60 - ); - else sii8620_write_seq_static(ctx, REG_VID_MODE, 0, REG_MHL_TOP_CTL, 1, @@ -1038,15 +1029,9 @@ static void sii8620_set_format(struct sii8620 *ctx) ); } - if (ctx->use_packed_pixel) - out_fmt = VAL_TPI_FORMAT(YCBCR422, FULL) | - BIT_TPI_OUTPUT_CSCMODE709; - else - out_fmt = VAL_TPI_FORMAT(RGB, FULL); - sii8620_write_seq(ctx, REG_TPI_INPUT, VAL_TPI_FORMAT(RGB, FULL), - REG_TPI_OUTPUT, out_fmt, + REG_TPI_OUTPUT, VAL_TPI_FORMAT(RGB, FULL), ); } From 74899d92e66663dc7671a8017b3146dcd4735f3b Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 21 Jun 2018 10:43:31 +0200 Subject: [PATCH 296/572] x86/xen: Add call of speculative_store_bypass_ht_init() to PV paths Commit: 1f50ddb4f418 ("x86/speculation: Handle HT correctly on AMD") ... added speculative_store_bypass_ht_init() to the per-CPU initialization sequence. speculative_store_bypass_ht_init() needs to be called on each CPU for PV guests, too. Reported-by: Brian Woods Tested-by: Brian Woods Signed-off-by: Juergen Gross Cc: Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: boris.ostrovsky@oracle.com Cc: xen-devel@lists.xenproject.org Fixes: 1f50ddb4f4189243c05926b842dc1a0332195f31 ("x86/speculation: Handle HT correctly on AMD") Link: https://lore.kernel.org/lkml/20180621084331.21228-1-jgross@suse.com Signed-off-by: Ingo Molnar --- arch/x86/xen/smp_pv.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 2e20ae2fa2d6..e3b18ad49889 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -32,6 +32,7 @@ #include #include +#include #include #include @@ -70,6 +71,8 @@ static void cpu_bringup(void) cpu_data(cpu).x86_max_cores = 1; set_cpu_sibling_map(cpu); + speculative_store_bypass_ht_init(); + xen_setup_cpu_clockevents(); notify_cpu_starting(cpu); @@ -250,6 +253,8 @@ static void __init xen_pv_smp_prepare_cpus(unsigned int max_cpus) } set_cpu_sibling_map(0); + speculative_store_bypass_ht_init(); + xen_pmu_init(0); if (xen_smp_intr_init(0) || xen_smp_intr_init_pv(0)) From f04684b4d85d6371126f476d3268ebf6a0bd57cf Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 21 Jun 2018 11:07:21 +0300 Subject: [PATCH 297/572] ALSA: lx6464es: Missing error code in snd_lx6464es_create() We forgot to set the error code on this error path. Fixes: 4a23fc8cc068 ("ALSA: lx6464es: add error handling for pci_ioremap_bar") Signed-off-by: Dan Carpenter Signed-off-by: Takashi Iwai --- sound/pci/lx6464es/lx6464es.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/lx6464es/lx6464es.c b/sound/pci/lx6464es/lx6464es.c index 6c85f13ab23f..54f6252faca6 100644 --- a/sound/pci/lx6464es/lx6464es.c +++ b/sound/pci/lx6464es/lx6464es.c @@ -1018,6 +1018,7 @@ static int snd_lx6464es_create(struct snd_card *card, chip->port_dsp_bar = pci_ioremap_bar(pci, 2); if (!chip->port_dsp_bar) { dev_err(card->dev, "cannot remap PCI memory region\n"); + err = -ENOMEM; goto remap_pci_failed; } From e7c9996bb3dedb5553e6b34e6dbbed210a72f3e1 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Tue, 19 Jun 2018 09:32:28 -0400 Subject: [PATCH 298/572] rseq/selftests: Make run_param_test.sh executable The executable bit of the run_param_test.sh script got lost in the merge. Signed-off-by: Mathieu Desnoyers Cc: Andrew Hunter Cc: Andrew Morton Cc: Andy Lutomirski Cc: Ben Maurer Cc: Boqun Feng Cc: Catalin Marinas Cc: Chris Lameter Cc: Dave Watson Cc: Joel Fernandes Cc: Josh Triplett Cc: Linus Torvalds Cc: Michael Kerrisk Cc: Paul E . McKenney Cc: Paul Turner Cc: Peter Zijlstra Cc: Russell King Cc: Shuah Khan Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-api@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Link: https://lore.kernel.org/lkml/20180619133230.4087-2-mathieu.desnoyers@efficios.com Signed-off-by: Ingo Molnar --- tools/testing/selftests/rseq/run_param_test.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 tools/testing/selftests/rseq/run_param_test.sh diff --git a/tools/testing/selftests/rseq/run_param_test.sh b/tools/testing/selftests/rseq/run_param_test.sh old mode 100644 new mode 100755 From 0ea73d5e286193be4dec70d04021d6005b5b1771 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Tue, 19 Jun 2018 09:32:29 -0400 Subject: [PATCH 299/572] rseq/selftests/arm: Align 'struct rseq_cs' on 32 bytes uapi/linux/rseq.h aligns 'struct rseq_cs' on 32 bytes. Satisfy this alignment requirement in its definition within the rseq-arm.h inline assembly as well. Signed-off-by: Mathieu Desnoyers Cc: Andrew Hunter Cc: Andrew Morton Cc: Andy Lutomirski Cc: Ben Maurer Cc: Boqun Feng Cc: Catalin Marinas Cc: Chris Lameter Cc: Dave Watson Cc: Joel Fernandes Cc: Josh Triplett Cc: Linus Torvalds Cc: Michael Kerrisk Cc: Paul E . McKenney Cc: Paul Turner Cc: Peter Zijlstra Cc: Russell King Cc: Shuah Khan Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-api@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Link: https://lore.kernel.org/lkml/20180619133230.4087-3-mathieu.desnoyers@efficios.com Signed-off-by: Ingo Molnar --- tools/testing/selftests/rseq/rseq-arm.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/rseq/rseq-arm.h b/tools/testing/selftests/rseq/rseq-arm.h index 3b055f9aeaab..3cea19877227 100644 --- a/tools/testing/selftests/rseq/rseq-arm.h +++ b/tools/testing/selftests/rseq/rseq-arm.h @@ -57,6 +57,7 @@ do { \ #define __RSEQ_ASM_DEFINE_ABORT(table_label, label, teardown, \ abort_label, version, flags, \ start_ip, post_commit_offset, abort_ip) \ + ".balign 32\n\t" \ __rseq_str(table_label) ":\n\t" \ ".word " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \ ".word " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) ", 0x0\n\t" \ From 9a789fcfe8605417f7a1a970355f5efa4fe88c64 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Tue, 19 Jun 2018 09:32:30 -0400 Subject: [PATCH 300/572] rseq/cleanup: Do not abort rseq c.s. in child on fork() Considering that we explicitly forbid system calls in rseq critical sections, it is not valid to issue a fork or clone system call within a rseq critical section, so rseq_fork() is not required to restart an active rseq c.s. in the child process. Signed-off-by: Mathieu Desnoyers Cc: Andrew Morton Cc: Andy Lutomirski Cc: Ben Maurer Cc: Boqun Feng Cc: Catalin Marinas Cc: Chris Lameter Cc: Dave Watson Cc: Joel Fernandes Cc: Josh Triplett Cc: Linus Torvalds Cc: Michael Kerrisk Cc: Paul E . McKenney Cc: Paul Turner Cc: Peter Zijlstra Cc: Russell King Cc: Shuah Khan Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-api@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Link: https://lore.kernel.org/lkml/20180619133230.4087-4-mathieu.desnoyers@efficios.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 87bf02d93a27..c1882643d455 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1831,9 +1831,7 @@ static inline void rseq_migrate(struct task_struct *t) /* * If parent process has a registered restartable sequences area, the - * child inherits. Only applies when forking a process, not a thread. In - * case a parent fork() in the middle of a restartable sequence, set the - * resume notifier to force the child to retry. + * child inherits. Only applies when forking a process, not a thread. */ static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags) { @@ -1847,7 +1845,6 @@ static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags) t->rseq_len = current->rseq_len; t->rseq_sig = current->rseq_sig; t->rseq_event_mask = current->rseq_event_mask; - rseq_preempt(t); } } From 47a91b7232fa25abc7e0b7fc1c69ae4f81061594 Mon Sep 17 00:00:00 2001 From: Jia He Date: Mon, 21 May 2018 11:05:30 +0800 Subject: [PATCH 301/572] KVM: arm/arm64: add WARN_ON if size is not PAGE_SIZE aligned in unmap_stage2_range There is a panic in armv8a server(QDF2400) under memory pressure tests (start 20 guests and run memhog in the host). ---------------------------------begin-------------------------------- [35380.800950] BUG: Bad page state in process qemu-kvm pfn:dd0b6 [35380.805825] page:ffff7fe003742d80 count:-4871 mapcount:-2126053375 mapping: (null) index:0x0 [35380.815024] flags: 0x1fffc00000000000() [35380.818845] raw: 1fffc00000000000 0000000000000000 0000000000000000 ffffecf981470000 [35380.826569] raw: dead000000000100 dead000000000200 ffff8017c001c000 0000000000000000 [35380.805825] page:ffff7fe003742d80 count:-4871 mapcount:-2126053375 mapping: (null) index:0x0 [35380.815024] flags: 0x1fffc00000000000() [35380.818845] raw: 1fffc00000000000 0000000000000000 0000000000000000 ffffecf981470000 [35380.826569] raw: dead000000000100 dead000000000200 ffff8017c001c000 0000000000000000 [35380.834294] page dumped because: nonzero _refcount [...] --------------------------------end-------------------------------------- The root cause might be what was fixed at [1]. But from the KVM points of view, it would be better if the issue was caught earlier. If the size is not PAGE_SIZE aligned, unmap_stage2_range might unmap the wrong(more or less) page range. Hence it caused the "BUG: Bad page state" Let's WARN in that case, so that the issue is obvious. [1] https://lkml.org/lkml/2018/5/3/1042 Reviewed-by: Suzuki K Poulose Signed-off-by: jia.he@hxt-semitech.com [maz: tidied up commit message] Signed-off-by: Marc Zyngier --- virt/kvm/arm/mmu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 8d90de213ce9..1d90d79706bd 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -297,6 +297,8 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) phys_addr_t next; assert_spin_locked(&kvm->mmu_lock); + WARN_ON(size & ~PAGE_MASK); + pgd = kvm->arch.pgd + stage2_pgd_index(addr); do { /* From bdab125c9301a6ac538911ba68f665dfd075ec81 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 19 Jun 2018 18:43:41 +0900 Subject: [PATCH 302/572] Revert "kexec/purgatory: Add clean-up for purgatory directory" Reverts the following commit: b0108f9e93d0 ("kexec: purgatory: add clean-up for purgatory directory") ... which incorrectly stated that the kexec-purgatory.c and purgatory.ro files were not removed after 'make mrproper'. In fact, they are. You can confirm it after reverting it. $ make mrproper $ touch arch/x86/purgatory/kexec-purgatory.c $ touch arch/x86/purgatory/purgatory.ro $ make mrproper CLEAN arch/x86/purgatory $ ls arch/x86/purgatory/ entry64.S Makefile purgatory.c setup-x86_64.S stack.S string.c This is obvious from the build system point of view. arch/x86/Makefile adds 'arch/x86' to core-y. Hence 'make clean' descends like this: arch/x86/Kbuild -> arch/x86/purgatory/Makefile Signed-off-by: Masahiro Yamada Cc: Linus Torvalds Cc: Michal Marek Cc: Peter Zijlstra Cc: Sam Ravnborg Cc: Thomas Gleixner Link: https://lore.kernel.org/lkml/1529401422-28838-2-git-send-email-yamada.masahiro@socionext.com Signed-off-by: Ingo Molnar --- arch/x86/Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 60135cbd905c..d6f404ae3d93 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -327,7 +327,6 @@ archclean: $(Q)rm -rf $(objtree)/arch/x86_64 $(Q)$(MAKE) $(clean)=$(boot) $(Q)$(MAKE) $(clean)=arch/x86/tools - $(Q)$(MAKE) $(clean)=arch/x86/purgatory define archhelp echo '* bzImage - Compressed kernel image (arch/x86/boot/bzImage)' From d6605b6bbee88b74150b14f5e83a6067f5e323d2 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 19 Jun 2018 18:43:42 +0900 Subject: [PATCH 303/572] x86/build: Remove unnecessary preparation for purgatory kexec-purgatory.c is properly generated when Kbuild descend into the arch/x86/purgatory/. Thus the 'archprepare' target is redundant. Signed-off-by: Masahiro Yamada Cc: Linus Torvalds Cc: Michal Marek Cc: Peter Zijlstra Cc: Sam Ravnborg Cc: Thomas Gleixner Link: https://lore.kernel.org/lkml/1529401422-28838-3-git-send-email-yamada.masahiro@socionext.com Signed-off-by: Ingo Molnar --- arch/x86/Makefile | 5 ----- 1 file changed, 5 deletions(-) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index d6f404ae3d93..4fafba5df891 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -258,11 +258,6 @@ archscripts: scripts_basic archheaders: $(Q)$(MAKE) $(build)=arch/x86/entry/syscalls all -archprepare: -ifeq ($(CONFIG_KEXEC_FILE),y) - $(Q)$(MAKE) $(build)=arch/x86/purgatory arch/x86/purgatory/kexec-purgatory.c -endif - ### # Kernel objects From 57cb54e53bddb59f5f542ddd4b0bfe005d31a8d5 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 21 Jun 2018 13:33:53 +0200 Subject: [PATCH 304/572] ALSA: hda - Force to link down at runtime suspend on ATI/AMD HDMI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Henning Kühn reported that the discrete AMD GPU on his hybrid graphics laptop no longer runtime-suspends due to the recent commit 07f4f97d7b4b ("vga_switcheroo: Use device link for HDA controller"). The root cause is that the HDMI codec on AMD GPU doesn't support CLKSTOP and EPSS, which are currently mandatory for powering down the HD-audio link at runtime suspend. Because the HD-audio link is still up, HD-audio controller driver blocks the transition to D3. For addressing the regression, this patch adds a new flag to indicate the forced link-down, and sets it for AMD HDMI codecs appropriately in the codec driver. Fixes: 07f4f97d7b4b ("vga_switcheroo: Use device link for HDA controller") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106957 Reported-by: Lukas Wunner Reported-and-tested-by: Henning Kühn Cc: # v4.17+ Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_codec.c | 5 +++-- sound/pci/hda/hda_codec.h | 1 + sound/pci/hda/patch_hdmi.c | 5 +++++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index d91c87e41756..20a171ac4bb2 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -2899,8 +2899,9 @@ static int hda_codec_runtime_suspend(struct device *dev) list_for_each_entry(pcm, &codec->pcm_list_head, list) snd_pcm_suspend_all(pcm->pcm); state = hda_call_codec_suspend(codec); - if (codec_has_clkstop(codec) && codec_has_epss(codec) && - (state & AC_PWRST_CLK_STOP_OK)) + if (codec->link_down_at_suspend || + (codec_has_clkstop(codec) && codec_has_epss(codec) && + (state & AC_PWRST_CLK_STOP_OK))) snd_hdac_codec_link_down(&codec->core); snd_hdac_link_power(&codec->core, false); return 0; diff --git a/sound/pci/hda/hda_codec.h b/sound/pci/hda/hda_codec.h index 681c360f29f9..a8b1b31f161c 100644 --- a/sound/pci/hda/hda_codec.h +++ b/sound/pci/hda/hda_codec.h @@ -258,6 +258,7 @@ struct hda_codec { unsigned int power_save_node:1; /* advanced PM for each widget */ unsigned int auto_runtime_pm:1; /* enable automatic codec runtime pm */ unsigned int force_pin_prefix:1; /* Add location prefix */ + unsigned int link_down_at_suspend:1; /* link down at runtime suspend */ #ifdef CONFIG_PM unsigned long power_on_acct; unsigned long power_off_acct; diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 8840daf9c6a3..98e1c411c56a 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -3741,6 +3741,11 @@ static int patch_atihdmi(struct hda_codec *codec) spec->chmap.channels_max = max(spec->chmap.channels_max, 8u); + /* AMD GPUs have neither EPSS nor CLKSTOP bits, hence preventing + * the link-down as is. Tell the core to allow it. + */ + codec->link_down_at_suspend = 1; + return 0; } From 54b0a2011dfcd2e3fe2c28062694fbbe3eb377a3 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Thu, 14 Jun 2018 08:24:39 +0200 Subject: [PATCH 305/572] microblaze: Add new syscalls io_pgetevents and rseq Wire up new syscalls io_pgetevents and rseq. Signed-off-by: Michal Simek --- arch/microblaze/include/asm/unistd.h | 2 +- arch/microblaze/include/uapi/asm/unistd.h | 2 ++ arch/microblaze/kernel/syscall_table.S | 2 ++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/microblaze/include/asm/unistd.h b/arch/microblaze/include/asm/unistd.h index 9774e1d9507b..a62d09420a47 100644 --- a/arch/microblaze/include/asm/unistd.h +++ b/arch/microblaze/include/asm/unistd.h @@ -38,6 +38,6 @@ #endif /* __ASSEMBLY__ */ -#define __NR_syscalls 399 +#define __NR_syscalls 401 #endif /* _ASM_MICROBLAZE_UNISTD_H */ diff --git a/arch/microblaze/include/uapi/asm/unistd.h b/arch/microblaze/include/uapi/asm/unistd.h index eb156f914793..7a9f16a76413 100644 --- a/arch/microblaze/include/uapi/asm/unistd.h +++ b/arch/microblaze/include/uapi/asm/unistd.h @@ -415,5 +415,7 @@ #define __NR_pkey_alloc 396 #define __NR_pkey_free 397 #define __NR_statx 398 +#define __NR_io_pgetevents 399 +#define __NR_rseq 400 #endif /* _UAPI_ASM_MICROBLAZE_UNISTD_H */ diff --git a/arch/microblaze/kernel/syscall_table.S b/arch/microblaze/kernel/syscall_table.S index 56bcf313121f..6ab650593792 100644 --- a/arch/microblaze/kernel/syscall_table.S +++ b/arch/microblaze/kernel/syscall_table.S @@ -400,3 +400,5 @@ ENTRY(sys_call_table) .long sys_pkey_alloc .long sys_pkey_free .long sys_statx + .long sys_io_pgetevents + .long sys_rseq From 2dbeb7040a21846facbeea876e85cbde41fa81e9 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 19 Jun 2018 15:36:19 -0600 Subject: [PATCH 306/572] microblaze: remove unecessary of_platform_bus_probe call The call to of_platform_bus_probe has no effect because the DT core already probes default buses like "simple-bus" before this call. Michal Simek said 'xlnx,compound' hasn't been used in a long time, so that match entry isn't needed. Cc: Michal Simek Signed-off-by: Rob Herring Signed-off-by: Michal Simek --- arch/microblaze/kernel/platform.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/arch/microblaze/kernel/platform.c b/arch/microblaze/kernel/platform.c index 2540d60610d9..b9a414552d9b 100644 --- a/arch/microblaze/kernel/platform.c +++ b/arch/microblaze/kernel/platform.c @@ -14,15 +14,8 @@ #include #include -static struct of_device_id xilinx_of_bus_ids[] __initdata = { - { .compatible = "simple-bus", }, - { .compatible = "xlnx,compound", }, - {} -}; - static int __init microblaze_device_probe(void) { - of_platform_bus_probe(NULL, xilinx_of_bus_ids, NULL); of_platform_reset_gpio_probe(); return 0; } From 45df561a7ae2ca5080708cc2cf84a236f4d930ef Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 19 Jun 2018 15:36:20 -0600 Subject: [PATCH 307/572] microblaze: consolidate GPIO reset handling Now that platform.c only has the GPIO reset handling left, move the initcall to reset.c and remove platform.c. Cc: Michal Simek Signed-off-by: Rob Herring Signed-off-by: Michal Simek --- arch/microblaze/include/asm/setup.h | 2 -- arch/microblaze/kernel/Makefile | 2 +- arch/microblaze/kernel/platform.c | 22 ---------------------- arch/microblaze/kernel/reset.c | 11 ++++++----- 4 files changed, 7 insertions(+), 30 deletions(-) delete mode 100644 arch/microblaze/kernel/platform.c diff --git a/arch/microblaze/include/asm/setup.h b/arch/microblaze/include/asm/setup.h index a38e4a56e3c6..ce9b7b786156 100644 --- a/arch/microblaze/include/asm/setup.h +++ b/arch/microblaze/include/asm/setup.h @@ -23,8 +23,6 @@ extern char *klimit; extern void mmu_reset(void); # endif /* CONFIG_MMU */ -extern void of_platform_reset_gpio_probe(void); - void time_init(void); void init_IRQ(void); void machine_early_init(const char *cmdline, unsigned int ram, diff --git a/arch/microblaze/kernel/Makefile b/arch/microblaze/kernel/Makefile index 3a53378d66d9..dd71637437f4 100644 --- a/arch/microblaze/kernel/Makefile +++ b/arch/microblaze/kernel/Makefile @@ -16,7 +16,7 @@ extra-y := head.o vmlinux.lds obj-y += dma.o exceptions.o \ hw_exception_handler.o irq.o \ - platform.o process.o prom.o ptrace.o \ + process.o prom.o ptrace.o \ reset.o setup.o signal.o sys_microblaze.o timer.o traps.o unwind.o obj-y += cpu/ diff --git a/arch/microblaze/kernel/platform.c b/arch/microblaze/kernel/platform.c deleted file mode 100644 index b9a414552d9b..000000000000 --- a/arch/microblaze/kernel/platform.c +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright 2008 Michal Simek - * - * based on virtex.c file - * - * Copyright 2007 Secret Lab Technologies Ltd. - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program is licensed "as is" without any warranty of any - * kind, whether express or implied. - */ - -#include -#include -#include - -static int __init microblaze_device_probe(void) -{ - of_platform_reset_gpio_probe(); - return 0; -} -device_initcall(microblaze_device_probe); diff --git a/arch/microblaze/kernel/reset.c b/arch/microblaze/kernel/reset.c index bab4c8330ef4..fcbe1daf6316 100644 --- a/arch/microblaze/kernel/reset.c +++ b/arch/microblaze/kernel/reset.c @@ -18,7 +18,7 @@ static int handle; /* reset pin handle */ static unsigned int reset_val; -void of_platform_reset_gpio_probe(void) +static int of_platform_reset_gpio_probe(void) { int ret; handle = of_get_named_gpio(of_find_node_by_path("/"), @@ -27,13 +27,13 @@ void of_platform_reset_gpio_probe(void) if (!gpio_is_valid(handle)) { pr_info("Skipping unavailable RESET gpio %d (%s)\n", handle, "reset"); - return; + return -ENODEV; } ret = gpio_request(handle, "reset"); if (ret < 0) { pr_info("GPIO pin is already allocated\n"); - return; + return ret; } /* get current setup value */ @@ -51,11 +51,12 @@ void of_platform_reset_gpio_probe(void) pr_info("RESET: Registered gpio device: %d, current val: %d\n", handle, reset_val); - return; + return 0; err: gpio_free(handle); - return; + return ret; } +device_initcall(of_platform_reset_gpio_probe); static void gpio_system_reset(void) From 6cb2b08ff92460290979de4be91363e5d1b6cec1 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Mon, 18 Jun 2018 09:59:54 +0200 Subject: [PATCH 308/572] x86/pti: Don't report XenPV as vulnerable Xen PV domain kernel is not by design affected by meltdown as it's enforcing split CR3 itself. Let's not report such systems as "Vulnerable" in sysfs (we're also already forcing PTI to off in X86_HYPER_XEN_PV cases); the security of the system ultimately depends on presence of mitigation in the Hypervisor, which can't be easily detected from DomU; let's report that. Reported-and-tested-by: Mike Latimer Signed-off-by: Jiri Kosina Signed-off-by: Thomas Gleixner Acked-by: Juergen Gross Cc: Borislav Petkov Link: https://lkml.kernel.org/r/nycvar.YFH.7.76.1806180959080.6203@cbobk.fhfr.pm [ Merge the user-visible string into a single line. ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/bugs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index cd0fda1fff6d..404df26b7de8 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -27,6 +27,7 @@ #include #include #include +#include static void __init spectre_v2_select_mitigation(void); static void __init ssb_select_mitigation(void); @@ -664,6 +665,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr if (boot_cpu_has(X86_FEATURE_PTI)) return sprintf(buf, "Mitigation: PTI\n"); + if (hypervisor_is_type(X86_HYPER_XEN_PV)) + return sprintf(buf, "Unknown (XEN PV detected, hypervisor mitigation required)\n"); + break; case X86_BUG_SPECTRE_V1: From eab6870fee877258122a042bfd99ee7908c40280 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 7 Jun 2018 09:13:48 -0700 Subject: [PATCH 309/572] x86/spectre_v1: Disable compiler optimizations over array_index_mask_nospec() Mark Rutland noticed that GCC optimization passes have the potential to elide necessary invocations of the array_index_mask_nospec() instruction sequence, so mark the asm() volatile. Mark explains: "The volatile will inhibit *some* cases where the compiler could lift the array_index_nospec() call out of a branch, e.g. where there are multiple invocations of array_index_nospec() with the same arguments: if (idx < foo) { idx1 = array_idx_nospec(idx, foo) do_something(idx1); } < some other code > if (idx < foo) { idx2 = array_idx_nospec(idx, foo); do_something_else(idx2); } ... since the compiler can determine that the two invocations yield the same result, and reuse the first result (likely the same register as idx was in originally) for the second branch, effectively re-writing the above as: if (idx < foo) { idx = array_idx_nospec(idx, foo); do_something(idx); } < some other code > if (idx < foo) { do_something_else(idx); } ... if we don't take the first branch, then speculatively take the second, we lose the nospec protection. There's more info on volatile asm in the GCC docs: https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html#Volatile " Reported-by: Mark Rutland Signed-off-by: Dan Williams Acked-by: Mark Rutland Acked-by: Thomas Gleixner Acked-by: Linus Torvalds Cc: Cc: Peter Zijlstra Fixes: babdde2698d4 ("x86: Implement array_index_mask_nospec") Link: https://lkml.kernel.org/lkml/152838798950.14521.4893346294059739135.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/barrier.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 042b5e892ed1..14de0432d288 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -38,7 +38,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, { unsigned long mask; - asm ("cmp %1,%2; sbb %0,%0;" + asm volatile ("cmp %1,%2; sbb %0,%0;" :"=r" (mask) :"g"(size),"r" (index) :"cc"); From f642fb5864a6e3645edce6f85ffe7b44d5e9b990 Mon Sep 17 00:00:00 2001 From: "mike.travis@hpe.com" Date: Thu, 24 May 2018 15:17:12 -0500 Subject: [PATCH 310/572] x86/platform/UV: Add adjustable set memory block size function Add a new function to "adjust" the current fixed UV memory block size of 2GB so it can be changed to a different physical boundary. This is out of necessity so arch dependent code can accommodate specific BIOS requirements which can align these new PMEM modules at less than the default boundaries. A "set order" type of function was used to insure that the memory block size will be a power of two value without requiring a validity check. 64GB was chosen as the upper limit for memory block size values to accommodate upcoming 4PB systems which have 6 more bits of physical address space (46 becoming 52). Signed-off-by: Mike Travis Reviewed-by: Andrew Banman Cc: Andrew Morton Cc: Dimitri Sivanich Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Russ Anderson Cc: Thomas Gleixner Cc: dan.j.williams@intel.com Cc: jgross@suse.com Cc: kirill.shutemov@linux.intel.com Cc: mhocko@suse.com Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/lkml/20180524201711.609546602@stormcage.americas.sgi.com Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 20 ++++++++++++++++---- include/linux/memory.h | 1 + 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 0a400606dea0..20d8bf5fbceb 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1350,16 +1350,28 @@ int kern_addr_valid(unsigned long addr) /* Amount of ram needed to start using large blocks */ #define MEM_SIZE_FOR_LARGE_BLOCK (64UL << 30) +/* Adjustable memory block size */ +static unsigned long set_memory_block_size; +int __init set_memory_block_size_order(unsigned int order) +{ + unsigned long size = 1UL << order; + + if (size > MEM_SIZE_FOR_LARGE_BLOCK || size < MIN_MEMORY_BLOCK_SIZE) + return -EINVAL; + + set_memory_block_size = size; + return 0; +} + static unsigned long probe_memory_block_size(void) { unsigned long boot_mem_end = max_pfn << PAGE_SHIFT; unsigned long bz; - /* If this is UV system, always set 2G block size */ - if (is_uv_system()) { - bz = MAX_BLOCK_SIZE; + /* If memory block size has been set, then use it */ + bz = set_memory_block_size; + if (bz) goto done; - } /* Use regular block if RAM is smaller than MEM_SIZE_FOR_LARGE_BLOCK */ if (boot_mem_end < MEM_SIZE_FOR_LARGE_BLOCK) { diff --git a/include/linux/memory.h b/include/linux/memory.h index 31ca3e28b0eb..a6ddefc60517 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -38,6 +38,7 @@ struct memory_block { int arch_get_memory_phys_device(unsigned long start_pfn); unsigned long memory_block_size_bytes(void); +int set_memory_block_size_order(unsigned int order); /* These states are exposed to userspace as text strings in sysfs */ #define MEM_ONLINE (1<<0) /* exposed to userspace */ From bbbd2b51a2aa0d76b3676271e216cf3647773397 Mon Sep 17 00:00:00 2001 From: "mike.travis@hpe.com" Date: Thu, 24 May 2018 15:17:13 -0500 Subject: [PATCH 311/572] x86/platform/UV: Use new set memory block size function Add a call to the new function to "adjust" the current fixed UV memory block size of 2GB so it can be changed to a different physical boundary. This accommodates changes in the Intel BIOS, and therefore UV BIOS, which now can align boundaries different than the previous UV standard of 2GB. It also flags any UV Global Address boundaries from BIOS that cause a change in the mem block size (boundary). The current boundary of 2GB has been used on UV since the first system release in 2009 with Linux 2.6 and has worked fine. But the new NVDIMM persistent memory modules (PMEM), along with the Intel BIOS changes to support these modules caused the memory block size boundary to be set to a lower limit. Intel only guarantees that this minimum boundary at 64MB though the current Linux limit is 128MB. Note that the default remains 2GB if no changes occur. Signed-off-by: Mike Travis Reviewed-by: Andrew Banman Cc: Andrew Morton Cc: Dimitri Sivanich Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Russ Anderson Cc: Thomas Gleixner Cc: dan.j.williams@intel.com Cc: jgross@suse.com Cc: kirill.shutemov@linux.intel.com Cc: mhocko@suse.com Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/lkml/20180524201711.732785782@stormcage.americas.sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/x2apic_uv_x.c | 49 ++++++++++++++++++++++++++++-- 1 file changed, 46 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index efaf2d4f9c3c..2270a777d647 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -392,6 +393,40 @@ extern int uv_hub_info_version(void) } EXPORT_SYMBOL(uv_hub_info_version); +/* Default UV memory block size is 2GB */ +static unsigned long mem_block_size = (2UL << 30); + +static __init int adj_blksize(u32 lgre) +{ + unsigned long base = (unsigned long)lgre << UV_GAM_RANGE_SHFT; + unsigned long size; + + for (size = mem_block_size; size > MIN_MEMORY_BLOCK_SIZE; size >>= 1) + if (IS_ALIGNED(base, size)) + break; + + if (size >= mem_block_size) + return 0; + + mem_block_size = size; + return 1; +} + +static __init void set_block_size(void) +{ + unsigned int order = ffs(mem_block_size); + + if (order) { + /* adjust for ffs return of 1..64 */ + set_memory_block_size_order(order - 1); + pr_info("UV: mem_block_size set to 0x%lx\n", mem_block_size); + } else { + /* bad or zero value, default to 1UL << 31 (2GB) */ + pr_err("UV: mem_block_size error with 0x%lx\n", mem_block_size); + set_memory_block_size_order(31); + } +} + /* Build GAM range lookup table: */ static __init void build_uv_gr_table(void) { @@ -1180,23 +1215,30 @@ static void __init decode_gam_rng_tbl(unsigned long ptr) << UV_GAM_RANGE_SHFT); int order = 0; char suffix[] = " KMGTPE"; + int flag = ' '; while (size > 9999 && order < sizeof(suffix)) { size /= 1024; order++; } + /* adjust max block size to current range start */ + if (gre->type == 1 || gre->type == 2) + if (adj_blksize(lgre)) + flag = '*'; + if (!index) { pr_info("UV: GAM Range Table...\n"); - pr_info("UV: # %20s %14s %5s %4s %5s %3s %2s\n", "Range", "", "Size", "Type", "NASID", "SID", "PN"); + pr_info("UV: # %20s %14s %6s %4s %5s %3s %2s\n", "Range", "", "Size", "Type", "NASID", "SID", "PN"); } - pr_info("UV: %2d: 0x%014lx-0x%014lx %5lu%c %3d %04x %02x %02x\n", + pr_info("UV: %2d: 0x%014lx-0x%014lx%c %5lu%c %3d %04x %02x %02x\n", index++, (unsigned long)lgre << UV_GAM_RANGE_SHFT, (unsigned long)gre->limit << UV_GAM_RANGE_SHFT, - size, suffix[order], + flag, size, suffix[order], gre->type, gre->nasid, gre->sockid, gre->pnode); + /* update to next range start */ lgre = gre->limit; if (sock_min > gre->sockid) sock_min = gre->sockid; @@ -1427,6 +1469,7 @@ static void __init uv_system_init_hub(void) build_socket_tables(); build_uv_gr_table(); + set_block_size(); uv_init_hub_info(&hub_info); uv_possible_blades = num_possible_nodes(); if (!_node_to_pnode) From d7609f4210cb716c11abfe2bfb5997191095d00b Mon Sep 17 00:00:00 2001 From: "mike.travis@hpe.com" Date: Thu, 24 May 2018 15:17:14 -0500 Subject: [PATCH 312/572] x86/platform/UV: Add kernel parameter to set memory block size Add a kernel parameter that allows setting UV memory block size. This is to provide an adjustment for new forms of PMEM and other DIMM memory that might require alignment restrictions other than scanning the global address table for the required minimum alignment. The value set will be further adjusted by both the GAM range table scan as well as restrictions imposed by set_memory_block_size_order(). Signed-off-by: Mike Travis Reviewed-by: Andrew Banman Cc: Andrew Morton Cc: Dimitri Sivanich Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Russ Anderson Cc: Thomas Gleixner Cc: dan.j.williams@intel.com Cc: jgross@suse.com Cc: kirill.shutemov@linux.intel.com Cc: mhocko@suse.com Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/lkml/20180524201711.854849120@stormcage.americas.sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/x2apic_uv_x.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 2270a777d647..d492752f79e1 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -396,6 +396,17 @@ EXPORT_SYMBOL(uv_hub_info_version); /* Default UV memory block size is 2GB */ static unsigned long mem_block_size = (2UL << 30); +/* Kernel parameter to specify UV mem block size */ +static int parse_mem_block_size(char *ptr) +{ + unsigned long size = memparse(ptr, NULL); + + /* Size will be rounded down by set_block_size() below */ + mem_block_size = size; + return 0; +} +early_param("uv_memblksize", parse_mem_block_size); + static __init int adj_blksize(u32 lgre) { unsigned long base = (unsigned long)lgre << UV_GAM_RANGE_SHFT; From 9f9cafc14016f23f982d3ce18f9057923bd3037a Mon Sep 17 00:00:00 2001 From: Jianchao Wang Date: Wed, 20 Jun 2018 13:42:22 +0800 Subject: [PATCH 313/572] nvme-pci: move nvme_kill_queues to nvme_remove_dead_ctrl There is race between nvme_remove and nvme_reset_work that can lead to io hang. nvme_remove nvme_reset_work -> nvme_remove_dead_ctrl -> nvme_dev_disable -> quiesce request_queue -> queue remove_work -> cancel_work_sync reset_work -> nvme_remove_namespaces -> splice ctrl->namespaces nvme_remove_dead_ctrl_work -> nvme_kill_queues -> nvme_ns_remove do nothing -> blk_cleanup_queue -> blk_freeze_queue Finally, the request_queue is quiesced state when wait freeze, we will get io hang here. To fix it, move the nvme_kill_queues from nvme_remove_dead_ctrl_work to nvme_remove_dead_ctrl. Suggested-by: Keith Busch Signed-off-by: Jianchao Wang Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index fc33804662e7..73a97fcea364 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2289,6 +2289,7 @@ static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status) nvme_get_ctrl(&dev->ctrl); nvme_dev_disable(dev, false); + nvme_kill_queues(&dev->ctrl); if (!queue_work(nvme_wq, &dev->remove_work)) nvme_put_ctrl(&dev->ctrl); } @@ -2405,7 +2406,6 @@ static void nvme_remove_dead_ctrl_work(struct work_struct *work) struct nvme_dev *dev = container_of(work, struct nvme_dev, remove_work); struct pci_dev *pdev = to_pci_dev(dev->dev); - nvme_kill_queues(&dev->ctrl); if (pci_get_drvdata(pdev)) device_release_driver(&pdev->dev); nvme_put_ctrl(&dev->ctrl); From 90718e32e1dcc2479acfa208ccfc6442850b594c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 18 May 2018 18:27:39 +0200 Subject: [PATCH 314/572] uprobes/x86: Remove incorrect WARN_ON() in uprobe_init_insn() insn_get_length() has the side-effect of processing the entire instruction but only if it was decoded successfully, otherwise insn_complete() can fail and in this case we need to just return an error without warning. Reported-by: syzbot+30d675e3ca03c1c351e7@syzkaller.appspotmail.com Signed-off-by: Oleg Nesterov Reviewed-by: Masami Hiramatsu Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: syzkaller-bugs@googlegroups.com Link: https://lkml.kernel.org/lkml/20180518162739.GA5559@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/uprobes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 58d8d800875d..deb576b23b7c 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -293,7 +293,7 @@ static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool insn_init(insn, auprobe->insn, sizeof(auprobe->insn), x86_64); /* has the side-effect of processing the entire instruction */ insn_get_length(insn); - if (WARN_ON_ONCE(!insn_complete(insn))) + if (!insn_complete(insn)) return -ENOEXEC; if (is_prefix_bad(insn)) From 8730662d7b2582f65dd6c59ab1e0b7fa461c79b0 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Tue, 24 Apr 2018 14:22:38 -0700 Subject: [PATCH 315/572] kernel.h: Fix a typo in comment Signed-off-by: Wei Wang Cc: Andrew Morton Cc: Borislav Petkov Cc: Crt Mori Cc: Josh Poimboeuf Cc: Kees Cook Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Randy Dunlap Cc: Steven Rostedt Cc: Thomas Gleixner Cc: gregkh@linuxfoundation.org Cc: wei.vince.wang@gmail.com Link: https://lkml.kernel.org/lkml/20180424212241.16013-1-wvw@google.com Signed-off-by: Ingo Molnar --- include/linux/kernel.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index d23123238534..941dc0a5a877 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -666,7 +666,7 @@ do { \ * your code. (Extra memory is used for special buffers that are * allocated when trace_printk() is used.) * - * A little optization trick is done here. If there's only one + * A little optimization trick is done here. If there's only one * argument, there's no need to scan the string for printf formats. * The trace_puts() will suffice. But how can we take advantage of * using trace_puts() when trace_printk() has only one argument? From 7ddfd3e0df29106c728dda2a6bd6591ee43a4e3c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 17 Jun 2018 10:16:21 +0100 Subject: [PATCH 316/572] KVM: Enforce error in ioctl for compat tasks when !KVM_COMPAT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current behaviour of the compat ioctls is a bit odd. We provide a compat_ioctl method when KVM_COMPAT is set, and NULL otherwise. But NULL means that the normal, non-compat ioctl should be used directly for compat tasks, and there is no way to actually prevent a compat task from issueing KVM ioctls. This patch changes this behaviour, by always registering a compat_ioctl method, even if KVM_COMPAT is not selected. In that case, the callback will always return -EINVAL. Fixes: de8e5d744051568c8aad ("KVM: Disable compat ioctl for s390") Reported-by: Mark Rutland Acked-by: Christian Borntraeger Acked-by: Radim Krčmář Signed-off-by: Marc Zyngier --- virt/kvm/kvm_main.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index ada21f47f22b..8b47507faab5 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -116,6 +116,11 @@ static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, #ifdef CONFIG_KVM_COMPAT static long kvm_vcpu_compat_ioctl(struct file *file, unsigned int ioctl, unsigned long arg); +#define KVM_COMPAT(c) .compat_ioctl = (c) +#else +static long kvm_no_compat_ioctl(struct file *file, unsigned int ioctl, + unsigned long arg) { return -EINVAL; } +#define KVM_COMPAT(c) .compat_ioctl = kvm_no_compat_ioctl #endif static int hardware_enable_all(void); static void hardware_disable_all(void); @@ -2396,11 +2401,9 @@ static int kvm_vcpu_release(struct inode *inode, struct file *filp) static struct file_operations kvm_vcpu_fops = { .release = kvm_vcpu_release, .unlocked_ioctl = kvm_vcpu_ioctl, -#ifdef CONFIG_KVM_COMPAT - .compat_ioctl = kvm_vcpu_compat_ioctl, -#endif .mmap = kvm_vcpu_mmap, .llseek = noop_llseek, + KVM_COMPAT(kvm_vcpu_compat_ioctl), }; /* @@ -2824,10 +2827,8 @@ static int kvm_device_release(struct inode *inode, struct file *filp) static const struct file_operations kvm_device_fops = { .unlocked_ioctl = kvm_device_ioctl, -#ifdef CONFIG_KVM_COMPAT - .compat_ioctl = kvm_device_ioctl, -#endif .release = kvm_device_release, + KVM_COMPAT(kvm_device_ioctl), }; struct kvm_device *kvm_device_from_filp(struct file *filp) @@ -3165,10 +3166,8 @@ static long kvm_vm_compat_ioctl(struct file *filp, static struct file_operations kvm_vm_fops = { .release = kvm_vm_release, .unlocked_ioctl = kvm_vm_ioctl, -#ifdef CONFIG_KVM_COMPAT - .compat_ioctl = kvm_vm_compat_ioctl, -#endif .llseek = noop_llseek, + KVM_COMPAT(kvm_vm_compat_ioctl), }; static int kvm_dev_ioctl_create_vm(unsigned long type) @@ -3259,8 +3258,8 @@ out: static struct file_operations kvm_chardev_ops = { .unlocked_ioctl = kvm_dev_ioctl, - .compat_ioctl = kvm_dev_ioctl, .llseek = noop_llseek, + KVM_COMPAT(kvm_dev_ioctl), }; static struct miscdevice kvm_dev = { From 37b65db85f9b2fc98267eee4a18d7506492e6e8c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 17 Jun 2018 10:22:57 +0100 Subject: [PATCH 317/572] KVM: arm64: Prevent KVM_COMPAT from being selected There is very little point in trying to support the 32bit KVM/arm API on arm64, and this was never an anticipated use case. Let's make it clear by not selecting KVM_COMPAT. Acked-by: Mark Rutland Signed-off-by: Marc Zyngier --- virt/kvm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 72143cfaf6ec..ea434ddc8499 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -47,7 +47,7 @@ config KVM_GENERIC_DIRTYLOG_READ_PROTECT config KVM_COMPAT def_bool y - depends on KVM && COMPAT && !S390 + depends on KVM && COMPAT && !(S390 || ARM64) config HAVE_KVM_IRQ_BYPASS bool From fcc784be837714a9173b372ff9fb9b514590dad9 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 4 Apr 2018 14:06:30 -0400 Subject: [PATCH 318/572] locking/lockdep: Do not record IRQ state within lockdep code While debugging where things were going wrong with mapping enabling/disabling interrupts with the lockdep state and actual real enabling and disabling interrupts, I had to silent the IRQ disabling/enabling in debug_check_no_locks_freed() because it was always showing up as it was called before the splat was. Use raw_local_irq_save/restore() for not only debug_check_no_locks_freed() but for all internal lockdep functions, as they hide useful information about where interrupts were used incorrectly last. Signed-off-by: Steven Rostedt (VMware) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Link: https://lkml.kernel.org/lkml/20180404140630.3f4f4c7a@gandalf.local.home Signed-off-by: Ingo Molnar --- kernel/locking/lockdep.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index edcac5de7ebc..5fa4d3138bf1 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -1265,11 +1265,11 @@ unsigned long lockdep_count_forward_deps(struct lock_class *class) this.parent = NULL; this.class = class; - local_irq_save(flags); + raw_local_irq_save(flags); arch_spin_lock(&lockdep_lock); ret = __lockdep_count_forward_deps(&this); arch_spin_unlock(&lockdep_lock); - local_irq_restore(flags); + raw_local_irq_restore(flags); return ret; } @@ -1292,11 +1292,11 @@ unsigned long lockdep_count_backward_deps(struct lock_class *class) this.parent = NULL; this.class = class; - local_irq_save(flags); + raw_local_irq_save(flags); arch_spin_lock(&lockdep_lock); ret = __lockdep_count_backward_deps(&this); arch_spin_unlock(&lockdep_lock); - local_irq_restore(flags); + raw_local_irq_restore(flags); return ret; } @@ -4411,7 +4411,7 @@ void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len) if (unlikely(!debug_locks)) return; - local_irq_save(flags); + raw_local_irq_save(flags); for (i = 0; i < curr->lockdep_depth; i++) { hlock = curr->held_locks + i; @@ -4422,7 +4422,7 @@ void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len) print_freed_lock_bug(curr, mem_from, mem_from + mem_len, hlock); break; } - local_irq_restore(flags); + raw_local_irq_restore(flags); } EXPORT_SYMBOL_GPL(debug_check_no_locks_freed); From 943e942e6266f22babee5efeb00f8f672fbff5bd Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 21 Jun 2018 09:49:37 -0600 Subject: [PATCH 319/572] nvme-pci: limit max IO size and segments to avoid high order allocations nvme requires an sg table allocation for each request. If the request is large, then the allocation can become quite large. For instance, with our default software settings of 1280KB IO size, we'll need 10248 bytes of sg table. That turns into a 2nd order allocation, which we can't always guarantee. If we fail the allocation, blk-mq will retry it later. But there's no guarantee that we'll EVER be able to allocate that much contigious memory. Limit the IO size such that we never need more than a single page of memory. That's a lot faster and more reliable. Then back that allocation with a mempool, so that we know we'll always be able to succeed the allocation at some point. Signed-off-by: Jens Axboe Acked-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 1 + drivers/nvme/host/nvme.h | 1 + drivers/nvme/host/pci.c | 42 +++++++++++++++++++++++++++++++++++----- 3 files changed, 39 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 21710a7460c8..46df030b2c3f 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1808,6 +1808,7 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, u32 max_segments = (ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1; + max_segments = min_not_zero(max_segments, ctrl->max_segments); blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors); blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX)); } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 231807cbc849..0c4a33df3b2f 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -170,6 +170,7 @@ struct nvme_ctrl { u64 cap; u32 page_size; u32 max_hw_sectors; + u32 max_segments; u16 oncs; u16 oacs; u16 nssa; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 73a97fcea364..ba943f211687 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -38,6 +38,13 @@ #define SGES_PER_PAGE (PAGE_SIZE / sizeof(struct nvme_sgl_desc)) +/* + * These can be higher, but we need to ensure that any command doesn't + * require an sg allocation that needs more than a page of data. + */ +#define NVME_MAX_KB_SZ 4096 +#define NVME_MAX_SEGS 127 + static int use_threaded_interrupts; module_param(use_threaded_interrupts, int, 0); @@ -100,6 +107,8 @@ struct nvme_dev { struct nvme_ctrl ctrl; struct completion ioq_wait; + mempool_t *iod_mempool; + /* shadow doorbell buffer support: */ u32 *dbbuf_dbs; dma_addr_t dbbuf_dbs_dma_addr; @@ -477,10 +486,7 @@ static blk_status_t nvme_init_iod(struct request *rq, struct nvme_dev *dev) iod->use_sgl = nvme_pci_use_sgls(dev, rq); if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) { - size_t alloc_size = nvme_pci_iod_alloc_size(dev, size, nseg, - iod->use_sgl); - - iod->sg = kmalloc(alloc_size, GFP_ATOMIC); + iod->sg = mempool_alloc(dev->iod_mempool, GFP_ATOMIC); if (!iod->sg) return BLK_STS_RESOURCE; } else { @@ -526,7 +532,7 @@ static void nvme_free_iod(struct nvme_dev *dev, struct request *req) } if (iod->sg != iod->inline_sg) - kfree(iod->sg); + mempool_free(iod->sg, dev->iod_mempool); } #ifdef CONFIG_BLK_DEV_INTEGRITY @@ -2280,6 +2286,7 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl) blk_put_queue(dev->ctrl.admin_q); kfree(dev->queues); free_opal_dev(dev->ctrl.opal_dev); + mempool_destroy(dev->iod_mempool); kfree(dev); } @@ -2334,6 +2341,13 @@ static void nvme_reset_work(struct work_struct *work) if (result) goto out; + /* + * Limit the max command size to prevent iod->sg allocations going + * over a single page. + */ + dev->ctrl.max_hw_sectors = NVME_MAX_KB_SZ << 1; + dev->ctrl.max_segments = NVME_MAX_SEGS; + result = nvme_init_identify(&dev->ctrl); if (result) goto out; @@ -2509,6 +2523,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) int node, result = -ENOMEM; struct nvme_dev *dev; unsigned long quirks = id->driver_data; + size_t alloc_size; node = dev_to_node(&pdev->dev); if (node == NUMA_NO_NODE) @@ -2546,6 +2561,23 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (result) goto release_pools; + /* + * Double check that our mempool alloc size will cover the biggest + * command we support. + */ + alloc_size = nvme_pci_iod_alloc_size(dev, NVME_MAX_KB_SZ, + NVME_MAX_SEGS, true); + WARN_ON_ONCE(alloc_size > PAGE_SIZE); + + dev->iod_mempool = mempool_create_node(1, mempool_kmalloc, + mempool_kfree, + (void *) alloc_size, + GFP_KERNEL, node); + if (!dev->iod_mempool) { + result = -ENOMEM; + goto release_pools; + } + dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev)); nvme_get_ctrl(&dev->ctrl); From f098631848cd9f4142b6dec7ea95baad846e1670 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 20 Jun 2018 10:02:30 +0100 Subject: [PATCH 320/572] Btrfs: fix physical offset reported by fiemap for inline extents Commit 9d311e11fc1f ("Btrfs: fiemap: pass correct bytenr when fm_extent_count is zero") introduced a regression where we no longer report 0 as the physical offset for inline extents (and other extents with a special block_start value). This is because it always sets the variable used to report the physical offset ("disko") as em->block_start plus some offset, and em->block_start has the value 18446744073709551614 ((u64) -2) for inline extents. This made the btrfs test 004 (from fstests) often fail, for example, for a file with an inline extent we have the following items in the subvolume tree: item 101 key (418 INODE_ITEM 0) itemoff 11029 itemsize 160 generation 25 transid 38 size 1525 nbytes 1525 block group 0 mode 100666 links 1 uid 0 gid 0 rdev 0 sequence 0 flags 0x2(none) atime 1529342058.461891730 (2018-06-18 18:14:18) ctime 1529342058.461891730 (2018-06-18 18:14:18) mtime 1529342058.461891730 (2018-06-18 18:14:18) otime 1529342055.869892885 (2018-06-18 18:14:15) item 102 key (418 INODE_REF 264) itemoff 11016 itemsize 13 index 25 namelen 3 name: fc7 item 103 key (418 EXTENT_DATA 0) itemoff 9470 itemsize 1546 generation 38 type 0 (inline) inline extent data size 1525 ram_bytes 1525 compression 0 (none) Then when test 004 invoked fiemap against the file it got a non-zero physical offset: $ filefrag -v /mnt/p0/d4/d7/fc7 Filesystem type is: 9123683e File size of /mnt/p0/d4/d7/fc7 is 1525 (1 block of 4096 bytes) ext: logical_offset: physical_offset: length: expected: flags: 0: 0.. 4095: 18446744073709551614.. 4093: 4096: last,not_aligned,inline,eof /mnt/p0/d4/d7/fc7: 1 extent found This resulted in the test failing like this: btrfs/004 49s ... [failed, exit status 1]- output mismatch (see /home/fdmanana/git/hub/xfstests/results//btrfs/004.out.bad) --- tests/btrfs/004.out 2016-08-23 10:17:35.027012095 +0100 +++ /home/fdmanana/git/hub/xfstests/results//btrfs/004.out.bad 2018-06-18 18:15:02.385872155 +0100 @@ -1,3 +1,10 @@ QA output created by 004 *** test backref walking -*** done +./tests/btrfs/004: line 227: [: 7.55578637259143e+22: integer expression expected +ERROR: 7.55578637259143e+22 is not a valid numeric value. +unexpected output from + /home/fdmanana/git/hub/btrfs-progs/btrfs inspect-internal logical-resolve -s 65536 -P 7.55578637259143e+22 /home/fdmanana/btrfs-tests/scratch_1 ... (Run 'diff -u tests/btrfs/004.out /home/fdmanana/git/hub/xfstests/results//btrfs/004.out.bad' to see the entire diff) Ran: btrfs/004 The large number in scientific notation reported as an invalid numeric value is the result from the filter passed to perl which multiplies the physical offset by the block size reported by fiemap. So fix this by ensuring the physical offset is always set to 0 when we are processing an extent with a special block_start value. Fixes: 9d311e11fc1f ("Btrfs: fiemap: pass correct bytenr when fm_extent_count is zero") Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 8e4a7cdbc9f5..1aa91d57404a 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4545,8 +4545,11 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, offset_in_extent = em_start - em->start; em_end = extent_map_end(em); em_len = em_end - em_start; - disko = em->block_start + offset_in_extent; flags = 0; + if (em->block_start < EXTENT_MAP_LAST_BYTE) + disko = em->block_start + offset_in_extent; + else + disko = 0; /* * bump off for our next call to get_extent From 22883ddc66df402f2fbc37fe1f4d2779238e4b2d Mon Sep 17 00:00:00 2001 From: Lu Fengqi Date: Tue, 19 Jun 2018 14:54:38 +0800 Subject: [PATCH 321/572] btrfs: fix invalid-free in btrfs_extent_same If this condition ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) != (BTRFS_I(dst)->flags & BTRFS_INODE_NODATASUM)) is hit, we will go to free the uninitialized cmp.src_pages and cmp.dst_pages. Fixes: 67b07bd4bec5 ("Btrfs: reuse cmp workspace in EXTENT_SAME ioctl") Signed-off-by: Lu Fengqi Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 5556e9ea2a4b..a4d2856a4df1 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3577,7 +3577,7 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, ret = btrfs_extent_same_range(src, loff, BTRFS_MAX_DEDUPE_LEN, dst, dst_loff, &cmp); if (ret) - goto out_unlock; + goto out_free; loff += BTRFS_MAX_DEDUPE_LEN; dst_loff += BTRFS_MAX_DEDUPE_LEN; @@ -3587,16 +3587,16 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, ret = btrfs_extent_same_range(src, loff, tail_len, dst, dst_loff, &cmp); +out_free: + kvfree(cmp.src_pages); + kvfree(cmp.dst_pages); + out_unlock: if (same_inode) inode_unlock(src); else btrfs_double_inode_unlock(src, dst); -out_free: - kvfree(cmp.src_pages); - kvfree(cmp.dst_pages); - return ret; } From 70303420b5721c38998cf987e6b7d30cc62d4ff1 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 21 Jun 2018 13:20:53 -0400 Subject: [PATCH 322/572] tracing: Check for no filter when processing event filters The syzkaller detected a out-of-bounds issue with the events filter code, specifically here: prog[N].pred = NULL; /* #13 */ prog[N].target = 1; /* TRUE */ prog[N+1].pred = NULL; prog[N+1].target = 0; /* FALSE */ -> prog[N-1].target = N; prog[N-1].when_to_branch = false; As that's the first reference to a "N-1" index, it appears that the code got here with N = 0, which means the filter parser found no filter to parse (which shouldn't ever happen, but apparently it did). Add a new error to the parsing code that will check to make sure that N is not zero before going into this part of the code. If N = 0, then -EINVAL is returned, and a error message is added to the filter. Cc: stable@vger.kernel.org Fixes: 80765597bc587 ("tracing: Rewrite filter logic to be simpler and faster") Reported-by: air icy bugzilla url: https://bugzilla.kernel.org/show_bug.cgi?id=200019 Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_filter.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index e1c818dbc0d7..0dceb77d1d42 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -78,7 +78,8 @@ static const char * ops[] = { OPS }; C(TOO_MANY_PREDS, "Too many terms in predicate expression"), \ C(INVALID_FILTER, "Meaningless filter expression"), \ C(IP_FIELD_ONLY, "Only 'ip' field is supported for function trace"), \ - C(INVALID_VALUE, "Invalid value (did you forget quotes)?"), + C(INVALID_VALUE, "Invalid value (did you forget quotes)?"), \ + C(NO_FILTER, "No filter found"), #undef C #define C(a, b) FILT_ERR_##a @@ -550,6 +551,13 @@ predicate_parse(const char *str, int nr_parens, int nr_preds, goto out_free; } + if (!N) { + /* No program? */ + ret = -EINVAL; + parse_error(pe, FILT_ERR_NO_FILTER, ptr - str); + goto out_free; + } + prog[N].pred = NULL; /* #13 */ prog[N].target = 1; /* TRUE */ prog[N+1].pred = NULL; From 1a63dcd8765bc8680481dc2f9acf6ef13cee6d27 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Thu, 7 Jun 2018 13:11:43 -0700 Subject: [PATCH 323/572] softirq: Reorder trace_softirqs_on to prevent lockdep splat I'm able to reproduce a lockdep splat with config options: CONFIG_PROVE_LOCKING=y, CONFIG_DEBUG_LOCK_ALLOC=y and CONFIG_PREEMPTIRQ_EVENTS=y $ echo 1 > /d/tracing/events/preemptirq/preempt_enable/enable [ 26.112609] DEBUG_LOCKS_WARN_ON(current->softirqs_enabled) [ 26.112636] WARNING: CPU: 0 PID: 118 at kernel/locking/lockdep.c:3854 [...] [ 26.144229] Call Trace: [ 26.144926] [ 26.145506] lock_acquire+0x55/0x1b0 [ 26.146499] ? __do_softirq+0x46f/0x4d9 [ 26.147571] ? __do_softirq+0x46f/0x4d9 [ 26.148646] trace_preempt_on+0x8f/0x240 [ 26.149744] ? trace_preempt_on+0x4d/0x240 [ 26.150862] ? __do_softirq+0x46f/0x4d9 [ 26.151930] preempt_count_sub+0x18a/0x1a0 [ 26.152985] __do_softirq+0x46f/0x4d9 [ 26.153937] irq_exit+0x68/0xe0 [ 26.154755] smp_apic_timer_interrupt+0x271/0x280 [ 26.156056] apic_timer_interrupt+0xf/0x20 [ 26.157105] The issue was this: preempt_count = 1 << SOFTIRQ_SHIFT __local_bh_enable(cnt = 1 << SOFTIRQ_SHIFT) { if (softirq_count() == (cnt && SOFTIRQ_MASK)) { trace_softirqs_on() { current->softirqs_enabled = 1; } } preempt_count_sub(cnt) { trace_preempt_on() { tracepoint() { rcu_read_lock_sched() { // jumps into lockdep Where preempt_count still has softirqs disabled, but current->softirqs_enabled is true, and we get a splat. Link: http://lkml.kernel.org/r/20180607201143.247775-1-joel@joelfernandes.org Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Linus Torvalds Cc: Mathieu Desnoyers Cc: Tom Zanussi Cc: Namhyung Kim Cc: Thomas Glexiner Cc: Boqun Feng Cc: Paul McKenney Cc: Masami Hiramatsu Cc: Todd Kjos Cc: Erick Reyes Cc: Julia Cartwright Cc: Byungchul Park Cc: stable@vger.kernel.org Reviewed-by: Steven Rostedt (VMware) Fixes: d59158162e032 ("tracing: Add support for preempt and irq enable/disable events") Signed-off-by: Joel Fernandes (Google) Signed-off-by: Steven Rostedt (VMware) --- kernel/softirq.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/softirq.c b/kernel/softirq.c index de2f57fddc04..900dcfee542c 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -139,9 +139,13 @@ static void __local_bh_enable(unsigned int cnt) { lockdep_assert_irqs_disabled(); + if (preempt_count() == cnt) + trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip()); + if (softirq_count() == (cnt & SOFTIRQ_MASK)) trace_softirqs_on(_RET_IP_); - preempt_count_sub(cnt); + + __preempt_count_sub(cnt); } /* From 08ae88f8104f486fd4103854119169f3e55dbc4c Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 9 Feb 2018 11:53:16 -0600 Subject: [PATCH 324/572] tracing: Use swap macro in update_max_tr Make use of the swap macro and remove unnecessary variable _buf_. This makes the code easier to read and maintain. Also, reduces the stack usage. This code was detected with the help of Coccinelle. Link: http://lkml.kernel.org/r/20180209175316.GA18720@embeddedgus Signed-off-by: Gustavo A. R. Silva Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index c9336e98ac59..a0079b4c7a49 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1360,8 +1360,6 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) { - struct ring_buffer *buf; - if (tr->stop_count) return; @@ -1375,9 +1373,7 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) arch_spin_lock(&tr->max_lock); - buf = tr->trace_buffer.buffer; - tr->trace_buffer.buffer = tr->max_buffer.buffer; - tr->max_buffer.buffer = buf; + swap(tr->trace_buffer.buffer, tr->max_buffer.buffer); __update_max_tr(tr, tsk, cpu); arch_spin_unlock(&tr->max_lock); From 064f35a952246c60e956717dfc5782c48f174e74 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Thu, 14 Jun 2018 15:48:59 -0700 Subject: [PATCH 325/572] tracing: Fix some errors in histogram documentation Fix typos, inconsistencies in using quotes, incorrect section number, etc. in the trace histogram documentation. Link: http://lkml.kernel.org/r/20180614224859.55864-1-joel@joelfernandes.org Reviewed-by: Masami Hiramatsu Acked-by: Tom Zanussi Signed-off-by: Joel Fernandes (Google) Signed-off-by: Steven Rostedt (VMware) --- Documentation/trace/histogram.txt | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/Documentation/trace/histogram.txt b/Documentation/trace/histogram.txt index e73bcf9cb5f3..7ffea6aa22e3 100644 --- a/Documentation/trace/histogram.txt +++ b/Documentation/trace/histogram.txt @@ -1729,35 +1729,35 @@ If a variable isn't a key variable or prefixed with 'vals=', the associated event field will be saved in a variable but won't be summed as a value: - # echo 'hist:keys=next_pid:ts1=common_timestamp ... >> event/trigger + # echo 'hist:keys=next_pid:ts1=common_timestamp ...' >> event/trigger Multiple variables can be assigned at the same time. The below would result in both ts0 and b being created as variables, with both common_timestamp and field1 additionally being summed as values: - # echo 'hist:keys=pid:vals=$ts0,$b:ts0=common_timestamp,b=field1 ... >> \ + # echo 'hist:keys=pid:vals=$ts0,$b:ts0=common_timestamp,b=field1 ...' >> \ event/trigger Note that variable assignments can appear either preceding or following their use. The command below behaves identically to the command above: - # echo 'hist:keys=pid:ts0=common_timestamp,b=field1:vals=$ts0,$b ... >> \ + # echo 'hist:keys=pid:ts0=common_timestamp,b=field1:vals=$ts0,$b ...' >> \ event/trigger Any number of variables not bound to a 'vals=' prefix can also be assigned by simply separating them with colons. Below is the same thing but without the values being summed in the histogram: - # echo 'hist:keys=pid:ts0=common_timestamp:b=field1 ... >> event/trigger + # echo 'hist:keys=pid:ts0=common_timestamp:b=field1 ...' >> event/trigger Variables set as above can be referenced and used in expressions on another event. For example, here's how a latency can be calculated: - # echo 'hist:keys=pid,prio:ts0=common_timestamp ... >> event1/trigger - # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp-$ts0 ... >> event2/trigger + # echo 'hist:keys=pid,prio:ts0=common_timestamp ...' >> event1/trigger + # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp-$ts0 ...' >> event2/trigger In the first line above, the event's timetamp is saved into the variable ts0. In the next line, ts0 is subtracted from the second @@ -1766,7 +1766,7 @@ yet another variable, 'wakeup_lat'. The hist trigger below in turn makes use of the wakeup_lat variable to compute a combined latency using the same key and variable from yet another event: - # echo 'hist:key=pid:wakeupswitch_lat=$wakeup_lat+$switchtime_lat ... >> event3/trigger + # echo 'hist:key=pid:wakeupswitch_lat=$wakeup_lat+$switchtime_lat ...' >> event3/trigger 2.2.2 Synthetic Events ---------------------- @@ -1807,10 +1807,11 @@ the command that defined it with a '!': At this point, there isn't yet an actual 'wakeup_latency' event instantiated in the event subsytem - for this to happen, a 'hist trigger action' needs to be instantiated and bound to actual fields -and variables defined on other events (see Section 6.3.3 below). +and variables defined on other events (see Section 2.2.3 below on +how that is done using hist trigger 'onmatch' action). Once that is +done, the 'wakeup_latency' synthetic event instance is created. -Once that is done, an event instance is created, and a histogram can -be defined using it: +A histogram can now be defined for the new synthetic event: # echo 'hist:keys=pid,prio,lat.log2:sort=pid,lat' >> \ /sys/kernel/debug/tracing/events/synthetic/wakeup_latency/trigger @@ -1960,7 +1961,7 @@ hist trigger specification. back to that pid, the timestamp difference is calculated. If the resulting latency, stored in wakeup_lat, exceeds the current maximum latency, the values specified in the save() fields are - recoreded: + recorded: # echo 'hist:keys=pid:ts0=common_timestamp.usecs \ if comm=="cyclictest"' >> \ From ed7d40bc67b8353c677b38c6cdddcdc310c0f452 Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Fri, 8 Jun 2018 14:47:46 -0700 Subject: [PATCH 326/572] tracing: Fix SKIP_STACK_VALIDATION=1 build due to bad merge with -mrecord-mcount Non gcc-5 builds with CONFIG_STACK_VALIDATION=y and SKIP_STACK_VALIDATION=1 fail. Example output: /bin/sh: init/.tmp_main.o: Permission denied commit 96f60dfa5819 ("trace: Use -mcount-record for dynamic ftrace"), added a mismatched endif. This causes cmd_objtool to get mistakenly set. Relocate endif to balance the newly added -record-mcount check. Link: http://lkml.kernel.org/r/20180608214746.136554-1-gthelen@google.com Fixes: 96f60dfa5819 ("trace: Use -mcount-record for dynamic ftrace") Acked-by: Andi Kleen Tested-by: David Rientjes Signed-off-by: Greg Thelen Signed-off-by: Steven Rostedt (VMware) --- scripts/Makefile.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 34d9e9ce97c2..e7889f486ca1 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -239,6 +239,7 @@ cmd_record_mcount = \ "$(CC_FLAGS_FTRACE)" ]; then \ $(sub_cmd_record_mcount) \ fi; +endif # -record-mcount endif # CONFIG_FTRACE_MCOUNT_RECORD ifdef CONFIG_STACK_VALIDATION @@ -263,7 +264,6 @@ ifneq ($(RETPOLINE_CFLAGS),) objtool_args += --retpoline endif endif -endif ifdef CONFIG_MODVERSIONS From 9c4a665ed83462039461dd34282e22160fb447ca Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 21 Jun 2018 19:34:57 +0100 Subject: [PATCH 327/572] ALSA: hda/ca0132: make array ca0132_alt_chmaps static The array ca0132_alt_chmaps is local to the source and does not need to be in global scope, so make it static. Cleans up sparse warning: warning: symbol 'ca0132_alt_chmaps' was not declared. Should it be static? Signed-off-by: Colin Ian King Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_ca0132.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c index d62c56feaf7d..4ff5320378e2 100644 --- a/sound/pci/hda/patch_ca0132.c +++ b/sound/pci/hda/patch_ca0132.c @@ -5665,7 +5665,7 @@ static const char * const ca0132_alt_slave_pfxs[] = { * I think this has to do with the pin for rear surround being 0x11, * and the center/lfe being 0x10. Usually the pin order is the opposite. */ -const struct snd_pcm_chmap_elem ca0132_alt_chmaps[] = { +static const struct snd_pcm_chmap_elem ca0132_alt_chmaps[] = { { .channels = 2, .map = { SNDRV_CHMAP_FL, SNDRV_CHMAP_FR } }, { .channels = 4, From e9893e6fa932f42c90c4ac5849fa9aa0f0f00a34 Mon Sep 17 00:00:00 2001 From: Abhishek Sahu Date: Wed, 13 Jun 2018 14:32:36 +0530 Subject: [PATCH 328/572] mtd: rawnand: fix return value check for bad block status Positive return value from read_oob() is making false BAD blocks. For some of the NAND controllers, OOB bytes will be protected with ECC and read_oob() will return number of bitflips. If there is any bitflip in ECC protected OOB bytes for BAD block status page, then that block is getting treated as BAD. Fixes: c120e75e0e7d ("mtd: nand: use read_oob() instead of cmdfunc() for bad block check") Cc: Signed-off-by: Abhishek Sahu Reviewed-by: Miquel Raynal Signed-off-by: Boris Brezillon --- drivers/mtd/nand/raw/nand_base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index 10c4f9919850..b01d15ec4c56 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -440,7 +440,7 @@ static int nand_block_bad(struct mtd_info *mtd, loff_t ofs) for (; page < page_end; page++) { res = chip->ecc.read_oob(mtd, chip, page); - if (res) + if (res < 0) return res; bad = chip->oob_poi[chip->badblockpos]; From 945d015ee0c3095d2290e845565a23dedfd8027c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 Jun 2018 14:16:02 -0700 Subject: [PATCH 329/572] net/packet: fix use-after-free We should put copy_skb in receive_queue only after a successful call to virtio_net_hdr_from_skb(). syzbot report : BUG: KASAN: use-after-free in __skb_unlink include/linux/skbuff.h:1843 [inline] BUG: KASAN: use-after-free in __skb_dequeue include/linux/skbuff.h:1863 [inline] BUG: KASAN: use-after-free in skb_dequeue+0x16a/0x180 net/core/skbuff.c:2815 Read of size 8 at addr ffff8801b044ecc0 by task syz-executor217/4553 CPU: 0 PID: 4553 Comm: syz-executor217 Not tainted 4.18.0-rc1+ #111 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c9/0x2b4 lib/dump_stack.c:113 print_address_description+0x6c/0x20b mm/kasan/report.c:256 kasan_report_error mm/kasan/report.c:354 [inline] kasan_report.cold.7+0x242/0x2fe mm/kasan/report.c:412 __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:433 __skb_unlink include/linux/skbuff.h:1843 [inline] __skb_dequeue include/linux/skbuff.h:1863 [inline] skb_dequeue+0x16a/0x180 net/core/skbuff.c:2815 skb_queue_purge+0x26/0x40 net/core/skbuff.c:2852 packet_set_ring+0x675/0x1da0 net/packet/af_packet.c:4331 packet_release+0x630/0xd90 net/packet/af_packet.c:2991 __sock_release+0xd7/0x260 net/socket.c:603 sock_close+0x19/0x20 net/socket.c:1186 __fput+0x35b/0x8b0 fs/file_table.c:209 ____fput+0x15/0x20 fs/file_table.c:243 task_work_run+0x1ec/0x2a0 kernel/task_work.c:113 exit_task_work include/linux/task_work.h:22 [inline] do_exit+0x1b08/0x2750 kernel/exit.c:865 do_group_exit+0x177/0x440 kernel/exit.c:968 __do_sys_exit_group kernel/exit.c:979 [inline] __se_sys_exit_group kernel/exit.c:977 [inline] __x64_sys_exit_group+0x3e/0x50 kernel/exit.c:977 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x4448e9 Code: Bad RIP value. RSP: 002b:00007ffd5f777ca8 EFLAGS: 00000202 ORIG_RAX: 00000000000000e7 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00000000004448e9 RDX: 00000000004448e9 RSI: 000000000000fcfb RDI: 0000000000000001 RBP: 00000000006cf018 R08: 00007ffd0000a45b R09: 0000000000000000 R10: 00007ffd5f777e48 R11: 0000000000000202 R12: 00000000004021f0 R13: 0000000000402280 R14: 0000000000000000 R15: 0000000000000000 Allocated by task 4553: save_stack+0x43/0xd0 mm/kasan/kasan.c:448 set_track mm/kasan/kasan.c:460 [inline] kasan_kmalloc+0xc4/0xe0 mm/kasan/kasan.c:553 kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:490 kmem_cache_alloc+0x12e/0x760 mm/slab.c:3554 skb_clone+0x1f5/0x500 net/core/skbuff.c:1282 tpacket_rcv+0x28f7/0x3200 net/packet/af_packet.c:2221 deliver_skb net/core/dev.c:1925 [inline] deliver_ptype_list_skb net/core/dev.c:1940 [inline] __netif_receive_skb_core+0x1bfb/0x3680 net/core/dev.c:4611 __netif_receive_skb+0x2c/0x1e0 net/core/dev.c:4693 netif_receive_skb_internal+0x12e/0x7d0 net/core/dev.c:4767 netif_receive_skb+0xbf/0x420 net/core/dev.c:4791 tun_rx_batched.isra.55+0x4ba/0x8c0 drivers/net/tun.c:1571 tun_get_user+0x2af1/0x42f0 drivers/net/tun.c:1981 tun_chr_write_iter+0xb9/0x154 drivers/net/tun.c:2009 call_write_iter include/linux/fs.h:1795 [inline] new_sync_write fs/read_write.c:474 [inline] __vfs_write+0x6c6/0x9f0 fs/read_write.c:487 vfs_write+0x1f8/0x560 fs/read_write.c:549 ksys_write+0x101/0x260 fs/read_write.c:598 __do_sys_write fs/read_write.c:610 [inline] __se_sys_write fs/read_write.c:607 [inline] __x64_sys_write+0x73/0xb0 fs/read_write.c:607 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 4553: save_stack+0x43/0xd0 mm/kasan/kasan.c:448 set_track mm/kasan/kasan.c:460 [inline] __kasan_slab_free+0x11a/0x170 mm/kasan/kasan.c:521 kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528 __cache_free mm/slab.c:3498 [inline] kmem_cache_free+0x86/0x2d0 mm/slab.c:3756 kfree_skbmem+0x154/0x230 net/core/skbuff.c:582 __kfree_skb net/core/skbuff.c:642 [inline] kfree_skb+0x1a5/0x580 net/core/skbuff.c:659 tpacket_rcv+0x189e/0x3200 net/packet/af_packet.c:2385 deliver_skb net/core/dev.c:1925 [inline] deliver_ptype_list_skb net/core/dev.c:1940 [inline] __netif_receive_skb_core+0x1bfb/0x3680 net/core/dev.c:4611 __netif_receive_skb+0x2c/0x1e0 net/core/dev.c:4693 netif_receive_skb_internal+0x12e/0x7d0 net/core/dev.c:4767 netif_receive_skb+0xbf/0x420 net/core/dev.c:4791 tun_rx_batched.isra.55+0x4ba/0x8c0 drivers/net/tun.c:1571 tun_get_user+0x2af1/0x42f0 drivers/net/tun.c:1981 tun_chr_write_iter+0xb9/0x154 drivers/net/tun.c:2009 call_write_iter include/linux/fs.h:1795 [inline] new_sync_write fs/read_write.c:474 [inline] __vfs_write+0x6c6/0x9f0 fs/read_write.c:487 vfs_write+0x1f8/0x560 fs/read_write.c:549 ksys_write+0x101/0x260 fs/read_write.c:598 __do_sys_write fs/read_write.c:610 [inline] __se_sys_write fs/read_write.c:607 [inline] __x64_sys_write+0x73/0xb0 fs/read_write.c:607 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe The buggy address belongs to the object at ffff8801b044ecc0 which belongs to the cache skbuff_head_cache of size 232 The buggy address is located 0 bytes inside of 232-byte region [ffff8801b044ecc0, ffff8801b044eda8) The buggy address belongs to the page: page:ffffea0006c11380 count:1 mapcount:0 mapping:ffff8801d9be96c0 index:0x0 flags: 0x2fffc0000000100(slab) raw: 02fffc0000000100 ffffea0006c17988 ffff8801d9bec248 ffff8801d9be96c0 raw: 0000000000000000 ffff8801b044e040 000000010000000c 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8801b044eb80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ffff8801b044ec00: 00 00 00 00 00 00 00 00 00 00 00 00 00 fc fc fc >ffff8801b044ec80: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb ^ ffff8801b044ed00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8801b044ed80: fb fb fb fb fb fc fc fc fc fc fc fc fc fc fc fc Fixes: 58d19b19cd99 ("packet: vnet_hdr support for tpacket_rcv") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Willem de Bruijn Signed-off-by: David S. Miller --- net/packet/af_packet.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 50809748c127..ff8e7e245c37 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2262,6 +2262,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, if (po->stats.stats1.tp_drops) status |= TP_STATUS_LOSING; } + + if (do_vnet && + virtio_net_hdr_from_skb(skb, h.raw + macoff - + sizeof(struct virtio_net_hdr), + vio_le(), true, 0)) + goto drop_n_account; + po->stats.stats1.tp_packets++; if (copy_skb) { status |= TP_STATUS_COPY; @@ -2269,15 +2276,6 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, } spin_unlock(&sk->sk_receive_queue.lock); - if (do_vnet) { - if (virtio_net_hdr_from_skb(skb, h.raw + macoff - - sizeof(struct virtio_net_hdr), - vio_le(), true, 0)) { - spin_lock(&sk->sk_receive_queue.lock); - goto drop_n_account; - } - } - skb_copy_bits(skb, 0, h.raw + macoff, snaplen); if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp))) From 44a5cd436ec067c9a62988aca3331ac6eee5268d Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 21 Jun 2018 20:02:16 +0200 Subject: [PATCH 330/572] cls_flower: fix use after free in flower S/W path If flower filter is created without the skip_sw flag, fl_mask_put() can race with fl_classify() and we can destroy the mask rhashtable while a lookup operation is accessing it. BUG: unable to handle kernel paging request at 00000000000911d1 PGD 0 P4D 0 SMP PTI CPU: 3 PID: 5582 Comm: vhost-5541 Not tainted 4.18.0-rc1.vanilla+ #1950 Hardware name: Dell Inc. PowerEdge R730/072T6D, BIOS 2.1.7 06/16/2016 RIP: 0010:rht_bucket_nested+0x20/0x60 Code: 31 c8 c1 c1 18 29 c8 c3 66 90 8b 4f 04 ba 01 00 00 00 8b 07 48 8b bf 80 00 00 0 RSP: 0018:ffffafc5cfbb7a48 EFLAGS: 00010206 RAX: 0000000000001978 RBX: ffff9f12dff88a00 RCX: 00000000ffff9f12 RDX: 00000000000911d1 RSI: 0000000000000148 RDI: 0000000000000001 RBP: ffff9f12dff88a00 R08: 000000005f1cc119 R09: 00000000a715fae2 R10: ffffafc5cfbb7aa8 R11: ffff9f1cb4be804e R12: ffff9f1265e13000 R13: 0000000000000000 R14: ffffafc5cfbb7b48 R15: ffff9f12dff88b68 FS: 0000000000000000(0000) GS:ffff9f1d3f0c0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000000911d1 CR3: 0000001575a94006 CR4: 00000000001626e0 Call Trace: fl_lookup+0x134/0x140 [cls_flower] fl_classify+0xf3/0x180 [cls_flower] tcf_classify+0x78/0x150 __netif_receive_skb_core+0x69e/0xa50 netif_receive_skb_internal+0x42/0xf0 tun_get_user+0xdd5/0xfd0 [tun] tun_sendmsg+0x52/0x70 [tun] handle_tx+0x2b3/0x5f0 [vhost_net] vhost_worker+0xab/0x100 [vhost] kthread+0xf8/0x130 ret_from_fork+0x35/0x40 Modules linked in: act_mirred act_gact cls_flower vhost_net vhost tap sch_ingress CR2: 00000000000911d1 Fix the above waiting for a RCU grace period before destroying the rhashtable: we need to use tcf_queue_work(), as rhashtable_destroy() must run in process context, as pointed out by Cong Wang. v1 -> v2: use tcf_queue_work to run rhashtable_destroy(). Fixes: 05cd271fd61a ("cls_flower: Support multiple masks per priority") Signed-off-by: Paolo Abeni Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/cls_flower.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 2b5be42a9f1c..9e8b26a80fb3 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -66,7 +66,7 @@ struct fl_flow_mask { struct rhashtable_params filter_ht_params; struct flow_dissector dissector; struct list_head filters; - struct rcu_head rcu; + struct rcu_work rwork; struct list_head list; }; @@ -203,6 +203,20 @@ static int fl_init(struct tcf_proto *tp) return rhashtable_init(&head->ht, &mask_ht_params); } +static void fl_mask_free(struct fl_flow_mask *mask) +{ + rhashtable_destroy(&mask->ht); + kfree(mask); +} + +static void fl_mask_free_work(struct work_struct *work) +{ + struct fl_flow_mask *mask = container_of(to_rcu_work(work), + struct fl_flow_mask, rwork); + + fl_mask_free(mask); +} + static bool fl_mask_put(struct cls_fl_head *head, struct fl_flow_mask *mask, bool async) { @@ -210,12 +224,11 @@ static bool fl_mask_put(struct cls_fl_head *head, struct fl_flow_mask *mask, return false; rhashtable_remove_fast(&head->ht, &mask->ht_node, mask_ht_params); - rhashtable_destroy(&mask->ht); list_del_rcu(&mask->list); if (async) - kfree_rcu(mask, rcu); + tcf_queue_work(&mask->rwork, fl_mask_free_work); else - kfree(mask); + fl_mask_free(mask); return true; } From fe3dd97dd66bb7fb23b8077a3803d2f951e60b00 Mon Sep 17 00:00:00 2001 From: Mason Yang Date: Wed, 20 Jun 2018 11:46:30 +0200 Subject: [PATCH 331/572] mtd: rawnand: All AC chips have a broken GET_FEATURES(TIMINGS). Make sure we flag all broken chips as not supporting this feature. Also move this logic to a new function to keep things readable. Fixes: 34c5c01e0c8c ("mtd: rawnand: macronix: nack the support of changing timings for one chip") Cc: Signed-off-by: Mason Yang Signed-off-by: Boris Brezillon Reviewed-by: Miquel Raynal --- drivers/mtd/nand/raw/nand_macronix.c | 48 +++++++++++++++++++++------- 1 file changed, 36 insertions(+), 12 deletions(-) diff --git a/drivers/mtd/nand/raw/nand_macronix.c b/drivers/mtd/nand/raw/nand_macronix.c index 7ed1f87e742a..49c546c97c6f 100644 --- a/drivers/mtd/nand/raw/nand_macronix.c +++ b/drivers/mtd/nand/raw/nand_macronix.c @@ -17,23 +17,47 @@ #include +/* + * Macronix AC series does not support using SET/GET_FEATURES to change + * the timings unlike what is declared in the parameter page. Unflag + * this feature to avoid unnecessary downturns. + */ +static void macronix_nand_fix_broken_get_timings(struct nand_chip *chip) +{ + unsigned int i; + static const char * const broken_get_timings[] = { + "MX30LF1G18AC", + "MX30LF1G28AC", + "MX30LF2G18AC", + "MX30LF2G28AC", + "MX30LF4G18AC", + "MX30LF4G28AC", + "MX60LF8G18AC", + }; + + if (!chip->parameters.supports_set_get_features) + return; + + for (i = 0; i < ARRAY_SIZE(broken_get_timings); i++) { + if (!strcmp(broken_get_timings[i], chip->parameters.model)) + break; + } + + if (i == ARRAY_SIZE(broken_get_timings)) + return; + + bitmap_clear(chip->parameters.get_feature_list, + ONFI_FEATURE_ADDR_TIMING_MODE, 1); + bitmap_clear(chip->parameters.set_feature_list, + ONFI_FEATURE_ADDR_TIMING_MODE, 1); +} + static int macronix_nand_init(struct nand_chip *chip) { if (nand_is_slc(chip)) chip->bbt_options |= NAND_BBT_SCAN2NDPAGE; - /* - * MX30LF2G18AC chip does not support using SET/GET_FEATURES to change - * the timings unlike what is declared in the parameter page. Unflag - * this feature to avoid unnecessary downturns. - */ - if (chip->parameters.supports_set_get_features && - !strcmp("MX30LF2G18AC", chip->parameters.model)) { - bitmap_clear(chip->parameters.get_feature_list, - ONFI_FEATURE_ADDR_TIMING_MODE, 1); - bitmap_clear(chip->parameters.set_feature_list, - ONFI_FEATURE_ADDR_TIMING_MODE, 1); - } + macronix_nand_fix_broken_get_timings(chip); return 0; } From f93aa8c4de307069c270b2d81741961162bead6c Mon Sep 17 00:00:00 2001 From: Joakim Tjernlund Date: Wed, 6 Jun 2018 12:13:27 +0200 Subject: [PATCH 332/572] mtd: cfi_cmdset_0002: Use right chip in do_ppb_xxlock() do_ppb_xxlock() fails to add chip->start when querying for lock status (and chip_ready test), which caused false status reports. Fix that by adding adr += chip->start and adjust call sites accordingly. Fixes: 1648eaaa1575 ("mtd: cfi_cmdset_0002: Support Persistent Protection Bits (PPB) locking") Cc: stable@vger.kernel.org Signed-off-by: Joakim Tjernlund Signed-off-by: Boris Brezillon --- drivers/mtd/chips/cfi_cmdset_0002.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c index a0c655628d6d..aa09d5155429 100644 --- a/drivers/mtd/chips/cfi_cmdset_0002.c +++ b/drivers/mtd/chips/cfi_cmdset_0002.c @@ -2544,8 +2544,9 @@ static int __maybe_unused do_ppb_xxlock(struct map_info *map, unsigned long timeo; int ret; + adr += chip->start; mutex_lock(&chip->mutex); - ret = get_chip(map, chip, adr + chip->start, FL_LOCKING); + ret = get_chip(map, chip, adr, FL_LOCKING); if (ret) { mutex_unlock(&chip->mutex); return ret; @@ -2563,8 +2564,8 @@ static int __maybe_unused do_ppb_xxlock(struct map_info *map, if (thunk == DO_XXLOCK_ONEBLOCK_LOCK) { chip->state = FL_LOCKING; - map_write(map, CMD(0xA0), chip->start + adr); - map_write(map, CMD(0x00), chip->start + adr); + map_write(map, CMD(0xA0), adr); + map_write(map, CMD(0x00), adr); } else if (thunk == DO_XXLOCK_ONEBLOCK_UNLOCK) { /* * Unlocking of one specific sector is not supported, so we @@ -2602,7 +2603,7 @@ static int __maybe_unused do_ppb_xxlock(struct map_info *map, map_write(map, CMD(0x00), chip->start); chip->state = FL_READY; - put_chip(map, chip, adr + chip->start); + put_chip(map, chip, adr); mutex_unlock(&chip->mutex); return ret; From 5fdfc3dbad099281bf027a353d5786c09408a8e5 Mon Sep 17 00:00:00 2001 From: Joakim Tjernlund Date: Wed, 6 Jun 2018 12:13:28 +0200 Subject: [PATCH 333/572] mtd: cfi_cmdset_0002: fix SEGV unlocking multiple chips cfi_ppb_unlock() tries to relock all sectors that were locked before unlocking the whole chip. This locking used the chip start address + the FULL offset from the first flash chip, thereby forming an illegal address. Fix that by using the chip offset(adr). Fixes: 1648eaaa1575 ("mtd: cfi_cmdset_0002: Support Persistent Protection Bits (PPB) locking") Cc: stable@vger.kernel.org Signed-off-by: Joakim Tjernlund Signed-off-by: Boris Brezillon --- drivers/mtd/chips/cfi_cmdset_0002.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c index aa09d5155429..87f9925d25f9 100644 --- a/drivers/mtd/chips/cfi_cmdset_0002.c +++ b/drivers/mtd/chips/cfi_cmdset_0002.c @@ -2526,7 +2526,7 @@ static int cfi_atmel_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len) struct ppb_lock { struct flchip *chip; - loff_t offset; + unsigned long adr; int locked; }; @@ -2662,7 +2662,7 @@ static int __maybe_unused cfi_ppb_unlock(struct mtd_info *mtd, loff_t ofs, */ if ((adr < ofs) || (adr >= (ofs + len))) { sect[sectors].chip = &cfi->chips[chipnum]; - sect[sectors].offset = offset; + sect[sectors].adr = adr; sect[sectors].locked = do_ppb_xxlock( map, &cfi->chips[chipnum], adr, 0, DO_XXLOCK_ONEBLOCK_GETLOCK); @@ -2706,7 +2706,7 @@ static int __maybe_unused cfi_ppb_unlock(struct mtd_info *mtd, loff_t ofs, */ for (i = 0; i < sectors; i++) { if (sect[i].locked) - do_ppb_xxlock(map, sect[i].chip, sect[i].offset, 0, + do_ppb_xxlock(map, sect[i].chip, sect[i].adr, 0, DO_XXLOCK_ONEBLOCK_LOCK); } From cb257783c2927b73614b20f915a91ff78aa6f3e8 Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Thu, 21 Jun 2018 14:00:20 +0100 Subject: [PATCH 334/572] xen-netfront: Fix mismatched rtnl_unlock Fixes: f599c64fdf7d ("xen-netfront: Fix race between device setup and open") Reported-by: Ben Hutchings Signed-off-by: Ross Lagerwall Reviewed-by: Juergen Gross Signed-off-by: David S. Miller --- drivers/net/xen-netfront.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 922ce0abf5cf..ee4cb6cfde1e 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1810,7 +1810,7 @@ static int talk_to_netback(struct xenbus_device *dev, err = xen_net_read_mac(dev, info->netdev->dev_addr); if (err) { xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename); - goto out; + goto out_unlocked; } rtnl_lock(); @@ -1925,6 +1925,7 @@ abort_transaction_no_dev_fatal: xennet_destroy_queues(info); out: rtnl_unlock(); +out_unlocked: device_unregister(&dev->dev); return err; } From 45c8184c1bed1ca8a7f02918552063a00b909bf5 Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Thu, 21 Jun 2018 14:00:21 +0100 Subject: [PATCH 335/572] xen-netfront: Update features after registering netdev Update the features after calling register_netdev() otherwise the device features are not set up correctly and it not possible to change the MTU of the device. After this change, the features reported by ethtool match the device's features before the commit which introduced the issue and it is possible to change the device's MTU. Fixes: f599c64fdf7d ("xen-netfront: Fix race between device setup and open") Reported-by: Liam Shepherd Signed-off-by: Ross Lagerwall Reviewed-by: Juergen Gross Signed-off-by: David S. Miller --- drivers/net/xen-netfront.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index ee4cb6cfde1e..a57daecf1d57 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1951,10 +1951,6 @@ static int xennet_connect(struct net_device *dev) /* talk_to_netback() sets the correct number of queues */ num_queues = dev->real_num_tx_queues; - rtnl_lock(); - netdev_update_features(dev); - rtnl_unlock(); - if (dev->reg_state == NETREG_UNINITIALIZED) { err = register_netdev(dev); if (err) { @@ -1964,6 +1960,10 @@ static int xennet_connect(struct net_device *dev) } } + rtnl_lock(); + netdev_update_features(dev); + rtnl_unlock(); + /* * All public and private state should now be sane. Get * ready to start sending and receiving packets and give the driver From 0cd8116f172eed018907303dbff5c112690eeb91 Mon Sep 17 00:00:00 2001 From: Joakim Tjernlund Date: Wed, 6 Jun 2018 12:13:29 +0200 Subject: [PATCH 336/572] mtd: cfi_cmdset_0002: Fix unlocking requests crossing a chip boudary The "sector is in requested range" test used to determine whether sectors should be re-locked or not is done on a variable that is reset everytime we cross a chip boundary, which can lead to some blocks being re-locked while the caller expect them to be unlocked. Fix the check to make sure this cannot happen. Fixes: 1648eaaa1575 ("mtd: cfi_cmdset_0002: Support Persistent Protection Bits (PPB) locking") Cc: stable@vger.kernel.org Signed-off-by: Joakim Tjernlund Signed-off-by: Boris Brezillon --- drivers/mtd/chips/cfi_cmdset_0002.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c index 87f9925d25f9..9cfc2645dd93 100644 --- a/drivers/mtd/chips/cfi_cmdset_0002.c +++ b/drivers/mtd/chips/cfi_cmdset_0002.c @@ -2660,7 +2660,7 @@ static int __maybe_unused cfi_ppb_unlock(struct mtd_info *mtd, loff_t ofs, * sectors shall be unlocked, so lets keep their locking * status at "unlocked" (locked=0) for the final re-locking. */ - if ((adr < ofs) || (adr >= (ofs + len))) { + if ((offset < ofs) || (offset >= (ofs + len))) { sect[sectors].chip = &cfi->chips[chipnum]; sect[sectors].adr = adr; sect[sectors].locked = do_ppb_xxlock( From 24bb555e6e46d96e2a954aa0295029a81cc9bbaa Mon Sep 17 00:00:00 2001 From: Aaron Ma Date: Thu, 21 Jun 2018 17:14:01 -0700 Subject: [PATCH 337/572] Input: elantech - enable middle button of touchpads on ThinkPad P52 PNPID is better way to identify the type of touchpads. Enable middle button support on 2 types of touchpads on Lenovo P52. Cc: stable@vger.kernel.org Signed-off-by: Aaron Ma Reviewed-by: Benjamin Tissoires Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elantech.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index fb4d902c4403..4c52e43c9273 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -1175,6 +1175,12 @@ static const struct dmi_system_id elantech_dmi_has_middle_button[] = { { } }; +static const char * const middle_button_pnp_ids[] = { + "LEN2131", /* ThinkPad P52 w/ NFC */ + "LEN2132", /* ThinkPad P52 */ + NULL +}; + /* * Set the appropriate event bits for the input subsystem */ @@ -1194,7 +1200,8 @@ static int elantech_set_input_params(struct psmouse *psmouse) __clear_bit(EV_REL, dev->evbit); __set_bit(BTN_LEFT, dev->keybit); - if (dmi_check_system(elantech_dmi_has_middle_button)) + if (dmi_check_system(elantech_dmi_has_middle_button) || + psmouse_matches_pnp_id(psmouse, middle_button_pnp_ids)) __set_bit(BTN_MIDDLE, dev->keybit); __set_bit(BTN_RIGHT, dev->keybit); From e0ae2519ca004a628fa55aeef969c37edce522d3 Mon Sep 17 00:00:00 2001 From: ??? Date: Thu, 21 Jun 2018 17:15:32 -0700 Subject: [PATCH 338/572] Input: elantech - fix V4 report decoding for module with middle key Some touchpad has middle key and it will be indicated in bit 2 of packet[0]. We need to fix V4 formation's byte mask to prevent error decoding. Signed-off-by: KT Liao Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elantech.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index 4c52e43c9273..dd85b16dc6f8 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -799,7 +799,7 @@ static int elantech_packet_check_v4(struct psmouse *psmouse) else if (ic_version == 7 && etd->info.samples[1] == 0x2A) sanity_check = ((packet[3] & 0x1c) == 0x10); else - sanity_check = ((packet[0] & 0x0c) == 0x04 && + sanity_check = ((packet[0] & 0x08) == 0x00 && (packet[3] & 0x1c) == 0x10); if (!sanity_check) From 8938fc7b8fe9ccfa11751ead502a8d385b607967 Mon Sep 17 00:00:00 2001 From: Alexandr Savca Date: Thu, 21 Jun 2018 17:12:54 -0700 Subject: [PATCH 339/572] Input: elan_i2c - add ELAN0618 (Lenovo v330 15IKB) ACPI ID Add ELAN0618 to the list of supported touchpads; this ID is used in Lenovo v330 15IKB devices. Signed-off-by: Alexandr Savca Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elan_i2c_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index 8ff75114e762..f69d21610166 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -1345,6 +1345,7 @@ static const struct acpi_device_id elan_acpi_id[] = { { "ELAN060C", 0 }, { "ELAN0611", 0 }, { "ELAN0612", 0 }, + { "ELAN0618", 0 }, { "ELAN1000", 0 }, { } }; From 50fc7b61959af4b95fafce7fe5dd565199e0b61a Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 19 Jun 2018 11:17:32 -0700 Subject: [PATCH 340/572] Input: elan_i2c_smbus - fix more potential stack buffer overflows Commit 40f7090bb1b4 ("Input: elan_i2c_smbus - fix corrupted stack") fixed most of the functions using i2c_smbus_read_block_data() to allocate a buffer with the maximum block size. However three functions were left unchanged: * In elan_smbus_initialize(), increase the buffer size in the same way. * In elan_smbus_calibrate_result(), the buffer is provided by the caller (calibrate_store()), so introduce a bounce buffer. Also name the result buffer size. * In elan_smbus_get_report(), the buffer is provided by the caller but happens to be the right length. Add a compile-time assertion to ensure this remains the case. Cc: # 3.19+ Signed-off-by: Ben Hutchings Reviewed-by: Benjamin Tissoires Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elan_i2c.h | 2 ++ drivers/input/mouse/elan_i2c_core.c | 2 +- drivers/input/mouse/elan_i2c_smbus.c | 10 ++++++++-- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/input/mouse/elan_i2c.h b/drivers/input/mouse/elan_i2c.h index 599544c1a91c..243e0fa6e3e3 100644 --- a/drivers/input/mouse/elan_i2c.h +++ b/drivers/input/mouse/elan_i2c.h @@ -27,6 +27,8 @@ #define ETP_DISABLE_POWER 0x0001 #define ETP_PRESSURE_OFFSET 25 +#define ETP_CALIBRATE_MAX_LEN 3 + /* IAP Firmware handling */ #define ETP_PRODUCT_ID_FORMAT_STRING "%d.0" #define ETP_FW_NAME "elan_i2c_" ETP_PRODUCT_ID_FORMAT_STRING ".bin" diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index f69d21610166..1f9cd7d8b7ad 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -613,7 +613,7 @@ static ssize_t calibrate_store(struct device *dev, int tries = 20; int retval; int error; - u8 val[3]; + u8 val[ETP_CALIBRATE_MAX_LEN]; retval = mutex_lock_interruptible(&data->sysfs_mutex); if (retval) diff --git a/drivers/input/mouse/elan_i2c_smbus.c b/drivers/input/mouse/elan_i2c_smbus.c index cfcb32559925..c060d270bc4d 100644 --- a/drivers/input/mouse/elan_i2c_smbus.c +++ b/drivers/input/mouse/elan_i2c_smbus.c @@ -56,7 +56,7 @@ static int elan_smbus_initialize(struct i2c_client *client) { u8 check[ETP_SMBUS_HELLOPACKET_LEN] = { 0x55, 0x55, 0x55, 0x55, 0x55 }; - u8 values[ETP_SMBUS_HELLOPACKET_LEN] = { 0, 0, 0, 0, 0 }; + u8 values[I2C_SMBUS_BLOCK_MAX] = {0}; int len, error; /* Get hello packet */ @@ -117,12 +117,16 @@ static int elan_smbus_calibrate(struct i2c_client *client) static int elan_smbus_calibrate_result(struct i2c_client *client, u8 *val) { int error; + u8 buf[I2C_SMBUS_BLOCK_MAX] = {0}; + + BUILD_BUG_ON(ETP_CALIBRATE_MAX_LEN > sizeof(buf)); error = i2c_smbus_read_block_data(client, - ETP_SMBUS_CALIBRATE_QUERY, val); + ETP_SMBUS_CALIBRATE_QUERY, buf); if (error < 0) return error; + memcpy(val, buf, ETP_CALIBRATE_MAX_LEN); return 0; } @@ -472,6 +476,8 @@ static int elan_smbus_get_report(struct i2c_client *client, u8 *report) { int len; + BUILD_BUG_ON(I2C_SMBUS_BLOCK_MAX > ETP_SMBUS_REPORT_LEN); + len = i2c_smbus_read_block_data(client, ETP_SMBUS_PACKET_QUERY, &report[ETP_SMBUS_REPORT_OFFSET]); From dd6bee81c942c0ea01030da9356026afb88f9d18 Mon Sep 17 00:00:00 2001 From: Enno Boland Date: Tue, 19 Jun 2018 11:55:33 -0700 Subject: [PATCH 341/572] Input: xpad - fix GPD Win 2 controller name This fixes using the controller with SDL2. SDL2 has a naive algorithm to apply the correct settings to a controller. For X-Box compatible controllers it expects that the controller name contains a variation of a 'XBOX'-string. This patch changes the identifier to contain "X-Box" as substring. Tested with Steam and C-Dogs-SDL which both detect the controller properly after adding this patch. Fixes: c1ba08390a8b ("Input: xpad - add GPD Win 2 Controller USB IDs") Cc: stable@vger.kernel.org Signed-off-by: Enno Boland Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 48e36acbeb49..cd620e009bad 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -125,7 +125,7 @@ static const struct xpad_device { u8 mapping; u8 xtype; } xpad_device[] = { - { 0x0079, 0x18d4, "GPD Win 2 Controller", 0, XTYPE_XBOX360 }, + { 0x0079, 0x18d4, "GPD Win 2 X-Box Controller", 0, XTYPE_XBOX360 }, { 0x044f, 0x0f00, "Thrustmaster Wheel", 0, XTYPE_XBOX }, { 0x044f, 0x0f03, "Thrustmaster Wheel", 0, XTYPE_XBOX }, { 0x044f, 0x0f07, "Thrustmaster, Inc. Controller", 0, XTYPE_XBOX }, From 40141bb480a2c7805e8f4faec14ade90121ecb06 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 21 Jun 2018 18:16:45 +0100 Subject: [PATCH 342/572] net: ethernet: ti: davinci_cpdma: make function cpdma_desc_pool_create static The function cpdma_desc_pool_create is local to the source and does not need to be in global scope, so make it static. Cleans up sparse warning: warning: symbol 'cpdma_desc_pool_create' was not declared. Should it be static? Signed-off-by: Colin Ian King Reviewed-by: Grygorii Strashko Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/davinci_cpdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c b/drivers/net/ethernet/ti/davinci_cpdma.c index cdbddf16dd29..4f1267477aa4 100644 --- a/drivers/net/ethernet/ti/davinci_cpdma.c +++ b/drivers/net/ethernet/ti/davinci_cpdma.c @@ -205,7 +205,7 @@ static void cpdma_desc_pool_destroy(struct cpdma_ctlr *ctlr) * devices (e.g. cpsw switches) use plain old memory. Descriptor pools * abstract out these details */ -int cpdma_desc_pool_create(struct cpdma_ctlr *ctlr) +static int cpdma_desc_pool_create(struct cpdma_ctlr *ctlr) { struct cpdma_params *cpdma_params = &ctlr->params; struct cpdma_desc_pool *pool; From e5ab564c9ebee77794842ca7d7476147b83d6a27 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Wed, 20 Jun 2018 15:51:51 +0200 Subject: [PATCH 343/572] VSOCK: fix loopback on big-endian systems The dst_cid and src_cid are 64 bits, therefore 64 bit accessors should be used, and in fact in virtio_transport_common.c only 64 bit accessors are used. Using 32 bit accessors for 64 bit values breaks big endian systems. This patch fixes a wrong use of le32_to_cpu in virtio_transport_send_pkt. Fixes: b9116823189e85ccf384 ("VSOCK: add loopback to virtio_transport") Signed-off-by: Claudio Imbrenda Reviewed-by: Stefan Hajnoczi Signed-off-by: David S. Miller --- net/vmw_vsock/virtio_transport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 8e03bd3f3668..5d3cce9e8744 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -201,7 +201,7 @@ virtio_transport_send_pkt(struct virtio_vsock_pkt *pkt) return -ENODEV; } - if (le32_to_cpu(pkt->hdr.dst_cid) == vsock->guest_cid) + if (le64_to_cpu(pkt->hdr.dst_cid) == vsock->guest_cid) return virtio_transport_send_pkt_loopback(vsock, pkt); if (pkt->reply) From 6cc65be4f6f2a7186af8f3e09900787c7912dad2 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 21 Jun 2018 20:35:26 -0400 Subject: [PATCH 344/572] locking/qspinlock: Fix build for anonymous union in older GCC compilers One of my tests compiles the kernel with gcc 4.5.3, and I hit the following build error: include/linux/semaphore.h: In function 'sema_init': include/linux/semaphore.h:35:17: error: unknown field 'val' specified in initializer include/linux/semaphore.h:35:17: warning: missing braces around initializer include/linux/semaphore.h:35:17: warning: (near initialization for '(anonymous).raw_lock..val') I bisected it down to: 625e88be1f41 ("locking/qspinlock: Merge 'struct __qspinlock' into 'struct qspinlock'") ... which makes qspinlock have an anonymous union, which makes initializing it special for older compilers. By adding strategic brackets, it makes the build happy again. Signed-off-by: Steven Rostedt (VMware) Acked-by: Waiman Long Cc: Andrew Morton Cc: Boqun Feng Cc: Linus Torvalds Cc: Peter Zijlstra (Intel) Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Fixes: 625e88be1f41 ("locking/qspinlock: Merge 'struct __qspinlock' into 'struct qspinlock'") Link: http://lkml.kernel.org/r/20180621203526.172ab5c4@vmware.local.home Signed-off-by: Ingo Molnar --- include/asm-generic/qspinlock_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/asm-generic/qspinlock_types.h b/include/asm-generic/qspinlock_types.h index 0763f065b975..d10f1e7d6ba8 100644 --- a/include/asm-generic/qspinlock_types.h +++ b/include/asm-generic/qspinlock_types.h @@ -63,7 +63,7 @@ typedef struct qspinlock { /* * Initializier */ -#define __ARCH_SPIN_LOCK_UNLOCKED { .val = ATOMIC_INIT(0) } +#define __ARCH_SPIN_LOCK_UNLOCKED { { .val = ATOMIC_INIT(0) } } /* * Bitfields in the atomic value: From e2ac836307e346969737c60075fdb01bed1af503 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 21 Jun 2018 23:24:38 -0700 Subject: [PATCH 345/572] xfs: simplify xfs_bmap_punch_delalloc_range Instead of using xfs_bmapi_read to find delalloc extents and then punch them out using xfs_bunmapi, opencode the loop to iterate over the extents and call xfs_bmap_del_extent_delay directly. This both simplifies the code and reduces the number of extent tree lookups required. Signed-off-by: Christoph Hellwig Reviewed-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_bmap_util.c | 93 ++++++++++++++++-------------------------- 1 file changed, 36 insertions(+), 57 deletions(-) diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index c35009a86699..c195b9d857af 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -685,12 +685,10 @@ out_unlock_iolock: } /* - * dead simple method of punching delalyed allocation blocks from a range in - * the inode. Walks a block at a time so will be slow, but is only executed in - * rare error cases so the overhead is not critical. This will always punch out - * both the start and end blocks, even if the ranges only partially overlap - * them, so it is up to the caller to ensure that partial blocks are not - * passed in. + * Dead simple method of punching delalyed allocation blocks from a range in + * the inode. This will always punch out both the start and end blocks, even + * if the ranges only partially overlap them, so it is up to the caller to + * ensure that partial blocks are not passed in. */ int xfs_bmap_punch_delalloc_range( @@ -698,63 +696,44 @@ xfs_bmap_punch_delalloc_range( xfs_fileoff_t start_fsb, xfs_fileoff_t length) { - xfs_fileoff_t remaining = length; + struct xfs_ifork *ifp = &ip->i_df; + xfs_fileoff_t end_fsb = start_fsb + length; + struct xfs_bmbt_irec got, del; + struct xfs_iext_cursor icur; int error = 0; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - do { - int done; - xfs_bmbt_irec_t imap; - int nimaps = 1; - xfs_fsblock_t firstblock; - struct xfs_defer_ops dfops; - - /* - * Map the range first and check that it is a delalloc extent - * before trying to unmap the range. Otherwise we will be - * trying to remove a real extent (which requires a - * transaction) or a hole, which is probably a bad idea... - */ - error = xfs_bmapi_read(ip, start_fsb, 1, &imap, &nimaps, - XFS_BMAPI_ENTIRE); - - if (error) { - /* something screwed, just bail */ - if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { - xfs_alert(ip->i_mount, - "Failed delalloc mapping lookup ino %lld fsb %lld.", - ip->i_ino, start_fsb); - } - break; - } - if (!nimaps) { - /* nothing there */ - goto next_block; - } - if (imap.br_startblock != DELAYSTARTBLOCK) { - /* been converted, ignore */ - goto next_block; - } - WARN_ON(imap.br_blockcount == 0); - - /* - * Note: while we initialise the firstblock/dfops pair, they - * should never be used because blocks should never be - * allocated or freed for a delalloc extent and hence we need - * don't cancel or finish them after the xfs_bunmapi() call. - */ - xfs_defer_init(&dfops, &firstblock); - error = xfs_bunmapi(NULL, ip, start_fsb, 1, 0, 1, &firstblock, - &dfops, &done); + if (!(ifp->if_flags & XFS_IFEXTENTS)) { + error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK); if (error) - break; + return error; + } - ASSERT(!xfs_defer_has_unfinished_work(&dfops)); -next_block: - start_fsb++; - remaining--; - } while(remaining > 0); + if (!xfs_iext_lookup_extent_before(ip, ifp, &end_fsb, &icur, &got)) + return 0; + + while (got.br_startoff + got.br_blockcount > start_fsb) { + del = got; + xfs_trim_extent(&del, start_fsb, length); + + /* + * A delete can push the cursor forward. Step back to the + * previous extent on non-delalloc or extents outside the + * target range. + */ + if (!del.br_blockcount || + !isnullstartblock(del.br_startblock)) { + if (!xfs_iext_prev_extent(ifp, &icur, &got)) + break; + continue; + } + + error = xfs_bmap_del_extent_delay(ip, XFS_DATA_FORK, &icur, + &got, &del); + if (error || !xfs_iext_get_extent(ifp, &icur, &got)) + break; + } return error; } From 23fcb3340d033d9f081e21e6c12c2db7eaa541d3 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 21 Jun 2018 23:25:57 -0700 Subject: [PATCH 346/572] xfs: More robust inode extent count validation When the inode is in extent format, it can't have more extents that fit in the inode fork. We don't currenty check this, and so this corruption goes unnoticed by the inode verifiers. This can lead to crashes operating on invalid in-memory structures. Attempts to access such a inode will now error out in the verifier rather than allowing modification operations to proceed. Reported-by: Wen Xu Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong [darrick: fix a typedef, add some braces and breaks to shut up compiler warnings] Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_format.h | 3 ++ fs/xfs/libxfs/xfs_inode_buf.c | 76 ++++++++++++++++++++++------------- 2 files changed, 50 insertions(+), 29 deletions(-) diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 1c5a8aaf2bfc..7b4a43deb83e 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -962,6 +962,9 @@ typedef enum xfs_dinode_fmt { XFS_DFORK_DSIZE(dip, mp) : \ XFS_DFORK_ASIZE(dip, mp)) +#define XFS_DFORK_MAXEXT(dip, mp, w) \ + (XFS_DFORK_SIZE(dip, mp, w) / sizeof(struct xfs_bmbt_rec)) + /* * Return pointers to the data or attribute forks. */ diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index d38d724534c4..33dc34655ac3 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -374,6 +374,47 @@ xfs_log_dinode_to_disk( } } +static xfs_failaddr_t +xfs_dinode_verify_fork( + struct xfs_dinode *dip, + struct xfs_mount *mp, + int whichfork) +{ + uint32_t di_nextents = XFS_DFORK_NEXTENTS(dip, whichfork); + + switch (XFS_DFORK_FORMAT(dip, whichfork)) { + case XFS_DINODE_FMT_LOCAL: + /* + * no local regular files yet + */ + if (whichfork == XFS_DATA_FORK) { + if (S_ISREG(be16_to_cpu(dip->di_mode))) + return __this_address; + if (be64_to_cpu(dip->di_size) > + XFS_DFORK_SIZE(dip, mp, whichfork)) + return __this_address; + } + if (di_nextents) + return __this_address; + break; + case XFS_DINODE_FMT_EXTENTS: + if (di_nextents > XFS_DFORK_MAXEXT(dip, mp, whichfork)) + return __this_address; + break; + case XFS_DINODE_FMT_BTREE: + if (whichfork == XFS_ATTR_FORK) { + if (di_nextents > MAXAEXTNUM) + return __this_address; + } else if (di_nextents > MAXEXTNUM) { + return __this_address; + } + break; + default: + return __this_address; + } + return NULL; +} + xfs_failaddr_t xfs_dinode_verify( struct xfs_mount *mp, @@ -441,24 +482,9 @@ xfs_dinode_verify( case S_IFREG: case S_IFLNK: case S_IFDIR: - switch (dip->di_format) { - case XFS_DINODE_FMT_LOCAL: - /* - * no local regular files yet - */ - if (S_ISREG(mode)) - return __this_address; - if (di_size > XFS_DFORK_DSIZE(dip, mp)) - return __this_address; - if (dip->di_nextents) - return __this_address; - /* fall through */ - case XFS_DINODE_FMT_EXTENTS: - case XFS_DINODE_FMT_BTREE: - break; - default: - return __this_address; - } + fa = xfs_dinode_verify_fork(dip, mp, XFS_DATA_FORK); + if (fa) + return fa; break; case 0: /* Uninitialized inode ok. */ @@ -468,17 +494,9 @@ xfs_dinode_verify( } if (XFS_DFORK_Q(dip)) { - switch (dip->di_aformat) { - case XFS_DINODE_FMT_LOCAL: - if (dip->di_anextents) - return __this_address; - /* fall through */ - case XFS_DINODE_FMT_EXTENTS: - case XFS_DINODE_FMT_BTREE: - break; - default: - return __this_address; - } + fa = xfs_dinode_verify_fork(dip, mp, XFS_ATTR_FORK); + if (fa) + return fa; } else { /* * If there is no fork offset, this may be a freshly-made inode From c51b3c639e01f20559531eef3c5919feae23c55a Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 18 Jun 2018 09:36:39 +0200 Subject: [PATCH 347/572] xen: add new hypercall buffer mapping device For passing arbitrary data from user land to the Xen hypervisor the Xen tools today are using mlock()ed buffers. Unfortunately the kernel might change access rights of such buffers for brief periods of time e.g. for page migration or compaction, leading to access faults in the hypervisor, as the hypervisor can't use the locks of the kernel. In order to solve this problem add a new device node to the Xen privcmd driver to easily allocate hypercall buffers via mmap(). The memory is allocated in the kernel and just mapped into user space. Marked as VM_IO the user mapping will not be subject to page migration et al. Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross --- drivers/xen/Makefile | 2 +- drivers/xen/privcmd-buf.c | 210 ++++++++++++++++++++++++++++++++++++++ drivers/xen/privcmd.c | 9 ++ drivers/xen/privcmd.h | 3 + 4 files changed, 223 insertions(+), 1 deletion(-) create mode 100644 drivers/xen/privcmd-buf.c diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index 451e833f5931..48b154276179 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -41,4 +41,4 @@ obj-$(CONFIG_XEN_PVCALLS_FRONTEND) += pvcalls-front.o xen-evtchn-y := evtchn.o xen-gntdev-y := gntdev.o xen-gntalloc-y := gntalloc.o -xen-privcmd-y := privcmd.o +xen-privcmd-y := privcmd.o privcmd-buf.o diff --git a/drivers/xen/privcmd-buf.c b/drivers/xen/privcmd-buf.c new file mode 100644 index 000000000000..df1ed37c3269 --- /dev/null +++ b/drivers/xen/privcmd-buf.c @@ -0,0 +1,210 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT + +/****************************************************************************** + * privcmd-buf.c + * + * Mmap of hypercall buffers. + * + * Copyright (c) 2018 Juergen Gross + */ + +#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include + +#include "privcmd.h" + +MODULE_LICENSE("GPL"); + +static unsigned int limit = 64; +module_param(limit, uint, 0644); +MODULE_PARM_DESC(limit, "Maximum number of pages that may be allocated by " + "the privcmd-buf device per open file"); + +struct privcmd_buf_private { + struct mutex lock; + struct list_head list; + unsigned int allocated; +}; + +struct privcmd_buf_vma_private { + struct privcmd_buf_private *file_priv; + struct list_head list; + unsigned int users; + unsigned int n_pages; + struct page *pages[]; +}; + +static int privcmd_buf_open(struct inode *ino, struct file *file) +{ + struct privcmd_buf_private *file_priv; + + file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL); + if (!file_priv) + return -ENOMEM; + + mutex_init(&file_priv->lock); + INIT_LIST_HEAD(&file_priv->list); + + file->private_data = file_priv; + + return 0; +} + +static void privcmd_buf_vmapriv_free(struct privcmd_buf_vma_private *vma_priv) +{ + unsigned int i; + + vma_priv->file_priv->allocated -= vma_priv->n_pages; + + list_del(&vma_priv->list); + + for (i = 0; i < vma_priv->n_pages; i++) + if (vma_priv->pages[i]) + __free_page(vma_priv->pages[i]); + + kfree(vma_priv); +} + +static int privcmd_buf_release(struct inode *ino, struct file *file) +{ + struct privcmd_buf_private *file_priv = file->private_data; + struct privcmd_buf_vma_private *vma_priv; + + mutex_lock(&file_priv->lock); + + while (!list_empty(&file_priv->list)) { + vma_priv = list_first_entry(&file_priv->list, + struct privcmd_buf_vma_private, + list); + privcmd_buf_vmapriv_free(vma_priv); + } + + mutex_unlock(&file_priv->lock); + + kfree(file_priv); + + return 0; +} + +static void privcmd_buf_vma_open(struct vm_area_struct *vma) +{ + struct privcmd_buf_vma_private *vma_priv = vma->vm_private_data; + + if (!vma_priv) + return; + + mutex_lock(&vma_priv->file_priv->lock); + vma_priv->users++; + mutex_unlock(&vma_priv->file_priv->lock); +} + +static void privcmd_buf_vma_close(struct vm_area_struct *vma) +{ + struct privcmd_buf_vma_private *vma_priv = vma->vm_private_data; + struct privcmd_buf_private *file_priv; + + if (!vma_priv) + return; + + file_priv = vma_priv->file_priv; + + mutex_lock(&file_priv->lock); + + vma_priv->users--; + if (!vma_priv->users) + privcmd_buf_vmapriv_free(vma_priv); + + mutex_unlock(&file_priv->lock); +} + +static vm_fault_t privcmd_buf_vma_fault(struct vm_fault *vmf) +{ + pr_debug("fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n", + vmf->vma, vmf->vma->vm_start, vmf->vma->vm_end, + vmf->pgoff, (void *)vmf->address); + + return VM_FAULT_SIGBUS; +} + +static const struct vm_operations_struct privcmd_buf_vm_ops = { + .open = privcmd_buf_vma_open, + .close = privcmd_buf_vma_close, + .fault = privcmd_buf_vma_fault, +}; + +static int privcmd_buf_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct privcmd_buf_private *file_priv = file->private_data; + struct privcmd_buf_vma_private *vma_priv; + unsigned long count = vma_pages(vma); + unsigned int i; + int ret = 0; + + if (!(vma->vm_flags & VM_SHARED) || count > limit || + file_priv->allocated + count > limit) + return -EINVAL; + + vma_priv = kzalloc(sizeof(*vma_priv) + count * sizeof(void *), + GFP_KERNEL); + if (!vma_priv) + return -ENOMEM; + + vma_priv->n_pages = count; + count = 0; + for (i = 0; i < vma_priv->n_pages; i++) { + vma_priv->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!vma_priv->pages[i]) + break; + count++; + } + + mutex_lock(&file_priv->lock); + + file_priv->allocated += count; + + vma_priv->file_priv = file_priv; + vma_priv->users = 1; + + vma->vm_flags |= VM_IO | VM_DONTEXPAND; + vma->vm_ops = &privcmd_buf_vm_ops; + vma->vm_private_data = vma_priv; + + list_add(&vma_priv->list, &file_priv->list); + + if (vma_priv->n_pages != count) + ret = -ENOMEM; + else + for (i = 0; i < vma_priv->n_pages; i++) { + ret = vm_insert_page(vma, vma->vm_start + i * PAGE_SIZE, + vma_priv->pages[i]); + if (ret) + break; + } + + if (ret) + privcmd_buf_vmapriv_free(vma_priv); + + mutex_unlock(&file_priv->lock); + + return ret; +} + +const struct file_operations xen_privcmdbuf_fops = { + .owner = THIS_MODULE, + .open = privcmd_buf_open, + .release = privcmd_buf_release, + .mmap = privcmd_buf_mmap, +}; +EXPORT_SYMBOL_GPL(xen_privcmdbuf_fops); + +struct miscdevice xen_privcmdbuf_dev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "xen/hypercall", + .fops = &xen_privcmdbuf_fops, +}; diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 8ae0349d9f0a..7e6e682104dc 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -1007,12 +1007,21 @@ static int __init privcmd_init(void) pr_err("Could not register Xen privcmd device\n"); return err; } + + err = misc_register(&xen_privcmdbuf_dev); + if (err != 0) { + pr_err("Could not register Xen hypercall-buf device\n"); + misc_deregister(&privcmd_dev); + return err; + } + return 0; } static void __exit privcmd_exit(void) { misc_deregister(&privcmd_dev); + misc_deregister(&xen_privcmdbuf_dev); } module_init(privcmd_init); diff --git a/drivers/xen/privcmd.h b/drivers/xen/privcmd.h index 14facaeed36f..0dd9f8f67ee3 100644 --- a/drivers/xen/privcmd.h +++ b/drivers/xen/privcmd.h @@ -1,3 +1,6 @@ #include extern const struct file_operations xen_privcmd_fops; +extern const struct file_operations xen_privcmdbuf_fops; + +extern struct miscdevice xen_privcmdbuf_dev; From e53946dbd31a21f4bef155f8febba556933d62bf Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 21 Jun 2018 23:26:05 -0700 Subject: [PATCH 348/572] xfs: xfs_iflush_abort() can be called twice on cluster writeback failure When a corrupt inode is detected during xfs_iflush_cluster, we can get a shutdown ASSERT failure like this: XFS (pmem1): Metadata corruption detected at xfs_symlink_shortform_verify+0x5c/0xa0, inode 0x86627 data fork XFS (pmem1): Unmount and run xfs_repair XFS (pmem1): xfs_do_force_shutdown(0x8) called from line 3372 of file fs/xfs/xfs_inode.c. Return address = ffffffff814f4116 XFS (pmem1): Corruption of in-memory data detected. Shutting down filesystem XFS (pmem1): xfs_do_force_shutdown(0x1) called from line 222 of file fs/xfs/libxfs/xfs_defer.c. Return address = ffffffff814a8a88 XFS (pmem1): xfs_do_force_shutdown(0x1) called from line 222 of file fs/xfs/libxfs/xfs_defer.c. Return address = ffffffff814a8ef9 XFS (pmem1): Please umount the filesystem and rectify the problem(s) XFS: Assertion failed: xfs_isiflocked(ip), file: fs/xfs/xfs_inode.h, line: 258 ..... Call Trace: xfs_iflush_abort+0x10a/0x110 xfs_iflush+0xf3/0x390 xfs_inode_item_push+0x126/0x1e0 xfsaild+0x2c5/0x890 kthread+0x11c/0x140 ret_from_fork+0x24/0x30 Essentially, xfs_iflush_abort() has been called twice on the original inode that that was flushed. This happens because the inode has been flushed to teh buffer successfully via xfs_iflush_int(), and so when another inode is detected as corrupt in xfs_iflush_cluster, the buffer is marked stale and EIO, and iodone callbacks are run on it. Running the iodone callbacks walks across the original inode and calls xfs_iflush_abort() on it. When xfs_iflush_cluster() returns to xfs_iflush(), it runs the error path for that function, and that calls xfs_iflush_abort() on the inode a second time, leading to the above assert failure as the inode is not flush locked anymore. This bug has been there a long time. The simple fix would be to just avoid calling xfs_iflush_abort() in xfs_iflush() if we've got a failure from xfs_iflush_cluster(). However, xfs_iflush_cluster() has magic delwri buffer handling that means it may or may not have run IO completion on the buffer, and hence sometimes we have to call xfs_iflush_abort() from xfs_iflush(), and sometimes we shouldn't. After reading through all the error paths and the delwri buffer code, it's clear that the error handling in xfs_iflush_cluster() is unnecessary. If the buffer is delwri, it leaves it on the delwri list so that when the delwri list is submitted it sees a shutdown fliesystem in xfs_buf_submit() and that marks the buffer stale, EIO and runs IO completion. i.e. exactly what xfs+iflush_cluster() does when it's not a delwri buffer. Further, marking a buffer stale clears the _XBF_DELWRI_Q flag on the buffer, which means when submission of the buffer occurs, it just skips over it and releases it. IOWs, the error handling in xfs_iflush_cluster doesn't need to care if the buffer is already on a the delwri queue or not - it just needs to mark the buffer stale, EIO and run completions. That means we can just use the easy fix for xfs_iflush() to avoid the double abort. Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_inode.c | 57 +++++++++++++++++----------------------------- 1 file changed, 21 insertions(+), 36 deletions(-) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 7a96c4e0ab5c..5df4de666cc1 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -3236,7 +3236,6 @@ xfs_iflush_cluster( struct xfs_inode *cip; int nr_found; int clcount = 0; - int bufwasdelwri; int i; pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); @@ -3360,37 +3359,22 @@ cluster_corrupt_out: * inode buffer and shut down the filesystem. */ rcu_read_unlock(); - /* - * Clean up the buffer. If it was delwri, just release it -- - * brelse can handle it with no problems. If not, shut down the - * filesystem before releasing the buffer. - */ - bufwasdelwri = (bp->b_flags & _XBF_DELWRI_Q); - if (bufwasdelwri) - xfs_buf_relse(bp); - xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); - if (!bufwasdelwri) { - /* - * Just like incore_relse: if we have b_iodone functions, - * mark the buffer as an error and call them. Otherwise - * mark it as stale and brelse. - */ - if (bp->b_iodone) { - bp->b_flags &= ~XBF_DONE; - xfs_buf_stale(bp); - xfs_buf_ioerror(bp, -EIO); - xfs_buf_ioend(bp); - } else { - xfs_buf_stale(bp); - xfs_buf_relse(bp); - } - } - /* - * Unlocks the flush lock + * We'll always have an inode attached to the buffer for completion + * process by the time we are called from xfs_iflush(). Hence we have + * always need to do IO completion processing to abort the inodes + * attached to the buffer. handle them just like the shutdown case in + * xfs_buf_submit(). */ + ASSERT(bp->b_iodone); + bp->b_flags &= ~XBF_DONE; + xfs_buf_stale(bp); + xfs_buf_ioerror(bp, -EIO); + xfs_buf_ioend(bp); + + /* abort the corrupt inode, as it was not attached to the buffer */ xfs_iflush_abort(cip, false); kmem_free(cilist); xfs_perag_put(pag); @@ -3486,12 +3470,17 @@ xfs_iflush( xfs_log_force(mp, 0); /* - * inode clustering: - * see if other inodes can be gathered into this write + * inode clustering: try to gather other inodes into this write + * + * Note: Any error during clustering will result in the filesystem + * being shut down and completion callbacks run on the cluster buffer. + * As we have already flushed and attached this inode to the buffer, + * it has already been aborted and released by xfs_iflush_cluster() and + * so we have no further error handling to do here. */ error = xfs_iflush_cluster(ip, bp); if (error) - goto cluster_corrupt_out; + return error; *bpp = bp; return 0; @@ -3500,12 +3489,8 @@ corrupt_out: if (bp) xfs_buf_relse(bp); xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); -cluster_corrupt_out: - error = -EFSCORRUPTED; abort_out: - /* - * Unlocks the flush lock - */ + /* abort the corrupt inode, as it was not attached to the buffer */ xfs_iflush_abort(ip, false); return error; } From eef04c7b3786ff0c9cb1019278b6c6c2ea0ad4ff Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Thu, 21 Jun 2018 13:29:44 -0400 Subject: [PATCH 349/572] xen: Remove unnecessary BUG_ON from __unbind_from_irq() Commit 910f8befdf5b ("xen/pirq: fix error path cleanup when binding MSIs") fixed a couple of errors in error cleanup path of xen_bind_pirq_msi_to_irq(). This cleanup allowed a call to __unbind_from_irq() with an unbound irq, which would result in triggering the BUG_ON there. Since there is really no reason for the BUG_ON (xen_free_irq() can operate on unbound irqs) we can remove it. Reported-by: Ben Hutchings Signed-off-by: Boris Ostrovsky Cc: stable@vger.kernel.org Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross --- drivers/xen/events/events_base.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 762378f1811c..08e4af04d6f2 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -628,8 +628,6 @@ static void __unbind_from_irq(unsigned int irq) xen_irq_info_cleanup(info); } - BUG_ON(info_for_irq(irq)->type == IRQT_UNBOUND); - xen_free_irq(irq); } From f1ce87f6080b1dda7e7b1eda3da332add19d87b9 Mon Sep 17 00:00:00 2001 From: Joakim Tjernlund Date: Wed, 6 Jun 2018 12:13:30 +0200 Subject: [PATCH 350/572] mtd: cfi_cmdset_0002: Avoid walking all chips when unlocking. cfi_ppb_unlock() walks all flash chips when unlocking sectors, avoid walking chips unaffected by the unlock operation. Fixes: 1648eaaa1575 ("mtd: cfi_cmdset_0002: Support Persistent Protection Bits (PPB) locking") Cc: stable@vger.kernel.org Signed-off-by: Joakim Tjernlund Signed-off-by: Boris Brezillon --- drivers/mtd/chips/cfi_cmdset_0002.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c index 9cfc2645dd93..1b64ac8c5bc8 100644 --- a/drivers/mtd/chips/cfi_cmdset_0002.c +++ b/drivers/mtd/chips/cfi_cmdset_0002.c @@ -2676,6 +2676,8 @@ static int __maybe_unused cfi_ppb_unlock(struct mtd_info *mtd, loff_t ofs, i++; if (adr >> cfi->chipshift) { + if (offset >= (ofs + len)) + break; adr = 0; chipnum++; From cbdceb9b3e1928554fffd0d889adf2d0d8edee4d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 22 Jun 2018 09:04:25 +0200 Subject: [PATCH 351/572] mtd: dataflash: Use ULL suffix for 64-bit constants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With gcc 4.1.2 when compiling for 32-bit: drivers/mtd/devices/mtd_dataflash.c:736: warning: integer constant is too large for ‘long’ type drivers/mtd/devices/mtd_dataflash.c:737: warning: integer constant is too large for ‘long’ type Add the missing "ULL" suffixes to fix this. Fixes: 67e4145ebf2c161d ("mtd: dataflash: Add flash_info for AT45DB641E") Signed-off-by: Geert Uytterhoeven Acked-by: Andrey Smirnov Signed-off-by: Boris Brezillon --- drivers/mtd/devices/mtd_dataflash.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/devices/mtd_dataflash.c b/drivers/mtd/devices/mtd_dataflash.c index 3a6f450d1093..53febe8a68c3 100644 --- a/drivers/mtd/devices/mtd_dataflash.c +++ b/drivers/mtd/devices/mtd_dataflash.c @@ -733,8 +733,8 @@ static struct flash_info dataflash_data[] = { { "AT45DB642x", 0x1f2800, 8192, 1056, 11, SUP_POW2PS}, { "at45db642d", 0x1f2800, 8192, 1024, 10, SUP_POW2PS | IS_POW2PS}, - { "AT45DB641E", 0x1f28000100, 32768, 264, 9, SUP_EXTID | SUP_POW2PS}, - { "at45db641e", 0x1f28000100, 32768, 256, 8, SUP_EXTID | SUP_POW2PS | IS_POW2PS}, + { "AT45DB641E", 0x1f28000100ULL, 32768, 264, 9, SUP_EXTID | SUP_POW2PS}, + { "at45db641e", 0x1f28000100ULL, 32768, 256, 8, SUP_EXTID | SUP_POW2PS | IS_POW2PS}, }; static struct flash_info *jedec_lookup(struct spi_device *spi, From 52e1cf2d19c2e62e6a81b8de3f7320d033917dd5 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 22 Jun 2018 08:42:22 +0200 Subject: [PATCH 352/572] efi/libstub/tpm: Initialize efi_physical_addr_t vars to zero for mixed mode Commit: 79832f0b5f71 ("efi/libstub/tpm: Initialize pointer variables to zero for mixed mode") fixes a problem with the tpm code on mixed mode (64-bit kernel on 32-bit UEFI), where 64-bit pointer variables are not fully initialized by the 32-bit EFI code. A similar problem applies to the efi_physical_addr_t variables which are written by the ->get_event_log() EFI call. Even though efi_physical_addr_t is 64-bit everywhere, it seems that some 32-bit UEFI implementations only fill in the lower 32 bits when passed a pointer to an efi_physical_addr_t to fill. This commit initializes these to 0 to, to ensure the upper 32 bits are 0 in mixed mode. This fixes recent kernels sometimes hanging during early boot on mixed mode UEFI systems. Signed-off-by: Hans de Goede Signed-off-by: Ard Biesheuvel Cc: # v4.16+ Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20180622064222.11633-2-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- drivers/firmware/efi/libstub/tpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/efi/libstub/tpm.c b/drivers/firmware/efi/libstub/tpm.c index caa37a6dd9d4..a90b0b8fc69a 100644 --- a/drivers/firmware/efi/libstub/tpm.c +++ b/drivers/firmware/efi/libstub/tpm.c @@ -64,7 +64,7 @@ static void efi_retrieve_tpm2_eventlog_1_2(efi_system_table_t *sys_table_arg) efi_guid_t tcg2_guid = EFI_TCG2_PROTOCOL_GUID; efi_guid_t linux_eventlog_guid = LINUX_EFI_TPM_EVENT_LOG_GUID; efi_status_t status; - efi_physical_addr_t log_location, log_last_entry; + efi_physical_addr_t log_location = 0, log_last_entry = 0; struct linux_efi_tpm_eventlog *log_tbl = NULL; unsigned long first_entry_addr, last_entry_addr; size_t log_size, last_entry_size; From 57d6a7938a8fc6cee8420b40ca244220b41721f5 Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Thu, 8 Mar 2018 21:28:56 +0100 Subject: [PATCH 353/572] perf/core: Move the inline keyword at the beginning of the function declaration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When building perf with W=1 the following warning triggers: CC kernel/events/ring_buffer.o kernel/events/ring_buffer.c:105:1: warning: ‘inline’ is not at beginning of declaration [-Wold-style-declaration] static bool __always_inline ^~~~~~ ... Move the inline keyword to the beginning of the function declaration. Signed-off-by: Mathieu Malaterre Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: trival@kernel.org Link: http://lkml.kernel.org/r/20180308202856.9378-1-malat@debian.org Signed-off-by: Ingo Molnar --- kernel/events/ring_buffer.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 1d8ca9ea9979..d11f60cbe8ca 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -103,7 +103,7 @@ out: preempt_enable(); } -static bool __always_inline +static __always_inline bool ring_buffer_has_space(unsigned long head, unsigned long tail, unsigned long data_size, unsigned int size, bool backward) @@ -114,7 +114,7 @@ ring_buffer_has_space(unsigned long head, unsigned long tail, return CIRC_SPACE(tail, head, data_size) >= size; } -static int __always_inline +static __always_inline int __perf_output_begin(struct perf_output_handle *handle, struct perf_event *event, unsigned int size, bool backward) @@ -414,7 +414,7 @@ err: } EXPORT_SYMBOL_GPL(perf_aux_output_begin); -static bool __always_inline rb_need_aux_wakeup(struct ring_buffer *rb) +static __always_inline bool rb_need_aux_wakeup(struct ring_buffer *rb) { if (rb->aux_overwrite) return false; From 275ec0cb946cb75ac8977f662e608fce92f8b8a8 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 22 Jun 2018 12:17:45 +0200 Subject: [PATCH 354/572] ALSA: hda/realtek - Add a quirk for FSC ESPRIMO U9210 Fujitsu Seimens ESPRIMO Mobile U9210 requires the same fixup as H270 for the correct pin configs. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=200107 Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 487ceb9fd038..70bf4c30548a 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2545,6 +2545,7 @@ static const struct snd_pci_quirk alc262_fixup_tbl[] = { SND_PCI_QUIRK(0x10cf, 0x1397, "Fujitsu Lifebook S7110", ALC262_FIXUP_FSC_S7110), SND_PCI_QUIRK(0x10cf, 0x142d, "Fujitsu Lifebook E8410", ALC262_FIXUP_BENQ), SND_PCI_QUIRK(0x10f1, 0x2915, "Tyan Thunder n6650W", ALC262_FIXUP_TYAN), + SND_PCI_QUIRK(0x1734, 0x1141, "FSC ESPRIMO U9210", ALC262_FIXUP_FSC_H270), SND_PCI_QUIRK(0x1734, 0x1147, "FSC Celsius H270", ALC262_FIXUP_FSC_H270), SND_PCI_QUIRK(0x17aa, 0x384e, "Lenovo 3000", ALC262_FIXUP_LENOVO_3000), SND_PCI_QUIRK(0x17ff, 0x0560, "Benq ED8", ALC262_FIXUP_BENQ), From c5b4a50b74018b3677098151ec5f4fce07d5e6a0 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 11 Jun 2018 19:24:16 +0100 Subject: [PATCH 355/572] Btrfs: fix return value on rename exchange failure If we failed during a rename exchange operation after starting/joining a transaction, we would end up replacing the return value, stored in the local 'ret' variable, with the return value from btrfs_end_transaction(). So this could end up returning 0 (success) to user space despite the operation having failed and aborted the transaction, because if there are multiple tasks having a reference on the transaction at the time btrfs_end_transaction() is called by the rename exchange, that function returns 0 (otherwise it returns -EIO and not the original error value). So fix this by not overwriting the return value on error after getting a transaction handle. Fixes: cdd1fedf8261 ("btrfs: add support for RENAME_EXCHANGE and RENAME_WHITEOUT") CC: stable@vger.kernel.org # 4.9+ Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c12b7a6e534a..a6bf9e2ff68f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9443,6 +9443,7 @@ static int btrfs_rename_exchange(struct inode *old_dir, u64 new_idx = 0; u64 root_objectid; int ret; + int ret2; bool root_log_pinned = false; bool dest_log_pinned = false; @@ -9639,7 +9640,8 @@ out_fail: dest_log_pinned = false; } } - ret = btrfs_end_transaction(trans); + ret2 = btrfs_end_transaction(trans); + ret = ret ? ret : ret2; out_notrans: if (new_ino == BTRFS_FIRST_FREE_OBJECTID) up_read(&fs_info->subvol_sem); From 72a8edc2d9134c2895eac2fec5eecf8230a05c96 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 22 Jun 2018 10:52:48 +0100 Subject: [PATCH 356/572] genirq/debugfs: Add missing IRQCHIP_SUPPORTS_LEVEL_MSI debug Debug is missing the IRQCHIP_SUPPORTS_LEVEL_MSI debug entry, making debugfs slightly less useful. Take this opportunity to also add a missing comment in the definition of IRQCHIP_SUPPORTS_LEVEL_MSI. Fixes: 6988e0e0d283 ("genirq/msi: Limit level-triggered MSI to platform devices") Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Cc: Jason Cooper Cc: Alexandre Belloni Cc: Yang Yingliang Cc: Sumit Garg Link: https://lkml.kernel.org/r/20180622095254.5906-2-marc.zyngier@arm.com --- include/linux/irq.h | 1 + kernel/irq/debugfs.c | 1 + 2 files changed, 2 insertions(+) diff --git a/include/linux/irq.h b/include/linux/irq.h index 4bd2f34947f4..201de12a9957 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -503,6 +503,7 @@ struct irq_chip { * IRQCHIP_SKIP_SET_WAKE: Skip chip.irq_set_wake(), for this irq chip * IRQCHIP_ONESHOT_SAFE: One shot does not require mask/unmask * IRQCHIP_EOI_THREADED: Chip requires eoi() on unmask in threaded mode + * IRQCHIP_SUPPORTS_LEVEL_MSI Chip can provide two doorbells for Level MSIs */ enum { IRQCHIP_SET_TYPE_MASKED = (1 << 0), diff --git a/kernel/irq/debugfs.c b/kernel/irq/debugfs.c index 4dadeb3d6666..6f636136cccc 100644 --- a/kernel/irq/debugfs.c +++ b/kernel/irq/debugfs.c @@ -55,6 +55,7 @@ static const struct irq_bit_descr irqchip_flags[] = { BIT_MASK_DESCR(IRQCHIP_SKIP_SET_WAKE), BIT_MASK_DESCR(IRQCHIP_ONESHOT_SAFE), BIT_MASK_DESCR(IRQCHIP_EOI_THREADED), + BIT_MASK_DESCR(IRQCHIP_SUPPORTS_LEVEL_MSI), }; static void From 893fbfff976cd069f2e60c3b186dbe3f85504db2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 22 Jun 2018 10:52:49 +0100 Subject: [PATCH 357/572] irqchip/ls-scfg-msi: Fix MSI affinity handling The ls-scfs-msi driver is not dealing with the effective affinity as it should. Let's fix that, and make it clear that the effective affinity is restricted to a single CPU. Also prevent the driver from messing with the internals of the affinity setting infrastructure. Reported-by: Alexandre Belloni Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Tested-by: Alexandre Belloni Cc: Jason Cooper Cc: Yang Yingliang Cc: Sumit Garg Link: https://lkml.kernel.org/r/20180622095254.5906-3-marc.zyngier@arm.com --- drivers/irqchip/irq-ls-scfg-msi.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/irqchip/irq-ls-scfg-msi.c b/drivers/irqchip/irq-ls-scfg-msi.c index 1ec3bfe56693..c671b3212010 100644 --- a/drivers/irqchip/irq-ls-scfg-msi.c +++ b/drivers/irqchip/irq-ls-scfg-msi.c @@ -93,8 +93,12 @@ static void ls_scfg_msi_compose_msg(struct irq_data *data, struct msi_msg *msg) msg->address_lo = lower_32_bits(msi_data->msiir_addr); msg->data = data->hwirq; - if (msi_affinity_flag) - msg->data |= cpumask_first(data->common->affinity); + if (msi_affinity_flag) { + const struct cpumask *mask; + + mask = irq_data_get_effective_affinity_mask(data); + msg->data |= cpumask_first(mask); + } iommu_dma_map_msi_msg(data->irq, msg); } @@ -121,7 +125,7 @@ static int ls_scfg_msi_set_affinity(struct irq_data *irq_data, return -EINVAL; } - cpumask_copy(irq_data->common->affinity, mask); + irq_data_update_effective_affinity(irq_data, cpumask_of(cpu)); return IRQ_SET_MASK_OK; } From cbaf45a6be497c272e80500e4fd9bccdf20d5050 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 22 Jun 2018 10:52:50 +0100 Subject: [PATCH 358/572] irqchip/gic-v2m: Fix SPI release on error path On failing to allocate the required SPIs, the actual number of interrupts should be freed and not its log2 value. Fixes: de337ee30142 ("irqchip/gic-v2m: Add PCI Multi-MSI support") Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Cc: Jason Cooper Cc: Alexandre Belloni Cc: Yang Yingliang Cc: Sumit Garg Link: https://lkml.kernel.org/r/20180622095254.5906-4-marc.zyngier@arm.com --- drivers/irqchip/irq-gic-v2m.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c index 0f52d44b3f69..f5fe0100f9ff 100644 --- a/drivers/irqchip/irq-gic-v2m.c +++ b/drivers/irqchip/irq-gic-v2m.c @@ -199,7 +199,7 @@ static int gicv2m_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, fail: irq_domain_free_irqs_parent(domain, virq, nr_irqs); - gicv2m_unalloc_msi(v2m, hwirq, get_count_order(nr_irqs)); + gicv2m_unalloc_msi(v2m, hwirq, nr_irqs); return err; } From c1797b11a09c8323c92b074fd48b89a936c991d0 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 22 Jun 2018 10:52:51 +0100 Subject: [PATCH 359/572] irqchip/gic-v3-its: Don't bind LPI to unavailable NUMA node On a NUMA system, if an ITS is local to an offline node, the ITS driver may pick an offline CPU to bind the LPI. In this case, pick an online CPU (and the first one will do). But on some systems, binding an LPI to non-local node CPU may cause deadlock (see Cavium erratum 23144). In this case, just fail the activate and return an error code. Signed-off-by: Yang Yingliang Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Cc: Jason Cooper Cc: Alexandre Belloni Cc: Sumit Garg Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180622095254.5906-5-marc.zyngier@arm.com --- drivers/irqchip/irq-gic-v3-its.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 5377d7e2afba..cae53937feeb 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -2310,7 +2310,14 @@ static int its_irq_domain_activate(struct irq_domain *domain, cpu_mask = cpumask_of_node(its_dev->its->numa_node); /* Bind the LPI to the first possible CPU */ - cpu = cpumask_first(cpu_mask); + cpu = cpumask_first_and(cpu_mask, cpu_online_mask); + if (cpu >= nr_cpu_ids) { + if (its_dev->its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_23144) + return -EINVAL; + + cpu = cpumask_first(cpu_online_mask); + } + its_dev->event_map.col_map[event] = cpu; irq_data_update_effective_affinity(d, cpumask_of(cpu)); From 83559b47cdc4d396fc1187a13b527d01b55e0fe6 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 22 Jun 2018 10:52:52 +0100 Subject: [PATCH 360/572] irqchip/gic-v3-its: Only emit SYNC if targetting a valid collection It is possible, under obscure circumstances, to convince the ITS driver to emit a SYNC operation that targets a collection that is not bound to any redistributor (and the target_address field is zero) because the corresponding CPU has not been seen yet (the system has been booted with max_cpus="something small"). If the ITS is using the linear CPU number as the target, this is not a big deal, as we just end-up issuing a SYNC to CPU0. But if the ITS requires the physical address of the redistributor (with GITS_TYPER.PTA==1), we end-up asking the ITS to write to the physical address zero, which is not exactly a good idea (there has been report of the ITS locking up). This should of course never happen, but hey, this is SW... In order to avoid the above disaster, let's track which collections have been actually initialized, and let's not generate a SYNC if the collection hasn't been properly bound to a redistributor. Take this opportunity to spit our a warning, in the hope that someone may report the issue if it arrises again. Reported-by: Yang Yingliang Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Cc: Jason Cooper Cc: Alexandre Belloni Cc: Sumit Garg Link: https://lkml.kernel.org/r/20180622095254.5906-6-marc.zyngier@arm.com --- drivers/irqchip/irq-gic-v3-its.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index cae53937feeb..fcfc96f8e0de 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -182,6 +182,14 @@ static struct its_collection *dev_event_to_col(struct its_device *its_dev, return its->collections + its_dev->event_map.col_map[event]; } +static struct its_collection *valid_col(struct its_collection *col) +{ + if (WARN_ON_ONCE(col->target_address & GENMASK_ULL(0, 15))) + return NULL; + + return col; +} + /* * ITS command descriptors - parameters to be encoded in a command * block. @@ -439,7 +447,7 @@ static struct its_collection *its_build_mapti_cmd(struct its_node *its, its_fixup_cmd(cmd); - return col; + return valid_col(col); } static struct its_collection *its_build_movi_cmd(struct its_node *its, @@ -458,7 +466,7 @@ static struct its_collection *its_build_movi_cmd(struct its_node *its, its_fixup_cmd(cmd); - return col; + return valid_col(col); } static struct its_collection *its_build_discard_cmd(struct its_node *its, @@ -476,7 +484,7 @@ static struct its_collection *its_build_discard_cmd(struct its_node *its, its_fixup_cmd(cmd); - return col; + return valid_col(col); } static struct its_collection *its_build_inv_cmd(struct its_node *its, @@ -494,7 +502,7 @@ static struct its_collection *its_build_inv_cmd(struct its_node *its, its_fixup_cmd(cmd); - return col; + return valid_col(col); } static struct its_collection *its_build_int_cmd(struct its_node *its, @@ -512,7 +520,7 @@ static struct its_collection *its_build_int_cmd(struct its_node *its, its_fixup_cmd(cmd); - return col; + return valid_col(col); } static struct its_collection *its_build_clear_cmd(struct its_node *its, @@ -530,7 +538,7 @@ static struct its_collection *its_build_clear_cmd(struct its_node *its, its_fixup_cmd(cmd); - return col; + return valid_col(col); } static struct its_collection *its_build_invall_cmd(struct its_node *its, @@ -1824,11 +1832,16 @@ static int its_alloc_tables(struct its_node *its) static int its_alloc_collections(struct its_node *its) { + int i; + its->collections = kcalloc(nr_cpu_ids, sizeof(*its->collections), GFP_KERNEL); if (!its->collections) return -ENOMEM; + for (i = 0; i < nr_cpu_ids; i++) + its->collections[i].target_address = ~0ULL; + return 0; } From 205e065d91d72e6afad112ea84f0ca60b30bf5ab Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 22 Jun 2018 10:52:53 +0100 Subject: [PATCH 361/572] irqchip/gic-v3-its: Only emit VSYNC if targetting a valid collection Similarily to the SYNC operation, it must be verified that the VPE targetted by a VLPI is backed by a valid collection in the GIC driver data structures. Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Cc: Jason Cooper Cc: Alexandre Belloni Cc: Yang Yingliang Cc: Sumit Garg Link: https://lkml.kernel.org/r/20180622095254.5906-7-marc.zyngier@arm.com --- drivers/irqchip/irq-gic-v3-its.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index fcfc96f8e0de..0269ffb93f6e 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -190,6 +190,14 @@ static struct its_collection *valid_col(struct its_collection *col) return col; } +static struct its_vpe *valid_vpe(struct its_node *its, struct its_vpe *vpe) +{ + if (valid_col(its->collections + vpe->col_idx)) + return vpe; + + return NULL; +} + /* * ITS command descriptors - parameters to be encoded in a command * block. @@ -562,7 +570,7 @@ static struct its_vpe *its_build_vinvall_cmd(struct its_node *its, its_fixup_cmd(cmd); - return desc->its_vinvall_cmd.vpe; + return valid_vpe(its, desc->its_vinvall_cmd.vpe); } static struct its_vpe *its_build_vmapp_cmd(struct its_node *its, @@ -584,7 +592,7 @@ static struct its_vpe *its_build_vmapp_cmd(struct its_node *its, its_fixup_cmd(cmd); - return desc->its_vmapp_cmd.vpe; + return valid_vpe(its, desc->its_vmapp_cmd.vpe); } static struct its_vpe *its_build_vmapti_cmd(struct its_node *its, @@ -607,7 +615,7 @@ static struct its_vpe *its_build_vmapti_cmd(struct its_node *its, its_fixup_cmd(cmd); - return desc->its_vmapti_cmd.vpe; + return valid_vpe(its, desc->its_vmapti_cmd.vpe); } static struct its_vpe *its_build_vmovi_cmd(struct its_node *its, @@ -630,7 +638,7 @@ static struct its_vpe *its_build_vmovi_cmd(struct its_node *its, its_fixup_cmd(cmd); - return desc->its_vmovi_cmd.vpe; + return valid_vpe(its, desc->its_vmovi_cmd.vpe); } static struct its_vpe *its_build_vmovp_cmd(struct its_node *its, @@ -648,7 +656,7 @@ static struct its_vpe *its_build_vmovp_cmd(struct its_node *its, its_fixup_cmd(cmd); - return desc->its_vmovp_cmd.vpe; + return valid_vpe(its, desc->its_vmovp_cmd.vpe); } static u64 its_cmd_ptr_to_offset(struct its_node *its, From 82f499c8811149069ec958b72a86643a7a289b25 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 22 Jun 2018 10:52:54 +0100 Subject: [PATCH 362/572] irqchip/gic-v3-its: Fix reprogramming of redistributors on CPU hotplug Enabling LPIs was made a lot stricter recently, by checking that they are disabled before enabling them. By doing so, the CPU hotplug case was missed altogether, which leaves LPIs enabled on hotplug off (expecting the CPU to eventually come back), and won't write a different value anyway on hotplug on. So skip that check if that particular case is detected Fixes: 6eb486b66a30 ("irqchip/gic-v3: Ensure GICR_CTLR.EnableLPI=0 is observed before enabling") Reported-by: Sumit Garg Signed-off-by: Marc Zyngier Signed-off-by: Thomas Gleixner Tested-by: Sumit Garg Cc: Jason Cooper Cc: Alexandre Belloni Cc: Yang Yingliang Link: https://lkml.kernel.org/r/20180622095254.5906-8-marc.zyngier@arm.com --- drivers/irqchip/irq-gic-v3-its.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 0269ffb93f6e..d7842d312d3e 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -3427,6 +3427,16 @@ static int redist_disable_lpis(void) u64 timeout = USEC_PER_SEC; u64 val; + /* + * If coming via a CPU hotplug event, we don't need to disable + * LPIs before trying to re-enable them. They are already + * configured and all is well in the world. Detect this case + * by checking the allocation of the pending table for the + * current CPU. + */ + if (gic_data_rdist()->pend_page) + return 0; + if (!gic_rdists_supports_plpis()) { pr_info("CPU%d: LPIs not supported\n", smp_processor_id()); return -ENXIO; From bed9df97b39e73a4607189f2c4b9fb89cc3f7f59 Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 22 Jun 2018 19:35:33 +0800 Subject: [PATCH 363/572] irqdesc: Delete irq_desc_get_msi_desc() Function irq_desc_get_msi_desc() is not referenced in the kernel (and does not seem to have been referenced since e39758e0ea76, 3 years ago), so delete it. Signed-off-by: John Garry Signed-off-by: Thomas Gleixner Cc: Cc: Cc: Cc: Cc: Cc: Link: https://lkml.kernel.org/r/1529667333-92959-1-git-send-email-john.garry@huawei.com --- include/linux/irqdesc.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 25b33b664537..dd1e40ddac7d 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -145,11 +145,6 @@ static inline void *irq_desc_get_handler_data(struct irq_desc *desc) return desc->irq_common_data.handler_data; } -static inline struct msi_desc *irq_desc_get_msi_desc(struct irq_desc *desc) -{ - return desc->irq_common_data.msi_desc; -} - /* * Architectures call this to let the generic IRQ layer * handle an interrupt. From 1f74c8a64798e2c488f86efc97e308b85fb7d7aa Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 22 Jun 2018 11:54:28 +0200 Subject: [PATCH 364/572] x86/mce: Do not overwrite MCi_STATUS in mce_no_way_out() mce_no_way_out() does a quick check during #MC to see whether some of the MCEs logged would require the kernel to panic immediately. And it passes a struct mce where MCi_STATUS gets written. However, after having saved a valid status value, the next iteration of the loop which goes over the MCA banks on the CPU, overwrites the valid status value because we're using struct mce as storage instead of a temporary variable. Which leads to MCE records with an empty status value: mce: [Hardware Error]: CPU 0: Machine Check Exception: 6 Bank 0: 0000000000000000 mce: [Hardware Error]: RIP 10: {trigger_mce+0x7/0x10} In order to prevent the loss of the status register value, return immediately when severity is a panic one so that we can panic immediately with the first fatal MCE logged. This is also the intention of this function and not to noodle over the banks while a fatal MCE is already logged. Tony: read the rest of the MCA bank to populate the struct mce fully. Suggested-by: Tony Luck Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: Link: https://lkml.kernel.org/r/20180622095428.626-8-bp@alien8.de --- arch/x86/kernel/cpu/mcheck/mce.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index cd76380af79f..7e6f51a9d917 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -772,23 +772,25 @@ EXPORT_SYMBOL_GPL(machine_check_poll); static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, struct pt_regs *regs) { - int i, ret = 0; char *tmp; + int i; for (i = 0; i < mca_cfg.banks; i++) { m->status = mce_rdmsrl(msr_ops.status(i)); - if (m->status & MCI_STATUS_VAL) { - __set_bit(i, validp); - if (quirk_no_way_out) - quirk_no_way_out(i, m, regs); - } + if (!(m->status & MCI_STATUS_VAL)) + continue; + + __set_bit(i, validp); + if (quirk_no_way_out) + quirk_no_way_out(i, m, regs); if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) { + mce_read_aux(m, i); *msg = tmp; - ret = 1; + return 1; } } - return ret; + return 0; } /* From 40c36e2741d7fe1e66d6ec55477ba5fd19c9c5d2 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 22 Jun 2018 11:54:23 +0200 Subject: [PATCH 365/572] x86/mce: Fix incorrect "Machine check from unknown source" message Some injection testing resulted in the following console log: mce: [Hardware Error]: CPU 22: Machine Check Exception: f Bank 1: bd80000000100134 mce: [Hardware Error]: RIP 10: {pmem_do_bvec+0x11d/0x330 [nd_pmem]} mce: [Hardware Error]: TSC c51a63035d52 ADDR 3234bc4000 MISC 88 mce: [Hardware Error]: PROCESSOR 0:50654 TIME 1526502199 SOCKET 0 APIC 38 microcode 2000043 mce: [Hardware Error]: Run the above through 'mcelog --ascii' Kernel panic - not syncing: Machine check from unknown source This confused everybody because the first line quite clearly shows that we found a logged error in "Bank 1", while the last line says "unknown source". The problem is that the Linux code doesn't do the right thing for a local machine check that results in a fatal error. It turns out that we know very early in the handler whether the machine check is fatal. The call to mce_no_way_out() has checked all the banks for the CPU that took the local machine check. If it says we must crash, we can do so right away with the right messages. We do scan all the banks again. This means that we might initially not see a problem, but during the second scan find something fatal. If this happens we print a slightly different message (so I can see if it actually every happens). [ bp: Remove unneeded severity assignment. ] Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: Ashok Raj Cc: Dan Williams Cc: Qiuxu Zhuo Cc: linux-edac Cc: stable@vger.kernel.org # 4.2 Link: http://lkml.kernel.org/r/52e049a497e86fd0b71c529651def8871c804df0.1527283897.git.tony.luck@intel.com --- arch/x86/kernel/cpu/mcheck/mce.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 7e6f51a9d917..e93670d736a6 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1207,13 +1207,18 @@ void do_machine_check(struct pt_regs *regs, long error_code) lmce = m.mcgstatus & MCG_STATUS_LMCES; /* + * Local machine check may already know that we have to panic. + * Broadcast machine check begins rendezvous in mce_start() * Go through all banks in exclusion of the other CPUs. This way we * don't report duplicated events on shared banks because the first one - * to see it will clear it. If this is a Local MCE, then no need to - * perform rendezvous. + * to see it will clear it. */ - if (!lmce) + if (lmce) { + if (no_way_out) + mce_panic("Fatal local machine check", &m, msg); + } else { order = mce_start(&no_way_out); + } for (i = 0; i < cfg->banks; i++) { __clear_bit(i, toclear); @@ -1289,12 +1294,17 @@ void do_machine_check(struct pt_regs *regs, long error_code) no_way_out = worst >= MCE_PANIC_SEVERITY; } else { /* - * Local MCE skipped calling mce_reign() - * If we found a fatal error, we need to panic here. + * If there was a fatal machine check we should have + * already called mce_panic earlier in this function. + * Since we re-read the banks, we might have found + * something new. Check again to see if we found a + * fatal error. We call "mce_severity()" again to + * make sure we have the right "msg". */ - if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) - mce_panic("Machine check from unknown source", - NULL, NULL); + if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) { + mce_severity(&m, cfg->tolerant, &msg, true); + mce_panic("Local fatal machine check!", &m, msg); + } } /* From 0218c766263e70795c5eaa17d75ed54bca350950 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Fri, 22 Jun 2018 13:51:26 +0200 Subject: [PATCH 366/572] x86/microcode/intel: Fix memleak in save_microcode_patch() Free useless ucode_patch entry when it's replaced. [ bp: Drop the memfree_patch() two-liner. ] Signed-off-by: Zhenzhong Duan Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: Srinivas REDDY Eeda Link: http://lkml.kernel.org/r/888102f0-fd22-459d-b090-a1bd8a00cb2b@default --- arch/x86/kernel/cpu/microcode/intel.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 1c2cfa0644aa..97ccf4c3b45b 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -190,8 +190,11 @@ static void save_microcode_patch(void *data, unsigned int size) p = memdup_patch(data, size); if (!p) pr_err("Error allocating buffer %p\n", data); - else + else { list_replace(&iter->plist, &p->plist); + kfree(iter->data); + kfree(iter); + } } } From 0447378a4a793da008451fad50bc0f93e9675ae6 Mon Sep 17 00:00:00 2001 From: Marc Orr Date: Wed, 20 Jun 2018 17:21:29 -0700 Subject: [PATCH 367/572] kvm: vmx: Nested VM-entry prereqs for event inj. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch extends the checks done prior to a nested VM entry. Specifically, it extends the check_vmentry_prereqs function with checks for fields relevant to the VM-entry event injection information, as described in the Intel SDM, volume 3. This patch is motivated by a syzkaller bug, where a bad VM-entry interruption information field is generated in the VMCS02, which causes the nested VM launch to fail. Then, KVM fails to resume L1. While KVM should be improved to correctly resume L1 execution after a failed nested launch, this change is justified because the existing code to resume L1 is flaky/ad-hoc and the test coverage for resuming L1 is sparse. Reported-by: syzbot Signed-off-by: Marc Orr [Removed comment whose parts were describing previous revisions and the rest was obvious from function/variable naming. - Radim] Signed-off-by: Radim Krčmář --- arch/x86/include/asm/vmx.h | 3 ++ arch/x86/kvm/vmx.c | 67 ++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/x86.h | 9 +++++ 3 files changed, 79 insertions(+) diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 425e6b8b9547..6aa8499e1f62 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -114,6 +114,7 @@ #define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f #define VMX_MISC_SAVE_EFER_LMA 0x00000020 #define VMX_MISC_ACTIVITY_HLT 0x00000040 +#define VMX_MISC_ZERO_LEN_INS 0x40000000 /* VMFUNC functions */ #define VMX_VMFUNC_EPTP_SWITCHING 0x00000001 @@ -351,11 +352,13 @@ enum vmcs_field { #define VECTORING_INFO_VALID_MASK INTR_INFO_VALID_MASK #define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */ +#define INTR_TYPE_RESERVED (1 << 8) /* reserved */ #define INTR_TYPE_NMI_INTR (2 << 8) /* NMI */ #define INTR_TYPE_HARD_EXCEPTION (3 << 8) /* processor exception */ #define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */ #define INTR_TYPE_PRIV_SW_EXCEPTION (5 << 8) /* ICE breakpoint - undocumented */ #define INTR_TYPE_SOFT_EXCEPTION (6 << 8) /* software exception */ +#define INTR_TYPE_OTHER_EVENT (7 << 8) /* other event */ /* GUEST_INTERRUPTIBILITY_INFO flags. */ #define GUEST_INTR_STATE_STI 0x00000001 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 559a12b6184d..1689f433f3a0 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1705,6 +1705,17 @@ static inline bool nested_cpu_has_vmwrite_any_field(struct kvm_vcpu *vcpu) MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS; } +static inline bool nested_cpu_has_zero_length_injection(struct kvm_vcpu *vcpu) +{ + return to_vmx(vcpu)->nested.msrs.misc_low & VMX_MISC_ZERO_LEN_INS; +} + +static inline bool nested_cpu_supports_monitor_trap_flag(struct kvm_vcpu *vcpu) +{ + return to_vmx(vcpu)->nested.msrs.procbased_ctls_high & + CPU_BASED_MONITOR_TRAP_FLAG; +} + static inline bool nested_cpu_has(struct vmcs12 *vmcs12, u32 bit) { return vmcs12->cpu_based_vm_exec_control & bit; @@ -11620,6 +11631,62 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) !nested_cr3_valid(vcpu, vmcs12->host_cr3)) return VMXERR_ENTRY_INVALID_HOST_STATE_FIELD; + /* + * From the Intel SDM, volume 3: + * Fields relevant to VM-entry event injection must be set properly. + * These fields are the VM-entry interruption-information field, the + * VM-entry exception error code, and the VM-entry instruction length. + */ + if (vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) { + u32 intr_info = vmcs12->vm_entry_intr_info_field; + u8 vector = intr_info & INTR_INFO_VECTOR_MASK; + u32 intr_type = intr_info & INTR_INFO_INTR_TYPE_MASK; + bool has_error_code = intr_info & INTR_INFO_DELIVER_CODE_MASK; + bool should_have_error_code; + bool urg = nested_cpu_has2(vmcs12, + SECONDARY_EXEC_UNRESTRICTED_GUEST); + bool prot_mode = !urg || vmcs12->guest_cr0 & X86_CR0_PE; + + /* VM-entry interruption-info field: interruption type */ + if (intr_type == INTR_TYPE_RESERVED || + (intr_type == INTR_TYPE_OTHER_EVENT && + !nested_cpu_supports_monitor_trap_flag(vcpu))) + return VMXERR_ENTRY_INVALID_CONTROL_FIELD; + + /* VM-entry interruption-info field: vector */ + if ((intr_type == INTR_TYPE_NMI_INTR && vector != NMI_VECTOR) || + (intr_type == INTR_TYPE_HARD_EXCEPTION && vector > 31) || + (intr_type == INTR_TYPE_OTHER_EVENT && vector != 0)) + return VMXERR_ENTRY_INVALID_CONTROL_FIELD; + + /* VM-entry interruption-info field: deliver error code */ + should_have_error_code = + intr_type == INTR_TYPE_HARD_EXCEPTION && prot_mode && + x86_exception_has_error_code(vector); + if (has_error_code != should_have_error_code) + return VMXERR_ENTRY_INVALID_CONTROL_FIELD; + + /* VM-entry exception error code */ + if (has_error_code && + vmcs12->vm_entry_exception_error_code & GENMASK(31, 15)) + return VMXERR_ENTRY_INVALID_CONTROL_FIELD; + + /* VM-entry interruption-info field: reserved bits */ + if (intr_info & INTR_INFO_RESVD_BITS_MASK) + return VMXERR_ENTRY_INVALID_CONTROL_FIELD; + + /* VM-entry instruction length */ + switch (intr_type) { + case INTR_TYPE_SOFT_EXCEPTION: + case INTR_TYPE_SOFT_INTR: + case INTR_TYPE_PRIV_SW_EXCEPTION: + if ((vmcs12->vm_entry_instruction_len > 15) || + (vmcs12->vm_entry_instruction_len == 0 && + !nested_cpu_has_zero_length_injection(vcpu))) + return VMXERR_ENTRY_INVALID_CONTROL_FIELD; + } + } + return 0; } diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 331993c49dae..257f27620bc2 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -110,6 +110,15 @@ static inline bool is_la57_mode(struct kvm_vcpu *vcpu) #endif } +static inline bool x86_exception_has_error_code(unsigned int vector) +{ + static u32 exception_has_error_code = BIT(DF_VECTOR) | BIT(TS_VECTOR) | + BIT(NP_VECTOR) | BIT(SS_VECTOR) | BIT(GP_VECTOR) | + BIT(PF_VECTOR) | BIT(AC_VECTOR); + + return (1U << vector) & exception_has_error_code; +} + static inline bool mmu_is_nested(struct kvm_vcpu *vcpu) { return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu; From 2ddc649810133fcf8e5282eea898ee7ececf161e Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 22 Jun 2018 16:56:14 +0200 Subject: [PATCH 368/572] KVM: fix KVM_CAP_HYPERV_TLBFLUSH paragraph number MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit KVM_CAP_HYPERV_TLBFLUSH collided with KVM_CAP_S390_PSW-BPB, its paragraph number should now be 8.18. Signed-off-by: Vitaly Kuznetsov Signed-off-by: Radim Krčmář --- Documentation/virtual/kvm/api.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 495b7742ab58..d10944e619d3 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -4610,7 +4610,7 @@ This capability indicates that kvm will implement the interfaces to handle reset, migration and nested KVM for branch prediction blocking. The stfle facility 82 should not be provided to the guest without this capability. -8.14 KVM_CAP_HYPERV_TLBFLUSH +8.18 KVM_CAP_HYPERV_TLBFLUSH Architectures: x86 From abcbcb80cd09cd40f2089d912764e315459b71f7 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 22 Jun 2018 16:33:57 +0200 Subject: [PATCH 369/572] time: Make sure jiffies_to_msecs() preserves non-zero time periods MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For the common cases where 1000 is a multiple of HZ, or HZ is a multiple of 1000, jiffies_to_msecs() never returns zero when passed a non-zero time period. However, if HZ > 1000 and not an integer multiple of 1000 (e.g. 1024 or 1200, as used on alpha and DECstation), jiffies_to_msecs() may return zero for small non-zero time periods. This may break code that relies on receiving back a non-zero value. jiffies_to_usecs() does not need such a fix: one jiffy can only be less than one µs if HZ > 1000000, and such large values of HZ are already rejected at build time, twice: - include/linux/jiffies.h does #error if HZ >= 12288, - kernel/time/time.c has BUILD_BUG_ON(HZ > USEC_PER_SEC). Broken since forever. Signed-off-by: Geert Uytterhoeven Signed-off-by: Thomas Gleixner Reviewed-by: Arnd Bergmann Cc: John Stultz Cc: Stephen Boyd Cc: linux-alpha@vger.kernel.org Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180622143357.7495-1-geert@linux-m68k.org --- kernel/time/time.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/time/time.c b/kernel/time/time.c index 6fa99213fc72..2b41e8e2d31d 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -28,6 +28,7 @@ */ #include +#include #include #include #include @@ -314,9 +315,10 @@ unsigned int jiffies_to_msecs(const unsigned long j) return (j + (HZ / MSEC_PER_SEC) - 1)/(HZ / MSEC_PER_SEC); #else # if BITS_PER_LONG == 32 - return (HZ_TO_MSEC_MUL32 * j) >> HZ_TO_MSEC_SHR32; + return (HZ_TO_MSEC_MUL32 * j + (1ULL << HZ_TO_MSEC_SHR32) - 1) >> + HZ_TO_MSEC_SHR32; # else - return (j * HZ_TO_MSEC_NUM) / HZ_TO_MSEC_DEN; + return DIV_ROUND_UP(j * HZ_TO_MSEC_NUM, HZ_TO_MSEC_DEN); # endif #endif } From 48e315618dc4dc8904182cd221e3d395d5d97005 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 22 Jun 2018 12:08:20 +0200 Subject: [PATCH 370/572] MAINTAINERS: Add file patterns for x86 device tree bindings Submitters of device tree binding documentation may forget to CC the subsystem maintainer if this is missing. Signed-off-by: Geert Uytterhoeven Signed-off-by: Thomas Gleixner Cc: "H . Peter Anvin" Cc: Rob Herring Cc: Mark Rutland Cc: devicetree@vger.kernel.org Link: https://lkml.kernel.org/r/20180622100820.29616-1-geert@linux-m68k.org --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 9c125f705f78..60929873b900 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15366,6 +15366,7 @@ M: x86@kernel.org L: linux-kernel@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/core S: Maintained +F: Documentation/devicetree/bindings/x86/ F: Documentation/x86/ F: arch/x86/ From b5b7dd647f2d21b93f734ce890671cd908e69b0a Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 22 Jun 2018 10:25:25 +0100 Subject: [PATCH 371/572] arm64: kpti: Use early_param for kpti= command-line option We inspect __kpti_forced early on as part of the cpufeature enable callback which remaps the swapper page table using non-global entries. Ensure that __kpti_forced has been updated to reflect the kpti= command-line option before we start using it. Fixes: ea1e3de85e94 ("arm64: entry: Add fake CPU feature for unmapping the kernel at EL0") Cc: # 4.16.x- Reported-by: Wei Xu Tested-by: Sudeep Holla Tested-by: Wei Xu Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpufeature.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index d2856b129097..f24892a40d2c 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -937,7 +937,7 @@ static int __init parse_kpti(char *str) __kpti_forced = enabled ? 1 : -1; return 0; } -__setup("kpti=", parse_kpti); +early_param("kpti", parse_kpti); #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ #ifdef CONFIG_ARM64_HW_AFDBM From 71c8fc0c96abf8e53e74ed4d891d671e585f9076 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 22 Jun 2018 16:23:45 +0100 Subject: [PATCH 372/572] arm64: mm: Ensure writes to swapper are ordered wrt subsequent cache maintenance When rewriting swapper using nG mappings, we must performance cache maintenance around each page table access in order to avoid coherency problems with the host's cacheable alias under KVM. To ensure correct ordering of the maintenance with respect to Device memory accesses made with the Stage-1 MMU disabled, DMBs need to be added between the maintenance and the corresponding memory access. This patch adds a missing DMB between writing a new page table entry and performing a clean+invalidate on the same line. Fixes: f992b4dfd58b ("arm64: kpti: Add ->enable callback to remap swapper using nG mappings") Cc: # 4.16.x- Acked-by: Mark Rutland Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/mm/proc.S | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 5f9a73a4452c..03646e6a2ef4 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -217,8 +217,9 @@ ENDPROC(idmap_cpu_replace_ttbr1) .macro __idmap_kpti_put_pgtable_ent_ng, type orr \type, \type, #PTE_NG // Same bit for blocks and pages - str \type, [cur_\()\type\()p] // Update the entry and ensure it - dc civac, cur_\()\type\()p // is visible to all CPUs. + str \type, [cur_\()\type\()p] // Update the entry and ensure + dmb sy // that it is visible to all + dc civac, cur_\()\type\()p // CPUs. .endm /* From 3f6e6986045d47f87bd982910821b7ab9758487e Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 23 Jun 2018 01:06:34 +0900 Subject: [PATCH 373/572] mtd: rawnand: denali_dt: set clk_x_rate to 200 MHz unconditionally Since commit 1bb88666775e ("mtd: nand: denali: handle timing parameters by setup_data_interface()"), denali_dt.c gets the clock rate from the clock driver. The driver expects the frequency of the bus interface clock, whereas the clock driver of SOCFPGA provides the core clock. Thus, the setup_data_interface() hook calculates timing parameters based on a wrong frequency. To make it work without relying on the clock driver, hard-code the clock frequency, 200MHz. This is fine for existing DT of UniPhier, and also fixes the issue of SOCFPGA because both platforms use 200 MHz for the bus interface clock. Fixes: 1bb88666775e ("mtd: nand: denali: handle timing parameters by setup_data_interface()") Cc: linux-stable #4.14+ Reported-by: Philipp Rosenberger Suggested-by: Boris Brezillon Signed-off-by: Masahiro Yamada Tested-by: Richard Weinberger Signed-off-by: Boris Brezillon --- drivers/mtd/nand/raw/denali_dt.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/denali_dt.c b/drivers/mtd/nand/raw/denali_dt.c index cfd33e6ca77f..5869e90cc14b 100644 --- a/drivers/mtd/nand/raw/denali_dt.c +++ b/drivers/mtd/nand/raw/denali_dt.c @@ -123,7 +123,11 @@ static int denali_dt_probe(struct platform_device *pdev) if (ret) return ret; - denali->clk_x_rate = clk_get_rate(dt->clk); + /* + * Hardcode the clock rate for the backward compatibility. + * This works for both SOCFPGA and UniPhier. + */ + denali->clk_x_rate = 200000000; ret = denali_init(denali); if (ret) From 784e0300fe9fe4aa81bd7df9d59e138f56bb605b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 22 Jun 2018 11:45:07 +0100 Subject: [PATCH 374/572] rseq: Avoid infinite recursion when delivering SIGSEGV When delivering a signal to a task that is using rseq, we call into __rseq_handle_notify_resume() so that the registers pushed in the sigframe are updated to reflect the state of the restartable sequence (for example, ensuring that the signal returns to the abort handler if necessary). However, if the rseq management fails due to an unrecoverable fault when accessing userspace or certain combinations of RSEQ_CS_* flags, then we will attempt to deliver a SIGSEGV. This has the potential for infinite recursion if the rseq code continuously fails on signal delivery. Avoid this problem by using force_sigsegv() instead of force_sig(), which is explicitly designed to reset the SEGV handler to SIG_DFL in the case of a recursive fault. In doing so, remove rseq_signal_deliver() from the internal rseq API and have an optional struct ksignal * parameter to rseq_handle_notify_resume() instead. Signed-off-by: Will Deacon Signed-off-by: Thomas Gleixner Acked-by: Mathieu Desnoyers Cc: peterz@infradead.org Cc: paulmck@linux.vnet.ibm.com Cc: boqun.feng@gmail.com Link: https://lkml.kernel.org/r/1529664307-983-1-git-send-email-will.deacon@arm.com --- arch/arm/kernel/signal.c | 4 ++-- arch/powerpc/kernel/signal.c | 4 ++-- arch/x86/entry/common.c | 2 +- arch/x86/kernel/signal.c | 2 +- include/linux/sched.h | 18 +++++++++++------- kernel/rseq.c | 7 ++++--- 6 files changed, 21 insertions(+), 16 deletions(-) diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index f09e9d66d605..dec130e7078c 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -544,7 +544,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) * Increment event counter and perform fixup for the pre-signal * frame. */ - rseq_signal_deliver(regs); + rseq_signal_deliver(ksig, regs); /* * Set up the stack frame @@ -666,7 +666,7 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall) } else { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); - rseq_handle_notify_resume(regs); + rseq_handle_notify_resume(NULL, regs); } } local_irq_disable(); diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index 17fe4339ba59..b3e8db376ecd 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -134,7 +134,7 @@ static void do_signal(struct task_struct *tsk) /* Re-enable the breakpoints for the signal stack */ thread_change_pc(tsk, tsk->thread.regs); - rseq_signal_deliver(tsk->thread.regs); + rseq_signal_deliver(&ksig, tsk->thread.regs); if (is32) { if (ksig.ka.sa.sa_flags & SA_SIGINFO) @@ -170,7 +170,7 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags) if (thread_info_flags & _TIF_NOTIFY_RESUME) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); - rseq_handle_notify_resume(regs); + rseq_handle_notify_resume(NULL, regs); } user_enter(); diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 92190879b228..3b2490b81918 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -164,7 +164,7 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags) if (cached_flags & _TIF_NOTIFY_RESUME) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); - rseq_handle_notify_resume(regs); + rseq_handle_notify_resume(NULL, regs); } if (cached_flags & _TIF_USER_RETURN_NOTIFY) diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 445ca11ff863..92a3b312a53c 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -692,7 +692,7 @@ setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs) * Increment event counter and perform fixup for the pre-signal * frame. */ - rseq_signal_deliver(regs); + rseq_signal_deliver(ksig, regs); /* Set up the stack frame */ if (is_ia32_frame(ksig)) { diff --git a/include/linux/sched.h b/include/linux/sched.h index c1882643d455..9256118bd40c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1799,20 +1799,22 @@ static inline void rseq_set_notify_resume(struct task_struct *t) set_tsk_thread_flag(t, TIF_NOTIFY_RESUME); } -void __rseq_handle_notify_resume(struct pt_regs *regs); +void __rseq_handle_notify_resume(struct ksignal *sig, struct pt_regs *regs); -static inline void rseq_handle_notify_resume(struct pt_regs *regs) +static inline void rseq_handle_notify_resume(struct ksignal *ksig, + struct pt_regs *regs) { if (current->rseq) - __rseq_handle_notify_resume(regs); + __rseq_handle_notify_resume(ksig, regs); } -static inline void rseq_signal_deliver(struct pt_regs *regs) +static inline void rseq_signal_deliver(struct ksignal *ksig, + struct pt_regs *regs) { preempt_disable(); __set_bit(RSEQ_EVENT_SIGNAL_BIT, ¤t->rseq_event_mask); preempt_enable(); - rseq_handle_notify_resume(regs); + rseq_handle_notify_resume(ksig, regs); } /* rseq_preempt() requires preemption to be disabled. */ @@ -1861,10 +1863,12 @@ static inline void rseq_execve(struct task_struct *t) static inline void rseq_set_notify_resume(struct task_struct *t) { } -static inline void rseq_handle_notify_resume(struct pt_regs *regs) +static inline void rseq_handle_notify_resume(struct ksignal *ksig, + struct pt_regs *regs) { } -static inline void rseq_signal_deliver(struct pt_regs *regs) +static inline void rseq_signal_deliver(struct ksignal *ksig, + struct pt_regs *regs) { } static inline void rseq_preempt(struct task_struct *t) diff --git a/kernel/rseq.c b/kernel/rseq.c index ae306f90c514..22b6acf1ad63 100644 --- a/kernel/rseq.c +++ b/kernel/rseq.c @@ -251,10 +251,10 @@ static int rseq_ip_fixup(struct pt_regs *regs) * respect to other threads scheduled on the same CPU, and with respect * to signal handlers. */ -void __rseq_handle_notify_resume(struct pt_regs *regs) +void __rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs) { struct task_struct *t = current; - int ret; + int ret, sig; if (unlikely(t->flags & PF_EXITING)) return; @@ -268,7 +268,8 @@ void __rseq_handle_notify_resume(struct pt_regs *regs) return; error: - force_sig(SIGSEGV, t); + sig = ksig ? ksig->sig : 0; + force_sigsegv(sig, t); } #ifdef CONFIG_DEBUG_RSEQ From 0ae52ddf5bd7f685bb43d7687290f6c2eeacfb31 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 22 Jun 2018 13:05:35 +0200 Subject: [PATCH 375/572] lightnvm: Remove depends on HAS_DMA in case of platform dependency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove dependencies on HAS_DMA where a Kconfig symbol depends on another symbol that implies HAS_DMA, and, optionally, on "|| COMPILE_TEST". In most cases this other symbol is an architecture or platform specific symbol, or PCI. Generic symbols and drivers without platform dependencies keep their dependencies on HAS_DMA, to prevent compiling subsystems or drivers that cannot work anyway. This simplifies the dependencies, and allows to improve compile-testing. Signed-off-by: Geert Uytterhoeven Reviewed-by: Mark Brown Acked-by: Robin Murphy Reviewed-by: Matias Bjørling Signed-off-by: Jens Axboe --- drivers/lightnvm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/lightnvm/Kconfig b/drivers/lightnvm/Kconfig index 10c08982185a..9c03f35d9df1 100644 --- a/drivers/lightnvm/Kconfig +++ b/drivers/lightnvm/Kconfig @@ -4,7 +4,7 @@ menuconfig NVM bool "Open-Channel SSD target support" - depends on BLOCK && HAS_DMA && PCI + depends on BLOCK && PCI select BLK_DEV_NVME help Say Y here to get to enable Open-channel SSDs. From 3ee7e8697d5860b173132606d80a9cd35e7113ee Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 18 Jun 2018 15:46:58 +0200 Subject: [PATCH 376/572] bdi: Fix another oops in wb_workfn() syzbot is reporting NULL pointer dereference at wb_workfn() [1] due to wb->bdi->dev being NULL. And Dmitry confirmed that wb->state was WB_shutting_down after wb->bdi->dev became NULL. This indicates that unregister_bdi() failed to call wb_shutdown() on one of wb objects. The problem is in cgwb_bdi_unregister() which does cgwb_kill() and thus drops bdi's reference to wb structures before going through the list of wbs again and calling wb_shutdown() on each of them. This way the loop iterating through all wbs can easily miss a wb if that wb has already passed through cgwb_remove_from_bdi_list() called from wb_shutdown() from cgwb_release_workfn() and as a result fully shutdown bdi although wb_workfn() for this wb structure is still running. In fact there are also other ways cgwb_bdi_unregister() can race with cgwb_release_workfn() leading e.g. to use-after-free issues: CPU1 CPU2 cgwb_bdi_unregister() cgwb_kill(*slot); cgwb_release() queue_work(cgwb_release_wq, &wb->release_work); cgwb_release_workfn() wb = list_first_entry(&bdi->wb_list, ...) spin_unlock_irq(&cgwb_lock); wb_shutdown(wb); ... kfree_rcu(wb, rcu); wb_shutdown(wb); -> oops use-after-free We solve these issues by synchronizing writeback structure shutdown from cgwb_bdi_unregister() with cgwb_release_workfn() using a new mutex. That way we also no longer need synchronization using WB_shutting_down as the mutex provides it for CONFIG_CGROUP_WRITEBACK case and without CONFIG_CGROUP_WRITEBACK wb_shutdown() can be called only once from bdi_unregister(). Reported-by: syzbot Acked-by: Tejun Heo Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- include/linux/backing-dev-defs.h | 2 +- mm/backing-dev.c | 20 +++++++------------- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index 0bd432a4d7bd..24251762c20c 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -22,7 +22,6 @@ struct dentry; */ enum wb_state { WB_registered, /* bdi_register() was done */ - WB_shutting_down, /* wb_shutdown() in progress */ WB_writeback_running, /* Writeback is in progress */ WB_has_dirty_io, /* Dirty inodes on ->b_{dirty|io|more_io} */ WB_start_all, /* nr_pages == 0 (all) work pending */ @@ -189,6 +188,7 @@ struct backing_dev_info { #ifdef CONFIG_CGROUP_WRITEBACK struct radix_tree_root cgwb_tree; /* radix tree of active cgroup wbs */ struct rb_root cgwb_congested_tree; /* their congested states */ + struct mutex cgwb_release_mutex; /* protect shutdown of wb structs */ #else struct bdi_writeback_congested *wb_congested; #endif diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 347cc834c04a..2e5d3df0853d 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -359,15 +359,8 @@ static void wb_shutdown(struct bdi_writeback *wb) spin_lock_bh(&wb->work_lock); if (!test_and_clear_bit(WB_registered, &wb->state)) { spin_unlock_bh(&wb->work_lock); - /* - * Wait for wb shutdown to finish if someone else is just - * running wb_shutdown(). Otherwise we could proceed to wb / - * bdi destruction before wb_shutdown() is finished. - */ - wait_on_bit(&wb->state, WB_shutting_down, TASK_UNINTERRUPTIBLE); return; } - set_bit(WB_shutting_down, &wb->state); spin_unlock_bh(&wb->work_lock); cgwb_remove_from_bdi_list(wb); @@ -379,12 +372,6 @@ static void wb_shutdown(struct bdi_writeback *wb) mod_delayed_work(bdi_wq, &wb->dwork, 0); flush_delayed_work(&wb->dwork); WARN_ON(!list_empty(&wb->work_list)); - /* - * Make sure bit gets cleared after shutdown is finished. Matches with - * the barrier provided by test_and_clear_bit() above. - */ - smp_wmb(); - clear_and_wake_up_bit(WB_shutting_down, &wb->state); } static void wb_exit(struct bdi_writeback *wb) @@ -508,10 +495,12 @@ static void cgwb_release_workfn(struct work_struct *work) struct bdi_writeback *wb = container_of(work, struct bdi_writeback, release_work); + mutex_lock(&wb->bdi->cgwb_release_mutex); wb_shutdown(wb); css_put(wb->memcg_css); css_put(wb->blkcg_css); + mutex_unlock(&wb->bdi->cgwb_release_mutex); fprop_local_destroy_percpu(&wb->memcg_completions); percpu_ref_exit(&wb->refcnt); @@ -697,6 +686,7 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi) INIT_RADIX_TREE(&bdi->cgwb_tree, GFP_ATOMIC); bdi->cgwb_congested_tree = RB_ROOT; + mutex_init(&bdi->cgwb_release_mutex); ret = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL); if (!ret) { @@ -717,7 +707,10 @@ static void cgwb_bdi_unregister(struct backing_dev_info *bdi) spin_lock_irq(&cgwb_lock); radix_tree_for_each_slot(slot, &bdi->cgwb_tree, &iter, 0) cgwb_kill(*slot); + spin_unlock_irq(&cgwb_lock); + mutex_lock(&bdi->cgwb_release_mutex); + spin_lock_irq(&cgwb_lock); while (!list_empty(&bdi->wb_list)) { wb = list_first_entry(&bdi->wb_list, struct bdi_writeback, bdi_node); @@ -726,6 +719,7 @@ static void cgwb_bdi_unregister(struct backing_dev_info *bdi) spin_lock_irq(&cgwb_lock); } spin_unlock_irq(&cgwb_lock); + mutex_unlock(&bdi->cgwb_release_mutex); } /** From f21c601a2bb319ec19eb4562eadc7797d90fd90e Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 15 Jun 2018 09:35:33 -0400 Subject: [PATCH 377/572] dm: use bio_split() when splitting out the already processed bio Use of bio_clone_bioset() is inefficient if there is no need to clone the original bio's bio_vec array. Best to use the bio_clone_fast() variant. Also, just using bio_advance() is only part of what is needed to properly setup the clone -- it doesn't account for the various bio_integrity() related work that also needs to be performed (see bio_split). Address both of these issues by switching from bio_clone_bioset() to bio_split(). Fixes: 18a25da8 ("dm: ensure bio submission follows a depth-first tree walk") Cc: stable@vger.kernel.org # 4.15+, requires removal of '&' before md->queue->bio_split Reported-by: Christoph Hellwig Reviewed-by: NeilBrown Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index e65429a29c06..a3b103e8e3ce 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1606,10 +1606,9 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md, * the usage of io->orig_bio in dm_remap_zone_report() * won't be affected by this reassignment. */ - struct bio *b = bio_clone_bioset(bio, GFP_NOIO, - &md->queue->bio_split); + struct bio *b = bio_split(bio, bio_sectors(bio) - ci.sector_count, + GFP_NOIO, &md->queue->bio_split); ci.io->orig_bio = b; - bio_advance(bio, (bio_sectors(bio) - ci.sector_count) << 9); bio_chain(b, bio); ret = generic_make_request(bio); break; From 7ccdbf85d3b2237cdbdafe6a47673dbdfab4b7a2 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 18 Jun 2018 11:57:51 -0400 Subject: [PATCH 378/572] dm thin metadata: remove needless work from __commit_transaction Commit 5a32083d03fb5 ("dm: take care to copy the space map roots before locking the superblock") properly removed the calls to dm_sm_root_size() from __write_initial_superblock(). But the dm_sm_root_size() calls were left dangling in __commit_transaction(). Fixes: 5a32083d03fb5 ("dm: take care to copy the space map roots before locking the superblock") Signed-off-by: Mike Snitzer --- drivers/md/dm-thin-metadata.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 36ef284ad086..72142021b5c9 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -776,7 +776,6 @@ static int __write_changed_details(struct dm_pool_metadata *pmd) static int __commit_transaction(struct dm_pool_metadata *pmd) { int r; - size_t metadata_len, data_len; struct thin_disk_superblock *disk_super; struct dm_block *sblock; @@ -797,14 +796,6 @@ static int __commit_transaction(struct dm_pool_metadata *pmd) if (r < 0) return r; - r = dm_sm_root_size(pmd->metadata_sm, &metadata_len); - if (r < 0) - return r; - - r = dm_sm_root_size(pmd->data_sm, &data_len); - if (r < 0) - return r; - r = save_sm_roots(pmd); if (r < 0) return r; From 50a7d3ba7c9ac5e0b7e03fc7f420180989361dbf Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 18 Jun 2018 10:50:33 -0700 Subject: [PATCH 379/572] dm writecache: use 2-factor allocator arguments This adjusts the allocator calls to use the 2-factor argument style, as already done treewide for better defense against allocator overflows. Signed-off-by: Kees Cook [snitzer: tweaked code to leave assignment in a test alone] Signed-off-by: Mike Snitzer --- drivers/md/dm-writecache.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index 5961c7794ef3..07ea6a48aac6 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -259,7 +259,7 @@ static int persistent_memory_claim(struct dm_writecache *wc) if (da != p) { long i; wc->memory_map = NULL; - pages = kvmalloc(p * sizeof(struct page *), GFP_KERNEL); + pages = kvmalloc_array(p, sizeof(struct page *), GFP_KERNEL); if (!pages) { r = -ENOMEM; goto err2; @@ -859,7 +859,7 @@ static int writecache_alloc_entries(struct dm_writecache *wc) if (wc->entries) return 0; - wc->entries = vmalloc(sizeof(struct wc_entry) * wc->n_blocks); + wc->entries = vmalloc(array_size(sizeof(struct wc_entry), wc->n_blocks)); if (!wc->entries) return -ENOMEM; for (b = 0; b < wc->n_blocks; b++) { @@ -1481,9 +1481,9 @@ static void __writecache_writeback_pmem(struct dm_writecache *wc, struct writeba wb->bio.bi_iter.bi_sector = read_original_sector(wc, e); wb->page_offset = PAGE_SIZE; if (max_pages <= WB_LIST_INLINE || - unlikely(!(wb->wc_list = kmalloc(max_pages * sizeof(struct wc_entry *), - GFP_NOIO | __GFP_NORETRY | - __GFP_NOMEMALLOC | __GFP_NOWARN)))) { + unlikely(!(wb->wc_list = kmalloc_array(max_pages, sizeof(struct wc_entry *), + GFP_NOIO | __GFP_NORETRY | + __GFP_NOMEMALLOC | __GFP_NOWARN)))) { wb->wc_list = wb->wc_list_inline; max_pages = WB_LIST_INLINE; } From 2d0b2d64d325e22939d9db3ba784f1236459ed98 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 22 Jun 2018 08:09:11 -0700 Subject: [PATCH 380/572] dm zoned: avoid triggering reclaim from inside dmz_map() This patch avoids that lockdep reports the following: ====================================================== WARNING: possible circular locking dependency detected 4.18.0-rc1 #62 Not tainted ------------------------------------------------------ kswapd0/84 is trying to acquire lock: 00000000c313516d (&xfs_nondir_ilock_class){++++}, at: xfs_free_eofblocks+0xa2/0x1e0 but task is already holding lock: 00000000591c83ae (fs_reclaim){+.+.}, at: __fs_reclaim_acquire+0x5/0x30 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (fs_reclaim){+.+.}: kmem_cache_alloc+0x2c/0x2b0 radix_tree_node_alloc.constprop.19+0x3d/0xc0 __radix_tree_create+0x161/0x1c0 __radix_tree_insert+0x45/0x210 dmz_map+0x245/0x2d0 [dm_zoned] __map_bio+0x40/0x260 __split_and_process_non_flush+0x116/0x220 __split_and_process_bio+0x81/0x180 __dm_make_request.isra.32+0x5a/0x100 generic_make_request+0x36e/0x690 submit_bio+0x6c/0x140 mpage_readpages+0x19e/0x1f0 read_pages+0x6d/0x1b0 __do_page_cache_readahead+0x21b/0x2d0 force_page_cache_readahead+0xc4/0x100 generic_file_read_iter+0x7c6/0xd20 __vfs_read+0x102/0x180 vfs_read+0x9b/0x140 ksys_read+0x55/0xc0 do_syscall_64+0x5a/0x1f0 entry_SYSCALL_64_after_hwframe+0x49/0xbe -> #1 (&dmz->chunk_lock){+.+.}: dmz_map+0x133/0x2d0 [dm_zoned] __map_bio+0x40/0x260 __split_and_process_non_flush+0x116/0x220 __split_and_process_bio+0x81/0x180 __dm_make_request.isra.32+0x5a/0x100 generic_make_request+0x36e/0x690 submit_bio+0x6c/0x140 _xfs_buf_ioapply+0x31c/0x590 xfs_buf_submit_wait+0x73/0x520 xfs_buf_read_map+0x134/0x2f0 xfs_trans_read_buf_map+0xc3/0x580 xfs_read_agf+0xa5/0x1e0 xfs_alloc_read_agf+0x59/0x2b0 xfs_alloc_pagf_init+0x27/0x60 xfs_bmap_longest_free_extent+0x43/0xb0 xfs_bmap_btalloc_nullfb+0x7f/0xf0 xfs_bmap_btalloc+0x428/0x7c0 xfs_bmapi_write+0x598/0xcc0 xfs_iomap_write_allocate+0x15a/0x330 xfs_map_blocks+0x1cf/0x3f0 xfs_do_writepage+0x15f/0x7b0 write_cache_pages+0x1ca/0x540 xfs_vm_writepages+0x65/0xa0 do_writepages+0x48/0xf0 __writeback_single_inode+0x58/0x730 writeback_sb_inodes+0x249/0x5c0 wb_writeback+0x11e/0x550 wb_workfn+0xa3/0x670 process_one_work+0x228/0x670 worker_thread+0x3c/0x390 kthread+0x11c/0x140 ret_from_fork+0x3a/0x50 -> #0 (&xfs_nondir_ilock_class){++++}: down_read_nested+0x43/0x70 xfs_free_eofblocks+0xa2/0x1e0 xfs_fs_destroy_inode+0xac/0x270 dispose_list+0x51/0x80 prune_icache_sb+0x52/0x70 super_cache_scan+0x127/0x1a0 shrink_slab.part.47+0x1bd/0x590 shrink_node+0x3b5/0x470 balance_pgdat+0x158/0x3b0 kswapd+0x1ba/0x600 kthread+0x11c/0x140 ret_from_fork+0x3a/0x50 other info that might help us debug this: Chain exists of: &xfs_nondir_ilock_class --> &dmz->chunk_lock --> fs_reclaim Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(fs_reclaim); lock(&dmz->chunk_lock); lock(fs_reclaim); lock(&xfs_nondir_ilock_class); *** DEADLOCK *** 3 locks held by kswapd0/84: #0: 00000000591c83ae (fs_reclaim){+.+.}, at: __fs_reclaim_acquire+0x5/0x30 #1: 000000000f8208f5 (shrinker_rwsem){++++}, at: shrink_slab.part.47+0x3f/0x590 #2: 00000000cacefa54 (&type->s_umount_key#43){.+.+}, at: trylock_super+0x16/0x50 stack backtrace: CPU: 7 PID: 84 Comm: kswapd0 Not tainted 4.18.0-rc1 #62 Hardware name: Supermicro Super Server/X10SRL-F, BIOS 2.0 12/17/2015 Call Trace: dump_stack+0x85/0xcb print_circular_bug.isra.36+0x1ce/0x1db __lock_acquire+0x124e/0x1310 lock_acquire+0x9f/0x1f0 down_read_nested+0x43/0x70 xfs_free_eofblocks+0xa2/0x1e0 xfs_fs_destroy_inode+0xac/0x270 dispose_list+0x51/0x80 prune_icache_sb+0x52/0x70 super_cache_scan+0x127/0x1a0 shrink_slab.part.47+0x1bd/0x590 shrink_node+0x3b5/0x470 balance_pgdat+0x158/0x3b0 kswapd+0x1ba/0x600 kthread+0x11c/0x140 ret_from_fork+0x3a/0x50 Reported-by: Masato Suzuki Fixes: 4218a9554653 ("dm zoned: use GFP_NOIO in I/O path") Cc: Signed-off-by: Bart Van Assche Signed-off-by: Mike Snitzer --- drivers/md/dm-zoned-target.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c index 3c0e45f4dcf5..a44183ff4be0 100644 --- a/drivers/md/dm-zoned-target.c +++ b/drivers/md/dm-zoned-target.c @@ -787,7 +787,7 @@ static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv) /* Chunk BIO work */ mutex_init(&dmz->chunk_lock); - INIT_RADIX_TREE(&dmz->chunk_rxtree, GFP_KERNEL); + INIT_RADIX_TREE(&dmz->chunk_rxtree, GFP_NOIO); dmz->chunk_wq = alloc_workqueue("dmz_cwq_%s", WQ_MEM_RECLAIM | WQ_UNBOUND, 0, dev->name); if (!dmz->chunk_wq) { From f2ccaa5904666f84d4e6958d6db9d1e65ead5085 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 22 Jun 2018 10:01:19 +0200 Subject: [PATCH 381/572] dm raid: don't use 'const' in function return A newly introduced function has 'const int' as the return type, but as "make W=1" reports, that has no meaning: drivers/md/dm-raid.c:510:18: error: type qualifiers ignored on function return type [-Werror=ignored-qualifiers] This changes the return type to plain 'int'. Signed-off-by: Arnd Bergmann Fixes: 33e53f06850f ("dm raid: introduce extended superblock and new raid types to support takeover/reshaping") Signed-off-by: Mike Snitzer Fixes: 552aa679f2657431 ("dm raid: use rs_is_raid*()") Signed-off-by: Geert Uytterhoeven Signed-off-by: Mike Snitzer --- drivers/md/dm-raid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index ab13fcec3fca..75df4c9d8b54 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -588,7 +588,7 @@ static const char *raid10_md_layout_to_format(int layout) } /* Return md raid10 algorithm for @name */ -static const int raid10_name_to_format(const char *name) +static int raid10_name_to_format(const char *name) { if (!strcasecmp(name, "near")) return ALGORITHM_RAID10_NEAR; From 964d978433a4b9aa1368ff71227ca0027dd1e32f Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 13 Jun 2018 13:43:10 -0500 Subject: [PATCH 382/572] x86/CPU/AMD: Fix LLC ID bit-shift calculation The current logic incorrectly calculates the LLC ID from the APIC ID. Unless specified otherwise, the LLC ID should be calculated by removing the Core and Thread ID bits from the least significant end of the APIC ID. For more info, see "ApicId Enumeration Requirements" in any Fam17h PPR document. [ bp: Improve commit message. ] Fixes: 68091ee7ac3c ("Calculate last level cache ID from number of sharing threads") Signed-off-by: Suravee Suthikulpanit Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Link: http://lkml.kernel.org/r/1528915390-30533-1-git-send-email-suravee.suthikulpanit@amd.com --- arch/x86/kernel/cpu/cacheinfo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index 38354c66df81..0c5fcbd998cf 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -671,7 +671,7 @@ void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id) num_sharing_cache = ((eax >> 14) & 0xfff) + 1; if (num_sharing_cache) { - int bits = get_count_order(num_sharing_cache) - 1; + int bits = get_count_order(num_sharing_cache); per_cpu(cpu_llc_id, cpu) = c->apicid >> bits; } From fe2a19652918a5247418aed48a247414a5e45fe2 Mon Sep 17 00:00:00 2001 From: Lyude Paul Date: Thu, 21 Jun 2018 16:48:26 -0400 Subject: [PATCH 383/572] drm/amdgpu: Count disabled CRTCs in commit tail earlier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes a regression I accidentally reduced that was picked up by kasan, where we were checking the CRTC atomic states after DRM's helpers had already freed them. Example: ================================================================== BUG: KASAN: use-after-free in amdgpu_dm_atomic_commit_tail.cold.50+0x13d/0x15a [amdgpu] Read of size 1 at addr ffff8803a697b071 by task kworker/u16:0/7 CPU: 7 PID: 7 Comm: kworker/u16:0 Tainted: G O 4.18.0-rc1Lyude-Upstream+ #1 Hardware name: HP HP ZBook 15 G4/8275, BIOS P70 Ver. 01.21 05/02/2018 Workqueue: events_unbound commit_work [drm_kms_helper] Call Trace: dump_stack+0xc1/0x169 ? dump_stack_print_info.cold.1+0x42/0x42 ? kmsg_dump_rewind_nolock+0xd9/0xd9 ? printk+0x9f/0xc5 ? amdgpu_dm_atomic_commit_tail.cold.50+0x13d/0x15a [amdgpu] print_address_description+0x6c/0x23c ? amdgpu_dm_atomic_commit_tail.cold.50+0x13d/0x15a [amdgpu] kasan_report.cold.6+0x241/0x2fd amdgpu_dm_atomic_commit_tail.cold.50+0x13d/0x15a [amdgpu] ? commit_planes_to_stream.constprop.45+0x13b0/0x13b0 [amdgpu] ? cpu_load_update_active+0x290/0x290 ? finish_task_switch+0x2bd/0x840 ? __switch_to_asm+0x34/0x70 ? read_word_at_a_time+0xe/0x20 ? strscpy+0x14b/0x460 ? drm_atomic_helper_wait_for_dependencies+0x47d/0x7e0 [drm_kms_helper] commit_tail+0x96/0xe0 [drm_kms_helper] process_one_work+0x88a/0x1360 ? create_worker+0x540/0x540 ? __sched_text_start+0x8/0x8 ? move_queued_task+0x760/0x760 ? call_rcu_sched+0x20/0x20 ? vsnprintf+0xcda/0x1350 ? wait_woken+0x1c0/0x1c0 ? mutex_unlock+0x1d/0x40 ? init_timer_key+0x190/0x230 ? schedule+0xea/0x390 ? __schedule+0x1ea0/0x1ea0 ? need_to_create_worker+0xe4/0x210 ? init_worker_pool+0x700/0x700 ? try_to_del_timer_sync+0xbf/0x110 ? del_timer+0x120/0x120 ? __mutex_lock_slowpath+0x10/0x10 worker_thread+0x196/0x11f0 ? flush_rcu_work+0x50/0x50 ? __switch_to_asm+0x34/0x70 ? __switch_to_asm+0x34/0x70 ? __switch_to_asm+0x40/0x70 ? __switch_to_asm+0x34/0x70 ? __switch_to_asm+0x40/0x70 ? __switch_to_asm+0x34/0x70 ? __switch_to_asm+0x40/0x70 ? __schedule+0x7d6/0x1ea0 ? migrate_swap_stop+0x850/0x880 ? __sched_text_start+0x8/0x8 ? save_stack+0x8c/0xb0 ? kasan_kmalloc+0xbf/0xe0 ? kmem_cache_alloc_trace+0xe4/0x190 ? kthread+0x98/0x390 ? ret_from_fork+0x35/0x40 ? ret_from_fork+0x35/0x40 ? deactivate_slab.isra.67+0x3c4/0x5c0 ? kthread+0x98/0x390 ? kthread+0x98/0x390 ? set_track+0x76/0x120 ? schedule+0xea/0x390 ? __schedule+0x1ea0/0x1ea0 ? wait_woken+0x1c0/0x1c0 ? kasan_unpoison_shadow+0x30/0x40 ? parse_args.cold.15+0x17a/0x17a ? flush_rcu_work+0x50/0x50 kthread+0x2d4/0x390 ? kthread_create_worker_on_cpu+0xc0/0xc0 ret_from_fork+0x35/0x40 Allocated by task 1124: kasan_kmalloc+0xbf/0xe0 kmem_cache_alloc_trace+0xe4/0x190 dm_crtc_duplicate_state+0x78/0x130 [amdgpu] drm_atomic_get_crtc_state+0x147/0x410 [drm] page_flip_common+0x57/0x230 [drm_kms_helper] drm_atomic_helper_page_flip+0xa6/0x110 [drm_kms_helper] drm_mode_page_flip_ioctl+0xc4b/0x10a0 [drm] drm_ioctl_kernel+0x1d4/0x260 [drm] drm_ioctl+0x433/0x920 [drm] amdgpu_drm_ioctl+0x11d/0x290 [amdgpu] do_vfs_ioctl+0x1a1/0x13d0 ksys_ioctl+0x60/0x90 __x64_sys_ioctl+0x6f/0xb0 do_syscall_64+0x147/0x440 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Freed by task 1124: __kasan_slab_free+0x12e/0x180 kfree+0x92/0x1a0 drm_atomic_state_default_clear+0x315/0xc40 [drm] __drm_atomic_state_free+0x35/0xd0 [drm] drm_atomic_helper_update_plane+0xac/0x350 [drm_kms_helper] __setplane_internal+0x2d6/0x840 [drm] drm_mode_cursor_universal+0x41e/0xbe0 [drm] drm_mode_cursor_common+0x49f/0x880 [drm] drm_mode_cursor_ioctl+0xd8/0x130 [drm] drm_ioctl_kernel+0x1d4/0x260 [drm] drm_ioctl+0x433/0x920 [drm] amdgpu_drm_ioctl+0x11d/0x290 [amdgpu] do_vfs_ioctl+0x1a1/0x13d0 ksys_ioctl+0x60/0x90 __x64_sys_ioctl+0x6f/0xb0 do_syscall_64+0x147/0x440 entry_SYSCALL_64_after_hwframe+0x44/0xa9 The buggy address belongs to the object at ffff8803a697b068 which belongs to the cache kmalloc-1024 of size 1024 The buggy address is located 9 bytes inside of 1024-byte region [ffff8803a697b068, ffff8803a697b468) The buggy address belongs to the page: page:ffffea000e9a5e00 count:1 mapcount:0 mapping:ffff88041e00efc0 index:0x0 compound_mapcount: 0 flags: 0x8000000000008100(slab|head) raw: 8000000000008100 ffffea000ecbc208 ffff88041e000c70 ffff88041e00efc0 raw: 0000000000000000 0000000000170017 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8803a697af00: fb fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff8803a697af80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff8803a697b000: fc fc fc fc fc fc fc fc fc fc fc fc fc fb fb fb ^ ffff8803a697b080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8803a697b100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== So, we fix this by counting the number of CRTCs this atomic commit disabled early on in the function before their atomic states have been freed, then use that count later to do the appropriate number of RPM puts at the end of the function. Acked-by: Michel Dänzer Reviewed-by: Harry Wentland Cc: stable@vger.kernel.org Fixes: 97028037a38ae ("drm/amdgpu: Grab/put runtime PM references in atomic_commit_tail()") Signed-off-by: Lyude Paul Cc: Michel Dänzer Reported-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index f9add85157e7..689dbdf44bbf 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4206,6 +4206,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) struct drm_connector *connector; struct drm_connector_state *old_con_state, *new_con_state; struct dm_crtc_state *dm_old_crtc_state, *dm_new_crtc_state; + int crtc_disable_count = 0; drm_atomic_helper_update_legacy_modeset_state(dev, state); @@ -4410,6 +4411,9 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); bool modeset_needed; + if (old_crtc_state->active && !new_crtc_state->active) + crtc_disable_count++; + dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); dm_old_crtc_state = to_dm_crtc_state(old_crtc_state); modeset_needed = modeset_required( @@ -4463,11 +4467,9 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) * so we can put the GPU into runtime suspend if we're not driving any * displays anymore */ + for (i = 0; i < crtc_disable_count; i++) + pm_runtime_put_autosuspend(dev->dev); pm_runtime_mark_last_busy(dev->dev); - for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { - if (old_crtc_state->active && !new_crtc_state->active) - pm_runtime_put_autosuspend(dev->dev); - } } From 38e624a18f9a05b8c894409be6b14709a7206c7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Thu, 21 Jun 2018 11:27:46 +0200 Subject: [PATCH 384/572] drm/amdgpu: GPU vs CPU page size fixes in amdgpu_vm_bo_split_mapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit start / last / max_entries are numbers of GPU pages, pfn / count are numbers of CPU pages. Convert between them accordingly. Fixes badness on systems with > 4K page size. Cc: stable@vger.kernel.org Bugzilla: https://bugs.freedesktop.org/106258 Reported-by: Matt Corallo Tested-by: foxbat@ruin.net Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index b0eb2f537392..edf16b2b957a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1463,7 +1463,9 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, uint64_t count; max_entries = min(max_entries, 16ull * 1024ull); - for (count = 1; count < max_entries; ++count) { + for (count = 1; + count < max_entries / (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); + ++count) { uint64_t idx = pfn + count; if (pages_addr[idx] != @@ -1476,7 +1478,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, dma_addr = pages_addr; } else { addr = pages_addr[pfn]; - max_entries = count; + max_entries = count * (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); } } else if (flags & AMDGPU_PTE_VALID) { @@ -1491,7 +1493,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, if (r) return r; - pfn += last - start + 1; + pfn += (last - start + 1) / (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); if (nodes && nodes->size == pfn) { pfn = 0; ++nodes; From b8f1f65882f07913157c44673af7ec0b308d03eb Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 21 Jun 2018 13:11:31 +0800 Subject: [PATCH 385/572] vhost_net: validate sock before trying to put its fd Sock will be NULL if we pass -1 to vhost_net_set_backend(), but when we meet errors during ubuf allocation, the code does not check for NULL before calling sockfd_put(), this will lead NULL dereferencing. Fixing by checking sock pointer before. Fixes: bab632d69ee4 ("vhost: vhost TX zero-copy support") Reported-by: Dan Carpenter Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- drivers/vhost/net.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 686dc670fd29..29756d88799b 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -1226,7 +1226,8 @@ err_used: if (ubufs) vhost_net_ubuf_put_wait_and_free(ubufs); err_ubufs: - sockfd_put(sock); + if (sock) + sockfd_put(sock); err_vq: mutex_unlock(&vq->mutex); err: From 6c6da92808442908287fae8ebb0ca041a52469f4 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 21 Jun 2018 19:49:36 +0800 Subject: [PATCH 386/572] ipv6: mcast: fix unsolicited report interval after receiving querys After recieving MLD querys, we update idev->mc_maxdelay with max_delay from query header. This make the later unsolicited reports have the same interval with mc_maxdelay, which means we may send unsolicited reports with long interval time instead of default configured interval time. Also as we will not call ipv6_mc_reset() after device up. This issue will be there even after leave the group and join other groups. Fixes: fc4eba58b4c14 ("ipv6: make unsolicited report intervals configurable for mld") Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- net/ipv6/mcast.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 975021df7c1c..c0c74088f2af 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -2082,7 +2082,8 @@ void ipv6_mc_dad_complete(struct inet6_dev *idev) mld_send_initial_cr(idev); idev->mc_dad_count--; if (idev->mc_dad_count) - mld_dad_start_timer(idev, idev->mc_maxdelay); + mld_dad_start_timer(idev, + unsolicited_report_interval(idev)); } } @@ -2094,7 +2095,8 @@ static void mld_dad_timer_expire(struct timer_list *t) if (idev->mc_dad_count) { idev->mc_dad_count--; if (idev->mc_dad_count) - mld_dad_start_timer(idev, idev->mc_maxdelay); + mld_dad_start_timer(idev, + unsolicited_report_interval(idev)); } in6_dev_put(idev); } @@ -2452,7 +2454,8 @@ static void mld_ifc_timer_expire(struct timer_list *t) if (idev->mc_ifc_count) { idev->mc_ifc_count--; if (idev->mc_ifc_count) - mld_ifc_start_timer(idev, idev->mc_maxdelay); + mld_ifc_start_timer(idev, + unsolicited_report_interval(idev)); } in6_dev_put(idev); } From 32e6996ca697099a4be5cd0dbd4b436699ca81a9 Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Fri, 22 Jun 2018 10:37:05 +1000 Subject: [PATCH 387/572] Documentation: e100: Use correct heading adornment Recently documentation file was converted to rst. The document title has the incorrect heading adornment. From kernel docs: * Please stick to this order of heading adornments: 1. ``=`` with overline for document title:: ============== Document title ============== Add overline heading adornment to document title. Fixes commit (85d63445f411 Documentation: e100: Update the Intel 10/100 driver doc) CC: Jeff Kirsher Signed-off-by: Tobin C. Harding Acked-by: Jeff Kirsher Signed-off-by: David S. Miller --- Documentation/networking/e100.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/networking/e100.rst b/Documentation/networking/e100.rst index d4d837027925..59b80608e27d 100644 --- a/Documentation/networking/e100.rst +++ b/Documentation/networking/e100.rst @@ -1,3 +1,4 @@ +============================================================== Linux* Base Driver for the Intel(R) PRO/100 Family of Adapters ============================================================== From 3be40e54764d3ac9c004dc1646ab2c4cc0f0905e Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Fri, 22 Jun 2018 10:37:06 +1000 Subject: [PATCH 388/572] Documentation: e1000: Use correct heading adornment Recently documentation file was converted to rst. The document title has the incorrect heading adornment. From kernel docs: * Please stick to this order of heading adornments: 1. ``=`` with overline for document title:: ============== Document title ============== Add overline heading adornment to document title. Fixes commit (228046e76189 Documentation: e1000: Update kernel documentation) CC: Jeff Kirsher Signed-off-by: Tobin C. Harding Acked-by: Jeff Kirsher Signed-off-by: David S. Miller --- Documentation/networking/e1000.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/networking/e1000.rst b/Documentation/networking/e1000.rst index 616848940e63..55f28e5043b6 100644 --- a/Documentation/networking/e1000.rst +++ b/Documentation/networking/e1000.rst @@ -1,3 +1,4 @@ +=========================================================== Linux* Base Driver for Intel(R) Ethernet Network Connection =========================================================== From 3b0c3ebe2a42ce18a59828acc4578166367dc7b5 Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Fri, 22 Jun 2018 10:37:07 +1000 Subject: [PATCH 389/572] Documentation: e100: Fix docs build error Recent patch updated e100 docs to rst format. Docs build (`make htmldocs`) is currently failing due to this file with error: (SEVERE/4) Unexpected section title. This is because a section of the file is indented 2 spaces. Build error can be cleared by aligning the text with column 0. While we are changing these lines we can make sure line length does not exceed 72, that newlines following headings are uniform, and that full stops are followed by two spaces. Align text with column 0, limit line length to 72, ensure two spaces follow all full stops, ensure uniform use of newlines after heading. Fixes commit (85d63445f411 Documentation: e100: Update the Intel 10/100 driver doc) CC: Jeff Kirsher Signed-off-by: Tobin C. Harding Acked-by: Jeff Kirsher Signed-off-by: David S. Miller --- Documentation/networking/e100.rst | 111 +++++++++++++++--------------- 1 file changed, 56 insertions(+), 55 deletions(-) diff --git a/Documentation/networking/e100.rst b/Documentation/networking/e100.rst index 59b80608e27d..9708f5fa76de 100644 --- a/Documentation/networking/e100.rst +++ b/Documentation/networking/e100.rst @@ -87,83 +87,84 @@ Event Log Message Level: The driver uses the message level flag to log events Additional Configurations ========================= - Configuring the Driver on Different Distributions - ------------------------------------------------- +Configuring the Driver on Different Distributions +------------------------------------------------- - Configuring a network driver to load properly when the system is started is - distribution dependent. Typically, the configuration process involves adding - an alias line to /etc/modprobe.d/*.conf as well as editing other system - startup scripts and/or configuration files. Many popular Linux - distributions ship with tools to make these changes for you. To learn the - proper way to configure a network device for your system, refer to your - distribution documentation. If during this process you are asked for the - driver or module name, the name for the Linux Base Driver for the Intel - PRO/100 Family of Adapters is e100. +Configuring a network driver to load properly when the system is started +is distribution dependent. Typically, the configuration process involves +adding an alias line to /etc/modprobe.d/*.conf as well as editing other +system startup scripts and/or configuration files. Many popular Linux +distributions ship with tools to make these changes for you. To learn +the proper way to configure a network device for your system, refer to +your distribution documentation. If during this process you are asked +for the driver or module name, the name for the Linux Base Driver for +the Intel PRO/100 Family of Adapters is e100. - As an example, if you install the e100 driver for two PRO/100 adapters - (eth0 and eth1), add the following to a configuration file in /etc/modprobe.d/ +As an example, if you install the e100 driver for two PRO/100 adapters +(eth0 and eth1), add the following to a configuration file in +/etc/modprobe.d/:: alias eth0 e100 alias eth1 e100 - Viewing Link Messages - --------------------- - In order to see link messages and other Intel driver information on your - console, you must set the dmesg level up to six. This can be done by - entering the following on the command line before loading the e100 driver:: +Viewing Link Messages +--------------------- + +In order to see link messages and other Intel driver information on your +console, you must set the dmesg level up to six. This can be done by +entering the following on the command line before loading the e100 +driver:: dmesg -n 6 - If you wish to see all messages issued by the driver, including debug - messages, set the dmesg level to eight. +If you wish to see all messages issued by the driver, including debug +messages, set the dmesg level to eight. - NOTE: This setting is not saved across reboots. +NOTE: This setting is not saved across reboots. +ethtool +------- - ethtool - ------- +The driver utilizes the ethtool interface for driver configuration and +diagnostics, as well as displaying statistical information. The ethtool +version 1.6 or later is required for this functionality. - The driver utilizes the ethtool interface for driver configuration and - diagnostics, as well as displaying statistical information. The ethtool - version 1.6 or later is required for this functionality. +The latest release of ethtool can be found from +https://www.kernel.org/pub/software/network/ethtool/ - The latest release of ethtool can be found from - https://www.kernel.org/pub/software/network/ethtool/ +Enabling Wake on LAN* (WoL) +--------------------------- +WoL is provided through the ethtool* utility. For instructions on +enabling WoL with ethtool, refer to the ethtool man page. WoL will be +enabled on the system during the next shut down or reboot. For this +driver version, in order to enable WoL, the e100 driver must be loaded +when shutting down or rebooting the system. - Enabling Wake on LAN* (WoL) - --------------------------- - WoL is provided through the ethtool* utility. For instructions on enabling - WoL with ethtool, refer to the ethtool man page. +NAPI +---- - WoL will be enabled on the system during the next shut down or reboot. For - this driver version, in order to enable WoL, the e100 driver must be - loaded when shutting down or rebooting the system. +NAPI (Rx polling mode) is supported in the e100 driver. - NAPI - ---- +See https://wiki.linuxfoundation.org/networking/napi for more +information on NAPI. - NAPI (Rx polling mode) is supported in the e100 driver. +Multiple Interfaces on Same Ethernet Broadcast Network +------------------------------------------------------ - See https://wiki.linuxfoundation.org/networking/napi for more information - on NAPI. +Due to the default ARP behavior on Linux, it is not possible to have one +system on two IP networks in the same Ethernet broadcast domain +(non-partitioned switch) behave as expected. All Ethernet interfaces +will respond to IP traffic for any IP address assigned to the system. +This results in unbalanced receive traffic. - Multiple Interfaces on Same Ethernet Broadcast Network - ------------------------------------------------------ +If you have multiple interfaces in a server, either turn on ARP +filtering by - Due to the default ARP behavior on Linux, it is not possible to have - one system on two IP networks in the same Ethernet broadcast domain - (non-partitioned switch) behave as expected. All Ethernet interfaces - will respond to IP traffic for any IP address assigned to the system. - This results in unbalanced receive traffic. +(1) entering:: echo 1 > /proc/sys/net/ipv4/conf/all/arp_filter + (this only works if your kernel's version is higher than 2.4.5), or - If you have multiple interfaces in a server, either turn on ARP - filtering by - - (1) entering:: echo 1 > /proc/sys/net/ipv4/conf/all/arp_filter - (this only works if your kernel's version is higher than 2.4.5), or - - (2) installing the interfaces in separate broadcast domains (either - in different switches or in a switch partitioned to VLANs). +(2) installing the interfaces in separate broadcast domains (either + in different switches or in a switch partitioned to VLANs). Support From 805f16a5f12fd68e10841013ccfaceb2f4d7066a Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Fri, 22 Jun 2018 10:37:08 +1000 Subject: [PATCH 390/572] Documentation: e1000: Fix docs build error Recent patch updated e1000 docs to rst format. Docs build (`make htmldocs`) is currently failing due to this file with error: (SEVERE/4) Unexpected section title. This is because a section of the file is indented 2 spaces. Build error can be cleared by aligning the text with column 0. While we are changing these lines we can make sure line length does not exceed 72, that newlines following headings are uniform, and that full stops are followed by two spaces. Align text with column 0, limit line length to 72, ensure two spaces follow all full stops, ensure uniform use of newlines after heading. Fixes commit (228046e76189 Documentation: e1000: Update kernel documentation) CC: Jeff Kirsher Signed-off-by: Tobin C. Harding Acked-by: Jeff Kirsher Signed-off-by: David S. Miller --- Documentation/networking/e1000.rst | 75 +++++++++++++++--------------- 1 file changed, 38 insertions(+), 37 deletions(-) diff --git a/Documentation/networking/e1000.rst b/Documentation/networking/e1000.rst index 55f28e5043b6..144b87eef153 100644 --- a/Documentation/networking/e1000.rst +++ b/Documentation/networking/e1000.rst @@ -355,57 +355,58 @@ previously mentioned to force the adapter to the same speed and duplex. Additional Configurations ========================= - Jumbo Frames - ------------ - Jumbo Frames support is enabled by changing the MTU to a value larger than - the default of 1500. Use the ifconfig command to increase the MTU size. - For example:: +Jumbo Frames +------------ +Jumbo Frames support is enabled by changing the MTU to a value larger +than the default of 1500. Use the ifconfig command to increase the MTU +size. For example:: ifconfig eth mtu 9000 up - This setting is not saved across reboots. It can be made permanent if - you add:: +This setting is not saved across reboots. It can be made permanent if +you add:: MTU=9000 - to the file /etc/sysconfig/network-scripts/ifcfg-eth. This example - applies to the Red Hat distributions; other distributions may store this - setting in a different location. +to the file /etc/sysconfig/network-scripts/ifcfg-eth. This example +applies to the Red Hat distributions; other distributions may store this +setting in a different location. - Notes: - Degradation in throughput performance may be observed in some Jumbo frames - environments. If this is observed, increasing the application's socket buffer - size and/or increasing the /proc/sys/net/ipv4/tcp_*mem entry values may help. - See the specific application manual and /usr/src/linux*/Documentation/ - networking/ip-sysctl.txt for more details. +Notes: Degradation in throughput performance may be observed in some +Jumbo frames environments. If this is observed, increasing the +application's socket buffer size and/or increasing the +/proc/sys/net/ipv4/tcp_*mem entry values may help. See the specific +application manual and /usr/src/linux*/Documentation/ +networking/ip-sysctl.txt for more details. - - The maximum MTU setting for Jumbo Frames is 16110. This value coincides - with the maximum Jumbo Frames size of 16128. +- The maximum MTU setting for Jumbo Frames is 16110. This value + coincides with the maximum Jumbo Frames size of 16128. - - Using Jumbo frames at 10 or 100 Mbps is not supported and may result in - poor performance or loss of link. +- Using Jumbo frames at 10 or 100 Mbps is not supported and may result + in poor performance or loss of link. - - Adapters based on the Intel(R) 82542 and 82573V/E controller do not - support Jumbo Frames. These correspond to the following product names: - Intel(R) PRO/1000 Gigabit Server Adapter - Intel(R) PRO/1000 PM Network Connection +- Adapters based on the Intel(R) 82542 and 82573V/E controller do not + support Jumbo Frames. These correspond to the following product names: + Intel(R) PRO/1000 Gigabit Server Adapter Intel(R) PRO/1000 PM Network + Connection - ethtool - ------- - The driver utilizes the ethtool interface for driver configuration and - diagnostics, as well as displaying statistical information. The ethtool - version 1.6 or later is required for this functionality. +ethtool +------- +The driver utilizes the ethtool interface for driver configuration and +diagnostics, as well as displaying statistical information. The ethtool +version 1.6 or later is required for this functionality. - The latest release of ethtool can be found from - https://www.kernel.org/pub/software/network/ethtool/ +The latest release of ethtool can be found from +https://www.kernel.org/pub/software/network/ethtool/ - Enabling Wake on LAN* (WoL) - --------------------------- - WoL is configured through the ethtool* utility. +Enabling Wake on LAN* (WoL) +--------------------------- +WoL is configured through the ethtool* utility. + +WoL will be enabled on the system during the next shut down or reboot. +For this driver version, in order to enable WoL, the e1000 driver must be +loaded when shutting down or rebooting the system. - WoL will be enabled on the system during the next shut down or reboot. - For this driver version, in order to enable WoL, the e1000 driver must be - loaded when shutting down or rebooting the system. Support ======= From 271f7ff5aa5a73488b7a9d8b84b5205fb5b2f7cc Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Fri, 22 Jun 2018 10:15:39 +0200 Subject: [PATCH 391/572] net: mvneta: fix the Rx desc DMA address in the Rx path When using s/w buffer management, buffers are allocated and DMA mapped. When doing so on an arm64 platform, an offset correction is applied on the DMA address, before storing it in an Rx descriptor. The issue is this DMA address is then used later in the Rx path without removing the offset correction. Thus the DMA address is wrong, which can led to various issues. This patch fixes this by removing the offset correction from the DMA address retrieved from the Rx descriptor before using it in the Rx path. Fixes: 8d5047cf9ca2 ("net: mvneta: Convert to be 64 bits compatible") Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 17a904cc6a5e..0ad2f3f7da85 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -1932,7 +1932,7 @@ static int mvneta_rx_swbm(struct mvneta_port *pp, int rx_todo, rx_bytes = rx_desc->data_size - (ETH_FCS_LEN + MVNETA_MH_SIZE); index = rx_desc - rxq->descs; data = rxq->buf_virt_addr[index]; - phys_addr = rx_desc->buf_phys_addr; + phys_addr = rx_desc->buf_phys_addr - pp->rx_offset_correction; if (!mvneta_rxq_desc_is_first_last(rx_status) || (rx_status & MVNETA_RXD_ERR_SUMMARY)) { From c2cd650b5b2bd6b57f2f62be65b1a8a576d4de6d Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Fri, 22 Jun 2018 11:50:52 +0200 Subject: [PATCH 392/572] net: mscc: make sparse happy This patch fixes a sparse warning about using an incorrect type in argument 2 of ocelot_write_rix(), as an u32 was expected but a __be32 was given. The conversion to u32 is forced, which is safe as the value will be written as-is in the hardware without any modification. Fixes: 08d02364b12f ("net: mscc: fix the injection header") Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller --- drivers/net/ethernet/mscc/ocelot.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 52c57e0ff617..776a8a9be8e3 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -374,7 +374,8 @@ static int ocelot_port_xmit(struct sk_buff *skb, struct net_device *dev) ocelot_gen_ifh(ifh, &info); for (i = 0; i < IFH_LEN; i++) - ocelot_write_rix(ocelot, cpu_to_be32(ifh[i]), QS_INJ_WR, grp); + ocelot_write_rix(ocelot, (__force u32)cpu_to_be32(ifh[i]), + QS_INJ_WR, grp); count = (skb->len + 3) / 4; last = skb->len % 4; From 935c5e3eafe8186dd13e1eda24ad82ecae942852 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 22 Jun 2018 12:08:13 +0200 Subject: [PATCH 393/572] MAINTAINERS: Add file patterns for dsa device tree bindings Submitters of device tree binding documentation may forget to CC the subsystem maintainer if this is missing. Signed-off-by: Geert Uytterhoeven Signed-off-by: David S. Miller --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index edf3cf5ea691..977f9a431f77 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9882,6 +9882,7 @@ M: Andrew Lunn M: Vivien Didelot M: Florian Fainelli S: Maintained +F: Documentation/devicetree/bindings/net/dsa/ F: net/dsa/ F: include/net/dsa.h F: include/linux/dsa/ From e020797b7def11c8feeb3ee5c1f48c12cb959def Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 22 Jun 2018 13:08:43 +0200 Subject: [PATCH 394/572] net: Remove depends on HAS_DMA in case of platform dependency Remove dependencies on HAS_DMA where a Kconfig symbol depends on another symbol that implies HAS_DMA, and, optionally, on "|| COMPILE_TEST". In most cases this other symbol is an architecture or platform specific symbol, or PCI. Generic symbols and drivers without platform dependencies keep their dependencies on HAS_DMA, to prevent compiling subsystems or drivers that cannot work anyway. This simplifies the dependencies, and allows to improve compile-testing. Signed-off-by: Geert Uytterhoeven Reviewed-by: Mark Brown Acked-by: Robin Murphy Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/Kconfig | 2 +- drivers/net/ethernet/apm/xgene-v2/Kconfig | 1 - drivers/net/ethernet/apm/xgene/Kconfig | 1 - drivers/net/ethernet/arc/Kconfig | 6 ++++-- drivers/net/ethernet/broadcom/Kconfig | 2 -- drivers/net/ethernet/calxeda/Kconfig | 2 +- drivers/net/ethernet/hisilicon/Kconfig | 2 +- drivers/net/ethernet/marvell/Kconfig | 8 +++----- drivers/net/ethernet/mellanox/mlxsw/Kconfig | 2 +- drivers/net/ethernet/renesas/Kconfig | 2 -- drivers/net/wireless/broadcom/brcm80211/Kconfig | 1 - drivers/net/wireless/quantenna/qtnfmac/Kconfig | 2 +- 12 files changed, 12 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/amd/Kconfig b/drivers/net/ethernet/amd/Kconfig index d5c15e8bb3de..f273af136fc7 100644 --- a/drivers/net/ethernet/amd/Kconfig +++ b/drivers/net/ethernet/amd/Kconfig @@ -173,7 +173,7 @@ config SUNLANCE config AMD_XGBE tristate "AMD 10GbE Ethernet driver" - depends on ((OF_NET && OF_ADDRESS) || ACPI || PCI) && HAS_IOMEM && HAS_DMA + depends on ((OF_NET && OF_ADDRESS) || ACPI || PCI) && HAS_IOMEM depends on X86 || ARM64 || COMPILE_TEST select BITREVERSE select CRC32 diff --git a/drivers/net/ethernet/apm/xgene-v2/Kconfig b/drivers/net/ethernet/apm/xgene-v2/Kconfig index 1205861b6318..eedd3f3dd22e 100644 --- a/drivers/net/ethernet/apm/xgene-v2/Kconfig +++ b/drivers/net/ethernet/apm/xgene-v2/Kconfig @@ -1,6 +1,5 @@ config NET_XGENE_V2 tristate "APM X-Gene SoC Ethernet-v2 Driver" - depends on HAS_DMA depends on ARCH_XGENE || COMPILE_TEST help This is the Ethernet driver for the on-chip ethernet interface diff --git a/drivers/net/ethernet/apm/xgene/Kconfig b/drivers/net/ethernet/apm/xgene/Kconfig index afccb033177b..e4e33c900b57 100644 --- a/drivers/net/ethernet/apm/xgene/Kconfig +++ b/drivers/net/ethernet/apm/xgene/Kconfig @@ -1,6 +1,5 @@ config NET_XGENE tristate "APM X-Gene SoC Ethernet Driver" - depends on HAS_DMA depends on ARCH_XGENE || COMPILE_TEST select PHYLIB select MDIO_XGENE diff --git a/drivers/net/ethernet/arc/Kconfig b/drivers/net/ethernet/arc/Kconfig index e743ddf46343..5d0ab8e74b68 100644 --- a/drivers/net/ethernet/arc/Kconfig +++ b/drivers/net/ethernet/arc/Kconfig @@ -24,7 +24,8 @@ config ARC_EMAC_CORE config ARC_EMAC tristate "ARC EMAC support" select ARC_EMAC_CORE - depends on OF_IRQ && OF_NET && HAS_DMA && (ARC || COMPILE_TEST) + depends on OF_IRQ && OF_NET + depends on ARC || COMPILE_TEST ---help--- On some legacy ARC (Synopsys) FPGA boards such as ARCAngel4/ML50x non-standard on-chip ethernet device ARC EMAC 10/100 is used. @@ -33,7 +34,8 @@ config ARC_EMAC config EMAC_ROCKCHIP tristate "Rockchip EMAC support" select ARC_EMAC_CORE - depends on OF_IRQ && OF_NET && REGULATOR && HAS_DMA && (ARCH_ROCKCHIP || COMPILE_TEST) + depends on OF_IRQ && OF_NET && REGULATOR + depends on ARCH_ROCKCHIP || COMPILE_TEST ---help--- Support for Rockchip RK3036/RK3066/RK3188 EMAC ethernet controllers. This selects Rockchip SoC glue layer support for the diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig index af75156919ed..4c3bfde6e8de 100644 --- a/drivers/net/ethernet/broadcom/Kconfig +++ b/drivers/net/ethernet/broadcom/Kconfig @@ -157,7 +157,6 @@ config BGMAC config BGMAC_BCMA tristate "Broadcom iProc GBit BCMA support" depends on BCMA && BCMA_HOST_SOC - depends on HAS_DMA depends on BCM47XX || ARCH_BCM_5301X || COMPILE_TEST select BGMAC select PHYLIB @@ -170,7 +169,6 @@ config BGMAC_BCMA config BGMAC_PLATFORM tristate "Broadcom iProc GBit platform support" - depends on HAS_DMA depends on ARCH_BCM_IPROC || COMPILE_TEST depends on OF select BGMAC diff --git a/drivers/net/ethernet/calxeda/Kconfig b/drivers/net/ethernet/calxeda/Kconfig index 07d2201530d2..9fdd496b90ff 100644 --- a/drivers/net/ethernet/calxeda/Kconfig +++ b/drivers/net/ethernet/calxeda/Kconfig @@ -1,6 +1,6 @@ config NET_CALXEDA_XGMAC tristate "Calxeda 1G/10G XGMAC Ethernet driver" - depends on HAS_IOMEM && HAS_DMA + depends on HAS_IOMEM depends on ARCH_HIGHBANK || COMPILE_TEST select CRC32 help diff --git a/drivers/net/ethernet/hisilicon/Kconfig b/drivers/net/ethernet/hisilicon/Kconfig index 8bcf470ff5f3..fb1a7251f45d 100644 --- a/drivers/net/ethernet/hisilicon/Kconfig +++ b/drivers/net/ethernet/hisilicon/Kconfig @@ -5,7 +5,7 @@ config NET_VENDOR_HISILICON bool "Hisilicon devices" default y - depends on (OF || ACPI) && HAS_DMA + depends on OF || ACPI depends on ARM || ARM64 || COMPILE_TEST ---help--- If you have a network (Ethernet) card belonging to this class, say Y. diff --git a/drivers/net/ethernet/marvell/Kconfig b/drivers/net/ethernet/marvell/Kconfig index cc2f7701e71e..f33fd22b351c 100644 --- a/drivers/net/ethernet/marvell/Kconfig +++ b/drivers/net/ethernet/marvell/Kconfig @@ -18,8 +18,8 @@ if NET_VENDOR_MARVELL config MV643XX_ETH tristate "Marvell Discovery (643XX) and Orion ethernet support" - depends on (MV64X60 || PPC32 || PLAT_ORION || COMPILE_TEST) && INET - depends on HAS_DMA + depends on MV64X60 || PPC32 || PLAT_ORION || COMPILE_TEST + depends on INET select PHYLIB select MVMDIO ---help--- @@ -58,7 +58,6 @@ config MVNETA_BM_ENABLE config MVNETA tristate "Marvell Armada 370/38x/XP/37xx network interface support" depends on ARCH_MVEBU || COMPILE_TEST - depends on HAS_DMA select MVMDIO select PHYLINK ---help--- @@ -84,7 +83,6 @@ config MVNETA_BM config MVPP2 tristate "Marvell Armada 375/7K/8K network interface support" depends on ARCH_MVEBU || COMPILE_TEST - depends on HAS_DMA select MVMDIO select PHYLINK ---help--- @@ -93,7 +91,7 @@ config MVPP2 config PXA168_ETH tristate "Marvell pxa168 ethernet support" - depends on HAS_IOMEM && HAS_DMA + depends on HAS_IOMEM depends on CPU_PXA168 || ARCH_BERLIN || COMPILE_TEST select PHYLIB ---help--- diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig index f4d9c9975ac3..82827a8d3d67 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig +++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig @@ -30,7 +30,7 @@ config MLXSW_CORE_THERMAL config MLXSW_PCI tristate "PCI bus implementation for Mellanox Technologies Switch ASICs" - depends on PCI && HAS_DMA && HAS_IOMEM && MLXSW_CORE + depends on PCI && HAS_IOMEM && MLXSW_CORE default m ---help--- This is PCI bus implementation for Mellanox Technologies Switch ASICs. diff --git a/drivers/net/ethernet/renesas/Kconfig b/drivers/net/ethernet/renesas/Kconfig index 27be51f0a421..f3f7477043ce 100644 --- a/drivers/net/ethernet/renesas/Kconfig +++ b/drivers/net/ethernet/renesas/Kconfig @@ -17,7 +17,6 @@ if NET_VENDOR_RENESAS config SH_ETH tristate "Renesas SuperH Ethernet support" - depends on HAS_DMA depends on ARCH_RENESAS || SUPERH || COMPILE_TEST select CRC32 select MII @@ -31,7 +30,6 @@ config SH_ETH config RAVB tristate "Renesas Ethernet AVB support" - depends on HAS_DMA depends on ARCH_RENESAS || COMPILE_TEST select CRC32 select MII diff --git a/drivers/net/wireless/broadcom/brcm80211/Kconfig b/drivers/net/wireless/broadcom/brcm80211/Kconfig index 9d99eb42d917..6acba67bca07 100644 --- a/drivers/net/wireless/broadcom/brcm80211/Kconfig +++ b/drivers/net/wireless/broadcom/brcm80211/Kconfig @@ -60,7 +60,6 @@ config BRCMFMAC_PCIE bool "PCIE bus interface support for FullMAC driver" depends on BRCMFMAC depends on PCI - depends on HAS_DMA select BRCMFMAC_PROTO_MSGBUF select FW_LOADER ---help--- diff --git a/drivers/net/wireless/quantenna/qtnfmac/Kconfig b/drivers/net/wireless/quantenna/qtnfmac/Kconfig index 025fa6018550..8d1492a90bd1 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/Kconfig +++ b/drivers/net/wireless/quantenna/qtnfmac/Kconfig @@ -7,7 +7,7 @@ config QTNFMAC config QTNFMAC_PEARL_PCIE tristate "Quantenna QSR10g PCIe support" default n - depends on HAS_DMA && PCI && CFG80211 + depends on PCI && CFG80211 select QTNFMAC select FW_LOADER select CRC32 From 7b4e88434c4e7982fb053c49657e1c8bbb8692d9 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Fri, 22 Jun 2018 10:54:45 -0700 Subject: [PATCH 395/572] Smack: Mark inode instant in smack_task_to_inode Smack: Mark inode instant in smack_task_to_inode /proc clean-up in commit 1bbc55131e59bd099fdc568d3aa0b42634dbd188 resulted in smack_task_to_inode() being called before smack_d_instantiate. This resulted in the smk_inode value being ignored, even while present for files in /proc/self. Marking the inode as instant here fixes that. Signed-off-by: Casey Schaufler Signed-off-by: James Morris --- security/smack/smack_lsm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 7ad226018f51..19de675d4504 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -2296,6 +2296,7 @@ static void smack_task_to_inode(struct task_struct *p, struct inode *inode) struct smack_known *skp = smk_of_task_struct(p); isp->smk_inode = skp; + isp->smk_flags |= SMK_INODE_INSTANT; } /* From 74174fe5634ffbf645a7ca5a261571f700b2f332 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 22 Jun 2018 06:44:14 -0700 Subject: [PATCH 396/572] net: dccp: avoid crash in ccid3_hc_rx_send_feedback() On fast hosts or malicious bots, we trigger a DCCP_BUG() which seems excessive. syzbot reported : BUG: delta (-6195) <= 0 at net/dccp/ccids/ccid3.c:628/ccid3_hc_rx_send_feedback() CPU: 1 PID: 18 Comm: ksoftirqd/1 Not tainted 4.18.0-rc1+ #112 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c9/0x2b4 lib/dump_stack.c:113 ccid3_hc_rx_send_feedback net/dccp/ccids/ccid3.c:628 [inline] ccid3_hc_rx_packet_recv.cold.16+0x38/0x71 net/dccp/ccids/ccid3.c:793 ccid_hc_rx_packet_recv net/dccp/ccid.h:185 [inline] dccp_deliver_input_to_ccids+0xf0/0x280 net/dccp/input.c:180 dccp_rcv_established+0x87/0xb0 net/dccp/input.c:378 dccp_v4_do_rcv+0x153/0x180 net/dccp/ipv4.c:654 sk_backlog_rcv include/net/sock.h:914 [inline] __sk_receive_skb+0x3ba/0xd80 net/core/sock.c:517 dccp_v4_rcv+0x10f9/0x1f58 net/dccp/ipv4.c:875 ip_local_deliver_finish+0x2eb/0xda0 net/ipv4/ip_input.c:215 NF_HOOK include/linux/netfilter.h:287 [inline] ip_local_deliver+0x1e9/0x750 net/ipv4/ip_input.c:256 dst_input include/net/dst.h:450 [inline] ip_rcv_finish+0x823/0x2220 net/ipv4/ip_input.c:396 NF_HOOK include/linux/netfilter.h:287 [inline] ip_rcv+0xa18/0x1284 net/ipv4/ip_input.c:492 __netif_receive_skb_core+0x2488/0x3680 net/core/dev.c:4628 __netif_receive_skb+0x2c/0x1e0 net/core/dev.c:4693 process_backlog+0x219/0x760 net/core/dev.c:5373 napi_poll net/core/dev.c:5771 [inline] net_rx_action+0x7da/0x1980 net/core/dev.c:5837 __do_softirq+0x2e8/0xb17 kernel/softirq.c:284 run_ksoftirqd+0x86/0x100 kernel/softirq.c:645 smpboot_thread_fn+0x417/0x870 kernel/smpboot.c:164 kthread+0x345/0x410 kernel/kthread.c:240 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:412 Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Gerrit Renker Cc: dccp@vger.kernel.org Signed-off-by: David S. Miller --- net/dccp/ccids/ccid3.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 8b5ba6dffac7..d57a2be1e2e0 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -625,9 +625,8 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk, case CCID3_FBACK_PERIODIC: delta = ktime_us_delta(now, hc->rx_tstamp_last_feedback); if (delta <= 0) - DCCP_BUG("delta (%ld) <= 0", (long)delta); - else - hc->rx_x_recv = scaled_div32(hc->rx_bytes_recv, delta); + delta = 1; + hc->rx_x_recv = scaled_div32(hc->rx_bytes_recv, delta); break; default: return; From 0ce4e70ff00662ad7490e545ba0cd8c1fa179fca Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 22 Jun 2018 06:44:15 -0700 Subject: [PATCH 397/572] net: dccp: switch rx_tstamp_last_feedback to monotonic clock To compute delays, better not use time of the day which can be changed by admins or malicious programs. Also change ccid3_first_li() to use s64 type for delta variable to avoid potential overflows. Signed-off-by: Eric Dumazet Cc: Gerrit Renker Cc: dccp@vger.kernel.org Signed-off-by: David S. Miller --- net/dccp/ccids/ccid3.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index d57a2be1e2e0..12877a1514e7 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -600,7 +600,7 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk, { struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk); struct dccp_sock *dp = dccp_sk(sk); - ktime_t now = ktime_get_real(); + ktime_t now = ktime_get(); s64 delta = 0; switch (fbtype) { @@ -632,7 +632,7 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk, return; } - ccid3_pr_debug("Interval %ldusec, X_recv=%u, 1/p=%u\n", (long)delta, + ccid3_pr_debug("Interval %lldusec, X_recv=%u, 1/p=%u\n", delta, hc->rx_x_recv, hc->rx_pinv); hc->rx_tstamp_last_feedback = now; @@ -679,7 +679,8 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) static u32 ccid3_first_li(struct sock *sk) { struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk); - u32 x_recv, p, delta; + u32 x_recv, p; + s64 delta; u64 fval; if (hc->rx_rtt == 0) { @@ -687,7 +688,9 @@ static u32 ccid3_first_li(struct sock *sk) hc->rx_rtt = DCCP_FALLBACK_RTT; } - delta = ktime_to_us(net_timedelta(hc->rx_tstamp_last_feedback)); + delta = ktime_us_delta(ktime_get(), hc->rx_tstamp_last_feedback); + if (delta <= 0) + delta = 1; x_recv = scaled_div32(hc->rx_bytes_recv, delta); if (x_recv == 0) { /* would also trigger divide-by-zero */ DCCP_WARN("X_recv==0\n"); From 35b42da69e35536da603a50e40aa6c41b2f7b0f8 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 22 Jun 2018 14:33:16 -0700 Subject: [PATCH 398/572] net_sched: remove a bogus warning in hfsc In update_vf(): cftree_remove(cl); update_cfmin(cl->cl_parent); the cl_cfmin of cl->cl_parent is intentionally updated to 0 when that parent only has one child. And if this parent is root qdisc, we could end up, in hfsc_schedule_watchdog(), that we can't decide the next schedule time for qdisc watchdog. But it seems safe that we can just skip it, as this watchdog is not always scheduled anyway. Thanks to Marco for testing all the cases, nothing is broken. Reported-by: Marco Berizzi Tested-by: Marco Berizzi Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/sch_hfsc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 3ae9877ea205..3278a76f6861 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1385,8 +1385,8 @@ hfsc_schedule_watchdog(struct Qdisc *sch) if (next_time == 0 || next_time > q->root.cl_cfmin) next_time = q->root.cl_cfmin; } - WARN_ON(next_time == 0); - qdisc_watchdog_schedule(&q->watchdog, next_time); + if (next_time) + qdisc_watchdog_schedule(&q->watchdog, next_time); } static int From 1ebb2709ba39bf291a30dc1fcf937f46c4675f4d Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Tue, 12 Jun 2018 15:48:06 +0100 Subject: [PATCH 399/572] arm64: dts: apq8096-db820c: disable uart0 by default Access to UART0 is disabled by bootloaders. By leaving it enabled by default would reboot the board. Disable this for now, this would alteast give a board which boots. Signed-off-by: Srinivas Kandagatla Reviewed-by: Bjorn Andersson Signed-off-by: Andy Gross --- arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi b/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi index 0f829db33efe..4d5ef01f43a3 100644 --- a/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi +++ b/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi @@ -75,7 +75,7 @@ serial@75b1000 { label = "LS-UART0"; - status = "okay"; + status = "disabled"; pinctrl-names = "default", "sleep"; pinctrl-0 = <&blsp2_uart2_4pins_default>; pinctrl-1 = <&blsp2_uart2_4pins_sleep>; From 6b4154a655a258c67bcfabbd4c3a06637e74ebcd Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 8 May 2018 10:09:51 -0500 Subject: [PATCH 400/572] arm64: dts: msm8916: fix Coresight ETF graph connections The ETF input should be connected to the funnel output, and the ETF output should be connected to the replicator input. The labels are wrong and these got swapped: Warning (graph_endpoint): /soc/funnel@821000/ports/port@8/endpoint: graph connection to node '/soc/etf@825000/ports/port@1/endpoint' is not bidirectional Warning (graph_endpoint): /soc/replicator@824000/ports/port@2/endpoint: graph connection to node '/soc/etf@825000/ports/port@0/endpoint' is not bidirectional Fixes: 7c10da373698 ("arm64: dts: qcom: Add msm8916 CoreSight components") Cc: Ivan T. Ivanov Cc: Mathieu Poirier Cc: Andy Gross Cc: David Brown Cc: linux-arm-msm@vger.kernel.org Signed-off-by: Rob Herring Reviewed-by: Mathieu Poirier Tested-by: Mathieu Poirier Signed-off-by: Andy Gross --- arch/arm64/boot/dts/qcom/msm8916.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/qcom/msm8916.dtsi b/arch/arm64/boot/dts/qcom/msm8916.dtsi index 650f356f69ca..c2625d15a8c0 100644 --- a/arch/arm64/boot/dts/qcom/msm8916.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8916.dtsi @@ -1191,14 +1191,14 @@ port@0 { reg = <0>; - etf_out: endpoint { + etf_in: endpoint { slave-mode; remote-endpoint = <&funnel0_out>; }; }; port@1 { reg = <0>; - etf_in: endpoint { + etf_out: endpoint { remote-endpoint = <&replicator_in>; }; }; From b2f82565f2caa1a5c1a26e68593eaef355fae3f4 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Wed, 20 Jun 2018 16:35:16 -0300 Subject: [PATCH 401/572] powerpc: Wire up io_pgetevents Wire up io_pgetevents system call on powerpc. io_pgetevents is a new syscall to read asynchronous I/O events from the completion queue. Tested with libaio branch aio-poll[1] and the io_pgetevents test (#22) passed on both ppc64 LE and BE modes. [1] https://pagure.io/libaio/branch/aio-poll CC: Christoph Hellwig Signed-off-by: Breno Leitao Acked-by: Christoph Hellwig Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/systbl.h | 1 + arch/powerpc/include/asm/unistd.h | 2 +- arch/powerpc/include/uapi/asm/unistd.h | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index cfcf6a874cfa..01b5171ea189 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -393,3 +393,4 @@ SYSCALL(pkey_alloc) SYSCALL(pkey_free) SYSCALL(pkey_mprotect) SYSCALL(rseq) +COMPAT_SYS(io_pgetevents) diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index 1e9708632dce..c19379f0a32e 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -12,7 +12,7 @@ #include -#define NR_syscalls 388 +#define NR_syscalls 389 #define __NR__exit __NR_exit diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h index ac5ba55066dd..985534d0b448 100644 --- a/arch/powerpc/include/uapi/asm/unistd.h +++ b/arch/powerpc/include/uapi/asm/unistd.h @@ -399,5 +399,6 @@ #define __NR_pkey_free 385 #define __NR_pkey_mprotect 386 #define __NR_rseq 387 +#define __NR_io_pgetevents 388 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */ From 51be1335151771075dcb19f3464ca9f331134285 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Sat, 23 Jun 2018 01:08:40 +0300 Subject: [PATCH 402/572] Revert "x86/mm: Mark __pgtable_l5_enabled __initdata" This reverts commit e4e961e36f063484c48bed919013c106d178995d. We need to use early version of pgtable_l5_enabled() in early_identify_cpu() as this code runs before cpu_feature_enabled() is usable. But it leads to section mismatch: cpu_init() load_mm_ldt() ldt_slot_va() LDT_BASE_ADDR LDT_PGD_ENTRY pgtable_l5_enabled() __pgtable_l5_enabled __pgtable_l5_enabled marked as __initdata, but cpu_init() is not __init. It's fixable: early code can be isolated into a separate translation unit, but such change collides with other work in the area. That's too much hassle to save 4 bytes of memory. Return __pgtable_l5_enabled back to be __ro_after_init. Signed-off-by: Kirill A. Shutemov Signed-off-by: Thomas Gleixner Cc: "H. Peter Anvin" Link: https://lkml.kernel.org/r/20180622220841.54135-2-kirill.shutemov@linux.intel.com --- arch/x86/kernel/head64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index a21d6ace648e..8047379e575a 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -44,7 +44,7 @@ static unsigned int __initdata next_early_pgt; pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX); #ifdef CONFIG_X86_5LEVEL -unsigned int __pgtable_l5_enabled __initdata; +unsigned int __pgtable_l5_enabled __ro_after_init; unsigned int pgdir_shift __ro_after_init = 39; EXPORT_SYMBOL(pgdir_shift); unsigned int ptrs_per_p4d __ro_after_init = 1; From 2458e53ff74cd1063ed3e00459da1d35c559d369 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Sat, 23 Jun 2018 01:08:41 +0300 Subject: [PATCH 403/572] x86/mm: Fix 'no5lvl' handling early_identify_cpu() has to use early version of pgtable_l5_enabled() that doesn't rely on cpu_feature_enabled(). Defining USE_EARLY_PGTABLE_L5 before all includes does the trick. I lost the define in one of reworks of the original patch. Fixes: 372fddf70904 ("x86/mm: Introduce the 'no5lvl' kernel parameter") Signed-off-by: Kirill A. Shutemov Signed-off-by: Thomas Gleixner Cc: "H. Peter Anvin" Link: https://lkml.kernel.org/r/20180622220841.54135-3-kirill.shutemov@linux.intel.com --- arch/x86/kernel/cpu/common.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 0df7151cfef4..eb4cb3efd20e 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1,3 +1,6 @@ +/* cpu_feature_enabled() cannot be used this early */ +#define USE_EARLY_PGTABLE_L5 + #include #include #include From f5e350f021e04ea41d2e5d58487c33b05ba3d25b Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 22 Jun 2018 13:18:09 -0700 Subject: [PATCH 404/572] blk-mq: Fix timeout handling in case the timeout handler returns BLK_EH_DONE Make sure that RQF_TIMED_OUT is cleared when a request is reused after a block driver timeout handler has returned BLK_EH_DONE. Fixes: da6612673988 ("blk-mq: don't time out requests again that are in the timeout handler") Signed-off-by: Bart Van Assche Cc: Christoph Hellwig Cc: Jianchao Wang Cc: Andrew Randrianasulu Signed-off-by: Jens Axboe --- block/blk-mq.c | 1 - block/blk-timeout.c | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 8e57b84e50e9..b6888ff556cf 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -781,7 +781,6 @@ static void blk_mq_rq_timed_out(struct request *req, bool reserved) WARN_ON_ONCE(ret != BLK_EH_RESET_TIMER); } - req->rq_flags &= ~RQF_TIMED_OUT; blk_add_timer(req); } diff --git a/block/blk-timeout.c b/block/blk-timeout.c index 4b8a48d48ba1..f2cfd56e1606 100644 --- a/block/blk-timeout.c +++ b/block/blk-timeout.c @@ -210,6 +210,7 @@ void blk_add_timer(struct request *req) if (!req->timeout) req->timeout = q->rq_timeout; + req->rq_flags &= ~RQF_TIMED_OUT; blk_rq_set_deadline(req, jiffies + req->timeout); /* From c432c08805963f9b145b86b5f4ac945881e1b9df Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Sat, 23 Jun 2018 10:24:23 -0700 Subject: [PATCH 405/572] arm64: defconfig: renormalize based on recent additions The defconfig has drifted over time, as Kconfig entries have changed order or default values. Several maintainers ended up running 'savedefconfig' themselves which caused a cascade of conflicts. Let's do it once and for all in our tree before -rc2 instead. Signed-off-by: Olof Johansson --- arch/arm64/configs/defconfig | 110 ++++++++++++++--------------------- 1 file changed, 43 insertions(+), 67 deletions(-) diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 3cfa8ca26738..f9a186f6af8a 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -47,6 +47,7 @@ CONFIG_ARCH_MVEBU=y CONFIG_ARCH_QCOM=y CONFIG_ARCH_ROCKCHIP=y CONFIG_ARCH_SEATTLE=y +CONFIG_ARCH_SYNQUACER=y CONFIG_ARCH_RENESAS=y CONFIG_ARCH_R8A7795=y CONFIG_ARCH_R8A7796=y @@ -58,7 +59,6 @@ CONFIG_ARCH_R8A77995=y CONFIG_ARCH_STRATIX10=y CONFIG_ARCH_TEGRA=y CONFIG_ARCH_SPRD=y -CONFIG_ARCH_SYNQUACER=y CONFIG_ARCH_THUNDER=y CONFIG_ARCH_THUNDER2=y CONFIG_ARCH_UNIPHIER=y @@ -67,25 +67,23 @@ CONFIG_ARCH_XGENE=y CONFIG_ARCH_ZX=y CONFIG_ARCH_ZYNQMP=y CONFIG_PCI=y -CONFIG_HOTPLUG_PCI_PCIE=y CONFIG_PCI_IOV=y CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_ACPI=y -CONFIG_PCI_LAYERSCAPE=y -CONFIG_PCI_HISI=y -CONFIG_PCIE_QCOM=y -CONFIG_PCIE_KIRIN=y -CONFIG_PCIE_ARMADA_8K=y -CONFIG_PCIE_HISI_STB=y CONFIG_PCI_AARDVARK=y CONFIG_PCI_TEGRA=y CONFIG_PCIE_RCAR=y -CONFIG_PCIE_ROCKCHIP=y -CONFIG_PCIE_ROCKCHIP_HOST=m CONFIG_PCI_HOST_GENERIC=y CONFIG_PCI_XGENE=y CONFIG_PCI_HOST_THUNDER_PEM=y CONFIG_PCI_HOST_THUNDER_ECAM=y +CONFIG_PCIE_ROCKCHIP_HOST=m +CONFIG_PCI_LAYERSCAPE=y +CONFIG_PCI_HISI=y +CONFIG_PCIE_QCOM=y +CONFIG_PCIE_ARMADA_8K=y +CONFIG_PCIE_KIRIN=y +CONFIG_PCIE_HISI_STB=y CONFIG_ARM64_VA_BITS_48=y CONFIG_SCHED_MC=y CONFIG_NUMA=y @@ -104,8 +102,6 @@ CONFIG_HIBERNATION=y CONFIG_WQ_POWER_EFFICIENT_DEFAULT=y CONFIG_ARM_CPUIDLE=y CONFIG_CPU_FREQ=y -CONFIG_CPU_FREQ_GOV_ATTR_SET=y -CONFIG_CPU_FREQ_GOV_COMMON=y CONFIG_CPU_FREQ_STAT=y CONFIG_CPU_FREQ_GOV_POWERSAVE=m CONFIG_CPU_FREQ_GOV_USERSPACE=y @@ -113,11 +109,11 @@ CONFIG_CPU_FREQ_GOV_ONDEMAND=y CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y CONFIG_CPUFREQ_DT=y +CONFIG_ACPI_CPPC_CPUFREQ=m CONFIG_ARM_ARMADA_37XX_CPUFREQ=y CONFIG_ARM_BIG_LITTLE_CPUFREQ=y CONFIG_ARM_SCPI_CPUFREQ=y CONFIG_ARM_TEGRA186_CPUFREQ=y -CONFIG_ACPI_CPPC_CPUFREQ=m CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -236,11 +232,6 @@ CONFIG_SMSC911X=y CONFIG_SNI_AVE=y CONFIG_SNI_NETSEC=y CONFIG_STMMAC_ETH=m -CONFIG_DWMAC_IPQ806X=m -CONFIG_DWMAC_MESON=m -CONFIG_DWMAC_ROCKCHIP=m -CONFIG_DWMAC_SUNXI=m -CONFIG_DWMAC_SUN8I=m CONFIG_MDIO_BUS_MUX_MMIOREG=y CONFIG_AT803X_PHY=m CONFIG_MARVELL_PHY=m @@ -269,8 +260,8 @@ CONFIG_WL18XX=m CONFIG_WLCORE_SDIO=m CONFIG_INPUT_EVDEV=y CONFIG_KEYBOARD_ADC=m -CONFIG_KEYBOARD_CROS_EC=y CONFIG_KEYBOARD_GPIO=y +CONFIG_KEYBOARD_CROS_EC=y CONFIG_INPUT_TOUCHSCREEN=y CONFIG_TOUCHSCREEN_ATMEL_MXT=m CONFIG_INPUT_MISC=y @@ -296,17 +287,13 @@ CONFIG_SERIAL_SAMSUNG=y CONFIG_SERIAL_SAMSUNG_CONSOLE=y CONFIG_SERIAL_TEGRA=y CONFIG_SERIAL_SH_SCI=y -CONFIG_SERIAL_SH_SCI_NR_UARTS=11 -CONFIG_SERIAL_SH_SCI_CONSOLE=y CONFIG_SERIAL_MSM=y CONFIG_SERIAL_MSM_CONSOLE=y CONFIG_SERIAL_XILINX_PS_UART=y CONFIG_SERIAL_XILINX_PS_UART_CONSOLE=y CONFIG_SERIAL_MVEBU_UART=y CONFIG_SERIAL_DEV_BUS=y -CONFIG_SERIAL_DEV_CTRL_TTYPORT=y CONFIG_VIRTIO_CONSOLE=y -CONFIG_I2C_HID=m CONFIG_I2C_CHARDEV=y CONFIG_I2C_MUX=y CONFIG_I2C_MUX_PCA954x=y @@ -325,26 +312,26 @@ CONFIG_I2C_RCAR=y CONFIG_I2C_CROS_EC_TUNNEL=y CONFIG_SPI=y CONFIG_SPI_ARMADA_3700=y -CONFIG_SPI_MESON_SPICC=m -CONFIG_SPI_MESON_SPIFC=m CONFIG_SPI_BCM2835=m CONFIG_SPI_BCM2835AUX=m +CONFIG_SPI_MESON_SPICC=m +CONFIG_SPI_MESON_SPIFC=m CONFIG_SPI_ORION=y CONFIG_SPI_PL022=y -CONFIG_SPI_QUP=y CONFIG_SPI_ROCKCHIP=y +CONFIG_SPI_QUP=y CONFIG_SPI_S3C64XX=y CONFIG_SPI_SPIDEV=m CONFIG_SPMI=y -CONFIG_PINCTRL_IPQ8074=y CONFIG_PINCTRL_SINGLE=y CONFIG_PINCTRL_MAX77620=y +CONFIG_PINCTRL_IPQ8074=y CONFIG_PINCTRL_MSM8916=y CONFIG_PINCTRL_MSM8994=y CONFIG_PINCTRL_MSM8996=y -CONFIG_PINCTRL_MT7622=y CONFIG_PINCTRL_QDF2XXX=y CONFIG_PINCTRL_QCOM_SPMI_PMIC=y +CONFIG_PINCTRL_MT7622=y CONFIG_GPIO_DWAPB=y CONFIG_GPIO_MB86S7X=y CONFIG_GPIO_PL061=y @@ -368,13 +355,13 @@ CONFIG_SENSORS_INA2XX=m CONFIG_THERMAL_GOV_POWER_ALLOCATOR=y CONFIG_CPU_THERMAL=y CONFIG_THERMAL_EMULATION=y +CONFIG_ROCKCHIP_THERMAL=m +CONFIG_RCAR_GEN3_THERMAL=y CONFIG_ARMADA_THERMAL=y CONFIG_BRCMSTB_THERMAL=m CONFIG_EXYNOS_THERMAL=y -CONFIG_RCAR_GEN3_THERMAL=y -CONFIG_QCOM_TSENS=y -CONFIG_ROCKCHIP_THERMAL=m CONFIG_TEGRA_BPMP_THERMAL=m +CONFIG_QCOM_TSENS=y CONFIG_UNIPHIER_THERMAL=y CONFIG_WATCHDOG=y CONFIG_S3C2410_WATCHDOG=y @@ -395,9 +382,9 @@ CONFIG_MFD_MAX77620=y CONFIG_MFD_SPMI_PMIC=y CONFIG_MFD_RK808=y CONFIG_MFD_SEC_CORE=y +CONFIG_REGULATOR_FIXED_VOLTAGE=y CONFIG_REGULATOR_AXP20X=y CONFIG_REGULATOR_FAN53555=y -CONFIG_REGULATOR_FIXED_VOLTAGE=y CONFIG_REGULATOR_GPIO=y CONFIG_REGULATOR_HI6421V530=y CONFIG_REGULATOR_HI655X=y @@ -407,16 +394,15 @@ CONFIG_REGULATOR_QCOM_SMD_RPM=y CONFIG_REGULATOR_QCOM_SPMI=y CONFIG_REGULATOR_RK808=y CONFIG_REGULATOR_S2MPS11=y +CONFIG_RC_CORE=m +CONFIG_RC_DECODERS=y +CONFIG_RC_DEVICES=y +CONFIG_IR_MESON=m CONFIG_MEDIA_SUPPORT=m CONFIG_MEDIA_CAMERA_SUPPORT=y CONFIG_MEDIA_ANALOG_TV_SUPPORT=y CONFIG_MEDIA_DIGITAL_TV_SUPPORT=y CONFIG_MEDIA_CONTROLLER=y -CONFIG_MEDIA_RC_SUPPORT=y -CONFIG_RC_CORE=m -CONFIG_RC_DEVICES=y -CONFIG_RC_DECODERS=y -CONFIG_IR_MESON=m CONFIG_VIDEO_V4L2_SUBDEV_API=y # CONFIG_DVB_NET is not set CONFIG_V4L_MEM2MEM_DRIVERS=y @@ -441,8 +427,7 @@ CONFIG_ROCKCHIP_DW_HDMI=y CONFIG_ROCKCHIP_DW_MIPI_DSI=y CONFIG_ROCKCHIP_INNO_HDMI=y CONFIG_DRM_RCAR_DU=m -CONFIG_DRM_RCAR_LVDS=y -CONFIG_DRM_RCAR_VSP=y +CONFIG_DRM_RCAR_LVDS=m CONFIG_DRM_TEGRA=m CONFIG_DRM_PANEL_SIMPLE=m CONFIG_DRM_I2C_ADV7511=m @@ -455,7 +440,6 @@ CONFIG_FB_ARMCLCD=y CONFIG_BACKLIGHT_GENERIC=m CONFIG_BACKLIGHT_PWM=m CONFIG_BACKLIGHT_LP855X=m -CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y # CONFIG_LOGO_LINUX_MONO is not set # CONFIG_LOGO_LINUX_VGA16 is not set @@ -468,6 +452,7 @@ CONFIG_SND_SOC_RCAR=m CONFIG_SND_SOC_AK4613=m CONFIG_SND_SIMPLE_CARD=m CONFIG_SND_AUDIO_GRAPH_CARD=m +CONFIG_I2C_HID=m CONFIG_USB=y CONFIG_USB_OTG=y CONFIG_USB_XHCI_HCD=y @@ -501,12 +486,12 @@ CONFIG_MMC_BLOCK_MINORS=32 CONFIG_MMC_ARMMMCI=y CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_ACPI=y -CONFIG_MMC_SDHCI_F_SDH30=y CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_SDHCI_OF_ARASAN=y CONFIG_MMC_SDHCI_OF_ESDHC=y CONFIG_MMC_SDHCI_CADENCE=y CONFIG_MMC_SDHCI_TEGRA=y +CONFIG_MMC_SDHCI_F_SDH30=y CONFIG_MMC_MESON_GX=y CONFIG_MMC_SDHCI_MSM=y CONFIG_MMC_SPI=y @@ -524,11 +509,11 @@ CONFIG_LEDS_CLASS=y CONFIG_LEDS_GPIO=y CONFIG_LEDS_PWM=y CONFIG_LEDS_SYSCON=y +CONFIG_LEDS_TRIGGER_DISK=y CONFIG_LEDS_TRIGGER_HEARTBEAT=y CONFIG_LEDS_TRIGGER_CPU=y CONFIG_LEDS_TRIGGER_DEFAULT_ON=y CONFIG_LEDS_TRIGGER_PANIC=y -CONFIG_LEDS_TRIGGER_DISK=y CONFIG_EDAC=y CONFIG_EDAC_GHES=y CONFIG_RTC_CLASS=y @@ -537,13 +522,13 @@ CONFIG_RTC_DRV_RK808=m CONFIG_RTC_DRV_S5M=y CONFIG_RTC_DRV_DS3232=y CONFIG_RTC_DRV_EFI=y +CONFIG_RTC_DRV_CROS_EC=y CONFIG_RTC_DRV_S3C=y CONFIG_RTC_DRV_PL031=y CONFIG_RTC_DRV_SUN6I=y CONFIG_RTC_DRV_ARMADA38X=y CONFIG_RTC_DRV_TEGRA=y CONFIG_RTC_DRV_XGENE=y -CONFIG_RTC_DRV_CROS_EC=y CONFIG_DMADEVICES=y CONFIG_DMA_BCM2835=m CONFIG_K3_DMA=y @@ -579,7 +564,6 @@ CONFIG_HWSPINLOCK_QCOM=y CONFIG_ARM_MHU=y CONFIG_PLATFORM_MHU=y CONFIG_BCM2835_MBOX=y -CONFIG_HI6220_MBOX=y CONFIG_QCOM_APCS_IPC=y CONFIG_ROCKCHIP_IOMMU=y CONFIG_TEGRA_IOMMU_SMMU=y @@ -602,7 +586,6 @@ CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND=y CONFIG_EXTCON_USB_GPIO=y CONFIG_EXTCON_USBC_CROS_EC=y CONFIG_MEMORY=y -CONFIG_TEGRA_MC=y CONFIG_IIO=y CONFIG_EXYNOS_ADC=y CONFIG_ROCKCHIP_SARADC=m @@ -618,27 +601,27 @@ CONFIG_PWM_RCAR=m CONFIG_PWM_ROCKCHIP=y CONFIG_PWM_SAMSUNG=y CONFIG_PWM_TEGRA=m +CONFIG_PHY_XGENE=y +CONFIG_PHY_SUN4I_USB=y +CONFIG_PHY_HI6220_USB=y CONFIG_PHY_HISTB_COMBPHY=y CONFIG_PHY_HISI_INNO_USB2=y -CONFIG_PHY_RCAR_GEN3_USB2=y -CONFIG_PHY_RCAR_GEN3_USB3=m -CONFIG_PHY_HI6220_USB=y -CONFIG_PHY_QCOM_USB_HS=y -CONFIG_PHY_SUN4I_USB=y CONFIG_PHY_MVEBU_CP110_COMPHY=y CONFIG_PHY_QCOM_QMP=m -CONFIG_PHY_ROCKCHIP_INNO_USB2=y +CONFIG_PHY_QCOM_USB_HS=y +CONFIG_PHY_RCAR_GEN3_USB2=y +CONFIG_PHY_RCAR_GEN3_USB3=m CONFIG_PHY_ROCKCHIP_EMMC=y +CONFIG_PHY_ROCKCHIP_INNO_USB2=y CONFIG_PHY_ROCKCHIP_PCIE=m CONFIG_PHY_ROCKCHIP_TYPEC=y -CONFIG_PHY_XGENE=y CONFIG_PHY_TEGRA_XUSB=y CONFIG_QCOM_L2_PMU=y CONFIG_QCOM_L3_PMU=y -CONFIG_MESON_EFUSE=m CONFIG_QCOM_QFPROM=y CONFIG_ROCKCHIP_EFUSE=y CONFIG_UNIPHIER_EFUSE=y +CONFIG_MESON_EFUSE=m CONFIG_TEE=y CONFIG_OPTEE=y CONFIG_ARM_SCPI_PROTOCOL=y @@ -647,7 +630,6 @@ CONFIG_EFI_CAPSULE_LOADER=y CONFIG_ACPI=y CONFIG_ACPI_APEI=y CONFIG_ACPI_APEI_GHES=y -CONFIG_ACPI_APEI_PCIEAER=y CONFIG_ACPI_APEI_MEMORY_FAILURE=y CONFIG_ACPI_APEI_EINJ=y CONFIG_EXT2_FS=y @@ -682,7 +664,6 @@ CONFIG_DEBUG_INFO=y CONFIG_DEBUG_FS=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y -CONFIG_LOCKUP_DETECTOR=y # CONFIG_SCHED_DEBUG is not set # CONFIG_DEBUG_PREEMPT is not set # CONFIG_FTRACE is not set @@ -691,20 +672,15 @@ CONFIG_SECURITY=y CONFIG_CRYPTO_ECHAINIV=y CONFIG_CRYPTO_ANSI_CPRNG=y CONFIG_ARM64_CRYPTO=y -CONFIG_CRYPTO_SHA256_ARM64=m -CONFIG_CRYPTO_SHA512_ARM64=m CONFIG_CRYPTO_SHA1_ARM64_CE=y CONFIG_CRYPTO_SHA2_ARM64_CE=y -CONFIG_CRYPTO_GHASH_ARM64_CE=y -CONFIG_CRYPTO_CRCT10DIF_ARM64_CE=m -CONFIG_CRYPTO_CRC32_ARM64_CE=m -CONFIG_CRYPTO_AES_ARM64=m -CONFIG_CRYPTO_AES_ARM64_CE=m -CONFIG_CRYPTO_AES_ARM64_CE_CCM=y -CONFIG_CRYPTO_AES_ARM64_CE_BLK=y -CONFIG_CRYPTO_AES_ARM64_NEON_BLK=m -CONFIG_CRYPTO_CHACHA20_NEON=m -CONFIG_CRYPTO_AES_ARM64_BS=m CONFIG_CRYPTO_SHA512_ARM64_CE=m CONFIG_CRYPTO_SHA3_ARM64=m CONFIG_CRYPTO_SM3_ARM64_CE=m +CONFIG_CRYPTO_GHASH_ARM64_CE=y +CONFIG_CRYPTO_CRCT10DIF_ARM64_CE=m +CONFIG_CRYPTO_CRC32_ARM64_CE=m +CONFIG_CRYPTO_AES_ARM64_CE_CCM=y +CONFIG_CRYPTO_AES_ARM64_CE_BLK=y +CONFIG_CRYPTO_CHACHA20_NEON=m +CONFIG_CRYPTO_AES_ARM64_BS=m From fd7d58f0dbc3e3c6d68d4a3dccabfe1e52a23345 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Sat, 23 Jun 2018 10:24:23 -0700 Subject: [PATCH 406/572] ARM: multi_v7_defconfig: renormalize based on recent additions The defconfig has drifted over time, as Kconfig entries have changed order or default values. Several maintainers ended up running 'savedefconfig' themselves which caused a cascade of conflicts. Let's do it once and for all in our tree before -rc2 instead. Signed-off-by: Olof Johansson --- arch/arm/configs/multi_v7_defconfig | 390 ++++++++++++---------------- 1 file changed, 161 insertions(+), 229 deletions(-) diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index 7e1c543162c3..8f6be1982545 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -1,5 +1,4 @@ CONFIG_SYSVIPC=y -CONFIG_FHANDLE=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_CGROUPS=y @@ -10,20 +9,10 @@ CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_PARTITION_ADVANCED=y CONFIG_CMDLINE_PARTITION=y -CONFIG_ARCH_MULTI_V7=y -# CONFIG_ARCH_MULTI_V5 is not set -# CONFIG_ARCH_MULTI_V4 is not set CONFIG_ARCH_VIRT=y CONFIG_ARCH_ALPINE=y CONFIG_ARCH_ARTPEC=y CONFIG_MACH_ARTPEC6=y -CONFIG_ARCH_MVEBU=y -CONFIG_MACH_ARMADA_370=y -CONFIG_MACH_ARMADA_375=y -CONFIG_MACH_ARMADA_38X=y -CONFIG_MACH_ARMADA_39X=y -CONFIG_MACH_ARMADA_XP=y -CONFIG_MACH_DOVE=y CONFIG_ARCH_AT91=y CONFIG_SOC_SAMA5D2=y CONFIG_SOC_SAMA5D3=y @@ -32,9 +21,9 @@ CONFIG_ARCH_BCM=y CONFIG_ARCH_BCM_CYGNUS=y CONFIG_ARCH_BCM_HR2=y CONFIG_ARCH_BCM_NSP=y -CONFIG_ARCH_BCM_21664=y -CONFIG_ARCH_BCM_281XX=y CONFIG_ARCH_BCM_5301X=y +CONFIG_ARCH_BCM_281XX=y +CONFIG_ARCH_BCM_21664=y CONFIG_ARCH_BCM2835=y CONFIG_ARCH_BCM_63XX=y CONFIG_ARCH_BRCMSTB=y @@ -43,14 +32,14 @@ CONFIG_MACH_BERLIN_BG2=y CONFIG_MACH_BERLIN_BG2CD=y CONFIG_MACH_BERLIN_BG2Q=y CONFIG_ARCH_DIGICOLOR=y +CONFIG_ARCH_EXYNOS=y +CONFIG_EXYNOS5420_MCPM=y CONFIG_ARCH_HIGHBANK=y CONFIG_ARCH_HISI=y CONFIG_ARCH_HI3xxx=y -CONFIG_ARCH_HIX5HD2=y CONFIG_ARCH_HIP01=y CONFIG_ARCH_HIP04=y -CONFIG_ARCH_KEYSTONE=y -CONFIG_ARCH_MESON=y +CONFIG_ARCH_HIX5HD2=y CONFIG_ARCH_MXC=y CONFIG_SOC_IMX50=y CONFIG_SOC_IMX51=y @@ -60,29 +49,30 @@ CONFIG_SOC_IMX6SL=y CONFIG_SOC_IMX6SX=y CONFIG_SOC_IMX6UL=y CONFIG_SOC_IMX7D=y -CONFIG_SOC_VF610=y CONFIG_SOC_LS1021A=y +CONFIG_SOC_VF610=y +CONFIG_ARCH_KEYSTONE=y +CONFIG_ARCH_MEDIATEK=y +CONFIG_ARCH_MESON=y +CONFIG_ARCH_MVEBU=y +CONFIG_MACH_ARMADA_370=y +CONFIG_MACH_ARMADA_375=y +CONFIG_MACH_ARMADA_38X=y +CONFIG_MACH_ARMADA_39X=y +CONFIG_MACH_ARMADA_XP=y +CONFIG_MACH_DOVE=y CONFIG_ARCH_OMAP3=y CONFIG_ARCH_OMAP4=y CONFIG_SOC_OMAP5=y CONFIG_SOC_AM33XX=y CONFIG_SOC_AM43XX=y CONFIG_SOC_DRA7XX=y +CONFIG_ARCH_SIRF=y CONFIG_ARCH_QCOM=y -CONFIG_ARCH_MEDIATEK=y CONFIG_ARCH_MSM8X60=y CONFIG_ARCH_MSM8960=y CONFIG_ARCH_MSM8974=y CONFIG_ARCH_ROCKCHIP=y -CONFIG_ARCH_SOCFPGA=y -CONFIG_PLAT_SPEAR=y -CONFIG_ARCH_SPEAR13XX=y -CONFIG_MACH_SPEAR1310=y -CONFIG_MACH_SPEAR1340=y -CONFIG_ARCH_STI=y -CONFIG_ARCH_STM32=y -CONFIG_ARCH_EXYNOS=y -CONFIG_EXYNOS5420_MCPM=y CONFIG_ARCH_RENESAS=y CONFIG_ARCH_EMEV2=y CONFIG_ARCH_R7S72100=y @@ -99,40 +89,33 @@ CONFIG_ARCH_R8A7792=y CONFIG_ARCH_R8A7793=y CONFIG_ARCH_R8A7794=y CONFIG_ARCH_SH73A0=y +CONFIG_ARCH_SOCFPGA=y +CONFIG_PLAT_SPEAR=y +CONFIG_ARCH_SPEAR13XX=y +CONFIG_MACH_SPEAR1310=y +CONFIG_MACH_SPEAR1340=y +CONFIG_ARCH_STI=y +CONFIG_ARCH_STM32=y CONFIG_ARCH_SUNXI=y -CONFIG_ARCH_SIRF=y CONFIG_ARCH_TEGRA=y -CONFIG_ARCH_TEGRA_2x_SOC=y -CONFIG_ARCH_TEGRA_3x_SOC=y -CONFIG_ARCH_TEGRA_114_SOC=y -CONFIG_ARCH_TEGRA_124_SOC=y CONFIG_ARCH_UNIPHIER=y CONFIG_ARCH_U8500=y -CONFIG_MACH_HREFV60=y -CONFIG_MACH_SNOWBALL=y CONFIG_ARCH_VEXPRESS=y CONFIG_ARCH_VEXPRESS_TC2_PM=y CONFIG_ARCH_WM8850=y CONFIG_ARCH_ZYNQ=y -CONFIG_TRUSTED_FOUNDATIONS=y -CONFIG_PCI=y -CONFIG_PCI_HOST_GENERIC=y -CONFIG_PCI_DRA7XX=y -CONFIG_PCI_DRA7XX_EP=y -CONFIG_PCI_KEYSTONE=y -CONFIG_PCI_MSI=y +CONFIG_PCIEPORTBUS=y CONFIG_PCI_MVEBU=y CONFIG_PCI_TEGRA=y CONFIG_PCI_RCAR_GEN2=y CONFIG_PCIE_RCAR=y -CONFIG_PCIEPORTBUS=y +CONFIG_PCI_DRA7XX_EP=y +CONFIG_PCI_KEYSTONE=y CONFIG_PCI_ENDPOINT=y CONFIG_PCI_ENDPOINT_CONFIGFS=y CONFIG_PCI_EPF_TEST=m CONFIG_SMP=y CONFIG_NR_CPUS=16 -CONFIG_HIGHPTE=y -CONFIG_CMA=y CONFIG_SECCOMP=y CONFIG_ARM_APPENDED_DTB=y CONFIG_ARM_ATAG_DTB_COMPAT=y @@ -145,14 +128,14 @@ CONFIG_CPU_FREQ_GOV_POWERSAVE=m CONFIG_CPU_FREQ_GOV_USERSPACE=m CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y +CONFIG_CPUFREQ_DT=y CONFIG_ARM_IMX6Q_CPUFREQ=y CONFIG_QORIQ_CPUFREQ=y CONFIG_CPU_IDLE=y CONFIG_ARM_CPUIDLE=y -CONFIG_NEON=y -CONFIG_KERNEL_MODE_NEON=y CONFIG_ARM_ZYNQ_CPUIDLE=y CONFIG_ARM_EXYNOS_CPUIDLE=y +CONFIG_KERNEL_MODE_NEON=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -170,23 +153,13 @@ CONFIG_IPV6_MIP6=m CONFIG_IPV6_TUNNEL=m CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_NET_DSA=m -CONFIG_NET_SWITCHDEV=y CONFIG_CAN=y -CONFIG_CAN_RAW=y -CONFIG_CAN_BCM=y -CONFIG_CAN_DEV=y CONFIG_CAN_AT91=m CONFIG_CAN_FLEXCAN=m -CONFIG_CAN_RCAR=m -CONFIG_CAN_XILINXCAN=y -CONFIG_CAN_MCP251X=y -CONFIG_NET_DSA_BCM_SF2=m -CONFIG_B53=m -CONFIG_B53_SPI_DRIVER=m -CONFIG_B53_MDIO_DRIVER=m -CONFIG_B53_MMAP_DRIVER=m -CONFIG_B53_SRAB_DRIVER=m CONFIG_CAN_SUN4I=y +CONFIG_CAN_XILINXCAN=y +CONFIG_CAN_RCAR=m +CONFIG_CAN_MCP251X=y CONFIG_BT=m CONFIG_BT_HCIUART=m CONFIG_BT_HCIUART_BCM=y @@ -199,11 +172,9 @@ CONFIG_RFKILL_INPUT=y CONFIG_RFKILL_GPIO=y CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y -CONFIG_DMA_CMA=y CONFIG_CMA_SIZE_MBYTES=64 CONFIG_OMAP_OCP2SCP=y CONFIG_SIMPLE_PM_BUS=y -CONFIG_SUNXI_RSB=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y CONFIG_MTD_BLOCK=y @@ -236,7 +207,6 @@ CONFIG_PCI_ENDPOINT_TEST=m CONFIG_EEPROM_AT24=y CONFIG_BLK_DEV_SD=y CONFIG_BLK_DEV_SR=y -CONFIG_SCSI_MULTI_LUN=y CONFIG_ATA=y CONFIG_SATA_AHCI=y CONFIG_SATA_AHCI_PLATFORM=y @@ -251,14 +221,20 @@ CONFIG_SATA_MV=y CONFIG_SATA_RCAR=y CONFIG_NETDEVICES=y CONFIG_VIRTIO_NET=y -CONFIG_HIX5HD2_GMAC=y +CONFIG_B53_SPI_DRIVER=m +CONFIG_B53_MDIO_DRIVER=m +CONFIG_B53_MMAP_DRIVER=m +CONFIG_B53_SRAB_DRIVER=m +CONFIG_NET_DSA_BCM_SF2=m CONFIG_SUN4I_EMAC=y -CONFIG_MACB=y CONFIG_BCMGENET=m CONFIG_BGMAC_BCMA=y CONFIG_SYSTEMPORT=m +CONFIG_MACB=y CONFIG_NET_CALXEDA_XGMAC=y CONFIG_GIANFAR=y +CONFIG_HIX5HD2_GMAC=y +CONFIG_E1000E=y CONFIG_IGB=y CONFIG_MV643XX_ETH=y CONFIG_MVNETA=y @@ -268,19 +244,17 @@ CONFIG_R8169=y CONFIG_SH_ETH=y CONFIG_SMSC911X=y CONFIG_STMMAC_ETH=y -CONFIG_STMMAC_PLATFORM=y CONFIG_DWMAC_DWC_QOS_ETH=y CONFIG_TI_CPSW=y CONFIG_XILINX_EMACLITE=y CONFIG_AT803X_PHY=y -CONFIG_MARVELL_PHY=y -CONFIG_SMSC_PHY=y CONFIG_BROADCOM_PHY=y CONFIG_ICPLUS_PHY=y -CONFIG_REALTEK_PHY=y +CONFIG_MARVELL_PHY=y CONFIG_MICREL_PHY=y -CONFIG_FIXED_PHY=y +CONFIG_REALTEK_PHY=y CONFIG_ROCKCHIP_PHY=y +CONFIG_SMSC_PHY=y CONFIG_USB_PEGASUS=y CONFIG_USB_RTL8152=m CONFIG_USB_LAN78XX=m @@ -288,29 +262,29 @@ CONFIG_USB_USBNET=y CONFIG_USB_NET_SMSC75XX=y CONFIG_USB_NET_SMSC95XX=y CONFIG_BRCMFMAC=m -CONFIG_RT2X00=m -CONFIG_RT2800USB=m CONFIG_MWIFIEX=m CONFIG_MWIFIEX_SDIO=m +CONFIG_RT2X00=m +CONFIG_RT2800USB=m CONFIG_INPUT_JOYDEV=y CONFIG_INPUT_EVDEV=y CONFIG_KEYBOARD_QT1070=m CONFIG_KEYBOARD_GPIO=y CONFIG_KEYBOARD_TEGRA=y -CONFIG_KEYBOARD_SPEAR=y -CONFIG_KEYBOARD_ST_KEYSCAN=y -CONFIG_KEYBOARD_CROS_EC=m CONFIG_KEYBOARD_SAMSUNG=m +CONFIG_KEYBOARD_ST_KEYSCAN=y +CONFIG_KEYBOARD_SPEAR=y +CONFIG_KEYBOARD_CROS_EC=m CONFIG_MOUSE_PS2_ELANTECH=y CONFIG_MOUSE_CYAPA=m CONFIG_MOUSE_ELAN_I2C=y CONFIG_INPUT_TOUCHSCREEN=y CONFIG_TOUCHSCREEN_ATMEL_MXT=m CONFIG_TOUCHSCREEN_MMS114=m +CONFIG_TOUCHSCREEN_WM97XX=m CONFIG_TOUCHSCREEN_ST1232=m CONFIG_TOUCHSCREEN_STMPE=y CONFIG_TOUCHSCREEN_SUN4I=y -CONFIG_TOUCHSCREEN_WM97XX=m CONFIG_INPUT_MISC=y CONFIG_INPUT_MAX77693_HAPTIC=m CONFIG_INPUT_MAX8997_HAPTIC=m @@ -327,13 +301,12 @@ CONFIG_SERIAL_8250_DW=y CONFIG_SERIAL_8250_EM=y CONFIG_SERIAL_8250_MT6577=y CONFIG_SERIAL_8250_UNIPHIER=y +CONFIG_SERIAL_OF_PLATFORM=y CONFIG_SERIAL_AMBA_PL011=y CONFIG_SERIAL_AMBA_PL011_CONSOLE=y CONFIG_SERIAL_ATMEL=y CONFIG_SERIAL_ATMEL_CONSOLE=y CONFIG_SERIAL_ATMEL_TTYAT=y -CONFIG_SERIAL_BCM63XX=y -CONFIG_SERIAL_BCM63XX_CONSOLE=y CONFIG_SERIAL_MESON=y CONFIG_SERIAL_MESON_CONSOLE=y CONFIG_SERIAL_SAMSUNG=y @@ -345,15 +318,14 @@ CONFIG_SERIAL_IMX=y CONFIG_SERIAL_IMX_CONSOLE=y CONFIG_SERIAL_SH_SCI=y CONFIG_SERIAL_SH_SCI_NR_UARTS=20 -CONFIG_SERIAL_SH_SCI_CONSOLE=y -CONFIG_SERIAL_SH_SCI_DMA=y CONFIG_SERIAL_MSM=y CONFIG_SERIAL_MSM_CONSOLE=y CONFIG_SERIAL_VT8500=y CONFIG_SERIAL_VT8500_CONSOLE=y -CONFIG_SERIAL_OF_PLATFORM=y CONFIG_SERIAL_OMAP=y CONFIG_SERIAL_OMAP_CONSOLE=y +CONFIG_SERIAL_BCM63XX=y +CONFIG_SERIAL_BCM63XX_CONSOLE=y CONFIG_SERIAL_XILINX_PS_UART=y CONFIG_SERIAL_XILINX_PS_UART_CONSOLE=y CONFIG_SERIAL_FSL_LPUART=y @@ -365,12 +337,10 @@ CONFIG_SERIAL_ST_ASC_CONSOLE=y CONFIG_SERIAL_STM32=y CONFIG_SERIAL_STM32_CONSOLE=y CONFIG_SERIAL_DEV_BUS=y -CONFIG_HVC_DRIVER=y CONFIG_VIRTIO_CONSOLE=y +CONFIG_HW_RANDOM=y +CONFIG_HW_RANDOM_ST=y CONFIG_I2C_CHARDEV=y -CONFIG_I2C_DAVINCI=y -CONFIG_I2C_MESON=y -CONFIG_I2C_MUX=y CONFIG_I2C_ARB_GPIO_CHALLENGE=m CONFIG_I2C_MUX_PCA954x=y CONFIG_I2C_MUX_PINCTRL=y @@ -378,12 +348,13 @@ CONFIG_I2C_DEMUX_PINCTRL=y CONFIG_I2C_AT91=m CONFIG_I2C_BCM2835=y CONFIG_I2C_CADENCE=y +CONFIG_I2C_DAVINCI=y CONFIG_I2C_DESIGNWARE_PLATFORM=y CONFIG_I2C_DIGICOLOR=m CONFIG_I2C_EMEV2=m CONFIG_I2C_GPIO=m -CONFIG_I2C_EXYNOS5=y CONFIG_I2C_IMX=y +CONFIG_I2C_MESON=y CONFIG_I2C_MV64XXX=y CONFIG_I2C_RIIC=y CONFIG_I2C_RK3X=y @@ -427,7 +398,6 @@ CONFIG_SPI_SPIDEV=y CONFIG_SPMI=y CONFIG_PINCTRL_AS3722=y CONFIG_PINCTRL_PALMAS=y -CONFIG_PINCTRL_BCM2835=y CONFIG_PINCTRL_APQ8064=y CONFIG_PINCTRL_APQ8084=y CONFIG_PINCTRL_IPQ8064=y @@ -437,25 +407,33 @@ CONFIG_PINCTRL_MSM8X74=y CONFIG_PINCTRL_MSM8916=y CONFIG_PINCTRL_QCOM_SPMI_PMIC=y CONFIG_PINCTRL_QCOM_SSBI_PMIC=y -CONFIG_GPIO_GENERIC_PLATFORM=y CONFIG_GPIO_DAVINCI=y CONFIG_GPIO_DWAPB=y CONFIG_GPIO_EM=y CONFIG_GPIO_RCAR=y +CONFIG_GPIO_SYSCON=y CONFIG_GPIO_UNIPHIER=y CONFIG_GPIO_XILINX=y CONFIG_GPIO_ZYNQ=y CONFIG_GPIO_PCA953X=y CONFIG_GPIO_PCA953X_IRQ=y CONFIG_GPIO_PCF857X=y -CONFIG_GPIO_TWL4030=y CONFIG_GPIO_PALMAS=y -CONFIG_GPIO_SYSCON=y CONFIG_GPIO_TPS6586X=y CONFIG_GPIO_TPS65910=y +CONFIG_GPIO_TWL4030=y +CONFIG_POWER_AVS=y +CONFIG_ROCKCHIP_IODOMAIN=y +CONFIG_POWER_RESET_AS3722=y +CONFIG_POWER_RESET_GPIO=y +CONFIG_POWER_RESET_GPIO_RESTART=y +CONFIG_POWER_RESET_ST=y +CONFIG_POWER_RESET_KEYSTONE=y +CONFIG_POWER_RESET_RMOBILE=y CONFIG_BATTERY_ACT8945A=y CONFIG_BATTERY_CPCAP=m CONFIG_BATTERY_SBS=y +CONFIG_AXP20X_POWER=m CONFIG_BATTERY_MAX17040=m CONFIG_BATTERY_MAX17042=m CONFIG_CHARGER_CPCAP=m @@ -464,15 +442,6 @@ CONFIG_CHARGER_MAX77693=m CONFIG_CHARGER_MAX8997=m CONFIG_CHARGER_MAX8998=m CONFIG_CHARGER_TPS65090=y -CONFIG_AXP20X_POWER=m -CONFIG_POWER_RESET_AS3722=y -CONFIG_POWER_RESET_GPIO=y -CONFIG_POWER_RESET_GPIO_RESTART=y -CONFIG_POWER_RESET_KEYSTONE=y -CONFIG_POWER_RESET_RMOBILE=y -CONFIG_POWER_RESET_ST=y -CONFIG_POWER_AVS=y -CONFIG_ROCKCHIP_IODOMAIN=y CONFIG_SENSORS_IIO_HWMON=y CONFIG_SENSORS_LM90=y CONFIG_SENSORS_LM95245=y @@ -480,14 +449,12 @@ CONFIG_SENSORS_NTC_THERMISTOR=m CONFIG_SENSORS_PWM_FAN=m CONFIG_SENSORS_INA2XX=m CONFIG_CPU_THERMAL=y -CONFIG_BCM2835_THERMAL=m -CONFIG_BRCMSTB_THERMAL=m CONFIG_IMX_THERMAL=y CONFIG_ROCKCHIP_THERMAL=y CONFIG_RCAR_THERMAL=y CONFIG_ARMADA_THERMAL=y -CONFIG_DAVINCI_WATCHDOG=m -CONFIG_EXYNOS_THERMAL=m +CONFIG_BCM2835_THERMAL=m +CONFIG_BRCMSTB_THERMAL=m CONFIG_ST_THERMAL_MEMMAP=y CONFIG_WATCHDOG=y CONFIG_DA9063_WATCHDOG=m @@ -495,20 +462,24 @@ CONFIG_XILINX_WATCHDOG=y CONFIG_ARM_SP805_WATCHDOG=y CONFIG_AT91SAM9X_WATCHDOG=y CONFIG_SAMA5D4_WATCHDOG=y +CONFIG_DW_WATCHDOG=y +CONFIG_DAVINCI_WATCHDOG=m CONFIG_ORION_WATCHDOG=y CONFIG_RN5T618_WATCHDOG=y -CONFIG_ST_LPC_WATCHDOG=y CONFIG_SUNXI_WATCHDOG=y CONFIG_IMX2_WDT=y +CONFIG_ST_LPC_WATCHDOG=y CONFIG_TEGRA_WATCHDOG=m CONFIG_MESON_WATCHDOG=y -CONFIG_DW_WATCHDOG=y CONFIG_DIGICOLOR_WATCHDOG=y CONFIG_RENESAS_WDT=m -CONFIG_BCM2835_WDT=y CONFIG_BCM47XX_WDT=y -CONFIG_BCM7038_WDT=m +CONFIG_BCM2835_WDT=y CONFIG_BCM_KONA_WDT=y +CONFIG_BCM7038_WDT=m +CONFIG_BCMA_HOST_SOC=y +CONFIG_BCMA_DRIVER_GMAC_CMN=y +CONFIG_BCMA_DRIVER_GPIO=y CONFIG_MFD_ACT8945A=y CONFIG_MFD_AS3711=y CONFIG_MFD_AS3722=y @@ -516,7 +487,6 @@ CONFIG_MFD_ATMEL_FLEXCOM=y CONFIG_MFD_ATMEL_HLCDC=m CONFIG_MFD_BCM590XX=y CONFIG_MFD_AC100=y -CONFIG_MFD_AXP20X=y CONFIG_MFD_AXP20X_I2C=y CONFIG_MFD_AXP20X_RSB=y CONFIG_MFD_CROS_EC=m @@ -529,11 +499,11 @@ CONFIG_MFD_MAX77693=m CONFIG_MFD_MAX8907=y CONFIG_MFD_MAX8997=y CONFIG_MFD_MAX8998=y -CONFIG_MFD_RK808=y CONFIG_MFD_CPCAP=y CONFIG_MFD_PM8XXX=y CONFIG_MFD_QCOM_RPM=y CONFIG_MFD_SPMI_PMIC=y +CONFIG_MFD_RK808=y CONFIG_MFD_RN5T618=y CONFIG_MFD_SEC_CORE=y CONFIG_MFD_STMPE=y @@ -543,10 +513,10 @@ CONFIG_MFD_TPS65217=y CONFIG_MFD_TPS65218=y CONFIG_MFD_TPS6586X=y CONFIG_MFD_TPS65910=y -CONFIG_REGULATOR_ACT8945A=y -CONFIG_REGULATOR_AB8500=y CONFIG_REGULATOR_ACT8865=y +CONFIG_REGULATOR_ACT8945A=y CONFIG_REGULATOR_ANATOP=y +CONFIG_REGULATOR_AB8500=y CONFIG_REGULATOR_AS3711=y CONFIG_REGULATOR_AS3722=y CONFIG_REGULATOR_AXP20X=y @@ -554,10 +524,7 @@ CONFIG_REGULATOR_BCM590XX=y CONFIG_REGULATOR_CPCAP=y CONFIG_REGULATOR_DA9210=y CONFIG_REGULATOR_FAN53555=y -CONFIG_REGULATOR_RK808=y CONFIG_REGULATOR_GPIO=y -CONFIG_MFD_SYSCON=y -CONFIG_POWER_RESET_SYSCON=y CONFIG_REGULATOR_LP872X=y CONFIG_REGULATOR_MAX14577=m CONFIG_REGULATOR_MAX8907=y @@ -571,7 +538,8 @@ CONFIG_REGULATOR_PALMAS=y CONFIG_REGULATOR_PBIAS=y CONFIG_REGULATOR_PWM=y CONFIG_REGULATOR_QCOM_RPM=y -CONFIG_REGULATOR_QCOM_SMD_RPM=y +CONFIG_REGULATOR_QCOM_SMD_RPM=m +CONFIG_REGULATOR_RK808=y CONFIG_REGULATOR_RN5T618=y CONFIG_REGULATOR_S2MPS11=y CONFIG_REGULATOR_S5M8767=y @@ -592,18 +560,17 @@ CONFIG_MEDIA_CEC_SUPPORT=y CONFIG_MEDIA_CONTROLLER=y CONFIG_VIDEO_V4L2_SUBDEV_API=y CONFIG_MEDIA_USB_SUPPORT=y -CONFIG_USB_VIDEO_CLASS=y -CONFIG_USB_GSPCA=y +CONFIG_USB_VIDEO_CLASS=m CONFIG_V4L_PLATFORM_DRIVERS=y CONFIG_SOC_CAMERA=m CONFIG_SOC_CAMERA_PLATFORM=m -CONFIG_VIDEO_RCAR_VIN=m -CONFIG_VIDEO_ATMEL_ISI=m CONFIG_VIDEO_SAMSUNG_EXYNOS4_IS=m CONFIG_VIDEO_S5P_FIMC=m CONFIG_VIDEO_S5P_MIPI_CSIS=m CONFIG_VIDEO_EXYNOS_FIMC_LITE=m CONFIG_VIDEO_EXYNOS4_FIMC_IS=m +CONFIG_VIDEO_RCAR_VIN=m +CONFIG_VIDEO_ATMEL_ISI=m CONFIG_V4L_MEM2MEM_DRIVERS=y CONFIG_VIDEO_SAMSUNG_S5P_JPEG=m CONFIG_VIDEO_SAMSUNG_S5P_MFC=m @@ -614,19 +581,15 @@ CONFIG_VIDEO_STI_DELTA=m CONFIG_VIDEO_RENESAS_JPU=m CONFIG_VIDEO_RENESAS_VSP1=m CONFIG_V4L_TEST_DRIVERS=y +CONFIG_VIDEO_VIVID=m CONFIG_CEC_PLATFORM_DRIVERS=y CONFIG_VIDEO_SAMSUNG_S5P_CEC=m # CONFIG_MEDIA_SUBDRV_AUTOSELECT is not set CONFIG_VIDEO_ADV7180=m CONFIG_VIDEO_ML86V7667=m CONFIG_DRM=y -CONFIG_DRM_I2C_ADV7511=m -CONFIG_DRM_I2C_ADV7511_AUDIO=y # CONFIG_DRM_I2C_CH7006 is not set # CONFIG_DRM_I2C_SIL164 is not set -CONFIG_DRM_DUMB_VGA_DAC=m -CONFIG_DRM_NXP_PTN3460=m -CONFIG_DRM_PARADE_PS8622=m CONFIG_DRM_NOUVEAU=m CONFIG_DRM_EXYNOS=m CONFIG_DRM_EXYNOS_FIMD=y @@ -645,13 +608,18 @@ CONFIG_DRM_RCAR_LVDS=y CONFIG_DRM_SUN4I=m CONFIG_DRM_FSL_DCU=m CONFIG_DRM_TEGRA=y +CONFIG_DRM_PANEL_SIMPLE=y CONFIG_DRM_PANEL_SAMSUNG_LD9040=m CONFIG_DRM_PANEL_SAMSUNG_S6E63J0X03=m CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0=m -CONFIG_DRM_PANEL_SIMPLE=y +CONFIG_DRM_DUMB_VGA_DAC=m +CONFIG_DRM_NXP_PTN3460=m +CONFIG_DRM_PARADE_PS8622=m CONFIG_DRM_SII9234=m +CONFIG_DRM_I2C_ADV7511=m +CONFIG_DRM_I2C_ADV7511_AUDIO=y CONFIG_DRM_STI=m -CONFIG_DRM_VC4=y +CONFIG_DRM_VC4=m CONFIG_DRM_ETNAVIV=m CONFIG_DRM_MXSFB=m CONFIG_FB_ARMCLCD=y @@ -659,8 +627,6 @@ CONFIG_FB_EFI=y CONFIG_FB_WM8505=y CONFIG_FB_SH_MOBILE_LCDC=y CONFIG_FB_SIMPLE=y -CONFIG_BACKLIGHT_LCD_SUPPORT=y -CONFIG_BACKLIGHT_CLASS_DEVICE=y CONFIG_LCD_PLATFORM=m CONFIG_BACKLIGHT_PWM=y CONFIG_BACKLIGHT_AS3711=y @@ -668,7 +634,6 @@ CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y CONFIG_SOUND=m CONFIG_SND=m -CONFIG_SND_DYNAMIC_MINORS=y CONFIG_SND_HDA_TEGRA=m CONFIG_SND_HDA_INPUT_BEEP=y CONFIG_SND_HDA_PATCH_LOADER=y @@ -692,7 +657,7 @@ CONFIG_SND_SOC_SNOW=m CONFIG_SND_SOC_ODROID=m CONFIG_SND_SOC_SH4_FSI=m CONFIG_SND_SOC_RCAR=m -CONFIG_SND_SIMPLE_SCU_CARD=m +CONFIG_SND_SOC_STI=m CONFIG_SND_SUN4I_CODEC=m CONFIG_SND_SOC_TEGRA=m CONFIG_SND_SOC_TEGRA20_I2S=m @@ -703,31 +668,25 @@ CONFIG_SND_SOC_TEGRA_WM8903=m CONFIG_SND_SOC_TEGRA_WM9712=m CONFIG_SND_SOC_TEGRA_TRIMSLICE=m CONFIG_SND_SOC_TEGRA_ALC5632=m -CONFIG_SND_SOC_CPCAP=m CONFIG_SND_SOC_TEGRA_MAX98090=m CONFIG_SND_SOC_AK4642=m +CONFIG_SND_SOC_CPCAP=m CONFIG_SND_SOC_SGTL5000=m CONFIG_SND_SOC_SPDIF=m -CONFIG_SND_SOC_WM8978=m -CONFIG_SND_SOC_STI=m CONFIG_SND_SOC_STI_SAS=m -CONFIG_SND_SIMPLE_CARD=m +CONFIG_SND_SOC_WM8978=m +CONFIG_SND_SIMPLE_SCU_CARD=m CONFIG_USB=y CONFIG_USB_OTG=y CONFIG_USB_XHCI_HCD=y CONFIG_USB_XHCI_MVEBU=y -CONFIG_USB_XHCI_RCAR=m CONFIG_USB_XHCI_TEGRA=m CONFIG_USB_EHCI_HCD=y -CONFIG_USB_EHCI_MSM=m -CONFIG_USB_EHCI_EXYNOS=y -CONFIG_USB_EHCI_TEGRA=y CONFIG_USB_EHCI_HCD_STI=y -CONFIG_USB_EHCI_HCD_PLATFORM=y -CONFIG_USB_ISP1760=y +CONFIG_USB_EHCI_TEGRA=y +CONFIG_USB_EHCI_EXYNOS=y CONFIG_USB_OHCI_HCD=y CONFIG_USB_OHCI_HCD_STI=y -CONFIG_USB_OHCI_HCD_PLATFORM=y CONFIG_USB_OHCI_EXYNOS=m CONFIG_USB_R8A66597_HCD=m CONFIG_USB_RENESAS_USBHS=m @@ -746,18 +705,18 @@ CONFIG_USB_TI_CPPI41_DMA=y CONFIG_USB_TUSB_OMAP_DMA=y CONFIG_USB_DWC3=y CONFIG_USB_DWC2=y -CONFIG_USB_HSIC_USB3503=y CONFIG_USB_CHIPIDEA=y CONFIG_USB_CHIPIDEA_UDC=y CONFIG_USB_CHIPIDEA_HOST=y +CONFIG_USB_ISP1760=y +CONFIG_USB_HSIC_USB3503=y CONFIG_AB8500_USB=y -CONFIG_KEYSTONE_USB_PHY=y +CONFIG_KEYSTONE_USB_PHY=m CONFIG_NOP_USB_XCEIV=m CONFIG_AM335X_PHY_USB=m CONFIG_TWL6030_USB=m CONFIG_USB_GPIO_VBUS=y CONFIG_USB_ISP1301=y -CONFIG_USB_MSM_OTG=m CONFIG_USB_MXS_PHY=y CONFIG_USB_GADGET=y CONFIG_USB_FSL_USB2=y @@ -793,21 +752,20 @@ CONFIG_MMC_SDHCI_OF_ESDHC=y CONFIG_MMC_SDHCI_ESDHC_IMX=y CONFIG_MMC_SDHCI_DOVE=y CONFIG_MMC_SDHCI_TEGRA=y +CONFIG_MMC_SDHCI_S3C=y CONFIG_MMC_SDHCI_PXAV3=y CONFIG_MMC_SDHCI_SPEAR=y -CONFIG_MMC_SDHCI_S3C=y CONFIG_MMC_SDHCI_S3C_DMA=y CONFIG_MMC_SDHCI_BCM_KONA=y +CONFIG_MMC_MESON_MX_SDIO=y CONFIG_MMC_SDHCI_ST=y CONFIG_MMC_OMAP=y CONFIG_MMC_OMAP_HS=y CONFIG_MMC_ATMELMCI=y CONFIG_MMC_SDHCI_MSM=y -CONFIG_MMC_MESON_MX_SDIO=y CONFIG_MMC_MVSDIO=y CONFIG_MMC_SDHI=y CONFIG_MMC_DW=y -CONFIG_MMC_DW_PLTFM=y CONFIG_MMC_DW_EXYNOS=y CONFIG_MMC_DW_ROCKCHIP=y CONFIG_MMC_SH_MMCIF=y @@ -847,94 +805,85 @@ CONFIG_RTC_DRV_MAX77686=y CONFIG_RTC_DRV_RK808=m CONFIG_RTC_DRV_RS5C372=m CONFIG_RTC_DRV_BQ32K=m -CONFIG_RTC_DRV_PALMAS=y -CONFIG_RTC_DRV_ST_LPC=y CONFIG_RTC_DRV_TWL4030=y +CONFIG_RTC_DRV_PALMAS=y CONFIG_RTC_DRV_TPS6586X=y CONFIG_RTC_DRV_TPS65910=y CONFIG_RTC_DRV_S35390A=m CONFIG_RTC_DRV_RX8581=m CONFIG_RTC_DRV_EM3027=y +CONFIG_RTC_DRV_S5M=m CONFIG_RTC_DRV_DA9063=m CONFIG_RTC_DRV_EFI=m CONFIG_RTC_DRV_DIGICOLOR=m -CONFIG_RTC_DRV_S5M=m CONFIG_RTC_DRV_S3C=m CONFIG_RTC_DRV_PL031=y CONFIG_RTC_DRV_AT91RM9200=m CONFIG_RTC_DRV_AT91SAM9=m CONFIG_RTC_DRV_VT8500=y -CONFIG_RTC_DRV_SUN6I=y CONFIG_RTC_DRV_SUNXI=y CONFIG_RTC_DRV_MV=y CONFIG_RTC_DRV_TEGRA=y +CONFIG_RTC_DRV_ST_LPC=y CONFIG_RTC_DRV_CPCAP=m CONFIG_DMADEVICES=y -CONFIG_DW_DMAC=y CONFIG_AT_HDMAC=y CONFIG_AT_XDMAC=y +CONFIG_DMA_BCM2835=y +CONFIG_DMA_SUN6I=y CONFIG_FSL_EDMA=y +CONFIG_IMX_DMA=y +CONFIG_IMX_SDMA=y CONFIG_MV_XOR=y +CONFIG_MXS_DMA=y +CONFIG_PL330_DMA=y +CONFIG_SIRF_DMA=y +CONFIG_STE_DMA40=y +CONFIG_ST_FDMA=m CONFIG_TEGRA20_APB_DMA=y +CONFIG_XILINX_DMA=y +CONFIG_QCOM_BAM_DMA=y +CONFIG_DW_DMAC=y CONFIG_SH_DMAE=y CONFIG_RCAR_DMAC=y CONFIG_RENESAS_USB_DMAC=m -CONFIG_STE_DMA40=y -CONFIG_SIRF_DMA=y -CONFIG_TI_EDMA=y -CONFIG_PL330_DMA=y -CONFIG_IMX_SDMA=y -CONFIG_IMX_DMA=y -CONFIG_MXS_DMA=y -CONFIG_DMA_BCM2835=y -CONFIG_DMA_OMAP=y -CONFIG_QCOM_BAM_DMA=y -CONFIG_XILINX_DMA=y -CONFIG_DMA_SUN6I=y -CONFIG_ST_FDMA=m +CONFIG_VIRTIO_PCI=y +CONFIG_VIRTIO_MMIO=y CONFIG_STAGING=y -CONFIG_SENSORS_ISL29018=y -CONFIG_SENSORS_ISL29028=y CONFIG_MFD_NVEC=y CONFIG_KEYBOARD_NVEC=y CONFIG_SERIO_NVEC_PS2=y CONFIG_NVEC_POWER=y CONFIG_NVEC_PAZ00=y -CONFIG_BCMA=y -CONFIG_BCMA_HOST_SOC=y -CONFIG_BCMA_DRIVER_GMAC_CMN=y -CONFIG_BCMA_DRIVER_GPIO=y -CONFIG_QCOM_GSBI=y -CONFIG_QCOM_PM=y -CONFIG_QCOM_SMEM=y -CONFIG_QCOM_SMD_RPM=y -CONFIG_QCOM_SMP2P=y -CONFIG_QCOM_SMSM=y -CONFIG_QCOM_WCNSS_CTRL=m -CONFIG_ROCKCHIP_PM_DOMAINS=y -CONFIG_COMMON_CLK_QCOM=y -CONFIG_QCOM_CLK_RPM=y -CONFIG_CHROME_PLATFORMS=y CONFIG_STAGING_BOARD=y -CONFIG_CROS_EC_CHARDEV=m CONFIG_COMMON_CLK_MAX77686=y CONFIG_COMMON_CLK_RK808=m CONFIG_COMMON_CLK_S2MPS11=m +CONFIG_COMMON_CLK_QCOM=y +CONFIG_QCOM_CLK_RPM=y CONFIG_APQ_MMCC_8084=y CONFIG_MSM_GCC_8660=y CONFIG_MSM_MMCC_8960=y CONFIG_MSM_MMCC_8974=y -CONFIG_HWSPINLOCK_QCOM=y +CONFIG_BCM2835_MBOX=y CONFIG_ROCKCHIP_IOMMU=y CONFIG_TEGRA_IOMMU_GART=y CONFIG_TEGRA_IOMMU_SMMU=y CONFIG_REMOTEPROC=m CONFIG_ST_REMOTEPROC=m CONFIG_RPMSG_VIRTIO=m +CONFIG_RASPBERRYPI_POWER=y +CONFIG_QCOM_GSBI=y +CONFIG_QCOM_PM=y +CONFIG_QCOM_SMD_RPM=m +CONFIG_QCOM_WCNSS_CTRL=m +CONFIG_ROCKCHIP_PM_DOMAINS=y +CONFIG_ARCH_TEGRA_2x_SOC=y +CONFIG_ARCH_TEGRA_3x_SOC=y +CONFIG_ARCH_TEGRA_114_SOC=y +CONFIG_ARCH_TEGRA_124_SOC=y CONFIG_PM_DEVFREQ=y CONFIG_ARM_TEGRA_DEVFREQ=m -CONFIG_MEMORY=y -CONFIG_EXTCON=y CONFIG_TI_AEMIF=y CONFIG_IIO=y CONFIG_IIO_SW_TRIGGER=y @@ -947,56 +896,54 @@ CONFIG_VF610_ADC=m CONFIG_XILINX_XADC=y CONFIG_MPU3050_I2C=y CONFIG_CM36651=m +CONFIG_SENSORS_ISL29018=y +CONFIG_SENSORS_ISL29028=y CONFIG_AK8975=y -CONFIG_RASPBERRYPI_POWER=y CONFIG_IIO_HRTIMER_TRIGGER=y CONFIG_PWM=y CONFIG_PWM_ATMEL=m CONFIG_PWM_ATMEL_HLCDC_PWM=m CONFIG_PWM_ATMEL_TCB=m +CONFIG_PWM_BCM2835=y +CONFIG_PWM_BRCMSTB=m CONFIG_PWM_FSL_FTM=m CONFIG_PWM_MESON=m CONFIG_PWM_RCAR=m CONFIG_PWM_RENESAS_TPU=y CONFIG_PWM_ROCKCHIP=m CONFIG_PWM_SAMSUNG=m +CONFIG_PWM_STI=y CONFIG_PWM_SUN4I=y CONFIG_PWM_TEGRA=y CONFIG_PWM_VT8500=y +CONFIG_KEYSTONE_IRQ=y +CONFIG_PHY_SUN4I_USB=y +CONFIG_PHY_SUN9I_USB=y CONFIG_PHY_HIX5HD2_SATA=y -CONFIG_E1000E=y -CONFIG_PWM_STI=y -CONFIG_PWM_BCM2835=y -CONFIG_PWM_BRCMSTB=m +CONFIG_PHY_BERLIN_SATA=y +CONFIG_PHY_BERLIN_USB=y +CONFIG_PHY_CPCAP_USB=m +CONFIG_PHY_QCOM_APQ8064_SATA=m +CONFIG_PHY_RCAR_GEN2=m +CONFIG_PHY_ROCKCHIP_DP=m +CONFIG_PHY_ROCKCHIP_USB=y +CONFIG_PHY_SAMSUNG_USB2=m +CONFIG_PHY_MIPHY28LP=y +CONFIG_PHY_STIH407_USB=y +CONFIG_PHY_STM32_USBPHYC=y +CONFIG_PHY_TEGRA_XUSB=y CONFIG_PHY_DM816X_USB=m CONFIG_OMAP_USB2=y CONFIG_TI_PIPE3=y CONFIG_TWL4030_USB=m -CONFIG_PHY_BERLIN_USB=y -CONFIG_PHY_CPCAP_USB=m -CONFIG_PHY_BERLIN_SATA=y -CONFIG_PHY_ROCKCHIP_DP=m -CONFIG_PHY_ROCKCHIP_USB=y -CONFIG_PHY_QCOM_APQ8064_SATA=m -CONFIG_PHY_MIPHY28LP=y -CONFIG_PHY_RCAR_GEN2=m -CONFIG_PHY_STIH407_USB=y -CONFIG_PHY_STM32_USBPHYC=y -CONFIG_PHY_SUN4I_USB=y -CONFIG_PHY_SUN9I_USB=y -CONFIG_PHY_SAMSUNG_USB2=m -CONFIG_PHY_TEGRA_XUSB=y -CONFIG_PHY_BRCM_SATA=y -CONFIG_NVMEM=y CONFIG_NVMEM_IMX_OCOTP=y CONFIG_NVMEM_SUNXI_SID=y CONFIG_NVMEM_VF610_OCOTP=y -CONFIG_BCM2835_MBOX=y CONFIG_RASPBERRYPI_FIRMWARE=y -CONFIG_EFI_VARS=m -CONFIG_EFI_CAPSULE_LOADER=m CONFIG_BCM47XX_NVRAM=y CONFIG_BCM47XX_SPROM=y +CONFIG_EFI_VARS=m +CONFIG_EFI_CAPSULE_LOADER=m CONFIG_EXT4_FS=y CONFIG_AUTOFS4_FS=y CONFIG_MSDOS_FS=y @@ -1004,7 +951,6 @@ CONFIG_VFAT_FS=y CONFIG_NTFS_FS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_UBIFS_FS=y -CONFIG_TMPFS=y CONFIG_SQUASHFS=y CONFIG_SQUASHFS_LZO=y CONFIG_SQUASHFS_XZ=y @@ -1020,13 +966,7 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=y CONFIG_PRINTK_TIME=y -CONFIG_DEBUG_FS=y CONFIG_MAGIC_SYSRQ=y -CONFIG_LOCKUP_DETECTOR=y -CONFIG_CPUFREQ_DT=y -CONFIG_KEYSTONE_IRQ=y -CONFIG_HW_RANDOM=y -CONFIG_HW_RANDOM_ST=y CONFIG_CRYPTO_USER=m CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_USER_API_SKCIPHER=m @@ -1035,27 +975,19 @@ CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_CRYPTO_DEV_MARVELL_CESA=m CONFIG_CRYPTO_DEV_EXYNOS_RNG=m CONFIG_CRYPTO_DEV_S5P=m +CONFIG_CRYPTO_DEV_ATMEL_AES=m +CONFIG_CRYPTO_DEV_ATMEL_TDES=m +CONFIG_CRYPTO_DEV_ATMEL_SHA=m CONFIG_CRYPTO_DEV_SUN4I_SS=m CONFIG_CRYPTO_DEV_ROCKCHIP=m CONFIG_ARM_CRYPTO=y -CONFIG_CRYPTO_SHA1_ARM=m CONFIG_CRYPTO_SHA1_ARM_NEON=m CONFIG_CRYPTO_SHA1_ARM_CE=m CONFIG_CRYPTO_SHA2_ARM_CE=m -CONFIG_CRYPTO_SHA256_ARM=m CONFIG_CRYPTO_SHA512_ARM=m CONFIG_CRYPTO_AES_ARM=m CONFIG_CRYPTO_AES_ARM_BS=m CONFIG_CRYPTO_AES_ARM_CE=m -CONFIG_CRYPTO_CHACHA20_NEON=m -CONFIG_CRYPTO_CRC32_ARM_CE=m -CONFIG_CRYPTO_CRCT10DIF_ARM_CE=m CONFIG_CRYPTO_GHASH_ARM_CE=m -CONFIG_CRYPTO_DEV_ATMEL_AES=m -CONFIG_CRYPTO_DEV_ATMEL_TDES=m -CONFIG_CRYPTO_DEV_ATMEL_SHA=m -CONFIG_VIDEO_VIVID=m -CONFIG_VIRTIO=y -CONFIG_VIRTIO_PCI=y -CONFIG_VIRTIO_PCI_LEGACY=y -CONFIG_VIRTIO_MMIO=y +CONFIG_CRYPTO_CRC32_ARM_CE=m +CONFIG_CRYPTO_CHACHA20_NEON=m From 0fff9001840c314fa4473dee256976b1e4535e73 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 6 Jun 2018 11:25:16 +0200 Subject: [PATCH 407/572] ARM: Always build secure_cntvoff.S on ARM V7 to fix shmobile !SMP build If CONFIG_SMP=n, building a kernel for R-Car Gen2 fails with: arch/arm/mach-shmobile/setup-rcar-gen2.o: In function `rcar_gen2_timer_init': setup-rcar-gen2.c:(.init.text+0x30): undefined reference to `secure_cntvoff_init' Indeed, on R-Car Gen2 SoCs, secure_cntvoff_init() is not only needed for secondary CPUs, but also for the boot CPU. This is most visible on SoCs with Cortex A7 cores (e.g. R-Car E2, cfr. commit 9ce3fa6816c2fb59 ("ARM: shmobile: rcar-gen2: Add CA7 arch_timer initialization for r8a7794")), but Cortex A15 is affected, too. Fix this by always providing secure_cntvoff_init() when building for ARM V7. Reported-by: Arnd Bergmann Fixes: 7c607944bc657616 ("ARM: smp: Add initialization of CNTVOFF") Fixes: cad160ed0a94927e ("ARM: shmobile: Convert file to use cntvoff") Signed-off-by: Geert Uytterhoeven Reviewed-by: Maxime Ripard Reviewed-by: Simon Horman Signed-off-by: Olof Johansson --- arch/arm/common/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile index 1e9f7af8f70f..3157be413297 100644 --- a/arch/arm/common/Makefile +++ b/arch/arm/common/Makefile @@ -10,7 +10,7 @@ obj-$(CONFIG_DMABOUNCE) += dmabounce.o obj-$(CONFIG_SHARP_LOCOMO) += locomo.o obj-$(CONFIG_SHARP_PARAM) += sharpsl_param.o obj-$(CONFIG_SHARP_SCOOP) += scoop.o -obj-$(CONFIG_SMP) += secure_cntvoff.o +obj-$(CONFIG_CPU_V7) += secure_cntvoff.o obj-$(CONFIG_PCI_HOST_ITE8152) += it8152.o obj-$(CONFIG_MCPM) += mcpm_head.o mcpm_entry.o mcpm_platsmp.o vlock.o CFLAGS_REMOVE_mcpm_entry.o = -pg From 5ce36338a30f9814fc4824f9fe6c20cd83d872c7 Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Sat, 23 Jun 2018 20:28:26 +0530 Subject: [PATCH 408/572] cxgb4: when disabling dcb set txq dcb priority to 0 When we are disabling DCB, store "0" in txq->dcb_prio since that's used for future TX Work Request "OVLAN_IDX" values. Setting non zero priority upon disabling DCB would halt the traffic. Reported-by: AMG Zollner Robert CC: David Ahern Signed-off-by: Casey Leedom Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index dd04a2f89ce6..bc03c175a3cd 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -263,7 +263,7 @@ static void dcb_tx_queue_prio_enable(struct net_device *dev, int enable) "Can't %s DCB Priority on port %d, TX Queue %d: err=%d\n", enable ? "set" : "unset", pi->port_id, i, -err); else - txq->dcb_prio = value; + txq->dcb_prio = enable ? value : 0; } } From e7e197edd09c25774b4f12cab19f9d5462f240f4 Mon Sep 17 00:00:00 2001 From: Aleksander Morgado Date: Sat, 23 Jun 2018 23:22:52 +0200 Subject: [PATCH 409/572] qmi_wwan: add support for the Dell Wireless 5821e module MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This module exposes two USB configurations: a QMI+AT capable setup on USB config #1 and a MBIM capable setup on USB config #2. By default the kernel will choose the MBIM capable configuration as long as the cdc_mbim driver is available. This patch adds support for the QMI port in the secondary configuration. Signed-off-by: Aleksander Morgado Acked-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 8e8b51f171f4..8fac8e132c5b 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1246,6 +1246,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x413c, 0x81b3, 8)}, /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card (rev3) */ {QMI_FIXED_INTF(0x413c, 0x81b6, 8)}, /* Dell Wireless 5811e */ {QMI_FIXED_INTF(0x413c, 0x81b6, 10)}, /* Dell Wireless 5811e */ + {QMI_FIXED_INTF(0x413c, 0x81d7, 1)}, /* Dell Wireless 5821e */ {QMI_FIXED_INTF(0x03f0, 0x4e1d, 8)}, /* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */ {QMI_FIXED_INTF(0x03f0, 0x9d1d, 1)}, /* HP lt4120 Snapdragon X5 LTE */ {QMI_FIXED_INTF(0x22de, 0x9061, 3)}, /* WeTelecom WPD-600N */ From 2e6eb40ca5eb53836d18f3b9ac61ff2e0b417038 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 23 Jun 2018 23:19:03 +0200 Subject: [PATCH 410/572] efi/x86: Fix incorrect invocation of PciIo->Attributes() The following commit: 2c3625cb9fa2 ("efi/x86: Fold __setup_efi_pci32() and __setup_efi_pci64() into one function") ... merged the two versions of __setup_efi_pciXX(), without taking into account that the 32-bit version used a rather dodgy trick to pass an immediate 0 constant as argument for a uint64_t parameter. The issue is caused by the fact that on x86, UEFI protocol method calls are redirected via struct efi_config::call(), which is a variadic function, and so the compiler has to infer the types of the parameters from the arguments rather than from the prototype. As the 32-bit x86 calling convention passes arguments via the stack, passing the unqualified constant 0 twice is the same as passing 0ULL, which is why the 32-bit code in __setup_efi_pci32() contained the following call: status = efi_early->call(pci->attributes, pci, EfiPciIoAttributeOperationGet, 0, 0, &attributes); to invoke this UEFI protocol method: typedef EFI_STATUS (EFIAPI *EFI_PCI_IO_PROTOCOL_ATTRIBUTES) ( IN EFI_PCI_IO_PROTOCOL *This, IN EFI_PCI_IO_PROTOCOL_ATTRIBUTE_OPERATION Operation, IN UINT64 Attributes, OUT UINT64 *Result OPTIONAL ); After the merge, we inadvertently ended up with this version for both 32-bit and 64-bit builds, breaking the latter. So replace the two zeroes with the explicitly typed constant 0ULL, which works as expected on both 32-bit and 64-bit builds. Wilfried tested the 64-bit build, and I checked the generated assembly of a 32-bit build with and without this patch, and they are identical. Reported-by: Wilfried Klaebe Tested-by: Wilfried Klaebe Signed-off-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: hdegoede@redhat.com Cc: linux-efi@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/eboot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index a8a8642d2b0b..e57665b4ba1c 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -118,7 +118,7 @@ __setup_efi_pci(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom) void *romimage; status = efi_call_proto(efi_pci_io_protocol, attributes, pci, - EfiPciIoAttributeOperationGet, 0, 0, + EfiPciIoAttributeOperationGet, 0ULL, &attributes); if (status != EFI_SUCCESS) return status; From 3531456aba6c8a8c905730af96dbb83608538b71 Mon Sep 17 00:00:00 2001 From: Vakul Garg Date: Sun, 24 Jun 2018 18:14:01 +0530 Subject: [PATCH 411/572] strparser: Corrected typo in documentation. Replaced strp_pause() with strp_unpause() to correct a seemingly copy paste documentation mistake. Signed-off-by: Vakul Garg Signed-off-by: David S. Miller --- Documentation/networking/strparser.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/strparser.txt b/Documentation/networking/strparser.txt index 13081b3decef..a7d354ddda7b 100644 --- a/Documentation/networking/strparser.txt +++ b/Documentation/networking/strparser.txt @@ -48,7 +48,7 @@ void strp_pause(struct strparser *strp) Temporarily pause a stream parser. Message parsing is suspended and no new messages are delivered to the upper layer. -void strp_pause(struct strparser *strp) +void strp_unpause(struct strparser *strp) Unpause a paused stream parser. From 7daf201d7fe8334e2d2364d4e8ed3394ec9af819 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 24 Jun 2018 20:54:29 +0800 Subject: [PATCH 412/572] Linux 4.18-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index ca2af1ab91eb..c9132594860b 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 4 PATCHLEVEL = 18 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Merciless Moray # *DOCUMENTATION* From 13399ff25f179811ce9c1df1523eb39f9e4a4772 Mon Sep 17 00:00:00 2001 From: Tomasz Duszynski Date: Mon, 28 May 2018 17:38:59 +0200 Subject: [PATCH 413/572] iio: pressure: bmp280: fix relative humidity unit According to IIO ABI relative humidity reading should be returned in milli percent. This patch addresses that by applying proper scaling and returning integer instead of fractional format type specifier. Note that the fixes tag is before the driver was heavily refactored to introduce spi support, so the patch won't apply that far back. Signed-off-by: Tomasz Duszynski Fixes: 14beaa8f5ab1 ("iio: pressure: bmp280: add humidity support") Acked-by: Matt Ranostay Signed-off-by: Jonathan Cameron --- drivers/iio/pressure/bmp280-core.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iio/pressure/bmp280-core.c b/drivers/iio/pressure/bmp280-core.c index 5ec3e41b65f2..fe87d27779d9 100644 --- a/drivers/iio/pressure/bmp280-core.c +++ b/drivers/iio/pressure/bmp280-core.c @@ -415,10 +415,9 @@ static int bmp280_read_humid(struct bmp280_data *data, int *val, int *val2) } comp_humidity = bmp280_compensate_humidity(data, adc_humidity); - *val = comp_humidity; - *val2 = 1024; + *val = comp_humidity * 1000 / 1024; - return IIO_VAL_FRACTIONAL; + return IIO_VAL_INT; } static int bmp280_read_raw(struct iio_dev *indio_dev, From 288320f6e9a254bb595b4783c9bdfcc7c0e73153 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 30 May 2018 19:19:36 +0100 Subject: [PATCH 414/572] iio: tsl2x7x/tsl2772: avoid potential division by zero It may be possible for tsl2772_get_lux to return a zero lux value and hence a division by zero can occur when lux_val is zero. Check for this case and return -ERANGE to avoid the division by zero. Detected by CoverityScan, CID#1469484 ("Division or modulo by zero") Signed-off-by: Colin Ian King Acked-by: Brian Masney Signed-off-by: Jonathan Cameron --- drivers/iio/light/tsl2772.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iio/light/tsl2772.c b/drivers/iio/light/tsl2772.c index 34d42a2504c9..df5b2a0da96c 100644 --- a/drivers/iio/light/tsl2772.c +++ b/drivers/iio/light/tsl2772.c @@ -582,6 +582,8 @@ static int tsl2772_als_calibrate(struct iio_dev *indio_dev) "%s: failed to get lux\n", __func__); return lux_val; } + if (lux_val == 0) + return -ERANGE; ret = (chip->settings.als_cal_target * chip->settings.als_gain_trim) / lux_val; From b02ec67a8e38875cdc5f9214be885022f11c0017 Mon Sep 17 00:00:00 2001 From: Leonard Crestez Date: Thu, 7 Jun 2018 21:52:50 +0300 Subject: [PATCH 415/572] iio: mma8452: Fix ignoring MMA8452_INT_DRDY Interrupts are ignored if no event bit is set in the status status register and this breaks the buffer interface. No data is shown when running "iio_generic_buffer -n mma8451 -a" and interrupt counts go crazy. Fix by not returning IRQ_NONE if DRDY is set. Fixes: 605f72de137a ("iio: accel: mma8452: improvements to handle multiple events") Signed-off-by: Leonard Crestez cc: Signed-off-by: Jonathan Cameron --- drivers/iio/accel/mma8452.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/accel/mma8452.c b/drivers/iio/accel/mma8452.c index 7e3d82cff3d5..c149c9c360fc 100644 --- a/drivers/iio/accel/mma8452.c +++ b/drivers/iio/accel/mma8452.c @@ -1053,7 +1053,7 @@ static irqreturn_t mma8452_interrupt(int irq, void *p) if (src < 0) return IRQ_NONE; - if (!(src & data->chip_info->enabled_events)) + if (!(src & (data->chip_info->enabled_events | MMA8452_INT_DRDY))) return IRQ_NONE; if (src & MMA8452_INT_DRDY) { From 92397a6c38d139d50fabbe9e2dc09b61d53b2377 Mon Sep 17 00:00:00 2001 From: Phil Reid Date: Tue, 5 Jun 2018 14:15:10 +0800 Subject: [PATCH 416/572] iio: buffer: fix the function signature to match implementation linux/iio/buffer-dma.h was not updated to when length was changed to unsigned int. Fixes: c043ec1ca5ba ("iio:buffer: make length types match kfifo types") Signed-off-by: Phil Reid Signed-off-by: Jonathan Cameron --- include/linux/iio/buffer-dma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/iio/buffer-dma.h b/include/linux/iio/buffer-dma.h index 767467d886de..67c75372b691 100644 --- a/include/linux/iio/buffer-dma.h +++ b/include/linux/iio/buffer-dma.h @@ -141,7 +141,7 @@ int iio_dma_buffer_read(struct iio_buffer *buffer, size_t n, char __user *user_buffer); size_t iio_dma_buffer_data_available(struct iio_buffer *buffer); int iio_dma_buffer_set_bytes_per_datum(struct iio_buffer *buffer, size_t bpd); -int iio_dma_buffer_set_length(struct iio_buffer *buffer, int length); +int iio_dma_buffer_set_length(struct iio_buffer *buffer, unsigned int length); int iio_dma_buffer_request_update(struct iio_buffer *buffer); int iio_dma_buffer_init(struct iio_dma_buffer_queue *queue, From 2a221c04228bbf683fc8d3cb5d177aebb38b393c Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 18 Jun 2018 18:09:36 +0200 Subject: [PATCH 417/572] iio: imu: inv_mpu6050: Fix probe() failure on older ACPI based machines Commit 5ec6486daa98 ("iio:imu: inv_mpu6050: support more interrupt types") causes inv_mpu_core_probe() to fail if the IRQ does not have a trigger-type setup. This happens on machines where the mpu6050 is enumerated through ACPI and an older Interrupt type ACPI resource is used for the interrupt, rather then a GpioInt type type, causing the mpu6050 driver to no longer work there. This happens on e.g. the Asus T100TA. This commits makes the mpu6050 fallback to the old IRQF_TRIGGER_RISING default if the irq-type is not setup, fixing this. Signed-off-by: Hans de Goede Fixes: 5ec6486daa98 ("iio:imu: inv_mpu6050: support more interrupt types") Reviewed-by: Martin Kelly Reviewed-by: Jean-Baptiste Maneyrol Signed-off-by: Jonathan Cameron --- drivers/iio/imu/inv_mpu6050/inv_mpu_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c index f9c0624505a2..42618fe4f83e 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c @@ -959,6 +959,8 @@ int inv_mpu_core_probe(struct regmap *regmap, int irq, const char *name, } irq_type = irqd_get_trigger_type(desc); + if (!irq_type) + irq_type = IRQF_TRIGGER_RISING; if (irq_type == IRQF_TRIGGER_RISING) st->irq_mask = INV_MPU6050_ACTIVE_HIGH; else if (irq_type == IRQF_TRIGGER_FALLING) From 829eb05365ff06e8adc23f2541597d0cc3c18348 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 24 Jun 2018 11:57:31 +0100 Subject: [PATCH 418/572] sfc: make function efx_rps_hash_bucket static The function efx_rps_hash_bucket is local to the source and does not need to be in global scope, so make it static. Cleans up sparse warning: symbol 'efx_rps_hash_bucket' was not declared. Should it be static? Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/efx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index ad4a354ce570..570ec72266f3 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -3180,6 +3180,7 @@ bool efx_rps_check_rule(struct efx_arfs_rule *rule, unsigned int filter_idx, return true; } +static struct hlist_head *efx_rps_hash_bucket(struct efx_nic *efx, const struct efx_filter_spec *spec) { From 662d855c66c0a7400f9117b6ac02047942cd1d22 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Sun, 24 Jun 2018 09:33:22 -0700 Subject: [PATCH 419/572] MIPS: Add ksig argument to rseq_{signal_deliver,handle_notify_resume} Commit 784e0300fe9f ("rseq: Avoid infinite recursion when delivering SIGSEGV") added a new ksig argument to the rseq_signal_deliver() & rseq_handle_notify_resume() functions, and was merged in v4.18-rc2. Meanwhile MIPS support for restartable sequences was also merged in v4.18-rc2 with commit 9ea141ad5471 ("MIPS: Add support for restartable sequences"), and therefore didn't get updated for the API change. This results in build failures like the following: CC arch/mips/kernel/signal.o arch/mips/kernel/signal.c: In function 'handle_signal': arch/mips/kernel/signal.c:804:22: error: passing argument 1 of 'rseq_signal_deliver' from incompatible pointer type [-Werror=incompatible-pointer-types] rseq_signal_deliver(regs); ^~~~ In file included from ./include/linux/context_tracking.h:5, from arch/mips/kernel/signal.c:12: ./include/linux/sched.h:1811:56: note: expected 'struct ksignal *' but argument is of type 'struct pt_regs *' static inline void rseq_signal_deliver(struct ksignal *ksig, ~~~~~~~~~~~~~~~~^~~~ arch/mips/kernel/signal.c:804:2: error: too few arguments to function 'rseq_signal_deliver' rseq_signal_deliver(regs); ^~~~~~~~~~~~~~~~~~~ Fix this by adding the ksig argument as was done for other architectures in commit 784e0300fe9f ("rseq: Avoid infinite recursion when delivering SIGSEGV"). Signed-off-by: Paul Burton Acked-by: Mathieu Desnoyers Patchwork: https://patchwork.linux-mips.org/patch/19603/ Cc: James Hogan Cc: Ralf Baechle Cc: Peter Zijlstra Cc: Paul E. McKenney Cc: Boqun Feng Cc: Will Deacon Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org --- arch/mips/kernel/signal.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c index 00f2535d2226..0a9cfe7a0372 100644 --- a/arch/mips/kernel/signal.c +++ b/arch/mips/kernel/signal.c @@ -801,7 +801,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) regs->regs[0] = 0; /* Don't deal with this again. */ } - rseq_signal_deliver(regs); + rseq_signal_deliver(ksig, regs); if (sig_uses_siginfo(&ksig->ka, abi)) ret = abi->setup_rt_frame(vdso + abi->vdso->off_rt_sigreturn, @@ -870,7 +870,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, void *unused, if (thread_info_flags & _TIF_NOTIFY_RESUME) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); - rseq_handle_notify_resume(regs); + rseq_handle_notify_resume(NULL, regs); } user_enter(); From 10ee25268e1f8475905e1deb85bb83627dca561e Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 21 Jun 2018 23:26:55 -0700 Subject: [PATCH 420/572] xfs: allow empty transactions while frozen In commit e89c041338ed6ef ("xfs: implement the GETFSMAP ioctl") we created the ability to obtain empty transactions. These transactions have no log or block reservations and therefore can't modify anything. Since they're also NO_WRITECOUNT they can run while the fs is frozen, so we don't need to WARN_ON about that usage. Signed-off-by: Darrick J. Wong Reviewed-by: Allison Henderson Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_trans.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index e040af120b69..524f543c5b82 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -258,7 +258,12 @@ xfs_trans_alloc( if (!(flags & XFS_TRANS_NO_WRITECOUNT)) sb_start_intwrite(mp->m_super); - WARN_ON(mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE); + /* + * Zero-reservation ("empty") transactions can't modify anything, so + * they're allowed to run while we're frozen. + */ + WARN_ON(resp->tr_logres > 0 && + mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE); atomic_inc(&mp->m_active_trans); tp = kmem_zone_zalloc(xfs_trans_zone, From aafe12cee0b132824f5187987f8a3fb704b9f685 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 21 Jun 2018 23:26:56 -0700 Subject: [PATCH 421/572] xfs: don't trip over negative free space in xfs_reserve_blocks If we somehow end up with a filesystem that has fewer free blocks than the blocks set aside to avoid ENOSPC deadlocks, it's possible that the free space calculation in xfs_reserve_blocks will spit out a negative number (because percpu_counter_sum returns s64). We fail to notice this negative number and set fdblks_delta to it. Now we increment fdblocks(!) and the unsigned type of m_resblks means that we end up setting a ridiculously huge m_resblks reservation. Avoid this comedy of errors by detecting the negative free space and returning -ENOSPC. Signed-off-by: Darrick J. Wong Reviewed-by: Allison Henderson Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_fsops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index a7afcad6b711..3f2bd6032cf8 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -387,7 +387,7 @@ xfs_reserve_blocks( do { free = percpu_counter_sum(&mp->m_fdblocks) - mp->m_alloc_set_aside; - if (!free) + if (free <= 0) break; delta = request - mp->m_resblks; From f62cb48e43195f66c7a40bbfcf11531fc1ff8999 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 21 Jun 2018 23:26:57 -0700 Subject: [PATCH 422/572] xfs: don't allow insert-range to shift extents past the maximum offset Zorro Lang reports that generic/485 blows an assert on a filesystem with 512 byte blocks. The test tries to fallocate a post-eof extent at the maximum file size and calls insert range to shift the extents right by two blocks. On a 512b block filesystem this causes startoff to overflow the 54-bit startoff field, leading to the assert. Therefore, always check the rightmost extent to see if it would overflow prior to invoking the insert range machinery. Reported-by: zlang@redhat.com Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=200137 Signed-off-by: Darrick J. Wong Reviewed-by: Allison Henderson Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_bmap.c | 26 ++++++++++++++++++++++++++ fs/xfs/libxfs/xfs_bmap.h | 2 ++ fs/xfs/libxfs/xfs_format.h | 2 ++ fs/xfs/xfs_bmap_util.c | 4 ++++ 4 files changed, 34 insertions(+) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 01628f0c9a0c..7205268b30bc 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -5780,6 +5780,32 @@ del_cursor: return error; } +/* Make sure we won't be right-shifting an extent past the maximum bound. */ +int +xfs_bmap_can_insert_extents( + struct xfs_inode *ip, + xfs_fileoff_t off, + xfs_fileoff_t shift) +{ + struct xfs_bmbt_irec got; + int is_empty; + int error = 0; + + ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); + + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + return -EIO; + + xfs_ilock(ip, XFS_ILOCK_EXCL); + error = xfs_bmap_last_extent(NULL, ip, XFS_DATA_FORK, &got, &is_empty); + if (!error && !is_empty && got.br_startoff >= off && + ((got.br_startoff + shift) & BMBT_STARTOFF_MASK) < got.br_startoff) + error = -EINVAL; + xfs_iunlock(ip, XFS_ILOCK_EXCL); + + return error; +} + int xfs_bmap_insert_extents( struct xfs_trans *tp, diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index 99dddbd0fcc6..9b49ddf99c41 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -227,6 +227,8 @@ int xfs_bmap_collapse_extents(struct xfs_trans *tp, struct xfs_inode *ip, xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb, bool *done, xfs_fsblock_t *firstblock, struct xfs_defer_ops *dfops); +int xfs_bmap_can_insert_extents(struct xfs_inode *ip, xfs_fileoff_t off, + xfs_fileoff_t shift); int xfs_bmap_insert_extents(struct xfs_trans *tp, struct xfs_inode *ip, xfs_fileoff_t *next_fsb, xfs_fileoff_t offset_shift_fsb, bool *done, xfs_fileoff_t stop_fsb, xfs_fsblock_t *firstblock, diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 7b4a43deb83e..059bc44c27e8 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -1529,6 +1529,8 @@ typedef struct xfs_bmdr_block { #define BMBT_STARTBLOCK_BITLEN 52 #define BMBT_BLOCKCOUNT_BITLEN 21 +#define BMBT_STARTOFF_MASK ((1ULL << BMBT_STARTOFF_BITLEN) - 1) + typedef struct xfs_bmbt_rec { __be64 l0, l1; } xfs_bmbt_rec_t; diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index c195b9d857af..bb417156e3bf 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -1383,6 +1383,10 @@ xfs_insert_file_space( trace_xfs_insert_file_space(ip); + error = xfs_bmap_can_insert_extents(ip, stop_fsb, shift_fsb); + if (error) + return error; + error = xfs_prepare_shift(ip, offset); if (error) return error; From 5bd88d153998c1b189fdeb8b8bd4cce36b5acf62 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 21 Jun 2018 23:26:57 -0700 Subject: [PATCH 423/572] xfs: recheck reflink state after grabbing ILOCK_SHARED for a write The reflink iflag could have changed since the earlier unlocked check, so if we got ILOCK_SHARED for a write and but we're now a reflink inode we have to switch to ILOCK_EXCL and relock. This helps us avoid blowing lock assertions in things like generic/166: XFS: Assertion failed: xfs_isilocked(ip, XFS_ILOCK_EXCL), file: fs/xfs/xfs_reflink.c, line: 383 WARNING: CPU: 1 PID: 24707 at fs/xfs/xfs_message.c:104 assfail+0x25/0x30 [xfs] Modules linked in: deadline_iosched dm_snapshot dm_bufio ext4 mbcache jbd2 dm_flakey xfs libcrc32c dax_pmem device_dax nd_pmem sch_fq_codel af_packet [last unloaded: scsi_debug] CPU: 1 PID: 24707 Comm: xfs_io Not tainted 4.18.0-rc1-djw #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.10.2-1ubuntu1 04/01/2014 RIP: 0010:assfail+0x25/0x30 [xfs] Code: ff 0f 0b c3 90 66 66 66 66 90 48 89 f1 41 89 d0 48 c7 c6 e8 ef 1b a0 48 89 fa 31 ff e8 54 f9 ff ff 80 3d fd ba 0f 00 00 75 03 <0f> 0b c3 0f 0b 66 0f 1f 44 00 00 66 66 66 66 90 48 63 f6 49 89 f9 RSP: 0018:ffffc90006423ad8 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff880030b65e80 RCX: 0000000000000000 RDX: 00000000ffffffc0 RSI: 000000000000000a RDI: ffffffffa01b0447 RBP: ffffc90006423c10 R08: 0000000000000000 R09: 0000000000000000 R10: ffff88003d43fc30 R11: f000000000000000 R12: ffff880077cda000 R13: 0000000000000000 R14: ffffc90006423c30 R15: ffffc90006423bf9 FS: 00007feba8986800(0000) GS:ffff88003ec00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000138ab58 CR3: 000000003d40a000 CR4: 00000000000006a0 Call Trace: xfs_reflink_allocate_cow+0x24c/0x3d0 [xfs] xfs_file_iomap_begin+0x6d2/0xeb0 [xfs] ? iomap_to_fiemap+0x80/0x80 iomap_apply+0x5e/0x130 iomap_dio_rw+0x2e0/0x400 ? iomap_to_fiemap+0x80/0x80 ? xfs_file_dio_aio_write+0x133/0x4a0 [xfs] xfs_file_dio_aio_write+0x133/0x4a0 [xfs] xfs_file_write_iter+0x7b/0xb0 [xfs] __vfs_write+0x16f/0x1f0 vfs_write+0xc8/0x1c0 ksys_pwrite64+0x74/0x90 do_syscall_64+0x56/0x180 entry_SYSCALL_64_after_hwframe+0x49/0xbe Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_iomap.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 49f5492eed3b..55876dd02f0c 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -963,12 +963,13 @@ xfs_ilock_for_iomap( unsigned *lockmode) { unsigned mode = XFS_ILOCK_SHARED; + bool is_write = flags & (IOMAP_WRITE | IOMAP_ZERO); /* * COW writes may allocate delalloc space or convert unwritten COW * extents, so we need to make sure to take the lock exclusively here. */ - if (xfs_is_reflink_inode(ip) && (flags & (IOMAP_WRITE | IOMAP_ZERO))) { + if (xfs_is_reflink_inode(ip) && is_write) { /* * FIXME: It could still overwrite on unshared extents and not * need allocation. @@ -989,6 +990,7 @@ xfs_ilock_for_iomap( mode = XFS_ILOCK_EXCL; } +relock: if (flags & IOMAP_NOWAIT) { if (!xfs_ilock_nowait(ip, mode)) return -EAGAIN; @@ -996,6 +998,17 @@ xfs_ilock_for_iomap( xfs_ilock(ip, mode); } + /* + * The reflink iflag could have changed since the earlier unlocked + * check, so if we got ILOCK_SHARED for a write and but we're now a + * reflink inode we have to switch to ILOCK_EXCL and relock. + */ + if (mode == XFS_ILOCK_SHARED && is_write && xfs_is_reflink_inode(ip)) { + xfs_iunlock(ip, mode); + mode = XFS_ILOCK_EXCL; + goto relock; + } + *lockmode = mode; return 0; } From 232d0a24b0fc197c50e95fe65f236027b5fa1b74 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 21 Jun 2018 23:26:57 -0700 Subject: [PATCH 424/572] xfs: fix uninitialized field in rtbitmap fsmap backend Initialize the extent count field of the high key so that when we use the high key to synthesize an 'unknown owner' record (i.e. used space record) at the end of the queried range we have a field with which to compute rm_blockcount. This is not strictly necessary because the synthesizer never uses the rm_blockcount field, but we can shut up the static code analysis anyway. Coverity-id: 1437358 Signed-off-by: Darrick J. Wong Reviewed-by: Allison Henderson Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_fsmap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c index c34fa9c342f2..c7157bc48bd1 100644 --- a/fs/xfs/xfs_fsmap.c +++ b/fs/xfs/xfs_fsmap.c @@ -513,8 +513,8 @@ xfs_getfsmap_rtdev_rtbitmap_query( struct xfs_trans *tp, struct xfs_getfsmap_info *info) { - struct xfs_rtalloc_rec alow; - struct xfs_rtalloc_rec ahigh; + struct xfs_rtalloc_rec alow = { 0 }; + struct xfs_rtalloc_rec ahigh = { 0 }; int error; xfs_ilock(tp->t_mountp->m_rbmip, XFS_ILOCK_SHARED); From a3a374bf1889b1b401b25e6aada3ca4151a99d15 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 21 Jun 2018 23:26:58 -0700 Subject: [PATCH 425/572] xfs: fix off-by-one error in xfs_rtalloc_query_range In commit 8ad560d2565e6 ("xfs: strengthen rtalloc query range checks") we strengthened the input parameter checks in the rtbitmap range query function, but introduced an off-by-one error in the process. The call to xfs_rtfind_forw deals with the high key being rextents, but we clamp the high key to rextents - 1. This causes the returned results to stop one block short of the end of the rtdev, which is incorrect. Signed-off-by: Darrick J. Wong Reviewed-by: Allison Henderson Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_rtbitmap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c index 65fc4ed2e9a1..b228c821bae6 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.c +++ b/fs/xfs/libxfs/xfs_rtbitmap.c @@ -1029,8 +1029,8 @@ xfs_rtalloc_query_range( if (low_rec->ar_startext >= mp->m_sb.sb_rextents || low_rec->ar_startext == high_rec->ar_startext) return 0; - if (high_rec->ar_startext >= mp->m_sb.sb_rextents) - high_rec->ar_startext = mp->m_sb.sb_rextents - 1; + if (high_rec->ar_startext > mp->m_sb.sb_rextents) + high_rec->ar_startext = mp->m_sb.sb_rextents; /* Iterate the bitmap, looking for discrepancies. */ rtstart = low_rec->ar_startext; From e53c4b598372001a13901b77649dc1b4afec3e85 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 21 Jun 2018 23:26:58 -0700 Subject: [PATCH 426/572] xfs: ensure post-EOF zeroing happens after zeroing part of a file If a user asks us to zero_range part of a file, the end of the range is EOF, and not aligned to a page boundary, invoke writeback of the EOF page to ensure that the post-EOF part of the page is zeroed. This ensures that we don't expose stale memory contents via mmap, if in a clumsy manner. Found by running generic/127 when it runs zero_range and mapread at EOF one after the other. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Reviewed-by: Allison Henderson --- fs/xfs/xfs_bmap_util.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index bb417156e3bf..83b1e8c6c18f 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -1187,7 +1187,22 @@ xfs_free_file_space( return 0; if (offset + len > XFS_ISIZE(ip)) len = XFS_ISIZE(ip) - offset; - return iomap_zero_range(VFS_I(ip), offset, len, NULL, &xfs_iomap_ops); + error = iomap_zero_range(VFS_I(ip), offset, len, NULL, &xfs_iomap_ops); + if (error) + return error; + + /* + * If we zeroed right up to EOF and EOF straddles a page boundary we + * must make sure that the post-EOF area is also zeroed because the + * page could be mmap'd and iomap_zero_range doesn't do that for us. + * Writeback of the eof page will do this, albeit clumsily. + */ + if (offset + len >= XFS_ISIZE(ip) && ((offset + len) & PAGE_MASK)) { + error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, + (offset + len) & ~PAGE_MASK, LLONG_MAX); + } + + return error; } /* From d8cb5e42378918e00eda58792f3ab86dd1e7d214 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 21 Jun 2018 23:26:56 -0700 Subject: [PATCH 427/572] xfs: fix fdblocks accounting w/ RMAPBT per-AG reservation In __xfs_ag_resv_init we incorrectly calculate the amount by which to decrease fdblocks when reserving blocks for the rmapbt. Because rmapbt allocations do not decrease fdblocks, we must decrease fdblocks by the entire size of the requested reservation in order to achieve our goal of always having enough free blocks to satisfy an rmapbt expansion. This is in contrast to the refcountbt/finobt, which /do/ subtract from fdblocks whenever they allocate a block. For this allocation type we preserve the existing behavior where we decrease fdblocks only by the requested reservation minus the size of the existing tree. This fixes the problem where the available block counts reported by statfs change across a remount if there had been an rmapbt size change since mount time. Signed-off-by: Darrick J. Wong Reviewed-by: Allison Henderson --- fs/xfs/libxfs/xfs_ag_resv.c | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c index 84db76e0e3e3..fecd187fcf2c 100644 --- a/fs/xfs/libxfs/xfs_ag_resv.c +++ b/fs/xfs/libxfs/xfs_ag_resv.c @@ -157,6 +157,7 @@ __xfs_ag_resv_free( error = xfs_mod_fdblocks(pag->pag_mount, oldresv, true); resv->ar_reserved = 0; resv->ar_asked = 0; + resv->ar_orig_reserved = 0; if (error) trace_xfs_ag_resv_free_error(pag->pag_mount, pag->pag_agno, @@ -189,13 +190,34 @@ __xfs_ag_resv_init( struct xfs_mount *mp = pag->pag_mount; struct xfs_ag_resv *resv; int error; - xfs_extlen_t reserved; + xfs_extlen_t hidden_space; if (used > ask) ask = used; - reserved = ask - used; - error = xfs_mod_fdblocks(mp, -(int64_t)reserved, true); + switch (type) { + case XFS_AG_RESV_RMAPBT: + /* + * Space taken by the rmapbt is not subtracted from fdblocks + * because the rmapbt lives in the free space. Here we must + * subtract the entire reservation from fdblocks so that we + * always have blocks available for rmapbt expansion. + */ + hidden_space = ask; + break; + case XFS_AG_RESV_METADATA: + /* + * Space taken by all other metadata btrees are accounted + * on-disk as used space. We therefore only hide the space + * that is reserved but not used by the trees. + */ + hidden_space = ask - used; + break; + default: + ASSERT(0); + return -EINVAL; + } + error = xfs_mod_fdblocks(mp, -(int64_t)hidden_space, true); if (error) { trace_xfs_ag_resv_init_error(pag->pag_mount, pag->pag_agno, error, _RET_IP_); @@ -216,7 +238,8 @@ __xfs_ag_resv_init( resv = xfs_perag_resv(pag, type); resv->ar_asked = ask; - resv->ar_reserved = resv->ar_orig_reserved = reserved; + resv->ar_orig_reserved = hidden_space; + resv->ar_reserved = ask - used; trace_xfs_ag_resv_init(pag, type, ask); return 0; From 1dd2058f902eb2d8bd5bedab0b190a21d5245e71 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 21 Jun 2018 11:06:41 +0300 Subject: [PATCH 428/572] cpufreq: qcom-kryo: Fix error handling in probe() We should return if get_cpu_device() fails or it leads to a NULL dereference. Also dev_pm_opp_of_get_opp_desc_node() returns NULL on error, it never returns error pointers. Fixes: 46e2856b8e18 (cpufreq: Add Kryo CPU scaling driver) Signed-off-by: Dan Carpenter Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/qcom-cpufreq-kryo.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/qcom-cpufreq-kryo.c b/drivers/cpufreq/qcom-cpufreq-kryo.c index 01bddacf5c3b..29389accf3e9 100644 --- a/drivers/cpufreq/qcom-cpufreq-kryo.c +++ b/drivers/cpufreq/qcom-cpufreq-kryo.c @@ -87,8 +87,8 @@ static int qcom_cpufreq_kryo_probe(struct platform_device *pdev) int ret; cpu_dev = get_cpu_device(0); - if (NULL == cpu_dev) - ret = -ENODEV; + if (!cpu_dev) + return -ENODEV; msm8996_version = qcom_cpufreq_kryo_get_msm_id(); if (NUM_OF_MSM8996_VERSIONS == msm8996_version) { @@ -97,8 +97,8 @@ static int qcom_cpufreq_kryo_probe(struct platform_device *pdev) } np = dev_pm_opp_of_get_opp_desc_node(cpu_dev); - if (IS_ERR(np)) - return PTR_ERR(np); + if (!np) + return -ENOENT; ret = of_device_is_compatible(np, "operating-points-v2-kryo-cpu"); if (!ret) { From 5e03aa61a7f3cb99852bc3ad6925f5fa3c0dcc93 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 14 Jun 2018 10:53:34 +0530 Subject: [PATCH 429/572] PM / Domains: Fix return value of of_genpd_opp_to_performance_state() of_genpd_opp_to_performance_state() should return 0 for errors, but the dummy routine isn't doing that. Fix it. Signed-off-by: Viresh Kumar Acked-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 9206a4fef9ac..139f79c8477a 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -276,7 +276,7 @@ static inline unsigned int of_genpd_opp_to_performance_state(struct device *dev, struct device_node *opp_node) { - return -ENODEV; + return 0; } static inline int genpd_dev_pm_attach(struct device *dev) From ad6384ba3ac98ad524194e37de379c1fe503870b Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 20 Jun 2018 11:45:37 +0530 Subject: [PATCH 430/572] PM / Domains: Rename opp_node to np The DT node passed here isn't necessarily an OPP node, as this routine can also be used for cases where the "required-opps" property is present directly in the device's node. Rename it. This also removes a stale comment. Acked-by: Ulf Hansson Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 7 +++---- include/linux/pm_domain.h | 4 ++-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 4925af5c4cf0..c298de8a8308 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -2487,10 +2487,9 @@ EXPORT_SYMBOL_GPL(of_genpd_parse_idle_states); * power domain corresponding to a DT node's "required-opps" property. * * @dev: Device for which the performance-state needs to be found. - * @opp_node: DT node where the "required-opps" property is present. This can be + * @np: DT node where the "required-opps" property is present. This can be * the device node itself (if it doesn't have an OPP table) or a node * within the OPP table of a device (if device has an OPP table). - * @state: Pointer to return performance state. * * Returns performance state corresponding to the "required-opps" property of * a DT node. This calls platform specific genpd->opp_to_performance_state() @@ -2499,7 +2498,7 @@ EXPORT_SYMBOL_GPL(of_genpd_parse_idle_states); * Returns performance state on success and 0 on failure. */ unsigned int of_genpd_opp_to_performance_state(struct device *dev, - struct device_node *opp_node) + struct device_node *np) { struct generic_pm_domain *genpd; struct dev_pm_opp *opp; @@ -2514,7 +2513,7 @@ unsigned int of_genpd_opp_to_performance_state(struct device *dev, genpd_lock(genpd); - opp = of_dev_pm_opp_find_required_opp(&genpd->dev, opp_node); + opp = of_dev_pm_opp_find_required_opp(&genpd->dev, np); if (IS_ERR(opp)) { dev_err(dev, "Failed to find required OPP: %ld\n", PTR_ERR(opp)); diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 139f79c8477a..cb8d84090cfb 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -234,7 +234,7 @@ struct generic_pm_domain *of_genpd_remove_last(struct device_node *np); int of_genpd_parse_idle_states(struct device_node *dn, struct genpd_power_state **states, int *n); unsigned int of_genpd_opp_to_performance_state(struct device *dev, - struct device_node *opp_node); + struct device_node *np); int genpd_dev_pm_attach(struct device *dev); struct device *genpd_dev_pm_attach_by_id(struct device *dev, @@ -274,7 +274,7 @@ static inline int of_genpd_parse_idle_states(struct device_node *dn, static inline unsigned int of_genpd_opp_to_performance_state(struct device *dev, - struct device_node *opp_node) + struct device_node *np) { return 0; } From e41fc8c5bd41b96bfae5ce4c66bee6edabc932e8 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Mon, 25 Jun 2018 14:40:56 +0800 Subject: [PATCH 431/572] ALSA: hda/realtek - Fix the problem of two front mics on more machines We have 3 more Lenovo machines, they all have 2 front mics on them, so they need the fixup to change the location for one of two mics. Among these 3 Lenovo machines, one of them has the same pin cfg as the machine with subid 0x17aa3138, so use the pin cfg table to apply fixup for them. The rest machines don't share the same pin cfg, so far use the subid to apply fixup for them. Fixes: a3dafb2200bf ("ALSA: hda/realtek - adjust the location of one mic") Cc: Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 70bf4c30548a..5ad6c7e5f92e 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6610,8 +6610,9 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY), SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), + SND_PCI_QUIRK(0x17aa, 0x312a, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), SND_PCI_QUIRK(0x17aa, 0x312f, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), - SND_PCI_QUIRK(0x17aa, 0x3138, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), + SND_PCI_QUIRK(0x17aa, 0x3136, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), SND_PCI_QUIRK(0x17aa, 0x313c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI), SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC), @@ -6789,6 +6790,12 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x14, 0x90170110}, {0x19, 0x02a11030}, {0x21, 0x02211020}), + SND_HDA_PIN_QUIRK(0x10ec0235, 0x17aa, "Lenovo", ALC294_FIXUP_LENOVO_MIC_LOCATION, + {0x14, 0x90170110}, + {0x19, 0x02a11030}, + {0x1a, 0x02a11040}, + {0x1b, 0x01014020}, + {0x21, 0x0221101f}), SND_HDA_PIN_QUIRK(0x10ec0236, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, {0x12, 0x90a60140}, {0x14, 0x90170110}, From b41f794f284966fd6ec634111e3b40d241389f96 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 25 Jun 2018 11:09:11 +0200 Subject: [PATCH 432/572] ALSA: timer: Fix UBSAN warning at SNDRV_TIMER_IOCTL_NEXT_DEVICE ioctl The kernel may spew a WARNING about UBSAN undefined behavior at handling ALSA timer ioctl SNDRV_TIMER_IOCTL_NEXT_DEVICE: UBSAN: Undefined behaviour in sound/core/timer.c:1524:19 signed integer overflow: 2147483647 + 1 cannot be represented in type 'int' Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x122/0x1c8 lib/dump_stack.c:113 ubsan_epilogue+0x12/0x86 lib/ubsan.c:159 handle_overflow+0x1c2/0x21f lib/ubsan.c:190 __ubsan_handle_add_overflow+0x2a/0x31 lib/ubsan.c:198 snd_timer_user_next_device sound/core/timer.c:1524 [inline] __snd_timer_user_ioctl+0x204d/0x2520 sound/core/timer.c:1939 snd_timer_user_ioctl+0x67/0x95 sound/core/timer.c:1994 .... It happens only when a value with INT_MAX is passed, as we're incrementing it unconditionally. So the fix is trivial, check the value with INT_MAX. Although the bug itself is fairly harmless, it's better to fix it so that fuzzers won't hit this again later. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=200213 Reported-and-tested-by: Team OWL337 Cc: Signed-off-by: Takashi Iwai --- sound/core/timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/core/timer.c b/sound/core/timer.c index 665089c45560..b6f076bbc72d 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -1520,7 +1520,7 @@ static int snd_timer_user_next_device(struct snd_timer_id __user *_tid) } else { if (id.subdevice < 0) id.subdevice = 0; - else + else if (id.subdevice < INT_MAX) id.subdevice++; } } From c9a4c63888dbb79ce4d068ca1dd8b05bc3f156b1 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 25 Jun 2018 11:13:59 +0200 Subject: [PATCH 433/572] ALSA: seq: Fix UBSAN warning at SNDRV_SEQ_IOCTL_QUERY_NEXT_CLIENT ioctl The kernel may spew a WARNING with UBSAN undefined behavior at handling ALSA sequencer ioctl SNDRV_SEQ_IOCTL_QUERY_NEXT_CLIENT: UBSAN: Undefined behaviour in sound/core/seq/seq_clientmgr.c:2007:14 signed integer overflow: 2147483647 + 1 cannot be represented in type 'int' Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x122/0x1c8 lib/dump_stack.c:113 ubsan_epilogue+0x12/0x86 lib/ubsan.c:159 handle_overflow+0x1c2/0x21f lib/ubsan.c:190 __ubsan_handle_add_overflow+0x2a/0x31 lib/ubsan.c:198 snd_seq_ioctl_query_next_client+0x1ac/0x1d0 sound/core/seq/seq_clientmgr.c:2007 snd_seq_ioctl+0x264/0x3d0 sound/core/seq/seq_clientmgr.c:2144 .... It happens only when INT_MAX is passed there, as we're incrementing it unconditionally. So the fix is trivial, check the value with INT_MAX. Although the bug itself is fairly harmless, it's better to fix it so that fuzzers won't hit this again later. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=200211 Signed-off-by: Takashi Iwai --- sound/core/seq/seq_clientmgr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index 61a07fe34cd2..56ca78423040 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -2004,7 +2004,8 @@ static int snd_seq_ioctl_query_next_client(struct snd_seq_client *client, struct snd_seq_client *cptr = NULL; /* search for next client */ - info->client++; + if (info->client < INT_MAX) + info->client++; if (info->client < 0) info->client = 0; for (; info->client < SNDRV_SEQ_MAX_CLIENTS; info->client++) { From 043f891b70e6197bc181f3b087c2bd04c60fddd2 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Mon, 25 Jun 2018 13:28:06 +0200 Subject: [PATCH 434/572] Revert "lib/test_printf.c: call wait_for_random_bytes() before plain %p tests" This reverts commit ee410f15b1418f2f4428e79980674c979081bcb7. It might prevent the machine from boot. It would wait for enough randomness at the very beginning of kernel_init(). But there is basically nothing running in parallel that would help to produce any randomness. Reported-by: Steven Rostedt (VMware) Signed-off-by: Petr Mladek --- lib/test_printf.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/lib/test_printf.c b/lib/test_printf.c index b2aa8f514844..cea592f402ed 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c @@ -260,13 +260,6 @@ plain(void) { int err; - /* - * Make sure crng is ready. Otherwise we get "(ptrval)" instead - * of a hashed address when printing '%p' in plain_hash() and - * plain_format(). - */ - wait_for_random_bytes(); - err = plain_hash(); if (err) { pr_warn("plain 'p' does not appear to be hashed\n"); From 229bc19fd7aca4f37964af06e3583c1c8f36b5d6 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 21 Jun 2018 16:19:41 +0300 Subject: [PATCH 435/572] xhci: Fix perceived dead host due to runtime suspend race with event handler Don't rely on event interrupt (EINT) bit alone to detect pending port change in resume. If no change event is detected the host may be suspended again, oterwise roothubs are resumed. There is a lag in xHC setting EINT. If we don't notice the pending change in resume, and the controller is runtime suspeded again, it causes the event handler to assume host is dead as it will fail to read xHC registers once PCI puts the controller to D3 state. [ 268.520969] xhci_hcd: xhci_resume: starting port polling. [ 268.520985] xhci_hcd: xhci_hub_status_data: stopping port polling. [ 268.521030] xhci_hcd: xhci_suspend: stopping port polling. [ 268.521040] xhci_hcd: // Setting command ring address to 0x349bd001 [ 268.521139] xhci_hcd: Port Status Change Event for port 3 [ 268.521149] xhci_hcd: resume root hub [ 268.521163] xhci_hcd: port resume event for port 3 [ 268.521168] xhci_hcd: xHC is not running. [ 268.521174] xhci_hcd: handle_port_status: starting port polling. [ 268.596322] xhci_hcd: xhci_hc_died: xHCI host controller not responding, assume dead The EINT lag is described in a additional note in xhci specs 4.19.2: "Due to internal xHC scheduling and system delays, there will be a lag between a change bit being set and the Port Status Change Event that it generated being written to the Event Ring. If SW reads the PORTSC and sees a change bit set, there is no guarantee that the corresponding Port Status Change Event has already been written into the Event Ring." Cc: Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 40 +++++++++++++++++++++++++++++++++++++--- drivers/usb/host/xhci.h | 4 ++++ 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 8c8da2d657fa..f11ec61bcc7d 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -908,6 +908,41 @@ static void xhci_disable_port_wake_on_bits(struct xhci_hcd *xhci) spin_unlock_irqrestore(&xhci->lock, flags); } +static bool xhci_pending_portevent(struct xhci_hcd *xhci) +{ + struct xhci_port **ports; + int port_index; + u32 status; + u32 portsc; + + status = readl(&xhci->op_regs->status); + if (status & STS_EINT) + return true; + /* + * Checking STS_EINT is not enough as there is a lag between a change + * bit being set and the Port Status Change Event that it generated + * being written to the Event Ring. See note in xhci 1.1 section 4.19.2. + */ + + port_index = xhci->usb2_rhub.num_ports; + ports = xhci->usb2_rhub.ports; + while (port_index--) { + portsc = readl(ports[port_index]->addr); + if (portsc & PORT_CHANGE_MASK || + (portsc & PORT_PLS_MASK) == XDEV_RESUME) + return true; + } + port_index = xhci->usb3_rhub.num_ports; + ports = xhci->usb3_rhub.ports; + while (port_index--) { + portsc = readl(ports[port_index]->addr); + if (portsc & PORT_CHANGE_MASK || + (portsc & PORT_PLS_MASK) == XDEV_RESUME) + return true; + } + return false; +} + /* * Stop HC (not bus-specific) * @@ -1009,7 +1044,7 @@ EXPORT_SYMBOL_GPL(xhci_suspend); */ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) { - u32 command, temp = 0, status; + u32 command, temp = 0; struct usb_hcd *hcd = xhci_to_hcd(xhci); struct usb_hcd *secondary_hcd; int retval = 0; @@ -1134,8 +1169,7 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) done: if (retval == 0) { /* Resume root hubs only when have pending events. */ - status = readl(&xhci->op_regs->status); - if (status & STS_EINT) { + if (xhci_pending_portevent(xhci)) { usb_hcd_resume_root_hub(xhci->shared_hcd); usb_hcd_resume_root_hub(hcd); } diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 939e2f86b595..841e89ffe2e9 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -382,6 +382,10 @@ struct xhci_op_regs { #define PORT_PLC (1 << 22) /* port configure error change - port failed to configure its link partner */ #define PORT_CEC (1 << 23) +#define PORT_CHANGE_MASK (PORT_CSC | PORT_PEC | PORT_WRC | PORT_OCC | \ + PORT_RC | PORT_PLC | PORT_CEC) + + /* Cold Attach Status - xHC can set this bit to report device attached during * Sx state. Warm port reset should be perfomed to clear this bit and move port * to connected state. From d850c1658328e757635a46763783c6fd56390dcb Mon Sep 17 00:00:00 2001 From: Zhengjun Xing Date: Thu, 21 Jun 2018 16:19:42 +0300 Subject: [PATCH 436/572] xhci: Fix kernel oops in trace_xhci_free_virt_device commit 44a182b9d177 ("xhci: Fix use-after-free in xhci_free_virt_device") set dev->udev pointer to NULL in xhci_free_dev(), it will cause kernel panic in trace_xhci_free_virt_device. This patch reimplement the trace function trace_xhci_free_virt_device, remove dev->udev dereference and added more useful parameters to show in the trace function,it also makes sure dev->udev is not NULL before calling trace_xhci_free_virt_device. This issue happened when xhci-hcd trace is enabled and USB devices hot plug test. Original use-after-free patch went to stable so this needs so be applied there as well. [ 1092.022457] usb 2-4: USB disconnect, device number 6 [ 1092.092772] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 [ 1092.101694] PGD 0 P4D 0 [ 1092.104601] Oops: 0000 [#1] SMP [ 1092.207734] Workqueue: usb_hub_wq hub_event [ 1092.212507] RIP: 0010:trace_event_raw_event_xhci_log_virt_dev+0x6c/0xf0 [ 1092.220050] RSP: 0018:ffff8c252e883d28 EFLAGS: 00010086 [ 1092.226024] RAX: ffff8c24af86fa84 RBX: 0000000000000003 RCX: ffff8c25255c2a01 [ 1092.234130] RDX: 0000000000000000 RSI: 00000000aef55009 RDI: ffff8c252e883d28 [ 1092.242242] RBP: ffff8c252550e2c0 R08: ffff8c24af86fa84 R09: 0000000000000a70 [ 1092.250364] R10: 0000000000000a70 R11: 0000000000000000 R12: ffff8c251f21a000 [ 1092.258468] R13: 000000000000000c R14: ffff8c251f21a000 R15: ffff8c251f432f60 [ 1092.266572] FS: 0000000000000000(0000) GS:ffff8c252e880000(0000) knlGS:0000000000000000 [ 1092.275757] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1092.282281] CR2: 0000000000000000 CR3: 0000000154209001 CR4: 00000000003606e0 [ 1092.290384] Call Trace: [ 1092.293156] [ 1092.295439] xhci_free_virt_device.part.34+0x182/0x1a0 [ 1092.301288] handle_cmd_completion+0x7ac/0xfa0 [ 1092.306336] ? trace_event_raw_event_xhci_log_trb+0x6e/0xa0 [ 1092.312661] xhci_irq+0x3e8/0x1f60 [ 1092.316524] __handle_irq_event_percpu+0x75/0x180 [ 1092.321876] handle_irq_event_percpu+0x20/0x50 [ 1092.326922] handle_irq_event+0x36/0x60 [ 1092.331273] handle_edge_irq+0x6d/0x180 [ 1092.335644] handle_irq+0x16/0x20 [ 1092.339417] do_IRQ+0x41/0xc0 [ 1092.342782] common_interrupt+0xf/0xf [ 1092.346955] Fixes: 44a182b9d177 ("xhci: Fix use-after-free in xhci_free_virt_device") Cc: Signed-off-by: Zhengjun Xing Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-mem.c | 4 ++-- drivers/usb/host/xhci-trace.h | 36 ++++++++++++++++++++++++++++++----- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index acbd3d7b8828..8a62eee9eee1 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -886,12 +886,12 @@ void xhci_free_virt_device(struct xhci_hcd *xhci, int slot_id) dev = xhci->devs[slot_id]; - trace_xhci_free_virt_device(dev); - xhci->dcbaa->dev_context_ptrs[slot_id] = 0; if (!dev) return; + trace_xhci_free_virt_device(dev); + if (dev->tt_info) old_active_eps = dev->tt_info->active_eps; diff --git a/drivers/usb/host/xhci-trace.h b/drivers/usb/host/xhci-trace.h index 410544ffe78f..88b427434bd8 100644 --- a/drivers/usb/host/xhci-trace.h +++ b/drivers/usb/host/xhci-trace.h @@ -171,6 +171,37 @@ DEFINE_EVENT(xhci_log_trb, xhci_dbc_gadget_ep_queue, TP_ARGS(ring, trb) ); +DECLARE_EVENT_CLASS(xhci_log_free_virt_dev, + TP_PROTO(struct xhci_virt_device *vdev), + TP_ARGS(vdev), + TP_STRUCT__entry( + __field(void *, vdev) + __field(unsigned long long, out_ctx) + __field(unsigned long long, in_ctx) + __field(u8, fake_port) + __field(u8, real_port) + __field(u16, current_mel) + + ), + TP_fast_assign( + __entry->vdev = vdev; + __entry->in_ctx = (unsigned long long) vdev->in_ctx->dma; + __entry->out_ctx = (unsigned long long) vdev->out_ctx->dma; + __entry->fake_port = (u8) vdev->fake_port; + __entry->real_port = (u8) vdev->real_port; + __entry->current_mel = (u16) vdev->current_mel; + ), + TP_printk("vdev %p ctx %llx | %llx fake_port %d real_port %d current_mel %d", + __entry->vdev, __entry->in_ctx, __entry->out_ctx, + __entry->fake_port, __entry->real_port, __entry->current_mel + ) +); + +DEFINE_EVENT(xhci_log_free_virt_dev, xhci_free_virt_device, + TP_PROTO(struct xhci_virt_device *vdev), + TP_ARGS(vdev) +); + DECLARE_EVENT_CLASS(xhci_log_virt_dev, TP_PROTO(struct xhci_virt_device *vdev), TP_ARGS(vdev), @@ -208,11 +239,6 @@ DEFINE_EVENT(xhci_log_virt_dev, xhci_alloc_virt_device, TP_ARGS(vdev) ); -DEFINE_EVENT(xhci_log_virt_dev, xhci_free_virt_device, - TP_PROTO(struct xhci_virt_device *vdev), - TP_ARGS(vdev) -); - DEFINE_EVENT(xhci_log_virt_dev, xhci_setup_device, TP_PROTO(struct xhci_virt_device *vdev), TP_ARGS(vdev) From 36eb93509c45d0bdbd8d09a01ab9d857972f5963 Mon Sep 17 00:00:00 2001 From: Dongjiu Geng Date: Thu, 21 Jun 2018 16:19:43 +0300 Subject: [PATCH 437/572] usb: xhci: remove the code build warning Initialize the 'err' variate to remove the build warning, the warning is shown as below: drivers/usb/host/xhci-tegra.c: In function 'tegra_xusb_mbox_thread': drivers/usb/host/xhci-tegra.c:552:6: warning: 'err' may be used uninitialized in this function [-Wuninitialized] drivers/usb/host/xhci-tegra.c:482:6: note: 'err' was declared here Fixes: e84fce0f8837 ("usb: xhci: Add NVIDIA Tegra XUSB controller driver") Signed-off-by: Dongjiu Geng Acked-by: Thierry Reding Acked-by: Jon Hunter Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-tegra.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-tegra.c b/drivers/usb/host/xhci-tegra.c index a8c1d073cba0..d50549fedec8 100644 --- a/drivers/usb/host/xhci-tegra.c +++ b/drivers/usb/host/xhci-tegra.c @@ -481,7 +481,7 @@ static void tegra_xusb_mbox_handle(struct tegra_xusb *tegra, unsigned long mask; unsigned int port; bool idle, enable; - int err; + int err = 0; memset(&rsp, 0, sizeof(rsp)); From 3431a150fd89bc74cd2f2aaf6977cc0e278fb445 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Thu, 21 Jun 2018 16:19:44 +0300 Subject: [PATCH 438/572] usb: xhci: tegra: fix runtime PM error handling The address-of operator will always evaluate to true. However, power should be explicitly disabled if no power domain is used. Remove the address-of operator. Fixes: 58c38116c6cc ("usb: xhci: tegra: Add support for managing powergates") Signed-off-by: Stefan Agner Acked-by: Jon Hunter Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-tegra.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/xhci-tegra.c b/drivers/usb/host/xhci-tegra.c index d50549fedec8..4b463e5202a4 100644 --- a/drivers/usb/host/xhci-tegra.c +++ b/drivers/usb/host/xhci-tegra.c @@ -1223,10 +1223,10 @@ disable_rpm: pm_runtime_disable(&pdev->dev); usb_put_hcd(tegra->hcd); disable_xusbc: - if (!&pdev->dev.pm_domain) + if (!pdev->dev.pm_domain) tegra_powergate_power_off(TEGRA_POWERGATE_XUSBC); disable_xusba: - if (!&pdev->dev.pm_domain) + if (!pdev->dev.pm_domain) tegra_powergate_power_off(TEGRA_POWERGATE_XUSBA); put_padctl: tegra_xusb_padctl_put(tegra->padctl); From 305886ca87be480ae159908c2affd135c04215cf Mon Sep 17 00:00:00 2001 From: Ajay Gupta Date: Thu, 21 Jun 2018 16:19:45 +0300 Subject: [PATCH 439/572] usb: xhci: increase CRS timeout value Some controllers take almost 55ms to complete controller restore state (CRS). There is no timeout limit mentioned in xhci specification so fixing the issue by increasing the timeout limit to 100ms [reformat code comment -Mathias] Signed-off-by: Ajay Gupta Signed-off-by: Nagaraj Annaiah Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index f11ec61bcc7d..2f4850f25e82 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -1078,8 +1078,13 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) command = readl(&xhci->op_regs->command); command |= CMD_CRS; writel(command, &xhci->op_regs->command); + /* + * Some controllers take up to 55+ ms to complete the controller + * restore so setting the timeout to 100ms. Xhci specification + * doesn't mention any timeout value. + */ if (xhci_handshake(&xhci->op_regs->status, - STS_RESTORE, 0, 10 * 1000)) { + STS_RESTORE, 0, 100 * 1000)) { xhci_warn(xhci, "WARN: xHC restore state timeout\n"); spin_unlock_irq(&xhci->lock); return -ETIMEDOUT; From d2d2e3c46be5d6dd8001d0eebdf7cafb9bc7006b Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Thu, 21 Jun 2018 16:43:17 +0300 Subject: [PATCH 440/572] acpi: Add helper for deactivating memory region Sometimes memory resource may be overlapping with SystemMemory Operation Region by design, for example if the memory region is used as a mailbox for communication with a firmware in the system. One occasion of such mailboxes is USB Type-C Connector System Software Interface (UCSI). With regions like that, it is important that the driver is able to map the memory with the requirements it has. For example, the driver should be allowed to map the memory as non-cached memory. However, if the operation region has been accessed before the driver has mapped the memory, the memory has been marked as write-back by the time the driver is loaded. That means the driver will fail to map the memory if it expects non-cached memory. To work around the problem, introducing helper that the drivers can use to temporarily deactivate (unmap) SystemMemory Operation Regions that overlap with their IO memory. Fixes: 8243edf44152 ("usb: typec: ucsi: Add ACPI driver") Cc: stable@vger.kernel.org Reviewed-by: Rafael J. Wysocki Signed-off-by: Heikki Krogerus Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/osl.c | 72 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/acpi.h | 3 ++ 2 files changed, 75 insertions(+) diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 7ca41bf023c9..8df9abfa947b 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -45,6 +45,8 @@ #include #include +#include "acpica/accommon.h" +#include "acpica/acnamesp.h" #include "internal.h" #define _COMPONENT ACPI_OS_SERVICES @@ -1490,6 +1492,76 @@ int acpi_check_region(resource_size_t start, resource_size_t n, } EXPORT_SYMBOL(acpi_check_region); +static acpi_status acpi_deactivate_mem_region(acpi_handle handle, u32 level, + void *_res, void **return_value) +{ + struct acpi_mem_space_context **mem_ctx; + union acpi_operand_object *handler_obj; + union acpi_operand_object *region_obj2; + union acpi_operand_object *region_obj; + struct resource *res = _res; + acpi_status status; + + region_obj = acpi_ns_get_attached_object(handle); + if (!region_obj) + return AE_OK; + + handler_obj = region_obj->region.handler; + if (!handler_obj) + return AE_OK; + + if (region_obj->region.space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY) + return AE_OK; + + if (!(region_obj->region.flags & AOPOBJ_SETUP_COMPLETE)) + return AE_OK; + + region_obj2 = acpi_ns_get_secondary_object(region_obj); + if (!region_obj2) + return AE_OK; + + mem_ctx = (void *)®ion_obj2->extra.region_context; + + if (!(mem_ctx[0]->address >= res->start && + mem_ctx[0]->address < res->end)) + return AE_OK; + + status = handler_obj->address_space.setup(region_obj, + ACPI_REGION_DEACTIVATE, + NULL, (void **)mem_ctx); + if (ACPI_SUCCESS(status)) + region_obj->region.flags &= ~(AOPOBJ_SETUP_COMPLETE); + + return status; +} + +/** + * acpi_release_memory - Release any mappings done to a memory region + * @handle: Handle to namespace node + * @res: Memory resource + * @level: A level that terminates the search + * + * Walks through @handle and unmaps all SystemMemory Operation Regions that + * overlap with @res and that have already been activated (mapped). + * + * This is a helper that allows drivers to place special requirements on memory + * region that may overlap with operation regions, primarily allowing them to + * safely map the region as non-cached memory. + * + * The unmapped Operation Regions will be automatically remapped next time they + * are called, so the drivers do not need to do anything else. + */ +acpi_status acpi_release_memory(acpi_handle handle, struct resource *res, + u32 level) +{ + if (!(res->flags & IORESOURCE_MEM)) + return AE_TYPE; + + return acpi_walk_namespace(ACPI_TYPE_REGION, handle, level, + acpi_deactivate_mem_region, NULL, res, NULL); +} +EXPORT_SYMBOL_GPL(acpi_release_memory); + /* * Let drivers know whether the resource checks are effective */ diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 4b35a66383f9..e54f40974eb0 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -443,6 +443,9 @@ int acpi_check_resource_conflict(const struct resource *res); int acpi_check_region(resource_size_t start, resource_size_t n, const char *name); +acpi_status acpi_release_memory(acpi_handle handle, struct resource *res, + u32 level); + int acpi_resources_are_enforced(void); #ifdef CONFIG_HIBERNATION From 1f9f9d168ce619608572b01771c47a41b15429e6 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Thu, 21 Jun 2018 16:43:18 +0300 Subject: [PATCH 441/572] usb: typec: ucsi: acpi: Workaround for cache mode issue This fixes an issue where the driver fails with an error: ioremap error for 0x3f799000-0x3f79a000, requested 0x2, got 0x0 On some platforms the UCSI ACPI mailbox SystemMemory Operation Region may be setup before the driver has been loaded. That will lead into the driver failing to map the mailbox region, as it has been already marked as write-back memory. acpi_os_ioremap() for x86 uses ioremap_cache() unconditionally. When the issue happens, the embedded controller has a pending query event for the UCSI notification right after boot-up which causes the operation region to be setup before UCSI driver has been loaded. The fix is to notify acpi core that the driver is about to access memory region which potentially overlaps with an operation region right before mapping it. acpi_release_memory() will check if the memory has already been setup (mapped) by acpi core, and deactivate it (unmap) if it has. The driver is then able to map the memory with ioremap_nocache() and set the memtype to uncached for the region. Reported-by: Paul Menzel Fixes: 8243edf44152 ("usb: typec: ucsi: Add ACPI driver") Cc: stable@vger.kernel.org Signed-off-by: Heikki Krogerus Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi_acpi.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/typec/ucsi/ucsi_acpi.c b/drivers/usb/typec/ucsi/ucsi_acpi.c index 44eb4e1ea817..a18112a83fae 100644 --- a/drivers/usb/typec/ucsi/ucsi_acpi.c +++ b/drivers/usb/typec/ucsi/ucsi_acpi.c @@ -79,6 +79,11 @@ static int ucsi_acpi_probe(struct platform_device *pdev) return -ENODEV; } + /* This will make sure we can use ioremap_nocache() */ + status = acpi_release_memory(ACPI_HANDLE(&pdev->dev), res, 1); + if (ACPI_FAILURE(status)) + return -ENOMEM; + /* * NOTE: The memory region for the data structures is used also in an * operation region, which means ACPI has already reserved it. Therefore From 68816e16b4789f2d05e77b6dcb77564cf5d6a8d8 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Thu, 21 Jun 2018 16:43:19 +0300 Subject: [PATCH 442/572] usb: typec: ucsi: Fix for incorrect status data issue According to UCSI Specification, Connector Change Event only means a change in the Connector Status and Operation Mode fields of the STATUS data structure. So any other change should create another event. Unfortunately on some platforms the firmware acting as PPM (platform policy manager - usually embedded controller firmware) still does not report any other status changes if there is a connector change event. So if the connector power or data role was changed when a device was plugged to the connector, the driver does not get any indication about that. The port will show wrong roles if that happens. To fix the issue, always checking the data and power role together with a connector change event. Fixes: c1b0bc2dabfa ("usb: typec: Add support for UCSI interface") Signed-off-by: Heikki Krogerus Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index bd5cca5632b3..8d0a6fe748bd 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -350,6 +350,19 @@ static void ucsi_connector_change(struct work_struct *work) } if (con->status.change & UCSI_CONSTAT_CONNECT_CHANGE) { + typec_set_pwr_role(con->port, con->status.pwr_dir); + + switch (con->status.partner_type) { + case UCSI_CONSTAT_PARTNER_TYPE_UFP: + typec_set_data_role(con->port, TYPEC_HOST); + break; + case UCSI_CONSTAT_PARTNER_TYPE_DFP: + typec_set_data_role(con->port, TYPEC_DEVICE); + break; + default: + break; + } + if (con->status.connected) ucsi_register_partner(con); else From e16711c32bca59a29d8ef449eda5b8427dc6eb6d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 20 Jun 2018 17:05:42 -0700 Subject: [PATCH 443/572] staging/typec: fix tcpci_rt1711h build errors Fix Kconfig warning and build errors in staging/typec/rt1711h.c. The driver uses I2C interfaces so it should depend on I2C. WARNING: unmet direct dependencies detected for TYPEC_TCPCI Depends on [m]: STAGING [=y] && TYPEC_TCPM [=y] && I2C [=m] Selected by [y]: - TYPEC_RT1711H [=y] && STAGING [=y] && TYPEC_TCPM [=y] and then: drivers/staging/typec/tcpci.o: In function `tcpci_probe': ../drivers/staging/typec/tcpci.c:536: undefined reference to `__devm_regmap_init_i2c' drivers/staging/typec/tcpci.o: In function `tcpci_i2c_driver_init': ../drivers/staging/typec/tcpci.c:593: undefined reference to `i2c_register_driver' drivers/staging/typec/tcpci.o: In function `tcpci_i2c_driver_exit': ../drivers/staging/typec/tcpci.c:593: undefined reference to `i2c_del_driver' drivers/staging/typec/tcpci_rt1711h.o: In function `rt1711h_check_revision': ../drivers/staging/typec/tcpci_rt1711h.c:218: undefined reference to `i2c_smbus_read_word_data' ../drivers/staging/typec/tcpci_rt1711h.c:225: undefined reference to `i2c_smbus_read_word_data' drivers/staging/typec/tcpci_rt1711h.o: In function `rt1711h_probe': ../drivers/staging/typec/tcpci_rt1711h.c:251: undefined reference to `__devm_regmap_init_i2c' drivers/staging/typec/tcpci_rt1711h.o: In function `rt1711h_i2c_driver_init': ../drivers/staging/typec/tcpci_rt1711h.c:308: undefined reference to `i2c_register_driver' drivers/staging/typec/tcpci_rt1711h.o: In function `rt1711h_i2c_driver_exit': ../drivers/staging/typec/tcpci_rt1711h.c:308: undefined reference to `i2c_del_driver' Fixes: ce08eaeb6388 ("staging: typec: rt1711h typec chip driver") Reported-by: kbuild test robot Signed-off-by: Randy Dunlap Cc: ShuFan Lee Cc: kbuild-all@01.org Reviewed-by: Guenter Roeck Revieved-by: Heikki Krogerus Signed-off-by: Greg Kroah-Hartman --- drivers/staging/typec/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/typec/Kconfig b/drivers/staging/typec/Kconfig index 3aa981fbc8f5..e45ed08a5166 100644 --- a/drivers/staging/typec/Kconfig +++ b/drivers/staging/typec/Kconfig @@ -11,6 +11,7 @@ config TYPEC_TCPCI config TYPEC_RT1711H tristate "Richtek RT1711H Type-C chip driver" + depends on I2C select TYPEC_TCPCI help Richtek RT1711H Type-C chip driver that works with From 4a762569a2722b8a48066c7bacf0e1dc67d17fa1 Mon Sep 17 00:00:00 2001 From: Houston Yaroschoff Date: Mon, 11 Jun 2018 12:39:09 +0200 Subject: [PATCH 444/572] usb: cdc_acm: Add quirk for Uniden UBC125 scanner Uniden UBC125 radio scanner has USB interface which fails to work with cdc_acm driver: usb 1-1.5: new full-speed USB device number 4 using xhci_hcd cdc_acm 1-1.5:1.0: Zero length descriptor references cdc_acm: probe of 1-1.5:1.0 failed with error -22 Adding the NO_UNION_NORMAL quirk for the device fixes the issue: usb 1-4: new full-speed USB device number 15 using xhci_hcd usb 1-4: New USB device found, idVendor=1965, idProduct=0018 usb 1-4: New USB device strings: Mfr=1, Product=2, SerialNumber=3 usb 1-4: Product: UBC125XLT usb 1-4: Manufacturer: Uniden Corp. usb 1-4: SerialNumber: 0001 cdc_acm 1-4:1.0: ttyACM0: USB ACM device `lsusb -v` of the device: Bus 001 Device 015: ID 1965:0018 Uniden Corporation Device Descriptor: bLength 18 bDescriptorType 1 bcdUSB 2.00 bDeviceClass 2 Communications bDeviceSubClass 0 bDeviceProtocol 0 bMaxPacketSize0 64 idVendor 0x1965 Uniden Corporation idProduct 0x0018 bcdDevice 0.01 iManufacturer 1 Uniden Corp. iProduct 2 UBC125XLT iSerial 3 0001 bNumConfigurations 1 Configuration Descriptor: bLength 9 bDescriptorType 2 wTotalLength 48 bNumInterfaces 2 bConfigurationValue 1 iConfiguration 0 bmAttributes 0x80 (Bus Powered) MaxPower 500mA Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 0 bAlternateSetting 0 bNumEndpoints 1 bInterfaceClass 2 Communications bInterfaceSubClass 2 Abstract (modem) bInterfaceProtocol 0 None iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x87 EP 7 IN bmAttributes 3 Transfer Type Interrupt Synch Type None Usage Type Data wMaxPacketSize 0x0008 1x 8 bytes bInterval 10 Interface Descriptor: bLength 9 bDescriptorType 4 bInterfaceNumber 1 bAlternateSetting 0 bNumEndpoints 2 bInterfaceClass 10 CDC Data bInterfaceSubClass 0 Unused bInterfaceProtocol 0 iInterface 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x81 EP 1 IN bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0040 1x 64 bytes bInterval 0 Endpoint Descriptor: bLength 7 bDescriptorType 5 bEndpointAddress 0x02 EP 2 OUT bmAttributes 2 Transfer Type Bulk Synch Type None Usage Type Data wMaxPacketSize 0x0040 1x 64 bytes bInterval 0 Device Status: 0x0000 (Bus Powered) Signed-off-by: Houston Yaroschoff Cc: stable Acked-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 7b366a6c0b49..998b32d0167e 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1758,6 +1758,9 @@ static const struct usb_device_id acm_ids[] = { { USB_DEVICE(0x11ca, 0x0201), /* VeriFone Mx870 Gadget Serial */ .driver_info = SINGLE_RX_URB, }, + { USB_DEVICE(0x1965, 0x0018), /* Uniden UBC125XLT */ + .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ + }, { USB_DEVICE(0x22b8, 0x7000), /* Motorola Q Phone */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, From ecc443c03fb14abfb8a6af5e3b2d43b5257e60f2 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 7 Jun 2018 15:54:48 +0200 Subject: [PATCH 445/572] NFC: pn533: Fix wrong GFP flag usage pn533_recv_response() is an urb completion handler, so it must use GFP_ATOMIC. pn533_usb_send_frame() OTOH runs from a regular sleeping context, so the pn533_submit_urb_for_response() there (and only there) can use the regular GFP_KERNEL flags. BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1514134 Fixes: 9815c7cf22da ("NFC: pn533: Separate physical layer from ...") Cc: Michael Thalmeier Signed-off-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- drivers/nfc/pn533/usb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nfc/pn533/usb.c b/drivers/nfc/pn533/usb.c index d5553c47014f..5d823e965883 100644 --- a/drivers/nfc/pn533/usb.c +++ b/drivers/nfc/pn533/usb.c @@ -74,7 +74,7 @@ static void pn533_recv_response(struct urb *urb) struct sk_buff *skb = NULL; if (!urb->status) { - skb = alloc_skb(urb->actual_length, GFP_KERNEL); + skb = alloc_skb(urb->actual_length, GFP_ATOMIC); if (!skb) { nfc_err(&phy->udev->dev, "failed to alloc memory\n"); } else { @@ -186,7 +186,7 @@ static int pn533_usb_send_frame(struct pn533 *dev, if (dev->protocol_type == PN533_PROTO_REQ_RESP) { /* request for response for sent packet directly */ - rc = pn533_submit_urb_for_response(phy, GFP_ATOMIC); + rc = pn533_submit_urb_for_response(phy, GFP_KERNEL); if (rc) goto error; } else if (dev->protocol_type == PN533_PROTO_REQ_ACK_RESP) { From 9578bcd0bb487b8ecef4b7eee799aafb678aa441 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 7 Jun 2018 16:17:14 +0300 Subject: [PATCH 446/572] typec: tcpm: Fix a msecs vs jiffies bug The tcpm_set_state() function take msecs not jiffies. Fixes: f0690a25a140 ("staging: typec: USB Type-C Port Manager (tcpm)") Signed-off-by: Dan Carpenter Acked-by: Heikki Krogerus Reviewed-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/typec/tcpm.c b/drivers/usb/typec/tcpm.c index 8a201dd53d36..0dfd755020f4 100644 --- a/drivers/usb/typec/tcpm.c +++ b/drivers/usb/typec/tcpm.c @@ -3043,7 +3043,8 @@ static void run_state_machine(struct tcpm_port *port) tcpm_port_is_sink(port) && time_is_after_jiffies(port->delayed_runtime)) { tcpm_set_state(port, SNK_DISCOVERY, - port->delayed_runtime - jiffies); + jiffies_to_msecs(port->delayed_runtime - + jiffies)); break; } tcpm_set_state(port, unattached_state(port), 0); From d5a4f93511b7000183c0d528739b824752139f79 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Tue, 12 Jun 2018 09:53:01 +0800 Subject: [PATCH 447/572] usb: typec: tcpm: fix logbuffer index is wrong if _tcpm_log is re-entered The port->logbuffer_head may be wrong if the two processes enters _tcpm_log at the mostly same time. The 2nd process enters _tcpm_log before the 1st process update the index, then the 2nd process will not allocate logbuffer, when the 2nd process tries to use log buffer, the index has already updated by the 1st process, so it will get NULL pointer for updated logbuffer, the error message like below: tcpci 0-0050: Log buffer index 6 is NULL Cc: Heikki Krogerus Cc: Guenter Roeck Cc: Jun Li Signed-off-by: Peter Chen Reviewed-by: Heikki Krogerus Cc: stable Reviewed-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/usb/typec/tcpm.c b/drivers/usb/typec/tcpm.c index 0dfd755020f4..d961f1ec0e08 100644 --- a/drivers/usb/typec/tcpm.c +++ b/drivers/usb/typec/tcpm.c @@ -418,17 +418,18 @@ static void _tcpm_log(struct tcpm_port *port, const char *fmt, va_list args) u64 ts_nsec = local_clock(); unsigned long rem_nsec; + mutex_lock(&port->logbuffer_lock); if (!port->logbuffer[port->logbuffer_head]) { port->logbuffer[port->logbuffer_head] = kzalloc(LOG_BUFFER_ENTRY_SIZE, GFP_KERNEL); - if (!port->logbuffer[port->logbuffer_head]) + if (!port->logbuffer[port->logbuffer_head]) { + mutex_unlock(&port->logbuffer_lock); return; + } } vsnprintf(tmpbuffer, sizeof(tmpbuffer), fmt, args); - mutex_lock(&port->logbuffer_lock); - if (tcpm_log_full(port)) { port->logbuffer_head = max(port->logbuffer_head - 1, 0); strcpy(tmpbuffer, "overflow"); From 8793bb7f4a9dd1396575d2e9337d331662cb7555 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 19 Jun 2018 13:14:56 -0700 Subject: [PATCH 448/572] kbuild: add macro for controlling warnings to linux/compiler.h I have occasionally run into a situation where it would make sense to control a compiler warning from a source file rather than doing so from a Makefile using the $(cc-disable-warning, ...) or $(cc-option, ...) helpers. The approach here is similar to what glibc uses, using __diag() and related macros to encapsulate a _Pragma("GCC diagnostic ...") statement that gets turned into the respective "#pragma GCC diagnostic ..." by the preprocessor when the macro gets expanded. Like glibc, I also have an argument to pass the affected compiler version, but decided to actually evaluate that one. For now, this supports GCC_4_6, GCC_4_7, GCC_4_8, GCC_4_9, GCC_5, GCC_6, GCC_7, GCC_8 and GCC_9. Adding support for CLANG_5 and other interesting versions is straightforward here. GNU compilers starting with gcc-4.2 could support it in principle, but "#pragma GCC diagnostic push" was only added in gcc-4.6, so it seems simpler to not deal with those at all. The same versions show a large number of warnings already, so it seems easier to just leave it at that and not do a more fine-grained control for them. The use cases I found so far include: - turning off the gcc-8 -Wattribute-alias warning inside of the SYSCALL_DEFINEx() macro without having to do it globally. - Reducing the build time for a simple re-make after a change, once we move the warnings from ./Makefile and ./scripts/Makefile.extrawarn into linux/compiler.h - More control over the warnings based on other configurations, using preprocessor syntax instead of Makefile syntax. This should make it easier for the average developer to understand and change things. - Adding an easy way to turn the W=1 option on unconditionally for a subdirectory or a specific file. This has been requested by several developers in the past that want to have their subsystems W=1 clean. - Integrating clang better into the build systems. Clang supports more warnings than GCC, and we probably want to classify them as default, W=1, W=2 etc, but there are cases in which the warnings should be classified differently due to excessive false positives from one or the other compiler. - Adding a way to turn the default warnings into errors (e.g. using a new "make E=0" tag) while not also turning the W=1 warnings into errors. This patch for now just adds the minimal infrastructure in order to do the first of the list above. As the #pragma GCC diagnostic takes precedence over command line options, the next step would be to convert a lot of the individual Makefiles that set nonstandard options to use __diag() instead. [paul.burton@mips.com: - Rebase atop current master. - Add __diag_GCC, or more generally __diag_, abstraction to avoid code outside of linux/compiler-gcc.h needing to duplicate knowledge about different GCC versions. - Add a comment argument to __diag_{ignore,warn,error} which isn't used in the expansion of the macros but serves to push people to document the reason for using them - per feedback from Kees Cook. - Translate severity to GCC-specific pragmas in linux/compiler-gcc.h rather than using GCC-specific in linux/compiler_types.h. - Drop all but GCC 8 macros, since we only need to define macros for versions that we need to introduce pragmas for, and as of this series that's just GCC 8. - Capitalize comments in linux/compiler-gcc.h to match the style of the rest of the file. - Line up macro definitions with tabs in linux/compiler-gcc.h.] Signed-off-by: Arnd Bergmann Signed-off-by: Paul Burton Tested-by: Christophe Leroy Tested-by: Stafford Horne Signed-off-by: Masahiro Yamada --- include/linux/compiler-gcc.h | 25 +++++++++++++++++++++++++ include/linux/compiler_types.h | 18 ++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index f1a7492a5cc8..fd282c7d3e5e 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -347,3 +347,28 @@ #if GCC_VERSION >= 50100 #define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1 #endif + +/* + * Turn individual warnings and errors on and off locally, depending + * on version. + */ +#define __diag_GCC(version, severity, s) \ + __diag_GCC_ ## version(__diag_GCC_ ## severity s) + +/* Severity used in pragma directives */ +#define __diag_GCC_ignore ignored +#define __diag_GCC_warn warning +#define __diag_GCC_error error + +/* Compilers before gcc-4.6 do not understand "#pragma GCC diagnostic push" */ +#if GCC_VERSION >= 40600 +#define __diag_str1(s) #s +#define __diag_str(s) __diag_str1(s) +#define __diag(s) _Pragma(__diag_str(GCC diagnostic s)) +#endif + +#if GCC_VERSION >= 80000 +#define __diag_GCC_8(s) __diag(s) +#else +#define __diag_GCC_8(s) +#endif diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 6b79a9bba9a7..a8ba6b04152c 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -271,4 +271,22 @@ struct ftrace_likely_data { # define __native_word(t) (sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long)) #endif +#ifndef __diag +#define __diag(string) +#endif + +#ifndef __diag_GCC +#define __diag_GCC(version, severity, string) +#endif + +#define __diag_push() __diag(push) +#define __diag_pop() __diag(pop) + +#define __diag_ignore(compiler, version, option, comment) \ + __diag_ ## compiler(version, ignore, option) +#define __diag_warn(compiler, version, option, comment) \ + __diag_ ## compiler(version, warn, option) +#define __diag_error(compiler, version, option, comment) \ + __diag_ ## compiler(version, error, option) + #endif /* __LINUX_COMPILER_TYPES_H */ From bee20031772af3debe8cbaa234528f24c7892e8f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 19 Jun 2018 13:14:57 -0700 Subject: [PATCH 449/572] disable -Wattribute-alias warning for SYSCALL_DEFINEx() gcc-8 warns for every single definition of a system call entry point, e.g.: include/linux/compat.h:56:18: error: 'compat_sys_rt_sigprocmask' alias between functions of incompatible types 'long int(int, compat_sigset_t *, compat_sigset_t *, compat_size_t)' {aka 'long int(int, struct *, struct *, unsigned int)'} and 'long int(long int, long int, long int, long int)' [-Werror=attribute-alias] asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))\ ^~~~~~~~~~ include/linux/compat.h:45:2: note: in expansion of macro 'COMPAT_SYSCALL_DEFINEx' COMPAT_SYSCALL_DEFINEx(4, _##name, __VA_ARGS__) ^~~~~~~~~~~~~~~~~~~~~~ kernel/signal.c:2601:1: note: in expansion of macro 'COMPAT_SYSCALL_DEFINE4' COMPAT_SYSCALL_DEFINE4(rt_sigprocmask, int, how, compat_sigset_t __user *, nset, ^~~~~~~~~~~~~~~~~~~~~~ include/linux/compat.h:60:18: note: aliased declaration here asmlinkage long compat_SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__))\ ^~~~~~~~~~ The new warning seems reasonable in principle, but it doesn't help us here, since we rely on the type mismatch to sanitize the system call arguments. After I reported this as GCC PR82435, a new -Wno-attribute-alias option was added that could be used to turn the warning off globally on the command line, but I'd prefer to do it a little more fine-grained. Interestingly, turning a warning off and on again inside of a single macro doesn't always work, in this case I had to add an extra statement inbetween and decided to copy the __SC_TEST one from the native syscall to the compat syscall macro. See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=83256 for more details about this. [paul.burton@mips.com: - Rebase atop current master. - Split GCC & version arguments to __diag_ignore() in order to match changes to the preceding patch. - Add the comment argument to match the preceding patch.] Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82435 Signed-off-by: Arnd Bergmann Signed-off-by: Paul Burton Tested-by: Christophe Leroy Tested-by: Stafford Horne Signed-off-by: Masahiro Yamada --- include/linux/compat.h | 8 +++++++- include/linux/syscalls.h | 4 ++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/include/linux/compat.h b/include/linux/compat.h index b1a5562b3215..c68acc47da57 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -72,6 +72,9 @@ */ #ifndef COMPAT_SYSCALL_DEFINEx #define COMPAT_SYSCALL_DEFINEx(x, name, ...) \ + __diag_push(); \ + __diag_ignore(GCC, 8, "-Wattribute-alias", \ + "Type aliasing is used to sanitize syscall arguments");\ asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \ __attribute__((alias(__stringify(__se_compat_sys##name)))); \ @@ -80,8 +83,11 @@ asmlinkage long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ asmlinkage long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ { \ - return __do_compat_sys##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__));\ + long ret = __do_compat_sys##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__));\ + __MAP(x,__SC_TEST,__VA_ARGS__); \ + return ret; \ } \ + __diag_pop(); \ static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) #endif /* COMPAT_SYSCALL_DEFINEx */ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 73810808cdf2..a368a68cb667 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -231,6 +231,9 @@ static inline int is_syscall_trace_event(struct trace_event_call *tp_event) */ #ifndef __SYSCALL_DEFINEx #define __SYSCALL_DEFINEx(x, name, ...) \ + __diag_push(); \ + __diag_ignore(GCC, 8, "-Wattribute-alias", \ + "Type aliasing is used to sanitize syscall arguments");\ asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \ __attribute__((alias(__stringify(__se_sys##name)))); \ ALLOW_ERROR_INJECTION(sys##name, ERRNO); \ @@ -243,6 +246,7 @@ static inline int is_syscall_trace_event(struct trace_event_call *tp_event) __PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__)); \ return ret; \ } \ + __diag_pop(); \ static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) #endif /* __SYSCALL_DEFINEx */ From ac8517440344dbe598f7c1c23e686c800b563061 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 19 Jun 2018 13:14:58 -0700 Subject: [PATCH 450/572] powerpc: Remove -Wattribute-alias pragmas With SYSCALL_DEFINEx() disabling -Wattribute-alias generically, there's no need to duplicate that for PowerPC syscalls. This reverts commit 415520373975 ("powerpc: fix build failure by disabling attribute-alias warning in pci_32") and commit 2479bfc9bc60 ("powerpc: Fix build by disabling attribute-alias warning for SYSCALL_DEFINEx"). Signed-off-by: Paul Burton Acked-by: Christophe Leroy Acked-by: Michael Ellerman Signed-off-by: Masahiro Yamada --- arch/powerpc/kernel/pci_32.c | 4 ---- arch/powerpc/kernel/pci_64.c | 4 ---- arch/powerpc/kernel/rtas.c | 4 ---- arch/powerpc/kernel/signal_32.c | 8 -------- arch/powerpc/kernel/signal_64.c | 4 ---- arch/powerpc/kernel/syscalls.c | 4 ---- arch/powerpc/mm/subpage-prot.c | 4 ---- 7 files changed, 32 deletions(-) diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index 4f861055a852..d63b488d34d7 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -285,9 +285,6 @@ pci_bus_to_hose(int bus) * Note that the returned IO or memory base is a physical address */ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wpragmas" -#pragma GCC diagnostic ignored "-Wattribute-alias" SYSCALL_DEFINE3(pciconfig_iobase, long, which, unsigned long, bus, unsigned long, devfn) { @@ -313,4 +310,3 @@ SYSCALL_DEFINE3(pciconfig_iobase, long, which, return result; } -#pragma GCC diagnostic pop diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index 812171c09f42..dff28f903512 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -203,9 +203,6 @@ void pcibios_setup_phb_io_space(struct pci_controller *hose) #define IOBASE_ISA_IO 3 #define IOBASE_ISA_MEM 4 -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wpragmas" -#pragma GCC diagnostic ignored "-Wattribute-alias" SYSCALL_DEFINE3(pciconfig_iobase, long, which, unsigned long, in_bus, unsigned long, in_devfn) { @@ -259,7 +256,6 @@ SYSCALL_DEFINE3(pciconfig_iobase, long, which, unsigned long, in_bus, return -EOPNOTSUPP; } -#pragma GCC diagnostic pop #ifdef CONFIG_NUMA int pcibus_to_node(struct pci_bus *bus) diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 7fb9f83dcde8..8afd146bc9c7 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -1051,9 +1051,6 @@ struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log, } /* We assume to be passed big endian arguments */ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wpragmas" -#pragma GCC diagnostic ignored "-Wattribute-alias" SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) { struct rtas_args args; @@ -1140,7 +1137,6 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) return 0; } -#pragma GCC diagnostic pop /* * Call early during boot, before mem init, to retrieve the RTAS diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 5eedbb282d42..e6474a45cef5 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -1038,9 +1038,6 @@ static int do_setcontext_tm(struct ucontext __user *ucp, } #endif -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wpragmas" -#pragma GCC diagnostic ignored "-Wattribute-alias" #ifdef CONFIG_PPC64 COMPAT_SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, struct ucontext __user *, new_ctx, int, ctx_size) @@ -1134,7 +1131,6 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, set_thread_flag(TIF_RESTOREALL); return 0; } -#pragma GCC diagnostic pop #ifdef CONFIG_PPC64 COMPAT_SYSCALL_DEFINE0(rt_sigreturn) @@ -1231,9 +1227,6 @@ SYSCALL_DEFINE0(rt_sigreturn) return 0; } -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wpragmas" -#pragma GCC diagnostic ignored "-Wattribute-alias" #ifdef CONFIG_PPC32 SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx, int, ndbg, struct sig_dbg_op __user *, dbg) @@ -1337,7 +1330,6 @@ SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx, return 0; } #endif -#pragma GCC diagnostic pop /* * OK, we're invoking a handler diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index d42b60020389..83d51bf586c7 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -625,9 +625,6 @@ static long setup_trampoline(unsigned int syscall, unsigned int __user *tramp) /* * Handle {get,set,swap}_context operations */ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wpragmas" -#pragma GCC diagnostic ignored "-Wattribute-alias" SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, struct ucontext __user *, new_ctx, long, ctx_size) { @@ -693,7 +690,6 @@ SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx, set_thread_flag(TIF_RESTOREALL); return 0; } -#pragma GCC diagnostic pop /* diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index 083fa06962fd..466216506eb2 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -62,9 +62,6 @@ out: return ret; } -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wpragmas" -#pragma GCC diagnostic ignored "-Wattribute-alias" SYSCALL_DEFINE6(mmap2, unsigned long, addr, size_t, len, unsigned long, prot, unsigned long, flags, unsigned long, fd, unsigned long, pgoff) @@ -78,7 +75,6 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, size_t, len, { return do_mmap2(addr, len, prot, flags, fd, offset, PAGE_SHIFT); } -#pragma GCC diagnostic pop #ifdef CONFIG_PPC32 /* diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c index 75cb646a79c3..9d16ee251fc0 100644 --- a/arch/powerpc/mm/subpage-prot.c +++ b/arch/powerpc/mm/subpage-prot.c @@ -186,9 +186,6 @@ static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr, * in a 2-bit field won't allow writes to a page that is otherwise * write-protected. */ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wpragmas" -#pragma GCC diagnostic ignored "-Wattribute-alias" SYSCALL_DEFINE3(subpage_prot, unsigned long, addr, unsigned long, len, u32 __user *, map) { @@ -272,4 +269,3 @@ SYSCALL_DEFINE3(subpage_prot, unsigned long, addr, up_write(&mm->mmap_sem); return err; } -#pragma GCC diagnostic pop From 5391e536dbf702de6581369e47c9c4cea3a87170 Mon Sep 17 00:00:00 2001 From: Sven Joachim Date: Mon, 18 Jun 2018 19:45:25 +0200 Subject: [PATCH 451/572] stack-protector: Fix test with 32-bit userland and CONFIG_64BIT=y When building a 64-bit 4.18-rc1 kernel with a 32-bit userland, I noticed that stack protection was silently disabled. Adding -m64 in gcc-x86_64-has-stack-protector.sh fixed that, similar to what has been noticed in commit 2a61f4747eea ("stack-protector: test compiler capability in Kconfig and drop AUTO mode") for gcc-x86_32-has-stack-protector.sh. Signed-off-by: Sven Joachim Signed-off-by: Masahiro Yamada --- scripts/gcc-x86_64-has-stack-protector.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/gcc-x86_64-has-stack-protector.sh b/scripts/gcc-x86_64-has-stack-protector.sh index 3755af0cd9f7..75e4e22b986a 100755 --- a/scripts/gcc-x86_64-has-stack-protector.sh +++ b/scripts/gcc-x86_64-has-stack-protector.sh @@ -1,4 +1,4 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 -echo "int foo(void) { char X[200]; return 3; }" | $* -S -x c -c -O0 -mcmodel=kernel -fno-PIE -fstack-protector - -o - 2> /dev/null | grep -q "%gs" +echo "int foo(void) { char X[200]; return 3; }" | $* -S -x c -c -m64 -O0 -mcmodel=kernel -fno-PIE -fstack-protector - -o - 2> /dev/null | grep -q "%gs" From b2d00d7c61c84edd150310af3f556f8a3c6e2e67 Mon Sep 17 00:00:00 2001 From: Dirk Gouders Date: Thu, 21 Jun 2018 15:30:54 +0200 Subject: [PATCH 452/572] kconfig: fix line numbers for if-entries in menu tree The line numers for if-entries in the menu tree are off by one or more lines which is confusing when debugging for correctness of unrelated changes. According to the git log, commit a02f0570ae201c49 (kconfig: improve error handling in the parser) was the last one that changed that part of the parser and replaced "if_entry: T_IF expr T_EOL" by "if_entry: T_IF expr nl" but the commit message does not state why this has been done. When reverting that part of the commit, only the line numers are corrected (checked with cdebug = DEBUG_PARSE in zconf.y), otherwise the menu tree remains unchanged (checked with zconfdump() enabled in conf.c). An example for the corrected line numbers: drivers/soc/Kconfig:15:source drivers/soc/tegra/Kconfig drivers/soc/tegra/Kconfig:4:if drivers/soc/tegra/Kconfig:6:if changes to: drivers/soc/Kconfig:15:source drivers/soc/tegra/Kconfig drivers/soc/tegra/Kconfig:1:if drivers/soc/tegra/Kconfig:4:if Signed-off-by: Dirk Gouders Signed-off-by: Masahiro Yamada --- scripts/kconfig/zconf.y | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/kconfig/zconf.y b/scripts/kconfig/zconf.y index 6f9b0aa32a82..ed84d4a8e288 100644 --- a/scripts/kconfig/zconf.y +++ b/scripts/kconfig/zconf.y @@ -31,7 +31,7 @@ struct symbol *symbol_hash[SYMBOL_HASHSIZE]; static struct menu *current_menu, *current_entry; %} -%expect 32 +%expect 31 %union { @@ -337,7 +337,7 @@ choice_block: /* if entry */ -if_entry: T_IF expr nl +if_entry: T_IF expr T_EOL { printd(DEBUG_PARSE, "%s:%d:if\n", zconf_curname(), zconf_lineno()); menu_add_entry(NULL); From bdb60101df4a2999608430112a5abfb78628db1e Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 22 Jun 2018 20:08:21 -0700 Subject: [PATCH 453/572] kconfig: document Kconfig source file comments I saw this type of Kconfig construct on LKML: config SYMBOOL #bool "prompt string" default y and wondered what it does. Then I wondered if '#' comments are even documented. They aren't, so add a little doc for that. Ah, good. kconfig says: arch/x86/Kconfig:2942:warning: config symbol defined without type Signed-off-by: Randy Dunlap Signed-off-by: Masahiro Yamada --- Documentation/kbuild/kconfig-language.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/kbuild/kconfig-language.txt b/Documentation/kbuild/kconfig-language.txt index 3534a84d206c..64e0775a62d4 100644 --- a/Documentation/kbuild/kconfig-language.txt +++ b/Documentation/kbuild/kconfig-language.txt @@ -430,6 +430,12 @@ This sets the config program's title bar if the config program chooses to use it. It should be placed at the top of the configuration, before any other statement. +'#' Kconfig source file comment: + +An unquoted '#' character anywhere in a source file line indicates +the beginning of a source file comment. The remainder of that line +is a comment. + Kconfig hints ------------- From 1376b0a2160319125c3a2822e8c09bd283cd8141 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 5 Jun 2018 12:36:30 +0300 Subject: [PATCH 454/572] staging: comedi: quatech_daqp_cs: fix no-op loop daqp_ao_insn_write() There is a '>' vs '<' typo so this loop is a no-op. Fixes: d35dcc89fc93 ("staging: comedi: quatech_daqp_cs: fix daqp_ao_insn_write()") Signed-off-by: Dan Carpenter Reviewed-by: Ian Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/drivers/quatech_daqp_cs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/comedi/drivers/quatech_daqp_cs.c b/drivers/staging/comedi/drivers/quatech_daqp_cs.c index ea194aa01a64..257b0daff01f 100644 --- a/drivers/staging/comedi/drivers/quatech_daqp_cs.c +++ b/drivers/staging/comedi/drivers/quatech_daqp_cs.c @@ -642,7 +642,7 @@ static int daqp_ao_insn_write(struct comedi_device *dev, /* Make sure D/A update mode is direct update */ outb(0, dev->iobase + DAQP_AUX_REG); - for (i = 0; i > insn->n; i++) { + for (i = 0; i < insn->n; i++) { unsigned int val = data[i]; int ret; From 0a2bc00341dcfcc793c0dbf4f8d43adf60458b05 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Mon, 11 Jun 2018 11:06:53 -0700 Subject: [PATCH 455/572] staging: android: ion: Return an ERR_PTR in ion_map_kernel The expected return value from ion_map_kernel is an ERR_PTR. The error path for a vmalloc failure currently just returns NULL, triggering a warning in ion_buffer_kmap_get. Encode the vmalloc failure as an ERR_PTR. Reported-by: syzbot+55b1d9f811650de944c6@syzkaller.appspotmail.com Signed-off-by: Laura Abbott Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/ion/ion_heap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/android/ion/ion_heap.c b/drivers/staging/android/ion/ion_heap.c index e8c440329708..31db510018a9 100644 --- a/drivers/staging/android/ion/ion_heap.c +++ b/drivers/staging/android/ion/ion_heap.c @@ -30,7 +30,7 @@ void *ion_heap_map_kernel(struct ion_heap *heap, struct page **tmp = pages; if (!pages) - return NULL; + return ERR_PTR(-ENOMEM); if (buffer->flags & ION_FLAG_CACHED) pgprot = PAGE_KERNEL; From 01766229533f9bdb1144a41b4345c8c7286da7b4 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Mon, 11 Jun 2018 09:31:52 +0200 Subject: [PATCH 456/572] perf record: Support s390 random socket_id assignment On s390 the socket identifier assigned to a CPU identifier is random and (depending on the configuration of the LPAR) may be higher than the CPU identifier. This is currently not supported. Fix this by allowing arbitrary socket identifiers being assigned to CPU id. Output before: [root@p23lp27 perf]# ./perf report --header -I -v ... socket_id number is too big.You may need to upgrade the perf tool. Error: The perf.data file has no samples! # ======== # captured on : Tue May 29 09:29:57 2018 # header version : 1 ... # Core ID and Socket ID information is not available ... [root@p23lp27 perf]# Output after: [root@p23lp27 perf]# ./perf report --header -I -v ... Error: The perf.data file has no samples! # ======== # captured on : Tue May 29 09:29:57 2018 # header version : 1 ... # CPU 0: Core ID 0, Socket ID 6 # CPU 1: Core ID 1, Socket ID 3 # CPU 2: Core ID -1, Socket ID -1 ... [root@p23lp27 perf]# Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Cc: Heiko Carstens Cc: Martin Schwidefsky Link: http://lkml.kernel.org/r/20180611073153.15592-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 540cd2dcd3e7..59fcc790c865 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -2129,6 +2129,7 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused) int cpu_nr = ff->ph->env.nr_cpus_avail; u64 size = 0; struct perf_header *ph = ff->ph; + bool do_core_id_test = true; ph->env.cpu = calloc(cpu_nr, sizeof(*ph->env.cpu)); if (!ph->env.cpu) @@ -2183,6 +2184,13 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused) return 0; } + /* On s390 the socket_id number is not related to the numbers of cpus. + * The socket_id number might be higher than the numbers of cpus. + * This depends on the configuration. + */ + if (ph->env.arch && !strncmp(ph->env.arch, "s390", 4)) + do_core_id_test = false; + for (i = 0; i < (u32)cpu_nr; i++) { if (do_read_u32(ff, &nr)) goto free_cpu; @@ -2192,7 +2200,7 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused) if (do_read_u32(ff, &nr)) goto free_cpu; - if (nr != (u32)-1 && nr > (u32)cpu_nr) { + if (do_core_id_test && nr != (u32)-1 && nr > (u32)cpu_nr) { pr_debug("socket_id number is too big." "You may need to upgrade the perf tool.\n"); goto free_cpu; From b930e62ecd362843002bdf84c2940439822af321 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Mon, 11 Jun 2018 09:31:53 +0200 Subject: [PATCH 457/572] perf test session topology: Fix test on s390 On s390 this test case fails because the socket identifiction numbers assigned to the CPU are higher than the CPU identification numbers. F/ix this by adding the platform architecture into the perf data header flag information. This helps identifiing the test platform and handles s390 specifics in process_cpu_topology(). Before: [root@p23lp27 perf]# perf test -vvvvv -F 39 39: Session topology : --- start --- templ file: /tmp/perf-test-iUv755 socket_id number is too big.You may need to upgrade the perf tool. ---- end ---- Session topology: Skip [root@p23lp27 perf]# After: [root@p23lp27 perf]# perf test -vvvvv -F 39 39: Session topology : --- start --- templ file: /tmp/perf-test-8X8VTs CPU 0, core 0, socket 6 CPU 1, core 1, socket 3 ---- end ---- Session topology: Ok [root@p23lp27 perf]# Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Cc: Heiko Carstens Cc: Martin Schwidefsky Fixes: c84974ed9fb6 ("perf test: Add entry to test cpu topology") Link: http://lkml.kernel.org/r/20180611073153.15592-2-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/topology.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index 40e30a26b23c..9497d02f69e6 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -45,6 +45,7 @@ static int session_write_header(char *path) perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY); perf_header__set_feat(&session->header, HEADER_NRCPUS); + perf_header__set_feat(&session->header, HEADER_ARCH); session->header.data_size += DATA_SIZE; From 143c99f6ac6812d23254e80844d6e34be897d3e1 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Mon, 11 Jun 2018 16:10:49 +0530 Subject: [PATCH 458/572] perf report powerpc: Fix crash if callchain is empty For some cases, the callchain provided by the kernel may be empty. So, the callchain ip filtering code will cause a crash if we do not check whether the struct ip_callchain pointer is NULL before accessing any members. This can be observed on a powerpc64le system running Fedora 27 as shown below. # perf record -b -e cycles:u ls Before: # perf report --branch-history perf: Segmentation fault -------- backtrace -------- perf[0x1027615c] linux-vdso64.so.1(__kernel_sigtramp_rt64+0x0)[0x7fff856304d8] perf(arch_skip_callchain_idx+0x44)[0x10257c58] perf[0x1017f2e4] perf(thread__resolve_callchain+0x124)[0x1017ff5c] perf(sample__resolve_callchain+0xf0)[0x10172788] ... After: # perf report --branch-history Samples: 25 of event 'cycles:u', Event count (approx.): 2306870 Overhead Source:Line Symbol Shared Object + 11.60% _init+35736 [.] _init ls + 9.84% strcoll_l.c:137 [.] __strcoll_l libc-2.26.so + 9.16% memcpy.S:175 [.] __memcpy_power7 libc-2.26.so + 9.01% gconv_charset.h:54 [.] _nl_find_locale libc-2.26.so + 8.87% dl-addr.c:52 [.] _dl_addr libc-2.26.so + 8.83% _init+236 [.] _init ls ... Reported-by: Ravi Bangoria Signed-off-by: Sandipan Das Acked-by: Ravi Bangoria Cc: Jiri Olsa Cc: Naveen N. Rao Cc: Sukadev Bhattiprolu Link: http://lkml.kernel.org/r/20180611104049.11048-1-sandipan@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/powerpc/util/skip-callchain-idx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/arch/powerpc/util/skip-callchain-idx.c b/tools/perf/arch/powerpc/util/skip-callchain-idx.c index 3598b8b75d27..ef5d59a5742e 100644 --- a/tools/perf/arch/powerpc/util/skip-callchain-idx.c +++ b/tools/perf/arch/powerpc/util/skip-callchain-idx.c @@ -243,7 +243,7 @@ int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain) u64 ip; u64 skip_slot = -1; - if (chain->nr < 3) + if (!chain || chain->nr < 3) return skip_slot; ip = chain->ips[2]; From 933ccf2002aaef1037cb676622a694f5390c3d59 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 11 Jun 2018 11:34:21 +0200 Subject: [PATCH 459/572] perf tests: Add event parsing error handling to parse events test Add missing error handling for parse_events calls in test_event function that led to following segfault on s390: running test 52 'intel_pt//u' perf: Segmentation fault ... /lib64/libc.so.6(vasprintf+0xe6) [0x3fffca3f106] /lib64/libc.so.6(asprintf+0x46) [0x3fffca1aa96] ./perf(parse_events_add_pmu+0xb8) [0x80132088] ./perf(parse_events_parse+0xc62) [0x8019529a] ./perf(parse_events+0x98) [0x801341c0] ./perf(test__parse_events+0x48) [0x800cd140] ./perf(cmd_test+0x26a) [0x800bd44a] test child interrupted Adding the struct parse_events_error argument to parse_events call. Also adding parse_events_print_error to get more details on the parsing failures, like: # perf test 6 -v running test 52 'intel_pt//u'failed to parse event 'intel_pt//u', err 1, str 'Cannot find PMU `intel_pt'. Missing kernel support?' event syntax error: 'intel_pt//u' \___ Cannot find PMU `intel_pt'. Missing kernel support? Committer note: Use named initializers in the struct parse_events_error variable to avoid breaking the build on centos5, 6 and others with a similar gcc: cc1: warnings being treated as errors tests/parse-events.c: In function 'test_event': tests/parse-events.c:1696: error: missing initializer tests/parse-events.c:1696: error: (near initialization for 'err.str') Reported-by: Kim Phillips Signed-off-by: Jiri Olsa Tested-by: Kim Phillips Cc: Alexander Shishkin Cc: David Ahern Cc: Heiko Carstens Cc: Hendrik Brueckner Cc: Martin Schwidefsky Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Richter Link: http://lkml.kernel.org/r/20180611093422.1005-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/parse-events.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 7d4077068454..9751e7563a45 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -1686,6 +1686,7 @@ static struct terms_test test__terms[] = { static int test_event(struct evlist_test *e) { + struct parse_events_error err = { .idx = 0, }; struct perf_evlist *evlist; int ret; @@ -1693,10 +1694,11 @@ static int test_event(struct evlist_test *e) if (evlist == NULL) return -ENOMEM; - ret = parse_events(evlist, e->name, NULL); + ret = parse_events(evlist, e->name, &err); if (ret) { - pr_debug("failed to parse event '%s', err %d\n", - e->name, ret); + pr_debug("failed to parse event '%s', err %d, str '%s'\n", + e->name, ret, err.str); + parse_events_print_error(&err, e->name); } else { ret = e->check(evlist); } From 16ddcfbf7f3d07aa781e26b39f2c28636a4ed2fd Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 11 Jun 2018 11:34:22 +0200 Subject: [PATCH 460/572] perf tests: Add valid callback for parse-events test Adding optional 'valid' callback for events tests in parse-events object, so we don't try to parse PMUs, which are not supported. Following line is displayed for skipped test: running test 52 'intel_pt//u'... SKIP Committer note: Use named initializers in the struct evlist_test variable to avoid breaking the build on centos:5, 6 and others with a similar gcc: cc1: warnings being treated as errors tests/parse-events.c: In function 'test_pmu_events': tests/parse-events.c:1817: error: missing initializer tests/parse-events.c:1817: error: (near initialization for 'e.type') Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: David Ahern Cc: Heiko Carstens Cc: Hendrik Brueckner Cc: Kim Phillips Cc: Martin Schwidefsky Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Richter Link: http://lkml.kernel.org/r/20180611093422.1005-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/parse-events.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 9751e7563a45..61211918bfba 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -1309,6 +1309,11 @@ static int test__checkevent_config_cache(struct perf_evlist *evlist) return 0; } +static bool test__intel_pt_valid(void) +{ + return !!perf_pmu__find("intel_pt"); +} + static int test__intel_pt(struct perf_evlist *evlist) { struct perf_evsel *evsel = perf_evlist__first(evlist); @@ -1375,6 +1380,7 @@ struct evlist_test { const char *name; __u32 type; const int id; + bool (*valid)(void); int (*check)(struct perf_evlist *evlist); }; @@ -1648,6 +1654,7 @@ static struct evlist_test test__events[] = { }, { .name = "intel_pt//u", + .valid = test__intel_pt_valid, .check = test__intel_pt, .id = 52, }, @@ -1690,6 +1697,11 @@ static int test_event(struct evlist_test *e) struct perf_evlist *evlist; int ret; + if (e->valid && !e->valid()) { + pr_debug("... SKIP"); + return 0; + } + evlist = perf_evlist__new(); if (evlist == NULL) return -ENOMEM; @@ -1716,10 +1728,11 @@ static int test_events(struct evlist_test *events, unsigned cnt) for (i = 0; i < cnt; i++) { struct evlist_test *e = &events[i]; - pr_debug("running test %d '%s'\n", e->id, e->name); + pr_debug("running test %d '%s'", e->id, e->name); ret1 = test_event(e); if (ret1) ret2 = ret1; + pr_debug("\n"); } return ret2; @@ -1801,7 +1814,7 @@ static int test_pmu_events(void) } while (!ret && (ent = readdir(dir))) { - struct evlist_test e; + struct evlist_test e = { .id = 0, }; char name[2 * NAME_MAX + 1 + 12 + 3]; /* Names containing . are special and cannot be used directly */ From 621a5a327c1e36ffd7bb567f44a559f64f76358f Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Thu, 7 Jun 2018 14:30:02 +0300 Subject: [PATCH 461/572] perf intel-pt: Fix packet decoding of CYC packets Use a 64-bit type so that the cycle count is not limited to 32-bits. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1528371002-8862-1-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c index ba4c9dd18643..d426761a549d 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c @@ -366,7 +366,7 @@ static int intel_pt_get_cyc(unsigned int byte, const unsigned char *buf, if (len < offs) return INTEL_PT_NEED_MORE_BYTES; byte = buf[offs++]; - payload |= (byte >> 1) << shift; + payload |= ((uint64_t)byte >> 1) << shift; } packet->type = INTEL_PT_CYC; From 7b818dc57230bf050bbc4218cd4df8ec59b9945d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 15 Jun 2018 11:33:35 -0300 Subject: [PATCH 462/572] tools headers uapi: Synchronize drm/drm.h To pick up the new ioctls added in these csets: 7595bda2fb43 ("drm: Add DRM client cap for aspect-ratio") The DRM caps are not yet being decoded in 'perf trace', so this sync doesn't incur in any change in behaviour in any tools, just silencing this tools/perf/ build warning: Warning: Kernel ABI header at 'tools/include/uapi/drm/drm.h' differs from latest version at 'include/uapi/drm/drm.h' Cc: Adrian Hunter Cc: Ankit Nautiyal Cc: David Ahern Cc: Jiri Olsa Cc: Maarten Lankhorst Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-atwz0arwanq1npu8pptwkoxt@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/drm/drm.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h index 6fdff5945c8a..9c660e1688ab 100644 --- a/tools/include/uapi/drm/drm.h +++ b/tools/include/uapi/drm/drm.h @@ -680,6 +680,13 @@ struct drm_get_cap { */ #define DRM_CLIENT_CAP_ATOMIC 3 +/** + * DRM_CLIENT_CAP_ASPECT_RATIO + * + * If set to 1, the DRM core will provide aspect ratio information in modes. + */ +#define DRM_CLIENT_CAP_ASPECT_RATIO 4 + /** DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */ struct drm_set_client_cap { __u64 capability; From b1494ec029af6d6ea189cbc96ad66463f8df3579 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 15 Jun 2018 11:48:43 -0300 Subject: [PATCH 463/572] perf tools: Update x86's syscall_64.tbl, adding 'io_pgetevents' and 'rseq' This updates the tools/perf/ copy of the system call table for x86 which makes 'perf trace' become aware of the new 'io_pgetevents' and 'rseq' syscalls, no matter in which system it gets built, i.e. older systems where the syscalls are not available in the running kernel (via tracefs) or in the system headers will still be aware of these syscalls/. These are the csets introducing the source drift: 05c17cedf85b ("x86: Wire up restartable sequence system call") 7a074e96dee6 ("aio: implement io_pgetevents") This results in this build time change: $ diff -u /tmp/build/perf/arch/x86/include/generated/asm/syscalls_64.c.old /tmp/build/perf/arch/x86/include/generated/asm/syscalls_64.c --- /tmp/build/perf/arch/x86/include/generated/asm/syscalls_64.c.old 2018-06-15 11:48:17.648948094 -0300 +++ /tmp/build/perf/arch/x86/include/generated/asm/syscalls_64.c 2018-06-15 11:48:22.133942480 -0300 @@ -332,5 +332,7 @@ [330] = "pkey_alloc", [331] = "pkey_free", [332] = "statx", + [333] = "io_pgetevents", + [334] = "rseq", }; -#define SYSCALLTBL_x86_64_MAX_ID 332 +#define SYSCALLTBL_x86_64_MAX_ID 334 $ This silences the following tools/perf/ build warning: Warning: Kernel ABI header at 'tools/perf/arch/x86/entry/syscalls/syscall_64.tbl' differs from latest version at 'arch/x86/entry/syscalls/syscall_64.tbl' Cc: Adrian Hunter Cc: Christoph Hellwig Cc: David Ahern Cc: Jiri Olsa Cc: Mathieu Desnoyers Cc: Namhyung Kim Cc: Thomas Gleixner Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-tfvyz51sabuzemrszbrhzxni@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/entry/syscalls/syscall_64.tbl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 4dfe42666d0c..f0b1709a5ffb 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -341,6 +341,8 @@ 330 common pkey_alloc __x64_sys_pkey_alloc 331 common pkey_free __x64_sys_pkey_free 332 common statx __x64_sys_statx +333 common io_pgetevents __x64_sys_io_pgetevents +334 common rseq __x64_sys_rseq # # x32-specific system call numbers start at 512 to avoid cache impact From 801f5e1ac783df9fafff8899ef2d5511bd4dbdcb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 15 Jun 2018 12:04:32 -0300 Subject: [PATCH 464/572] tools include powerpc: Update arch/powerpc/include/uapi/asm/unistd.h copy to get 'rseq' syscall This updates the tools/perf/ copy of the powerpc file used to generate the syscall table file used to make 'perf trace' become aware of the new 'rseq' syscall, no matter in which system it gets built, i.e. older systems where the syscalls are not available in the running kernel (via tracefs) or in the system headers will still be aware of these syscalls/. From this commit: bb862b021d75 ("powerpc: Wire up restartable sequences system call") Silencing this tools/perf build warning: Warning: Kernel ABI header at 'tools/arch/powerpc/include/uapi/asm/unistd.h' differs from latest version at 'arch/powerpc/include/uapi/asm/unistd.h' Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Boqun Feng Cc: David Ahern Cc: Hendrik Brueckner Cc: Jiri Olsa Cc: Mathieu Desnoyers Cc: Michael Ellerman Cc: Namhyung Kim Cc: Ravi Bangoria Cc: Thomas Gleixner Cc: Thomas Richter Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-adtgz6u3apd76tghiu9w0k19@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/powerpc/include/uapi/asm/unistd.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/arch/powerpc/include/uapi/asm/unistd.h b/tools/arch/powerpc/include/uapi/asm/unistd.h index 389c36fd8299..ac5ba55066dd 100644 --- a/tools/arch/powerpc/include/uapi/asm/unistd.h +++ b/tools/arch/powerpc/include/uapi/asm/unistd.h @@ -398,5 +398,6 @@ #define __NR_pkey_alloc 384 #define __NR_pkey_free 385 #define __NR_pkey_mprotect 386 +#define __NR_rseq 387 #endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */ From bb9a33cb8a807e5ae9906563f5c1533904651b8b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 15 Jun 2018 16:46:57 -0300 Subject: [PATCH 465/572] tools include uapi: Update if_link.h to pick IFLA_{BRPORT_ISOLATED,VXLAN_TTL_INHERIT} The IFLA_BRPORT_ISOLATED and IFLA_VXLAN_TTL_INHERIT defines were added in: 7d850abd5f4e ("net: bridge: add support for port isolation") 72f6d71e491e ("vxlan: add ttl inherit support") Pick them, silencing this build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/if_link.h' differs from latest version at 'include/uapi/linux/if_link.h' Cc: Alexei Starovoitov Cc: David S. Miller Cc: Eric Leblond Cc: Hangbin Liu Cc: Nikolay Aleksandrov Link: https://lkml.kernel.org/n/tip-ezi5u0mmdqm0wfm0y2y8176r@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/if_link.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index 68699f654118..cf01b6824244 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -333,6 +333,7 @@ enum { IFLA_BRPORT_BCAST_FLOOD, IFLA_BRPORT_GROUP_FWD_MASK, IFLA_BRPORT_NEIGH_SUPPRESS, + IFLA_BRPORT_ISOLATED, __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) @@ -516,6 +517,7 @@ enum { IFLA_VXLAN_COLLECT_METADATA, IFLA_VXLAN_LABEL, IFLA_VXLAN_GPE, + IFLA_VXLAN_TTL_INHERIT, __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) From f568b472815fd6c34fe4cc30dde4572ee300d6c4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 15 Jun 2018 16:55:32 -0300 Subject: [PATCH 466/572] tools include uapi: Synchronize bpf.h with the kernel To pick the rename in: bd3a08aaa9a3 ("bpf: flowlabel in bpf_fib_lookup should be flowinfo") Silencing this build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf.h' differs from latest version at 'include/uapi/linux/bpf.h' Cc: Adrian Hunter Cc: Alexei Starovoitov Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-zd1sgtbybtjrrt7bqdybu0s0@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index e0b06784f227..59b19b6a40d7 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2630,7 +2630,7 @@ struct bpf_fib_lookup { union { /* inputs to lookup */ __u8 tos; /* AF_INET */ - __be32 flowlabel; /* AF_INET6 */ + __be32 flowinfo; /* AF_INET6, flow_label + priority */ /* output: metric of fib result (IPv4/IPv6 only) */ __u32 rt_metric; From c6555c14572aeadf4fe2819abd971c4e7608b926 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 16 Jun 2018 10:47:39 -0700 Subject: [PATCH 467/572] perf tools: Fix a clang 7.0 compilation error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Arnaldo reported the perf build failure with latest llvm/clang compiler (7.0). $ make LIBCLANGLLVM=1 -C tools/perf/ CC /tmp/tmp.t53Qo38zci/tests/kmod-path.o util/c++/clang.cpp: In function ‘std::unique_ptr > perf::getBPFObjectFromModule(llvm::Module*)’: util/c++/clang.cpp:150:43: error: no matching function for call to ‘llvm::TargetMachine::addPassesToEmitFile(llvm::legacy::PassManager&, llvm::raw_svector_ostream&, llvm::TargetMachine::CodeGenFileType)’ TargetMachine::CGFT_ObjectFile)) { ^ In file included from util/c++/clang.cpp:25:0: /usr/local/include/llvm/Target/TargetMachine.h:254:16: note: candidate: virtual bool llvm::TargetMachine::addPassesToEmitFile( llvm::legacy::PassManagerBase&, llvm::raw_pwrite_stream&, llvm::raw_pwrite_stream*, llvm::TargetMachine::CodeGenFileType, bool, llvm::MachineModuleInfo*) virtual bool addPassesToEmitFile(PassManagerBase &, raw_pwrite_stream &, ^~~~~~~~~~~~~~~~~~~ /usr/local/include/llvm/Target/TargetMachine.h:254:16: note: candidate expects 6 arguments, 3 provided mv: cannot stat '/tmp/tmp.t53Qo38zci/util/c++/.clang.o.tmp': No such file or directory make[7]: *** [/home/acme/git/perf/tools/build/Makefile.build:101: /tmp/tmp.t53Qo38zci/util/c++/clang.o] Error 1 make[6]: *** [/home/acme/git/perf/tools/build/Makefile.build:139: c++] Error 2 make[5]: *** [/home/acme/git/perf/tools/build/Makefile.build:139: util] Error 2 make[5]: *** Waiting for unfinished jobs.... CC /tmp/tmp.t53Qo38zci/tests/thread-map.o The function addPassesToEmitFile signature changed in llvm 7.0 and such a change caused the failure. This patch fixed the issue with using proper function signatures under different compiler versions. Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Yonghong Song Tested-by: Arnaldo Carvalho de Melo Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Jiri Olsa Cc: Martin KaFai Lau Cc: Wang Nan Link: http://lkml.kernel.org/r/20180616174739.1076733-1-yhs@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/c++/clang.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/c++/clang.cpp b/tools/perf/util/c++/clang.cpp index bf31ceab33bd..89512504551b 100644 --- a/tools/perf/util/c++/clang.cpp +++ b/tools/perf/util/c++/clang.cpp @@ -146,8 +146,15 @@ getBPFObjectFromModule(llvm::Module *Module) raw_svector_ostream ostream(*Buffer); legacy::PassManager PM; - if (TargetMachine->addPassesToEmitFile(PM, ostream, - TargetMachine::CGFT_ObjectFile)) { + bool NotAdded; +#if CLANG_VERSION_MAJOR < 7 + NotAdded = TargetMachine->addPassesToEmitFile(PM, ostream, + TargetMachine::CGFT_ObjectFile); +#else + NotAdded = TargetMachine->addPassesToEmitFile(PM, ostream, nullptr, + TargetMachine::CGFT_ObjectFile); +#endif + if (NotAdded) { llvm::errs() << "TargetMachine can't emit a file of this type\n"; return std::unique_ptr>(nullptr);; } From ea23ac73085743a4f1682d6605fe019577c82e1e Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Fri, 15 Jun 2018 12:11:03 +0200 Subject: [PATCH 468/572] perf alias: Remove trailing newline when reading sysfs files Remove a trailing newline when reading sysfs file contents such as /sys/devices/cpum_cf/events/TX_NC_TEND. This shows when verbose option -v is used. Output before: tx_nc_tend -> 'cpum_cf'/'event=0x008d '/ Output after: tx_nc_tend -> 'cpum_cf'/'event=0x8d'/ Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Cc: Heiko Carstens Cc: Jiri Olsa Cc: Martin Schwidefsky Link: http://lkml.kernel.org/r/20180615101105.47047-1-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index d2fb597c9a8c..2738fc8d200d 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -303,6 +303,9 @@ static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FI buf[ret] = 0; + /* Remove trailing newline from sysfs file */ + rtrim(buf); + return __perf_pmu__new_alias(list, dir, name, NULL, buf, NULL, NULL, NULL, NULL, NULL, NULL); } From 0c24d6fb7bd3578e5b9e4972d01bbe3d087ded33 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Fri, 15 Jun 2018 12:11:04 +0200 Subject: [PATCH 469/572] perf alias: Rebuild alias expression string to make it comparable PMU alias definitions in sysfs files may have spaces, newlines and numbers with leading zeroes. Some alias definitions may also appear in JSON files without spaces, etc. Scan alias definitions and remove leading zeroes, spaces, newlines, etc and rebuild string to make alias->str member comparable. s390 for example has terms specified as event=0x0091 (read from files ..//events/ and terms specified as event=0x91 (read from JSON files). Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Cc: Heiko Carstens Cc: Jiri Olsa Cc: Martin Schwidefsky Link: http://lkml.kernel.org/r/20180615101105.47047-2-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 2738fc8d200d..f321ce97d9ec 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -241,9 +241,11 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, char *metric_expr, char *metric_name) { + struct parse_events_term *term; struct perf_pmu_alias *alias; int ret; int num; + char newval[256]; alias = malloc(sizeof(*alias)); if (!alias) @@ -262,6 +264,27 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, return ret; } + /* Scan event and remove leading zeroes, spaces, newlines, some + * platforms have terms specified as + * event=0x0091 (read from files ..//events/ + * and terms specified as event=0x91 (read from JSON files). + * + * Rebuild string to make alias->str member comparable. + */ + memset(newval, 0, sizeof(newval)); + ret = 0; + list_for_each_entry(term, &alias->terms, list) { + if (ret) + ret += scnprintf(newval + ret, sizeof(newval) - ret, + ","); + if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) + ret += scnprintf(newval + ret, sizeof(newval) - ret, + "%s=%#x", term->config, term->val.num); + else if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) + ret += scnprintf(newval + ret, sizeof(newval) - ret, + "%s=%s", term->config, term->val.str); + } + alias->name = strdup(name); if (dir) { /* @@ -285,7 +308,7 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, snprintf(alias->unit, sizeof(alias->unit), "%s", unit); } alias->per_pkg = perpkg && sscanf(perpkg, "%d", &num) == 1 && num == 1; - alias->str = strdup(val); + alias->str = strdup(newval); list_add_tail(&alias->list, list); From 6dde6429c5ff5b38d6d40a14a6ee105117e6364d Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Fri, 15 Jun 2018 12:11:05 +0200 Subject: [PATCH 470/572] perf stat: Remove duplicate event counting 'perf stat' shows a mismatch in perf stat regarding counter names on s390: Run command: [root@s35lp76 perf]# ./perf stat -e tx_nc_tend -v -- ~/mytesttx 1 >/tmp/111 tx_nc_tend: 1 573146 573146 tx_nc_tend: 1 573146 573146 Performance counter stats for '/root/mytesttx 1': 3 tx_nc_tend 0.001037252 seconds time elapsed [root@s35lp76 perf]# shows transaction counter tx_nc_tend with value 3 but it was triggered only once as seen by the output of mytesttx. When looking up the event name tx_nc_tend the following function sequence is called: parse_events_multi_pmu_add() +--> perf_pmu__scan() being called with NULL argument +--> pmu_read_sysfs() scans directory ../devices/ for all PMUs +--> perf_pmu__find() tries to find a PMU in the global pmu list. +--> pmu_lookup() called to read all file entries when not in global list. pmu_lookup() causes the issue. It calls +---> pmu_aliases() to read all the entries in the PMU directory. On s390 this is named /sys/devices/cpum_cf/events. +--> pmu_aliases_parse() reads all files and creates an alias for each file name. So we end up with first entry created by reading the sysfs file [root@s35lp76 perf]# cat /sys/devices/cpum_cf /events/TX_NC_TEND event=0x008d [root@s35lp76 perf]# Debug output shows this entry tx_nc_tend -> 'cpum_cf'/'event=0x008d '/ After all files in this directory have been read and aliases created this function is called: +--> pmu_add_cpu_aliases() This function looks up the CPU tables created by the json files. With json files for s390 now available all the aliases are added to the PMU alias list a second time. The second entry is added by reading the json file converted by jevent resulting in file pmu-events/pmu-events.c: { .name = "tx_nc_tend", .event = "event=0x8d", .desc = "Unit: cpum_cf Completed TEND \ instructions \ in non-constrained TX mode", .topic = "extended", .long_desc = "A TEND instruction has \ completed in a \ non-constrained \ transactional-execution mode", .pmu = "cpum_cf", }, Debug output shows this entry tx_nc_tend -> 'cpum_cf'/'event=0x8d'/ Function pmu_aliases_parse() and pmu_add_cpu_aliases() both use __perf_pmu__new_alias() to add an alias to the PMU alias list. There is no check if an alias already exist So we end up with 2 entries for tx_nc_tend in the PMU alias list. Having set up the PMU alias list for this PMU now parse_events_multi_add_pmu() reads the complete alias list and adds each alias with parse_events_add_pmu() to the global perfev_list. This causes the alias to be added multiple times to the event list. Fix this by making __perf_pmu__new_alias() to merge alias definitions if an alias is already on the alias list. Also print a debug message when the alias has mismatches in some fields. Output before: [root@s35lp76 perf]# ./perf stat -e tx_nc_tend -v \ -- ~/mytesttx 1 >/tmp/111 tx_nc_tend: 1 551446 551446 Performance counter stats for '/root/mytesttx 1': 3 tx_nc_tend 0.000961134 seconds time elapsed [root@s35lp76 perf]# Output after: [root@s35lp76 perf]# ./perf stat -e tx_nc_tend -v \ -- ~/mytesttx 1 >/tmp/111 tx_nc_tend: 1 551446 551446 Performance counter stats for '/root/mytesttx 1': 1 tx_nc_tend 0.000961134 seconds time elapsed [root@s35lp76 perf]# Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Reviewed-by: Jiri Olsa Cc: Heiko Carstens Cc: Martin Schwidefsky Link: http://lkml.kernel.org/r/20180615101105.47047-3-tmricht@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 71 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 70 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index f321ce97d9ec..3ba6a1742f91 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -234,6 +234,74 @@ static int perf_pmu__parse_snapshot(struct perf_pmu_alias *alias, return 0; } +static void perf_pmu_assign_str(char *name, const char *field, char **old_str, + char **new_str) +{ + if (!*old_str) + goto set_new; + + if (*new_str) { /* Have new string, check with old */ + if (strcasecmp(*old_str, *new_str)) + pr_debug("alias %s differs in field '%s'\n", + name, field); + zfree(old_str); + } else /* Nothing new --> keep old string */ + return; +set_new: + *old_str = *new_str; + *new_str = NULL; +} + +static void perf_pmu_update_alias(struct perf_pmu_alias *old, + struct perf_pmu_alias *newalias) +{ + perf_pmu_assign_str(old->name, "desc", &old->desc, &newalias->desc); + perf_pmu_assign_str(old->name, "long_desc", &old->long_desc, + &newalias->long_desc); + perf_pmu_assign_str(old->name, "topic", &old->topic, &newalias->topic); + perf_pmu_assign_str(old->name, "metric_expr", &old->metric_expr, + &newalias->metric_expr); + perf_pmu_assign_str(old->name, "metric_name", &old->metric_name, + &newalias->metric_name); + perf_pmu_assign_str(old->name, "value", &old->str, &newalias->str); + old->scale = newalias->scale; + old->per_pkg = newalias->per_pkg; + old->snapshot = newalias->snapshot; + memcpy(old->unit, newalias->unit, sizeof(old->unit)); +} + +/* Delete an alias entry. */ +static void perf_pmu_free_alias(struct perf_pmu_alias *newalias) +{ + zfree(&newalias->name); + zfree(&newalias->desc); + zfree(&newalias->long_desc); + zfree(&newalias->topic); + zfree(&newalias->str); + zfree(&newalias->metric_expr); + zfree(&newalias->metric_name); + parse_events_terms__purge(&newalias->terms); + free(newalias); +} + +/* Merge an alias, search in alias list. If this name is already + * present merge both of them to combine all information. + */ +static bool perf_pmu_merge_alias(struct perf_pmu_alias *newalias, + struct list_head *alist) +{ + struct perf_pmu_alias *a; + + list_for_each_entry(a, alist, list) { + if (!strcasecmp(newalias->name, a->name)) { + perf_pmu_update_alias(a, newalias); + perf_pmu_free_alias(newalias); + return true; + } + } + return false; +} + static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, char *desc, char *val, char *long_desc, char *topic, @@ -310,7 +378,8 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, alias->per_pkg = perpkg && sscanf(perpkg, "%d", &num) == 1 && num == 1; alias->str = strdup(newval); - list_add_tail(&alias->list, list); + if (!perf_pmu_merge_alias(alias, list)) + list_add_tail(&alias->list, list); return 0; } From 983107072be1a39cbde67d45cb0059138190e015 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 20 Jun 2018 11:40:36 +0200 Subject: [PATCH 471/572] perf bench: Fix numa report output code Currently we can hit following assert when running numa bench: $ perf bench numa mem -p 3 -t 1 -P 512 -s 100 -zZ0cm --thp 1 perf: bench/numa.c:1577: __bench_numa: Assertion `!(!(((wait_stat) & 0x7f) == 0))' failed. The assertion is correct, because we hit the SIGFPE in following line: Thread 2.2 "thread 0/0" received signal SIGFPE, Arithmetic exception. [Switching to Thread 0x7fffd28c6700 (LWP 11750)] 0x000.. in worker_thread (__tdata=0x7.. ) at bench/numa.c:1257 1257 td->speed_gbs = bytes_done / (td->runtime_ns / NSEC_PER_SEC) / 1e9; We don't check if the runtime is actually bigger than 1 second, and thus this might end up with zero division within FPU. Adding the check to prevent this. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180620094036.17278-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/numa.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 63eb49082774..44195514b19e 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -1098,7 +1098,7 @@ static void *worker_thread(void *__tdata) u8 *global_data; u8 *process_data; u8 *thread_data; - u64 bytes_done; + u64 bytes_done, secs; long work_done; u32 l; struct rusage rusage; @@ -1254,7 +1254,8 @@ static void *worker_thread(void *__tdata) timersub(&stop, &start0, &diff); td->runtime_ns = diff.tv_sec * NSEC_PER_SEC; td->runtime_ns += diff.tv_usec * NSEC_PER_USEC; - td->speed_gbs = bytes_done / (td->runtime_ns / NSEC_PER_SEC) / 1e9; + secs = td->runtime_ns / NSEC_PER_SEC; + td->speed_gbs = secs ? bytes_done / secs / 1e9 : 0; getrusage(RUSAGE_THREAD, &rusage); td->system_time_ns = rusage.ru_stime.tv_sec * NSEC_PER_SEC; From 10e9cec905f96fdf47f398be70726e2931b376cd Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Mon, 25 Jun 2018 18:12:18 +0530 Subject: [PATCH 472/572] perf script: Add missing output fields in a hint A few fields are missing in a perf script -F hint. Add them. Signed-off-by: Ravi Bangoria Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: David Carrillo-Cisneros Cc: Jin Yao Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20180625124220.6434-2-ravi.bangoria@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index a31d7082188e..f3fefbcc4503 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -3125,8 +3125,9 @@ int cmd_script(int argc, const char **argv) "+field to add and -field to remove." "Valid types: hw,sw,trace,raw,synth. " "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," - "addr,symoff,period,iregs,uregs,brstack,brstacksym,flags," - "bpf-output,callindent,insn,insnlen,brstackinsn,synth,phys_addr", + "addr,symoff,srcline,period,iregs,uregs,brstack," + "brstacksym,flags,bpf-output,brstackinsn,brstackoff," + "callindent,insn,insnlen,synth,phys_addr,metric,misc", parse_output_fields), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), From a3af66f51bd0bca72881ead4bf2bd19cb366582b Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Mon, 25 Jun 2018 18:12:19 +0530 Subject: [PATCH 473/572] perf script: Fix crash because of missing evsel->priv 'perf script' in piped mode is crashing because evsel->priv is not set properly. Fix it. Before: # perf record -o - -- ls | perf script Segmentation fault (core dumped) # After: # perf record -o - -- ls | perf script ls 2282 1031.731974: 250000 cpu-clock:uhH: 7effe4b3d29e ls 2282 1031.732222: 250000 cpu-clock:uhH: 7effe4b3a650 # Signed-off-by: Ravi Bangoria Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: David Carrillo-Cisneros Cc: Jin Yao Cc: Jiri Olsa Cc: Namhyung Kim Fixes: a14390fde64e ("perf script: Allow creating per-event dump files") Link: http://lkml.kernel.org/r/20180625124220.6434-3-ravi.bangoria@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index f3fefbcc4503..ad2ac1300420 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1834,6 +1834,7 @@ static int process_attr(struct perf_tool *tool, union perf_event *event, struct perf_evlist *evlist; struct perf_evsel *evsel, *pos; int err; + static struct perf_evsel_script *es; err = perf_event__process_attr(tool, event, pevlist); if (err) @@ -1842,6 +1843,19 @@ static int process_attr(struct perf_tool *tool, union perf_event *event, evlist = *pevlist; evsel = perf_evlist__last(*pevlist); + if (!evsel->priv) { + if (scr->per_event_dump) { + evsel->priv = perf_evsel_script__new(evsel, + scr->session->data); + } else { + es = zalloc(sizeof(*es)); + if (!es) + return -ENOMEM; + es->fp = stdout; + evsel->priv = es; + } + } + if (evsel->attr.type >= PERF_TYPE_MAX && evsel->attr.type != PERF_TYPE_SYNTH) return 0; From 92ead7ee30c80f8852d28735cbcb9d79bc85f715 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Mon, 25 Jun 2018 18:12:20 +0530 Subject: [PATCH 474/572] perf tools: Fix crash caused by accessing feat_ops[HEADER_LAST_FEATURE] perf_event__process_feature() accesses feat_ops[HEADER_LAST_FEATURE] which is not defined and thus perf is crashing. HEADER_LAST_FEATURE is used as an end marker for the perf report but it's unused for perf script/annotate. Ignore HEADER_LAST_FEATURE for perf script/annotate, just like it is done in 'perf report'. Before: # perf record -o - ls | perf script Segmentation fault (core dumped) # After: # perf record -o - ls | perf script Segmentation fault (core dumped) ls 7031 4392.099856: 250000 cpu-clock:uhH: 7f5e0ce7cd60 ls 7031 4392.100355: 250000 cpu-clock:uhH: 7f5e0c706ef7 # Signed-off-by: Ravi Bangoria Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: David Carrillo-Cisneros Cc: Jin Yao Cc: Jiri Olsa Cc: Namhyung Kim Fixes: 57b5de463925 ("perf report: Support forced leader feature in pipe mode") Link: http://lkml.kernel.org/r/20180625124220.6434-4-ravi.bangoria@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-annotate.c | 11 ++++++++++- tools/perf/builtin-report.c | 3 ++- tools/perf/builtin-script.c | 11 ++++++++++- tools/perf/util/header.c | 2 +- 4 files changed, 23 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 5eb22cc56363..8180319285af 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -283,6 +283,15 @@ out_put: return ret; } +static int process_feature_event(struct perf_tool *tool, + union perf_event *event, + struct perf_session *session) +{ + if (event->feat.feat_id < HEADER_LAST_FEATURE) + return perf_event__process_feature(tool, event, session); + return 0; +} + static int hist_entry__tty_annotate(struct hist_entry *he, struct perf_evsel *evsel, struct perf_annotate *ann) @@ -471,7 +480,7 @@ int cmd_annotate(int argc, const char **argv) .attr = perf_event__process_attr, .build_id = perf_event__process_build_id, .tracing_data = perf_event__process_tracing_data, - .feature = perf_event__process_feature, + .feature = process_feature_event, .ordered_events = true, .ordering_requires_timestamps = true, }, diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index cdb5b6949832..c04dc7b53797 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -217,7 +217,8 @@ static int process_feature_event(struct perf_tool *tool, } /* - * All features are received, we can force the + * (feat_id = HEADER_LAST_FEATURE) is the end marker which + * means all features are received, now we can force the * group if needed. */ setup_forced_leader(rep, session->evlist); diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index ad2ac1300420..568ddfac3213 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -3044,6 +3044,15 @@ int process_cpu_map_event(struct perf_tool *tool __maybe_unused, return set_maps(script); } +static int process_feature_event(struct perf_tool *tool, + union perf_event *event, + struct perf_session *session) +{ + if (event->feat.feat_id < HEADER_LAST_FEATURE) + return perf_event__process_feature(tool, event, session); + return 0; +} + #ifdef HAVE_AUXTRACE_SUPPORT static int perf_script__process_auxtrace_info(struct perf_tool *tool, union perf_event *event, @@ -3088,7 +3097,7 @@ int cmd_script(int argc, const char **argv) .attr = process_attr, .event_update = perf_event__process_event_update, .tracing_data = perf_event__process_tracing_data, - .feature = perf_event__process_feature, + .feature = process_feature_event, .build_id = perf_event__process_build_id, .id_index = perf_event__process_id_index, .auxtrace_info = perf_script__process_auxtrace_info, diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 59fcc790c865..653ff65aa2c3 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -3464,7 +3464,7 @@ int perf_event__process_feature(struct perf_tool *tool, pr_warning("invalid record type %d in pipe-mode\n", type); return 0; } - if (feat == HEADER_RESERVED || feat > HEADER_LAST_FEATURE) { + if (feat == HEADER_RESERVED || feat >= HEADER_LAST_FEATURE) { pr_warning("invalid record type %d in pipe-mode\n", type); return -1; } From b65c32ec5a942ab3ada93a048089a938918aba7f Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Sat, 19 May 2018 14:23:54 +0200 Subject: [PATCH 475/572] X.509: unpack RSA signatureValue field from BIT STRING The signatureValue field of a X.509 certificate is encoded as a BIT STRING. For RSA signatures this BIT STRING is of so-called primitive subtype, which contains a u8 prefix indicating a count of unused bits in the encoding. We have to strip this prefix from signature data, just as we already do for key data in x509_extract_key_data() function. This wasn't noticed earlier because this prefix byte is zero for RSA key sizes divisible by 8. Since BIT STRING is a big-endian encoding adding zero prefixes has no bearing on its value. The signature length, however was incorrect, which is a problem for RSA implementations that need it to be exactly correct (like AMD CCP). Signed-off-by: Maciej S. Szmigiero Fixes: c26fd69fa009 ("X.509: Add a crypto key parser for binary (DER) X.509 certificates") Cc: stable@vger.kernel.org Signed-off-by: James Morris --- crypto/asymmetric_keys/x509_cert_parser.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/crypto/asymmetric_keys/x509_cert_parser.c b/crypto/asymmetric_keys/x509_cert_parser.c index 7d81e6bb461a..b6cabac4b62b 100644 --- a/crypto/asymmetric_keys/x509_cert_parser.c +++ b/crypto/asymmetric_keys/x509_cert_parser.c @@ -249,6 +249,15 @@ int x509_note_signature(void *context, size_t hdrlen, return -EINVAL; } + if (strcmp(ctx->cert->sig->pkey_algo, "rsa") == 0) { + /* Discard the BIT STRING metadata */ + if (vlen < 1 || *(const u8 *)value != 0) + return -EBADMSG; + + value++; + vlen--; + } + ctx->cert->raw_sig = value; ctx->cert->raw_sig_size = vlen; return 0; From 90f26cc6bb90b35040f4da0347f480ea9df6e2fc Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Wed, 6 Jun 2018 09:43:27 +0800 Subject: [PATCH 476/572] usb: chipidea: host: fix disconnection detect issue The commit 4e88d4c08301 ("usb: add a flag to skip PHY initialization to struct usb_hcd") delete the assignment for hcd->usb_phy, it causes usb_phy_notify_connect{disconnect) are not called, the USB PHY driver is not notified of hot plug event, then the disconnection will not be detected by hardware. Fixes: 4e88d4c08301 ("usb: add a flag to skip PHY initialization to struct usb_hcd") Acked-by: Martin Blumenstingl Reported-by: Mats Karrman Tested-by: Mats Karrman Signed-off-by: Peter Chen --- drivers/usb/chipidea/host.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/chipidea/host.c b/drivers/usb/chipidea/host.c index af45aa3222b5..4638d9b066be 100644 --- a/drivers/usb/chipidea/host.c +++ b/drivers/usb/chipidea/host.c @@ -124,8 +124,11 @@ static int host_start(struct ci_hdrc *ci) hcd->power_budget = ci->platdata->power_budget; hcd->tpl_support = ci->platdata->tpl_support; - if (ci->phy || ci->usb_phy) + if (ci->phy || ci->usb_phy) { hcd->skip_phy_initialization = 1; + if (ci->usb_phy) + hcd->usb_phy = ci->usb_phy; + } ehci = hcd_to_ehci(hcd); ehci->caps = ci->hw_bank.cap; From 0ea3fa15b12739c5455cd5850ada4e9948171213 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 25 May 2018 18:08:24 +0200 Subject: [PATCH 477/572] qcom: cmd-db: enforce CONFIG_OF_RESERVED_MEM dependency Without CONFIG_OF_RESERVED_MEM, gcc sees that the global cmd_db_header variable is never initialized, and through code optimization concludes that a lot of other code cannot possibly work after that: drivers/soc/qcom/cmd-db.c: In function 'cmd_db_read_addr': drivers/soc/qcom/cmd-db.c:197:21: error: 'ent.addr' may be used uninitialized in this function [-Werror=maybe-uninitialized] return ret < 0 ? 0 : le32_to_cpu(ent.addr); drivers/soc/qcom/cmd-db.c: In function 'cmd_db_read_aux_data': drivers/soc/qcom/cmd-db.c:224:10: error: 'ent.len' may be used uninitialized in this function [-Werror=maybe-uninitialized] ent_len = le16_to_cpu(ent.len); drivers/soc/qcom/cmd-db.c:115:6: error: 'rsc_hdr.data_offset' may be used uninitialized in this function [-Werror=maybe-uninitialized] u16 offset = le16_to_cpu(hdr->data_offset); ^~~~~~ drivers/soc/qcom/cmd-db.c:116:6: error: 'ent.offset' may be used uninitialized in this function [-Werror=maybe-uninitialized] u16 loffset = le16_to_cpu(ent->offset); ^~~~~~~ drivers/soc/qcom/cmd-db.c: In function 'cmd_db_read_aux_data_len': drivers/soc/qcom/cmd-db.c:250:38: error: 'ent.len' may be used uninitialized in this function [-Werror=maybe-uninitialized] return ret < 0 ? 0 : le16_to_cpu(ent.len); ^ drivers/soc/qcom/cmd-db.c: In function 'cmd_db_read_slave_id': drivers/soc/qcom/cmd-db.c:272:7: error: 'ent.addr' may be used uninitialized in this function [-Werror=maybe-uninitialized] Using a hard CONFIG_OF_RESERVED_MEM dependency avoids this warning, and we can remove the CONFIG_OF dependency. Signed-off-by: Arnd Bergmann Reviewed-by: Bjorn Andersson Signed-off-by: Andy Gross --- drivers/soc/qcom/Kconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig index 9dc02f390ba3..5856e792d09c 100644 --- a/drivers/soc/qcom/Kconfig +++ b/drivers/soc/qcom/Kconfig @@ -5,7 +5,8 @@ menu "Qualcomm SoC drivers" config QCOM_COMMAND_DB bool "Qualcomm Command DB" - depends on (ARCH_QCOM && OF) || COMPILE_TEST + depends on ARCH_QCOM || COMPILE_TEST + depends on OF_RESERVED_MEM help Command DB queries shared memory by key string for shared system resources. Platform drivers that require to set state of a shared From 32fdbd90cc03f01d452138bab4d8a120873d6acf Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 26 Jun 2018 08:43:14 +0200 Subject: [PATCH 478/572] tools/headers: Pick up latest kernel ABIs Sync KVM ABI additions and x86 CPU features additions - neither of which has any impact on the tooling build. Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Namhyung Kim Cc: Jiri Olsa Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- tools/arch/arm/include/uapi/asm/kvm.h | 1 + tools/arch/arm64/include/uapi/asm/kvm.h | 1 + tools/arch/powerpc/include/uapi/asm/kvm.h | 1 + tools/arch/x86/include/asm/cpufeatures.h | 2 ++ tools/include/uapi/linux/kvm.h | 1 + 5 files changed, 6 insertions(+) diff --git a/tools/arch/arm/include/uapi/asm/kvm.h b/tools/arch/arm/include/uapi/asm/kvm.h index caae4843cb70..16e006f708ca 100644 --- a/tools/arch/arm/include/uapi/asm/kvm.h +++ b/tools/arch/arm/include/uapi/asm/kvm.h @@ -91,6 +91,7 @@ struct kvm_regs { #define KVM_VGIC_V3_ADDR_TYPE_DIST 2 #define KVM_VGIC_V3_ADDR_TYPE_REDIST 3 #define KVM_VGIC_ITS_ADDR_TYPE 4 +#define KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION 5 #define KVM_VGIC_V3_DIST_SIZE SZ_64K #define KVM_VGIC_V3_REDIST_SIZE (2 * SZ_64K) diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index 04b3256f8e6d..4e76630dd655 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -91,6 +91,7 @@ struct kvm_regs { #define KVM_VGIC_V3_ADDR_TYPE_DIST 2 #define KVM_VGIC_V3_ADDR_TYPE_REDIST 3 #define KVM_VGIC_ITS_ADDR_TYPE 4 +#define KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION 5 #define KVM_VGIC_V3_DIST_SIZE SZ_64K #define KVM_VGIC_V3_REDIST_SIZE (2 * SZ_64K) diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h index 833ed9a16adf..1b32b56a03d3 100644 --- a/tools/arch/powerpc/include/uapi/asm/kvm.h +++ b/tools/arch/powerpc/include/uapi/asm/kvm.h @@ -633,6 +633,7 @@ struct kvm_ppc_cpu_char { #define KVM_REG_PPC_PSSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbd) #define KVM_REG_PPC_DEC_EXPIRY (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe) +#define KVM_REG_PPC_ONLINE (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbf) /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index fb00a2fca990..5701f5cecd31 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -282,7 +282,9 @@ #define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */ #define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */ #define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */ #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ +#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 39e364c70caf..b6270a3b38e9 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -948,6 +948,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_S390_BPB 152 #define KVM_CAP_GET_MSR_FEATURES 153 #define KVM_CAP_HYPERV_EVENTFD 154 +#define KVM_CAP_HYPERV_TLBFLUSH 155 #ifdef KVM_CAP_IRQ_ROUTING From 4188f063e3694ccbf2a2044cf17cc325f91e458f Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Mon, 25 Jun 2018 14:38:08 +0200 Subject: [PATCH 479/572] x86/mm: Get rid of KERN_CONT in show_fault_oops() KERN_CONT leads to split lines in kernel output and complicates useful changes to printk like printing context before each line. Only acceptable use of continuations is basically boot-time testing. Get rid of it. Signed-off-by: Dmitry Vyukov Reviewed-by: Sergey Senozhatsky Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180625123808.227417-1-dvyukov@gmail.com [ Removed unnecessary parentheses and prettified the printk statement. ] Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 9a84a0d08727..ee85766e6329 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -671,13 +671,9 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, printk(smep_warning, from_kuid(&init_user_ns, current_uid())); } - printk(KERN_ALERT "BUG: unable to handle kernel "); - if (address < PAGE_SIZE) - printk(KERN_CONT "NULL pointer dereference"); - else - printk(KERN_CONT "paging request"); - - printk(KERN_CONT " at %px\n", (void *) address); + pr_alert("BUG: unable to handle kernel %s at %px\n", + address < PAGE_SIZE ? "NULL pointer dereference" : "paging request", + (void *)address); dump_pagetable(address); } From 236f0cd286b67291796362feeac4f342900cfa82 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 25 Jun 2018 04:21:59 -0600 Subject: [PATCH 480/572] x86/entry/32: Add explicit 'l' instruction suffix Omitting suffixes from instructions in AT&T mode is bad practice when operand size cannot be determined by the assembler from register operands, and is likely going to be warned about by upstream GAS in the future (mine does already). Add the single missing 'l' suffix here. Signed-off-by: Jan Beulich Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/5B30C24702000078001CD6A6@prv1-mh.provo.novell.com Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_32.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 2582881d19ce..c371bfee137a 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -477,7 +477,7 @@ ENTRY(entry_SYSENTER_32) * whereas POPF does not.) */ addl $PT_EFLAGS-PT_DS, %esp /* point esp at pt_regs->flags */ - btr $X86_EFLAGS_IF_BIT, (%esp) + btrl $X86_EFLAGS_IF_BIT, (%esp) popfl /* From 0e311d237d7f3022b7dafb639b42541bfb42fe94 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Mon, 25 Jun 2018 13:24:27 +0300 Subject: [PATCH 481/572] x86/mm: Don't free P4D table when it is folded at runtime When the P4D page table layer is folded at runtime, the p4d_free() should do nothing, the same as in . It seems this bug should cause double-free in efi_call_phys_epilog(), but I don't know how to trigger that code path, so I can't confirm that by testing. Signed-off-by: Andrey Ryabinin Reviewed-by: Kirill A. Shutemov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org # 4.17 Fixes: 98219dda2ab5 ("x86/mm: Fold p4d page table layer at runtime") Link: http://lkml.kernel.org/r/20180625102427.15015-1-aryabinin@virtuozzo.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgalloc.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index ada6410fd2ec..fbd578daa66e 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -184,6 +184,9 @@ static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr) static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d) { + if (!pgtable_l5_enabled()) + return; + BUG_ON((unsigned long)p4d & (PAGE_SIZE-1)); free_page((unsigned long)p4d); } From 01a9e9493fb3f65c07077d59cf8ba8b6d2e0463e Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 11 Jun 2018 18:53:35 +0200 Subject: [PATCH 482/572] drm/meson: Fix an un-handled error path in 'meson_drv_bind_master()' If 'platform_get_resource_byname()' fails, we should release some resources before leaving, as already done in the other error handling path of the function. Fixes: acaa3f13b8dd ("drm/meson: Fix potential NULL dereference in meson_drv_bind_master()") Signed-off-by: Christophe JAILLET Acked-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20180611165335.24542-1-christophe.jaillet@wanadoo.fr --- drivers/gpu/drm/meson/meson_drv.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c index 32b1a6cdecfc..d3443125e661 100644 --- a/drivers/gpu/drm/meson/meson_drv.c +++ b/drivers/gpu/drm/meson/meson_drv.c @@ -197,8 +197,10 @@ static int meson_drv_bind_master(struct device *dev, bool has_components) priv->io_base = regs; res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "hhi"); - if (!res) - return -EINVAL; + if (!res) { + ret = -EINVAL; + goto free_drm; + } /* Simply ioremap since it may be a shared register zone */ regs = devm_ioremap(dev, res->start, resource_size(res)); if (!regs) { @@ -215,8 +217,10 @@ static int meson_drv_bind_master(struct device *dev, bool has_components) } res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dmc"); - if (!res) - return -EINVAL; + if (!res) { + ret = -EINVAL; + goto free_drm; + } /* Simply ioremap since it may be a shared register zone */ regs = devm_ioremap(dev, res->start, resource_size(res)); if (!regs) { From 6f6f42466d902c92f21b46a45e6af22d1d663607 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 25 Jun 2018 08:17:33 -0500 Subject: [PATCH 483/572] PCI: shpchp: Manage SHPC unconditionally on non-ACPI systems An SHPC can be operated either by platform firmware or by the OS. The OS uses a host bridge ACPI _OSC method to negotiate for control of SHPC. If firmware wants to prevent an OS from operating an SHPC, it must supply an _OSC method that declines to grant SHPC ownership to the OS. If acpi_pci_find_root() returns NULL, it means there's no ACPI host bridge device (PNP0A03 or PNP0A08) and hence no _OSC method, so the OS is always allowed to manage the SHPC. Fix a NULL pointer dereference when CONFIG_ACPI=y but the current hardware/firmware platform doesn't support ACPI. In that case, acpi_get_hp_hw_control_from_firmware() is implemented but acpi_pci_find_root() returns NULL. Fixes: 90cc0c3cc709 ("PCI: shpchp: Add shpchp_is_native()") Link: https://lkml.kernel.org/r/20180621164715.28160-1-marc.zyngier@arm.com Reported-by: Marc Zyngier Tested-by: Marc Zyngier Signed-off-by: Bjorn Helgaas Reviewed-by: Mika Westerberg --- drivers/pci/hotplug/acpi_pcihp.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/pci/hotplug/acpi_pcihp.c b/drivers/pci/hotplug/acpi_pcihp.c index 3979f89b250a..5bd6c1573295 100644 --- a/drivers/pci/hotplug/acpi_pcihp.c +++ b/drivers/pci/hotplug/acpi_pcihp.c @@ -7,7 +7,6 @@ * All rights reserved. * * Send feedback to - * */ #include @@ -87,8 +86,17 @@ int acpi_get_hp_hw_control_from_firmware(struct pci_dev *pdev) return 0; /* If _OSC exists, we should not evaluate OSHP */ + + /* + * If there's no ACPI host bridge (i.e., ACPI support is compiled + * into the kernel but the hardware platform doesn't support ACPI), + * there's nothing to do here. + */ host = pci_find_host_bridge(pdev->bus); root = acpi_pci_find_root(ACPI_HANDLE(&host->dev)); + if (!root) + return 0; + if (root->osc_support_set) goto no_control; From 941c06d58503b9f2718b20bc45ee7f1d701a1e1e Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 25 Jun 2018 13:45:09 +0530 Subject: [PATCH 484/572] powerpc/mm/32: Fix pgtable_page_dtor call Commit 667416f38554 ("powerpc/mm: Fix kernel crash on page table free") added a call for pgtable_page_dtor in the rcu page table free routine. We missed the fact that for 32 bit platforms we did call the 'dtor' early. Drop the extra call for pgtable_page_dtor. We remove the call from __pte_free_tlb so that we do the page table free and 'dtor' call together. This should help when we switch these platforms to pte fragments. Fixes: 667416f38554 ("powerpc/mm: Fix kernel crash on page table free") Reported-by: Christophe Leroy Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/book3s/32/pgalloc.h | 1 - arch/powerpc/include/asm/nohash/32/pgalloc.h | 1 - 2 files changed, 2 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h index e4633803fe43..82e44b1a00ae 100644 --- a/arch/powerpc/include/asm/book3s/32/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h @@ -138,7 +138,6 @@ static inline void pgtable_free_tlb(struct mmu_gather *tlb, static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, unsigned long address) { - pgtable_page_dtor(table); pgtable_free_tlb(tlb, page_address(table), 0); } #endif /* _ASM_POWERPC_BOOK3S_32_PGALLOC_H */ diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h index 9de40eb614da..8825953c225b 100644 --- a/arch/powerpc/include/asm/nohash/32/pgalloc.h +++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h @@ -140,7 +140,6 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table, unsigned long address) { tlb_flush_pgtable(tlb, address); - pgtable_page_dtor(table); pgtable_free_tlb(tlb, page_address(table), 0); } #endif /* _ASM_POWERPC_PGALLOC_32_H */ From 1b6fe9798af8cb7d80fad5dd30ee1bcd1e0f51eb Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Thu, 17 May 2018 20:20:52 -0500 Subject: [PATCH 485/572] ARM: davinci: board-da850-evm: fix WP pin polarity for MMC/SD When booting from MMC/SD in rw mode on DA850 EVM, the system crashes because the write protect pin should be active high and not active low. This patch fixes the polarity of the WP pin. Fixes: bdf0e8364fd3 ("ARM: davinci: da850-evm: use gpio descriptor for mmc pins") Signed-off-by: Adam Ford Signed-off-by: Sekhar Nori --- arch/arm/mach-davinci/board-da850-evm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-davinci/board-da850-evm.c b/arch/arm/mach-davinci/board-da850-evm.c index e22fb40e34bc..6d5beb11bd96 100644 --- a/arch/arm/mach-davinci/board-da850-evm.c +++ b/arch/arm/mach-davinci/board-da850-evm.c @@ -774,7 +774,7 @@ static struct gpiod_lookup_table mmc_gpios_table = { GPIO_LOOKUP("davinci_gpio.0", DA850_MMCSD_CD_PIN, "cd", GPIO_ACTIVE_LOW), GPIO_LOOKUP("davinci_gpio.0", DA850_MMCSD_WP_PIN, "wp", - GPIO_ACTIVE_LOW), + GPIO_ACTIVE_HIGH), }, }; From 8b8f53af1ed9df88a4c0fbfdf3db58f62060edf3 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 19 Jun 2018 18:20:34 +0800 Subject: [PATCH 486/572] ceph: fix dentry leak in splice_dentry() In any case, d_splice_alias() does not drop reference of original dentry. Signed-off-by: "Yan, Zheng" Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index ee764ac352ab..a866be999216 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1135,6 +1135,7 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in) if (IS_ERR(realdn)) { pr_err("splice_dentry error %ld %p inode %p ino %llx.%llx\n", PTR_ERR(realdn), dn, in, ceph_vinop(in)); + dput(dn); dn = realdn; /* note realdn contains the error */ goto out; } else if (realdn) { From 2e20ce4a66b798671c60198ea2547d5e090dd991 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Tue, 26 Jun 2018 16:59:40 +0100 Subject: [PATCH 487/572] certs/blacklist: fix const confusion Fixes commit 2be04df5668d ("certs/blacklist_nohashes.c: fix const confusion in certs blacklist") Signed-off-by: Nick Desaulniers Signed-off-by: David Howells Signed-off-by: James Morris --- certs/blacklist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/certs/blacklist.h b/certs/blacklist.h index 150d82da8e99..1efd6fa0dc60 100644 --- a/certs/blacklist.h +++ b/certs/blacklist.h @@ -1,3 +1,3 @@ #include -extern const char __initdata *const blacklist_hashes[]; +extern const char __initconst *const blacklist_hashes[]; From 3619dec5103dd999a777e3e4ea08c8f40a6ddc57 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 26 Jun 2018 16:59:46 +0100 Subject: [PATCH 488/572] dh key: fix rounding up KDF output length Commit 383203eff718 ("dh key: get rid of stack allocated array") changed kdf_ctr() to assume that the length of key material to derive is a multiple of the digest size. The length was supposed to be rounded up accordingly. However, the round_up() macro was used which only gives the correct result on power-of-2 arguments, whereas not all hash algorithms have power-of-2 digest sizes. In some cases this resulted in a write past the end of the 'outbuf' buffer. Fix it by switching to roundup(), which works for non-power-of-2 inputs. Reported-by: syzbot+486f97f892efeb2075a3@syzkaller.appspotmail.com Reported-by: syzbot+29d17b7898b41ee120a5@syzkaller.appspotmail.com Reported-by: syzbot+8a608baf8751184ec727@syzkaller.appspotmail.com Reported-by: syzbot+d04e58bd384f1fe0b112@syzkaller.appspotmail.com Fixes: 383203eff718 ("dh key: get rid of stack allocated array") Signed-off-by: Eric Biggers Acked-by: Kees Cook Acked-by: Tycho Andersen Signed-off-by: James Morris --- security/keys/dh.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/security/keys/dh.c b/security/keys/dh.c index f7403821db7f..b203f7758f97 100644 --- a/security/keys/dh.c +++ b/security/keys/dh.c @@ -142,6 +142,8 @@ static void kdf_dealloc(struct kdf_sdesc *sdesc) * The src pointer is defined as Z || other info where Z is the shared secret * from DH and other info is an arbitrary string (see SP800-56A section * 5.8.1.2). + * + * 'dlen' must be a multiple of the digest size. */ static int kdf_ctr(struct kdf_sdesc *sdesc, const u8 *src, unsigned int slen, u8 *dst, unsigned int dlen, unsigned int zlen) @@ -205,8 +207,8 @@ static int keyctl_dh_compute_kdf(struct kdf_sdesc *sdesc, { uint8_t *outbuf = NULL; int ret; - size_t outbuf_len = round_up(buflen, - crypto_shash_digestsize(sdesc->shash.tfm)); + size_t outbuf_len = roundup(buflen, + crypto_shash_digestsize(sdesc->shash.tfm)); outbuf = kmalloc(outbuf_len, GFP_KERNEL); if (!outbuf) { From 4595299c5eaebbec0ca5822214ad1925a10b3876 Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Fri, 22 Jun 2018 13:35:38 -0500 Subject: [PATCH 489/572] arm64: dts: stratix10: Fix SPI nodes for Stratix10 Remove the unused bus-num node and change num-chipselect to num-cs to match SPI bindings. Cc: stable@vger.kernel.org Fixes: 78cd6a9d8e154 ("arm64: dts: Add base stratix 10 dtsi") Signed-off-by: Thor Thayer Signed-off-by: Dinh Nguyen Signed-off-by: Olof Johansson --- arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi index e6b059378dc0..67dac595dc72 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi @@ -309,8 +309,7 @@ interrupts = <0 99 4>; resets = <&rst SPIM0_RESET>; reg-io-width = <4>; - num-chipselect = <4>; - bus-num = <0>; + num-cs = <4>; status = "disabled"; }; @@ -322,8 +321,7 @@ interrupts = <0 100 4>; resets = <&rst SPIM1_RESET>; reg-io-width = <4>; - num-chipselect = <4>; - bus-num = <0>; + num-cs = <4>; status = "disabled"; }; From 975ba94c2c3aca4d9f1ae26f3916d7787495ce86 Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Fri, 22 Jun 2018 13:37:34 -0500 Subject: [PATCH 490/572] ARM: dts: Fix SPI node for Arria10 Remove the unused bus-num node and change num-chipselect to num-cs to match SPI bindings. Cc: stable@vger.kernel.org Fixes: f2d6f8f817814 ("ARM: dts: socfpga: Add SPI Master1 for Arria10 SR chip") Signed-off-by: Thor Thayer Signed-off-by: Dinh Nguyen Signed-off-by: Olof Johansson --- arch/arm/boot/dts/socfpga_arria10.dtsi | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/socfpga_arria10.dtsi b/arch/arm/boot/dts/socfpga_arria10.dtsi index d8b1aa309f76..791ca15c799e 100644 --- a/arch/arm/boot/dts/socfpga_arria10.dtsi +++ b/arch/arm/boot/dts/socfpga_arria10.dtsi @@ -593,8 +593,7 @@ #size-cells = <0>; reg = <0xffda5000 0x100>; interrupts = <0 102 4>; - num-chipselect = <4>; - bus-num = <0>; + num-cs = <4>; /*32bit_access;*/ tx-dma-channel = <&pdma 16>; rx-dma-channel = <&pdma 17>; From 15bfd21fbc5d35834b9ea383dc458a1f0c9e3434 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 26 Jun 2018 09:14:58 -0600 Subject: [PATCH 491/572] block: Fix transfer when chunk sectors exceeds max A device may have boundary restrictions where the number of sectors between boundaries exceeds its max transfer size. In this case, we need to cap the max size to the smaller of the two limits. Reported-by: Jitendra Bhivare Tested-by: Jitendra Bhivare Cc: Reviewed-by: Martin K. Petersen Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9154570edf29..79226ca8f80f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1119,8 +1119,8 @@ static inline unsigned int blk_max_size_offset(struct request_queue *q, if (!q->limits.chunk_sectors) return q->limits.max_sectors; - return q->limits.chunk_sectors - - (offset & (q->limits.chunk_sectors - 1)); + return min(q->limits.max_sectors, (unsigned int)(q->limits.chunk_sectors - + (offset & (q->limits.chunk_sectors - 1)))); } static inline unsigned int blk_rq_get_max_sectors(struct request *rq, From 68bb22e9d561ad7c20274eee92058048ac689b98 Mon Sep 17 00:00:00 2001 From: Alan Douglas Date: Fri, 22 Jun 2018 17:17:17 +0100 Subject: [PATCH 492/572] PCI: Initialize endpoint library before controllers The endpoint library must be initialized before its users, which are in drivers/pci/controllers. The endpoint initialization currently depends on link order. This corrects a kernel crash when loading the Cadence EP driver, since it calls devm_pci_epc_create() and this is only valid once the endpoint library has been initialized. Fixes: 6e0832fa432e ("PCI: Collect all native drivers under drivers/pci/controller/") Signed-off-by: Alan Douglas Signed-off-by: Bjorn Helgaas --- drivers/pci/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile index 535201984b8b..1b2cfe51e8d7 100644 --- a/drivers/pci/Makefile +++ b/drivers/pci/Makefile @@ -28,10 +28,10 @@ obj-$(CONFIG_PCI_PF_STUB) += pci-pf-stub.o obj-$(CONFIG_PCI_ECAM) += ecam.o obj-$(CONFIG_XEN_PCIDEV_FRONTEND) += xen-pcifront.o -obj-y += controller/ -obj-y += switch/ - # Endpoint library must be initialized before its users obj-$(CONFIG_PCI_ENDPOINT) += endpoint/ +obj-y += controller/ +obj-y += switch/ + ccflags-$(CONFIG_PCI_DEBUG) := -DDEBUG From 925d31668d5b6c9849c50167f31f9b5cf9eec892 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Tue, 19 Jun 2018 12:21:05 +0100 Subject: [PATCH 493/572] PCI: controller: Move PCI_DOMAINS selection to arch Kconfig Commit 51bc085d6454 ("PCI: Improve host drivers compile test coverage") added configuration options to allow PCI host controller drivers to be compile tested on all architectures. Some host controller drivers (eg PCIE_ALTERA) config entries select the PCI_DOMAINS config option to enable PCI domains management in the kernel. Now that host controller drivers can be compiled on all architectures, this triggers build regressions on arches that do not implement the PCI_DOMAINS required API (ie pci_domain_nr()): drivers/ata/pata_ali.c: In function 'ali_init_chipset': drivers/ata/pata_ali.c:469:38: error: implicit declaration of function 'pci_domain_nr'; did you mean 'pci_iomap_wc'? Furthemore, some software configurations (ie Jailhouse) require a PCI_DOMAINS enabled kernel to configure multiple host controllers without having an explicit dependency on the ARM platform on which they run. Make PCI_DOMAINS a visible configuration option on ARM so that software configurations that need it can manually select it and move the PCI_DOMAINS selection from PCI controllers configuration file to ARM sub-arch config entries that currently require it, fixing the issue. Fixes: 51bc085d6454 ("PCI: Improve host drivers compile test coverage") Link: https://lkml.kernel.org/r/20180612170229.GA10141@roeck-us.net Reported-by: Guenter Roeck Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Acked-by: Jan Kiszka Acked-by: Ley Foon Tan Acked-by: Rob Herring Cc: Scott Branden Cc: Will Deacon Cc: Russell King Cc: Guenter Roeck --- arch/arm/Kconfig | 8 +++++++- arch/arm/mach-bcm/Kconfig | 1 + arch/arm/mach-socfpga/Kconfig | 1 + drivers/pci/controller/Kconfig | 3 --- 4 files changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 54eeb8d00bc6..843edfd000be 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1245,8 +1245,14 @@ config PCI VESA. If you have PCI, say Y, otherwise N. config PCI_DOMAINS - bool + bool "Support for multiple PCI domains" depends on PCI + help + Enable PCI domains kernel management. Say Y if your machine + has a PCI bus hierarchy that requires more than one PCI + domain (aka segment) to be correctly managed. Say N otherwise. + + If you don't know what to do here, say N. config PCI_DOMAINS_GENERIC def_bool PCI_DOMAINS diff --git a/arch/arm/mach-bcm/Kconfig b/arch/arm/mach-bcm/Kconfig index c46a728df44e..25aac6ee2ab1 100644 --- a/arch/arm/mach-bcm/Kconfig +++ b/arch/arm/mach-bcm/Kconfig @@ -20,6 +20,7 @@ config ARCH_BCM_IPROC select GPIOLIB select ARM_AMBA select PINCTRL + select PCI_DOMAINS if PCI help This enables support for systems based on Broadcom IPROC architected SoCs. The IPROC complex contains one or more ARM CPUs along with common diff --git a/arch/arm/mach-socfpga/Kconfig b/arch/arm/mach-socfpga/Kconfig index d0f62eacf59d..4adb901dd5eb 100644 --- a/arch/arm/mach-socfpga/Kconfig +++ b/arch/arm/mach-socfpga/Kconfig @@ -10,6 +10,7 @@ menuconfig ARCH_SOCFPGA select HAVE_ARM_SCU select HAVE_ARM_TWD if SMP select MFD_SYSCON + select PCI_DOMAINS if PCI if ARCH_SOCFPGA config SOCFPGA_SUSPEND diff --git a/drivers/pci/controller/Kconfig b/drivers/pci/controller/Kconfig index 18fa09b3ac8f..cc9fa02d32a0 100644 --- a/drivers/pci/controller/Kconfig +++ b/drivers/pci/controller/Kconfig @@ -96,7 +96,6 @@ config PCI_HOST_GENERIC depends on OF select PCI_HOST_COMMON select IRQ_DOMAIN - select PCI_DOMAINS help Say Y here if you want to support a simple generic PCI host controller, such as the one emulated by kvmtool. @@ -138,7 +137,6 @@ config PCI_VERSATILE config PCIE_IPROC tristate - select PCI_DOMAINS help This enables the iProc PCIe core controller support for Broadcom's iProc family of SoCs. An appropriate bus interface driver needs @@ -176,7 +174,6 @@ config PCIE_IPROC_MSI config PCIE_ALTERA bool "Altera PCIe controller" depends on ARM || NIOS2 || COMPILE_TEST - select PCI_DOMAINS help Say Y here if you want to enable PCIe controller support on Altera FPGA. From 03ae3a9caf4a59edd32b65c89c375a98ce3ea1ef Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 25 Jun 2018 12:02:40 -0700 Subject: [PATCH 494/572] Input: psmouse - fix button reporting for basic protocols The commit ba667650c568 ("Input: psmouse - clean up code") was pretty brain-dead and broke extra buttons reporting for variety of PS/2 mice: Genius, Thinkmouse and Intellimouse Explorer. We need to actually inspect the data coming from the device when reporting events. Fixes: ba667650c568 ("Input: psmouse - clean up code") Reported-by: Jiri Slaby Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/psmouse-base.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/input/mouse/psmouse-base.c b/drivers/input/mouse/psmouse-base.c index 5ff5b1952be0..d3ff1fc09af7 100644 --- a/drivers/input/mouse/psmouse-base.c +++ b/drivers/input/mouse/psmouse-base.c @@ -192,8 +192,8 @@ psmouse_ret_t psmouse_process_byte(struct psmouse *psmouse) else input_report_rel(dev, REL_WHEEL, -wheel); - input_report_key(dev, BTN_SIDE, BIT(4)); - input_report_key(dev, BTN_EXTRA, BIT(5)); + input_report_key(dev, BTN_SIDE, packet[3] & BIT(4)); + input_report_key(dev, BTN_EXTRA, packet[3] & BIT(5)); break; } break; @@ -203,13 +203,13 @@ psmouse_ret_t psmouse_process_byte(struct psmouse *psmouse) input_report_rel(dev, REL_WHEEL, -(s8) packet[3]); /* Extra buttons on Genius NewNet 3D */ - input_report_key(dev, BTN_SIDE, BIT(6)); - input_report_key(dev, BTN_EXTRA, BIT(7)); + input_report_key(dev, BTN_SIDE, packet[0] & BIT(6)); + input_report_key(dev, BTN_EXTRA, packet[0] & BIT(7)); break; case PSMOUSE_THINKPS: /* Extra button on ThinkingMouse */ - input_report_key(dev, BTN_EXTRA, BIT(3)); + input_report_key(dev, BTN_EXTRA, packet[0] & BIT(3)); /* * Without this bit of weirdness moving up gives wildly @@ -223,7 +223,7 @@ psmouse_ret_t psmouse_process_byte(struct psmouse *psmouse) * Cortron PS2 Trackball reports SIDE button in the * 4th bit of the first byte. */ - input_report_key(dev, BTN_SIDE, BIT(3)); + input_report_key(dev, BTN_SIDE, packet[0] & BIT(3)); packet[0] |= BIT(3); break; From 22db552b50fa11d8c1d171de908a1f9ef62172b7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 19 Jun 2018 16:02:27 +0200 Subject: [PATCH 495/572] powerpc/powermac: Fix rtc read/write functions As Mathieu pointed out, my conversion to time64_t was incorrect and resulted in negative times to be read from the RTC. The problem is that during the conversion from a byte array to a time64_t, the 'unsigned char' variable holding the top byte gets turned into a negative signed 32-bit integer before being assigned to the 64-bit variable for any times after 1972. This changes the logic to cast to an unsigned 32-bit number first for the Macintosh time and then convert that to the Unix time, which then gives us a time in the documented 1904..2040 year range. I decided not to use the longer 1970..2106 range that other drivers use, for consistency with the literal interpretation of the register, but that could be easily changed if we decide we want to support any Mac after 2040. Just to be on the safe side, I'm also adding a WARN_ON that will trigger if either the year 2040 has come and is observed by this driver, or we run into an RTC that got set back to a pre-1970 date for some reason (the two are indistinguishable). For the RTC write functions, Andreas found another problem: both pmu_request() and cuda_request() are varargs functions, so changing the type of the arguments passed into them from 32 bit to 64 bit breaks the API for the set_rtc_time functions. This changes it back to 32 bits. The same code exists in arch/m68k/ and is patched in an identical way now in a separate patch. Fixes: 5bfd643583b2 ("powerpc: use time64_t in read_persistent_clock") Reported-by: Mathieu Malaterre Reported-by: Andreas Schwab Signed-off-by: Arnd Bergmann Tested-by: Mathieu Malaterre Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powermac/time.c | 29 ++++++++++++++++++-------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/platforms/powermac/time.c b/arch/powerpc/platforms/powermac/time.c index 7c968e46736f..12e6e4d30602 100644 --- a/arch/powerpc/platforms/powermac/time.c +++ b/arch/powerpc/platforms/powermac/time.c @@ -42,7 +42,11 @@ #define DBG(x...) #endif -/* Apparently the RTC stores seconds since 1 Jan 1904 */ +/* + * Offset between Unix time (1970-based) and Mac time (1904-based). Cuda and PMU + * times wrap in 2040. If we need to handle later times, the read_time functions + * need to be changed to interpret wrapped times as post-2040. + */ #define RTC_OFFSET 2082844800 /* @@ -97,8 +101,11 @@ static time64_t cuda_get_time(void) if (req.reply_len != 7) printk(KERN_ERR "cuda_get_time: got %d byte reply\n", req.reply_len); - now = (req.reply[3] << 24) + (req.reply[4] << 16) - + (req.reply[5] << 8) + req.reply[6]; + now = (u32)((req.reply[3] << 24) + (req.reply[4] << 16) + + (req.reply[5] << 8) + req.reply[6]); + /* it's either after year 2040, or the RTC has gone backwards */ + WARN_ON(now < RTC_OFFSET); + return now - RTC_OFFSET; } @@ -106,10 +113,10 @@ static time64_t cuda_get_time(void) static int cuda_set_rtc_time(struct rtc_time *tm) { - time64_t nowtime; + u32 nowtime; struct adb_request req; - nowtime = rtc_tm_to_time64(tm) + RTC_OFFSET; + nowtime = lower_32_bits(rtc_tm_to_time64(tm) + RTC_OFFSET); if (cuda_request(&req, NULL, 6, CUDA_PACKET, CUDA_SET_TIME, nowtime >> 24, nowtime >> 16, nowtime >> 8, nowtime) < 0) @@ -140,8 +147,12 @@ static time64_t pmu_get_time(void) if (req.reply_len != 4) printk(KERN_ERR "pmu_get_time: got %d byte reply from PMU\n", req.reply_len); - now = (req.reply[0] << 24) + (req.reply[1] << 16) - + (req.reply[2] << 8) + req.reply[3]; + now = (u32)((req.reply[0] << 24) + (req.reply[1] << 16) + + (req.reply[2] << 8) + req.reply[3]); + + /* it's either after year 2040, or the RTC has gone backwards */ + WARN_ON(now < RTC_OFFSET); + return now - RTC_OFFSET; } @@ -149,10 +160,10 @@ static time64_t pmu_get_time(void) static int pmu_set_rtc_time(struct rtc_time *tm) { - time64_t nowtime; + u32 nowtime; struct adb_request req; - nowtime = rtc_tm_to_time64(tm) + RTC_OFFSET; + nowtime = lower_32_bits(rtc_tm_to_time64(tm) + RTC_OFFSET); if (pmu_request(&req, NULL, 5, PMU_SET_RTC, nowtime >> 24, nowtime >> 16, nowtime >> 8, nowtime) < 0) return -ENXIO; From 22cd978e598618e82c3c3348d2069184f6884182 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 26 Jun 2018 22:45:52 -0700 Subject: [PATCH 496/572] x86/entry/64/compat: Fix "x86/entry/64/compat: Preserve r8-r11 in int $0x80" Commit: 8bb2610bc496 ("x86/entry/64/compat: Preserve r8-r11 in int $0x80") was busted: my original patch had a minor conflict with some of the nospec changes, but "git apply" is very clever and silently accepted the patch by making the same changes to a different function in the same file. There was obviously a huge offset, but "git apply" for some reason doesn't feel any need to say so. Move the changes to the correct function. Now the test_syscall_vdso_32 selftests passes. If anyone cares to observe the original problem, try applying the patch at: https://lore.kernel.org/lkml/d4c4d9985fbe64f8c9e19291886453914b48caee.1523975710.git.luto@kernel.org/raw to the kernel at 316d097c4cd4e7f2ef50c40cff2db266593c4ec4: - "git am" and "git apply" accept the patch without any complaints at all - "patch -p1" at least prints out a message about the huge offset. Reported-by: zhijianx.li@intel.com Signed-off-by: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org #v4.17+ Fixes: 8bb2610bc496 ("x86/entry/64/compat: Preserve r8-r11 in int $0x80") Link: http://lkml.kernel.org/r/6012b922485401bc42676e804171ded262fc2ef2.1530078306.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_64_compat.S | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 9de7f1e1dede..7d0df78db727 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -84,13 +84,13 @@ ENTRY(entry_SYSENTER_compat) pushq %rdx /* pt_regs->dx */ pushq %rcx /* pt_regs->cx */ pushq $-ENOSYS /* pt_regs->ax */ - pushq %r8 /* pt_regs->r8 */ + pushq $0 /* pt_regs->r8 = 0 */ xorl %r8d, %r8d /* nospec r8 */ - pushq %r9 /* pt_regs->r9 */ + pushq $0 /* pt_regs->r9 = 0 */ xorl %r9d, %r9d /* nospec r9 */ - pushq %r10 /* pt_regs->r10 */ + pushq $0 /* pt_regs->r10 = 0 */ xorl %r10d, %r10d /* nospec r10 */ - pushq %r11 /* pt_regs->r11 */ + pushq $0 /* pt_regs->r11 = 0 */ xorl %r11d, %r11d /* nospec r11 */ pushq %rbx /* pt_regs->rbx */ xorl %ebx, %ebx /* nospec rbx */ @@ -374,13 +374,13 @@ ENTRY(entry_INT80_compat) pushq %rcx /* pt_regs->cx */ xorl %ecx, %ecx /* nospec cx */ pushq $-ENOSYS /* pt_regs->ax */ - pushq $0 /* pt_regs->r8 = 0 */ + pushq %r8 /* pt_regs->r8 */ xorl %r8d, %r8d /* nospec r8 */ - pushq $0 /* pt_regs->r9 = 0 */ + pushq %r9 /* pt_regs->r9 */ xorl %r9d, %r9d /* nospec r9 */ - pushq $0 /* pt_regs->r10 = 0 */ + pushq %r10 /* pt_regs->r10*/ xorl %r10d, %r10d /* nospec r10 */ - pushq $0 /* pt_regs->r11 = 0 */ + pushq %r11 /* pt_regs->r11 */ xorl %r11d, %r11d /* nospec r11 */ pushq %rbx /* pt_regs->rbx */ xorl %ebx, %ebx /* nospec rbx */ From ec348020566009d3da9b99f07c05814d13969c78 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 26 Jun 2018 22:17:17 -0700 Subject: [PATCH 497/572] selftests/x86/sigreturn/64: Fix spurious failures on AMD CPUs When I wrote the sigreturn test, I didn't realize that AMD's busted IRET behavior was different from Intel's busted IRET behavior: On AMD CPUs, the CPU leaks the high 32 bits of the kernel stack pointer to certain userspace contexts. Gee, thanks. There's very little the kernel can do about it. Modify the test so it passes. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/86e7fd3564497f657de30a36da4505799eebef01.1530076529.git.luto@kernel.org Signed-off-by: Ingo Molnar --- tools/testing/selftests/x86/sigreturn.c | 46 ++++++++++++++++--------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/tools/testing/selftests/x86/sigreturn.c b/tools/testing/selftests/x86/sigreturn.c index 246145b84a12..2559e2c01793 100644 --- a/tools/testing/selftests/x86/sigreturn.c +++ b/tools/testing/selftests/x86/sigreturn.c @@ -612,19 +612,38 @@ static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss) greg_t req = requested_regs[i], res = resulting_regs[i]; if (i == REG_TRAPNO || i == REG_IP) continue; /* don't care */ - if (i == REG_SP) { - printf("\tSP: %llx -> %llx\n", (unsigned long long)req, - (unsigned long long)res); + if (i == REG_SP) { /* - * In many circumstances, the high 32 bits of rsp - * are zeroed. For example, we could be a real - * 32-bit program, or we could hit any of a number - * of poorly-documented IRET or segmented ESP - * oddities. If this happens, it's okay. + * If we were using a 16-bit stack segment, then + * the kernel is a bit stuck: IRET only restores + * the low 16 bits of ESP/RSP if SS is 16-bit. + * The kernel uses a hack to restore bits 31:16, + * but that hack doesn't help with bits 63:32. + * On Intel CPUs, bits 63:32 end up zeroed, and, on + * AMD CPUs, they leak the high bits of the kernel + * espfix64 stack pointer. There's very little that + * the kernel can do about it. + * + * Similarly, if we are returning to a 32-bit context, + * the CPU will often lose the high 32 bits of RSP. */ - if (res == (req & 0xFFFFFFFF)) - continue; /* OK; not expected to work */ + + if (res == req) + continue; + + if (cs_bits != 64 && ((res ^ req) & 0xFFFFFFFF) == 0) { + printf("[NOTE]\tSP: %llx -> %llx\n", + (unsigned long long)req, + (unsigned long long)res); + continue; + } + + printf("[FAIL]\tSP mismatch: requested 0x%llx; got 0x%llx\n", + (unsigned long long)requested_regs[i], + (unsigned long long)resulting_regs[i]); + nerrs++; + continue; } bool ignore_reg = false; @@ -663,13 +682,6 @@ static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss) } if (requested_regs[i] != resulting_regs[i] && !ignore_reg) { - /* - * SP is particularly interesting here. The - * usual cause of failures is that we hit the - * nasty IRET case of returning to a 16-bit SS, - * in which case bits 16:31 of the *kernel* - * stack pointer persist in ESP. - */ printf("[FAIL]\tReg %d mismatch: requested 0x%llx; got 0x%llx\n", i, (unsigned long long)requested_regs[i], (unsigned long long)resulting_regs[i]); From e8a445dea219c32727016af14f847d2e8f7ebec8 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 26 Jun 2018 22:17:18 -0700 Subject: [PATCH 498/572] selftests/x86/sigreturn: Do minor cleanups We have short names for the requested and resulting register values. Use them instead of spelling out the whole register entry for each case. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/bb3bc1f923a2f6fe7912d22a1068fe29d6033d38.1530076529.git.luto@kernel.org Signed-off-by: Ingo Molnar --- tools/testing/selftests/x86/sigreturn.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/x86/sigreturn.c b/tools/testing/selftests/x86/sigreturn.c index 2559e2c01793..4d9dc3f2fd70 100644 --- a/tools/testing/selftests/x86/sigreturn.c +++ b/tools/testing/selftests/x86/sigreturn.c @@ -610,6 +610,7 @@ static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss) */ for (int i = 0; i < NGREG; i++) { greg_t req = requested_regs[i], res = resulting_regs[i]; + if (i == REG_TRAPNO || i == REG_IP) continue; /* don't care */ @@ -673,18 +674,18 @@ static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss) #endif /* Sanity check on the kernel */ - if (i == REG_CX && requested_regs[i] != resulting_regs[i]) { + if (i == REG_CX && req != res) { printf("[FAIL]\tCX (saved SP) mismatch: requested 0x%llx; got 0x%llx\n", - (unsigned long long)requested_regs[i], - (unsigned long long)resulting_regs[i]); + (unsigned long long)req, + (unsigned long long)res); nerrs++; continue; } - if (requested_regs[i] != resulting_regs[i] && !ignore_reg) { + if (req != res && !ignore_reg) { printf("[FAIL]\tReg %d mismatch: requested 0x%llx; got 0x%llx\n", - i, (unsigned long long)requested_regs[i], - (unsigned long long)resulting_regs[i]); + i, (unsigned long long)req, + (unsigned long long)res); nerrs++; } } From cfe19577047e74cdac5826adbdc2337d8437f8fb Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 25 Jun 2018 15:08:52 +0300 Subject: [PATCH 499/572] x86/efi: Fix efi_call_phys_epilog() with CONFIG_X86_5LEVEL=y Open-coded page table entry checks don't work correctly when we fold the page table level at runtime. pgd_present() on 4-level paging machine always returns true, but open-coded version of the check may return false-negative result and we silently skip the rest of the loop body in efi_call_phys_epilog(). Replace open-coded checks with proper helpers. Signed-off-by: Kirill A. Shutemov Acked-by: Ard Biesheuvel Cc: Andrey Ryabinin Cc: Baoquan He Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org # v4.12+ Fixes: 94133e46a0f5 ("x86/efi: Correct EFI identity mapping under 'efi=old_map' when KASLR is enabled") Link: http://lkml.kernel.org/r/20180625120852.18300-1-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/efi_64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index e01f7ceb9e7a..77873ce700ae 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -166,14 +166,14 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd) pgd = pgd_offset_k(pgd_idx * PGDIR_SIZE); set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]); - if (!(pgd_val(*pgd) & _PAGE_PRESENT)) + if (!pgd_present(*pgd)) continue; for (i = 0; i < PTRS_PER_P4D; i++) { p4d = p4d_offset(pgd, pgd_idx * PGDIR_SIZE + i * P4D_SIZE); - if (!(p4d_val(*p4d) & _PAGE_PRESENT)) + if (!p4d_present(*p4d)) continue; pud = (pud_t *)p4d_page_vaddr(*p4d); From b8c1e4293a5d1dfd19ab7b0984bfce8191940500 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 26 Jun 2018 13:03:41 +0300 Subject: [PATCH 500/572] x86/mm: Drop unneeded __always_inline for p4d page table helpers This reverts the following commits: 1ea66554d3b0 ("x86/mm: Mark p4d_offset() __always_inline") 046c0dbec023 ("x86: Mark native_set_p4d() as __always_inline") p4d_offset(), native_set_p4d() and native_p4d_clear() were marked __always_inline in attempt to move __pgtable_l5_enabled into __initdata section. It was required as KASAN initialization code is a user of USE_EARLY_PGTABLE_L5, so all pgtable_l5_enabled() translated to __pgtable_l5_enabled there. This includes pgtable_l5_enabled() called from inline p4d helpers. If compiler would decided to not inline these p4d helpers, but leave them standalone, we end up with section mismatch. We don't need __always_inline here anymore. __pgtable_l5_enabled moved back to be __ro_after_init. See the following commit: 51be13351517 ("Revert "x86/mm: Mark __pgtable_l5_enabled __initdata"") Signed-off-by: Kirill A. Shutemov Cc: Arnd Bergmann Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180626100341.49910-1-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable.h | 2 +- arch/x86/include/asm/pgtable_64.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 99ecde23c3ec..5715647fc4fe 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -898,7 +898,7 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd) #define pgd_page(pgd) pfn_to_page(pgd_pfn(pgd)) /* to find an entry in a page-table-directory. */ -static __always_inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) +static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) { if (!pgtable_l5_enabled()) return (p4d_t *)pgd; diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 0fdcd21dadbd..3c5385f9a88f 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -216,7 +216,7 @@ static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd) } #endif -static __always_inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d) +static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d) { pgd_t pgd; @@ -230,7 +230,7 @@ static __always_inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d) *p4dp = native_make_p4d(native_pgd_val(pgd)); } -static __always_inline void native_p4d_clear(p4d_t *p4d) +static inline void native_p4d_clear(p4d_t *p4d) { native_set_p4d(p4d, native_make_p4d(0)); } From 9331510135640429711afbd0c810686100824a79 Mon Sep 17 00:00:00 2001 From: Mathieu Malaterre Date: Tue, 26 Jun 2018 22:23:00 +0200 Subject: [PATCH 501/572] perf/core: Move inline keyword at the beginning of declaration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix non-fatal warning triggered during compilation with W=1: kernel/events/core.c:6106:1: warning: ‘inline’ is not at beginning of declaration [-Wold-style-declaration] static void __always_inline ^~~~~~ Signed-off-by: Mathieu Malaterre Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180626202301.20270-1-malat@debian.org Signed-off-by: Ingo Molnar --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 80cca2b30c4f..8f0434a9951a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6482,7 +6482,7 @@ void perf_prepare_sample(struct perf_event_header *header, data->phys_addr = perf_virt_to_phys(data->addr); } -static void __always_inline +static __always_inline void __perf_event_output(struct perf_event *event, struct perf_sample_data *data, struct pt_regs *regs, From 9e421b8fffb92bfd1c274f1deae611a6ab99d8a7 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 26 Jun 2018 12:09:51 +0200 Subject: [PATCH 502/572] Documentation: admin-guide: intel_pstate: Fix sysfs path Fix an incorrect sysfs path in the intel_pstate admin-guide documentation. Fixes: 33fc30b47098 (cpufreq: intel_pstate: Document the current behavior and user interface) Reported-by: Pawit Pornkitprasan Signed-off-by: Rafael J. Wysocki --- Documentation/admin-guide/pm/intel_pstate.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Documentation/admin-guide/pm/intel_pstate.rst b/Documentation/admin-guide/pm/intel_pstate.rst index 8b9164990956..d74fb572f6cc 100644 --- a/Documentation/admin-guide/pm/intel_pstate.rst +++ b/Documentation/admin-guide/pm/intel_pstate.rst @@ -324,8 +324,7 @@ Global Attributes ``intel_pstate`` exposes several global attributes (files) in ``sysfs`` to control its functionality at the system level. They are located in the -``/sys/devices/system/cpu/cpufreq/intel_pstate/`` directory and affect all -CPUs. +``/sys/devices/system/cpu/intel_pstate/`` directory and affect all CPUs. Some of them are not present if the ``intel_pstate=per_cpu_perf_limits`` argument is passed to the kernel in the command line. From 649f53a3e4cf6873741673b9271275e484c56194 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 26 Jun 2018 17:20:43 +0200 Subject: [PATCH 503/572] Documentation: intel_pstate: Describe hwp_dynamic_boost sysfs knob Document the recently introduced hwp_dynamic_boost sysfs knob allowing user space to tell intel_pstate to use iowait boosting in the active mode with HWP enabled (to improve performance). Signed-off-by: Rafael J. Wysocki Acked-by: Srinivas Pandruvada --- Documentation/admin-guide/pm/intel_pstate.rst | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Documentation/admin-guide/pm/intel_pstate.rst b/Documentation/admin-guide/pm/intel_pstate.rst index d74fb572f6cc..8f1d3de449b5 100644 --- a/Documentation/admin-guide/pm/intel_pstate.rst +++ b/Documentation/admin-guide/pm/intel_pstate.rst @@ -378,6 +378,17 @@ argument is passed to the kernel in the command line. but it affects the maximum possible value of per-policy P-state limits (see `Interpretation of Policy Attributes`_ below for details). +``hwp_dynamic_boost`` + This attribute is only present if ``intel_pstate`` works in the + `active mode with the HWP feature enabled `_ in + the processor. If set (equal to 1), it causes the minimum P-state limit + to be increased dynamically for a short time whenever a task previously + waiting on I/O is selected to run on a given logical CPU (the purpose + of this mechanism is to improve performance). + + This setting has no effect on logical CPUs whose minimum P-state limit + is directly set to the highest non-turbo P-state or above it. + .. _status_attr: ``status`` From d79d0d8ad0cb3d782b41631dfeac8eb05e414bcd Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Wed, 27 Jun 2018 11:07:15 +0200 Subject: [PATCH 504/572] x86/mm: Clean up the printk()s in show_fault_oops() - Remove 'nx_warning' and 'smep_warning', which are just pointless obfuscation. - Also convert to pr_crit(). Suggested-by: Joe Perches Signed-off-by: Dmitry Vyukov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180627090715.28076-1-dvyukov@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index ee85766e6329..2aafa6ab6103 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -641,11 +641,6 @@ static int is_f00f_bug(struct pt_regs *regs, unsigned long address) return 0; } -static const char nx_warning[] = KERN_CRIT -"kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n"; -static const char smep_warning[] = KERN_CRIT -"unable to execute userspace code (SMEP?) (uid: %d)\n"; - static void show_fault_oops(struct pt_regs *regs, unsigned long error_code, unsigned long address) @@ -664,11 +659,13 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, pte = lookup_address_in_pgd(pgd, address, &level); if (pte && pte_present(*pte) && !pte_exec(*pte)) - printk(nx_warning, from_kuid(&init_user_ns, current_uid())); + pr_crit("kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n", + from_kuid(&init_user_ns, current_uid())); if (pte && pte_present(*pte) && pte_exec(*pte) && (pgd_flags(*pgd) & _PAGE_USER) && (__read_cr4() & X86_CR4_SMEP)) - printk(smep_warning, from_kuid(&init_user_ns, current_uid())); + pr_crit("unable to execute userspace code (SMEP?) (uid: %d)\n", + from_kuid(&init_user_ns, current_uid())); } pr_alert("BUG: unable to handle kernel %s at %px\n", From 3eb1b955cd7ed1e621ace856710006c2a8a7f231 Mon Sep 17 00:00:00 2001 From: Keerthy Date: Tue, 5 Jun 2018 15:37:51 +0530 Subject: [PATCH 505/572] ARM: dts: da850: Fix interrups property for gpio The intc #interrupt-cells is equal to 1. Currently gpio node has 2 cells per IRQ which is wrong. Remove the additional cell for each of the interrupts. Signed-off-by: Keerthy Fixes: 2e38b946dc54 ("ARM: davinci: da850: add GPIO DT node") Signed-off-by: Sekhar Nori --- arch/arm/boot/dts/da850.dtsi | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/arm/boot/dts/da850.dtsi b/arch/arm/boot/dts/da850.dtsi index f6f1597b03df..0f4f817a9e22 100644 --- a/arch/arm/boot/dts/da850.dtsi +++ b/arch/arm/boot/dts/da850.dtsi @@ -549,11 +549,7 @@ gpio-controller; #gpio-cells = <2>; reg = <0x226000 0x1000>; - interrupts = <42 IRQ_TYPE_EDGE_BOTH - 43 IRQ_TYPE_EDGE_BOTH 44 IRQ_TYPE_EDGE_BOTH - 45 IRQ_TYPE_EDGE_BOTH 46 IRQ_TYPE_EDGE_BOTH - 47 IRQ_TYPE_EDGE_BOTH 48 IRQ_TYPE_EDGE_BOTH - 49 IRQ_TYPE_EDGE_BOTH 50 IRQ_TYPE_EDGE_BOTH>; + interrupts = <42 43 44 45 46 47 48 49 50>; ti,ngpio = <144>; ti,davinci-gpio-unbanked = <0>; status = "disabled"; From a685557fbbc3122ed11e8ad3fa63a11ebc5de8c3 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 26 Jun 2018 12:04:23 -0400 Subject: [PATCH 506/572] dm thin: handle running out of data space vs concurrent discard Discards issued to a DM thin device can complete to userspace (via fstrim) _before_ the metadata changes associated with the discards is reflected in the thinp superblock (e.g. free blocks). As such, if a user constructs a test that loops repeatedly over these steps, block allocation can fail due to discards not having completed yet: 1) fill thin device via filesystem file 2) remove file 3) fstrim From initial report, here: https://www.redhat.com/archives/dm-devel/2018-April/msg00022.html "The root cause of this issue is that dm-thin will first remove mapping and increase corresponding blocks' reference count to prevent them from being reused before DISCARD bios get processed by the underlying layers. However. increasing blocks' reference count could also increase the nr_allocated_this_transaction in struct sm_disk which makes smd->old_ll.nr_allocated + smd->nr_allocated_this_transaction bigger than smd->old_ll.nr_blocks. In this case, alloc_data_block() will never commit metadata to reset the begin pointer of struct sm_disk, because sm_disk_get_nr_free() always return an underflow value." While there is room for improvement to the space-map accounting that thinp is making use of: the reality is this test is inherently racey and will result in the previous iteration's fstrim's discard(s) completing vs concurrent block allocation, via dd, in the next iteration of the loop. No amount of space map accounting improvements will be able to allow user's to use a block before a discard of that block has completed. So the best we can really do is allow DM thinp to gracefully handle such aggressive use of all the pool's data by degrading the pool into out-of-data-space (OODS) mode. We _should_ get that behaviour already (if space map accounting didn't falsely cause alloc_data_block() to believe free space was available).. but short of that we handle the current reality that dm_pool_alloc_data_block() can return -ENOSPC. Reported-by: Dennis Yang Cc: stable@vger.kernel.org Signed-off-by: Mike Snitzer --- drivers/md/dm-thin.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 7945238df1c0..b900723bbd0f 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -1386,6 +1386,8 @@ static void schedule_external_copy(struct thin_c *tc, dm_block_t virt_block, static void set_pool_mode(struct pool *pool, enum pool_mode new_mode); +static void requeue_bios(struct pool *pool); + static void check_for_space(struct pool *pool) { int r; @@ -1398,8 +1400,10 @@ static void check_for_space(struct pool *pool) if (r) return; - if (nr_free) + if (nr_free) { set_pool_mode(pool, PM_WRITE); + requeue_bios(pool); + } } /* @@ -1476,7 +1480,10 @@ static int alloc_data_block(struct thin_c *tc, dm_block_t *result) r = dm_pool_alloc_data_block(pool->pmd, result); if (r) { - metadata_operation_failed(pool, "dm_pool_alloc_data_block", r); + if (r == -ENOSPC) + set_pool_mode(pool, PM_OUT_OF_DATA_SPACE); + else + metadata_operation_failed(pool, "dm_pool_alloc_data_block", r); return r; } From 86676c4685f774d818f7a6c401c925bbf2a92903 Mon Sep 17 00:00:00 2001 From: Katsuhiro Suzuki Date: Tue, 19 Jun 2018 13:12:05 +0900 Subject: [PATCH 507/572] arm64: dts: uniphier: fix widget name of headphone for LD11/LD20 boards This patch fixes wrong name of headphone widget for receiving events of insert/remove headphone plug from simple-card or audio-graph-card. If we use wrong widget name then we get warning messages such as "asoc-audio-graph-card sound: ASoC: DAPM unknown pin Headphones" when the plug is inserted or removed from headphone jack. Fixes: fb21a0acaa2b7 ("arm64: dts: uniphier: add sound node") Signed-off-by: Katsuhiro Suzuki Signed-off-by: Masahiro Yamada Signed-off-by: Olof Johansson --- arch/arm64/boot/dts/socionext/uniphier-ld11-global.dts | 2 +- arch/arm64/boot/dts/socionext/uniphier-ld20-global.dts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld11-global.dts b/arch/arm64/boot/dts/socionext/uniphier-ld11-global.dts index 9b4dc41703e3..ae3b5adf32df 100644 --- a/arch/arm64/boot/dts/socionext/uniphier-ld11-global.dts +++ b/arch/arm64/boot/dts/socionext/uniphier-ld11-global.dts @@ -54,7 +54,7 @@ sound { compatible = "audio-graph-card"; label = "UniPhier LD11"; - widgets = "Headphone", "Headphone Jack"; + widgets = "Headphone", "Headphones"; dais = <&i2s_port2 &i2s_port3 &i2s_port4 diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld20-global.dts b/arch/arm64/boot/dts/socionext/uniphier-ld20-global.dts index fe6608ea3277..7919233c9ce2 100644 --- a/arch/arm64/boot/dts/socionext/uniphier-ld20-global.dts +++ b/arch/arm64/boot/dts/socionext/uniphier-ld20-global.dts @@ -54,7 +54,7 @@ sound { compatible = "audio-graph-card"; label = "UniPhier LD20"; - widgets = "Headphone", "Headphone Jack"; + widgets = "Headphone", "Headphones"; dais = <&i2s_port2 &i2s_port3 &i2s_port4 From 3b41c3e28e9882760b78864487558b66aaa6261e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 27 Jun 2018 08:40:44 -0700 Subject: [PATCH 508/572] checkpatch: remove warning for 'old' stable@kernel.org address It may not be the actual real stable mailing list address, but the stable scripts to actually pick up on the traditional way to mark stable patches. There are also reasons to explicitly avoid using the actual mailing list address, since security patches with embargo dates generally do want the stable marking, but don't want tools etc to mistakenly send the patch out to the mailing list early. So don't warn for things that are still actively used and explicitly supported. Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 6 ------ 1 file changed, 6 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index e3b7362b0ee4..a9c05506e325 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -2606,12 +2606,6 @@ sub process { "A patch subject line should describe the change not the tool that found it\n" . $herecurr); } -# Check for old stable address - if ($line =~ /^\s*cc:\s*.*?.*$/i) { - ERROR("STABLE_ADDRESS", - "The 'stable' address should be 'stable\@vger.kernel.org'\n" . $herecurr); - } - # Check for unwanted Gerrit info if ($in_commit_log && $line =~ /^\s*change-id:/i) { ERROR("GERRIT_CHANGE_ID", From 429388682dc266e7a693f9c27e3aabd341d55343 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 22 Jun 2018 09:31:15 +0100 Subject: [PATCH 509/572] arm64: Avoid flush_icache_range() in alternatives patching code The implementation of flush_icache_range() includes instruction sequences which are themselves patched at runtime, so it is not safe to call from the patching framework. This patch reworks the alternatives cache-flushing code so that it rolls its own internal D-cache maintenance using DC CIVAC before invalidating the entire I-cache after all alternatives have been applied at boot. Modules don't cause any issues, since flush_icache_range() is safe to call by the time they are loaded. Acked-by: Mark Rutland Reported-by: Rohit Khanna Cc: Alexander Van Brunt Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/alternative.h | 7 +++- arch/arm64/kernel/alternative.c | 51 ++++++++++++++++++++++++---- arch/arm64/kernel/module.c | 5 ++- 3 files changed, 52 insertions(+), 11 deletions(-) diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index a91933b1e2e6..4b650ec1d7dd 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -28,7 +28,12 @@ typedef void (*alternative_cb_t)(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst); void __init apply_alternatives_all(void); -void apply_alternatives(void *start, size_t length); + +#ifdef CONFIG_MODULES +void apply_alternatives_module(void *start, size_t length); +#else +static inline void apply_alternatives_module(void *start, size_t length) { } +#endif #define ALTINSTR_ENTRY(feature,cb) \ " .word 661b - .\n" /* label */ \ diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 5c4bce4ac381..36fb069fd049 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -122,7 +122,30 @@ static void patch_alternative(struct alt_instr *alt, } } -static void __apply_alternatives(void *alt_region, bool use_linear_alias) +/* + * We provide our own, private D-cache cleaning function so that we don't + * accidentally call into the cache.S code, which is patched by us at + * runtime. + */ +static void clean_dcache_range_nopatch(u64 start, u64 end) +{ + u64 cur, d_size, ctr_el0; + + ctr_el0 = read_sanitised_ftr_reg(SYS_CTR_EL0); + d_size = 4 << cpuid_feature_extract_unsigned_field(ctr_el0, + CTR_DMINLINE_SHIFT); + cur = start & ~(d_size - 1); + do { + /* + * We must clean+invalidate to the PoC in order to avoid + * Cortex-A53 errata 826319, 827319, 824069 and 819472 + * (this corresponds to ARM64_WORKAROUND_CLEAN_CACHE) + */ + asm volatile("dc civac, %0" : : "r" (cur) : "memory"); + } while (cur += d_size, cur < end); +} + +static void __apply_alternatives(void *alt_region, bool is_module) { struct alt_instr *alt; struct alt_region *region = alt_region; @@ -145,7 +168,7 @@ static void __apply_alternatives(void *alt_region, bool use_linear_alias) pr_info_once("patching kernel code\n"); origptr = ALT_ORIG_PTR(alt); - updptr = use_linear_alias ? lm_alias(origptr) : origptr; + updptr = is_module ? origptr : lm_alias(origptr); nr_inst = alt->orig_len / AARCH64_INSN_SIZE; if (alt->cpufeature < ARM64_CB_PATCH) @@ -155,8 +178,20 @@ static void __apply_alternatives(void *alt_region, bool use_linear_alias) alt_cb(alt, origptr, updptr, nr_inst); - flush_icache_range((uintptr_t)origptr, - (uintptr_t)(origptr + nr_inst)); + if (!is_module) { + clean_dcache_range_nopatch((u64)origptr, + (u64)(origptr + nr_inst)); + } + } + + /* + * The core module code takes care of cache maintenance in + * flush_module_icache(). + */ + if (!is_module) { + dsb(ish); + __flush_icache_all(); + isb(); } } @@ -178,7 +213,7 @@ static int __apply_alternatives_multi_stop(void *unused) isb(); } else { BUG_ON(alternatives_applied); - __apply_alternatives(®ion, true); + __apply_alternatives(®ion, false); /* Barriers provided by the cache flushing */ WRITE_ONCE(alternatives_applied, 1); } @@ -192,12 +227,14 @@ void __init apply_alternatives_all(void) stop_machine(__apply_alternatives_multi_stop, NULL, cpu_online_mask); } -void apply_alternatives(void *start, size_t length) +#ifdef CONFIG_MODULES +void apply_alternatives_module(void *start, size_t length) { struct alt_region region = { .begin = start, .end = start + length, }; - __apply_alternatives(®ion, false); + __apply_alternatives(®ion, true); } +#endif diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 155fd91e78f4..f0f27aeefb73 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -448,9 +448,8 @@ int module_finalize(const Elf_Ehdr *hdr, const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) { - if (strcmp(".altinstructions", secstrs + s->sh_name) == 0) { - apply_alternatives((void *)s->sh_addr, s->sh_size); - } + if (strcmp(".altinstructions", secstrs + s->sh_name) == 0) + apply_alternatives_module((void *)s->sh_addr, s->sh_size); #ifdef CONFIG_ARM64_MODULE_PLTS if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE) && !strcmp(".text.ftrace_trampoline", secstrs + s->sh_name)) From 24fe1b0efad4fcdd32ce46cffeab297f22581707 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 22 Jun 2018 09:31:16 +0100 Subject: [PATCH 510/572] arm64: Remove unnecessary ISBs from set_{pte,pmd,pud} Commit 7f0b1bf04511 ("arm64: Fix barriers used for page table modifications") fixed a reported issue with fixmap page-table entries not being visible to the walker due to a missing DSB instruction. At the same time, it added ISB instructions to the arm64 set_{pte,pmd,pud} functions, which are not required by the architecture and make little sense in isolation. Remove the redundant ISBs. Acked-by: Catalin Marinas Acked-by: Mark Rutland Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 9f82d6b53851..1bdeca8918a6 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -224,10 +224,8 @@ static inline void set_pte(pte_t *ptep, pte_t pte) * Only if the new pte is valid and kernel, otherwise TLB maintenance * or update_mmu_cache() have the necessary barriers. */ - if (pte_valid_not_user(pte)) { + if (pte_valid_not_user(pte)) dsb(ishst); - isb(); - } } extern void __sync_icache_dcache(pte_t pteval); @@ -434,7 +432,6 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) { WRITE_ONCE(*pmdp, pmd); dsb(ishst); - isb(); } static inline void pmd_clear(pmd_t *pmdp) @@ -485,7 +482,6 @@ static inline void set_pud(pud_t *pudp, pud_t pud) { WRITE_ONCE(*pudp, pud); dsb(ishst); - isb(); } static inline void pud_clear(pud_t *pudp) From 0e49740c35c1d8f0c8fd84e9387b6a1007228d6f Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Wed, 27 Jun 2018 13:07:07 -0500 Subject: [PATCH 511/572] MAINTAINERS: Timur has a kernel.org address Timur Tabi no longer works for Qualcomm, and he now has a kernel.org email address, so update MAINTAINERS accordingly. Signed-off-by: Timur Tabi Signed-off-by: Linus Torvalds --- MAINTAINERS | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index f0896c04811e..bfb8a18d4793 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5673,7 +5673,7 @@ F: drivers/crypto/caam/ F: Documentation/devicetree/bindings/crypto/fsl-sec4.txt FREESCALE DIU FRAMEBUFFER DRIVER -M: Timur Tabi +M: Timur Tabi L: linux-fbdev@vger.kernel.org S: Maintained F: drivers/video/fbdev/fsl-diu-fb.* @@ -5773,7 +5773,7 @@ S: Maintained F: drivers/net/wan/fsl_ucc_hdlc* FREESCALE QUICC ENGINE UCC UART DRIVER -M: Timur Tabi +M: Timur Tabi L: linuxppc-dev@lists.ozlabs.org S: Maintained F: drivers/tty/serial/ucc_uart.c @@ -5797,7 +5797,7 @@ F: drivers/net/ethernet/freescale/fs_enet/ F: include/linux/fs_enet_pd.h FREESCALE SOC SOUND DRIVERS -M: Timur Tabi +M: Timur Tabi M: Nicolin Chen M: Xiubo Li R: Fabio Estevam @@ -11813,9 +11813,9 @@ F: Documentation/devicetree/bindings/opp/kryo-cpufreq.txt F: drivers/cpufreq/qcom-cpufreq-kryo.c QUALCOMM EMAC GIGABIT ETHERNET DRIVER -M: Timur Tabi +M: Timur Tabi L: netdev@vger.kernel.org -S: Supported +S: Maintained F: drivers/net/ethernet/qualcomm/emac/ QUALCOMM HEXAGON ARCHITECTURE From d9e98ee248b04b2462e7411b204d420d04f1ba27 Mon Sep 17 00:00:00 2001 From: Leo Liu Date: Mon, 25 Jun 2018 14:56:06 -0400 Subject: [PATCH 512/572] drm/amdgpu: fix UBSAN: Undefined behaviour for amdgpu_fence.c Here is the UBSAN dump: [ 3.866656] index 2 is out of range for type 'amdgpu_uvd_inst [2]' [ 3.866693] Workqueue: events work_for_cpu_fn [ 3.866702] Call Trace: [ 3.866710] dump_stack+0x85/0xc5 [ 3.866719] ubsan_epilogue+0x9/0x40 [ 3.866727] __ubsan_handle_out_of_bounds+0x89/0x90 [ 3.866737] ? rcu_read_lock_sched_held+0x58/0x60 [ 3.866746] ? __kmalloc+0x26c/0x2d0 [ 3.866846] amdgpu_fence_driver_start_ring+0x259/0x280 [amdgpu] [ 3.866896] amdgpu_ring_init+0x12c/0x710 [amdgpu] [ 3.866906] ? sprintf+0x42/0x50 [ 3.866956] amdgpu_gfx_kiq_init_ring+0x1bc/0x3a0 [amdgpu] [ 3.867009] gfx_v8_0_sw_init+0x1ad3/0x2360 [amdgpu] [ 3.867062] ? smu7_init+0xec/0x160 [amdgpu] [ 3.867109] amdgpu_device_init+0x112c/0x1dc0 [amdgpu] 'ring->me' might be set as 2 with 'amdgpu_gfx_kiq_init_ring', that would cause out of range for 'amdgpu_uvd_inst[2]'. v2: simplified with ring type Signed-off-by: Leo Liu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 39ec6b8890a1..e74d620d9699 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -376,7 +376,7 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, struct amdgpu_device *adev = ring->adev; uint64_t index; - if (ring != &adev->uvd.inst[ring->me].ring) { + if (ring->funcs->type != AMDGPU_RING_TYPE_UVD) { ring->fence_drv.cpu_addr = &adev->wb.wb[ring->fence_offs]; ring->fence_drv.gpu_addr = adev->wb.gpu_addr + (ring->fence_offs * 4); } else { From 62d5b8e33b774128c93baac35a4347a6cfb64434 Mon Sep 17 00:00:00 2001 From: James Zhu Date: Tue, 19 Jun 2018 13:44:04 -0400 Subject: [PATCH 513/572] drm/amdgpu:Support new VCN FW version naming convention Support new VCN FW version naming convention: [31, 28] for VEP interface major version if applicable [27, 24] for decode interface major version [23, 20] for encode interface major version [19, 12] for encode interface minor version [11, 0] for firmware revision Bit 20-23, it is encode major and non-zero for new naming convention. This field is part of version minor and DRM_DISABLED_FLAG in old naming convention. Since the latest version minor is 0x5B and DRM_DISABLED_FLAG is zero in old naming convention, this field is always zero so far. These four bits are used to tell which naming convention is present. Signed-off-by: James Zhu Reviewed-by: Fang, Peter Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 33 ++++++++++++++++++++----- 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 127e87b470ff..1b4ad9b2a755 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -52,7 +52,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) unsigned long bo_size; const char *fw_name; const struct common_firmware_header *hdr; - unsigned version_major, version_minor, family_id; + unsigned char fw_check; int r; INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler); @@ -83,12 +83,33 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) hdr = (const struct common_firmware_header *)adev->vcn.fw->data; adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version); - family_id = le32_to_cpu(hdr->ucode_version) & 0xff; - version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff; - version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff; - DRM_INFO("Found VCN firmware Version: %hu.%hu Family ID: %hu\n", - version_major, version_minor, family_id); + /* Bit 20-23, it is encode major and non-zero for new naming convention. + * This field is part of version minor and DRM_DISABLED_FLAG in old naming + * convention. Since the l:wq!atest version minor is 0x5B and DRM_DISABLED_FLAG + * is zero in old naming convention, this field is always zero so far. + * These four bits are used to tell which naming convention is present. + */ + fw_check = (le32_to_cpu(hdr->ucode_version) >> 20) & 0xf; + if (fw_check) { + unsigned int dec_ver, enc_major, enc_minor, vep, fw_rev; + + fw_rev = le32_to_cpu(hdr->ucode_version) & 0xfff; + enc_minor = (le32_to_cpu(hdr->ucode_version) >> 12) & 0xff; + enc_major = fw_check; + dec_ver = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xf; + vep = (le32_to_cpu(hdr->ucode_version) >> 28) & 0xf; + DRM_INFO("Found VCN firmware Version ENC: %hu.%hu DEC: %hu VEP: %hu Revision: %hu\n", + enc_major, enc_minor, dec_ver, vep, fw_rev); + } else { + unsigned int version_major, version_minor, family_id; + + family_id = le32_to_cpu(hdr->ucode_version) & 0xff; + version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff; + version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff; + DRM_INFO("Found VCN firmware Version: %hu.%hu Family ID: %hu\n", + version_major, version_minor, family_id); + } bo_size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8) + AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_HEAP_SIZE From 4de9f38bb2cce3a4821ffb8a83d6b08f6e37d905 Mon Sep 17 00:00:00 2001 From: Shirish S Date: Tue, 26 Jun 2018 09:32:39 +0530 Subject: [PATCH 514/572] drm/amd/display: release spinlock before committing updates to stream MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, amdgpu_do_flip() spinlocks crtc->dev->event_lock and releases it only after committing updates to the stream. dc_commit_updates_for_stream() should be moved out of spinlock for the below reasons: 1. event_lock is supposed to protect access to acrct->pflip_status _only_ 2. dc_commit_updates_for_stream() has potential sleep's and also its not appropriate to be in an atomic state for such long sequences of code. Signed-off-by: Shirish S Suggested-by: Andrey Grodzovsky Reviewed-by: Michel Dänzer Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 689dbdf44bbf..3a8d6356afc2 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3928,10 +3928,11 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc, if (acrtc->base.state->event) prepare_flip_isr(acrtc); + spin_unlock_irqrestore(&crtc->dev->event_lock, flags); + surface_updates->surface = dc_stream_get_status(acrtc_state->stream)->plane_states[0]; surface_updates->flip_addr = &addr; - dc_commit_updates_for_stream(adev->dm.dc, surface_updates, 1, @@ -3944,9 +3945,6 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc, __func__, addr.address.grph.addr.high_part, addr.address.grph.addr.low_part); - - - spin_unlock_irqrestore(&crtc->dev->event_lock, flags); } /* From d5b4885b1dff72ac670b518cfeaac719d768bd4d Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Thu, 26 Apr 2018 12:50:46 +0200 Subject: [PATCH 515/572] ARM64: dts: meson: disable sd-uhs modes on the libretech-cc There is a problem with the sd-uhs mode when doing a soft reboot. Switching back from 1.8v to 3.3v messes with the card, which no longer respond (timeout errors). According to the specification, we should perform a card reset (power cycling the card) but this is something we cannot control on this design. Then the only solution to restore the communication with the card is an "unplug-plug" which is not acceptable Until we find a solution, if any, disable the sd-uhs modes on this design. For the people using uhs at the moment, there will a performance drop as a result. Fixes: 3cde63ebc85c ("ARM64: dts: meson-gxl: libretech-cc: enable high speed modes") Signed-off-by: Jerome Brunet Cc: stable@vger.kernel.org Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts index 3e3eb31748a3..f63bceb88caa 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts @@ -234,9 +234,6 @@ bus-width = <4>; cap-sd-highspeed; - sd-uhs-sdr12; - sd-uhs-sdr25; - sd-uhs-sdr50; max-frequency = <100000000>; disable-wp; From e490520c902e06e837e07948f026e7949bb16007 Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Mon, 4 Jun 2018 15:23:09 -0700 Subject: [PATCH 516/572] ARM64: dts: meson: fix register ranges for SD/eMMC Based on updated information from Amlogic, correct the register range for the SD/eMMC blocks to the right size. Reported-by: Yixun Lan Tested-by: Yixun Lan Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/meson-axg.dtsi | 4 ++-- arch/arm64/boot/dts/amlogic/meson-gx.dtsi | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm64/boot/dts/amlogic/meson-axg.dtsi b/arch/arm64/boot/dts/amlogic/meson-axg.dtsi index fee87737a201..67d7115e4eff 100644 --- a/arch/arm64/boot/dts/amlogic/meson-axg.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-axg.dtsi @@ -132,7 +132,7 @@ sd_emmc_b: sd@5000 { compatible = "amlogic,meson-axg-mmc"; - reg = <0x0 0x5000 0x0 0x2000>; + reg = <0x0 0x5000 0x0 0x800>; interrupts = ; status = "disabled"; clocks = <&clkc CLKID_SD_EMMC_B>, @@ -144,7 +144,7 @@ sd_emmc_c: mmc@7000 { compatible = "amlogic,meson-axg-mmc"; - reg = <0x0 0x7000 0x0 0x2000>; + reg = <0x0 0x7000 0x0 0x800>; interrupts = ; status = "disabled"; clocks = <&clkc CLKID_SD_EMMC_C>, diff --git a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi index 3c31e21cbed7..b003f324ca31 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi @@ -457,21 +457,21 @@ sd_emmc_a: mmc@70000 { compatible = "amlogic,meson-gx-mmc", "amlogic,meson-gxbb-mmc"; - reg = <0x0 0x70000 0x0 0x2000>; + reg = <0x0 0x70000 0x0 0x800>; interrupts = ; status = "disabled"; }; sd_emmc_b: mmc@72000 { compatible = "amlogic,meson-gx-mmc", "amlogic,meson-gxbb-mmc"; - reg = <0x0 0x72000 0x0 0x2000>; + reg = <0x0 0x72000 0x0 0x800>; interrupts = ; status = "disabled"; }; sd_emmc_c: mmc@74000 { compatible = "amlogic,meson-gx-mmc", "amlogic,meson-gxbb-mmc"; - reg = <0x0 0x74000 0x0 0x2000>; + reg = <0x0 0x74000 0x0 0x800>; interrupts = ; status = "disabled"; }; From d511b3e4087eedbe11c7496c396432b8b7c2d7d9 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Tue, 5 Jun 2018 10:52:23 +0200 Subject: [PATCH 517/572] ARM64: dts: meson-gxl-s905x-p212: Add phy-supply for usb0 Like LibreTech-CC, the USB0 needs the 5V regulator to be enabled to power the devices on the P212 Reference Design based boards. Fixes: b9f07cb4f41f ("ARM64: dts: meson-gxl-s905x-p212: enable the USB controller") Signed-off-by: Neil Armstrong Acked-by: Martin Blumenstingl Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi index 0cfd701809de..a1b31013ab6e 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-p212.dtsi @@ -189,3 +189,10 @@ &usb0 { status = "okay"; }; + +&usb2_phy0 { + /* + * HDMI_5V is also used as supply for the USB VBUS. + */ + phy-supply = <&hdmi_5v>; +}; From 48e21ded0432ee1e2359d4143d7a6925cefee1b5 Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Thu, 7 Jun 2018 13:51:01 -0700 Subject: [PATCH 518/572] ARM64: dts: meson-gx: fix ATF reserved memory region Vendor firmware/uboot has different reserved regions depending on firmware version, but current codebase reserves the same regions on GXL and GXBB, so move the additional reserved memory region to common .dtsi. Found when putting a recent vendor u-boot on meson-gxbb-p200. Suggested-by: Neil Armstrong Cc: stable@vger.kernel.org Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/meson-gx.dtsi | 6 ++++++ arch/arm64/boot/dts/amlogic/meson-gxl.dtsi | 8 -------- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi index b003f324ca31..b8dc4dbb391b 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi @@ -35,6 +35,12 @@ no-map; }; + /* Alternate 3 MiB reserved for ARM Trusted Firmware (BL31) */ + secmon_reserved_alt: secmon@5000000 { + reg = <0x0 0x05000000 0x0 0x300000>; + no-map; + }; + linux,cma { compatible = "shared-dma-pool"; reusable; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi index 27538eea547b..c87a80e9bcc6 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi @@ -13,14 +13,6 @@ / { compatible = "amlogic,meson-gxl"; - reserved-memory { - /* Alternate 3 MiB reserved for ARM Trusted Firmware (BL31) */ - secmon_reserved_alt: secmon@5000000 { - reg = <0x0 0x05000000 0x0 0x300000>; - no-map; - }; - }; - soc { usb0: usb@c9000000 { status = "disabled"; From 6d28d577510f1a51f7ffbe830fdbf42077e0058b Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Mon, 25 Jun 2018 14:56:53 +0200 Subject: [PATCH 519/572] ARM64: dts: meson-axg: fix ethernet stability issue Like the odroid-c2 and wetek, the s400 uses the RTL8211F and seems to suffer from the kind of stability issue. Doing an iperf3 download test, we can see a significant number of LPI interrupts on the tx path. After a short while (5 to 15 seconds), the network connection dies. If using rootfs over NFS, the connection may also break during the boot sequence. We still don't have a real explanation for this problem so let's disable EEE once again. Fixes: f6f6ac914b82 ("ARM64: dts: meson-axg: enable ethernet for A113D S400 board") Signed-off-by: Jerome Brunet Reviewed-by: Neil Armstrong Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/meson-axg-s400.dts | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/amlogic/meson-axg-s400.dts b/arch/arm64/boot/dts/amlogic/meson-axg-s400.dts index 4b3331fbfe39..dff9b15eb3c0 100644 --- a/arch/arm64/boot/dts/amlogic/meson-axg-s400.dts +++ b/arch/arm64/boot/dts/amlogic/meson-axg-s400.dts @@ -66,9 +66,22 @@ ðmac { status = "okay"; - phy-mode = "rgmii"; pinctrl-0 = <ð_rgmii_y_pins>; pinctrl-names = "default"; + phy-handle = <ð_phy0>; + phy-mode = "rgmii"; + + mdio { + compatible = "snps,dwmac-mdio"; + #address-cells = <1>; + #size-cells = <0>; + + eth_phy0: ethernet-phy@0 { + /* Realtek RTL8211F (0x001cc916) */ + reg = <0>; + eee-broken-1000t; + }; + }; }; &uart_A { From 1c38f4afd5d40234b67635b3c608a4093be04b96 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Sat, 23 Jun 2018 17:00:56 +0200 Subject: [PATCH 520/572] ARM64: dts: meson-gxl: fix Mali GPU compatible string meson-gxl-mali.dtsi is only used on GXL SoCs. Thus it should use the GXL specific compatible string instead of the GXBB one. For now this is purely cosmetic since the (out-of-tree) lima driver for this GPU currently uses the "arm,mali-450" match instead of the SoC specific one. However, update the .dts to match the documentation since this driver behavior might change in the future. Signed-off-by: Martin Blumenstingl Acked-by: Neil Armstrong Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/amlogic/meson-gxl-mali.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-mali.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl-mali.dtsi index eb327664a4d8..6aaafff674f9 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-mali.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-mali.dtsi @@ -6,7 +6,7 @@ &apb { mali: gpu@c0000 { - compatible = "amlogic,meson-gxbb-mali", "arm,mali-450"; + compatible = "amlogic,meson-gxl-mali", "arm,mali-450"; reg = <0x0 0xc0000 0x0 0x40000>; interrupts = , , From 877f919e192a09e77962a13d7165783027dee5fd Mon Sep 17 00:00:00 2001 From: Chunyu Hu Date: Sun, 10 Jun 2018 03:51:24 +0800 Subject: [PATCH 521/572] proc: add proc_seq_release kmemleak reported some memory leak on reading proc files. After adding some debug lines, find that proc_seq_fops is using seq_release as release handler, which won't handle the free of 'private' field of seq_file, while in fact the open handler proc_seq_open could create the private data with __seq_open_private when state_size is greater than zero. So after reading files created with proc_create_seq_private, such as /proc/timer_list and /proc/vmallocinfo, the private mem of a seq_file is not freed. Fix it by adding the paired proc_seq_release as the default release handler of proc_seq_ops instead of seq_release. Fixes: 44414d82cfe0 ("proc: introduce proc_create_seq_private") Reviewed-by: Christoph Hellwig CC: Christoph Hellwig Signed-off-by: Chunyu Hu Signed-off-by: Al Viro --- fs/proc/generic.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 6ac1c92997ea..bb1c1625b158 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -564,11 +564,20 @@ static int proc_seq_open(struct inode *inode, struct file *file) return seq_open(file, de->seq_ops); } +static int proc_seq_release(struct inode *inode, struct file *file) +{ + struct proc_dir_entry *de = PDE(inode); + + if (de->state_size) + return seq_release_private(inode, file); + return seq_release(inode, file); +} + static const struct file_operations proc_seq_fops = { .open = proc_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = proc_seq_release, }; struct proc_dir_entry *proc_create_seq_private(const char *name, umode_t mode, From 6f7de19ed3d4d3526ca5eca428009f97cf969c2f Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 27 Jun 2018 18:19:55 +0800 Subject: [PATCH 522/572] btrfs: quota: Set rescan progress to (u64)-1 if we hit last leaf Commit ff3d27a048d9 ("btrfs: qgroup: Finish rescan when hit the last leaf of extent tree") added a new exit for rescan finish. However after finishing quota rescan, we set fs_info->qgroup_rescan_progress to (u64)-1 before we exit through the original exit path. While we missed that assignment of (u64)-1 in the new exit path. The end result is, the quota status item doesn't have the same value. (-1 vs the last bytenr + 1) Although it doesn't affect quota accounting, it's still better to keep the original behavior. Reported-by: Misono Tomohiro Fixes: ff3d27a048d9 ("btrfs: qgroup: Finish rescan when hit the last leaf of extent tree") Signed-off-by: Qu Wenruo Reviewed-by: Misono Tomohiro Signed-off-by: David Sterba --- fs/btrfs/qgroup.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 1874a6d2e6f5..99f2b9ce0f15 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -2680,8 +2680,10 @@ out: free_extent_buffer(scratch_leaf); } - if (done && !ret) + if (done && !ret) { ret = 1; + fs_info->qgroup_rescan_progress.objectid = (u64)-1; + } return ret; } From 717beb96d969561cff53aad2f09547db20ee6ffd Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 25 Jun 2018 10:03:41 -0700 Subject: [PATCH 523/572] Btrfs: fix regression in btrfs_page_mkwrite() from vm_fault_t conversion The vm_fault_t conversion commit introduced a ret2 variable for tracking the integer return values from internal btrfs functions. It was sometimes returning VM_FAULT_LOCKED for pages that were actually invalid and had been removed from the radix. Something like this: ret2 = btrfs_delalloc_reserve_space() // returns zero on success lock_page(page) if (page->mapping != inode->i_mapping) goto out_unlock; ... out_unlock: if (!ret2) { ... return VM_FAULT_LOCKED; } This ends up triggering this WARNING in btrfs_destroy_inode() WARN_ON(BTRFS_I(inode)->block_rsv.size); xfstests generic/095 was able to reliably reproduce the errors. Since out_unlock: is only used for errors, this fix moves it below the if (!ret2) check we use to return VM_FAULT_LOCKED for success. Fixes: a528a2415087 (btrfs: change return type of btrfs_page_mkwrite to vm_fault_t) Signed-off-by: Chris Mason Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a6bf9e2ff68f..f21f4223d03b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9005,13 +9005,14 @@ again: unlock_extent_cached(io_tree, page_start, page_end, &cached_state); -out_unlock: if (!ret2) { btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, true); sb_end_pagefault(inode->i_sb); extent_changeset_free(data_reserved); return VM_FAULT_LOCKED; } + +out_unlock: unlock_page(page); out: btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE, (ret != 0)); From e4e7ede739f7fb468686dfffa2d1e35dca35bacd Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 27 Jun 2018 00:43:15 +0100 Subject: [PATCH 524/572] Btrfs: fix mount failure when qgroup rescan is in progress If a power failure happens while the qgroup rescan kthread is running, the next mount operation will always fail. This is because of a recent regression that makes qgroup_rescan_init() incorrectly return -EINVAL when we are mounting the filesystem (through btrfs_read_qgroup_config()). This causes the -EINVAL error to be returned regardless of any qgroup flags being set instead of returning the error only when neither of the flags BTRFS_QGROUP_STATUS_FLAG_RESCAN nor BTRFS_QGROUP_STATUS_FLAG_ON are set. A test case for fstests follows up soon. Fixes: 9593bf49675e ("btrfs: qgroup: show more meaningful qgroup_rescan_init error message") Signed-off-by: Filipe Manana Reviewed-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/qgroup.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 99f2b9ce0f15..c25dc47210a3 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -2786,13 +2786,20 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, if (!init_flags) { /* we're resuming qgroup rescan at mount time */ - if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)) + if (!(fs_info->qgroup_flags & + BTRFS_QGROUP_STATUS_FLAG_RESCAN)) { btrfs_warn(fs_info, "qgroup rescan init failed, qgroup is not enabled"); - else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) + ret = -EINVAL; + } else if (!(fs_info->qgroup_flags & + BTRFS_QGROUP_STATUS_FLAG_ON)) { btrfs_warn(fs_info, "qgroup rescan init failed, qgroup rescan is not queued"); - return -EINVAL; + ret = -EINVAL; + } + + if (ret) + return ret; } mutex_lock(&fs_info->qgroup_rescan_lock); From 210d0797c97d0e8f3b1a932a0dc143f4c57008a3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 28 Jun 2018 13:59:25 +0200 Subject: [PATCH 525/572] swiotlb: export swiotlb_dma_ops For architectures that do not use per-device dma ops we need to export the dma_map_ops structure returned from get_arch_dma_ops(). Fixes: 10314e09 ("riscv: add swiotlb support") Signed-off-by: Christoph Hellwig Reported-by: Andreas Schwab --- kernel/dma/swiotlb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 04b68d9dffac..904541055792 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -1085,3 +1085,4 @@ const struct dma_map_ops swiotlb_dma_ops = { .unmap_page = swiotlb_unmap_page, .dma_supported = dma_direct_supported, }; +EXPORT_SYMBOL(swiotlb_dma_ops); From 3d63b7e4ae0dc5e02d28ddd2fa1f945defc68d81 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sat, 26 May 2018 09:53:13 +0900 Subject: [PATCH 526/572] n_tty: Fix stall at n_tty_receive_char_special(). syzbot is reporting stalls at n_tty_receive_char_special() [1]. This is because comparison is not working as expected since ldata->read_head can change at any moment. Mitigate this by explicitly masking with buffer size when checking condition for "while" loops. [1] https://syzkaller.appspot.com/bug?id=3d7481a346958d9469bebbeb0537d5f056bdd6e8 Signed-off-by: Tetsuo Handa Reported-by: syzbot Fixes: bc5a5e3f45d04784 ("n_tty: Don't wrap input buffer indices at buffer size") Cc: stable Cc: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_tty.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index cbe98bc2b998..b279f8730e04 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -124,6 +124,8 @@ struct n_tty_data { struct mutex output_lock; }; +#define MASK(x) ((x) & (N_TTY_BUF_SIZE - 1)) + static inline size_t read_cnt(struct n_tty_data *ldata) { return ldata->read_head - ldata->read_tail; @@ -978,14 +980,15 @@ static void eraser(unsigned char c, struct tty_struct *tty) } seen_alnums = 0; - while (ldata->read_head != ldata->canon_head) { + while (MASK(ldata->read_head) != MASK(ldata->canon_head)) { head = ldata->read_head; /* erase a single possibly multibyte character */ do { head--; c = read_buf(ldata, head); - } while (is_continuation(c, tty) && head != ldata->canon_head); + } while (is_continuation(c, tty) && + MASK(head) != MASK(ldata->canon_head)); /* do not partially erase */ if (is_continuation(c, tty)) @@ -1027,7 +1030,7 @@ static void eraser(unsigned char c, struct tty_struct *tty) * This info is used to go back the correct * number of columns. */ - while (tail != ldata->canon_head) { + while (MASK(tail) != MASK(ldata->canon_head)) { tail--; c = read_buf(ldata, tail); if (c == '\t') { @@ -1302,7 +1305,7 @@ n_tty_receive_char_special(struct tty_struct *tty, unsigned char c) finish_erasing(ldata); echo_char(c, tty); echo_char_raw('\n', ldata); - while (tail != ldata->read_head) { + while (MASK(tail) != MASK(ldata->read_head)) { echo_char(read_buf(ldata, tail), tty); tail++; } @@ -2411,7 +2414,7 @@ static unsigned long inq_canon(struct n_tty_data *ldata) tail = ldata->read_tail; nr = head - tail; /* Skip EOF-chars.. */ - while (head != tail) { + while (MASK(head) != MASK(tail)) { if (test_bit(tail & (N_TTY_BUF_SIZE - 1), ldata->read_flags) && read_buf(ldata, tail) == __DISABLED_CHAR) nr--; From ebec3f8f5271139df618ebdf8427e24ba102ba94 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sat, 26 May 2018 09:53:14 +0900 Subject: [PATCH 527/572] n_tty: Access echo_* variables carefully. syzbot is reporting stalls at __process_echoes() [1]. This is because since ldata->echo_commit < ldata->echo_tail becomes true for some reason, the discard loop is serving as almost infinite loop. This patch tries to avoid falling into ldata->echo_commit < ldata->echo_tail situation by making access to echo_* variables more carefully. Since reset_buffer_flags() is called without output_lock held, it should not touch echo_* variables. And omit a call to reset_buffer_flags() from n_tty_open() by using vzalloc(). Since add_echo_byte() is called without output_lock held, it needs memory barrier between storing into echo_buf[] and incrementing echo_head counter. echo_buf() needs corresponding memory barrier before reading echo_buf[]. Lack of handling the possibility of not-yet-stored multi-byte operation might be the reason of falling into ldata->echo_commit < ldata->echo_tail situation, for if I do WARN_ON(ldata->echo_commit == tail + 1) prior to echo_buf(ldata, tail + 1), the WARN_ON() fires. Also, explicitly masking with buffer for the former "while" loop, and use ldata->echo_commit > tail for the latter "while" loop. [1] https://syzkaller.appspot.com/bug?id=17f23b094cd80df750e5b0f8982c521ee6bcbf40 Signed-off-by: Tetsuo Handa Reported-by: syzbot Cc: Peter Hurley Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_tty.c | 42 ++++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index b279f8730e04..431742201709 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -143,6 +143,7 @@ static inline unsigned char *read_buf_addr(struct n_tty_data *ldata, size_t i) static inline unsigned char echo_buf(struct n_tty_data *ldata, size_t i) { + smp_rmb(); /* Matches smp_wmb() in add_echo_byte(). */ return ldata->echo_buf[i & (N_TTY_BUF_SIZE - 1)]; } @@ -318,9 +319,7 @@ static inline void put_tty_queue(unsigned char c, struct n_tty_data *ldata) static void reset_buffer_flags(struct n_tty_data *ldata) { ldata->read_head = ldata->canon_head = ldata->read_tail = 0; - ldata->echo_head = ldata->echo_tail = ldata->echo_commit = 0; ldata->commit_head = 0; - ldata->echo_mark = 0; ldata->line_start = 0; ldata->erasing = 0; @@ -619,12 +618,19 @@ static size_t __process_echoes(struct tty_struct *tty) old_space = space = tty_write_room(tty); tail = ldata->echo_tail; - while (ldata->echo_commit != tail) { + while (MASK(ldata->echo_commit) != MASK(tail)) { c = echo_buf(ldata, tail); if (c == ECHO_OP_START) { unsigned char op; int no_space_left = 0; + /* + * Since add_echo_byte() is called without holding + * output_lock, we might see only portion of multi-byte + * operation. + */ + if (MASK(ldata->echo_commit) == MASK(tail + 1)) + goto not_yet_stored; /* * If the buffer byte is the start of a multi-byte * operation, get the next byte, which is either the @@ -636,6 +642,8 @@ static size_t __process_echoes(struct tty_struct *tty) unsigned int num_chars, num_bs; case ECHO_OP_ERASE_TAB: + if (MASK(ldata->echo_commit) == MASK(tail + 2)) + goto not_yet_stored; num_chars = echo_buf(ldata, tail + 2); /* @@ -730,7 +738,8 @@ static size_t __process_echoes(struct tty_struct *tty) /* If the echo buffer is nearly full (so that the possibility exists * of echo overrun before the next commit), then discard enough * data at the tail to prevent a subsequent overrun */ - while (ldata->echo_commit - tail >= ECHO_DISCARD_WATERMARK) { + while (ldata->echo_commit > tail && + ldata->echo_commit - tail >= ECHO_DISCARD_WATERMARK) { if (echo_buf(ldata, tail) == ECHO_OP_START) { if (echo_buf(ldata, tail + 1) == ECHO_OP_ERASE_TAB) tail += 3; @@ -740,6 +749,7 @@ static size_t __process_echoes(struct tty_struct *tty) tail++; } + not_yet_stored: ldata->echo_tail = tail; return old_space - space; } @@ -750,6 +760,7 @@ static void commit_echoes(struct tty_struct *tty) size_t nr, old, echoed; size_t head; + mutex_lock(&ldata->output_lock); head = ldata->echo_head; ldata->echo_mark = head; old = ldata->echo_commit - ldata->echo_tail; @@ -758,10 +769,12 @@ static void commit_echoes(struct tty_struct *tty) * is over the threshold (and try again each time another * block is accumulated) */ nr = head - ldata->echo_tail; - if (nr < ECHO_COMMIT_WATERMARK || (nr % ECHO_BLOCK > old % ECHO_BLOCK)) + if (nr < ECHO_COMMIT_WATERMARK || + (nr % ECHO_BLOCK > old % ECHO_BLOCK)) { + mutex_unlock(&ldata->output_lock); return; + } - mutex_lock(&ldata->output_lock); ldata->echo_commit = head; echoed = __process_echoes(tty); mutex_unlock(&ldata->output_lock); @@ -812,7 +825,9 @@ static void flush_echoes(struct tty_struct *tty) static inline void add_echo_byte(unsigned char c, struct n_tty_data *ldata) { - *echo_buf_addr(ldata, ldata->echo_head++) = c; + *echo_buf_addr(ldata, ldata->echo_head) = c; + smp_wmb(); /* Matches smp_rmb() in echo_buf(). */ + ldata->echo_head++; } /** @@ -1881,30 +1896,21 @@ static int n_tty_open(struct tty_struct *tty) struct n_tty_data *ldata; /* Currently a malloc failure here can panic */ - ldata = vmalloc(sizeof(*ldata)); + ldata = vzalloc(sizeof(*ldata)); if (!ldata) - goto err; + return -ENOMEM; ldata->overrun_time = jiffies; mutex_init(&ldata->atomic_read_lock); mutex_init(&ldata->output_lock); tty->disc_data = ldata; - reset_buffer_flags(tty->disc_data); - ldata->column = 0; - ldata->canon_column = 0; - ldata->num_overrun = 0; - ldata->no_room = 0; - ldata->lnext = 0; tty->closing = 0; /* indicate buffer work may resume */ clear_bit(TTY_LDISC_HALTED, &tty->flags); n_tty_set_termios(tty, NULL); tty_unthrottle(tty); - return 0; -err: - return -ENOMEM; } static inline int input_available_p(struct tty_struct *tty, int poll) From 20dcff436e9fcd2e106b0ccc48a52206bc176d70 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 6 Jun 2018 21:00:41 +0300 Subject: [PATCH 528/572] serial: 8250_pci: Remove stalled entries in blacklist After the commit 7d8905d06405 ("serial: 8250_pci: Enable device after we check black list") pure serial multi-port cards, such as CH355, got blacklisted and thus not being enumerated anymore. Previously, it seems, blacklisting them was on purpose to shut up pciserial_init_one() about record duplication. So, remove the entries from blacklist in order to get cards enumerated. Fixes: 7d8905d06405 ("serial: 8250_pci: Enable device after we check black list") Reported-by: Matt Turner Cc: Sergej Pupykin Cc: Alexandr Petrenko Signed-off-by: Andy Shevchenko Reviewed-and-Tested-by: Matt Turner Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_pci.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index 3296a05cda2d..f80a300b5d68 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -3339,9 +3339,7 @@ static const struct pci_device_id blacklist[] = { /* multi-io cards handled by parport_serial */ { PCI_DEVICE(0x4348, 0x7053), }, /* WCH CH353 2S1P */ { PCI_DEVICE(0x4348, 0x5053), }, /* WCH CH353 1S1P */ - { PCI_DEVICE(0x4348, 0x7173), }, /* WCH CH355 4S */ { PCI_DEVICE(0x1c00, 0x3250), }, /* WCH CH382 2S1P */ - { PCI_DEVICE(0x1c00, 0x3470), }, /* WCH CH384 4S */ /* Moxa Smartio MUE boards handled by 8250_moxa */ { PCI_VDEVICE(MOXA, 0x1024), }, From bc6cf3669d22371f573ab0305b3abf13887c0786 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 13 Jun 2018 17:08:59 +0200 Subject: [PATCH 529/572] serdev: fix memleak on module unload Make sure to free all resources associated with the ida on module exit. Fixes: cd6484e1830b ("serdev: Introduce new bus for serial attached devices") Cc: stable # 4.11 Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serdev/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/serdev/core.c b/drivers/tty/serdev/core.c index df93b727e984..9e59f4788589 100644 --- a/drivers/tty/serdev/core.c +++ b/drivers/tty/serdev/core.c @@ -617,6 +617,7 @@ EXPORT_SYMBOL_GPL(__serdev_device_driver_register); static void __exit serdev_exit(void) { bus_unregister(&serdev_bus_type); + ida_destroy(&ctrl_ida); } module_exit(serdev_exit); From 21eff69aaaa0e766ca0ce445b477698dc6a9f55a Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Thu, 14 Jun 2018 12:23:09 +0200 Subject: [PATCH 530/572] vt: prevent leaking uninitialized data to userspace via /dev/vcs* KMSAN reported an infoleak when reading from /dev/vcs*: BUG: KMSAN: kernel-infoleak in vcs_read+0x18ba/0x1cc0 Call Trace: ... kmsan_copy_to_user+0x7a/0x160 mm/kmsan/kmsan.c:1253 copy_to_user ./include/linux/uaccess.h:184 vcs_read+0x18ba/0x1cc0 drivers/tty/vt/vc_screen.c:352 __vfs_read+0x1b2/0x9d0 fs/read_write.c:416 vfs_read+0x36c/0x6b0 fs/read_write.c:452 ... Uninit was created at: kmsan_save_stack_with_flags mm/kmsan/kmsan.c:279 kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:189 kmsan_kmalloc+0x94/0x100 mm/kmsan/kmsan.c:315 __kmalloc+0x13a/0x350 mm/slub.c:3818 kmalloc ./include/linux/slab.h:517 vc_allocate+0x438/0x800 drivers/tty/vt/vt.c:787 con_install+0x8c/0x640 drivers/tty/vt/vt.c:2880 tty_driver_install_tty drivers/tty/tty_io.c:1224 tty_init_dev+0x1b5/0x1020 drivers/tty/tty_io.c:1324 tty_open_by_driver drivers/tty/tty_io.c:1959 tty_open+0x17b4/0x2ed0 drivers/tty/tty_io.c:2007 chrdev_open+0xc25/0xd90 fs/char_dev.c:417 do_dentry_open+0xccc/0x1440 fs/open.c:794 vfs_open+0x1b6/0x2f0 fs/open.c:908 ... Bytes 0-79 of 240 are uninitialized Consistently allocating |vc_screenbuf| with kzalloc() fixes the problem Reported-by: syzbot+17a8efdf800000@syzkaller.appspotmail.com Signed-off-by: Alexander Potapenko Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 1eb1a376a041..15eb6c829d39 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -784,7 +784,7 @@ int vc_allocate(unsigned int currcons) /* return 0 on success */ if (!*vc->vc_uni_pagedir_loc) con_set_default_unimap(vc); - vc->vc_screenbuf = kmalloc(vc->vc_screenbuf_size, GFP_KERNEL); + vc->vc_screenbuf = kzalloc(vc->vc_screenbuf_size, GFP_KERNEL); if (!vc->vc_screenbuf) goto err_free; @@ -871,7 +871,7 @@ static int vc_do_resize(struct tty_struct *tty, struct vc_data *vc, if (new_screen_size > (4 << 20)) return -EINVAL; - newscreen = kmalloc(new_screen_size, GFP_USER); + newscreen = kzalloc(new_screen_size, GFP_USER); if (!newscreen) return -ENOMEM; From ecd53ac2f2c6e42fe732d0aa282192cb6ad3faea Mon Sep 17 00:00:00 2001 From: Dirk Gouders Date: Fri, 22 Jun 2018 21:27:38 +0200 Subject: [PATCH 531/572] kconfig: handle P_SYMBOL in print_symbol() Each symbol has a property of type P_SYMBOL since commit 59e89e3ddf85 (kconfig: save location of config symbols). Handle those properties in print_symbol(). Further, place a pointer to print_symbol() in the comment above the list of known property type. Signed-off-by: Dirk Gouders Signed-off-by: Masahiro Yamada --- scripts/kconfig/expr.h | 3 +++ scripts/kconfig/zconf.y | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/scripts/kconfig/expr.h b/scripts/kconfig/expr.h index 94a383b21df6..f63b41b0dd49 100644 --- a/scripts/kconfig/expr.h +++ b/scripts/kconfig/expr.h @@ -171,6 +171,9 @@ struct symbol { * config BAZ * int "BAZ Value" * range 1..255 + * + * Please, also check zconf.y:print_symbol() when modifying the + * list of property types! */ enum prop_type { P_UNKNOWN, diff --git a/scripts/kconfig/zconf.y b/scripts/kconfig/zconf.y index ed84d4a8e288..4b68272ebdb9 100644 --- a/scripts/kconfig/zconf.y +++ b/scripts/kconfig/zconf.y @@ -717,6 +717,10 @@ static void print_symbol(FILE *out, struct menu *menu) print_quoted_string(out, prop->text); fputc('\n', out); break; + case P_SYMBOL: + fputs( " symbol ", out); + fprintf(out, "%s\n", prop->sym->name); + break; default: fprintf(out, " unknown prop %d!\n", prop->type); break; From 8b9d27124094964b3c4f8985ba66355ac4d9e842 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 24 Jun 2018 01:41:51 +0900 Subject: [PATCH 532/572] kbuild: reword help of LD_DEAD_CODE_DATA_ELIMINATION Since commit 5d20ee3192a5 ("kbuild: Allow LD_DEAD_CODE_DATA_ELIMINATION to be selectable if enabled"), HAVE_LD_DEAD_CODE_DATA_ELIMINATION is supposed to be selected by architectures that are capable of this functionality. LD_DEAD_CODE_DATA_ELIMINATION is now users' selection. Update the help message. Signed-off-by: Masahiro Yamada --- init/Kconfig | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/init/Kconfig b/init/Kconfig index 5a52f07259a2..eaad1536647f 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1051,10 +1051,9 @@ config LD_DEAD_CODE_DATA_ELIMINATION depends on HAVE_LD_DEAD_CODE_DATA_ELIMINATION depends on EXPERT help - Select this if the architecture wants to do dead code and - data elimination with the linker by compiling with - -ffunction-sections -fdata-sections, and linking with - --gc-sections. + Enable this if you want to do dead code and data elimination with + the linker by compiling with -ffunction-sections -fdata-sections, + and linking with --gc-sections. This can reduce on disk and in-memory size of the kernel code and static data, particularly for small configs and From 73d1c580f92b203f4c3a189ee98c917c65712f7e Mon Sep 17 00:00:00 2001 From: Jerry James Date: Sat, 23 Jun 2018 22:49:04 +0200 Subject: [PATCH 533/572] kconfig: loop boundary condition fix If buf[-1] just happens to hold the byte 0x0A, then nread can wrap around to (size_t)-1, leading to invalid memory accesses. This has caused segmentation faults when trying to build the latest kernel snapshots for i686 in Fedora: https://bugzilla.redhat.com/show_bug.cgi?id=1592374 Signed-off-by: Jerry James [alexpl@fedoraproject.org: reformatted patch for submission] Signed-off-by: Alexander Ploumistos Signed-off-by: Masahiro Yamada --- scripts/kconfig/preprocess.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/kconfig/preprocess.c b/scripts/kconfig/preprocess.c index 65da87fce907..5ca2df790d3c 100644 --- a/scripts/kconfig/preprocess.c +++ b/scripts/kconfig/preprocess.c @@ -156,7 +156,7 @@ static char *do_shell(int argc, char *argv[]) nread--; /* remove trailing new lines */ - while (buf[nread - 1] == '\n') + while (nread > 0 && buf[nread - 1] == '\n') nread--; buf[nread] = 0; From 682630f00a219a1b0696abe9c0967e660068187b Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 25 Jun 2018 20:58:17 +0300 Subject: [PATCH 534/572] nvme-rdma: fix possible double free of controller async event buffer If reconnect/reset failed where the controller async event buffer was freed, we might end up freeing it again as we call nvme_rdma_destroy_admin_queue again in the remove path. Given that the sequence is guaranteed to serialize by .ctrl_stop, we simply set ctrl->async_event_sqe.data to NULL and don't free it in future visits. Reported-by: Max Gurtovoy Tested-by: Max Gurtovoy Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 9544625c0b7d..518c5b09038c 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -732,8 +732,11 @@ static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl, blk_cleanup_queue(ctrl->ctrl.admin_q); nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset); } - nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe, - sizeof(struct nvme_command), DMA_TO_DEVICE); + if (ctrl->async_event_sqe.data) { + nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe, + sizeof(struct nvme_command), DMA_TO_DEVICE); + ctrl->async_event_sqe.data = NULL; + } nvme_rdma_free_queue(&ctrl->queues[0]); } From 704e38303153c797d66c41bbe8325202f549b53c Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 18 May 2018 16:27:15 +0200 Subject: [PATCH 535/572] parisc: Drop struct sigaction from not exported header file This header file isn't exported to userspace, so there is no benefit in defining struct sigaction for userspace here. Signed-off-by: Helge Deller --- arch/parisc/include/asm/signal.h | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/parisc/include/asm/signal.h b/arch/parisc/include/asm/signal.h index eeb5c8858663..715c96ba2ec8 100644 --- a/arch/parisc/include/asm/signal.h +++ b/arch/parisc/include/asm/signal.h @@ -21,14 +21,6 @@ typedef struct { unsigned long sig[_NSIG_WORDS]; } sigset_t; -#ifndef __KERNEL__ -struct sigaction { - __sighandler_t sa_handler; - unsigned long sa_flags; - sigset_t sa_mask; /* mask last for extensibility */ -}; -#endif - #include #endif /* !__ASSEMBLY */ From 1c971f39e62222d567f179ccaa1a186f1c203b52 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 15 Jun 2018 22:33:44 +0200 Subject: [PATCH 536/572] parisc: Mark 16kB and 64kB page sizes BROKEN A full boot only succeeds with 4kB page sizes currently. For 16kB and 64kB page size support somone needs to fix the LBA PCI code at least, so mark those broken for now. Signed-off-by: Helge Deller --- arch/parisc/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index c480770fabcd..a9a507a19540 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -244,11 +244,11 @@ config PARISC_PAGE_SIZE_4KB config PARISC_PAGE_SIZE_16KB bool "16KB" - depends on PA8X00 + depends on PA8X00 && BROKEN config PARISC_PAGE_SIZE_64KB bool "64KB" - depends on PA8X00 + depends on PA8X00 && BROKEN endchoice From 5e791d2e4785f9ec7da4a02b9e8d00dace9e6917 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 29 May 2018 22:38:01 +0300 Subject: [PATCH 537/572] parisc: Convert printk(KERN_LEVEL) to pr_lvl() Convert printk(KERN_LEVEL) type of calls to pr_lvl() macros. While here, - convert printk() to pr_info() - join back string literal to be on one line - use %*phN (note, it gives 1 byte more for sake of simplicity) Signed-off-by: Andy Shevchenko Signed-off-by: Helge Deller --- arch/parisc/kernel/drivers.c | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c index e0e1c9775c32..5eb979d04b90 100644 --- a/arch/parisc/kernel/drivers.c +++ b/arch/parisc/kernel/drivers.c @@ -154,17 +154,14 @@ int register_parisc_driver(struct parisc_driver *driver) { /* FIXME: we need this because apparently the sti * driver can be registered twice */ - if(driver->drv.name) { - printk(KERN_WARNING - "BUG: skipping previously registered driver %s\n", - driver->name); + if (driver->drv.name) { + pr_warn("BUG: skipping previously registered driver %s\n", + driver->name); return 1; } if (!driver->probe) { - printk(KERN_WARNING - "BUG: driver %s has no probe routine\n", - driver->name); + pr_warn("BUG: driver %s has no probe routine\n", driver->name); return 1; } @@ -491,12 +488,9 @@ alloc_pa_dev(unsigned long hpa, struct hardware_path *mod_path) dev = create_parisc_device(mod_path); if (dev->id.hw_type != HPHW_FAULTY) { - printk(KERN_ERR "Two devices have hardware path [%s]. " - "IODC data for second device: " - "%02x%02x%02x%02x%02x%02x\n" - "Rearranging GSC cards sometimes helps\n", - parisc_pathname(dev), iodc_data[0], iodc_data[1], - iodc_data[3], iodc_data[4], iodc_data[5], iodc_data[6]); + pr_err("Two devices have hardware path [%s]. IODC data for second device: %7phN\n" + "Rearranging GSC cards sometimes helps\n", + parisc_pathname(dev), iodc_data); return NULL; } @@ -528,8 +522,7 @@ alloc_pa_dev(unsigned long hpa, struct hardware_path *mod_path) * the keyboard controller */ if ((hpa & 0xfff) == 0 && insert_resource(&iomem_resource, &dev->hpa)) - printk("Unable to claim HPA %lx for device %s\n", - hpa, name); + pr_warn("Unable to claim HPA %lx for device %s\n", hpa, name); return dev; } @@ -875,7 +868,7 @@ static void print_parisc_device(struct parisc_device *dev) static int count; print_pa_hwpath(dev, hw_path); - printk(KERN_INFO "%d. %s at 0x%px [%s] { %d, 0x%x, 0x%.3x, 0x%.5x }", + pr_info("%d. %s at 0x%px [%s] { %d, 0x%x, 0x%.3x, 0x%.5x }", ++count, dev->name, (void*) dev->hpa.start, hw_path, dev->id.hw_type, dev->id.hversion_rev, dev->id.hversion, dev->id.sversion); From 435d34c7a48de5e89047ef9c7dce6528831b258b Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 15 Jun 2018 22:38:22 +0200 Subject: [PATCH 538/572] parisc: Default to 4 SMP CPUs I haven't seen any real SMP machine yet with > 4 CPUs (we don't suport SuperDomes yet), so reducing the default maximum number of CPUs may speed up various bitop functions which depend on number of CPUs in the system. bload-o-meter on a typical 64-bit kernel shows: Data: add/remove: 0/0 grow/shrink: 0/10 up/down: 0/-3724 (-3724) Total: Before=1910404, After=1906680, chg -0.19% Code: add/remove: 0/2 grow/shrink: 42/38 up/down: 2320/-3500 (-1180) Total: Before=11053099, After=11051919, chg -0.01% Signed-off-by: Helge Deller --- arch/parisc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index a9a507a19540..17526bebcbd2 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -347,7 +347,7 @@ config NR_CPUS int "Maximum number of CPUs (2-32)" range 2 32 depends on SMP - default "32" + default "4" endmenu From 2765b3edc41bdf18960ca7e6b656fb933ac191d6 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 28 Jun 2018 17:41:58 +0200 Subject: [PATCH 539/572] parisc: Wire up io_pgetevents syscall Signed-off-by: Helge Deller --- arch/parisc/include/uapi/asm/unistd.h | 3 ++- arch/parisc/kernel/syscall_table.S | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/parisc/include/uapi/asm/unistd.h b/arch/parisc/include/uapi/asm/unistd.h index 4872e77aa96b..dc77c5a51db7 100644 --- a/arch/parisc/include/uapi/asm/unistd.h +++ b/arch/parisc/include/uapi/asm/unistd.h @@ -364,8 +364,9 @@ #define __NR_preadv2 (__NR_Linux + 347) #define __NR_pwritev2 (__NR_Linux + 348) #define __NR_statx (__NR_Linux + 349) +#define __NR_io_pgetevents (__NR_Linux + 350) -#define __NR_Linux_syscalls (__NR_statx + 1) +#define __NR_Linux_syscalls (__NR_io_pgetevents + 1) #define __IGNORE_select /* newselect */ diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S index 6308749359e4..fe3f2a49d2b1 100644 --- a/arch/parisc/kernel/syscall_table.S +++ b/arch/parisc/kernel/syscall_table.S @@ -445,6 +445,7 @@ ENTRY_COMP(preadv2) ENTRY_COMP(pwritev2) ENTRY_SAME(statx) + ENTRY_COMP(io_pgetevents) /* 350 */ .ifne (. - 90b) - (__NR_Linux_syscalls * (91b - 90b)) From 297ba57dcdec7ea37e702bcf1a577ac32a034e21 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 27 Jun 2018 12:55:18 -0700 Subject: [PATCH 540/572] block: Fix cloning of requests with a special payload This patch avoids that removing a path controlled by the dm-mpath driver while mkfs is running triggers the following kernel bug: kernel BUG at block/blk-core.c:3347! invalid opcode: 0000 [#1] PREEMPT SMP KASAN CPU: 20 PID: 24369 Comm: mkfs.ext4 Not tainted 4.18.0-rc1-dbg+ #2 RIP: 0010:blk_end_request_all+0x68/0x70 Call Trace: dm_softirq_done+0x326/0x3d0 [dm_mod] blk_done_softirq+0x19b/0x1e0 __do_softirq+0x128/0x60d irq_exit+0x100/0x110 smp_call_function_single_interrupt+0x90/0x330 call_function_single_interrupt+0xf/0x20 Fixes: f9d03f96b988 ("block: improve handling of the magic discard payload") Reviewed-by: Ming Lei Reviewed-by: Christoph Hellwig Acked-by: Mike Snitzer Signed-off-by: Bart Van Assche Cc: Hannes Reinecke Cc: Johannes Thumshirn Cc: Signed-off-by: Jens Axboe --- block/blk-core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/block/blk-core.c b/block/blk-core.c index afd2596ea3d3..f84a9b7b6f5a 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -3473,6 +3473,10 @@ static void __blk_rq_prep_clone(struct request *dst, struct request *src) dst->cpu = src->cpu; dst->__sector = blk_rq_pos(src); dst->__data_len = blk_rq_bytes(src); + if (src->rq_flags & RQF_SPECIAL_PAYLOAD) { + dst->rq_flags |= RQF_SPECIAL_PAYLOAD; + dst->special_vec = src->special_vec; + } dst->nr_phys_segments = src->nr_phys_segments; dst->ioprio = src->ioprio; dst->extra_len = src->extra_len; From f904390ac8b2657b97ba3c1ad2b1be0822fa62ad Mon Sep 17 00:00:00 2001 From: oscardagrach Date: Wed, 13 Jun 2018 10:13:05 -0500 Subject: [PATCH 541/572] arm64: dts: hikey: Define wl1835 power capabilities These properties are required for compatibility with runtime PM. Without these properties, MMC host controller will not be aware of power capabilities. When the wlcore driver attempts to power on the device, it will erroneously fail with -EACCES. Fixes: 60f36637bbbd ("wlcore: sdio: allow pm to handle sdio power") Signed-off-by: Ryan Grachek Tested-by: John Stultz Acked-by: John Stultz Signed-off-by: Wei Xu --- arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts index edb4ee0b8896..7f12624f6c8e 100644 --- a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts +++ b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts @@ -322,6 +322,8 @@ dwmmc_2: dwmmc2@f723f000 { bus-width = <0x4>; non-removable; + cap-power-off-card; + keep-power-in-suspend; vmmc-supply = <®_vdd_3v3>; mmc-pwrseq = <&wl1835_pwrseq>; From a30449eb3ac908f26b4bc963a58039a5f2725ffa Mon Sep 17 00:00:00 2001 From: oscardagrach Date: Wed, 13 Jun 2018 13:03:21 -0500 Subject: [PATCH 542/572] arm64: dts: hikey960: Define wl1837 power capabilities These properties are required for compatibility with runtime PM. Without these properties, MMC host controller will not be aware of power capabilities. When the wlcore driver attempts to power on the device, it will erroneously fail with -EACCES. This fixes a regression found here: https://lkml.org/lkml/2018/6/12/930 Fixes: 60f36637bbbd ("wlcore: sdio: allow pm to handle sdio power") Signed-off-by: Ryan Grachek Tested-by: John Stultz Acked-by: John Stultz Tested-by: Valentin Schneider Signed-off-by: Wei Xu --- arch/arm64/boot/dts/hisilicon/hi3660-hikey960.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/hisilicon/hi3660-hikey960.dts b/arch/arm64/boot/dts/hisilicon/hi3660-hikey960.dts index c6999624ed8a..68c5a6c819ae 100644 --- a/arch/arm64/boot/dts/hisilicon/hi3660-hikey960.dts +++ b/arch/arm64/boot/dts/hisilicon/hi3660-hikey960.dts @@ -585,6 +585,8 @@ vmmc-supply = <&wlan_en>; ti,non-removable; non-removable; + cap-power-off-card; + keep-power-in-suspend; #address-cells = <0x1>; #size-cells = <0x0>; status = "ok"; From a11e1d432b51f63ba698d044441284a661f01144 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 28 Jun 2018 09:43:44 -0700 Subject: [PATCH 543/572] Revert changes to convert to ->poll_mask() and aio IOCB_CMD_POLL The poll() changes were not well thought out, and completely unexplained. They also caused a huge performance regression, because "->poll()" was no longer a trivial file operation that just called down to the underlying file operations, but instead did at least two indirect calls. Indirect calls are sadly slow now with the Spectre mitigation, but the performance problem could at least be largely mitigated by changing the "->get_poll_head()" operation to just have a per-file-descriptor pointer to the poll head instead. That gets rid of one of the new indirections. But that doesn't fix the new complexity that is completely unwarranted for the regular case. The (undocumented) reason for the poll() changes was some alleged AIO poll race fixing, but we don't make the common case slower and more complex for some uncommon special case, so this all really needs way more explanations and most likely a fundamental redesign. [ This revert is a revert of about 30 different commits, not reverted individually because that would just be unnecessarily messy - Linus ] Cc: Al Viro Cc: Christoph Hellwig Signed-off-by: Linus Torvalds --- Documentation/filesystems/Locking | 7 +- Documentation/filesystems/vfs.txt | 13 --- crypto/af_alg.c | 13 ++- crypto/algif_aead.c | 4 +- crypto/algif_skcipher.c | 4 +- drivers/char/random.c | 29 +++--- drivers/isdn/mISDN/socket.c | 2 +- drivers/net/ppp/pppoe.c | 2 +- fs/aio.c | 148 +----------------------------- fs/eventfd.c | 19 ++-- fs/eventpoll.c | 15 +-- fs/pipe.c | 22 ++--- fs/select.c | 23 ----- fs/timerfd.c | 22 ++--- include/crypto/if_alg.h | 3 +- include/linux/fs.h | 2 - include/linux/net.h | 1 - include/linux/poll.h | 14 +-- include/linux/skbuff.h | 3 +- include/net/bluetooth/bluetooth.h | 2 +- include/net/iucv/af_iucv.h | 2 + include/net/sctp/sctp.h | 3 +- include/net/tcp.h | 3 +- include/net/tls.h | 6 +- include/net/udp.h | 2 +- include/uapi/linux/aio_abi.h | 8 +- net/appletalk/ddp.c | 2 +- net/atm/common.c | 11 ++- net/atm/common.h | 2 +- net/atm/pvc.c | 2 +- net/atm/svc.c | 2 +- net/ax25/af_ax25.c | 2 +- net/bluetooth/af_bluetooth.c | 7 +- net/bluetooth/hci_sock.c | 2 +- net/bluetooth/l2cap_sock.c | 2 +- net/bluetooth/rfcomm/sock.c | 2 +- net/bluetooth/sco.c | 2 +- net/caif/caif_socket.c | 12 ++- net/can/bcm.c | 2 +- net/can/raw.c | 2 +- net/core/datagram.c | 13 ++- net/dccp/dccp.h | 3 +- net/dccp/ipv4.c | 2 +- net/dccp/ipv6.c | 2 +- net/dccp/proto.c | 13 ++- net/decnet/af_decnet.c | 6 +- net/ieee802154/socket.c | 4 +- net/ipv4/af_inet.c | 8 +- net/ipv4/tcp.c | 23 +++-- net/ipv4/udp.c | 10 +- net/ipv6/af_inet6.c | 4 +- net/ipv6/raw.c | 4 +- net/iucv/af_iucv.c | 7 +- net/kcm/kcmsock.c | 10 +- net/key/af_key.c | 2 +- net/l2tp/l2tp_ip.c | 2 +- net/l2tp/l2tp_ip6.c | 2 +- net/l2tp/l2tp_ppp.c | 2 +- net/llc/af_llc.c | 2 +- net/netlink/af_netlink.c | 2 +- net/netrom/af_netrom.c | 2 +- net/nfc/llcp_sock.c | 9 +- net/nfc/rawsock.c | 4 +- net/packet/af_packet.c | 9 +- net/phonet/socket.c | 9 +- net/qrtr/qrtr.c | 2 +- net/rose/af_rose.c | 2 +- net/rxrpc/af_rxrpc.c | 10 +- net/sctp/ipv6.c | 2 +- net/sctp/protocol.c | 2 +- net/sctp/socket.c | 4 +- net/smc/af_smc.c | 12 ++- net/socket.c | 48 +--------- net/tipc/socket.c | 14 ++- net/tls/tls_main.c | 2 +- net/tls/tls_sw.c | 19 ++-- net/unix/af_unix.c | 30 +++--- net/vmw_vsock/af_vsock.c | 19 ++-- net/x25/af_x25.c | 2 +- net/xdp/xsk.c | 7 +- 80 files changed, 302 insertions(+), 451 deletions(-) diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 2c391338c675..37bf0a9de75c 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -441,8 +441,6 @@ prototypes: int (*iterate) (struct file *, struct dir_context *); int (*iterate_shared) (struct file *, struct dir_context *); __poll_t (*poll) (struct file *, struct poll_table_struct *); - struct wait_queue_head * (*get_poll_head)(struct file *, __poll_t); - __poll_t (*poll_mask) (struct file *, __poll_t); long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); long (*compat_ioctl) (struct file *, unsigned int, unsigned long); int (*mmap) (struct file *, struct vm_area_struct *); @@ -473,7 +471,7 @@ prototypes: }; locking rules: - All except for ->poll_mask may block. + All may block. ->llseek() locking has moved from llseek to the individual llseek implementations. If your fs is not using generic_file_llseek, you @@ -505,9 +503,6 @@ in sys_read() and friends. the lease within the individual filesystem to record the result of the operation -->poll_mask can be called with or without the waitqueue lock for the waitqueue -returned from ->get_poll_head. - --------------------------- dquot_operations ------------------------------- prototypes: int (*write_dquot) (struct dquot *); diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 829a7b7857a4..f608180ad59d 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -857,8 +857,6 @@ struct file_operations { ssize_t (*write_iter) (struct kiocb *, struct iov_iter *); int (*iterate) (struct file *, struct dir_context *); __poll_t (*poll) (struct file *, struct poll_table_struct *); - struct wait_queue_head * (*get_poll_head)(struct file *, __poll_t); - __poll_t (*poll_mask) (struct file *, __poll_t); long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); long (*compat_ioctl) (struct file *, unsigned int, unsigned long); int (*mmap) (struct file *, struct vm_area_struct *); @@ -903,17 +901,6 @@ otherwise noted. activity on this file and (optionally) go to sleep until there is activity. Called by the select(2) and poll(2) system calls - get_poll_head: Returns the struct wait_queue_head that callers can - wait on. Callers need to check the returned events using ->poll_mask - once woken. Can return NULL to indicate polling is not supported, - or any error code using the ERR_PTR convention to indicate that a - grave error occured and ->poll_mask shall not be called. - - poll_mask: return the mask of EPOLL* values describing the file descriptor - state. Called either before going to sleep on the waitqueue returned by - get_poll_head, or after it has been woken. If ->get_poll_head and - ->poll_mask are implemented ->poll does not need to be implement. - unlocked_ioctl: called by the ioctl(2) system call. compat_ioctl: called by the ioctl(2) system call when 32 bit system calls diff --git a/crypto/af_alg.c b/crypto/af_alg.c index 49fa8582138b..314c52c967e5 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -1060,12 +1060,19 @@ void af_alg_async_cb(struct crypto_async_request *_req, int err) } EXPORT_SYMBOL_GPL(af_alg_async_cb); -__poll_t af_alg_poll_mask(struct socket *sock, __poll_t events) +/** + * af_alg_poll - poll system call handler + */ +__poll_t af_alg_poll(struct file *file, struct socket *sock, + poll_table *wait) { struct sock *sk = sock->sk; struct alg_sock *ask = alg_sk(sk); struct af_alg_ctx *ctx = ask->private; - __poll_t mask = 0; + __poll_t mask; + + sock_poll_wait(file, sk_sleep(sk), wait); + mask = 0; if (!ctx->more || ctx->used) mask |= EPOLLIN | EPOLLRDNORM; @@ -1075,7 +1082,7 @@ __poll_t af_alg_poll_mask(struct socket *sock, __poll_t events) return mask; } -EXPORT_SYMBOL_GPL(af_alg_poll_mask); +EXPORT_SYMBOL_GPL(af_alg_poll); /** * af_alg_alloc_areq - allocate struct af_alg_async_req diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index 825524f27438..c40a8c7ee8ae 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c @@ -375,7 +375,7 @@ static struct proto_ops algif_aead_ops = { .sendmsg = aead_sendmsg, .sendpage = af_alg_sendpage, .recvmsg = aead_recvmsg, - .poll_mask = af_alg_poll_mask, + .poll = af_alg_poll, }; static int aead_check_key(struct socket *sock) @@ -471,7 +471,7 @@ static struct proto_ops algif_aead_ops_nokey = { .sendmsg = aead_sendmsg_nokey, .sendpage = aead_sendpage_nokey, .recvmsg = aead_recvmsg_nokey, - .poll_mask = af_alg_poll_mask, + .poll = af_alg_poll, }; static void *aead_bind(const char *name, u32 type, u32 mask) diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index 4c04eb9888ad..cfdaab2b7d76 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -206,7 +206,7 @@ static struct proto_ops algif_skcipher_ops = { .sendmsg = skcipher_sendmsg, .sendpage = af_alg_sendpage, .recvmsg = skcipher_recvmsg, - .poll_mask = af_alg_poll_mask, + .poll = af_alg_poll, }; static int skcipher_check_key(struct socket *sock) @@ -302,7 +302,7 @@ static struct proto_ops algif_skcipher_ops_nokey = { .sendmsg = skcipher_sendmsg_nokey, .sendpage = skcipher_sendpage_nokey, .recvmsg = skcipher_recvmsg_nokey, - .poll_mask = af_alg_poll_mask, + .poll = af_alg_poll, }; static void *skcipher_bind(const char *name, u32 type, u32 mask) diff --git a/drivers/char/random.c b/drivers/char/random.c index a8fb0020ba5c..cd888d4ee605 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -402,7 +402,8 @@ static struct poolinfo { /* * Static global variables */ -static DECLARE_WAIT_QUEUE_HEAD(random_wait); +static DECLARE_WAIT_QUEUE_HEAD(random_read_wait); +static DECLARE_WAIT_QUEUE_HEAD(random_write_wait); static struct fasync_struct *fasync; static DEFINE_SPINLOCK(random_ready_list_lock); @@ -721,8 +722,8 @@ retry: /* should we wake readers? */ if (entropy_bits >= random_read_wakeup_bits && - wq_has_sleeper(&random_wait)) { - wake_up_interruptible_poll(&random_wait, POLLIN); + wq_has_sleeper(&random_read_wait)) { + wake_up_interruptible(&random_read_wait); kill_fasync(&fasync, SIGIO, POLL_IN); } /* If the input pool is getting full, send some @@ -1396,7 +1397,7 @@ retry: trace_debit_entropy(r->name, 8 * ibytes); if (ibytes && (r->entropy_count >> ENTROPY_SHIFT) < random_write_wakeup_bits) { - wake_up_interruptible_poll(&random_wait, POLLOUT); + wake_up_interruptible(&random_write_wait); kill_fasync(&fasync, SIGIO, POLL_OUT); } @@ -1838,7 +1839,7 @@ _random_read(int nonblock, char __user *buf, size_t nbytes) if (nonblock) return -EAGAIN; - wait_event_interruptible(random_wait, + wait_event_interruptible(random_read_wait, ENTROPY_BITS(&input_pool) >= random_read_wakeup_bits); if (signal_pending(current)) @@ -1875,17 +1876,14 @@ urandom_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) return ret; } -static struct wait_queue_head * -random_get_poll_head(struct file *file, __poll_t events) -{ - return &random_wait; -} - static __poll_t -random_poll_mask(struct file *file, __poll_t events) +random_poll(struct file *file, poll_table * wait) { - __poll_t mask = 0; + __poll_t mask; + poll_wait(file, &random_read_wait, wait); + poll_wait(file, &random_write_wait, wait); + mask = 0; if (ENTROPY_BITS(&input_pool) >= random_read_wakeup_bits) mask |= EPOLLIN | EPOLLRDNORM; if (ENTROPY_BITS(&input_pool) < random_write_wakeup_bits) @@ -1992,8 +1990,7 @@ static int random_fasync(int fd, struct file *filp, int on) const struct file_operations random_fops = { .read = random_read, .write = random_write, - .get_poll_head = random_get_poll_head, - .poll_mask = random_poll_mask, + .poll = random_poll, .unlocked_ioctl = random_ioctl, .fasync = random_fasync, .llseek = noop_llseek, @@ -2326,7 +2323,7 @@ void add_hwgenerator_randomness(const char *buffer, size_t count, * We'll be woken up again once below random_write_wakeup_thresh, * or when the calling thread is about to terminate. */ - wait_event_interruptible(random_wait, kthread_should_stop() || + wait_event_interruptible(random_write_wait, kthread_should_stop() || ENTROPY_BITS(&input_pool) <= random_write_wakeup_bits); mix_pool_bytes(poolp, buffer, count); credit_entropy_bits(poolp, entropy); diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c index 98f90aadd141..18c0a1281914 100644 --- a/drivers/isdn/mISDN/socket.c +++ b/drivers/isdn/mISDN/socket.c @@ -588,7 +588,7 @@ static const struct proto_ops data_sock_ops = { .getname = data_sock_getname, .sendmsg = mISDN_sock_sendmsg, .recvmsg = mISDN_sock_recvmsg, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = data_sock_setsockopt, diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c index de51e8f70f44..ce61231e96ea 100644 --- a/drivers/net/ppp/pppoe.c +++ b/drivers/net/ppp/pppoe.c @@ -1107,7 +1107,7 @@ static const struct proto_ops pppoe_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = pppoe_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = sock_no_setsockopt, diff --git a/fs/aio.c b/fs/aio.c index e1d20124ec0e..210df9da1283 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -5,7 +5,6 @@ * Implements an efficient asynchronous io interface. * * Copyright 2000, 2001, 2002 Red Hat, Inc. All Rights Reserved. - * Copyright 2018 Christoph Hellwig. * * See ../COPYING for licensing terms. */ @@ -165,22 +164,10 @@ struct fsync_iocb { bool datasync; }; -struct poll_iocb { - struct file *file; - __poll_t events; - struct wait_queue_head *head; - - union { - struct wait_queue_entry wait; - struct work_struct work; - }; -}; - struct aio_kiocb { union { struct kiocb rw; struct fsync_iocb fsync; - struct poll_iocb poll; }; struct kioctx *ki_ctx; @@ -1590,6 +1577,7 @@ static int aio_fsync(struct fsync_iocb *req, struct iocb *iocb, bool datasync) if (unlikely(iocb->aio_buf || iocb->aio_offset || iocb->aio_nbytes || iocb->aio_rw_flags)) return -EINVAL; + req->file = fget(iocb->aio_fildes); if (unlikely(!req->file)) return -EBADF; @@ -1604,137 +1592,6 @@ static int aio_fsync(struct fsync_iocb *req, struct iocb *iocb, bool datasync) return 0; } -/* need to use list_del_init so we can check if item was present */ -static inline bool __aio_poll_remove(struct poll_iocb *req) -{ - if (list_empty(&req->wait.entry)) - return false; - list_del_init(&req->wait.entry); - return true; -} - -static inline void __aio_poll_complete(struct aio_kiocb *iocb, __poll_t mask) -{ - fput(iocb->poll.file); - aio_complete(iocb, mangle_poll(mask), 0); -} - -static void aio_poll_work(struct work_struct *work) -{ - struct aio_kiocb *iocb = container_of(work, struct aio_kiocb, poll.work); - - if (!list_empty_careful(&iocb->ki_list)) - aio_remove_iocb(iocb); - __aio_poll_complete(iocb, iocb->poll.events); -} - -static int aio_poll_cancel(struct kiocb *iocb) -{ - struct aio_kiocb *aiocb = container_of(iocb, struct aio_kiocb, rw); - struct poll_iocb *req = &aiocb->poll; - struct wait_queue_head *head = req->head; - bool found = false; - - spin_lock(&head->lock); - found = __aio_poll_remove(req); - spin_unlock(&head->lock); - - if (found) { - req->events = 0; - INIT_WORK(&req->work, aio_poll_work); - schedule_work(&req->work); - } - return 0; -} - -static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, - void *key) -{ - struct poll_iocb *req = container_of(wait, struct poll_iocb, wait); - struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll); - struct file *file = req->file; - __poll_t mask = key_to_poll(key); - - assert_spin_locked(&req->head->lock); - - /* for instances that support it check for an event match first: */ - if (mask && !(mask & req->events)) - return 0; - - mask = file->f_op->poll_mask(file, req->events) & req->events; - if (!mask) - return 0; - - __aio_poll_remove(req); - - /* - * Try completing without a context switch if we can acquire ctx_lock - * without spinning. Otherwise we need to defer to a workqueue to - * avoid a deadlock due to the lock order. - */ - if (spin_trylock(&iocb->ki_ctx->ctx_lock)) { - list_del_init(&iocb->ki_list); - spin_unlock(&iocb->ki_ctx->ctx_lock); - - __aio_poll_complete(iocb, mask); - } else { - req->events = mask; - INIT_WORK(&req->work, aio_poll_work); - schedule_work(&req->work); - } - - return 1; -} - -static ssize_t aio_poll(struct aio_kiocb *aiocb, struct iocb *iocb) -{ - struct kioctx *ctx = aiocb->ki_ctx; - struct poll_iocb *req = &aiocb->poll; - __poll_t mask; - - /* reject any unknown events outside the normal event mask. */ - if ((u16)iocb->aio_buf != iocb->aio_buf) - return -EINVAL; - /* reject fields that are not defined for poll */ - if (iocb->aio_offset || iocb->aio_nbytes || iocb->aio_rw_flags) - return -EINVAL; - - req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP; - req->file = fget(iocb->aio_fildes); - if (unlikely(!req->file)) - return -EBADF; - if (!file_has_poll_mask(req->file)) - goto out_fail; - - req->head = req->file->f_op->get_poll_head(req->file, req->events); - if (!req->head) - goto out_fail; - if (IS_ERR(req->head)) { - mask = EPOLLERR; - goto done; - } - - init_waitqueue_func_entry(&req->wait, aio_poll_wake); - aiocb->ki_cancel = aio_poll_cancel; - - spin_lock_irq(&ctx->ctx_lock); - spin_lock(&req->head->lock); - mask = req->file->f_op->poll_mask(req->file, req->events) & req->events; - if (!mask) { - __add_wait_queue(req->head, &req->wait); - list_add_tail(&aiocb->ki_list, &ctx->active_reqs); - } - spin_unlock(&req->head->lock); - spin_unlock_irq(&ctx->ctx_lock); -done: - if (mask) - __aio_poll_complete(aiocb, mask); - return 0; -out_fail: - fput(req->file); - return -EINVAL; /* same as no support for IOCB_CMD_POLL */ -} - static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, bool compat) { @@ -1808,9 +1665,6 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, case IOCB_CMD_FDSYNC: ret = aio_fsync(&req->fsync, &iocb, true); break; - case IOCB_CMD_POLL: - ret = aio_poll(req, &iocb); - break; default: pr_debug("invalid aio operation %d\n", iocb.aio_lio_opcode); ret = -EINVAL; diff --git a/fs/eventfd.c b/fs/eventfd.c index ceb1031f1cac..08d3bd602f73 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -101,20 +101,14 @@ static int eventfd_release(struct inode *inode, struct file *file) return 0; } -static struct wait_queue_head * -eventfd_get_poll_head(struct file *file, __poll_t events) -{ - struct eventfd_ctx *ctx = file->private_data; - - return &ctx->wqh; -} - -static __poll_t eventfd_poll_mask(struct file *file, __poll_t eventmask) +static __poll_t eventfd_poll(struct file *file, poll_table *wait) { struct eventfd_ctx *ctx = file->private_data; __poll_t events = 0; u64 count; + poll_wait(file, &ctx->wqh, wait); + /* * All writes to ctx->count occur within ctx->wqh.lock. This read * can be done outside ctx->wqh.lock because we know that poll_wait @@ -156,11 +150,11 @@ static __poll_t eventfd_poll_mask(struct file *file, __poll_t eventmask) count = READ_ONCE(ctx->count); if (count > 0) - events |= (EPOLLIN & eventmask); + events |= EPOLLIN; if (count == ULLONG_MAX) events |= EPOLLERR; if (ULLONG_MAX - 1 > count) - events |= (EPOLLOUT & eventmask); + events |= EPOLLOUT; return events; } @@ -311,8 +305,7 @@ static const struct file_operations eventfd_fops = { .show_fdinfo = eventfd_show_fdinfo, #endif .release = eventfd_release, - .get_poll_head = eventfd_get_poll_head, - .poll_mask = eventfd_poll_mask, + .poll = eventfd_poll, .read = eventfd_read, .write = eventfd_write, .llseek = noop_llseek, diff --git a/fs/eventpoll.c b/fs/eventpoll.c index ea4436f409fb..67db22fe99c5 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -922,18 +922,14 @@ static __poll_t ep_read_events_proc(struct eventpoll *ep, struct list_head *head return 0; } -static struct wait_queue_head *ep_eventpoll_get_poll_head(struct file *file, - __poll_t eventmask) -{ - struct eventpoll *ep = file->private_data; - return &ep->poll_wait; -} - -static __poll_t ep_eventpoll_poll_mask(struct file *file, __poll_t eventmask) +static __poll_t ep_eventpoll_poll(struct file *file, poll_table *wait) { struct eventpoll *ep = file->private_data; int depth = 0; + /* Insert inside our poll wait queue */ + poll_wait(file, &ep->poll_wait, wait); + /* * Proceed to find out if wanted events are really available inside * the ready list. @@ -972,8 +968,7 @@ static const struct file_operations eventpoll_fops = { .show_fdinfo = ep_show_fdinfo, #endif .release = ep_eventpoll_release, - .get_poll_head = ep_eventpoll_get_poll_head, - .poll_mask = ep_eventpoll_poll_mask, + .poll = ep_eventpoll_poll, .llseek = noop_llseek, }; diff --git a/fs/pipe.c b/fs/pipe.c index bb0840e234f3..39d6f431da83 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -509,22 +509,19 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) } } -static struct wait_queue_head * -pipe_get_poll_head(struct file *filp, __poll_t events) -{ - struct pipe_inode_info *pipe = filp->private_data; - - return &pipe->wait; -} - /* No kernel lock held - fine */ -static __poll_t pipe_poll_mask(struct file *filp, __poll_t events) +static __poll_t +pipe_poll(struct file *filp, poll_table *wait) { + __poll_t mask; struct pipe_inode_info *pipe = filp->private_data; - int nrbufs = pipe->nrbufs; - __poll_t mask = 0; + int nrbufs; + + poll_wait(filp, &pipe->wait, wait); /* Reading only -- no need for acquiring the semaphore. */ + nrbufs = pipe->nrbufs; + mask = 0; if (filp->f_mode & FMODE_READ) { mask = (nrbufs > 0) ? EPOLLIN | EPOLLRDNORM : 0; if (!pipe->writers && filp->f_version != pipe->w_counter) @@ -1023,8 +1020,7 @@ const struct file_operations pipefifo_fops = { .llseek = no_llseek, .read_iter = pipe_read, .write_iter = pipe_write, - .get_poll_head = pipe_get_poll_head, - .poll_mask = pipe_poll_mask, + .poll = pipe_poll, .unlocked_ioctl = pipe_ioctl, .release = pipe_release, .fasync = pipe_fasync, diff --git a/fs/select.c b/fs/select.c index 317891ff8165..4a6b6e4b21cb 100644 --- a/fs/select.c +++ b/fs/select.c @@ -34,29 +34,6 @@ #include -__poll_t vfs_poll(struct file *file, struct poll_table_struct *pt) -{ - if (file->f_op->poll) { - return file->f_op->poll(file, pt); - } else if (file_has_poll_mask(file)) { - unsigned int events = poll_requested_events(pt); - struct wait_queue_head *head; - - if (pt && pt->_qproc) { - head = file->f_op->get_poll_head(file, events); - if (!head) - return DEFAULT_POLLMASK; - if (IS_ERR(head)) - return EPOLLERR; - pt->_qproc(file, head, pt); - } - - return file->f_op->poll_mask(file, events); - } else { - return DEFAULT_POLLMASK; - } -} -EXPORT_SYMBOL_GPL(vfs_poll); /* * Estimate expected accuracy in ns from a timeval. diff --git a/fs/timerfd.c b/fs/timerfd.c index d84a2bee4f82..cdad49da3ff7 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -226,20 +226,21 @@ static int timerfd_release(struct inode *inode, struct file *file) kfree_rcu(ctx, rcu); return 0; } - -static struct wait_queue_head *timerfd_get_poll_head(struct file *file, - __poll_t eventmask) + +static __poll_t timerfd_poll(struct file *file, poll_table *wait) { struct timerfd_ctx *ctx = file->private_data; + __poll_t events = 0; + unsigned long flags; - return &ctx->wqh; -} + poll_wait(file, &ctx->wqh, wait); -static __poll_t timerfd_poll_mask(struct file *file, __poll_t eventmask) -{ - struct timerfd_ctx *ctx = file->private_data; + spin_lock_irqsave(&ctx->wqh.lock, flags); + if (ctx->ticks) + events |= EPOLLIN; + spin_unlock_irqrestore(&ctx->wqh.lock, flags); - return ctx->ticks ? EPOLLIN : 0; + return events; } static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, @@ -363,8 +364,7 @@ static long timerfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg static const struct file_operations timerfd_fops = { .release = timerfd_release, - .get_poll_head = timerfd_get_poll_head, - .poll_mask = timerfd_poll_mask, + .poll = timerfd_poll, .read = timerfd_read, .llseek = noop_llseek, .show_fdinfo = timerfd_show, diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h index cc414db9da0a..482461d8931d 100644 --- a/include/crypto/if_alg.h +++ b/include/crypto/if_alg.h @@ -245,7 +245,8 @@ ssize_t af_alg_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags); void af_alg_free_resources(struct af_alg_async_req *areq); void af_alg_async_cb(struct crypto_async_request *_req, int err); -__poll_t af_alg_poll_mask(struct socket *sock, __poll_t events); +__poll_t af_alg_poll(struct file *file, struct socket *sock, + poll_table *wait); struct af_alg_async_req *af_alg_alloc_areq(struct sock *sk, unsigned int areqlen); int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags, diff --git a/include/linux/fs.h b/include/linux/fs.h index 5c91108846db..d78d146a98da 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1720,8 +1720,6 @@ struct file_operations { int (*iterate) (struct file *, struct dir_context *); int (*iterate_shared) (struct file *, struct dir_context *); __poll_t (*poll) (struct file *, struct poll_table_struct *); - struct wait_queue_head * (*get_poll_head)(struct file *, __poll_t); - __poll_t (*poll_mask) (struct file *, __poll_t); long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); long (*compat_ioctl) (struct file *, unsigned int, unsigned long); int (*mmap) (struct file *, struct vm_area_struct *); diff --git a/include/linux/net.h b/include/linux/net.h index 08b6eb964dd6..6554d3ba4396 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -147,7 +147,6 @@ struct proto_ops { int (*getname) (struct socket *sock, struct sockaddr *addr, int peer); - __poll_t (*poll_mask) (struct socket *sock, __poll_t events); __poll_t (*poll) (struct file *file, struct socket *sock, struct poll_table_struct *wait); int (*ioctl) (struct socket *sock, unsigned int cmd, diff --git a/include/linux/poll.h b/include/linux/poll.h index fdf86b4cbc71..7e0fdcf905d2 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -74,17 +74,17 @@ static inline void init_poll_funcptr(poll_table *pt, poll_queue_proc qproc) pt->_key = ~(__poll_t)0; /* all events enabled */ } -static inline bool file_has_poll_mask(struct file *file) -{ - return file->f_op->get_poll_head && file->f_op->poll_mask; -} - static inline bool file_can_poll(struct file *file) { - return file->f_op->poll || file_has_poll_mask(file); + return file->f_op->poll; } -__poll_t vfs_poll(struct file *file, struct poll_table_struct *pt); +static inline __poll_t vfs_poll(struct file *file, struct poll_table_struct *pt) +{ + if (unlikely(!file->f_op->poll)) + return DEFAULT_POLLMASK; + return file->f_op->poll(file, pt); +} struct poll_table_entry { struct file *filp; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c86885954994..164cdedf6012 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3252,7 +3252,8 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags, int *peeked, int *off, int *err); struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, int *err); -__poll_t datagram_poll_mask(struct socket *sock, __poll_t events); +__poll_t datagram_poll(struct file *file, struct socket *sock, + struct poll_table_struct *wait); int skb_copy_datagram_iter(const struct sk_buff *from, int offset, struct iov_iter *to, int size); static inline int skb_copy_datagram_msg(const struct sk_buff *from, int offset, diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 53ce8176c313..ec9d6bc65855 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -271,7 +271,7 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags); int bt_sock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags); -__poll_t bt_sock_poll_mask(struct socket *sock, __poll_t events); +__poll_t bt_sock_poll(struct file *file, struct socket *sock, poll_table *wait); int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo); int bt_sock_wait_ready(struct sock *sk, unsigned long flags); diff --git a/include/net/iucv/af_iucv.h b/include/net/iucv/af_iucv.h index b0eaeb02d46d..f4c21b5a1242 100644 --- a/include/net/iucv/af_iucv.h +++ b/include/net/iucv/af_iucv.h @@ -153,6 +153,8 @@ struct iucv_sock_list { atomic_t autobind_name; }; +__poll_t iucv_sock_poll(struct file *file, struct socket *sock, + poll_table *wait); void iucv_sock_link(struct iucv_sock_list *l, struct sock *s); void iucv_sock_unlink(struct iucv_sock_list *l, struct sock *s); void iucv_accept_enqueue(struct sock *parent, struct sock *sk); diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 30b3e2fe240a..8c2caa370e0f 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -109,7 +109,8 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb); int sctp_inet_listen(struct socket *sock, int backlog); void sctp_write_space(struct sock *sk); void sctp_data_ready(struct sock *sk); -__poll_t sctp_poll_mask(struct socket *sock, __poll_t events); +__poll_t sctp_poll(struct file *file, struct socket *sock, + poll_table *wait); void sctp_sock_rfree(struct sk_buff *skb); void sctp_copy_sock(struct sock *newsk, struct sock *sk, struct sctp_association *asoc); diff --git a/include/net/tcp.h b/include/net/tcp.h index 0448e7c5d2b4..800582b5dd54 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -388,7 +388,8 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst); void tcp_close(struct sock *sk, long timeout); void tcp_init_sock(struct sock *sk); void tcp_init_transfer(struct sock *sk, int bpf_op); -__poll_t tcp_poll_mask(struct socket *sock, __poll_t events); +__poll_t tcp_poll(struct file *file, struct socket *sock, + struct poll_table_struct *wait); int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); int tcp_setsockopt(struct sock *sk, int level, int optname, diff --git a/include/net/tls.h b/include/net/tls.h index 7f84ea3e217c..70c273777fe9 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -109,7 +109,8 @@ struct tls_sw_context_rx { struct strparser strp; void (*saved_data_ready)(struct sock *sk); - __poll_t (*sk_poll_mask)(struct socket *sock, __poll_t events); + unsigned int (*sk_poll)(struct file *file, struct socket *sock, + struct poll_table_struct *wait); struct sk_buff *recv_pkt; u8 control; bool decrypted; @@ -224,7 +225,8 @@ void tls_sw_free_resources_tx(struct sock *sk); void tls_sw_free_resources_rx(struct sock *sk); int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len); -__poll_t tls_sw_poll_mask(struct socket *sock, __poll_t events); +unsigned int tls_sw_poll(struct file *file, struct socket *sock, + struct poll_table_struct *wait); ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); diff --git a/include/net/udp.h b/include/net/udp.h index b1ea8b0f5e6a..81afdacd4fff 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -285,7 +285,7 @@ int udp_init_sock(struct sock *sk); int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); int __udp_disconnect(struct sock *sk, int flags); int udp_disconnect(struct sock *sk, int flags); -__poll_t udp_poll_mask(struct socket *sock, __poll_t events); +__poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait); struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, netdev_features_t features, bool is_ipv6); diff --git a/include/uapi/linux/aio_abi.h b/include/uapi/linux/aio_abi.h index d00221345c19..d4e768d55d14 100644 --- a/include/uapi/linux/aio_abi.h +++ b/include/uapi/linux/aio_abi.h @@ -39,8 +39,10 @@ enum { IOCB_CMD_PWRITE = 1, IOCB_CMD_FSYNC = 2, IOCB_CMD_FDSYNC = 3, - /* 4 was the experimental IOCB_CMD_PREADX */ - IOCB_CMD_POLL = 5, + /* These two are experimental. + * IOCB_CMD_PREADX = 4, + * IOCB_CMD_POLL = 5, + */ IOCB_CMD_NOOP = 6, IOCB_CMD_PREADV = 7, IOCB_CMD_PWRITEV = 8, @@ -109,7 +111,7 @@ struct iocb { #undef IFLITTLE struct __aio_sigset { - const sigset_t __user *sigmask; + sigset_t __user *sigmask; size_t sigsetsize; }; diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 55fdba05d7d9..9b6bc5abe946 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1869,7 +1869,7 @@ static const struct proto_ops atalk_dgram_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = atalk_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = atalk_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = atalk_compat_ioctl, diff --git a/net/atm/common.c b/net/atm/common.c index ff5748b2190f..a7a68e509628 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -647,11 +647,16 @@ out: return error; } -__poll_t vcc_poll_mask(struct socket *sock, __poll_t events) +__poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; - struct atm_vcc *vcc = ATM_SD(sock); - __poll_t mask = 0; + struct atm_vcc *vcc; + __poll_t mask; + + sock_poll_wait(file, sk_sleep(sk), wait); + mask = 0; + + vcc = ATM_SD(sock); /* exceptional events */ if (sk->sk_err) diff --git a/net/atm/common.h b/net/atm/common.h index 526796ad230f..5850649068bb 100644 --- a/net/atm/common.h +++ b/net/atm/common.h @@ -17,7 +17,7 @@ int vcc_connect(struct socket *sock, int itf, short vpi, int vci); int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags); int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len); -__poll_t vcc_poll_mask(struct socket *sock, __poll_t events); +__poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait); int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); int vcc_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); int vcc_setsockopt(struct socket *sock, int level, int optname, diff --git a/net/atm/pvc.c b/net/atm/pvc.c index 9f75092fe778..2cb10af16afc 100644 --- a/net/atm/pvc.c +++ b/net/atm/pvc.c @@ -113,7 +113,7 @@ static const struct proto_ops pvc_proto_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = pvc_getname, - .poll_mask = vcc_poll_mask, + .poll = vcc_poll, .ioctl = vcc_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = vcc_compat_ioctl, diff --git a/net/atm/svc.c b/net/atm/svc.c index 53f4ad7087b1..2f91b766ac42 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -636,7 +636,7 @@ static const struct proto_ops svc_proto_ops = { .socketpair = sock_no_socketpair, .accept = svc_accept, .getname = svc_getname, - .poll_mask = vcc_poll_mask, + .poll = vcc_poll, .ioctl = svc_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = svc_compat_ioctl, diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index d1d2442ce573..c603d33d5410 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1941,7 +1941,7 @@ static const struct proto_ops ax25_proto_ops = { .socketpair = sock_no_socketpair, .accept = ax25_accept, .getname = ax25_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = ax25_ioctl, .listen = ax25_listen, .shutdown = ax25_shutdown, diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 510ab4f55df5..3264e1873219 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -437,13 +437,16 @@ static inline __poll_t bt_accept_poll(struct sock *parent) return 0; } -__poll_t bt_sock_poll_mask(struct socket *sock, __poll_t events) +__poll_t bt_sock_poll(struct file *file, struct socket *sock, + poll_table *wait) { struct sock *sk = sock->sk; __poll_t mask = 0; BT_DBG("sock %p, sk %p", sock, sk); + poll_wait(file, sk_sleep(sk), wait); + if (sk->sk_state == BT_LISTEN) return bt_accept_poll(sk); @@ -475,7 +478,7 @@ __poll_t bt_sock_poll_mask(struct socket *sock, __poll_t events) return mask; } -EXPORT_SYMBOL(bt_sock_poll_mask); +EXPORT_SYMBOL(bt_sock_poll); int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index d6c099861538..1506e1632394 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -1975,7 +1975,7 @@ static const struct proto_ops hci_sock_ops = { .sendmsg = hci_sock_sendmsg, .recvmsg = hci_sock_recvmsg, .ioctl = hci_sock_ioctl, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = hci_sock_setsockopt, diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 742a190034e6..686bdc6b35b0 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -1653,7 +1653,7 @@ static const struct proto_ops l2cap_sock_ops = { .getname = l2cap_sock_getname, .sendmsg = l2cap_sock_sendmsg, .recvmsg = l2cap_sock_recvmsg, - .poll_mask = bt_sock_poll_mask, + .poll = bt_sock_poll, .ioctl = bt_sock_ioctl, .mmap = sock_no_mmap, .socketpair = sock_no_socketpair, diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 1cf57622473a..d606e9212291 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -1049,7 +1049,7 @@ static const struct proto_ops rfcomm_sock_ops = { .setsockopt = rfcomm_sock_setsockopt, .getsockopt = rfcomm_sock_getsockopt, .ioctl = rfcomm_sock_ioctl, - .poll_mask = bt_sock_poll_mask, + .poll = bt_sock_poll, .socketpair = sock_no_socketpair, .mmap = sock_no_mmap }; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index d60dbc61d170..413b8ee49fec 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -1197,7 +1197,7 @@ static const struct proto_ops sco_sock_ops = { .getname = sco_sock_getname, .sendmsg = sco_sock_sendmsg, .recvmsg = sco_sock_recvmsg, - .poll_mask = bt_sock_poll_mask, + .poll = bt_sock_poll, .ioctl = bt_sock_ioctl, .mmap = sock_no_mmap, .socketpair = sock_no_socketpair, diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index c7991867d622..a6fb1b3bcad9 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -934,11 +934,15 @@ static int caif_release(struct socket *sock) } /* Copied from af_unix.c:unix_poll(), added CAIF tx_flow handling */ -static __poll_t caif_poll_mask(struct socket *sock, __poll_t events) +static __poll_t caif_poll(struct file *file, + struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; + __poll_t mask; struct caifsock *cf_sk = container_of(sk, struct caifsock, sk); - __poll_t mask = 0; + + sock_poll_wait(file, sk_sleep(sk), wait); + mask = 0; /* exceptional events? */ if (sk->sk_err) @@ -972,7 +976,7 @@ static const struct proto_ops caif_seqpacket_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = sock_no_getname, - .poll_mask = caif_poll_mask, + .poll = caif_poll, .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, @@ -993,7 +997,7 @@ static const struct proto_ops caif_stream_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = sock_no_getname, - .poll_mask = caif_poll_mask, + .poll = caif_poll, .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, diff --git a/net/can/bcm.c b/net/can/bcm.c index 9393f25df08d..0af8f0db892a 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1660,7 +1660,7 @@ static const struct proto_ops bcm_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = sock_no_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = can_ioctl, /* use can_ioctl() from af_can.c */ .listen = sock_no_listen, .shutdown = sock_no_shutdown, diff --git a/net/can/raw.c b/net/can/raw.c index fd7e2f49ea6a..1051eee82581 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -843,7 +843,7 @@ static const struct proto_ops raw_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = raw_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = can_ioctl, /* use can_ioctl() from af_can.c */ .listen = sock_no_listen, .shutdown = sock_no_shutdown, diff --git a/net/core/datagram.c b/net/core/datagram.c index f19bf3dc2bd6..9938952c5c78 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -819,8 +819,9 @@ EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg); /** * datagram_poll - generic datagram poll + * @file: file struct * @sock: socket - * @events to wait for + * @wait: poll table * * Datagram poll: Again totally generic. This also handles * sequenced packet sockets providing the socket receive queue @@ -830,10 +831,14 @@ EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg); * and you use a different write policy from sock_writeable() * then please supply your own write_space callback. */ -__poll_t datagram_poll_mask(struct socket *sock, __poll_t events) +__poll_t datagram_poll(struct file *file, struct socket *sock, + poll_table *wait) { struct sock *sk = sock->sk; - __poll_t mask = 0; + __poll_t mask; + + sock_poll_wait(file, sk_sleep(sk), wait); + mask = 0; /* exceptional events? */ if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) @@ -866,4 +871,4 @@ __poll_t datagram_poll_mask(struct socket *sock, __poll_t events) return mask; } -EXPORT_SYMBOL(datagram_poll_mask); +EXPORT_SYMBOL(datagram_poll); diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 0ea2ee56ac1b..f91e3816806b 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -316,7 +316,8 @@ int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len); void dccp_shutdown(struct sock *sk, int how); int inet_dccp_listen(struct socket *sock, int backlog); -__poll_t dccp_poll_mask(struct socket *sock, __poll_t events); +__poll_t dccp_poll(struct file *file, struct socket *sock, + poll_table *wait); int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); void dccp_req_err(struct sock *sk, u64 seq); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index a9e478cd3787..b08feb219b44 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -984,7 +984,7 @@ static const struct proto_ops inet_dccp_ops = { .accept = inet_accept, .getname = inet_getname, /* FIXME: work on tcp_poll to rename it to inet_csk_poll */ - .poll_mask = dccp_poll_mask, + .poll = dccp_poll, .ioctl = inet_ioctl, /* FIXME: work on inet_listen to rename it to sock_common_listen */ .listen = inet_dccp_listen, diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 17fc4e0166ba..6344f1b18a6a 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -1070,7 +1070,7 @@ static const struct proto_ops inet6_dccp_ops = { .socketpair = sock_no_socketpair, .accept = inet_accept, .getname = inet6_getname, - .poll_mask = dccp_poll_mask, + .poll = dccp_poll, .ioctl = inet6_ioctl, .listen = inet_dccp_listen, .shutdown = inet_shutdown, diff --git a/net/dccp/proto.c b/net/dccp/proto.c index ca21c1c76da0..0d56e36a6db7 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -312,11 +312,20 @@ int dccp_disconnect(struct sock *sk, int flags) EXPORT_SYMBOL_GPL(dccp_disconnect); -__poll_t dccp_poll_mask(struct socket *sock, __poll_t events) +/* + * Wait for a DCCP event. + * + * Note that we don't need to lock the socket, as the upper poll layers + * take care of normal races (between the test and the event) and we don't + * go look at any of the socket buffers directly. + */ +__poll_t dccp_poll(struct file *file, struct socket *sock, + poll_table *wait) { __poll_t mask; struct sock *sk = sock->sk; + sock_poll_wait(file, sk_sleep(sk), wait); if (sk->sk_state == DCCP_LISTEN) return inet_csk_listen_poll(sk); @@ -358,7 +367,7 @@ __poll_t dccp_poll_mask(struct socket *sock, __poll_t events) return mask; } -EXPORT_SYMBOL_GPL(dccp_poll_mask); +EXPORT_SYMBOL_GPL(dccp_poll); int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg) { diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 9a686d890bfa..7d6ff983ba2c 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -1207,11 +1207,11 @@ static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int peer) } -static __poll_t dn_poll_mask(struct socket *sock, __poll_t events) +static __poll_t dn_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; struct dn_scp *scp = DN_SK(sk); - __poll_t mask = datagram_poll_mask(sock, events); + __poll_t mask = datagram_poll(file, sock, wait); if (!skb_queue_empty(&scp->other_receive_queue)) mask |= EPOLLRDBAND; @@ -2331,7 +2331,7 @@ static const struct proto_ops dn_proto_ops = { .socketpair = sock_no_socketpair, .accept = dn_accept, .getname = dn_getname, - .poll_mask = dn_poll_mask, + .poll = dn_poll, .ioctl = dn_ioctl, .listen = dn_listen, .shutdown = dn_shutdown, diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index a0768d2759b8..a60658c85a9a 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -423,7 +423,7 @@ static const struct proto_ops ieee802154_raw_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = sock_no_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = ieee802154_sock_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, @@ -969,7 +969,7 @@ static const struct proto_ops ieee802154_dgram_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = sock_no_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = ieee802154_sock_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 15e125558c76..b403499fdabe 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -986,7 +986,7 @@ const struct proto_ops inet_stream_ops = { .socketpair = sock_no_socketpair, .accept = inet_accept, .getname = inet_getname, - .poll_mask = tcp_poll_mask, + .poll = tcp_poll, .ioctl = inet_ioctl, .listen = inet_listen, .shutdown = inet_shutdown, @@ -1021,7 +1021,7 @@ const struct proto_ops inet_dgram_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = inet_getname, - .poll_mask = udp_poll_mask, + .poll = udp_poll, .ioctl = inet_ioctl, .listen = sock_no_listen, .shutdown = inet_shutdown, @@ -1042,7 +1042,7 @@ EXPORT_SYMBOL(inet_dgram_ops); /* * For SOCK_RAW sockets; should be the same as inet_dgram_ops but without - * udp_poll_mask + * udp_poll */ static const struct proto_ops inet_sockraw_ops = { .family = PF_INET, @@ -1053,7 +1053,7 @@ static const struct proto_ops inet_sockraw_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = inet_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = inet_ioctl, .listen = sock_no_listen, .shutdown = inet_shutdown, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 141acd92e58a..e7b53d2a971f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -494,21 +494,32 @@ static inline bool tcp_stream_is_readable(const struct tcp_sock *tp, } /* - * Socket is not locked. We are protected from async events by poll logic and - * correct handling of state changes made by other threads is impossible in - * any case. + * Wait for a TCP event. + * + * Note that we don't need to lock the socket, as the upper poll layers + * take care of normal races (between the test and the event) and we don't + * go look at any of the socket buffers directly. */ -__poll_t tcp_poll_mask(struct socket *sock, __poll_t events) +__poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait) { + __poll_t mask; struct sock *sk = sock->sk; const struct tcp_sock *tp = tcp_sk(sk); - __poll_t mask = 0; int state; + sock_poll_wait(file, sk_sleep(sk), wait); + state = inet_sk_state_load(sk); if (state == TCP_LISTEN) return inet_csk_listen_poll(sk); + /* Socket is not locked. We are protected from async events + * by poll logic and correct handling of state changes + * made by other threads is impossible in any case. + */ + + mask = 0; + /* * EPOLLHUP is certainly not done right. But poll() doesn't * have a notion of HUP in just one direction, and for a @@ -589,7 +600,7 @@ __poll_t tcp_poll_mask(struct socket *sock, __poll_t events) return mask; } -EXPORT_SYMBOL(tcp_poll_mask); +EXPORT_SYMBOL(tcp_poll); int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 9bb27df4dac5..24e116ddae79 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2591,7 +2591,7 @@ int compat_udp_getsockopt(struct sock *sk, int level, int optname, * udp_poll - wait for a UDP event. * @file - file struct * @sock - socket - * @events - events to wait for + * @wait - poll table * * This is same as datagram poll, except for the special case of * blocking sockets. If application is using a blocking fd @@ -2600,23 +2600,23 @@ int compat_udp_getsockopt(struct sock *sk, int level, int optname, * but then block when reading it. Add special case code * to work around these arguably broken applications. */ -__poll_t udp_poll_mask(struct socket *sock, __poll_t events) +__poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait) { - __poll_t mask = datagram_poll_mask(sock, events); + __poll_t mask = datagram_poll(file, sock, wait); struct sock *sk = sock->sk; if (!skb_queue_empty(&udp_sk(sk)->reader_queue)) mask |= EPOLLIN | EPOLLRDNORM; /* Check for false positives due to checksum errors */ - if ((mask & EPOLLRDNORM) && !(sock->file->f_flags & O_NONBLOCK) && + if ((mask & EPOLLRDNORM) && !(file->f_flags & O_NONBLOCK) && !(sk->sk_shutdown & RCV_SHUTDOWN) && first_packet_length(sk) == -1) mask &= ~(EPOLLIN | EPOLLRDNORM); return mask; } -EXPORT_SYMBOL(udp_poll_mask); +EXPORT_SYMBOL(udp_poll); int udp_abort(struct sock *sk, int err) { diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 74f2a261e8df..9ed0eae91758 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -570,7 +570,7 @@ const struct proto_ops inet6_stream_ops = { .socketpair = sock_no_socketpair, /* a do nothing */ .accept = inet_accept, /* ok */ .getname = inet6_getname, - .poll_mask = tcp_poll_mask, /* ok */ + .poll = tcp_poll, /* ok */ .ioctl = inet6_ioctl, /* must change */ .listen = inet_listen, /* ok */ .shutdown = inet_shutdown, /* ok */ @@ -603,7 +603,7 @@ const struct proto_ops inet6_dgram_ops = { .socketpair = sock_no_socketpair, /* a do nothing */ .accept = sock_no_accept, /* a do nothing */ .getname = inet6_getname, - .poll_mask = udp_poll_mask, /* ok */ + .poll = udp_poll, /* ok */ .ioctl = inet6_ioctl, /* must change */ .listen = sock_no_listen, /* ok */ .shutdown = inet_shutdown, /* ok */ diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index ce6f0d15b5dd..afc307c89d1a 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1334,7 +1334,7 @@ void raw6_proc_exit(void) } #endif /* CONFIG_PROC_FS */ -/* Same as inet6_dgram_ops, sans udp_poll_mask. */ +/* Same as inet6_dgram_ops, sans udp_poll. */ const struct proto_ops inet6_sockraw_ops = { .family = PF_INET6, .owner = THIS_MODULE, @@ -1344,7 +1344,7 @@ const struct proto_ops inet6_sockraw_ops = { .socketpair = sock_no_socketpair, /* a do nothing */ .accept = sock_no_accept, /* a do nothing */ .getname = inet6_getname, - .poll_mask = datagram_poll_mask, /* ok */ + .poll = datagram_poll, /* ok */ .ioctl = inet6_ioctl, /* must change */ .listen = sock_no_listen, /* ok */ .shutdown = inet_shutdown, /* ok */ diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 68e86257a549..893a022f9620 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1488,11 +1488,14 @@ static inline __poll_t iucv_accept_poll(struct sock *parent) return 0; } -static __poll_t iucv_sock_poll_mask(struct socket *sock, __poll_t events) +__poll_t iucv_sock_poll(struct file *file, struct socket *sock, + poll_table *wait) { struct sock *sk = sock->sk; __poll_t mask = 0; + sock_poll_wait(file, sk_sleep(sk), wait); + if (sk->sk_state == IUCV_LISTEN) return iucv_accept_poll(sk); @@ -2385,7 +2388,7 @@ static const struct proto_ops iucv_sock_ops = { .getname = iucv_sock_getname, .sendmsg = iucv_sock_sendmsg, .recvmsg = iucv_sock_recvmsg, - .poll_mask = iucv_sock_poll_mask, + .poll = iucv_sock_poll, .ioctl = sock_no_ioctl, .mmap = sock_no_mmap, .socketpair = sock_no_socketpair, diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 84b7d5c6fec8..d3601d421571 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1336,9 +1336,9 @@ static void init_kcm_sock(struct kcm_sock *kcm, struct kcm_mux *mux) struct list_head *head; int index = 0; - /* For SOCK_SEQPACKET sock type, datagram_poll_mask checks the sk_state, - * so we set sk_state, otherwise epoll_wait always returns right away - * with EPOLLHUP + /* For SOCK_SEQPACKET sock type, datagram_poll checks the sk_state, so + * we set sk_state, otherwise epoll_wait always returns right away with + * EPOLLHUP */ kcm->sk.sk_state = TCP_ESTABLISHED; @@ -1903,7 +1903,7 @@ static const struct proto_ops kcm_dgram_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = sock_no_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = kcm_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, @@ -1924,7 +1924,7 @@ static const struct proto_ops kcm_seqpacket_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = sock_no_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = kcm_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, diff --git a/net/key/af_key.c b/net/key/af_key.c index 8bdc1cbe490a..5e1d2946ffbf 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3751,7 +3751,7 @@ static const struct proto_ops pfkey_ops = { /* Now the operations that really occur. */ .release = pfkey_release, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .sendmsg = pfkey_sendmsg, .recvmsg = pfkey_recvmsg, }; diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 181073bf6925..a9c05b2bc1b0 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -613,7 +613,7 @@ static const struct proto_ops l2tp_ip_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = l2tp_ip_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = inet_ioctl, .listen = sock_no_listen, .shutdown = inet_shutdown, diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 336e4c00abbc..957369192ca1 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -754,7 +754,7 @@ static const struct proto_ops l2tp_ip6_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = l2tp_ip6_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = inet6_ioctl, .listen = sock_no_listen, .shutdown = inet_shutdown, diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 55188382845c..e398797878a9 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1818,7 +1818,7 @@ static const struct proto_ops pppol2tp_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = pppol2tp_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = pppol2tp_setsockopt, diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 804de8490186..1beeea9549fa 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -1192,7 +1192,7 @@ static const struct proto_ops llc_ui_ops = { .socketpair = sock_no_socketpair, .accept = llc_ui_accept, .getname = llc_ui_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = llc_ui_ioctl, .listen = llc_ui_listen, .shutdown = llc_ui_shutdown, diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 1189b84413d5..393573a99a5a 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2658,7 +2658,7 @@ static const struct proto_ops netlink_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = netlink_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = netlink_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 93fbcafbf388..03f37c4e64fe 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1355,7 +1355,7 @@ static const struct proto_ops nr_proto_ops = { .socketpair = sock_no_socketpair, .accept = nr_accept, .getname = nr_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = nr_ioctl, .listen = nr_listen, .shutdown = sock_no_shutdown, diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index ab5bb14b49af..ea0c0c6f1874 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -548,13 +548,16 @@ static inline __poll_t llcp_accept_poll(struct sock *parent) return 0; } -static __poll_t llcp_sock_poll_mask(struct socket *sock, __poll_t events) +static __poll_t llcp_sock_poll(struct file *file, struct socket *sock, + poll_table *wait) { struct sock *sk = sock->sk; __poll_t mask = 0; pr_debug("%p\n", sk); + sock_poll_wait(file, sk_sleep(sk), wait); + if (sk->sk_state == LLCP_LISTEN) return llcp_accept_poll(sk); @@ -896,7 +899,7 @@ static const struct proto_ops llcp_sock_ops = { .socketpair = sock_no_socketpair, .accept = llcp_sock_accept, .getname = llcp_sock_getname, - .poll_mask = llcp_sock_poll_mask, + .poll = llcp_sock_poll, .ioctl = sock_no_ioctl, .listen = llcp_sock_listen, .shutdown = sock_no_shutdown, @@ -916,7 +919,7 @@ static const struct proto_ops llcp_rawsock_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = llcp_sock_getname, - .poll_mask = llcp_sock_poll_mask, + .poll = llcp_sock_poll, .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c index 60c322531c49..e2188deb08dc 100644 --- a/net/nfc/rawsock.c +++ b/net/nfc/rawsock.c @@ -284,7 +284,7 @@ static const struct proto_ops rawsock_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = sock_no_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, @@ -304,7 +304,7 @@ static const struct proto_ops rawsock_raw_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = sock_no_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ff8e7e245c37..57634bc3da74 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4076,11 +4076,12 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd, return 0; } -static __poll_t packet_poll_mask(struct socket *sock, __poll_t events) +static __poll_t packet_poll(struct file *file, struct socket *sock, + poll_table *wait) { struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); - __poll_t mask = datagram_poll_mask(sock, events); + __poll_t mask = datagram_poll(file, sock, wait); spin_lock_bh(&sk->sk_receive_queue.lock); if (po->rx_ring.pg_vec) { @@ -4422,7 +4423,7 @@ static const struct proto_ops packet_ops_spkt = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = packet_getname_spkt, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = packet_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, @@ -4443,7 +4444,7 @@ static const struct proto_ops packet_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = packet_getname, - .poll_mask = packet_poll_mask, + .poll = packet_poll, .ioctl = packet_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, diff --git a/net/phonet/socket.c b/net/phonet/socket.c index c295c4e20f01..30187990257f 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -340,12 +340,15 @@ static int pn_socket_getname(struct socket *sock, struct sockaddr *addr, return sizeof(struct sockaddr_pn); } -static __poll_t pn_socket_poll_mask(struct socket *sock, __poll_t events) +static __poll_t pn_socket_poll(struct file *file, struct socket *sock, + poll_table *wait) { struct sock *sk = sock->sk; struct pep_sock *pn = pep_sk(sk); __poll_t mask = 0; + poll_wait(file, sk_sleep(sk), wait); + if (sk->sk_state == TCP_CLOSE) return EPOLLERR; if (!skb_queue_empty(&sk->sk_receive_queue)) @@ -445,7 +448,7 @@ const struct proto_ops phonet_dgram_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = pn_socket_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = pn_socket_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, @@ -470,7 +473,7 @@ const struct proto_ops phonet_stream_ops = { .socketpair = sock_no_socketpair, .accept = pn_socket_accept, .getname = pn_socket_getname, - .poll_mask = pn_socket_poll_mask, + .poll = pn_socket_poll, .ioctl = pn_socket_ioctl, .listen = pn_socket_listen, .shutdown = sock_no_shutdown, diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index 1b5025ea5b04..2aa07b547b16 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -1023,7 +1023,7 @@ static const struct proto_ops qrtr_proto_ops = { .recvmsg = qrtr_recvmsg, .getname = qrtr_getname, .ioctl = qrtr_ioctl, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .shutdown = sock_no_shutdown, .setsockopt = sock_no_setsockopt, .getsockopt = sock_no_getsockopt, diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index ebe42e7eb456..d00a0ef39a56 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1470,7 +1470,7 @@ static const struct proto_ops rose_proto_ops = { .socketpair = sock_no_socketpair, .accept = rose_accept, .getname = rose_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = rose_ioctl, .listen = rose_listen, .shutdown = sock_no_shutdown, diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 3b1ac93efee2..2b463047dd7b 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -734,11 +734,15 @@ static int rxrpc_getsockopt(struct socket *sock, int level, int optname, /* * permit an RxRPC socket to be polled */ -static __poll_t rxrpc_poll_mask(struct socket *sock, __poll_t events) +static __poll_t rxrpc_poll(struct file *file, struct socket *sock, + poll_table *wait) { struct sock *sk = sock->sk; struct rxrpc_sock *rx = rxrpc_sk(sk); - __poll_t mask = 0; + __poll_t mask; + + sock_poll_wait(file, sk_sleep(sk), wait); + mask = 0; /* the socket is readable if there are any messages waiting on the Rx * queue */ @@ -945,7 +949,7 @@ static const struct proto_ops rxrpc_rpc_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = sock_no_getname, - .poll_mask = rxrpc_poll_mask, + .poll = rxrpc_poll, .ioctl = sock_no_ioctl, .listen = rxrpc_listen, .shutdown = rxrpc_shutdown, diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 7339918a805d..0cd2e764f47f 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -1010,7 +1010,7 @@ static const struct proto_ops inet6_seqpacket_ops = { .socketpair = sock_no_socketpair, .accept = inet_accept, .getname = sctp_getname, - .poll_mask = sctp_poll_mask, + .poll = sctp_poll, .ioctl = inet6_ioctl, .listen = sctp_inet_listen, .shutdown = inet_shutdown, diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 5dffbc493008..67f73d3a1356 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1016,7 +1016,7 @@ static const struct proto_ops inet_seqpacket_ops = { .socketpair = sock_no_socketpair, .accept = inet_accept, .getname = inet_getname, /* Semantics are different. */ - .poll_mask = sctp_poll_mask, + .poll = sctp_poll, .ioctl = inet_ioctl, .listen = sctp_inet_listen, .shutdown = inet_shutdown, /* Looks harmless. */ diff --git a/net/sctp/socket.c b/net/sctp/socket.c index d20f7addee19..ce620e878538 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -7717,12 +7717,14 @@ out: * here, again, by modeling the current TCP/UDP code. We don't have * a good way to test with it yet. */ -__poll_t sctp_poll_mask(struct socket *sock, __poll_t events) +__poll_t sctp_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; struct sctp_sock *sp = sctp_sk(sk); __poll_t mask; + poll_wait(file, sk_sleep(sk), wait); + sock_rps_record_flow(sk); /* A TCP-style listening socket becomes readable when the accept queue diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index da7f02edcd37..973b4471b532 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1273,7 +1273,8 @@ static __poll_t smc_accept_poll(struct sock *parent) return mask; } -static __poll_t smc_poll_mask(struct socket *sock, __poll_t events) +static __poll_t smc_poll(struct file *file, struct socket *sock, + poll_table *wait) { struct sock *sk = sock->sk; __poll_t mask = 0; @@ -1289,7 +1290,7 @@ static __poll_t smc_poll_mask(struct socket *sock, __poll_t events) if ((sk->sk_state == SMC_INIT) || smc->use_fallback) { /* delegate to CLC child sock */ release_sock(sk); - mask = smc->clcsock->ops->poll_mask(smc->clcsock, events); + mask = smc->clcsock->ops->poll(file, smc->clcsock, wait); lock_sock(sk); sk->sk_err = smc->clcsock->sk->sk_err; if (sk->sk_err) { @@ -1307,6 +1308,11 @@ static __poll_t smc_poll_mask(struct socket *sock, __poll_t events) } } } else { + if (sk->sk_state != SMC_CLOSED) { + release_sock(sk); + sock_poll_wait(file, sk_sleep(sk), wait); + lock_sock(sk); + } if (sk->sk_err) mask |= EPOLLERR; if ((sk->sk_shutdown == SHUTDOWN_MASK) || @@ -1619,7 +1625,7 @@ static const struct proto_ops smc_sock_ops = { .socketpair = sock_no_socketpair, .accept = smc_accept, .getname = smc_getname, - .poll_mask = smc_poll_mask, + .poll = smc_poll, .ioctl = smc_ioctl, .listen = smc_listen, .shutdown = smc_shutdown, diff --git a/net/socket.c b/net/socket.c index 8a109012608a..a564c6ed19d5 100644 --- a/net/socket.c +++ b/net/socket.c @@ -117,10 +117,8 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from); static int sock_mmap(struct file *file, struct vm_area_struct *vma); static int sock_close(struct inode *inode, struct file *file); -static struct wait_queue_head *sock_get_poll_head(struct file *file, - __poll_t events); -static __poll_t sock_poll_mask(struct file *file, __poll_t); -static __poll_t sock_poll(struct file *file, struct poll_table_struct *wait); +static __poll_t sock_poll(struct file *file, + struct poll_table_struct *wait); static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg); #ifdef CONFIG_COMPAT static long compat_sock_ioctl(struct file *file, @@ -143,8 +141,6 @@ static const struct file_operations socket_file_ops = { .llseek = no_llseek, .read_iter = sock_read_iter, .write_iter = sock_write_iter, - .get_poll_head = sock_get_poll_head, - .poll_mask = sock_poll_mask, .poll = sock_poll, .unlocked_ioctl = sock_ioctl, #ifdef CONFIG_COMPAT @@ -1130,48 +1126,14 @@ out_release: } EXPORT_SYMBOL(sock_create_lite); -static struct wait_queue_head *sock_get_poll_head(struct file *file, - __poll_t events) -{ - struct socket *sock = file->private_data; - - if (!sock->ops->poll_mask) - return NULL; - sock_poll_busy_loop(sock, events); - return sk_sleep(sock->sk); -} - -static __poll_t sock_poll_mask(struct file *file, __poll_t events) -{ - struct socket *sock = file->private_data; - - /* - * We need to be sure we are in sync with the socket flags modification. - * - * This memory barrier is paired in the wq_has_sleeper. - */ - smp_mb(); - - /* this socket can poll_ll so tell the system call */ - return sock->ops->poll_mask(sock, events) | - (sk_can_busy_loop(sock->sk) ? POLL_BUSY_LOOP : 0); -} - /* No kernel lock held - perfect */ static __poll_t sock_poll(struct file *file, poll_table *wait) { struct socket *sock = file->private_data; - __poll_t events = poll_requested_events(wait), mask = 0; + __poll_t events = poll_requested_events(wait); - if (sock->ops->poll) { - sock_poll_busy_loop(sock, events); - mask = sock->ops->poll(file, sock, wait); - } else if (sock->ops->poll_mask) { - sock_poll_wait(file, sock_get_poll_head(file, events), wait); - mask = sock->ops->poll_mask(sock, events); - } - - return mask | sock_poll_busy_flag(sock); + sock_poll_busy_loop(sock, events); + return sock->ops->poll(file, sock, wait) | sock_poll_busy_flag(sock); } static int sock_mmap(struct file *file, struct vm_area_struct *vma) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 14a5d055717d..930852c54d7a 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -692,9 +692,10 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, } /** - * tipc_poll - read pollmask + * tipc_poll - read and possibly block on pollmask * @file: file structure associated with the socket * @sock: socket for which to calculate the poll bits + * @wait: ??? * * Returns pollmask value * @@ -708,12 +709,15 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, * imply that the operation will succeed, merely that it should be performed * and will not block. */ -static __poll_t tipc_poll_mask(struct socket *sock, __poll_t events) +static __poll_t tipc_poll(struct file *file, struct socket *sock, + poll_table *wait) { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); __poll_t revents = 0; + sock_poll_wait(file, sk_sleep(sk), wait); + if (sk->sk_shutdown & RCV_SHUTDOWN) revents |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; if (sk->sk_shutdown == SHUTDOWN_MASK) @@ -3033,7 +3037,7 @@ static const struct proto_ops msg_ops = { .socketpair = tipc_socketpair, .accept = sock_no_accept, .getname = tipc_getname, - .poll_mask = tipc_poll_mask, + .poll = tipc_poll, .ioctl = tipc_ioctl, .listen = sock_no_listen, .shutdown = tipc_shutdown, @@ -3054,7 +3058,7 @@ static const struct proto_ops packet_ops = { .socketpair = tipc_socketpair, .accept = tipc_accept, .getname = tipc_getname, - .poll_mask = tipc_poll_mask, + .poll = tipc_poll, .ioctl = tipc_ioctl, .listen = tipc_listen, .shutdown = tipc_shutdown, @@ -3075,7 +3079,7 @@ static const struct proto_ops stream_ops = { .socketpair = tipc_socketpair, .accept = tipc_accept, .getname = tipc_getname, - .poll_mask = tipc_poll_mask, + .poll = tipc_poll, .ioctl = tipc_ioctl, .listen = tipc_listen, .shutdown = tipc_shutdown, diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index a127d61e8af9..301f22430469 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -712,7 +712,7 @@ static int __init tls_register(void) build_protos(tls_prots[TLSV4], &tcp_prot); tls_sw_proto_ops = inet_stream_ops; - tls_sw_proto_ops.poll_mask = tls_sw_poll_mask; + tls_sw_proto_ops.poll = tls_sw_poll; tls_sw_proto_ops.splice_read = tls_sw_splice_read; #ifdef CONFIG_TLS_DEVICE diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index f127fac88acf..d2380548f8f6 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -919,22 +919,23 @@ splice_read_end: return copied ? : err; } -__poll_t tls_sw_poll_mask(struct socket *sock, __poll_t events) +unsigned int tls_sw_poll(struct file *file, struct socket *sock, + struct poll_table_struct *wait) { + unsigned int ret; struct sock *sk = sock->sk; struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); - __poll_t mask; - /* Grab EPOLLOUT and EPOLLHUP from the underlying socket */ - mask = ctx->sk_poll_mask(sock, events); + /* Grab POLLOUT and POLLHUP from the underlying socket */ + ret = ctx->sk_poll(file, sock, wait); - /* Clear EPOLLIN bits, and set based on recv_pkt */ - mask &= ~(EPOLLIN | EPOLLRDNORM); + /* Clear POLLIN bits, and set based on recv_pkt */ + ret &= ~(POLLIN | POLLRDNORM); if (ctx->recv_pkt) - mask |= EPOLLIN | EPOLLRDNORM; + ret |= POLLIN | POLLRDNORM; - return mask; + return ret; } static int tls_read_size(struct strparser *strp, struct sk_buff *skb) @@ -1191,7 +1192,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) sk->sk_data_ready = tls_data_ready; write_unlock_bh(&sk->sk_callback_lock); - sw_ctx_rx->sk_poll_mask = sk->sk_socket->ops->poll_mask; + sw_ctx_rx->sk_poll = sk->sk_socket->ops->poll; strp_check_rcv(&sw_ctx_rx->strp); } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 95b02a71fd47..e5473c03d667 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -638,8 +638,9 @@ static int unix_stream_connect(struct socket *, struct sockaddr *, static int unix_socketpair(struct socket *, struct socket *); static int unix_accept(struct socket *, struct socket *, int, bool); static int unix_getname(struct socket *, struct sockaddr *, int); -static __poll_t unix_poll_mask(struct socket *, __poll_t); -static __poll_t unix_dgram_poll_mask(struct socket *, __poll_t); +static __poll_t unix_poll(struct file *, struct socket *, poll_table *); +static __poll_t unix_dgram_poll(struct file *, struct socket *, + poll_table *); static int unix_ioctl(struct socket *, unsigned int, unsigned long); static int unix_shutdown(struct socket *, int); static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t); @@ -680,7 +681,7 @@ static const struct proto_ops unix_stream_ops = { .socketpair = unix_socketpair, .accept = unix_accept, .getname = unix_getname, - .poll_mask = unix_poll_mask, + .poll = unix_poll, .ioctl = unix_ioctl, .listen = unix_listen, .shutdown = unix_shutdown, @@ -703,7 +704,7 @@ static const struct proto_ops unix_dgram_ops = { .socketpair = unix_socketpair, .accept = sock_no_accept, .getname = unix_getname, - .poll_mask = unix_dgram_poll_mask, + .poll = unix_dgram_poll, .ioctl = unix_ioctl, .listen = sock_no_listen, .shutdown = unix_shutdown, @@ -725,7 +726,7 @@ static const struct proto_ops unix_seqpacket_ops = { .socketpair = unix_socketpair, .accept = unix_accept, .getname = unix_getname, - .poll_mask = unix_dgram_poll_mask, + .poll = unix_dgram_poll, .ioctl = unix_ioctl, .listen = unix_listen, .shutdown = unix_shutdown, @@ -2629,10 +2630,13 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) return err; } -static __poll_t unix_poll_mask(struct socket *sock, __poll_t events) +static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; - __poll_t mask = 0; + __poll_t mask; + + sock_poll_wait(file, sk_sleep(sk), wait); + mask = 0; /* exceptional events? */ if (sk->sk_err) @@ -2661,11 +2665,15 @@ static __poll_t unix_poll_mask(struct socket *sock, __poll_t events) return mask; } -static __poll_t unix_dgram_poll_mask(struct socket *sock, __poll_t events) +static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, + poll_table *wait) { struct sock *sk = sock->sk, *other; - int writable; - __poll_t mask = 0; + unsigned int writable; + __poll_t mask; + + sock_poll_wait(file, sk_sleep(sk), wait); + mask = 0; /* exceptional events? */ if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) @@ -2691,7 +2699,7 @@ static __poll_t unix_dgram_poll_mask(struct socket *sock, __poll_t events) } /* No write status requested, avoid expensive OUT tests. */ - if (!(events & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT))) + if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT))) return mask; writable = unix_writable(sk); diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index bb5d5fa68c35..c1076c19b858 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -850,11 +850,18 @@ static int vsock_shutdown(struct socket *sock, int mode) return err; } -static __poll_t vsock_poll_mask(struct socket *sock, __poll_t events) +static __poll_t vsock_poll(struct file *file, struct socket *sock, + poll_table *wait) { - struct sock *sk = sock->sk; - struct vsock_sock *vsk = vsock_sk(sk); - __poll_t mask = 0; + struct sock *sk; + __poll_t mask; + struct vsock_sock *vsk; + + sk = sock->sk; + vsk = vsock_sk(sk); + + poll_wait(file, sk_sleep(sk), wait); + mask = 0; if (sk->sk_err) /* Signify that there has been an error on this socket. */ @@ -1084,7 +1091,7 @@ static const struct proto_ops vsock_dgram_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = vsock_getname, - .poll_mask = vsock_poll_mask, + .poll = vsock_poll, .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = vsock_shutdown, @@ -1842,7 +1849,7 @@ static const struct proto_ops vsock_stream_ops = { .socketpair = sock_no_socketpair, .accept = vsock_accept, .getname = vsock_getname, - .poll_mask = vsock_poll_mask, + .poll = vsock_poll, .ioctl = sock_no_ioctl, .listen = vsock_listen, .shutdown = vsock_shutdown, diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index f93365ae0fdd..d49aa79b7997 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1750,7 +1750,7 @@ static const struct proto_ops x25_proto_ops = { .socketpair = sock_no_socketpair, .accept = x25_accept, .getname = x25_getname, - .poll_mask = datagram_poll_mask, + .poll = datagram_poll, .ioctl = x25_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = compat_x25_ioctl, diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 3b3410ada097..59fb7d3c36a3 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -303,9 +303,10 @@ static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) return (xs->zc) ? xsk_zc_xmit(sk) : xsk_generic_xmit(sk, m, total_len); } -static __poll_t xsk_poll_mask(struct socket *sock, __poll_t events) +static unsigned int xsk_poll(struct file *file, struct socket *sock, + struct poll_table_struct *wait) { - __poll_t mask = datagram_poll_mask(sock, events); + unsigned int mask = datagram_poll(file, sock, wait); struct sock *sk = sock->sk; struct xdp_sock *xs = xdp_sk(sk); @@ -696,7 +697,7 @@ static const struct proto_ops xsk_proto_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = sock_no_getname, - .poll_mask = xsk_poll_mask, + .poll = xsk_poll, .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, From 4bb6e96ab808f88d746343637f0cc2243b527da5 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 27 Jun 2018 23:26:01 -0700 Subject: [PATCH 544/572] lib/percpu_ida.c: don't do alloc from per-CPU list if there is none In commit 804209d8a009 ("lib/percpu_ida.c: use _irqsave() instead of local_irq_save() + spin_lock") I inlined alloc_local_tag() and mixed up the >= check from percpu_ida_alloc() with the one in alloc_local_tag(). Don't alloc from per-CPU freelist if ->nr_free is zero. Link: http://lkml.kernel.org/r/20180613075830.c3zeva52fuj6fxxv@linutronix.de Fixes: 804209d8a009 ("lib/percpu_ida.c: use _irqsave() instead of local_irq_save() + spin_lock") Signed-off-by: Sebastian Andrzej Siewior Reported-by: David Disseldorp Tested-by: David Disseldorp Cc: Thomas Gleixner Cc: Nicholas Bellinger Cc: Shaohua Li Cc: Kent Overstreet Cc: Matthew Wilcox Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/percpu_ida.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c index 9bbd9c5d375a..beb14839b41a 100644 --- a/lib/percpu_ida.c +++ b/lib/percpu_ida.c @@ -141,7 +141,7 @@ int percpu_ida_alloc(struct percpu_ida *pool, int state) spin_lock_irqsave(&tags->lock, flags); /* Fastpath */ - if (likely(tags->nr_free >= 0)) { + if (likely(tags->nr_free)) { tag = tags->freelist[--tags->nr_free]; spin_unlock_irqrestore(&tags->lock, flags); return tag; From 28557cc106e6d2aa8b8c5c7687ea9f8055ff3911 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 27 Jun 2018 23:26:05 -0700 Subject: [PATCH 545/572] Revert mm/vmstat.c: fix vmstat_update() preemption BUG Revert commit c7f26ccfb2c3 ("mm/vmstat.c: fix vmstat_update() preemption BUG"). Steven saw a "using smp_processor_id() in preemptible" message and added a preempt_disable() section around it to keep it quiet. This is not the right thing to do it does not fix the real problem. vmstat_update() is invoked by a kworker on a specific CPU. This worker it bound to this CPU. The name of the worker was "kworker/1:1" so it should have been a worker which was bound to CPU1. A worker which can run on any CPU would have a `u' before the first digit. smp_processor_id() can be used in a preempt-enabled region as long as the task is bound to a single CPU which is the case here. If it could run on an arbitrary CPU then this is the problem we have an should seek to resolve. Not only this smp_processor_id() must not be migrated to another CPU but also refresh_cpu_vm_stats() which might access wrong per-CPU variables. Not to mention that other code relies on the fact that such a worker runs on one specific CPU only. Therefore revert that commit and we should look instead what broke the affinity mask of the kworker. Link: http://lkml.kernel.org/r/20180504104451.20278-1-bigeasy@linutronix.de Signed-off-by: Sebastian Andrzej Siewior Cc: Steven J. Hill Cc: Tejun Heo Cc: Vlastimil Babka Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmstat.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/mm/vmstat.c b/mm/vmstat.c index 75eda9c2b260..8ba0870ecddd 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1796,11 +1796,9 @@ static void vmstat_update(struct work_struct *w) * to occur in the future. Keep on running the * update worker thread. */ - preempt_disable(); queue_delayed_work_on(smp_processor_id(), mm_percpu_wq, this_cpu_ptr(&vmstat_work), round_jiffies_relative(sysctl_stat_interval)); - preempt_enable(); } } From d50d82faa0c964e31f7a946ba8aba7c715ca7ab0 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 27 Jun 2018 23:26:09 -0700 Subject: [PATCH 546/572] slub: fix failure when we delete and create a slab cache In kernel 4.17 I removed some code from dm-bufio that did slab cache merging (commit 21bb13276768: "dm bufio: remove code that merges slab caches") - both slab and slub support merging caches with identical attributes, so dm-bufio now just calls kmem_cache_create and relies on implicit merging. This uncovered a bug in the slub subsystem - if we delete a cache and immediatelly create another cache with the same attributes, it fails because of duplicate filename in /sys/kernel/slab/. The slub subsystem offloads freeing the cache to a workqueue - and if we create the new cache before the workqueue runs, it complains because of duplicate filename in sysfs. This patch fixes the bug by moving the call of kobject_del from sysfs_slab_remove_workfn to shutdown_cache. kobject_del must be called while we hold slab_mutex - so that the sysfs entry is deleted before a cache with the same attributes could be created. Running device-mapper-test-suite with: dmtest run --suite thin-provisioning -n /commit_failure_causes_fallback/ triggered: Buffer I/O error on dev dm-0, logical block 1572848, async page read device-mapper: thin: 253:1: metadata operation 'dm_pool_alloc_data_block' failed: error = -5 device-mapper: thin: 253:1: aborting current metadata transaction sysfs: cannot create duplicate filename '/kernel/slab/:a-0000144' CPU: 2 PID: 1037 Comm: kworker/u48:1 Not tainted 4.17.0.snitm+ #25 Hardware name: Supermicro SYS-1029P-WTR/X11DDW-L, BIOS 2.0a 12/06/2017 Workqueue: dm-thin do_worker [dm_thin_pool] Call Trace: dump_stack+0x5a/0x73 sysfs_warn_dup+0x58/0x70 sysfs_create_dir_ns+0x77/0x80 kobject_add_internal+0xba/0x2e0 kobject_init_and_add+0x70/0xb0 sysfs_slab_add+0xb1/0x250 __kmem_cache_create+0x116/0x150 create_cache+0xd9/0x1f0 kmem_cache_create_usercopy+0x1c1/0x250 kmem_cache_create+0x18/0x20 dm_bufio_client_create+0x1ae/0x410 [dm_bufio] dm_block_manager_create+0x5e/0x90 [dm_persistent_data] __create_persistent_data_objects+0x38/0x940 [dm_thin_pool] dm_pool_abort_metadata+0x64/0x90 [dm_thin_pool] metadata_operation_failed+0x59/0x100 [dm_thin_pool] alloc_data_block.isra.53+0x86/0x180 [dm_thin_pool] process_cell+0x2a3/0x550 [dm_thin_pool] do_worker+0x28d/0x8f0 [dm_thin_pool] process_one_work+0x171/0x370 worker_thread+0x49/0x3f0 kthread+0xf8/0x130 ret_from_fork+0x35/0x40 kobject_add_internal failed for :a-0000144 with -EEXIST, don't try to register things with the same name in the same directory. kmem_cache_create(dm_bufio_buffer-16) failed with error -17 Link: http://lkml.kernel.org/r/alpine.LRH.2.02.1806151817130.6333@file01.intranet.prod.int.rdu2.redhat.com Signed-off-by: Mikulas Patocka Reported-by: Mike Snitzer Tested-by: Mike Snitzer Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slub_def.h | 4 ++++ mm/slab_common.c | 4 ++++ mm/slub.c | 7 ++++++- 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 09fa2c6f0e68..3a1a1dbc6f49 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -155,8 +155,12 @@ struct kmem_cache { #ifdef CONFIG_SYSFS #define SLAB_SUPPORTS_SYSFS +void sysfs_slab_unlink(struct kmem_cache *); void sysfs_slab_release(struct kmem_cache *); #else +static inline void sysfs_slab_unlink(struct kmem_cache *s) +{ +} static inline void sysfs_slab_release(struct kmem_cache *s) { } diff --git a/mm/slab_common.c b/mm/slab_common.c index 890b1f04a03a..2296caf87bfb 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -567,10 +567,14 @@ static int shutdown_cache(struct kmem_cache *s) list_del(&s->list); if (s->flags & SLAB_TYPESAFE_BY_RCU) { +#ifdef SLAB_SUPPORTS_SYSFS + sysfs_slab_unlink(s); +#endif list_add_tail(&s->list, &slab_caches_to_rcu_destroy); schedule_work(&slab_caches_to_rcu_destroy_work); } else { #ifdef SLAB_SUPPORTS_SYSFS + sysfs_slab_unlink(s); sysfs_slab_release(s); #else slab_kmem_cache_release(s); diff --git a/mm/slub.c b/mm/slub.c index a3b8467c14af..51258eff4178 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -5667,7 +5667,6 @@ static void sysfs_slab_remove_workfn(struct work_struct *work) kset_unregister(s->memcg_kset); #endif kobject_uevent(&s->kobj, KOBJ_REMOVE); - kobject_del(&s->kobj); out: kobject_put(&s->kobj); } @@ -5752,6 +5751,12 @@ static void sysfs_slab_remove(struct kmem_cache *s) schedule_work(&s->kobj_remove_work); } +void sysfs_slab_unlink(struct kmem_cache *s) +{ + if (slab_state >= FULL) + kobject_del(&s->kobj); +} + void sysfs_slab_release(struct kmem_cache *s) { if (slab_state >= FULL) From 124049decbb121ec32742c94fb5d9d6bed8f24d8 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 27 Jun 2018 23:26:13 -0700 Subject: [PATCH 547/572] x86/e820: put !E820_TYPE_RAM regions into memblock.reserved There is a kernel panic that is triggered when reading /proc/kpageflags on the kernel booted with kernel parameter 'memmap=nn[KMG]!ss[KMG]': BUG: unable to handle kernel paging request at fffffffffffffffe PGD 9b20e067 P4D 9b20e067 PUD 9b210067 PMD 0 Oops: 0000 [#1] SMP PTI CPU: 2 PID: 1728 Comm: page-types Not tainted 4.17.0-rc6-mm1-v4.17-rc6-180605-0816-00236-g2dfb086ef02c+ #160 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.fc28 04/01/2014 RIP: 0010:stable_page_flags+0x27/0x3c0 Code: 00 00 00 0f 1f 44 00 00 48 85 ff 0f 84 a0 03 00 00 41 54 55 49 89 fc 53 48 8b 57 08 48 8b 2f 48 8d 42 ff 83 e2 01 48 0f 44 c7 <48> 8b 00 f6 c4 01 0f 84 10 03 00 00 31 db 49 8b 54 24 08 4c 89 e7 RSP: 0018:ffffbbd44111fde0 EFLAGS: 00010202 RAX: fffffffffffffffe RBX: 00007fffffffeff9 RCX: 0000000000000000 RDX: 0000000000000001 RSI: 0000000000000202 RDI: ffffed1182fff5c0 RBP: ffffffffffffffff R08: 0000000000000001 R09: 0000000000000001 R10: ffffbbd44111fed8 R11: 0000000000000000 R12: ffffed1182fff5c0 R13: 00000000000bffd7 R14: 0000000002fff5c0 R15: ffffbbd44111ff10 FS: 00007efc4335a500(0000) GS:ffff93a5bfc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: fffffffffffffffe CR3: 00000000b2a58000 CR4: 00000000001406e0 Call Trace: kpageflags_read+0xc7/0x120 proc_reg_read+0x3c/0x60 __vfs_read+0x36/0x170 vfs_read+0x89/0x130 ksys_pread64+0x71/0x90 do_syscall_64+0x5b/0x160 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7efc42e75e23 Code: 09 00 ba 9f 01 00 00 e8 ab 81 f4 ff 66 2e 0f 1f 84 00 00 00 00 00 90 83 3d 29 0a 2d 00 00 75 13 49 89 ca b8 11 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 34 c3 48 83 ec 08 e8 db d3 01 00 48 89 04 24 According to kernel bisection, this problem became visible due to commit f7f99100d8d9 ("mm: stop zeroing memory during allocation in vmemmap") which changes how struct pages are initialized. Memblock layout affects the pfn ranges covered by node/zone. Consider that we have a VM with 2 NUMA nodes and each node has 4GB memory, and the default (no memmap= given) memblock layout is like below: MEMBLOCK configuration: memory size = 0x00000001fff75c00 reserved size = 0x000000000300c000 memory.cnt = 0x4 memory[0x0] [0x0000000000001000-0x000000000009efff], 0x000000000009e000 bytes on node 0 flags: 0x0 memory[0x1] [0x0000000000100000-0x00000000bffd6fff], 0x00000000bfed7000 bytes on node 0 flags: 0x0 memory[0x2] [0x0000000100000000-0x000000013fffffff], 0x0000000040000000 bytes on node 0 flags: 0x0 memory[0x3] [0x0000000140000000-0x000000023fffffff], 0x0000000100000000 bytes on node 1 flags: 0x0 ... If you give memmap=1G!4G (so it just covers memory[0x2]), the range [0x100000000-0x13fffffff] is gone: MEMBLOCK configuration: memory size = 0x00000001bff75c00 reserved size = 0x000000000300c000 memory.cnt = 0x3 memory[0x0] [0x0000000000001000-0x000000000009efff], 0x000000000009e000 bytes on node 0 flags: 0x0 memory[0x1] [0x0000000000100000-0x00000000bffd6fff], 0x00000000bfed7000 bytes on node 0 flags: 0x0 memory[0x2] [0x0000000140000000-0x000000023fffffff], 0x0000000100000000 bytes on node 1 flags: 0x0 ... This causes shrinking node 0's pfn range because it is calculated by the address range of memblock.memory. So some of struct pages in the gap range are left uninitialized. We have a function zero_resv_unavail() which does zeroing the struct pages within the reserved unavailable range (i.e. memblock.memory && !memblock.reserved). This patch utilizes it to cover all unavailable ranges by putting them into memblock.reserved. Link: http://lkml.kernel.org/r/20180615072947.GB23273@hori1.linux.bs1.fc.nec.co.jp Fixes: f7f99100d8d9 ("mm: stop zeroing memory during allocation in vmemmap") Signed-off-by: Naoya Horiguchi Tested-by: Oscar Salvador Tested-by: "Herton R. Krzesinski" Acked-by: Michal Hocko Reviewed-by: Pavel Tatashin Cc: Steven Sistare Cc: Daniel Jordan Cc: Matthew Wilcox Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/e820.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index d1f25c831447..c88c23c658c1 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1248,6 +1248,7 @@ void __init e820__memblock_setup(void) { int i; u64 end; + u64 addr = 0; /* * The bootstrap memblock region count maximum is 128 entries @@ -1264,13 +1265,21 @@ void __init e820__memblock_setup(void) struct e820_entry *entry = &e820_table->entries[i]; end = entry->addr + entry->size; + if (addr < entry->addr) + memblock_reserve(addr, entry->addr - addr); + addr = end; if (end != (resource_size_t)end) continue; + /* + * all !E820_TYPE_RAM ranges (including gap ranges) are put + * into memblock.reserved to make sure that struct pages in + * such regions are not left uninitialized after bootup. + */ if (entry->type != E820_TYPE_RAM && entry->type != E820_TYPE_RESERVED_KERN) - continue; - - memblock_add(entry->addr, entry->size); + memblock_reserve(entry->addr, entry->size); + else + memblock_add(entry->addr, entry->size); } /* Throw away partial pages: */ From f77bc3a82ce58fe7977a11f28e0b6cac8a9e087c Mon Sep 17 00:00:00 2001 From: Souptick Joarder Date: Wed, 27 Jun 2018 23:26:17 -0700 Subject: [PATCH 548/572] include/linux/dax.h: dax_iomap_fault() returns vm_fault_t Commit 1c8f422059ae ("mm: change return type to vm_fault_t") missed a conversion. It's not a big problem at present because mainline is still using typedef int vm_fault_t; Fixes: 1c8f422059ae ("mm: change return type to vm_fault_t") Link: http://lkml.kernel.org/r/20180620172046.GA27894@jordon-HP-15-Notebook-PC Signed-off-by: Souptick Joarder Reviewed-by: Andrew Morton Cc: Matthew Wilcox Cc: Dan Williams Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/dax.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/dax.h b/include/linux/dax.h index 3855e3800f48..deb0f663252f 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -135,7 +135,7 @@ void dax_flush(struct dax_device *dax_dev, void *addr, size_t size); ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops); -int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size, +vm_fault_t dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size, pfn_t *pfnp, int *errp, const struct iomap_ops *ops); vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf, enum page_entry_size pe_size, pfn_t pfn); From dd275caf4a0d9b219fffe49288b6cc33cd564312 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 27 Jun 2018 23:26:20 -0700 Subject: [PATCH 549/572] kasan: depend on CONFIG_SLUB_DEBUG KASAN depends on having access to some of the accounting that SLUB_DEBUG does; without it, there are immediate crashes [1]. So, the natural thing to do is to make KASAN select SLUB_DEBUG. [1] http://lkml.kernel.org/r/CAHmME9rtoPwxUSnktxzKso14iuVCWT7BE_-_8PAC=pGw1iJnQg@mail.gmail.com Link: http://lkml.kernel.org/r/20180622154623.25388-1-Jason@zx2c4.com Fixes: f9e13c0a5a33 ("slab, slub: skip unnecessary kasan_cache_shutdown()") Signed-off-by: Jason A. Donenfeld Acked-by: Michal Hocko Reviewed-by: Shakeel Butt Acked-by: Christoph Lameter Cc: Shakeel Butt Cc: David Rientjes Cc: Pekka Enberg Cc: Joonsoo Kim Cc: Andrey Ryabinin Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.kasan | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 3d35d062970d..c253c1b46c6b 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -6,6 +6,7 @@ if HAVE_ARCH_KASAN config KASAN bool "KASan: runtime memory debugger" depends on SLUB || (SLAB && !DEBUG_SLAB) + select SLUB_DEBUG if SLUB select CONSTRUCTORS select STACKDEPOT help From 008e682b5bf1bdd735b2eecf2f3903d7da49dd52 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 27 Jun 2018 23:26:24 -0700 Subject: [PATCH 550/572] proc: add Alexey to MAINTAINERS I know I'll regret it. Link: http://lkml.kernel.org/r/20180627194840.GA18113@avx2 Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index bfb8a18d4793..07d1576fc766 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11481,6 +11481,15 @@ W: http://wireless.kernel.org/en/users/Drivers/p54 S: Obsolete F: drivers/net/wireless/intersil/prism54/ +PROC FILESYSTEM +R: Alexey Dobriyan +L: linux-kernel@vger.kernel.org +L: linux-fsdevel@vger.kernel.org +S: Maintained +F: fs/proc/ +F: include/linux/proc_fs.h +F: tools/testing/selftests/proc/ + PROC SYSCTL M: "Luis R. Rodriguez" M: Kees Cook From 4557641b4c7046625c026fb809c47ef0d43ae595 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Tue, 26 Jun 2018 16:30:39 -0600 Subject: [PATCH 551/572] pmem: only set QUEUE_FLAG_DAX for fsdax mode QUEUE_FLAG_DAX is an indication that a given block device supports filesystem DAX and should not be set for PMEM namespaces which are in "raw" mode. These namespaces lack struct page and are prevented from participating in filesystem DAX as of commit 569d0365f571 ("dax: require 'struct page' by default for filesystem dax"). Signed-off-by: Ross Zwisler Suggested-by: Mike Snitzer Fixes: 569d0365f571 ("dax: require 'struct page' by default for filesystem dax") Cc: stable@vger.kernel.org Acked-by: Dan Williams Reviewed-by: Toshi Kani Signed-off-by: Mike Snitzer --- drivers/nvdimm/pmem.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 68940356cad3..8b1fd7f1a224 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -414,7 +414,8 @@ static int pmem_attach_disk(struct device *dev, blk_queue_logical_block_size(q, pmem_sector_size(ndns)); blk_queue_max_hw_sectors(q, UINT_MAX); blk_queue_flag_set(QUEUE_FLAG_NONROT, q); - blk_queue_flag_set(QUEUE_FLAG_DAX, q); + if (pmem->pfn_flags & PFN_MAP) + blk_queue_flag_set(QUEUE_FLAG_DAX, q); q->queuedata = pmem; disk = alloc_disk_node(0, nid); From 15256f6cc4b44f2e70503758150267fd2a53c0d6 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Tue, 26 Jun 2018 16:30:40 -0600 Subject: [PATCH 552/572] dax: check for QUEUE_FLAG_DAX in bdev_dax_supported() Add an explicit check for QUEUE_FLAG_DAX to __bdev_dax_supported(). This is needed for DM configurations where the first element in the dm-linear or dm-stripe target supports DAX, but other elements do not. Without this check __bdev_dax_supported() will pass for such devices, letting a filesystem on that device mount with the DAX option. Signed-off-by: Ross Zwisler Suggested-by: Mike Snitzer Fixes: commit 545ed20e6df6 ("dm: add infrastructure for DAX support") Cc: stable@vger.kernel.org Acked-by: Dan Williams Reviewed-by: Toshi Kani Signed-off-by: Mike Snitzer --- drivers/dax/super.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/dax/super.c b/drivers/dax/super.c index 903d9c473749..45276abf03aa 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -86,6 +86,7 @@ bool __bdev_dax_supported(struct block_device *bdev, int blocksize) { struct dax_device *dax_dev; bool dax_enabled = false; + struct request_queue *q; pgoff_t pgoff; int err, id; void *kaddr; @@ -99,6 +100,13 @@ bool __bdev_dax_supported(struct block_device *bdev, int blocksize) return false; } + q = bdev_get_queue(bdev); + if (!q || !blk_queue_dax(q)) { + pr_debug("%s: error: request queue doesn't support dax\n", + bdevname(bdev, buf)); + return false; + } + err = bdev_dax_pgoff(bdev, 0, PAGE_SIZE, &pgoff); if (err) { pr_debug("%s: error: unaligned partition for dax\n", From dbc626597c39b24cefce09fbd8e9dea85869a801 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Tue, 26 Jun 2018 16:30:41 -0600 Subject: [PATCH 553/572] dm: prevent DAX mounts if not supported Currently device_supports_dax() just checks to see if the QUEUE_FLAG_DAX flag is set on the device's request queue to decide whether or not the device supports filesystem DAX. Really we should be using bdev_dax_supported() like filesystems do at mount time. This performs other tests like checking to make sure the dax_direct_access() path works. We also explicitly clear QUEUE_FLAG_DAX on the DM device's request queue if any of the underlying devices do not support DAX. This makes the handling of QUEUE_FLAG_DAX consistent with the setting/clearing of most other flags in dm_table_set_restrictions(). Now that bdev_dax_supported() explicitly checks for QUEUE_FLAG_DAX, this will ensure that filesystems built upon DM devices will only be able to mount with DAX if all underlying devices also support DAX. Signed-off-by: Ross Zwisler Fixes: commit 545ed20e6df6 ("dm: add infrastructure for DAX support") Cc: stable@vger.kernel.org Acked-by: Dan Williams Reviewed-by: Toshi Kani Signed-off-by: Mike Snitzer --- drivers/md/dm-table.c | 7 ++++--- drivers/md/dm.c | 3 +-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 938766794c2e..3d0e2c198f06 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -885,9 +885,7 @@ EXPORT_SYMBOL_GPL(dm_table_set_type); static int device_supports_dax(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { - struct request_queue *q = bdev_get_queue(dev->bdev); - - return q && blk_queue_dax(q); + return bdev_dax_supported(dev->bdev, PAGE_SIZE); } static bool dm_table_supports_dax(struct dm_table *t) @@ -1907,6 +1905,9 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, if (dm_table_supports_dax(t)) blk_queue_flag_set(QUEUE_FLAG_DAX, q); + else + blk_queue_flag_clear(QUEUE_FLAG_DAX, q); + if (dm_table_supports_dax_write_cache(t)) dax_write_cache(t->md->dax_dev, true); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index a3b103e8e3ce..b0dd7027848b 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1056,8 +1056,7 @@ static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, if (len < 1) goto out; nr_pages = min(len, nr_pages); - if (ti->type->direct_access) - ret = ti->type->direct_access(ti, pgoff, nr_pages, kaddr, pfn); + ret = ti->type->direct_access(ti, pgoff, nr_pages, kaddr, pfn); out: dm_put_live_table(md, srcu_idx); From 63ba82c0e63f1dd400d84e12f2142c2cb691aec1 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 28 Jun 2018 22:21:24 +0200 Subject: [PATCH 554/572] parisc: Reduce debug output in unwind code Signed-off-by: Helge Deller --- arch/parisc/kernel/unwind.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c index 143f90e2f9f3..2ef83d78eec4 100644 --- a/arch/parisc/kernel/unwind.c +++ b/arch/parisc/kernel/unwind.c @@ -25,7 +25,7 @@ /* #define DEBUG 1 */ #ifdef DEBUG -#define dbg(x...) printk(x) +#define dbg(x...) pr_debug(x) #else #define dbg(x...) #endif @@ -182,7 +182,7 @@ int __init unwind_init(void) start = (long)&__start___unwind[0]; stop = (long)&__stop___unwind[0]; - printk("unwind_init: start = 0x%lx, end = 0x%lx, entries = %lu\n", + dbg("unwind_init: start = 0x%lx, end = 0x%lx, entries = %lu\n", start, stop, (stop - start) / sizeof(struct unwind_table_entry)); From 0da74120c5341389b97c4ee27487a97224999ee1 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Thu, 28 Jun 2018 20:39:54 -0400 Subject: [PATCH 555/572] selinux: move user accesses in selinuxfs out of locked regions If a user is accessing a file in selinuxfs with a pointer to a userspace buffer that is backed by e.g. a userfaultfd, the userspace access can stall indefinitely, which can block fsi->mutex if it is held. For sel_read_policy(), remove the locking, since this method doesn't seem to access anything that requires locking. For sel_read_bool(), move the user access below the locked region. For sel_write_bool() and sel_commit_bools_write(), move the user access up above the locked region. Cc: stable@vger.kernel.org Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Jann Horn Acked-by: Stephen Smalley [PM: removed an unused variable in sel_read_policy()] Signed-off-by: Paul Moore --- security/selinux/selinuxfs.c | 78 +++++++++++++++--------------------- 1 file changed, 33 insertions(+), 45 deletions(-) diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index c0cadbc5f85c..19e35dd695d7 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -441,22 +441,16 @@ static int sel_release_policy(struct inode *inode, struct file *filp) static ssize_t sel_read_policy(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { - struct selinux_fs_info *fsi = file_inode(filp)->i_sb->s_fs_info; struct policy_load_memory *plm = filp->private_data; int ret; - mutex_lock(&fsi->mutex); - ret = avc_has_perm(&selinux_state, current_sid(), SECINITSID_SECURITY, SECCLASS_SECURITY, SECURITY__READ_POLICY, NULL); if (ret) - goto out; + return ret; - ret = simple_read_from_buffer(buf, count, ppos, plm->data, plm->len); -out: - mutex_unlock(&fsi->mutex); - return ret; + return simple_read_from_buffer(buf, count, ppos, plm->data, plm->len); } static vm_fault_t sel_mmap_policy_fault(struct vm_fault *vmf) @@ -1188,25 +1182,29 @@ static ssize_t sel_read_bool(struct file *filep, char __user *buf, ret = -EINVAL; if (index >= fsi->bool_num || strcmp(name, fsi->bool_pending_names[index])) - goto out; + goto out_unlock; ret = -ENOMEM; page = (char *)get_zeroed_page(GFP_KERNEL); if (!page) - goto out; + goto out_unlock; cur_enforcing = security_get_bool_value(fsi->state, index); if (cur_enforcing < 0) { ret = cur_enforcing; - goto out; + goto out_unlock; } length = scnprintf(page, PAGE_SIZE, "%d %d", cur_enforcing, fsi->bool_pending_values[index]); - ret = simple_read_from_buffer(buf, count, ppos, page, length); -out: mutex_unlock(&fsi->mutex); + ret = simple_read_from_buffer(buf, count, ppos, page, length); +out_free: free_page((unsigned long)page); return ret; + +out_unlock: + mutex_unlock(&fsi->mutex); + goto out_free; } static ssize_t sel_write_bool(struct file *filep, const char __user *buf, @@ -1219,6 +1217,17 @@ static ssize_t sel_write_bool(struct file *filep, const char __user *buf, unsigned index = file_inode(filep)->i_ino & SEL_INO_MASK; const char *name = filep->f_path.dentry->d_name.name; + if (count >= PAGE_SIZE) + return -ENOMEM; + + /* No partial writes. */ + if (*ppos != 0) + return -EINVAL; + + page = memdup_user_nul(buf, count); + if (IS_ERR(page)) + return PTR_ERR(page); + mutex_lock(&fsi->mutex); length = avc_has_perm(&selinux_state, @@ -1233,22 +1242,6 @@ static ssize_t sel_write_bool(struct file *filep, const char __user *buf, fsi->bool_pending_names[index])) goto out; - length = -ENOMEM; - if (count >= PAGE_SIZE) - goto out; - - /* No partial writes. */ - length = -EINVAL; - if (*ppos != 0) - goto out; - - page = memdup_user_nul(buf, count); - if (IS_ERR(page)) { - length = PTR_ERR(page); - page = NULL; - goto out; - } - length = -EINVAL; if (sscanf(page, "%d", &new_value) != 1) goto out; @@ -1280,6 +1273,17 @@ static ssize_t sel_commit_bools_write(struct file *filep, ssize_t length; int new_value; + if (count >= PAGE_SIZE) + return -ENOMEM; + + /* No partial writes. */ + if (*ppos != 0) + return -EINVAL; + + page = memdup_user_nul(buf, count); + if (IS_ERR(page)) + return PTR_ERR(page); + mutex_lock(&fsi->mutex); length = avc_has_perm(&selinux_state, @@ -1289,22 +1293,6 @@ static ssize_t sel_commit_bools_write(struct file *filep, if (length) goto out; - length = -ENOMEM; - if (count >= PAGE_SIZE) - goto out; - - /* No partial writes. */ - length = -EINVAL; - if (*ppos != 0) - goto out; - - page = memdup_user_nul(buf, count); - if (IS_ERR(page)) { - length = PTR_ERR(page); - page = NULL; - goto out; - } - length = -EINVAL; if (sscanf(page, "%d", &new_value) != 1) goto out; From 2a2c8ee2d72c4f1ba0f7fbb02dc74f971df0f934 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sat, 16 Jun 2018 22:37:56 +0900 Subject: [PATCH 556/572] Revert "i2c: algo-bit: init the bus to a known state" This reverts commit 3e5f06bed72fe72166a6778f630241a893f67799. As per bugzilla #200045, this caused a regression. I don't really see a way to fix it without having the hardware. So, revert the patch and I will fix the issue I was seeing originally in the i2c-gpio driver itself. I couldn't find new users of this algorithm since, so there should be no one depending on the new behaviour. Reported-by: Sergey Larin Fixes: 3e5f06bed72f ("i2c: algo-bit: init the bus to a known state") Signed-off-by: Wolfram Sang Acked-by: Alex Deucher Tested-by: Sergey Larin Signed-off-by: Wolfram Sang Cc: stable@kernel.org --- drivers/i2c/algos/i2c-algo-bit.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/i2c/algos/i2c-algo-bit.c b/drivers/i2c/algos/i2c-algo-bit.c index 4a34f311e1ff..0c0eb16d710f 100644 --- a/drivers/i2c/algos/i2c-algo-bit.c +++ b/drivers/i2c/algos/i2c-algo-bit.c @@ -647,11 +647,6 @@ static int __i2c_bit_add_bus(struct i2c_adapter *adap, if (bit_adap->getscl == NULL) adap->quirks = &i2c_bit_quirk_no_clk_stretch; - /* Bring bus to a known state. Looks like STOP if bus is not free yet */ - setscl(bit_adap, 1); - udelay(bit_adap->udelay); - setsda(bit_adap, 1); - ret = add_adapter(adap); if (ret < 0) return ret; From 2173ed0adc7f0473e6b6ad636d8684a0d82da5e9 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sat, 16 Jun 2018 22:37:57 +0900 Subject: [PATCH 557/572] i2c: algos: bit: mention our experience about initial states So, if somebody wants to re-implement this in the future, we pinpoint to a problem case. Signed-off-by: Wolfram Sang Acked-by: Alex Deucher Signed-off-by: Wolfram Sang --- drivers/i2c/algos/i2c-algo-bit.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/i2c/algos/i2c-algo-bit.c b/drivers/i2c/algos/i2c-algo-bit.c index 0c0eb16d710f..6ec65adaba49 100644 --- a/drivers/i2c/algos/i2c-algo-bit.c +++ b/drivers/i2c/algos/i2c-algo-bit.c @@ -647,6 +647,11 @@ static int __i2c_bit_add_bus(struct i2c_adapter *adap, if (bit_adap->getscl == NULL) adap->quirks = &i2c_bit_quirk_no_clk_stretch; + /* + * We tried forcing SCL/SDA to an initial state here. But that caused a + * regression, sadly. Check Bugzilla #200045 for details. + */ + ret = add_adapter(adap); if (ret < 0) return ret; From 9aa613674f89d01248ae2e4afe691b515ff8fbb6 Mon Sep 17 00:00:00 2001 From: Peter Rosin Date: Wed, 20 Jun 2018 11:43:23 +0200 Subject: [PATCH 558/572] i2c: smbus: kill memory leak on emulated and failed DMA SMBus xfers If DMA safe memory was allocated, but the subsequent I2C transfer fails the memory is leaked. Plug this leak. Fixes: 8a77821e74d6 ("i2c: smbus: use DMA safe buffers for emulated SMBus transactions") Signed-off-by: Peter Rosin Signed-off-by: Wolfram Sang Cc: stable@kernel.org --- drivers/i2c/i2c-core-smbus.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/i2c/i2c-core-smbus.c b/drivers/i2c/i2c-core-smbus.c index f3f683041e7f..51970bae3c4a 100644 --- a/drivers/i2c/i2c-core-smbus.c +++ b/drivers/i2c/i2c-core-smbus.c @@ -465,15 +465,18 @@ static s32 i2c_smbus_xfer_emulated(struct i2c_adapter *adapter, u16 addr, status = i2c_transfer(adapter, msg, num); if (status < 0) - return status; - if (status != num) - return -EIO; + goto cleanup; + if (status != num) { + status = -EIO; + goto cleanup; + } + status = 0; /* Check PEC if last message is a read */ if (i && (msg[num-1].flags & I2C_M_RD)) { status = i2c_smbus_check_pec(partial_pec, &msg[num-1]); if (status < 0) - return status; + goto cleanup; } if (read_write == I2C_SMBUS_READ) @@ -499,12 +502,13 @@ static s32 i2c_smbus_xfer_emulated(struct i2c_adapter *adapter, u16 addr, break; } +cleanup: if (msg[0].flags & I2C_M_DMA_SAFE) kfree(msg[0].buf); if (msg[1].flags & I2C_M_DMA_SAFE) kfree(msg[1].buf); - return 0; + return status; } /** From 12b731dd46d9ee646318e6e9dc587314a3908a46 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sat, 16 Jun 2018 21:56:36 +0900 Subject: [PATCH 559/572] i2c: gpio: initialize SCL to HIGH again It seems that during the conversion from gpio* to gpiod*, the initial state of SCL was wrongly switched to LOW. Fix it to be HIGH again. Fixes: 7bb75029ef34 ("i2c: gpio: Enforce open drain through gpiolib") Signed-off-by: Wolfram Sang Tested-by: Geert Uytterhoeven Reviewed-by: Linus Walleij Signed-off-by: Wolfram Sang Cc: stable@kernel.org --- drivers/i2c/busses/i2c-gpio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-gpio.c b/drivers/i2c/busses/i2c-gpio.c index 005e6e0330c2..66f85bbf3591 100644 --- a/drivers/i2c/busses/i2c-gpio.c +++ b/drivers/i2c/busses/i2c-gpio.c @@ -279,9 +279,9 @@ static int i2c_gpio_probe(struct platform_device *pdev) * required for an I2C bus. */ if (pdata->scl_is_open_drain) - gflags = GPIOD_OUT_LOW; + gflags = GPIOD_OUT_HIGH; else - gflags = GPIOD_OUT_LOW_OPEN_DRAIN; + gflags = GPIOD_OUT_HIGH_OPEN_DRAIN; priv->scl = i2c_gpio_get_desc(dev, "scl", 1, gflags); if (IS_ERR(priv->scl)) return PTR_ERR(priv->scl); From e88958e6369aeba48623afa18dd67fdf41c98d75 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 29 Jun 2018 15:37:24 +0200 Subject: [PATCH 560/572] net: handle NULL ->poll gracefully The big aio poll revert broke various network protocols that don't implement ->poll as a patch in the aio poll serie removed sock_no_poll and made the common code handle this case. Reported-by: syzbot+57727883dbad76db2ef0@syzkaller.appspotmail.com Reported-by: syzbot+cdb0d3176b53d35ad454@syzkaller.appspotmail.com Reported-by: syzbot+2c7e8f74f8b2571c87e8@syzkaller.appspotmail.com Reported-by: Tetsuo Handa Fixes: a11e1d432b51 ("Revert changes to convert to ->poll_mask() and aio IOCB_CMD_POLL") Signed-off-by: Christoph Hellwig Signed-off-by: Linus Torvalds --- net/socket.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/socket.c b/net/socket.c index a564c6ed19d5..85633622c94d 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1133,6 +1133,8 @@ static __poll_t sock_poll(struct file *file, poll_table *wait) __poll_t events = poll_requested_events(wait); sock_poll_busy_loop(sock, events); + if (!sock->ops->poll) + return 0; return sock->ops->poll(file, sock, wait) | sock_poll_busy_flag(sock); } From 2cd3ae2129736f9019130064d09713a375870941 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Fri, 29 Jun 2018 15:37:25 +0200 Subject: [PATCH 561/572] aio: mark __aio_sigset::sigmask const io_pgetevents() will not change the signal mask. Mark it const to make it clear and to reduce the need for casts in user code. Reviewed-by: Christoph Hellwig Signed-off-by: Avi Kivity Signed-off-by: Al Viro [hch: reapply the patch that got incorrectly reverted] Signed-off-by: Christoph Hellwig Signed-off-by: Linus Torvalds --- include/uapi/linux/aio_abi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/aio_abi.h b/include/uapi/linux/aio_abi.h index d4e768d55d14..3c5038b587ba 100644 --- a/include/uapi/linux/aio_abi.h +++ b/include/uapi/linux/aio_abi.h @@ -111,7 +111,7 @@ struct iocb { #undef IFLITTLE struct __aio_sigset { - sigset_t __user *sigmask; + const sigset_t __user *sigmask; size_t sigsetsize; }; From 1f57f8d442f8017587eeebd8617913bfc3661d3d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 28 Jun 2018 11:54:01 -0600 Subject: [PATCH 562/572] blk-mq: don't queue more if we get a busy return Some devices have different queue limits depending on the type of IO. A classic case is SATA NCQ, where some commands can queue, but others cannot. If we have NCQ commands inflight and encounter a non-queueable command, the driver returns busy. Currently we attempt to dispatch more from the scheduler, if we were able to queue some commands. But for the case where we ended up stopping due to BUSY, we should not attempt to retrieve more from the scheduler. If we do, we can get into a situation where we attempt to queue a non-queueable command, get BUSY, then successfully retrieve more commands from that scheduler and queue those. This can repeat forever, starving the non-queuable command indefinitely. Fix this by NOT attempting to pull more commands from the scheduler, if we get a BUSY return. This should also be more optimal in terms of letting requests stay in the scheduler for as long as possible, if we get a BUSY due to the regular out-of-tags condition. Reviewed-by: Omar Sandoval Reviewed-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/block/blk-mq.c b/block/blk-mq.c index b6888ff556cf..d394cdd8d8c6 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1075,6 +1075,9 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx **hctx, #define BLK_MQ_RESOURCE_DELAY 3 /* ms units */ +/* + * Returns true if we did some work AND can potentially do more. + */ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list, bool got_budget) { @@ -1205,8 +1208,17 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list, blk_mq_run_hw_queue(hctx, true); else if (needs_restart && (ret == BLK_STS_RESOURCE)) blk_mq_delay_run_hw_queue(hctx, BLK_MQ_RESOURCE_DELAY); + + return false; } + /* + * If the host/device is unable to accept more work, inform the + * caller of that. + */ + if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) + return false; + return (queued + errors) != 0; } From fad2d4ef636654e926d374ef038f4cd4286661f6 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 25 Jun 2018 15:51:30 -0700 Subject: [PATCH 563/572] drbd: Fix drbd_request_prepare() discard handling Fix the test that verifies whether bio_op(bio) represents a discard or write zeroes operation. Compile-tested only. Cc: Philipp Reisner Cc: Lars Ellenberg Fixes: 7435e9018f91 ("drbd: zero-out partial unaligned discards on local backend") Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_req.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index a47e4987ee46..d146fedc38bb 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -1244,8 +1244,8 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long _drbd_start_io_acct(device, req); /* process discards always from our submitter thread */ - if ((bio_op(bio) & REQ_OP_WRITE_ZEROES) || - (bio_op(bio) & REQ_OP_DISCARD)) + if (bio_op(bio) == REQ_OP_WRITE_ZEROES || + bio_op(bio) == REQ_OP_DISCARD) goto queue_for_submitter_thread; if (rw == WRITE && req->private_bio && req->i.size From 9544bc5347207a68eb308cc8aaaed6c3a687cabd Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 29 Jun 2018 08:48:06 -0600 Subject: [PATCH 564/572] sg: remove ->sg_magic member This was introduced more than a decade ago when sg chaining was added, but we never really caught anything with it. The scatterlist entry size can be critical, since drivers allocate it, so remove the magic member. Recently it's been triggering allocation stalls and failures in NVMe. Tested-by: Jordan Glover Acked-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/gpu/drm/i915/i915_drv.h | 3 --- include/linux/scatterlist.h | 18 ------------------ lib/scatterlist.c | 6 ------ tools/virtio/linux/scatterlist.h | 18 ------------------ 4 files changed, 45 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 34c125e2d90c..9180f67746b4 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2238,9 +2238,6 @@ static inline struct scatterlist *____sg_next(struct scatterlist *sg) **/ static inline struct scatterlist *__sg_next(struct scatterlist *sg) { -#ifdef CONFIG_DEBUG_SG - BUG_ON(sg->sg_magic != SG_MAGIC); -#endif return sg_is_last(sg) ? NULL : ____sg_next(sg); } diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 51f52020ad5f..093aa57120b0 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -9,9 +9,6 @@ #include struct scatterlist { -#ifdef CONFIG_DEBUG_SG - unsigned long sg_magic; -#endif unsigned long page_link; unsigned int offset; unsigned int length; @@ -64,7 +61,6 @@ struct sg_table { * */ -#define SG_MAGIC 0x87654321 #define SG_CHAIN 0x01UL #define SG_END 0x02UL @@ -98,7 +94,6 @@ static inline void sg_assign_page(struct scatterlist *sg, struct page *page) */ BUG_ON((unsigned long) page & (SG_CHAIN | SG_END)); #ifdef CONFIG_DEBUG_SG - BUG_ON(sg->sg_magic != SG_MAGIC); BUG_ON(sg_is_chain(sg)); #endif sg->page_link = page_link | (unsigned long) page; @@ -129,7 +124,6 @@ static inline void sg_set_page(struct scatterlist *sg, struct page *page, static inline struct page *sg_page(struct scatterlist *sg) { #ifdef CONFIG_DEBUG_SG - BUG_ON(sg->sg_magic != SG_MAGIC); BUG_ON(sg_is_chain(sg)); #endif return (struct page *)((sg)->page_link & ~(SG_CHAIN | SG_END)); @@ -195,9 +189,6 @@ static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents, **/ static inline void sg_mark_end(struct scatterlist *sg) { -#ifdef CONFIG_DEBUG_SG - BUG_ON(sg->sg_magic != SG_MAGIC); -#endif /* * Set termination bit, clear potential chain bit */ @@ -215,9 +206,6 @@ static inline void sg_mark_end(struct scatterlist *sg) **/ static inline void sg_unmark_end(struct scatterlist *sg) { -#ifdef CONFIG_DEBUG_SG - BUG_ON(sg->sg_magic != SG_MAGIC); -#endif sg->page_link &= ~SG_END; } @@ -260,12 +248,6 @@ static inline void *sg_virt(struct scatterlist *sg) static inline void sg_init_marker(struct scatterlist *sgl, unsigned int nents) { -#ifdef CONFIG_DEBUG_SG - unsigned int i; - - for (i = 0; i < nents; i++) - sgl[i].sg_magic = SG_MAGIC; -#endif sg_mark_end(&sgl[nents - 1]); } diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 06dad7a072fd..d4ae67d6cd1e 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -24,9 +24,6 @@ **/ struct scatterlist *sg_next(struct scatterlist *sg) { -#ifdef CONFIG_DEBUG_SG - BUG_ON(sg->sg_magic != SG_MAGIC); -#endif if (sg_is_last(sg)) return NULL; @@ -111,10 +108,7 @@ struct scatterlist *sg_last(struct scatterlist *sgl, unsigned int nents) for_each_sg(sgl, sg, nents, i) ret = sg; -#ifdef CONFIG_DEBUG_SG - BUG_ON(sgl[0].sg_magic != SG_MAGIC); BUG_ON(!sg_is_last(ret)); -#endif return ret; } EXPORT_SYMBOL(sg_last); diff --git a/tools/virtio/linux/scatterlist.h b/tools/virtio/linux/scatterlist.h index 9a45f90e2d08..369ee308b668 100644 --- a/tools/virtio/linux/scatterlist.h +++ b/tools/virtio/linux/scatterlist.h @@ -36,7 +36,6 @@ static inline void sg_assign_page(struct scatterlist *sg, struct page *page) */ BUG_ON((unsigned long) page & 0x03); #ifdef CONFIG_DEBUG_SG - BUG_ON(sg->sg_magic != SG_MAGIC); BUG_ON(sg_is_chain(sg)); #endif sg->page_link = page_link | (unsigned long) page; @@ -67,7 +66,6 @@ static inline void sg_set_page(struct scatterlist *sg, struct page *page, static inline struct page *sg_page(struct scatterlist *sg) { #ifdef CONFIG_DEBUG_SG - BUG_ON(sg->sg_magic != SG_MAGIC); BUG_ON(sg_is_chain(sg)); #endif return (struct page *)((sg)->page_link & ~0x3); @@ -116,9 +114,6 @@ static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents, **/ static inline void sg_mark_end(struct scatterlist *sg) { -#ifdef CONFIG_DEBUG_SG - BUG_ON(sg->sg_magic != SG_MAGIC); -#endif /* * Set termination bit, clear potential chain bit */ @@ -136,17 +131,11 @@ static inline void sg_mark_end(struct scatterlist *sg) **/ static inline void sg_unmark_end(struct scatterlist *sg) { -#ifdef CONFIG_DEBUG_SG - BUG_ON(sg->sg_magic != SG_MAGIC); -#endif sg->page_link &= ~0x02; } static inline struct scatterlist *sg_next(struct scatterlist *sg) { -#ifdef CONFIG_DEBUG_SG - BUG_ON(sg->sg_magic != SG_MAGIC); -#endif if (sg_is_last(sg)) return NULL; @@ -160,13 +149,6 @@ static inline struct scatterlist *sg_next(struct scatterlist *sg) static inline void sg_init_table(struct scatterlist *sgl, unsigned int nents) { memset(sgl, 0, sizeof(*sgl) * nents); -#ifdef CONFIG_DEBUG_SG - { - unsigned int i; - for (i = 0; i < nents; i++) - sgl[i].sg_magic = SG_MAGIC; - } -#endif sg_mark_end(&sgl[nents - 1]); } From 24b6c22504a27210a8377e54d24d425ae414f2c1 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 20 Apr 2018 23:13:44 +0200 Subject: [PATCH 565/572] parisc: Build kernel without -ffunction-sections As suggested by Nick Piggin it seems we can drop the -ffunction-sections compile flag, now that the kernel uses thin archives. Testing with 32- and 64-bit kernel showed no difference in kernel size. Suggested-by: Nicholas Piggin Signed-off-by: Helge Deller --- arch/parisc/Makefile | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index 714284ea6cc2..5ce030266e7d 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -65,10 +65,6 @@ endif # kernel. cflags-y += -mdisable-fpregs -# Without this, "ld -r" results in .text sections that are too big -# (> 0x40000) for branches to reach stubs. -cflags-y += -ffunction-sections - # Use long jumps instead of long branches (needed if your linker fails to # link a too big vmlinux executable). Not enabled for building modules. ifdef CONFIG_MLONGCALLS From 021c91791a5e7e85c567452f1be3e4c2c6cb6063 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 1 Jul 2018 16:04:53 -0700 Subject: [PATCH 566/572] Linux 4.18-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c9132594860b..c5ce55cbc543 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 4 PATCHLEVEL = 18 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Merciless Moray # *DOCUMENTATION* From d9c0ffcabd6aae7ff1e34e8078354c13bb9f1183 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 28 Jun 2018 18:29:41 +0200 Subject: [PATCH 567/572] sched/nohz: Skip remote tick on idle task entirely Some people have reported that the warning in sched_tick_remote() occasionally triggers, especially in favour of some RCU-Torture pressure: WARNING: CPU: 11 PID: 906 at kernel/sched/core.c:3138 sched_tick_remote+0xb6/0xc0 Modules linked in: CPU: 11 PID: 906 Comm: kworker/u32:3 Not tainted 4.18.0-rc2+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 Workqueue: events_unbound sched_tick_remote RIP: 0010:sched_tick_remote+0xb6/0xc0 Code: e8 0f 06 b8 00 c6 03 00 fb eb 9d 8b 43 04 85 c0 75 8d 48 8b 83 e0 0a 00 00 48 85 c0 75 81 eb 88 48 89 df e8 bc fe ff ff eb aa <0f> 0b eb +c5 66 0f 1f 44 00 00 bf 17 00 00 00 e8 b6 2e fe ff 0f b6 Call Trace: process_one_work+0x1df/0x3b0 worker_thread+0x44/0x3d0 kthread+0xf3/0x130 ? set_worker_desc+0xb0/0xb0 ? kthread_create_worker_on_cpu+0x70/0x70 ret_from_fork+0x35/0x40 This happens when the remote tick applies on an idle task. Usually the idle_cpu() check avoids that, but it is performed before we lock the runqueue and it is therefore racy. It was intended to be that way in order to prevent from useless runqueue locks since idle task tick callback is a no-op. Now if the racy check slips out of our hands and we end up remotely ticking an idle task, the empty task_tick_idle() is harmless. Still it won't pass the WARN_ON_ONCE() test that ensures rq_clock_task() is not too far from curr->se.exec_start because update_curr_idle() doesn't update the exec_start value like other scheduler policies. Hence the reported false positive. So let's have another check, while the rq is locked, to make sure we don't remote tick on an idle task. The lockless idle_cpu() still applies to avoid unecessary rq lock contention. Reported-by: Jacek Tomaka Reported-by: Paul E. McKenney Reported-by: Anna-Maria Gleixner Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1530203381-31234-1-git-send-email-frederic@kernel.org Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 78d8facba456..22fce36426c0 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3113,7 +3113,9 @@ static void sched_tick_remote(struct work_struct *work) struct tick_work *twork = container_of(dwork, struct tick_work, work); int cpu = twork->cpu; struct rq *rq = cpu_rq(cpu); + struct task_struct *curr; struct rq_flags rf; + u64 delta; /* * Handle the tick only if it appears the remote CPU is running in full @@ -3122,24 +3124,28 @@ static void sched_tick_remote(struct work_struct *work) * statistics and checks timeslices in a time-independent way, regardless * of when exactly it is running. */ - if (!idle_cpu(cpu) && tick_nohz_tick_stopped_cpu(cpu)) { - struct task_struct *curr; - u64 delta; + if (idle_cpu(cpu) || !tick_nohz_tick_stopped_cpu(cpu)) + goto out_requeue; - rq_lock_irq(rq, &rf); - update_rq_clock(rq); - curr = rq->curr; - delta = rq_clock_task(rq) - curr->se.exec_start; + rq_lock_irq(rq, &rf); + curr = rq->curr; + if (is_idle_task(curr)) + goto out_unlock; - /* - * Make sure the next tick runs within a reasonable - * amount of time. - */ - WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3); - curr->sched_class->task_tick(rq, curr, 0); - rq_unlock_irq(rq, &rf); - } + update_rq_clock(rq); + delta = rq_clock_task(rq) - curr->se.exec_start; + /* + * Make sure the next tick runs within a reasonable + * amount of time. + */ + WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3); + curr->sched_class->task_tick(rq, curr, 0); + +out_unlock: + rq_unlock_irq(rq, &rf); + +out_requeue: /* * Run the remote tick once per second (1Hz). This arbitrary * frequency is large enough to avoid overload but short enough From 296b2ffe7fa9ed756c41415c6b1512bc4ad687b1 Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Tue, 26 Jun 2018 15:53:22 +0200 Subject: [PATCH 568/572] sched/rt: Fix call to cpufreq_update_util() With commit: 8f111bc357aa ("cpufreq/schedutil: Rewrite CPUFREQ_RT support") the schedutil governor uses rq->rt.rt_nr_running to detect whether an RT task is currently running on the CPU and to set frequency to max if necessary. cpufreq_update_util() is called in enqueue/dequeue_top_rt_rq() but rq->rt.rt_nr_running has not been updated yet when dequeue_top_rt_rq() is called so schedutil still considers that an RT task is running when the last task is dequeued. The update of rq->rt.rt_nr_running happens later in dequeue_rt_stack(). In fact, we can take advantage of the sequence that the dequeue then re-enqueue rt entities when a rt task is enqueued or dequeued; As a result enqueue_top_rt_rq() is always called when a task is enqueued or dequeued and also when groups are throttled or unthrottled. The only place that not use enqueue_top_rt_rq() is when root rt_rq is throttled. Signed-off-by: Vincent Guittot Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: efault@gmx.de Cc: juri.lelli@redhat.com Cc: patrick.bellasi@arm.com Cc: viresh.kumar@linaro.org Fixes: 8f111bc357aa ('cpufreq/schedutil: Rewrite CPUFREQ_RT support') Link: http://lkml.kernel.org/r/1530021202-21695-1-git-send-email-vincent.guittot@linaro.org Signed-off-by: Ingo Molnar --- kernel/sched/cpufreq_schedutil.c | 2 +- kernel/sched/rt.c | 16 ++++++++++------ kernel/sched/sched.h | 5 +++++ 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 3cde46483f0a..c907fde01eaa 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -192,7 +192,7 @@ static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu) { struct rq *rq = cpu_rq(sg_cpu->cpu); - if (rq->rt.rt_nr_running) + if (rt_rq_is_runnable(&rq->rt)) return sg_cpu->max; /* diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 47556b0c9a95..572567078b60 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -508,8 +508,11 @@ static void sched_rt_rq_dequeue(struct rt_rq *rt_rq) rt_se = rt_rq->tg->rt_se[cpu]; - if (!rt_se) + if (!rt_se) { dequeue_top_rt_rq(rt_rq); + /* Kick cpufreq (see the comment in kernel/sched/sched.h). */ + cpufreq_update_util(rq_of_rt_rq(rt_rq), 0); + } else if (on_rt_rq(rt_se)) dequeue_rt_entity(rt_se, 0); } @@ -1001,8 +1004,6 @@ dequeue_top_rt_rq(struct rt_rq *rt_rq) sub_nr_running(rq, rt_rq->rt_nr_running); rt_rq->rt_queued = 0; - /* Kick cpufreq (see the comment in kernel/sched/sched.h). */ - cpufreq_update_util(rq, 0); } static void @@ -1014,11 +1015,14 @@ enqueue_top_rt_rq(struct rt_rq *rt_rq) if (rt_rq->rt_queued) return; - if (rt_rq_throttled(rt_rq) || !rt_rq->rt_nr_running) + + if (rt_rq_throttled(rt_rq)) return; - add_nr_running(rq, rt_rq->rt_nr_running); - rt_rq->rt_queued = 1; + if (rt_rq->rt_nr_running) { + add_nr_running(rq, rt_rq->rt_nr_running); + rt_rq->rt_queued = 1; + } /* Kick cpufreq (see the comment in kernel/sched/sched.h). */ cpufreq_update_util(rq, 0); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 6601baf2361c..27ddec334601 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -609,6 +609,11 @@ struct rt_rq { #endif }; +static inline bool rt_rq_is_runnable(struct rt_rq *rt_rq) +{ + return rt_rq->rt_queued && rt_rq->rt_nr_running; +} + /* Deadline class' related fields in a runqueue */ struct dl_rq { /* runqueue is an rbtree, ordered by deadline */ From 512ac999d2755d2b7109e996a76b6fb8b888631d Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 20 Jun 2018 18:18:33 +0800 Subject: [PATCH 569/572] sched/fair: Fix bandwidth timer clock drift condition I noticed that cgroup task groups constantly get throttled even if they have low CPU usage, this causes some jitters on the response time to some of our business containers when enabling CPU quotas. It's very simple to reproduce: mkdir /sys/fs/cgroup/cpu/test cd /sys/fs/cgroup/cpu/test echo 100000 > cpu.cfs_quota_us echo $$ > tasks then repeat: cat cpu.stat | grep nr_throttled # nr_throttled will increase steadily After some analysis, we found that cfs_rq::runtime_remaining will be cleared by expire_cfs_rq_runtime() due to two equal but stale "cfs_{b|q}->runtime_expires" after period timer is re-armed. The current condition to judge clock drift in expire_cfs_rq_runtime() is wrong, the two runtime_expires are actually the same when clock drift happens, so this condtion can never hit. The orginal design was correctly done by this commit: a9cf55b28610 ("sched: Expire invalid runtime") ... but was changed to be the current implementation due to its locking bug. This patch introduces another way, it adds a new field in both structures cfs_rq and cfs_bandwidth to record the expiration update sequence, and uses them to figure out if clock drift happens (true if they are equal). Signed-off-by: Xunlei Pang Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Ben Segall Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 51f2176d74ac ("sched/fair: Fix unlocked reads of some cfs_b->quota/period") Link: http://lkml.kernel.org/r/20180620101834.24455-1-xlpang@linux.alibaba.com Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 14 ++++++++------ kernel/sched/sched.h | 6 ++++-- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 1866e64792a7..791707c56886 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4590,6 +4590,7 @@ void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b) now = sched_clock_cpu(smp_processor_id()); cfs_b->runtime = cfs_b->quota; cfs_b->runtime_expires = now + ktime_to_ns(cfs_b->period); + cfs_b->expires_seq++; } static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg) @@ -4612,6 +4613,7 @@ static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq) struct task_group *tg = cfs_rq->tg; struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg); u64 amount = 0, min_amount, expires; + int expires_seq; /* note: this is a positive sum as runtime_remaining <= 0 */ min_amount = sched_cfs_bandwidth_slice() - cfs_rq->runtime_remaining; @@ -4628,6 +4630,7 @@ static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq) cfs_b->idle = 0; } } + expires_seq = cfs_b->expires_seq; expires = cfs_b->runtime_expires; raw_spin_unlock(&cfs_b->lock); @@ -4637,8 +4640,10 @@ static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq) * spread between our sched_clock and the one on which runtime was * issued. */ - if ((s64)(expires - cfs_rq->runtime_expires) > 0) + if (cfs_rq->expires_seq != expires_seq) { + cfs_rq->expires_seq = expires_seq; cfs_rq->runtime_expires = expires; + } return cfs_rq->runtime_remaining > 0; } @@ -4664,12 +4669,9 @@ static void expire_cfs_rq_runtime(struct cfs_rq *cfs_rq) * has not truly expired. * * Fortunately we can check determine whether this the case by checking - * whether the global deadline has advanced. It is valid to compare - * cfs_b->runtime_expires without any locks since we only care about - * exact equality, so a partial write will still work. + * whether the global deadline(cfs_b->expires_seq) has advanced. */ - - if (cfs_rq->runtime_expires != cfs_b->runtime_expires) { + if (cfs_rq->expires_seq == cfs_b->expires_seq) { /* extend local deadline, drift is bounded above by 2 ticks */ cfs_rq->runtime_expires += TICK_NSEC; } else { diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 27ddec334601..c7742dcc136c 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -334,9 +334,10 @@ struct cfs_bandwidth { u64 runtime; s64 hierarchical_quota; u64 runtime_expires; + int expires_seq; - int idle; - int period_active; + short idle; + short period_active; struct hrtimer period_timer; struct hrtimer slack_timer; struct list_head throttled_cfs_rq; @@ -551,6 +552,7 @@ struct cfs_rq { #ifdef CONFIG_CFS_BANDWIDTH int runtime_enabled; + int expires_seq; u64 runtime_expires; s64 runtime_remaining; From f1d1be8aee6c461652aea8f58bedebaa73d7f4d3 Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Wed, 20 Jun 2018 18:18:34 +0800 Subject: [PATCH 570/572] sched/fair: Advance global expiration when period timer is restarted When period gets restarted after some idle time, start_cfs_bandwidth() doesn't update the expiration information, expire_cfs_rq_runtime() will see cfs_rq->runtime_expires smaller than rq clock and go to the clock drift logic, wasting needless CPU cycles on the scheduler hot path. Update the global expiration in start_cfs_bandwidth() to avoid frequent expire_cfs_rq_runtime() calls once a new period begins. Signed-off-by: Xunlei Pang Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Ben Segall Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180620101834.24455-2-xlpang@linux.alibaba.com Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 791707c56886..840b92ee6f89 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5204,13 +5204,18 @@ static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b) { + u64 overrun; + lockdep_assert_held(&cfs_b->lock); - if (!cfs_b->period_active) { - cfs_b->period_active = 1; - hrtimer_forward_now(&cfs_b->period_timer, cfs_b->period); - hrtimer_start_expires(&cfs_b->period_timer, HRTIMER_MODE_ABS_PINNED); - } + if (cfs_b->period_active) + return; + + cfs_b->period_active = 1; + overrun = hrtimer_forward_now(&cfs_b->period_timer, cfs_b->period); + cfs_b->runtime_expires += (overrun + 1) * ktime_to_ns(cfs_b->period); + cfs_b->expires_seq++; + hrtimer_start_expires(&cfs_b->period_timer, HRTIMER_MODE_ABS_PINNED); } static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) From 3482d98bbc730758b63a5d1cf41d05ea17481412 Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Thu, 14 Jun 2018 12:33:00 +0200 Subject: [PATCH 571/572] sched/util_est: Fix util_est_dequeue() for throttled cfs_rq When a cfs_rq is throttled, parent cfs_rq->nr_running is decreased and everything happens at cfs_rq level. Currently util_est stays unchanged in such case and it keeps accounting the utilization of throttled tasks. This can somewhat make sense as we don't dequeue tasks but only throttled cfs_rq. If a task of another group is enqueued/dequeued and root cfs_rq becomes idle during the dequeue, util_est will be cleared whereas it was accounting util_est of throttled tasks before. So the behavior of util_est is not always the same regarding throttled tasks and depends of side activity. Furthermore, util_est will not be updated when the cfs_rq is unthrottled as everything happens at cfs_rq level. Main results is that util_est will stay null whereas we now have running tasks. We have to wait for the next dequeue/enqueue of the previously throttled tasks to get an up to date util_est. Remove the assumption that cfs_rq's estimated utilization of a CPU is 0 if there is no running task so the util_est of a task remains until the latter is dequeued even if its cfs_rq has been throttled. Signed-off-by: Vincent Guittot Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Patrick Bellasi Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 7f65ea42eb00 ("sched/fair: Add util_est on top of PELT") Link: http://lkml.kernel.org/r/1528972380-16268-1-git-send-email-vincent.guittot@linaro.org Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 840b92ee6f89..2f0a0be4d344 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3982,18 +3982,10 @@ util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep) if (!sched_feat(UTIL_EST)) return; - /* - * Update root cfs_rq's estimated utilization - * - * If *p is the last task then the root cfs_rq's estimated utilization - * of a CPU is 0 by definition. - */ - ue.enqueued = 0; - if (cfs_rq->nr_running) { - ue.enqueued = cfs_rq->avg.util_est.enqueued; - ue.enqueued -= min_t(unsigned int, ue.enqueued, - (_task_util_est(p) | UTIL_AVG_UNCHANGED)); - } + /* Update root cfs_rq's estimated utilization */ + ue.enqueued = cfs_rq->avg.util_est.enqueued; + ue.enqueued -= min_t(unsigned int, ue.enqueued, + (_task_util_est(p) | UTIL_AVG_UNCHANGED)); WRITE_ONCE(cfs_rq->avg.util_est.enqueued, ue.enqueued); /* From 1cef1150ef40ec52f507436a14230cbc2623299c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 7 Jun 2018 11:45:49 +0200 Subject: [PATCH 572/572] kthread, sched/core: Fix kthread_parkme() (again...) Gaurav reports that commit: 85f1abe0019f ("kthread, sched/wait: Fix kthread_parkme() completion issue") isn't working for him. Because of the following race: > controller Thread CPUHP Thread > takedown_cpu > kthread_park > kthread_parkme > Set KTHREAD_SHOULD_PARK > smpboot_thread_fn > set Task interruptible > > > wake_up_process > if (!(p->state & state)) > goto out; > > Kthread_parkme > SET TASK_PARKED > schedule > raw_spin_lock(&rq->lock) > ttwu_remote > waiting for __task_rq_lock > context_switch > > finish_lock_switch > > > > Case TASK_PARKED > kthread_park_complete > > > SET Running Furthermore, Oleg noticed that the whole scheduler TASK_PARKED handling is buggered because the TASK_DEAD thing is done with preemption disabled, the current code can still complete early on preemption :/ So basically revert that earlier fix and go with a variant of the alternative mentioned in the commit. Promote TASK_PARKED to special state to avoid the store-store issue on task->state leading to the WARN in kthread_unpark() -> __kthread_bind(). But in addition, add wait_task_inactive() to kthread_park() to ensure the task really is PARKED when we return from kthread_park(). This avoids the whole kthread still gets migrated nonsense -- although it would be really good to get this done differently. Reported-by: Gaurav Kohli Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 85f1abe0019f ("kthread, sched/wait: Fix kthread_parkme() completion issue") Signed-off-by: Ingo Molnar --- include/linux/kthread.h | 1 - include/linux/sched.h | 2 +- kernel/kthread.c | 30 ++++++++++++++++++++++++------ kernel/sched/core.c | 31 +++++++++++-------------------- 4 files changed, 36 insertions(+), 28 deletions(-) diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 2803264c512f..c1961761311d 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -62,7 +62,6 @@ void *kthread_probe_data(struct task_struct *k); int kthread_park(struct task_struct *k); void kthread_unpark(struct task_struct *k); void kthread_parkme(void); -void kthread_park_complete(struct task_struct *k); int kthreadd(void *unused); extern struct task_struct *kthreadd_task; diff --git a/include/linux/sched.h b/include/linux/sched.h index 9256118bd40c..43731fe51c97 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -118,7 +118,7 @@ struct task_group; * the comment with set_special_state(). */ #define is_special_task_state(state) \ - ((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_DEAD)) + ((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_PARKED | TASK_DEAD)) #define __set_current_state(state_value) \ do { \ diff --git a/kernel/kthread.c b/kernel/kthread.c index 481951bf091d..750cb8082694 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -177,9 +177,20 @@ void *kthread_probe_data(struct task_struct *task) static void __kthread_parkme(struct kthread *self) { for (;;) { - set_current_state(TASK_PARKED); + /* + * TASK_PARKED is a special state; we must serialize against + * possible pending wakeups to avoid store-store collisions on + * task->state. + * + * Such a collision might possibly result in the task state + * changin from TASK_PARKED and us failing the + * wait_task_inactive() in kthread_park(). + */ + set_special_state(TASK_PARKED); if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags)) break; + + complete_all(&self->parked); schedule(); } __set_current_state(TASK_RUNNING); @@ -191,11 +202,6 @@ void kthread_parkme(void) } EXPORT_SYMBOL_GPL(kthread_parkme); -void kthread_park_complete(struct task_struct *k) -{ - complete_all(&to_kthread(k)->parked); -} - static int kthread(void *_create) { /* Copy data: it's on kthread's stack */ @@ -461,6 +467,9 @@ void kthread_unpark(struct task_struct *k) reinit_completion(&kthread->parked); clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags); + /* + * __kthread_parkme() will either see !SHOULD_PARK or get the wakeup. + */ wake_up_state(k, TASK_PARKED); } EXPORT_SYMBOL_GPL(kthread_unpark); @@ -487,7 +496,16 @@ int kthread_park(struct task_struct *k) set_bit(KTHREAD_SHOULD_PARK, &kthread->flags); if (k != current) { wake_up_process(k); + /* + * Wait for __kthread_parkme() to complete(), this means we + * _will_ have TASK_PARKED and are about to call schedule(). + */ wait_for_completion(&kthread->parked); + /* + * Now wait for that schedule() to complete and the task to + * get scheduled out. + */ + WARN_ON_ONCE(!wait_task_inactive(k, TASK_PARKED)); } return 0; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 22fce36426c0..fe365c9a08e9 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7,7 +7,6 @@ */ #include "sched.h" -#include #include #include @@ -2724,28 +2723,20 @@ static struct rq *finish_task_switch(struct task_struct *prev) membarrier_mm_sync_core_before_usermode(mm); mmdrop(mm); } - if (unlikely(prev_state & (TASK_DEAD|TASK_PARKED))) { - switch (prev_state) { - case TASK_DEAD: - if (prev->sched_class->task_dead) - prev->sched_class->task_dead(prev); + if (unlikely(prev_state == TASK_DEAD)) { + if (prev->sched_class->task_dead) + prev->sched_class->task_dead(prev); - /* - * Remove function-return probe instances associated with this - * task and put them back on the free list. - */ - kprobe_flush_task(prev); + /* + * Remove function-return probe instances associated with this + * task and put them back on the free list. + */ + kprobe_flush_task(prev); - /* Task is done with its stack. */ - put_task_stack(prev); + /* Task is done with its stack. */ + put_task_stack(prev); - put_task_struct(prev); - break; - - case TASK_PARKED: - kthread_park_complete(prev); - break; - } + put_task_struct(prev); } tick_nohz_task_switch();