From 33c45ef8adc8a7cf781b2566d50e6ea8e97b3596 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Mon, 19 Sep 2016 12:02:50 +0200 Subject: [PATCH 001/884] ARM: mvebu: Select corediv clk for all mvebu v7 SoC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the commit bd3677ff31a3 ("clk: mvebu: Remove corediv clock from Armada XP"), the corediv clk is no more selected for Armada XP, however this clock is used for Armada XP using the compatible armada-370-corediv-clock. While since commit 1594d568c6e3 ("clk: mvebu: Move corediv config to mvebu config") Armada 38x and Armada 375 got corediv support again, not only Armada XP was missed but also Armada 39x. Actually all the SoC selecting MVEBU_V7 config need this clock: git grep "\-corediv-clock" arch/arm/boot/dts arch/arm/boot/dts/armada-370-xp.dtsi: compatible = "marvell,armada-370-corediv-clock"; arch/arm/boot/dts/armada-375.dtsi: compatible = "marvell,armada-375-corediv-clock"; arch/arm/boot/dts/armada-38x.dtsi: compatible = "marvell,armada-380-corediv-clock"; arch/arm/boot/dts/armada-39x.dtsi: compatible = "marvell,armada-390-corediv-clock" This commit now fixes this behavior by letting MVEBU_V7 select MVEBU_CLK_COREDIV. Fixes: bd3677ff31a3 ("clk: mvebu: Remove corediv clock from Armada XP") Cc: stable@vger.kernel.org Reported-by: Uwe Kleine-König Acked-by: Uwe Kleine-König Signed-off-by: Gregory CLEMENT --- arch/arm/mach-mvebu/Kconfig | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/arm/mach-mvebu/Kconfig b/arch/arm/mach-mvebu/Kconfig index f9b6bd306cfe..541647f57192 100644 --- a/arch/arm/mach-mvebu/Kconfig +++ b/arch/arm/mach-mvebu/Kconfig @@ -23,6 +23,7 @@ config MACH_MVEBU_V7 select CACHE_L2X0 select ARM_CPU_SUSPEND select MACH_MVEBU_ANY + select MVEBU_CLK_COREDIV config MACH_ARMADA_370 bool "Marvell Armada 370 boards" @@ -32,7 +33,6 @@ config MACH_ARMADA_370 select CPU_PJ4B select MACH_MVEBU_V7 select PINCTRL_ARMADA_370 - select MVEBU_CLK_COREDIV help Say 'Y' here if you want your kernel to support boards based on the Marvell Armada 370 SoC with device tree. @@ -50,7 +50,6 @@ config MACH_ARMADA_375 select HAVE_SMP select MACH_MVEBU_V7 select PINCTRL_ARMADA_375 - select MVEBU_CLK_COREDIV help Say 'Y' here if you want your kernel to support boards based on the Marvell Armada 375 SoC with device tree. @@ -68,7 +67,6 @@ config MACH_ARMADA_38X select HAVE_SMP select MACH_MVEBU_V7 select PINCTRL_ARMADA_38X - select MVEBU_CLK_COREDIV help Say 'Y' here if you want your kernel to support boards based on the Marvell Armada 380/385 SoC with device tree. From 51227bf52008bd4c4c50da4b749bbc6e7bbbca52 Mon Sep 17 00:00:00 2001 From: Marcin Wojtas Date: Tue, 6 Sep 2016 19:41:11 +0200 Subject: [PATCH 002/884] arm64: dts: marvell: fix clocksource for CP110 master SPI0 I2C and SPI interfaces share common clock trees within the CP110 HW block. It occurred that SPI0 interface has wrong clock assignment in the device tree, which is fixed in this commit to a proper value. Fixes: 728dacc7f4dd ("arm64: dts: marvell: initial DT description of ...") Signed-off-by: Marcin Wojtas CC: v4.7+ Signed-off-by: Gregory CLEMENT --- arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi index da31bbbbb59e..399271853aad 100644 --- a/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi @@ -131,7 +131,7 @@ #address-cells = <0x1>; #size-cells = <0x0>; cell-index = <1>; - clocks = <&cpm_syscon0 0 3>; + clocks = <&cpm_syscon0 1 21>; status = "disabled"; }; From 54f9c4d0778b3f9ab791b1b7eb1a5d2809f02f50 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Tue, 20 Sep 2016 09:01:38 +0200 Subject: [PATCH 003/884] ipmi: add an Aspeed BT IPMI BMC driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds a simple device driver to expose the iBT interface on Aspeed SOCs (AST2400 and AST2500) as a character device. Such SOCs are commonly used as BMCs (BaseBoard Management Controllers) and this driver implements the BMC side of the BT interface. The BT (Block Transfer) interface is used to perform in-band IPMI communication between a host and its BMC. Entire messages are buffered before sending a notification to the other end, host or BMC, that there is data to be read. Usually, the host emits requests and the BMC responses but the specification provides a mean for the BMC to send SMS Attention (BMC-to-Host attention or System Management Software attention) messages. For this purpose, the driver introduces a specific ioctl on the device: 'BT_BMC_IOCTL_SMS_ATN' that can be used by the system running on the BMC to signal the host of such an event. The device name defaults to '/dev/ipmi-bt-host' Signed-off-by: Alistair Popple Signed-off-by: Jeremy Kerr Signed-off-by: Joel Stanley [clg: - checkpatch fixes - added a devicetree binding documentation - replace 'bt_host' by 'bt_bmc' to reflect that the driver is the BMC side of the IPMI BT interface - renamed the device to 'ipmi-bt-host' - introduced a temporary buffer to copy_{to,from}_user - used platform_get_irq() - moved the driver under drivers/char/ipmi/ but kept it as a misc device - changed the compatible cell to "aspeed,ast2400-bt-bmc" ] Signed-off-by: Cédric Le Goater Acked-by: Arnd Bergmann [clg: - checkpatch --strict fixes - removed the use of devm_iounmap, devm_kfree in cleanup paths - introduced an atomic-t to limit opens to 1 - introduced a mutex to protect write/read operations] Acked-by: Rob Herring Signed-off-by: Cédric Le Goater Signed-off-by: Corey Minyard --- .../bindings/ipmi/aspeed,ast2400-bt-bmc.txt | 23 + .../bindings/{ipmi.txt => ipmi/ipmi-smic.txt} | 0 drivers/Makefile | 2 +- drivers/char/ipmi/Kconfig | 7 + drivers/char/ipmi/Makefile | 1 + drivers/char/ipmi/bt-bmc.c | 510 ++++++++++++++++++ include/uapi/linux/Kbuild | 1 + include/uapi/linux/bt-bmc.h | 18 + 8 files changed, 561 insertions(+), 1 deletion(-) create mode 100644 Documentation/devicetree/bindings/ipmi/aspeed,ast2400-bt-bmc.txt rename Documentation/devicetree/bindings/{ipmi.txt => ipmi/ipmi-smic.txt} (100%) create mode 100644 drivers/char/ipmi/bt-bmc.c create mode 100644 include/uapi/linux/bt-bmc.h diff --git a/Documentation/devicetree/bindings/ipmi/aspeed,ast2400-bt-bmc.txt b/Documentation/devicetree/bindings/ipmi/aspeed,ast2400-bt-bmc.txt new file mode 100644 index 000000000000..fbbacd958240 --- /dev/null +++ b/Documentation/devicetree/bindings/ipmi/aspeed,ast2400-bt-bmc.txt @@ -0,0 +1,23 @@ +* Aspeed BT (Block Transfer) IPMI interface + +The Aspeed SOCs (AST2400 and AST2500) are commonly used as BMCs +(BaseBoard Management Controllers) and the BT interface can be used to +perform in-band IPMI communication with their host. + +Required properties: + +- compatible : should be "aspeed,ast2400-bt-bmc" +- reg: physical address and size of the registers + +Optional properties: + +- interrupts: interrupt generated by the BT interface. without an + interrupt, the driver will operate in poll mode. + +Example: + + ibt@1e789140 { + compatible = "aspeed,ast2400-bt-bmc"; + reg = <0x1e789140 0x18>; + interrupts = <8>; + }; diff --git a/Documentation/devicetree/bindings/ipmi.txt b/Documentation/devicetree/bindings/ipmi/ipmi-smic.txt similarity index 100% rename from Documentation/devicetree/bindings/ipmi.txt rename to Documentation/devicetree/bindings/ipmi/ipmi-smic.txt diff --git a/drivers/Makefile b/drivers/Makefile index 53abb4a5f736..5a9e7b6b7928 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -21,7 +21,7 @@ obj-y += video/ obj-y += idle/ # IPMI must come before ACPI in order to provide IPMI opregion support -obj-$(CONFIG_IPMI_HANDLER) += char/ipmi/ +obj-y += char/ipmi/ obj-$(CONFIG_ACPI) += acpi/ obj-$(CONFIG_SFI) += sfi/ diff --git a/drivers/char/ipmi/Kconfig b/drivers/char/ipmi/Kconfig index 5a9350b1069a..2c234e3e7513 100644 --- a/drivers/char/ipmi/Kconfig +++ b/drivers/char/ipmi/Kconfig @@ -76,3 +76,10 @@ config IPMI_POWEROFF the IPMI management controller is capable of this. endif # IPMI_HANDLER + +config ASPEED_BT_IPMI_BMC + tristate "BT IPMI bmc driver" + help + Provides a driver for the BT (Block Transfer) IPMI interface + found on Aspeed SOCs (AST2400 and AST2500). The driver + implements the BMC side of the BT interface. diff --git a/drivers/char/ipmi/Makefile b/drivers/char/ipmi/Makefile index f3ffde1f5f1f..0d98cd91def1 100644 --- a/drivers/char/ipmi/Makefile +++ b/drivers/char/ipmi/Makefile @@ -11,3 +11,4 @@ obj-$(CONFIG_IPMI_SSIF) += ipmi_ssif.o obj-$(CONFIG_IPMI_POWERNV) += ipmi_powernv.o obj-$(CONFIG_IPMI_WATCHDOG) += ipmi_watchdog.o obj-$(CONFIG_IPMI_POWEROFF) += ipmi_poweroff.o +obj-$(CONFIG_ASPEED_BT_IPMI_BMC) += bt-bmc.o diff --git a/drivers/char/ipmi/bt-bmc.c b/drivers/char/ipmi/bt-bmc.c new file mode 100644 index 000000000000..2e880bf0be26 --- /dev/null +++ b/drivers/char/ipmi/bt-bmc.c @@ -0,0 +1,510 @@ +/* + * Copyright (c) 2015-2016, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * This is a BMC device used to communicate to the host + */ +#define DEVICE_NAME "ipmi-bt-host" + +#define BT_IO_BASE 0xe4 +#define BT_IRQ 10 + +#define BT_CR0 0x0 +#define BT_CR0_IO_BASE 16 +#define BT_CR0_IRQ 12 +#define BT_CR0_EN_CLR_SLV_RDP 0x8 +#define BT_CR0_EN_CLR_SLV_WRP 0x4 +#define BT_CR0_ENABLE_IBT 0x1 +#define BT_CR1 0x4 +#define BT_CR1_IRQ_H2B 0x01 +#define BT_CR1_IRQ_HBUSY 0x40 +#define BT_CR2 0x8 +#define BT_CR2_IRQ_H2B 0x01 +#define BT_CR2_IRQ_HBUSY 0x40 +#define BT_CR3 0xc +#define BT_CTRL 0x10 +#define BT_CTRL_B_BUSY 0x80 +#define BT_CTRL_H_BUSY 0x40 +#define BT_CTRL_OEM0 0x20 +#define BT_CTRL_SMS_ATN 0x10 +#define BT_CTRL_B2H_ATN 0x08 +#define BT_CTRL_H2B_ATN 0x04 +#define BT_CTRL_CLR_RD_PTR 0x02 +#define BT_CTRL_CLR_WR_PTR 0x01 +#define BT_BMC2HOST 0x14 +#define BT_INTMASK 0x18 +#define BT_INTMASK_B2H_IRQEN 0x01 +#define BT_INTMASK_B2H_IRQ 0x02 +#define BT_INTMASK_BMC_HWRST 0x80 + +#define BT_BMC_BUFFER_SIZE 256 + +struct bt_bmc { + struct device dev; + struct miscdevice miscdev; + void __iomem *base; + int irq; + wait_queue_head_t queue; + struct timer_list poll_timer; + struct mutex mutex; +}; + +static atomic_t open_count = ATOMIC_INIT(0); + +static u8 bt_inb(struct bt_bmc *bt_bmc, int reg) +{ + return ioread8(bt_bmc->base + reg); +} + +static void bt_outb(struct bt_bmc *bt_bmc, u8 data, int reg) +{ + iowrite8(data, bt_bmc->base + reg); +} + +static void clr_rd_ptr(struct bt_bmc *bt_bmc) +{ + bt_outb(bt_bmc, BT_CTRL_CLR_RD_PTR, BT_CTRL); +} + +static void clr_wr_ptr(struct bt_bmc *bt_bmc) +{ + bt_outb(bt_bmc, BT_CTRL_CLR_WR_PTR, BT_CTRL); +} + +static void clr_h2b_atn(struct bt_bmc *bt_bmc) +{ + bt_outb(bt_bmc, BT_CTRL_H2B_ATN, BT_CTRL); +} + +static void set_b_busy(struct bt_bmc *bt_bmc) +{ + if (!(bt_inb(bt_bmc, BT_CTRL) & BT_CTRL_B_BUSY)) + bt_outb(bt_bmc, BT_CTRL_B_BUSY, BT_CTRL); +} + +static void clr_b_busy(struct bt_bmc *bt_bmc) +{ + if (bt_inb(bt_bmc, BT_CTRL) & BT_CTRL_B_BUSY) + bt_outb(bt_bmc, BT_CTRL_B_BUSY, BT_CTRL); +} + +static void set_b2h_atn(struct bt_bmc *bt_bmc) +{ + bt_outb(bt_bmc, BT_CTRL_B2H_ATN, BT_CTRL); +} + +static u8 bt_read(struct bt_bmc *bt_bmc) +{ + return bt_inb(bt_bmc, BT_BMC2HOST); +} + +static ssize_t bt_readn(struct bt_bmc *bt_bmc, u8 *buf, size_t n) +{ + int i; + + for (i = 0; i < n; i++) + buf[i] = bt_read(bt_bmc); + return n; +} + +static void bt_write(struct bt_bmc *bt_bmc, u8 c) +{ + bt_outb(bt_bmc, c, BT_BMC2HOST); +} + +static ssize_t bt_writen(struct bt_bmc *bt_bmc, u8 *buf, size_t n) +{ + int i; + + for (i = 0; i < n; i++) + bt_write(bt_bmc, buf[i]); + return n; +} + +static void set_sms_atn(struct bt_bmc *bt_bmc) +{ + bt_outb(bt_bmc, BT_CTRL_SMS_ATN, BT_CTRL); +} + +static struct bt_bmc *file_bt_bmc(struct file *file) +{ + return container_of(file->private_data, struct bt_bmc, miscdev); +} + +static int bt_bmc_open(struct inode *inode, struct file *file) +{ + struct bt_bmc *bt_bmc = file_bt_bmc(file); + + if (atomic_inc_return(&open_count) == 1) { + clr_b_busy(bt_bmc); + return 0; + } + + atomic_dec(&open_count); + return -EBUSY; +} + +/* + * The BT (Block Transfer) interface means that entire messages are + * buffered by the host before a notification is sent to the BMC that + * there is data to be read. The first byte is the length and the + * message data follows. The read operation just tries to capture the + * whole before returning it to userspace. + * + * BT Message format : + * + * Byte 1 Byte 2 Byte 3 Byte 4 Byte 5:N + * Length NetFn/LUN Seq Cmd Data + * + */ +static ssize_t bt_bmc_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct bt_bmc *bt_bmc = file_bt_bmc(file); + u8 len; + int len_byte = 1; + u8 kbuffer[BT_BMC_BUFFER_SIZE]; + ssize_t ret = 0; + ssize_t nread; + + if (!access_ok(VERIFY_WRITE, buf, count)) + return -EFAULT; + + WARN_ON(*ppos); + + if (wait_event_interruptible(bt_bmc->queue, + bt_inb(bt_bmc, BT_CTRL) & BT_CTRL_H2B_ATN)) + return -ERESTARTSYS; + + mutex_lock(&bt_bmc->mutex); + + if (unlikely(!(bt_inb(bt_bmc, BT_CTRL) & BT_CTRL_H2B_ATN))) { + ret = -EIO; + goto out_unlock; + } + + set_b_busy(bt_bmc); + clr_h2b_atn(bt_bmc); + clr_rd_ptr(bt_bmc); + + /* + * The BT frames start with the message length, which does not + * include the length byte. + */ + kbuffer[0] = bt_read(bt_bmc); + len = kbuffer[0]; + + /* We pass the length back to userspace as well */ + if (len + 1 > count) + len = count - 1; + + while (len) { + nread = min_t(ssize_t, len, sizeof(kbuffer) - len_byte); + + bt_readn(bt_bmc, kbuffer + len_byte, nread); + + if (copy_to_user(buf, kbuffer, nread + len_byte)) { + ret = -EFAULT; + break; + } + len -= nread; + buf += nread + len_byte; + ret += nread + len_byte; + len_byte = 0; + } + + clr_b_busy(bt_bmc); + +out_unlock: + mutex_unlock(&bt_bmc->mutex); + return ret; +} + +/* + * BT Message response format : + * + * Byte 1 Byte 2 Byte 3 Byte 4 Byte 5 Byte 6:N + * Length NetFn/LUN Seq Cmd Code Data + */ +static ssize_t bt_bmc_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct bt_bmc *bt_bmc = file_bt_bmc(file); + u8 kbuffer[BT_BMC_BUFFER_SIZE]; + ssize_t ret = 0; + ssize_t nwritten; + + /* + * send a minimum response size + */ + if (count < 5) + return -EINVAL; + + if (!access_ok(VERIFY_READ, buf, count)) + return -EFAULT; + + WARN_ON(*ppos); + + /* + * There's no interrupt for clearing bmc busy so we have to + * poll + */ + if (wait_event_interruptible(bt_bmc->queue, + !(bt_inb(bt_bmc, BT_CTRL) & + (BT_CTRL_H_BUSY | BT_CTRL_B2H_ATN)))) + return -ERESTARTSYS; + + mutex_lock(&bt_bmc->mutex); + + if (unlikely(bt_inb(bt_bmc, BT_CTRL) & + (BT_CTRL_H_BUSY | BT_CTRL_B2H_ATN))) { + ret = -EIO; + goto out_unlock; + } + + clr_wr_ptr(bt_bmc); + + while (count) { + nwritten = min_t(ssize_t, count, sizeof(kbuffer)); + if (copy_from_user(&kbuffer, buf, nwritten)) { + ret = -EFAULT; + break; + } + + bt_writen(bt_bmc, kbuffer, nwritten); + + count -= nwritten; + buf += nwritten; + ret += nwritten; + } + + set_b2h_atn(bt_bmc); + +out_unlock: + mutex_unlock(&bt_bmc->mutex); + return ret; +} + +static long bt_bmc_ioctl(struct file *file, unsigned int cmd, + unsigned long param) +{ + struct bt_bmc *bt_bmc = file_bt_bmc(file); + + switch (cmd) { + case BT_BMC_IOCTL_SMS_ATN: + set_sms_atn(bt_bmc); + return 0; + } + return -EINVAL; +} + +static int bt_bmc_release(struct inode *inode, struct file *file) +{ + struct bt_bmc *bt_bmc = file_bt_bmc(file); + + atomic_dec(&open_count); + set_b_busy(bt_bmc); + return 0; +} + +static unsigned int bt_bmc_poll(struct file *file, poll_table *wait) +{ + struct bt_bmc *bt_bmc = file_bt_bmc(file); + unsigned int mask = 0; + u8 ctrl; + + poll_wait(file, &bt_bmc->queue, wait); + + ctrl = bt_inb(bt_bmc, BT_CTRL); + + if (ctrl & BT_CTRL_H2B_ATN) + mask |= POLLIN; + + if (!(ctrl & (BT_CTRL_H_BUSY | BT_CTRL_B2H_ATN))) + mask |= POLLOUT; + + return mask; +} + +static const struct file_operations bt_bmc_fops = { + .owner = THIS_MODULE, + .open = bt_bmc_open, + .read = bt_bmc_read, + .write = bt_bmc_write, + .release = bt_bmc_release, + .poll = bt_bmc_poll, + .unlocked_ioctl = bt_bmc_ioctl, +}; + +static void poll_timer(unsigned long data) +{ + struct bt_bmc *bt_bmc = (void *)data; + + bt_bmc->poll_timer.expires += msecs_to_jiffies(500); + wake_up(&bt_bmc->queue); + add_timer(&bt_bmc->poll_timer); +} + +static irqreturn_t bt_bmc_irq(int irq, void *arg) +{ + struct bt_bmc *bt_bmc = arg; + u32 reg; + + reg = ioread32(bt_bmc->base + BT_CR2); + reg &= BT_CR2_IRQ_H2B | BT_CR2_IRQ_HBUSY; + if (!reg) + return IRQ_NONE; + + /* ack pending IRQs */ + iowrite32(reg, bt_bmc->base + BT_CR2); + + wake_up(&bt_bmc->queue); + return IRQ_HANDLED; +} + +static int bt_bmc_config_irq(struct bt_bmc *bt_bmc, + struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + u32 reg; + int rc; + + bt_bmc->irq = platform_get_irq(pdev, 0); + if (!bt_bmc->irq) + return -ENODEV; + + rc = devm_request_irq(dev, bt_bmc->irq, bt_bmc_irq, IRQF_SHARED, + DEVICE_NAME, bt_bmc); + if (rc < 0) { + dev_warn(dev, "Unable to request IRQ %d\n", bt_bmc->irq); + bt_bmc->irq = 0; + return rc; + } + + /* + * Configure IRQs on the bmc clearing the H2B and HBUSY bits; + * H2B will be asserted when the bmc has data for us; HBUSY + * will be cleared (along with B2H) when we can write the next + * message to the BT buffer + */ + reg = ioread32(bt_bmc->base + BT_CR1); + reg |= BT_CR1_IRQ_H2B | BT_CR1_IRQ_HBUSY; + iowrite32(reg, bt_bmc->base + BT_CR1); + + return 0; +} + +static int bt_bmc_probe(struct platform_device *pdev) +{ + struct bt_bmc *bt_bmc; + struct device *dev; + struct resource *res; + int rc; + + if (!pdev || !pdev->dev.of_node) + return -ENODEV; + + dev = &pdev->dev; + dev_info(dev, "Found bt bmc device\n"); + + bt_bmc = devm_kzalloc(dev, sizeof(*bt_bmc), GFP_KERNEL); + if (!bt_bmc) + return -ENOMEM; + + dev_set_drvdata(&pdev->dev, bt_bmc); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(dev, "Unable to find resources\n"); + return -ENXIO; + } + + bt_bmc->base = devm_ioremap_resource(&pdev->dev, res); + if (!bt_bmc->base) + return -ENOMEM; + + mutex_init(&bt_bmc->mutex); + init_waitqueue_head(&bt_bmc->queue); + + bt_bmc->miscdev.minor = MISC_DYNAMIC_MINOR, + bt_bmc->miscdev.name = DEVICE_NAME, + bt_bmc->miscdev.fops = &bt_bmc_fops, + bt_bmc->miscdev.parent = dev; + rc = misc_register(&bt_bmc->miscdev); + if (rc) { + dev_err(dev, "Unable to register misc device\n"); + return rc; + } + + bt_bmc_config_irq(bt_bmc, pdev); + + if (bt_bmc->irq) { + dev_info(dev, "Using IRQ %d\n", bt_bmc->irq); + } else { + dev_info(dev, "No IRQ; using timer\n"); + setup_timer(&bt_bmc->poll_timer, poll_timer, + (unsigned long)bt_bmc); + bt_bmc->poll_timer.expires = jiffies + msecs_to_jiffies(10); + add_timer(&bt_bmc->poll_timer); + } + + iowrite32((BT_IO_BASE << BT_CR0_IO_BASE) | + (BT_IRQ << BT_CR0_IRQ) | + BT_CR0_EN_CLR_SLV_RDP | + BT_CR0_EN_CLR_SLV_WRP | + BT_CR0_ENABLE_IBT, + bt_bmc->base + BT_CR0); + + clr_b_busy(bt_bmc); + + return 0; +} + +static int bt_bmc_remove(struct platform_device *pdev) +{ + struct bt_bmc *bt_bmc = dev_get_drvdata(&pdev->dev); + + misc_deregister(&bt_bmc->miscdev); + if (!bt_bmc->irq) + del_timer_sync(&bt_bmc->poll_timer); + return 0; +} + +static const struct of_device_id bt_bmc_match[] = { + { .compatible = "aspeed,ast2400-bt-bmc" }, + { }, +}; + +static struct platform_driver bt_bmc_driver = { + .driver = { + .name = DEVICE_NAME, + .of_match_table = bt_bmc_match, + }, + .probe = bt_bmc_probe, + .remove = bt_bmc_remove, +}; + +module_platform_driver(bt_bmc_driver); + +MODULE_DEVICE_TABLE(of, bt_bmc_match); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Alistair Popple "); +MODULE_DESCRIPTION("Linux device interface to the BT interface"); diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 185f8ea2702f..17b12942c67d 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -74,6 +74,7 @@ header-y += bpf_common.h header-y += bpf.h header-y += bpqether.h header-y += bsg.h +header-y += bt-bmc.h header-y += btrfs.h header-y += can.h header-y += capability.h diff --git a/include/uapi/linux/bt-bmc.h b/include/uapi/linux/bt-bmc.h new file mode 100644 index 000000000000..d9ec766a63d0 --- /dev/null +++ b/include/uapi/linux/bt-bmc.h @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2015-2016, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _UAPI_LINUX_BT_BMC_H +#define _UAPI_LINUX_BT_BMC_H + +#include + +#define __BT_BMC_IOCTL_MAGIC 0xb1 +#define BT_BMC_IOCTL_SMS_ATN _IO(__BT_BMC_IOCTL_MAGIC, 0x00) + +#endif /* _UAPI_LINUX_BT_BMC_H */ From 1a377a79211a08c5c8a05c0b6dee6d5b13ef4107 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Wed, 21 Sep 2016 19:35:53 +0930 Subject: [PATCH 004/884] ipmi: Fix ioremap error handling in bt-bmc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit devm_ioremap_resource returns ERR_PTR so we can't check for NULL. Signed-off-by: Joel Stanley Acked-by: Cédric Le Goater Signed-off-by: Corey Minyard --- drivers/char/ipmi/bt-bmc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/char/ipmi/bt-bmc.c b/drivers/char/ipmi/bt-bmc.c index 2e880bf0be26..de64bf1f2f4d 100644 --- a/drivers/char/ipmi/bt-bmc.c +++ b/drivers/char/ipmi/bt-bmc.c @@ -438,8 +438,8 @@ static int bt_bmc_probe(struct platform_device *pdev) } bt_bmc->base = devm_ioremap_resource(&pdev->dev, res); - if (!bt_bmc->base) - return -ENOMEM; + if (IS_ERR(bt_bmc->base)) + return PTR_ERR(bt_bmc->base); mutex_init(&bt_bmc->mutex); init_waitqueue_head(&bt_bmc->queue); From a3e6061bad6292f2d5be3c1c4ccf1fa136517dec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= Date: Wed, 21 Sep 2016 12:24:34 +0200 Subject: [PATCH 005/884] ipmi/bt-bmc: add a dependency on ARCH_ASPEED MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Cédric Le Goater Signed-off-by: Corey Minyard --- drivers/char/ipmi/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/char/ipmi/Kconfig b/drivers/char/ipmi/Kconfig index 2c234e3e7513..7f816655cbbf 100644 --- a/drivers/char/ipmi/Kconfig +++ b/drivers/char/ipmi/Kconfig @@ -78,6 +78,7 @@ config IPMI_POWEROFF endif # IPMI_HANDLER config ASPEED_BT_IPMI_BMC + depends on ARCH_ASPEED tristate "BT IPMI bmc driver" help Provides a driver for the BT (Block Transfer) IPMI interface From d94655b405ba08838fb3db301dddb02a435ae16c Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 24 Sep 2016 12:02:54 +0000 Subject: [PATCH 006/884] ipmi/bt-bmc: remove redundant return value check of platform_get_resource() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove unneeded error handling on the result of a call to platform_get_resource() when the value is passed to devm_ioremap_resource(). Signed-off-by: Wei Yongjun Acked-by: Cédric Le Goater Signed-off-by: Corey Minyard --- drivers/char/ipmi/bt-bmc.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/char/ipmi/bt-bmc.c b/drivers/char/ipmi/bt-bmc.c index de64bf1f2f4d..b49e61320952 100644 --- a/drivers/char/ipmi/bt-bmc.c +++ b/drivers/char/ipmi/bt-bmc.c @@ -432,11 +432,6 @@ static int bt_bmc_probe(struct platform_device *pdev) dev_set_drvdata(&pdev->dev, bt_bmc); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) { - dev_err(dev, "Unable to find resources\n"); - return -ENXIO; - } - bt_bmc->base = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(bt_bmc->base)) return PTR_ERR(bt_bmc->base); From 231147ee77f39f4134935686e9d7e415bdf48149 Mon Sep 17 00:00:00 2001 From: sayli karnik Date: Wed, 28 Sep 2016 21:46:51 +0530 Subject: [PATCH 007/884] iio: maxim_thermocouple: Align 16 bit big endian value of raw reads Driver was reporting invalid raw read values for MAX6675 on big endian architectures. MAX6675 buffered mode is not affected, nor is the MAX31855. The driver was losing a 2 byte read value when it used a 32 bit integer buffer to store a 16 bit big endian value. Use big endian types to properly align buffers on big endian architectures. Fixes following sparse endianness warnings: warning: cast to restricted __be16 warning: cast to restricted __be32 Fixes checkpatch issue: CHECK: No space is necessary after a cast Signed-off-by: sayli karnik Fixes: 1f25ca11d84a ("iio: temperature: add support for Maxim thermocouple chips") Signed-off-by: Jonathan Cameron --- drivers/iio/temperature/maxim_thermocouple.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/iio/temperature/maxim_thermocouple.c b/drivers/iio/temperature/maxim_thermocouple.c index 39dd2026ccc9..066161a4bccd 100644 --- a/drivers/iio/temperature/maxim_thermocouple.c +++ b/drivers/iio/temperature/maxim_thermocouple.c @@ -123,22 +123,24 @@ static int maxim_thermocouple_read(struct maxim_thermocouple_data *data, { unsigned int storage_bytes = data->chip->read_size; unsigned int shift = chan->scan_type.shift + (chan->address * 8); - unsigned int buf; + __be16 buf16; + __be32 buf32; int ret; - ret = spi_read(data->spi, (void *) &buf, storage_bytes); - if (ret) - return ret; - switch (storage_bytes) { case 2: - *val = be16_to_cpu(buf); + ret = spi_read(data->spi, (void *)&buf16, storage_bytes); + *val = be16_to_cpu(buf16); break; case 4: - *val = be32_to_cpu(buf); + ret = spi_read(data->spi, (void *)&buf32, storage_bytes); + *val = be32_to_cpu(buf32); break; } + if (ret) + return ret; + /* check to be sure this is a valid reading */ if (*val & data->chip->status_bit) return -EINVAL; From bd85f4b37ddf2da22ccf5b29d264b2459b6722df Mon Sep 17 00:00:00 2001 From: Xie XiuQi Date: Tue, 27 Sep 2016 15:07:12 +0800 Subject: [PATCH 008/884] ipmi: fix crash on reading version from proc after unregisted bmc I meet a crash, which could be reproduce: 1) while true; do cat /proc/ipmi/0/version; done 2) modprobe -rv ipmi_si ipmi_msghandler ipmi_devintf [82761.021137] IPMI BT: req2rsp=5 secs retries=2 [82761.034524] ipmi device interface [82761.222218] ipmi_si ipmi_si.0: Found new BMC (man_id: 0x0007db, prod_id: 0x0001, dev_id: 0x01) [82761.222230] ipmi_si ipmi_si.0: IPMI bt interface initialized [82903.922740] BUG: unable to handle kernel NULL pointer dereference at 00000000000002d4 [82903.930952] IP: [] smi_version_proc_show+0x18/0x40 [ipmi_msghandler] [82903.939220] PGD 86693a067 PUD 865304067 PMD 0 [82903.943893] Thread overran stack, or stack corrupted [82903.949034] Oops: 0000 [#1] SMP [82903.983091] Modules linked in: ipmi_si(-) ipmi_msghandler binfmt_misc ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter ... [82904.057285] pps_core scsi_transport_sas dm_mod vfio_iommu_type1 vfio xt_sctp nf_conntrack_proto_sctp nf_nat_proto_sctp nf_nat nf_conntrack sctp libcrc32c [last unloaded: ipmi_devintf] [82904.073169] CPU: 37 PID: 28089 Comm: cat Tainted: GF O ---- ------- 3.10.0-327.28.3.el7.x86_64 #1 [82904.083373] Hardware name: Huawei RH2288H V3/BC11HGSA0, BIOS 3.22 05/16/2016 [82904.090592] task: ffff880101cc2e00 ti: ffff880369c54000 task.ti: ffff880369c54000 [82904.098414] RIP: 0010:[] [] smi_version_proc_show+0x18/0x40 [ipmi_msghandler] [82904.109124] RSP: 0018:ffff880369c57e70 EFLAGS: 00010203 [82904.114608] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000024688470 [82904.121912] RDX: fffffffffffffff4 RSI: ffffffffa0313404 RDI: ffff8808670ce200 [82904.129218] RBP: ffff880369c57e70 R08: 0000000000019720 R09: ffffffff81204a27 [82904.136521] R10: ffff88046f803300 R11: 0000000000000246 R12: ffff880662399700 [82904.143828] R13: 0000000000000001 R14: ffff880369c57f48 R15: ffff8808670ce200 [82904.151128] FS: 00007fb70c9ca740(0000) GS:ffff88086e340000(0000) knlGS:0000000000000000 [82904.159557] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [82904.165473] CR2: 00000000000002d4 CR3: 0000000864c0c000 CR4: 00000000003407e0 [82904.172778] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [82904.180084] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [82904.187385] Stack: [82904.189573] ffff880369c57ee0 ffffffff81204f1a 00000000122a2427 0000000001426000 [82904.197392] ffff8808670ce238 0000000000010000 0000000000000000 0000000000000fff [82904.205198] 00000000122a2427 ffff880862079600 0000000001426000 ffff880369c57f48 [82904.212962] Call Trace: [82904.219667] [] seq_read+0xfa/0x3a0 [82904.224893] [] proc_reg_read+0x3d/0x80 [82904.230468] [] vfs_read+0x9c/0x170 [82904.235689] [] SyS_read+0x7f/0xe0 [82904.240816] [] system_call_fastpath+0x16/0x1b [82904.246991] Code: 30 a0 e8 0c 6f ef e0 5b 5d c3 66 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48 8b 47 78 55 48 c7 c6 04 34 31 a0 48 89 e5 48 8b 40 50 <0f> b6 90 d4 02 00 00 31 c0 89 d1 83 e2 0f c0 e9 04 0f b6 c9 e8 [82904.267710] RIP [] smi_version_proc_show+0x18/0x40 [ipmi_msghandler] [82904.276079] RSP [82904.279734] CR2: 00000000000002d4 [82904.283731] ---[ end trace a69e4328b49dd7c4 ]--- [82904.328118] Kernel panic - not syncing: Fatal exception Reading versin from /proc need bmc device struct available. So in this patch we move add/remove_proc_entries between ipmi_bmc_register and ipmi_bmc_unregister. Cc: Kefeng Wang Signed-off-by: Xie XiuQi Signed-off-by: Corey Minyard --- drivers/char/ipmi/ipmi_msghandler.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index d8619998cfb5..fcdd886819f5 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -2891,11 +2891,11 @@ int ipmi_register_smi(const struct ipmi_smi_handlers *handlers, intf->curr_channel = IPMI_MAX_CHANNELS; } + rv = ipmi_bmc_register(intf, i); + if (rv == 0) rv = add_proc_entries(intf, i); - rv = ipmi_bmc_register(intf, i); - out: if (rv) { if (intf->proc_dir) @@ -2982,8 +2982,6 @@ int ipmi_unregister_smi(ipmi_smi_t intf) int intf_num = intf->intf_num; ipmi_user_t user; - ipmi_bmc_unregister(intf); - mutex_lock(&smi_watchers_mutex); mutex_lock(&ipmi_interfaces_mutex); intf->intf_num = -1; @@ -3007,6 +3005,7 @@ int ipmi_unregister_smi(ipmi_smi_t intf) mutex_unlock(&ipmi_interfaces_mutex); remove_proc_entries(intf); + ipmi_bmc_unregister(intf); /* * Call all the watcher interfaces to tell them that From ea908ba8f73446dfbf87ff71f7cadb1994d2c5bb Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 4 Oct 2016 22:19:57 +0100 Subject: [PATCH 009/884] drm/armada: fix clock counts The DPMS handling wrt clock enables/disables was incorrect: we could end up decrementing the clock count multiple times if we transition via several low-power DPMS states, resulting in a kernel warning. Fix this by only testing to see whether we are entering or exiting the DPMS off state. Signed-off-by: Russell King --- drivers/gpu/drm/armada/armada_crtc.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/armada/armada_crtc.c b/drivers/gpu/drm/armada/armada_crtc.c index 2f58e9e2a59c..a51f8cbcfe26 100644 --- a/drivers/gpu/drm/armada/armada_crtc.c +++ b/drivers/gpu/drm/armada/armada_crtc.c @@ -332,17 +332,19 @@ static void armada_drm_crtc_dpms(struct drm_crtc *crtc, int dpms) { struct armada_crtc *dcrtc = drm_to_armada_crtc(crtc); - if (dcrtc->dpms != dpms) { - dcrtc->dpms = dpms; - if (!IS_ERR(dcrtc->clk) && !dpms_blanked(dpms)) - WARN_ON(clk_prepare_enable(dcrtc->clk)); - armada_drm_crtc_update(dcrtc); - if (!IS_ERR(dcrtc->clk) && dpms_blanked(dpms)) - clk_disable_unprepare(dcrtc->clk); + if (dpms_blanked(dcrtc->dpms) != dpms_blanked(dpms)) { if (dpms_blanked(dpms)) armada_drm_vblank_off(dcrtc); - else + else if (!IS_ERR(dcrtc->clk)) + WARN_ON(clk_prepare_enable(dcrtc->clk)); + dcrtc->dpms = dpms; + armada_drm_crtc_update(dcrtc); + if (!dpms_blanked(dpms)) drm_crtc_vblank_on(&dcrtc->crtc); + else if (!IS_ERR(dcrtc->clk)) + clk_disable_unprepare(dcrtc->clk); + } else if (dcrtc->dpms != dpms) { + dcrtc->dpms = dpms; } } From f38b7c2547537a8219d273e20eb3b88e6fc6b764 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 28 Sep 2016 14:38:36 +0000 Subject: [PATCH 010/884] wlcore: sdio: drop kfree for memory allocated with devm_kzalloc It's not necessary to free memory allocated with devm_kzalloc and using kfree leads to a double free. Fixes: d776fc86b82f ("wlcore: sdio: Populate config firmware data") Signed-off-by: Wei Yongjun Acked-by: Tony Lindgren Signed-off-by: Kalle Valo --- drivers/net/wireless/ti/wlcore/sdio.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/wireless/ti/wlcore/sdio.c b/drivers/net/wireless/ti/wlcore/sdio.c index a6e94b1a12cb..47fe7f96a242 100644 --- a/drivers/net/wireless/ti/wlcore/sdio.c +++ b/drivers/net/wireless/ti/wlcore/sdio.c @@ -391,7 +391,6 @@ static void wl1271_remove(struct sdio_func *func) pm_runtime_get_noresume(&func->dev); platform_device_unregister(glue->core); - kfree(glue); } #ifdef CONFIG_PM From 1e54134ccad00f76ddf00f3e77db3dc8fdefbb47 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Thu, 29 Sep 2016 15:40:54 -0400 Subject: [PATCH 011/884] rtl8xxxu: Fix memory leak in handling rxdesc16 packets A device running without RX package aggregation could return more data in the USB packet than the actual network packet. In this case we could would clone the skb but then determine that that there was no packet to handle and exit without freeing the cloned skb first. This has so far only been observed with 8188eu devices, but could affect others. Signed-off-by: Jes Sorensen Cc: stable@vger.kernel.org # 4.8+ Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index b2d7f6e69667..a96ff17759e4 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -5197,7 +5197,12 @@ int rtl8xxxu_parse_rxdesc16(struct rtl8xxxu_priv *priv, struct sk_buff *skb) pkt_offset = roundup(pkt_len + drvinfo_sz + desc_shift + sizeof(struct rtl8xxxu_rxdesc16), 128); - if (pkt_cnt > 1) + /* + * Only clone the skb if there's enough data at the end to + * at least cover the rx descriptor + */ + if (pkt_cnt > 1 && + urb_len > (pkt_offset + sizeof(struct rtl8xxxu_rxdesc16))) next_skb = skb_clone(skb, GFP_ATOMIC); rx_status = IEEE80211_SKB_RXCB(skb); From 8a55698f2f29d227825173420d7b99b9277ca88c Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Thu, 29 Sep 2016 15:40:55 -0400 Subject: [PATCH 012/884] rtl8xxxu: Fix big-endian problem reporting mactime The full RX descriptor is converted so converting tsfl again would return it to it's original endian value. Signed-off-by: Jes Sorensen Cc: stable@vger.kernel.org # 4.8+ Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h | 4 ++-- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h index 1016628926d2..08d587a342d3 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h @@ -238,7 +238,7 @@ struct rtl8xxxu_rxdesc16 { u32 pattern1match:1; u32 pattern0match:1; #endif - __le32 tsfl; + u32 tsfl; #if 0 u32 bassn:12; u32 bavld:1; @@ -368,7 +368,7 @@ struct rtl8xxxu_rxdesc24 { u32 ldcp:1; u32 splcp:1; #endif - __le32 tsfl; + u32 tsfl; }; struct rtl8xxxu_txdesc32 { diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index a96ff17759e4..a5e6ec2152bf 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -5220,7 +5220,7 @@ int rtl8xxxu_parse_rxdesc16(struct rtl8xxxu_priv *priv, struct sk_buff *skb) rtl8xxxu_rx_parse_phystats(priv, rx_status, phy_stats, rx_desc->rxmcs); - rx_status->mactime = le32_to_cpu(rx_desc->tsfl); + rx_status->mactime = rx_desc->tsfl; rx_status->flag |= RX_FLAG_MACTIME_START; if (!rx_desc->swdec) @@ -5290,7 +5290,7 @@ int rtl8xxxu_parse_rxdesc24(struct rtl8xxxu_priv *priv, struct sk_buff *skb) rtl8xxxu_rx_parse_phystats(priv, rx_status, phy_stats, rx_desc->rxmcs); - rx_status->mactime = le32_to_cpu(rx_desc->tsfl); + rx_status->mactime = rx_desc->tsfl; rx_status->flag |= RX_FLAG_MACTIME_START; if (!rx_desc->swdec) From ab05e5ec81c76f3a852919c22984c885edd2414a Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Fri, 30 Sep 2016 19:35:17 -0400 Subject: [PATCH 013/884] rtl8xxxu: Fix rtl8723bu driver reload issue The generic disable_rf() function clears bits 22 and 23 in REG_RX_WAIT_CCA, however we did not re-enable them again in rtl8723b_enable_rf() This resolves the problem for me with 8723bu devices not working again after reloading the driver. Signed-off-by: Jes Sorensen Cc: stable@vger.kernel.org # 4.7+ Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c index 6c086b5657e9..02b8ddd98a95 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c @@ -1498,6 +1498,10 @@ static void rtl8723b_enable_rf(struct rtl8xxxu_priv *priv) u32 val32; u8 val8; + val32 = rtl8xxxu_read32(priv, REG_RX_WAIT_CCA); + val32 |= (BIT(22) | BIT(23)); + rtl8xxxu_write32(priv, REG_RX_WAIT_CCA, val32); + /* * No indication anywhere as to what 0x0790 does. The 2 antenna * vendor code preserves bits 6-7 here. From 29d5e6fbd65be89dcd32f070fc45ee0e3b2f82b6 Mon Sep 17 00:00:00 2001 From: Jes Sorensen Date: Fri, 30 Sep 2016 19:35:18 -0400 Subject: [PATCH 014/884] rtl8xxxu: Fix rtl8192eu driver reload issue The 8192eu suffered from two issues when reloading the driver. The same problems as with the 8723bu where REG_RX_WAIT_CCA bits 22 and 23 didn't get set in rtl8192e_enable_rf(). In addition it also seems prone to issues when setting REG_RF_CTRL to 0 intead of just disabling the RF_ENABLE bit. Similar to what was causing issues with the 8188eu. With this patch I can successfully reload the driver and reassociate to an APi with an 8192eu dongle. Signed-off-by: Jes Sorensen Cc: stable@vger.kernel.org # 4.8+ Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c index df54d27e7851..a793fedc3654 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c @@ -1461,7 +1461,9 @@ static int rtl8192eu_active_to_emu(struct rtl8xxxu_priv *priv) int count, ret = 0; /* Turn off RF */ - rtl8xxxu_write8(priv, REG_RF_CTRL, 0); + val8 = rtl8xxxu_read8(priv, REG_RF_CTRL); + val8 &= ~RF_ENABLE; + rtl8xxxu_write8(priv, REG_RF_CTRL, val8); /* Switch DPDT_SEL_P output from register 0x65[2] */ val8 = rtl8xxxu_read8(priv, REG_LEDCFG2); @@ -1593,6 +1595,10 @@ static void rtl8192e_enable_rf(struct rtl8xxxu_priv *priv) u32 val32; u8 val8; + val32 = rtl8xxxu_read32(priv, REG_RX_WAIT_CCA); + val32 |= (BIT(22) | BIT(23)); + rtl8xxxu_write32(priv, REG_RX_WAIT_CCA, val32); + val8 = rtl8xxxu_read8(priv, REG_GPIO_MUXCFG); val8 |= BIT(5); rtl8xxxu_write8(priv, REG_GPIO_MUXCFG, val8); From 2967999fbceffa8520987ab9b3b00a55d6997dba Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 5 Oct 2016 17:46:50 +0300 Subject: [PATCH 015/884] iio: adc: ti-adc081c: Select IIO_TRIGGERED_BUFFER to prevent build errors Commit 08e05d1fce5c ("ti-adc081c: Initial triggered buffer support") added triggered buffer support but that also requires CONFIG_IIO_TRIGGERED_BUFFER, otherwise we get errors from linker such as: drivers/built-in.o: In function `adc081c_remove': drivers/iio/adc/ti-adc081c.c:225: undefined reference to `iio_triggered_buffer_cleanup' Fix these by explicitly selecting both CONFIG_IIO_TRIGGERED_BUFFER and CONFIG_IIO_BUFFER in Kconfig for the driver. Signed-off-by: Mika Westerberg Signed-off-by: Jonathan Cameron --- drivers/iio/adc/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig index 7edcf3238620..99c051490eff 100644 --- a/drivers/iio/adc/Kconfig +++ b/drivers/iio/adc/Kconfig @@ -437,6 +437,8 @@ config STX104 config TI_ADC081C tristate "Texas Instruments ADC081C/ADC101C/ADC121C family" depends on I2C + select IIO_BUFFER + select IIO_TRIGGERED_BUFFER help If you say yes here you get support for Texas Instruments ADC081C, ADC101C and ADC121C ADC chips. From 3d1355b3cfad53feba76a73b052c757a7de7f4de Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 3 Oct 2016 21:21:42 +0200 Subject: [PATCH 016/884] HID: hid-led: fix issue with transfer buffer not being dma capable The hid-led driver works fine under 4.8.0, however with the next kernel from today I get this: ------------[ cut here ]------------ WARNING: CPU: 0 PID: 2578 at drivers/usb/core/hcd.c:1584 usb_hcd_map_urb_for_dma+0x373/0x550 [usbcore] transfer buffer not dma capable Modules linked in: hid_led(+) usbhid vfat fat ir_sony_decoder iwlmvm led_class mac80211 snd_hda_codec_realtek snd_hda_codec_generic x86_pkg_temp_thermal iwlwifi crc32c_intel snd_hda_codec_hdmi i2c_i801 i2c_smbus snd_hda_intel cfg80211 snd_hda_codec snd_hda_core snd_pcm r8169 snd_timer mei_me mii snd mei ir_lirc_codec lirc_dev nuvoton_cir rc_core btusb btintel bluetooth rfkill usb_storage efivarfs ipv6 ehci_pci ehci_hcd xhci_pci xhci_hcd usbcore usb_common ext4 jbd2 mbcache ahci libahci libata CPU: 0 PID: 2578 Comm: systemd-udevd Not tainted 4.8.0-rc8-next-20161003 #1 Hardware name: ZOTAC ZBOX-CI321NANO/ZBOX-CI321NANO, BIOS B246P105 06/01/2015 ffffc90003dbb7e0 ffffffff81280425 ffffc90003dbb830 0000000000000000 ffffc90003dbb820 ffffffff8105b086 0000063003dbb800 ffff88006f374480 0000000000000000 0000000000000000 0000000000000001 ffff880079544000 Call Trace: [] dump_stack+0x68/0x93 [] __warn+0xc6/0xe0 [] warn_slowpath_fmt+0x4a/0x50 [] usb_hcd_map_urb_for_dma+0x373/0x550 [usbcore] [] usb_hcd_submit_urb+0x316/0x9c0 [usbcore] [] ? rcu_read_lock_sched_held+0x40/0x80 [] ? module_assert_mutex_or_preempt+0x13/0x50 [] ? __module_address+0x27/0xf0 [] usb_submit_urb+0x2c4/0x520 [usbcore] [] usb_start_wait_urb+0x5a/0xe0 [usbcore] [] usb_control_msg+0xbc/0xf0 [usbcore] [] ? __module_address+0x27/0xf0 [] usbhid_raw_request+0xa4/0x180 [usbhid] [] hidled_recv+0x71/0xe0 [hid_led] [] thingm_init+0x2d/0x50 [hid_led] [] hidled_probe+0xcb/0x24a [hid_led] [] hid_device_probe+0xd2/0x150 [] driver_probe_device+0x1fd/0x2c0 [] __driver_attach+0x9a/0xa0 [] ? driver_probe_device+0x2c0/0x2c0 [] bus_for_each_dev+0x5d/0x90 [] driver_attach+0x19/0x20 [] bus_add_driver+0x11f/0x220 [] ? 0xffffffffa07ac000 [] driver_register+0x5b/0xd0 [] ? 0xffffffffa07ac000 [] __hid_register_driver+0x61/0xa0 [] hidled_driver_init+0x1e/0x20 [hid_led] [] do_one_initcall+0x38/0x150 [] ? rcu_read_lock_sched_held+0x40/0x80 [] ? kmem_cache_alloc_trace+0x1d0/0x230 [] do_init_module+0x5a/0x1cb [] load_module+0x1e42/0x2530 [] ? __symbol_put+0x50/0x50 [] ? show_coresize+0x30/0x30 [] ? kernel_read_file+0x100/0x190 [] ? kernel_read_file_from_fd+0x44/0x70 [] SYSC_finit_module+0xba/0xc0 [] SyS_finit_module+0x9/0x10 [] entry_SYSCALL_64_fastpath+0x18/0xad ---[ end trace c9e6ea27003ecf9e ]--- Fix this by using a kmalloc'ed buffer when calling hid_hw_raw_request. Signed-off-by: Heiner Kallweit Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-led.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/hid/hid-led.c b/drivers/hid/hid-led.c index d8d55f37b4f5..d3e1ab162f7c 100644 --- a/drivers/hid/hid-led.c +++ b/drivers/hid/hid-led.c @@ -100,6 +100,7 @@ struct hidled_device { const struct hidled_config *config; struct hid_device *hdev; struct hidled_rgb *rgb; + u8 *buf; struct mutex lock; }; @@ -118,13 +119,19 @@ static int hidled_send(struct hidled_device *ldev, __u8 *buf) mutex_lock(&ldev->lock); + /* + * buffer provided to hid_hw_raw_request must not be on the stack + * and must not be part of a data structure + */ + memcpy(ldev->buf, buf, ldev->config->report_size); + if (ldev->config->report_type == RAW_REQUEST) - ret = hid_hw_raw_request(ldev->hdev, buf[0], buf, + ret = hid_hw_raw_request(ldev->hdev, buf[0], ldev->buf, ldev->config->report_size, HID_FEATURE_REPORT, HID_REQ_SET_REPORT); else if (ldev->config->report_type == OUTPUT_REPORT) - ret = hid_hw_output_report(ldev->hdev, buf, + ret = hid_hw_output_report(ldev->hdev, ldev->buf, ldev->config->report_size); else ret = -EINVAL; @@ -147,17 +154,21 @@ static int hidled_recv(struct hidled_device *ldev, __u8 *buf) mutex_lock(&ldev->lock); - ret = hid_hw_raw_request(ldev->hdev, buf[0], buf, + memcpy(ldev->buf, buf, ldev->config->report_size); + + ret = hid_hw_raw_request(ldev->hdev, buf[0], ldev->buf, ldev->config->report_size, HID_FEATURE_REPORT, HID_REQ_SET_REPORT); if (ret < 0) goto err; - ret = hid_hw_raw_request(ldev->hdev, buf[0], buf, + ret = hid_hw_raw_request(ldev->hdev, buf[0], ldev->buf, ldev->config->report_size, HID_FEATURE_REPORT, HID_REQ_GET_REPORT); + + memcpy(buf, ldev->buf, ldev->config->report_size); err: mutex_unlock(&ldev->lock); @@ -447,6 +458,10 @@ static int hidled_probe(struct hid_device *hdev, const struct hid_device_id *id) if (!ldev) return -ENOMEM; + ldev->buf = devm_kmalloc(&hdev->dev, MAX_REPORT_SIZE, GFP_KERNEL); + if (!ldev->buf) + return -ENOMEM; + ret = hid_parse(hdev); if (ret) return ret; From e15944099870f374ca7efc62f98cf23ba272ef43 Mon Sep 17 00:00:00 2001 From: Ioan-Adrian Ratiu Date: Tue, 27 Sep 2016 21:41:38 +0300 Subject: [PATCH 017/884] HID: hid-dr: add input mapping for axis selection Commit 79346d620e9d ("HID: input: force generic axis to be mapped to their user space axis") made mapping generic axes to their userspace equivalents mandatory and some lower end gamepads which were depending on the previous behaviour suffered severe regressions because they were reusing axes and expecting hid-input to multiplex their map to the respective userspace axis by always searching for and using the next available axis. One solution is to add a hid quirk for this type of "previous" behaviour in hid-input to bypass the new axes policy in favour of the old one, but since only one hardware vendor seems to be affected negatively we're better off making and exception and mapping in the driver for now; if more vendors or drivers turn out to experience the problem we should reconsider the quirk solution. Signed-off-by: Ioan-Adrian Ratiu Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-dr.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/drivers/hid/hid-dr.c b/drivers/hid/hid-dr.c index 8fd4bf77f264..0ed843939b2c 100644 --- a/drivers/hid/hid-dr.c +++ b/drivers/hid/hid-dr.c @@ -306,6 +306,30 @@ static __u8 *dr_report_fixup(struct hid_device *hdev, __u8 *rdesc, return rdesc; } +#define map_abs(c) hid_map_usage(hi, usage, bit, max, EV_ABS, (c)) +#define map_rel(c) hid_map_usage(hi, usage, bit, max, EV_REL, (c)) + +static int dr_input_mapping(struct hid_device *hdev, struct hid_input *hi, + struct hid_field *field, struct hid_usage *usage, + unsigned long **bit, int *max) +{ + switch (usage->hid) { + /* + * revert to the old hid-input behavior where axes + * can be randomly assigned when hid->usage is reused. + */ + case HID_GD_X: case HID_GD_Y: case HID_GD_Z: + case HID_GD_RX: case HID_GD_RY: case HID_GD_RZ: + if (field->flags & HID_MAIN_ITEM_RELATIVE) + map_rel(usage->hid & 0xf); + else + map_abs(usage->hid & 0xf); + return 1; + } + + return 0; +} + static int dr_probe(struct hid_device *hdev, const struct hid_device_id *id) { int ret; @@ -352,6 +376,7 @@ static struct hid_driver dr_driver = { .id_table = dr_devices, .report_fixup = dr_report_fixup, .probe = dr_probe, + .input_mapping = dr_input_mapping, }; module_hid_driver(dr_driver); From 1bcaa05ebee115213e34f1806cc6a4f7a6175a88 Mon Sep 17 00:00:00 2001 From: Ioan-Adrian Ratiu Date: Tue, 27 Sep 2016 21:41:37 +0300 Subject: [PATCH 018/884] Revert "HID: dragonrise: fix HID Descriptor for 0x0006 PID" This reverts commit 18339f59c3a6 ("HID: dragonrise: fix HID...") because it breaks certain dragonrise 0079:0006 gamepads. While it may fix a breakage caused by commit 79346d620e9d ("HID: input: force generic axis to be mapped to their user space axis"), it is probable that the manufacturer released different hardware with the same PID so this fix works for only a subset and breaks the other gamepads sharing the PID. What is needed is another more generic solution which fixes 79346d620e9d ("HID: input: force generic axis ...") breakage for this controller: we need to add an exception for this driver to make it keep the old behaviour previous to the initial breakage (this is done in patch 2 of this series). Signed-off-by: Ioan-Adrian Ratiu Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-dr.c | 58 -------------------------------------------- 1 file changed, 58 deletions(-) diff --git a/drivers/hid/hid-dr.c b/drivers/hid/hid-dr.c index 0ed843939b2c..818ea7d93533 100644 --- a/drivers/hid/hid-dr.c +++ b/drivers/hid/hid-dr.c @@ -234,58 +234,6 @@ static __u8 pid0011_rdesc_fixed[] = { 0xC0 /* End Collection */ }; -static __u8 pid0006_rdesc_fixed[] = { - 0x05, 0x01, /* Usage Page (Generic Desktop) */ - 0x09, 0x04, /* Usage (Joystick) */ - 0xA1, 0x01, /* Collection (Application) */ - 0xA1, 0x02, /* Collection (Logical) */ - 0x75, 0x08, /* Report Size (8) */ - 0x95, 0x05, /* Report Count (5) */ - 0x15, 0x00, /* Logical Minimum (0) */ - 0x26, 0xFF, 0x00, /* Logical Maximum (255) */ - 0x35, 0x00, /* Physical Minimum (0) */ - 0x46, 0xFF, 0x00, /* Physical Maximum (255) */ - 0x09, 0x30, /* Usage (X) */ - 0x09, 0x33, /* Usage (Ry) */ - 0x09, 0x32, /* Usage (Z) */ - 0x09, 0x31, /* Usage (Y) */ - 0x09, 0x34, /* Usage (Ry) */ - 0x81, 0x02, /* Input (Variable) */ - 0x75, 0x04, /* Report Size (4) */ - 0x95, 0x01, /* Report Count (1) */ - 0x25, 0x07, /* Logical Maximum (7) */ - 0x46, 0x3B, 0x01, /* Physical Maximum (315) */ - 0x65, 0x14, /* Unit (Centimeter) */ - 0x09, 0x39, /* Usage (Hat switch) */ - 0x81, 0x42, /* Input (Variable) */ - 0x65, 0x00, /* Unit (None) */ - 0x75, 0x01, /* Report Size (1) */ - 0x95, 0x0C, /* Report Count (12) */ - 0x25, 0x01, /* Logical Maximum (1) */ - 0x45, 0x01, /* Physical Maximum (1) */ - 0x05, 0x09, /* Usage Page (Button) */ - 0x19, 0x01, /* Usage Minimum (0x01) */ - 0x29, 0x0C, /* Usage Maximum (0x0C) */ - 0x81, 0x02, /* Input (Variable) */ - 0x06, 0x00, 0xFF, /* Usage Page (Vendor Defined) */ - 0x75, 0x01, /* Report Size (1) */ - 0x95, 0x08, /* Report Count (8) */ - 0x25, 0x01, /* Logical Maximum (1) */ - 0x45, 0x01, /* Physical Maximum (1) */ - 0x09, 0x01, /* Usage (0x01) */ - 0x81, 0x02, /* Input (Variable) */ - 0xC0, /* End Collection */ - 0xA1, 0x02, /* Collection (Logical) */ - 0x75, 0x08, /* Report Size (8) */ - 0x95, 0x07, /* Report Count (7) */ - 0x46, 0xFF, 0x00, /* Physical Maximum (255) */ - 0x26, 0xFF, 0x00, /* Logical Maximum (255) */ - 0x09, 0x02, /* Usage (0x02) */ - 0x91, 0x02, /* Output (Variable) */ - 0xC0, /* End Collection */ - 0xC0 /* End Collection */ -}; - static __u8 *dr_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { @@ -296,12 +244,6 @@ static __u8 *dr_report_fixup(struct hid_device *hdev, __u8 *rdesc, *rsize = sizeof(pid0011_rdesc_fixed); } break; - case 0x0006: - if (*rsize == sizeof(pid0006_rdesc_fixed)) { - rdesc = pid0006_rdesc_fixed; - *rsize = sizeof(pid0006_rdesc_fixed); - } - break; } return rdesc; } From 4973ca9a01e2354b159acedec1b9b8eb8de02ab7 Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Sun, 9 Oct 2016 14:21:50 +0200 Subject: [PATCH 019/884] HID: add quirk for Akai MIDImix. The Akai MIDImix (09e8:0031) is a MIDI fader controller that speaks regular MIDI and works well with Linux. However, initialization gets delayed due to reports timeout: [3643645.631124] hid-generic 0003:09E8:0031.0020: timeout initializing reports [3643645.632416] hid-generic 0003:09E8:0031.0020: hiddev0: USB HID v1.11 Device [AKAI MIDI Mix] on usb-0000:00:14.0-2/input0 Adding "usbhid.quirks=0x09e8:0x0031:0x20000000" on the kernel command line makes the issues go away. Signed-off-by: Steinar H. Gunderson Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 3 +++ drivers/hid/usbhid/hid-quirks.c | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index cd59c79eebdd..6cfb5cacc253 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -64,6 +64,9 @@ #define USB_VENDOR_ID_AKAI 0x2011 #define USB_DEVICE_ID_AKAI_MPKMINI2 0x0715 +#define USB_VENDOR_ID_AKAI_09E8 0x09E8 +#define USB_DEVICE_ID_AKAI_09E8_MIDIMIX 0x0031 + #define USB_VENDOR_ID_ALCOR 0x058f #define USB_DEVICE_ID_ALCOR_USBRS232 0x9720 diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c index 0a0eca5da47d..354d49ea36dd 100644 --- a/drivers/hid/usbhid/hid-quirks.c +++ b/drivers/hid/usbhid/hid-quirks.c @@ -56,6 +56,7 @@ static const struct hid_blacklist { { USB_VENDOR_ID_AIREN, USB_DEVICE_ID_AIREN_SLIMPLUS, HID_QUIRK_NOGET }, { USB_VENDOR_ID_AKAI, USB_DEVICE_ID_AKAI_MPKMINI2, HID_QUIRK_NO_INIT_REPORTS }, + { USB_VENDOR_ID_AKAI_09E8, USB_DEVICE_ID_AKAI_09E8_MIDIMIX, HID_QUIRK_NO_INIT_REPORTS }, { USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_UC100KM, HID_QUIRK_NOGET }, { USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_CS124U, HID_QUIRK_NOGET }, { USB_VENDOR_ID_ATEN, USB_DEVICE_ID_ATEN_2PORTKVM, HID_QUIRK_NOGET }, From 13d62fd26924b30593ffd970be17c7344149b188 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 25 Sep 2016 15:44:03 +0000 Subject: [PATCH 020/884] mmc: sdhci-of-arasan: Fix non static symbol warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the following sparse warning: drivers/mmc/host/sdhci-of-arasan.c:253:6: warning: symbol 'sdhci_arasan_reset' was not declared. Should it be static? Signed-off-by: Wei Yongjun Acked-by: Sören Brinkmann Acked-by: Adrian Hunter Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-of-arasan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c index da8e40af6f85..e263671abab8 100644 --- a/drivers/mmc/host/sdhci-of-arasan.c +++ b/drivers/mmc/host/sdhci-of-arasan.c @@ -250,7 +250,7 @@ static void sdhci_arasan_hs400_enhanced_strobe(struct mmc_host *mmc, writel(vendor, host->ioaddr + SDHCI_ARASAN_VENDOR_REGISTER); } -void sdhci_arasan_reset(struct sdhci_host *host, u8 mask) +static void sdhci_arasan_reset(struct sdhci_host *host, u8 mask) { u8 ctrl; struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); From 48ab086d262e705c4d042228479aaf0b2daffcf9 Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Fri, 30 Sep 2016 09:37:38 +0800 Subject: [PATCH 021/884] mmc: block: add missing header dependencies We get 1 warning when building kernel with W=1: drivers/mmc/card/block.c:2147:5: warning: no previous prototype for 'mmc_blk_issue_rq' [-Wmissing-prototypes] In fact, this function is declared in drivers/mmc/card/block.h, so this patch adds missing header dependencies. Signed-off-by: Baoyou Xie Signed-off-by: Ulf Hansson --- drivers/mmc/card/block.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index c3335112e68c..0f2cc9f22357 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -46,6 +46,7 @@ #include #include "queue.h" +#include "block.h" MODULE_ALIAS("mmc:block"); #ifdef MODULE_PARAM_PREFIX From 1720d3545b772c49b2975eeb3b8f4d3f56dc2085 Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Fri, 30 Sep 2016 14:18:58 +0800 Subject: [PATCH 022/884] mmc: core: switch to 1V8 or 1V2 for hs400es mode When introducing hs400es, I didn't notice that we haven't switched voltage to 1V2 or 1V8 for it. That happens to work as the first controller claiming to support hs400es, arasan(5.1), which is designed to only support 1V8. So the voltage is fixed to 1V8. But it actually is wrong, and will not fit for other host controllers. Let's fix it. Fixes: commit 81ac2af65793ecf ("mmc: core: implement enhanced strobe support") Cc: Signed-off-by: Shawn Lin Reviewed-by: Douglas Anderson Signed-off-by: Ulf Hansson --- drivers/mmc/core/mmc.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 3486bc7fbb64..f4ed5accafd0 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -1263,6 +1263,16 @@ static int mmc_select_hs400es(struct mmc_card *card) goto out_err; } + if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS400_1_2V) + err = __mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_120); + + if (err && card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS400_1_8V) + err = __mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_180); + + /* If fails try again during next card power cycle */ + if (err) + goto out_err; + err = mmc_select_bus_width(card); if (err < 0) goto out_err; From 4f25580fb84d934d7ecffa3c0aa8f10f7e23af92 Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Fri, 30 Sep 2016 14:18:59 +0800 Subject: [PATCH 023/884] mmc: core: changes frequency to hs_max_dtr when selecting hs400es Per JESD84-B51 P49, Host need to change frequency to <=52MHz after setting HS_TIMING to 0x1, and host may changes frequency to <= 200MHz after setting HS_TIMING to 0x3. That means the card expects the clock rate to increase from the current used f_init (which is less than 400KHz, but still being less than 52MHz) to 52MHz, otherwise we find some eMMC devices significantly report failure when sending status. Reported-by: Xiao Yao Signed-off-by: Shawn Lin Reviewed-by: Douglas Anderson Signed-off-by: Ulf Hansson --- drivers/mmc/core/mmc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index f4ed5accafd0..39fc5b2b96c5 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -1282,6 +1282,8 @@ static int mmc_select_hs400es(struct mmc_card *card) if (err) goto out_err; + mmc_set_clock(host, card->ext_csd.hs_max_dtr); + err = mmc_switch_status(card); if (err) goto out_err; From 8a3bee9b13824147dd15efb2993fb1b19c0e5555 Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Fri, 30 Sep 2016 14:19:00 +0800 Subject: [PATCH 024/884] mmc: sdhci-of-arasan: add sdhci_arasan_voltage_switch for arasan, 5.1 Per the vendor's requirement, we shouldn't do any setting for 1.8V Signaling Enable, otherwise the interaction/behaviour between phy and controller will be undefined. Mostly it works fine if we do that, but we still see failures. Anyway, let's fix it to meet the vendor's requirement. The error log looks like: [ 93.405085] mmc1: unexpected status 0x800900 after switch [ 93.408474] mmc1: switch to bus width 1 failed [ 93.408482] mmc1: mmc_select_hs200 failed, error -110 [ 93.408492] mmc1: error -110 during resume (card was removed?) [ 93.408705] PM: resume of devices complete after 213.453 msecs Signed-off-by: Shawn Lin Acked-by: Adrian Hunter Reviewed-by: Douglas Anderson Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-of-arasan.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c index e263671abab8..410a55b1c25f 100644 --- a/drivers/mmc/host/sdhci-of-arasan.c +++ b/drivers/mmc/host/sdhci-of-arasan.c @@ -265,6 +265,28 @@ static void sdhci_arasan_reset(struct sdhci_host *host, u8 mask) } } +static int sdhci_arasan_voltage_switch(struct mmc_host *mmc, + struct mmc_ios *ios) +{ + switch (ios->signal_voltage) { + case MMC_SIGNAL_VOLTAGE_180: + /* + * Plese don't switch to 1V8 as arasan,5.1 doesn't + * actually refer to this setting to indicate the + * signal voltage and the state machine will be broken + * actually if we force to enable 1V8. That's something + * like broken quirk but we could work around here. + */ + return 0; + case MMC_SIGNAL_VOLTAGE_330: + case MMC_SIGNAL_VOLTAGE_120: + /* We don't support 3V3 and 1V2 */ + break; + } + + return -EINVAL; +} + static struct sdhci_ops sdhci_arasan_ops = { .set_clock = sdhci_arasan_set_clock, .get_max_clock = sdhci_pltfm_clk_get_max_clock, @@ -661,6 +683,8 @@ static int sdhci_arasan_probe(struct platform_device *pdev) host->mmc_host_ops.hs400_enhanced_strobe = sdhci_arasan_hs400_enhanced_strobe; + host->mmc_host_ops.start_signal_voltage_switch = + sdhci_arasan_voltage_switch; } ret = sdhci_add_host(host); From 3f2d26643595973e835e8356ea90c7c15cb1b0f1 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 3 Oct 2016 10:58:28 +0200 Subject: [PATCH 025/884] mmc: core: Annotate cmd_hdr as __le32 Commit f68381a70bb2 (mmc: block: fix packed command header endianness) correctly fixed endianness handling of packed_cmd_hdr in mmc_blk_packed_hdr_wrq_prep. But now, sparse complains about incorrect types: drivers/mmc/card/block.c:1613:27: sparse: incorrect type in assignment (different base types) drivers/mmc/card/block.c:1613:27: expected unsigned int [unsigned] [usertype] drivers/mmc/card/block.c:1613:27: got restricted __le32 [usertype] ... So annotate cmd_hdr properly using __le32 to make everyone happy. Signed-off-by: Jiri Slaby Fixes: f68381a70bb2 (mmc: block: fix packed command header endianness) Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/card/block.c | 2 +- drivers/mmc/card/queue.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index 0f2cc9f22357..709a872ed484 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -1787,7 +1787,7 @@ static void mmc_blk_packed_hdr_wrq_prep(struct mmc_queue_req *mqrq, struct mmc_blk_data *md = mq->data; struct mmc_packed *packed = mqrq->packed; bool do_rel_wr, do_data_tag; - u32 *packed_cmd_hdr; + __le32 *packed_cmd_hdr; u8 hdr_blocks; u8 i = 1; diff --git a/drivers/mmc/card/queue.h b/drivers/mmc/card/queue.h index 3c15a75bae86..342f1e3f301e 100644 --- a/drivers/mmc/card/queue.h +++ b/drivers/mmc/card/queue.h @@ -31,7 +31,7 @@ enum mmc_packed_type { struct mmc_packed { struct list_head list; - u32 cmd_hdr[1024]; + __le32 cmd_hdr[1024]; unsigned int blocks; u8 nr_entries; u8 retries; From fc605f1d8060133596bb6083fc4b7b306d1d5931 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 5 Oct 2016 12:11:21 +0300 Subject: [PATCH 026/884] mmc: sdhci: Fix SDHCI_QUIRK2_STOP_WITH_TC Multi-block data transfers can specify the number of blocks either using a Set Block Count command (CMD23) or by sending a STOP command (CMD12) after the required number of blocks has transferred. CMD23 is preferred, but some cards don't support it. CMD12 with R1b response is used for writes, and R1 response for reads. Some SDHCI host controllers give a Transfer Complete (TC) interrupt for the STOP command (CMD12) whether or not a R1b response has been specified. The quirk SDHCI_QUIRK2_STOP_WITH_TC identifies those host controllers, but the implementation only considers the case where the TC interrupt arrives at the same time as the Command Complete (CC) interrupt. However, occasionally TC arrives before CC. That is harmless, but does generate an error message "Got data interrupt 0x00000002 even though no data operation was in progress". A simpler approach is to force R1b response onto all STOP commands, because SDHCI will handle TC before CC in the general case, so do that. Signed-off-by: Adrian Hunter Cc: Giuseppe Cavallaro Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 48055666c655..3711813f5654 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1077,6 +1077,10 @@ void sdhci_send_command(struct sdhci_host *host, struct mmc_command *cmd) /* Initially, a command has no error */ cmd->error = 0; + if ((host->quirks2 & SDHCI_QUIRK2_STOP_WITH_TC) && + cmd->opcode == MMC_STOP_TRANSMISSION) + cmd->flags |= MMC_RSP_BUSY; + /* Wait max 10 ms */ timeout = 10; @@ -2409,7 +2413,7 @@ static void sdhci_timeout_data_timer(unsigned long data) * * \*****************************************************************************/ -static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *mask) +static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask) { if (!host->cmd) { /* @@ -2453,11 +2457,6 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask, u32 *mask) return; } - if ((host->quirks2 & SDHCI_QUIRK2_STOP_WITH_TC) && - !(host->cmd->flags & MMC_RSP_BUSY) && !host->data && - host->cmd->opcode == MMC_STOP_TRANSMISSION) - *mask &= ~SDHCI_INT_DATA_END; - if (intmask & SDHCI_INT_RESPONSE) sdhci_finish_command(host); } @@ -2680,8 +2679,7 @@ static irqreturn_t sdhci_irq(int irq, void *dev_id) } if (intmask & SDHCI_INT_CMD_MASK) - sdhci_cmd_irq(host, intmask & SDHCI_INT_CMD_MASK, - &intmask); + sdhci_cmd_irq(host, intmask & SDHCI_INT_CMD_MASK); if (intmask & SDHCI_INT_DATA_MASK) sdhci_data_irq(host, intmask & SDHCI_INT_DATA_MASK); From 606d313124094d87050896a10894200cdd2b0514 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 5 Oct 2016 12:11:22 +0300 Subject: [PATCH 027/884] mmc: sdhci: Rename sdhci_set_power() to sdhci_set_power_noreg() Unlike other cases, sdhci_set_power() does not reflect the default implementation of the ->set_power() callback. Rename it and create sdhci_set_power() that is the default implementation. Signed-off-by: Adrian Hunter Cc: Jisheng Zhang Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-pxav3.c | 2 +- drivers/mmc/host/sdhci.c | 26 +++++++++++++------------- drivers/mmc/host/sdhci.h | 2 ++ 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/drivers/mmc/host/sdhci-pxav3.c b/drivers/mmc/host/sdhci-pxav3.c index dd1938d341f7..d0f5c05fbc19 100644 --- a/drivers/mmc/host/sdhci-pxav3.c +++ b/drivers/mmc/host/sdhci-pxav3.c @@ -315,7 +315,7 @@ static void pxav3_set_power(struct sdhci_host *host, unsigned char mode, struct mmc_host *mmc = host->mmc; u8 pwr = host->pwr; - sdhci_set_power(host, mode, vdd); + sdhci_set_power_noreg(host, mode, vdd); if (host->pwr == pwr) return; diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 3711813f5654..223a91e039dc 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1394,8 +1394,8 @@ static void sdhci_set_power_reg(struct sdhci_host *host, unsigned char mode, sdhci_writeb(host, 0, SDHCI_POWER_CONTROL); } -void sdhci_set_power(struct sdhci_host *host, unsigned char mode, - unsigned short vdd) +void sdhci_set_power_noreg(struct sdhci_host *host, unsigned char mode, + unsigned short vdd) { u8 pwr = 0; @@ -1459,20 +1459,17 @@ void sdhci_set_power(struct sdhci_host *host, unsigned char mode, mdelay(10); } } -EXPORT_SYMBOL_GPL(sdhci_set_power); +EXPORT_SYMBOL_GPL(sdhci_set_power_noreg); -static void __sdhci_set_power(struct sdhci_host *host, unsigned char mode, - unsigned short vdd) +void sdhci_set_power(struct sdhci_host *host, unsigned char mode, + unsigned short vdd) { - struct mmc_host *mmc = host->mmc; - - if (host->ops->set_power) - host->ops->set_power(host, mode, vdd); - else if (!IS_ERR(mmc->supply.vmmc)) - sdhci_set_power_reg(host, mode, vdd); + if (IS_ERR(host->mmc->supply.vmmc)) + sdhci_set_power_noreg(host, mode, vdd); else - sdhci_set_power(host, mode, vdd); + sdhci_set_power_reg(host, mode, vdd); } +EXPORT_SYMBOL_GPL(sdhci_set_power); /*****************************************************************************\ * * @@ -1613,7 +1610,10 @@ static void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) } } - __sdhci_set_power(host, ios->power_mode, ios->vdd); + if (host->ops->set_power) + host->ops->set_power(host, ios->power_mode, ios->vdd); + else + sdhci_set_power(host, ios->power_mode, ios->vdd); if (host->ops->platform_send_init_74_clocks) host->ops->platform_send_init_74_clocks(host, ios->power_mode); diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index c722cd23205c..766df17fb7eb 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -683,6 +683,8 @@ u16 sdhci_calc_clk(struct sdhci_host *host, unsigned int clock, void sdhci_set_clock(struct sdhci_host *host, unsigned int clock); void sdhci_set_power(struct sdhci_host *host, unsigned char mode, unsigned short vdd); +void sdhci_set_power_noreg(struct sdhci_host *host, unsigned char mode, + unsigned short vdd); void sdhci_set_bus_width(struct sdhci_host *host, int width); void sdhci_reset(struct sdhci_host *host, u8 mask); void sdhci_set_uhs_signaling(struct sdhci_host *host, unsigned timing); From 6bc090631dfc3394da0619e920662e6636dbe89c Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 5 Oct 2016 12:11:23 +0300 Subject: [PATCH 028/884] mmc: sdhci-pci: Let devices define their own sdhci_ops Let devices define their own sdhci_ops so that device-specific variations can be implemented without adding quirks. Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-pci-core.c | 4 +++- drivers/mmc/host/sdhci-pci.h | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c index 72a1f1f5180a..5e11e0e3be63 100644 --- a/drivers/mmc/host/sdhci-pci-core.c +++ b/drivers/mmc/host/sdhci-pci-core.c @@ -1648,7 +1648,9 @@ static struct sdhci_pci_slot *sdhci_pci_probe_slot( } host->hw_name = "PCI"; - host->ops = &sdhci_pci_ops; + host->ops = chip->fixes && chip->fixes->ops ? + chip->fixes->ops : + &sdhci_pci_ops; host->quirks = chip->quirks; host->quirks2 = chip->quirks2; diff --git a/drivers/mmc/host/sdhci-pci.h b/drivers/mmc/host/sdhci-pci.h index 9c7c08b93223..6bccf56bc5ff 100644 --- a/drivers/mmc/host/sdhci-pci.h +++ b/drivers/mmc/host/sdhci-pci.h @@ -65,6 +65,8 @@ struct sdhci_pci_fixes { int (*suspend) (struct sdhci_pci_chip *); int (*resume) (struct sdhci_pci_chip *); + + const struct sdhci_ops *ops; }; struct sdhci_pci_slot { From fee686b74a9c115d3c4c851eb6613d1378ad0e0c Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 5 Oct 2016 12:11:24 +0300 Subject: [PATCH 029/884] mmc: sdhci-pci: Fix bus power failing to enable for some Intel controllers Some Intel controllers (e.g. BXT) might fail to set bus power after a D3 -> D0 transition due to the present state not yet having propagated. Retry for up to 2 milliseconds. Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-pci-core.c | 50 +++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c index 5e11e0e3be63..1d9e00a00e9f 100644 --- a/drivers/mmc/host/sdhci-pci-core.c +++ b/drivers/mmc/host/sdhci-pci-core.c @@ -32,6 +32,14 @@ #include "sdhci-pci.h" #include "sdhci-pci-o2micro.h" +static int sdhci_pci_enable_dma(struct sdhci_host *host); +static void sdhci_pci_set_bus_width(struct sdhci_host *host, int width); +static void sdhci_pci_hw_reset(struct sdhci_host *host); +static int sdhci_pci_select_drive_strength(struct sdhci_host *host, + struct mmc_card *card, + unsigned int max_dtr, int host_drv, + int card_drv, int *drv_type); + /*****************************************************************************\ * * * Hardware specific quirk handling * @@ -390,6 +398,45 @@ static int byt_sd_probe_slot(struct sdhci_pci_slot *slot) return 0; } +#define SDHCI_INTEL_PWR_TIMEOUT_CNT 20 +#define SDHCI_INTEL_PWR_TIMEOUT_UDELAY 100 + +static void sdhci_intel_set_power(struct sdhci_host *host, unsigned char mode, + unsigned short vdd) +{ + int cntr; + u8 reg; + + sdhci_set_power(host, mode, vdd); + + if (mode == MMC_POWER_OFF) + return; + + /* + * Bus power might not enable after D3 -> D0 transition due to the + * present state not yet having propagated. Retry for up to 2ms. + */ + for (cntr = 0; cntr < SDHCI_INTEL_PWR_TIMEOUT_CNT; cntr++) { + reg = sdhci_readb(host, SDHCI_POWER_CONTROL); + if (reg & SDHCI_POWER_ON) + break; + udelay(SDHCI_INTEL_PWR_TIMEOUT_UDELAY); + reg |= SDHCI_POWER_ON; + sdhci_writeb(host, reg, SDHCI_POWER_CONTROL); + } +} + +static const struct sdhci_ops sdhci_intel_byt_ops = { + .set_clock = sdhci_set_clock, + .set_power = sdhci_intel_set_power, + .enable_dma = sdhci_pci_enable_dma, + .set_bus_width = sdhci_pci_set_bus_width, + .reset = sdhci_reset, + .set_uhs_signaling = sdhci_set_uhs_signaling, + .hw_reset = sdhci_pci_hw_reset, + .select_drive_strength = sdhci_pci_select_drive_strength, +}; + static const struct sdhci_pci_fixes sdhci_intel_byt_emmc = { .allow_runtime_pm = true, .probe_slot = byt_emmc_probe_slot, @@ -397,6 +444,7 @@ static const struct sdhci_pci_fixes sdhci_intel_byt_emmc = { .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN | SDHCI_QUIRK2_CAPS_BIT63_FOR_HS400 | SDHCI_QUIRK2_STOP_WITH_TC, + .ops = &sdhci_intel_byt_ops, }; static const struct sdhci_pci_fixes sdhci_intel_byt_sdio = { @@ -405,6 +453,7 @@ static const struct sdhci_pci_fixes sdhci_intel_byt_sdio = { SDHCI_QUIRK2_PRESET_VALUE_BROKEN, .allow_runtime_pm = true, .probe_slot = byt_sdio_probe_slot, + .ops = &sdhci_intel_byt_ops, }; static const struct sdhci_pci_fixes sdhci_intel_byt_sd = { @@ -415,6 +464,7 @@ static const struct sdhci_pci_fixes sdhci_intel_byt_sd = { .allow_runtime_pm = true, .own_cd_for_runtime_pm = true, .probe_slot = byt_sd_probe_slot, + .ops = &sdhci_intel_byt_ops, }; /* Define Host controllers for Intel Merrifield platform */ From 8c136b590f79f7f4f60ae4709fc1340885ca2eba Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Wed, 5 Oct 2016 18:30:43 +0200 Subject: [PATCH 030/884] drm/etnaviv: ensure write caches are flushed at end of user cmdstream If the GPU is done with one user command stream the buffers referenced by this command stream may go away and get unmapped from the MMU. If the write caches are still dirty at this point later evictions will run into MMU faults, killing the GPU. Make sure the write caches are flushed before signaling completion of the user command stream. Signed-off-by: Lucas Stach --- drivers/gpu/drm/etnaviv/etnaviv_buffer.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c index cb86c7e5495c..d9230132dfbc 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c @@ -329,20 +329,34 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event, /* * Append a LINK to the submitted command buffer to return to * the ring buffer. return_target is the ring target address. - * We need three dwords: event, wait, link. + * We need at most 7 dwords in the return target: 2 cache flush + + * 2 semaphore stall + 1 event + 1 wait + 1 link. */ - return_dwords = 3; + return_dwords = 7; return_target = etnaviv_buffer_reserve(gpu, buffer, return_dwords); CMD_LINK(cmdbuf, return_dwords, return_target); /* - * Append event, wait and link pointing back to the wait - * command to the ring buffer. + * Append a cache flush, stall, event, wait and link pointing back to + * the wait command to the ring buffer. */ + if (gpu->exec_state == ETNA_PIPE_2D) { + CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_CACHE, + VIVS_GL_FLUSH_CACHE_PE2D); + } else { + CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_CACHE, + VIVS_GL_FLUSH_CACHE_DEPTH | + VIVS_GL_FLUSH_CACHE_COLOR); + CMD_LOAD_STATE(buffer, VIVS_TS_FLUSH_CACHE, + VIVS_TS_FLUSH_CACHE_FLUSH); + } + CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE); + CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE); CMD_LOAD_STATE(buffer, VIVS_GL_EVENT, VIVS_GL_EVENT_EVENT_ID(event) | VIVS_GL_EVENT_FROM_PE); CMD_WAIT(buffer); - CMD_LINK(buffer, 2, return_target + 8); + CMD_LINK(buffer, 2, etnaviv_iommu_get_cmdbuf_va(gpu, buffer) + + buffer->user_size - 4); if (drm_debug & DRM_UT_DRIVER) pr_info("stream link to 0x%08x @ 0x%08x %p\n", From 8814d2dce00f77c5eeb7278981ac6fd08835629e Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Thu, 6 Oct 2016 17:03:28 +0200 Subject: [PATCH 031/884] drm/etnaviv: block 64K of address space behind each cmdstream To make sure we don't place anything there which might confuse the FE prefetcher. This gets rid of another case of FE MMU faults when the address space gets crowded before triggering the reaper. Signed-off-by: Lucas Stach --- drivers/gpu/drm/etnaviv/etnaviv_mmu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c index d3796ed8d8c5..169ac96e8f08 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c @@ -330,7 +330,8 @@ u32 etnaviv_iommu_get_cmdbuf_va(struct etnaviv_gpu *gpu, return (u32)buf->vram_node.start; mutex_lock(&mmu->lock); - ret = etnaviv_iommu_find_iova(mmu, &buf->vram_node, buf->size); + ret = etnaviv_iommu_find_iova(mmu, &buf->vram_node, + buf->size + SZ_64K); if (ret < 0) { mutex_unlock(&mmu->lock); return 0; From 2d8e60e8b0742b7a5cddc806fe38bb81ee876c33 Mon Sep 17 00:00:00 2001 From: Baole Ni Date: Tue, 2 Aug 2016 18:50:25 +0800 Subject: [PATCH 032/884] drm/vmwgfx: Replace numeric parameter like 0444 with macro I find that the developers often just specified the numeric value when calling a macro which is defined with a parameter for access permission. As we know, these numeric value for access permission have had the corresponding macro, and that using macro can improve the robustness and readability of the code, thus, I suggest replacing the numeric parameter with the macro. Signed-off-by: Chuansheng Liu Signed-off-by: Baole Ni Reviewed-by: Sinclair Yeh Signed-off-by: Sinclair Yeh --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index e8ae3dc476d1..18061a4bc2f2 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -241,15 +241,15 @@ static int vmwgfx_pm_notifier(struct notifier_block *nb, unsigned long val, void *ptr); MODULE_PARM_DESC(enable_fbdev, "Enable vmwgfx fbdev"); -module_param_named(enable_fbdev, enable_fbdev, int, 0600); +module_param_named(enable_fbdev, enable_fbdev, int, S_IRUSR | S_IWUSR); MODULE_PARM_DESC(force_dma_api, "Force using the DMA API for TTM pages"); -module_param_named(force_dma_api, vmw_force_iommu, int, 0600); +module_param_named(force_dma_api, vmw_force_iommu, int, S_IRUSR | S_IWUSR); MODULE_PARM_DESC(restrict_iommu, "Try to limit IOMMU usage for TTM pages"); -module_param_named(restrict_iommu, vmw_restrict_iommu, int, 0600); +module_param_named(restrict_iommu, vmw_restrict_iommu, int, S_IRUSR | S_IWUSR); MODULE_PARM_DESC(force_coherent, "Force coherent TTM pages"); -module_param_named(force_coherent, vmw_force_coherent, int, 0600); +module_param_named(force_coherent, vmw_force_coherent, int, S_IRUSR | S_IWUSR); MODULE_PARM_DESC(restrict_dma_mask, "Restrict DMA mask to 44 bits with IOMMU"); -module_param_named(restrict_dma_mask, vmw_restrict_dma_mask, int, 0600); +module_param_named(restrict_dma_mask, vmw_restrict_dma_mask, int, S_IRUSR | S_IWUSR); MODULE_PARM_DESC(assume_16bpp, "Assume 16-bpp when filtering modes"); module_param_named(assume_16bpp, vmw_assume_16bpp, int, 0600); From 07028959bcc674dc8ca143323aeab05849a83742 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 29 Aug 2016 08:08:28 +0100 Subject: [PATCH 033/884] drm/vmwgfx: Remove call to reservation_object_test_signaled_rcu before wait Since fence_wait_timeout_reservation_object_wait_timeout_rcu() with a timeout of 0 becomes reservation_object_test_signaled_rcu(), we do not need to handle such conversion in the caller. The only challenge are those callers that wish to differentiate the error code between the nonblocking busy check and potentially blocking wait. Signed-off-by: Chris Wilson Cc: Sinclair Yeh Cc: Thomas Hellstrom Reviewed-by: Sinclair Yeh Signed-off-by: Sinclair Yeh --- drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index 6a328d507a28..52ca1c9d070e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -574,10 +574,8 @@ static int vmw_user_dmabuf_synccpu_grab(struct vmw_user_dma_buffer *user_bo, bool nonblock = !!(flags & drm_vmw_synccpu_dontblock); long lret; - if (nonblock) - return reservation_object_test_signaled_rcu(bo->resv, true) ? 0 : -EBUSY; - - lret = reservation_object_wait_timeout_rcu(bo->resv, true, true, MAX_SCHEDULE_TIMEOUT); + lret = reservation_object_wait_timeout_rcu(bo->resv, true, true, + nonblock ? 0 : MAX_SCHEDULE_TIMEOUT); if (!lret) return -EBUSY; else if (lret < 0) From 1f982e4e390b31d6c44fb8bf03e3462ab33b8244 Mon Sep 17 00:00:00 2001 From: Charmaine Lee Date: Mon, 10 Oct 2016 10:37:03 -0700 Subject: [PATCH 034/884] drm/vmwgfx: Enable SVGA_3D_CMD_DX_TRANSFER_FROM_BUFFER command And bump VMWGFX_DRIVER_MINOR to 11 Signed-off-by: Charmaine Lee Reviewed-by: Sinclair Yeh Reviewed-by: Brian Paul Signed-off-by: Sinclair Yeh --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 2 +- drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 32 +++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 74304b03f9d4..7d01c183079d 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -43,7 +43,7 @@ #define VMWGFX_DRIVER_DATE "20160210" #define VMWGFX_DRIVER_MAJOR 2 -#define VMWGFX_DRIVER_MINOR 10 +#define VMWGFX_DRIVER_MINOR 11 #define VMWGFX_DRIVER_PATCHLEVEL 0 #define VMWGFX_FILE_PAGE_OFFSET 0x00100000 #define VMWGFX_FIFO_STATIC_SIZE (1024*1024) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index dc5beff2b4aa..0243acc8845f 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -3029,6 +3029,35 @@ static int vmw_cmd_dx_genmips(struct vmw_private *dev_priv, cmd->body.shaderResourceViewId); } +/** + * vmw_cmd_dx_transfer_from_buffer - + * Validate an SVGA_3D_CMD_DX_TRANSFER_FROM_BUFFER command + * + * @dev_priv: Pointer to a device private struct. + * @sw_context: The software context being used for this batch. + * @header: Pointer to the command header in the command stream. + */ +static int vmw_cmd_dx_transfer_from_buffer(struct vmw_private *dev_priv, + struct vmw_sw_context *sw_context, + SVGA3dCmdHeader *header) +{ + struct { + SVGA3dCmdHeader header; + SVGA3dCmdDXTransferFromBuffer body; + } *cmd = container_of(header, typeof(*cmd), header); + int ret; + + ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface, + user_surface_converter, + &cmd->body.srcSid, NULL); + if (ret != 0) + return ret; + + return vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface, + user_surface_converter, + &cmd->body.destSid, NULL); +} + static int vmw_cmd_check_not_3d(struct vmw_private *dev_priv, struct vmw_sw_context *sw_context, void *buf, uint32_t *size) @@ -3379,6 +3408,9 @@ static const struct vmw_cmd_entry vmw_cmd_entries[SVGA_3D_CMD_MAX] = { &vmw_cmd_buffer_copy_check, true, false, true), VMW_CMD_DEF(SVGA_3D_CMD_DX_PRED_COPY_REGION, &vmw_cmd_pred_copy_check, true, false, true), + VMW_CMD_DEF(SVGA_3D_CMD_DX_TRANSFER_FROM_BUFFER, + &vmw_cmd_dx_transfer_from_buffer, + true, false, true), }; static int vmw_cmd_check(struct vmw_private *dev_priv, From e7a45284ba1abcea591f7c01b05227b6698b596c Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Mon, 10 Oct 2016 10:44:00 -0700 Subject: [PATCH 035/884] drm/vmwgfx: Allow resource relocations on byte boundaries So far, resource allocations have only been allowed on 4-byte boundaries. As commands get packed tighter, allow them on byte boundaries. Signed-off-by: Thomas Hellstrom Reviewed-by: Sinclair Yeh Signed-off-by: Sinclair Yeh --- drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 41 ++++++++++++++++--------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 0243acc8845f..b915f621187f 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -39,7 +39,7 @@ * * @head: List head for the software context's relocation list. * @res: Non-ref-counted pointer to the resource. - * @offset: Offset of 4 byte entries into the command buffer where the + * @offset: Offset of single byte entries into the command buffer where the * id that needs fixup is located. */ struct vmw_resource_relocation { @@ -109,7 +109,18 @@ static int vmw_bo_to_validate_list(struct vmw_sw_context *sw_context, struct vmw_dma_buffer *vbo, bool validate_as_mob, uint32_t *p_val_node); - +/** + * vmw_ptr_diff - Compute the offset from a to b in bytes + * + * @a: A starting pointer. + * @b: A pointer offset in the same address space. + * + * Returns: The offset in bytes between the two pointers. + */ +static size_t vmw_ptr_diff(void *a, void *b) +{ + return (unsigned long) b - (unsigned long) a; +} /** * vmw_resources_unreserve - unreserve resources previously reserved for @@ -409,7 +420,7 @@ static int vmw_resource_context_res_add(struct vmw_private *dev_priv, * @list: Pointer to head of relocation list. * @res: The resource. * @offset: Offset into the command buffer currently being parsed where the - * id that needs fixup is located. Granularity is 4 bytes. + * id that needs fixup is located. Granularity is one byte. */ static int vmw_resource_relocation_add(struct list_head *list, const struct vmw_resource *res, @@ -460,10 +471,11 @@ static void vmw_resource_relocations_apply(uint32_t *cb, struct vmw_resource_relocation *rel; list_for_each_entry(rel, list, head) { + u32 *addr = (u32 *)((unsigned long) cb + rel->offset); if (likely(rel->res != NULL)) - cb[rel->offset] = rel->res->id; + *addr = rel->res->id; else - cb[rel->offset] = SVGA_3D_CMD_NOP; + *addr = SVGA_3D_CMD_NOP; } } @@ -655,7 +667,8 @@ static int vmw_cmd_res_reloc_add(struct vmw_private *dev_priv, *p_val = NULL; ret = vmw_resource_relocation_add(&sw_context->res_relocations, res, - id_loc - sw_context->buf_start); + vmw_ptr_diff(sw_context->buf_start, + id_loc)); if (unlikely(ret != 0)) return ret; @@ -721,7 +734,7 @@ vmw_cmd_res_check(struct vmw_private *dev_priv, return vmw_resource_relocation_add (&sw_context->res_relocations, res, - id_loc - sw_context->buf_start); + vmw_ptr_diff(sw_context->buf_start, id_loc)); } ret = vmw_user_resource_lookup_handle(dev_priv, @@ -2143,10 +2156,9 @@ static int vmw_cmd_shader_define(struct vmw_private *dev_priv, return ret; return vmw_resource_relocation_add(&sw_context->res_relocations, - NULL, &cmd->header.id - - sw_context->buf_start); - - return 0; + NULL, + vmw_ptr_diff(sw_context->buf_start, + &cmd->header.id)); } /** @@ -2188,10 +2200,9 @@ static int vmw_cmd_shader_destroy(struct vmw_private *dev_priv, return ret; return vmw_resource_relocation_add(&sw_context->res_relocations, - NULL, &cmd->header.id - - sw_context->buf_start); - - return 0; + NULL, + vmw_ptr_diff(sw_context->buf_start, + &cmd->header.id)); } /** From 728c3b53995f71e4b175d5939b8ba3211b6bc34d Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Mon, 10 Oct 2016 10:45:55 -0700 Subject: [PATCH 036/884] drm/vmwgfx: Remove a leftover debug printout Remove a leftover debug printout Signed-off-by: Thomas Hellstrom Reviewed-by: Brian Paul Reviewed-by: Sinclair Yeh Signed-off-by: Sinclair Yeh --- drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index b915f621187f..ddd5e8a59723 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -4275,9 +4275,6 @@ void __vmw_execbuf_release_pinned_bo(struct vmw_private *dev_priv, ttm_bo_unref(&query_val.bo); ttm_bo_unref(&pinned_val.bo); vmw_dmabuf_unreference(&dev_priv->pinned_bo); - DRM_INFO("Dummy query bo pin count: %d\n", - dev_priv->dummy_query_bo->pin_count); - out_unlock: return; From 51ab70bed997f64f091a639dbe22b629725a7faf Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Mon, 10 Oct 2016 10:51:24 -0700 Subject: [PATCH 037/884] drm/vmwgfx: Limit the user-space command buffer size With older hardware versions, the user could specify arbitrarily large command buffer sizes, causing a vmalloc / vmap space exhaustion. Signed-off-by: Thomas Hellstrom Reviewed-by: Brian Paul Reviewed-by: Sinclair Yeh Signed-off-by: Sinclair Yeh Cc: stable@vger.kernel.org --- drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index ddd5e8a59723..d1f4a48dee0f 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -3891,14 +3891,14 @@ static void *vmw_execbuf_cmdbuf(struct vmw_private *dev_priv, int ret; *header = NULL; - if (!dev_priv->cman || kernel_commands) - return kernel_commands; - if (command_size > SVGA_CB_MAX_SIZE) { DRM_ERROR("Command buffer is too large.\n"); return ERR_PTR(-EINVAL); } + if (!dev_priv->cman || kernel_commands) + return kernel_commands; + /* If possible, add a little space for fencing. */ cmdbuf_size = command_size + 512; cmdbuf_size = min_t(size_t, cmdbuf_size, SVGA_CB_MAX_SIZE); From a19440304db2d97aed5cee9bfa5017c98d2348bf Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Mon, 10 Oct 2016 11:06:45 -0700 Subject: [PATCH 038/884] drm/vmwgfx: Avoid validating views on view destruction When a view destruction command was present in the command stream, the view was validated to avoid a device error. That caused excessive and unnecessary validations of views, surfaces and mobs on view destruction. Replace this with a new relocation type that patches the view destruction command to a NOP if the view is not present in the device after the execbuf validation sequence. Also add checks for the member size of the vmw_res_relocation struct. Fixes sporadic command submission errors on google-earth exit. Reported-by: Brian Paul Signed-off-by: Thomas Hellstrom Reviewed-by: Brian Paul Reviewed-by: Sinclair Yeh Signed-off-by: Sinclair Yeh Cc: stable@vger.kernel.org --- drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 71 ++++++++++++++++++++----- 1 file changed, 58 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index d1f4a48dee0f..c7b53d987f06 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -34,6 +34,24 @@ #define VMW_RES_HT_ORDER 12 +/** + * enum vmw_resource_relocation_type - Relocation type for resources + * + * @vmw_res_rel_normal: Traditional relocation. The resource id in the + * command stream is replaced with the actual id after validation. + * @vmw_res_rel_nop: NOP relocation. The command is unconditionally replaced + * with a NOP. + * @vmw_res_rel_cond_nop: Conditional NOP relocation. If the resource id + * after validation is -1, the command is replaced with a NOP. Otherwise no + * action. + */ +enum vmw_resource_relocation_type { + vmw_res_rel_normal, + vmw_res_rel_nop, + vmw_res_rel_cond_nop, + vmw_res_rel_max +}; + /** * struct vmw_resource_relocation - Relocation info for resources * @@ -41,11 +59,13 @@ * @res: Non-ref-counted pointer to the resource. * @offset: Offset of single byte entries into the command buffer where the * id that needs fixup is located. + * @rel_type: Type of relocation. */ struct vmw_resource_relocation { struct list_head head; const struct vmw_resource *res; - unsigned long offset; + u32 offset:29; + enum vmw_resource_relocation_type rel_type:3; }; /** @@ -421,10 +441,13 @@ static int vmw_resource_context_res_add(struct vmw_private *dev_priv, * @res: The resource. * @offset: Offset into the command buffer currently being parsed where the * id that needs fixup is located. Granularity is one byte. + * @rel_type: Relocation type. */ static int vmw_resource_relocation_add(struct list_head *list, const struct vmw_resource *res, - unsigned long offset) + unsigned long offset, + enum vmw_resource_relocation_type + rel_type) { struct vmw_resource_relocation *rel; @@ -436,6 +459,7 @@ static int vmw_resource_relocation_add(struct list_head *list, rel->res = res; rel->offset = offset; + rel->rel_type = rel_type; list_add_tail(&rel->head, list); return 0; @@ -470,12 +494,24 @@ static void vmw_resource_relocations_apply(uint32_t *cb, { struct vmw_resource_relocation *rel; + /* Validate the struct vmw_resource_relocation member size */ + BUILD_BUG_ON(SVGA_CB_MAX_SIZE >= (1 << 29)); + BUILD_BUG_ON(vmw_res_rel_max >= (1 << 3)); + list_for_each_entry(rel, list, head) { u32 *addr = (u32 *)((unsigned long) cb + rel->offset); - if (likely(rel->res != NULL)) + switch (rel->rel_type) { + case vmw_res_rel_normal: *addr = rel->res->id; - else + break; + case vmw_res_rel_nop: *addr = SVGA_3D_CMD_NOP; + break; + default: + if (rel->res->id == -1) + *addr = SVGA_3D_CMD_NOP; + break; + } } } @@ -668,7 +704,8 @@ static int vmw_cmd_res_reloc_add(struct vmw_private *dev_priv, ret = vmw_resource_relocation_add(&sw_context->res_relocations, res, vmw_ptr_diff(sw_context->buf_start, - id_loc)); + id_loc), + vmw_res_rel_normal); if (unlikely(ret != 0)) return ret; @@ -734,7 +771,8 @@ vmw_cmd_res_check(struct vmw_private *dev_priv, return vmw_resource_relocation_add (&sw_context->res_relocations, res, - vmw_ptr_diff(sw_context->buf_start, id_loc)); + vmw_ptr_diff(sw_context->buf_start, id_loc), + vmw_res_rel_normal); } ret = vmw_user_resource_lookup_handle(dev_priv, @@ -2158,7 +2196,8 @@ static int vmw_cmd_shader_define(struct vmw_private *dev_priv, return vmw_resource_relocation_add(&sw_context->res_relocations, NULL, vmw_ptr_diff(sw_context->buf_start, - &cmd->header.id)); + &cmd->header.id), + vmw_res_rel_nop); } /** @@ -2202,7 +2241,8 @@ static int vmw_cmd_shader_destroy(struct vmw_private *dev_priv, return vmw_resource_relocation_add(&sw_context->res_relocations, NULL, vmw_ptr_diff(sw_context->buf_start, - &cmd->header.id)); + &cmd->header.id), + vmw_res_rel_nop); } /** @@ -2859,8 +2899,7 @@ static int vmw_cmd_dx_cid_check(struct vmw_private *dev_priv, * @header: Pointer to the command header in the command stream. * * Check that the view exists, and if it was not created using this - * command batch, make sure it's validated (present in the device) so that - * the remove command will not confuse the device. + * command batch, conditionally make this command a NOP. */ static int vmw_cmd_dx_view_remove(struct vmw_private *dev_priv, struct vmw_sw_context *sw_context, @@ -2888,10 +2927,16 @@ static int vmw_cmd_dx_view_remove(struct vmw_private *dev_priv, return ret; /* - * Add view to the validate list iff it was not created using this - * command batch. + * If the view wasn't created during this command batch, it might + * have been removed due to a context swapout, so add a + * relocation to conditionally make this command a NOP to avoid + * device errors. */ - return vmw_view_res_val_add(sw_context, view); + return vmw_resource_relocation_add(&sw_context->res_relocations, + view, + vmw_ptr_diff(sw_context->buf_start, + &cmd->header.id), + vmw_res_rel_cond_nop); } /** From 7ed3b3943281e9da32b52e2aac77bdb2c42c5117 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Thu, 22 Sep 2016 21:54:33 +0200 Subject: [PATCH 039/884] drm/vmwgfx: Use kmalloc_array() in vmw_surface_define_ioctl() Multiplications for the size determination of memory allocations indicated that array data structures should be processed. Thus use the corresponding function "kmalloc_array". This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Reviewed-by: Sinclair Yeh Signed-off-by: Sinclair Yeh --- drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index c2a721a8cef9..f55754936d90 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -763,14 +763,16 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, memcpy(srf->mip_levels, req->mip_levels, sizeof(srf->mip_levels)); srf->num_sizes = num_sizes; user_srf->size = size; - - srf->sizes = kmalloc(srf->num_sizes * sizeof(*srf->sizes), GFP_KERNEL); + srf->sizes = kmalloc_array(srf->num_sizes, + sizeof(*srf->sizes), + GFP_KERNEL); if (unlikely(srf->sizes == NULL)) { ret = -ENOMEM; goto out_no_sizes; } - srf->offsets = kmalloc(srf->num_sizes * sizeof(*srf->offsets), - GFP_KERNEL); + srf->offsets = kmalloc_array(srf->num_sizes, + sizeof(*srf->offsets), + GFP_KERNEL); if (unlikely(srf->offsets == NULL)) { ret = -ENOMEM; goto out_no_offsets; From c138d03f1bf3b9c7bfd449e890cc003658b5c45a Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Fri, 23 Sep 2016 17:26:02 +0200 Subject: [PATCH 040/884] drm/vmwgfx: Use memdup_user() rather than duplicating its implementation * Reuse existing functionality from memdup_user() instead of keeping duplicate source code. * Try this copy operation before allocating memory for the data structure member "offsets". * Delete the local variable "user_sizes" which became unnecessary with this refactoring. Signed-off-by: Markus Elfring Reviewed-by: Sinclair Yeh Signed-off-by: Sinclair Yeh --- drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index f55754936d90..15504c6ca3e0 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -700,7 +700,6 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, struct drm_vmw_surface_create_req *req = &arg->req; struct drm_vmw_surface_arg *rep = &arg->rep; struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; - struct drm_vmw_size __user *user_sizes; int ret; int i, j; uint32_t cur_bo_offset; @@ -763,11 +762,11 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, memcpy(srf->mip_levels, req->mip_levels, sizeof(srf->mip_levels)); srf->num_sizes = num_sizes; user_srf->size = size; - srf->sizes = kmalloc_array(srf->num_sizes, - sizeof(*srf->sizes), - GFP_KERNEL); - if (unlikely(srf->sizes == NULL)) { - ret = -ENOMEM; + srf->sizes = memdup_user((struct drm_vmw_size __user *)(unsigned long) + req->size_addr, + sizeof(*srf->sizes) * srf->num_sizes); + if (IS_ERR(srf->sizes)) { + ret = PTR_ERR(srf->sizes); goto out_no_sizes; } srf->offsets = kmalloc_array(srf->num_sizes, @@ -778,16 +777,6 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, goto out_no_offsets; } - user_sizes = (struct drm_vmw_size __user *)(unsigned long) - req->size_addr; - - ret = copy_from_user(srf->sizes, user_sizes, - srf->num_sizes * sizeof(*srf->sizes)); - if (unlikely(ret != 0)) { - ret = -EFAULT; - goto out_no_copy; - } - srf->base_size = *srf->sizes; srf->autogen_filter = SVGA3D_TEX_FILTER_NONE; srf->multisample_count = 0; From 862f6157d176c9db5a7ed423245108d9bb3d7038 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Fri, 23 Sep 2016 17:53:49 +0200 Subject: [PATCH 041/884] drm/vmwgfx: Adjust checks for null pointers in 13 functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The script "checkpatch.pl" can point information out like the following. Comparison to NULL could be written !… Thus fix the affected source code places. Signed-off-by: Markus Elfring Reviewed-by: Sinclair Yeh Signed-off-by: Sinclair Yeh --- drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 31 ++++++++++++------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index 15504c6ca3e0..b445ce9b9757 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -324,7 +324,7 @@ static void vmw_hw_surface_destroy(struct vmw_resource *res) if (res->id != -1) { cmd = vmw_fifo_reserve(dev_priv, vmw_surface_destroy_size()); - if (unlikely(cmd == NULL)) { + if (unlikely(!cmd)) { DRM_ERROR("Failed reserving FIFO space for surface " "destruction.\n"); return; @@ -397,7 +397,7 @@ static int vmw_legacy_srf_create(struct vmw_resource *res) submit_size = vmw_surface_define_size(srf); cmd = vmw_fifo_reserve(dev_priv, submit_size); - if (unlikely(cmd == NULL)) { + if (unlikely(!cmd)) { DRM_ERROR("Failed reserving FIFO space for surface " "creation.\n"); ret = -ENOMEM; @@ -446,11 +446,10 @@ static int vmw_legacy_srf_dma(struct vmw_resource *res, uint8_t *cmd; struct vmw_private *dev_priv = res->dev_priv; - BUG_ON(val_buf->bo == NULL); - + BUG_ON(!val_buf->bo); submit_size = vmw_surface_dma_size(srf); cmd = vmw_fifo_reserve(dev_priv, submit_size); - if (unlikely(cmd == NULL)) { + if (unlikely(!cmd)) { DRM_ERROR("Failed reserving FIFO space for surface " "DMA.\n"); return -ENOMEM; @@ -538,7 +537,7 @@ static int vmw_legacy_srf_destroy(struct vmw_resource *res) submit_size = vmw_surface_destroy_size(); cmd = vmw_fifo_reserve(dev_priv, submit_size); - if (unlikely(cmd == NULL)) { + if (unlikely(!cmd)) { DRM_ERROR("Failed reserving FIFO space for surface " "eviction.\n"); return -ENOMEM; @@ -578,7 +577,7 @@ static int vmw_surface_init(struct vmw_private *dev_priv, int ret; struct vmw_resource *res = &srf->res; - BUG_ON(res_free == NULL); + BUG_ON(!res_free); if (!dev_priv->has_mob) vmw_fifo_resource_inc(dev_priv); ret = vmw_resource_init(dev_priv, res, true, res_free, @@ -747,7 +746,7 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, } user_srf = kzalloc(sizeof(*user_srf), GFP_KERNEL); - if (unlikely(user_srf == NULL)) { + if (unlikely(!user_srf)) { ret = -ENOMEM; goto out_no_user_srf; } @@ -772,7 +771,7 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, srf->offsets = kmalloc_array(srf->num_sizes, sizeof(*srf->offsets), GFP_KERNEL); - if (unlikely(srf->offsets == NULL)) { + if (unlikely(!srf->offsets)) { ret = -ENOMEM; goto out_no_offsets; } @@ -914,7 +913,7 @@ vmw_surface_handle_reference(struct vmw_private *dev_priv, ret = -EINVAL; base = ttm_base_object_lookup_for_ref(dev_priv->tdev, handle); - if (unlikely(base == NULL)) { + if (unlikely(!base)) { DRM_ERROR("Could not find surface to reference.\n"); goto out_no_lookup; } @@ -1060,7 +1059,7 @@ static int vmw_gb_surface_create(struct vmw_resource *res) cmd = vmw_fifo_reserve(dev_priv, submit_len); cmd2 = (typeof(cmd2))cmd; - if (unlikely(cmd == NULL)) { + if (unlikely(!cmd)) { DRM_ERROR("Failed reserving FIFO space for surface " "creation.\n"); ret = -ENOMEM; @@ -1126,7 +1125,7 @@ static int vmw_gb_surface_bind(struct vmw_resource *res, submit_size = sizeof(*cmd1) + (res->backup_dirty ? sizeof(*cmd2) : 0); cmd1 = vmw_fifo_reserve(dev_priv, submit_size); - if (unlikely(cmd1 == NULL)) { + if (unlikely(!cmd1)) { DRM_ERROR("Failed reserving FIFO space for surface " "binding.\n"); return -ENOMEM; @@ -1176,7 +1175,7 @@ static int vmw_gb_surface_unbind(struct vmw_resource *res, submit_size = sizeof(*cmd3) + (readback ? sizeof(*cmd1) : sizeof(*cmd2)); cmd = vmw_fifo_reserve(dev_priv, submit_size); - if (unlikely(cmd == NULL)) { + if (unlikely(!cmd)) { DRM_ERROR("Failed reserving FIFO space for surface " "unbinding.\n"); return -ENOMEM; @@ -1235,7 +1234,7 @@ static int vmw_gb_surface_destroy(struct vmw_resource *res) vmw_binding_res_list_scrub(&res->binding_head); cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd)); - if (unlikely(cmd == NULL)) { + if (unlikely(!cmd)) { DRM_ERROR("Failed reserving FIFO space for surface " "destruction.\n"); mutex_unlock(&dev_priv->binding_mutex); @@ -1401,7 +1400,7 @@ int vmw_gb_surface_reference_ioctl(struct drm_device *dev, void *data, user_srf = container_of(base, struct vmw_user_surface, prime.base); srf = &user_srf->srf; - if (srf->res.backup == NULL) { + if (!srf->res.backup) { DRM_ERROR("Shared GB surface is missing a backup buffer.\n"); goto out_bad_resource; } @@ -1515,7 +1514,7 @@ int vmw_surface_gb_priv_define(struct drm_device *dev, } user_srf = kzalloc(sizeof(*user_srf), GFP_KERNEL); - if (unlikely(user_srf == NULL)) { + if (unlikely(!user_srf)) { ret = -ENOMEM; goto out_no_user_srf; } From 5c33677c87cbe44ae04df69c4a29c1750a9ec4e5 Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Tue, 11 Oct 2016 15:16:57 +0100 Subject: [PATCH 042/884] dm raid: fix compat_features validation In ecbfb9f118bce4 ("dm raid: add raid level takeover support") a new compatible feature flag was added. Validation for these compat_features was added but this only passes for new raid mappings with this feature flag. This causes previously created raid mappings to be failed at import. Check compat_features for the only valid combination. Fixes: ecbfb9f118bce4 ("dm raid: add raid level takeover support") Cc: stable@vger.kernel.org # v4.8 Signed-off-by: Andy Whitcroft Signed-off-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer --- drivers/md/dm-raid.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 8abde6b8cedc..2a3970097991 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -2258,7 +2258,8 @@ static int super_validate(struct raid_set *rs, struct md_rdev *rdev) if (!mddev->events && super_init_validation(rs, rdev)) return -EINVAL; - if (le32_to_cpu(sb->compat_features) != FEATURE_FLAG_SUPPORTS_V190) { + if (le32_to_cpu(sb->compat_features) && + le32_to_cpu(sb->compat_features) != FEATURE_FLAG_SUPPORTS_V190) { rs->ti->error = "Unable to assemble array: Unknown flag(s) in compatible feature flags"; return -EINVAL; } From 0ba43a81ef7b78ddf404f7709a2257be59436411 Mon Sep 17 00:00:00 2001 From: Xose Vazquez Perez Date: Fri, 7 Oct 2016 18:19:57 +0200 Subject: [PATCH 043/884] scsi: Replace wrong device handler name for CLARiiON arrays At drivers/scsi/device_handler/scsi_dh_emc.c it was defined as: Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Christophe Varoqui Cc: James E.J. Bottomley Cc: Martin K. Petersen Cc: SCSI ML Cc: device-mapper development Signed-off-by: Xose Vazquez Perez Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_dh.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/scsi_dh.c b/drivers/scsi/scsi_dh.c index 54d446c9f56e..b8d3b97b217a 100644 --- a/drivers/scsi/scsi_dh.c +++ b/drivers/scsi/scsi_dh.c @@ -36,9 +36,9 @@ struct scsi_dh_blist { }; static const struct scsi_dh_blist scsi_dh_blist[] = { - {"DGC", "RAID", "clariion" }, - {"DGC", "DISK", "clariion" }, - {"DGC", "VRAID", "clariion" }, + {"DGC", "RAID", "emc" }, + {"DGC", "DISK", "emc" }, + {"DGC", "VRAID", "emc" }, {"COMPAQ", "MSA1000 VOLUME", "hp_sw" }, {"COMPAQ", "HSV110", "hp_sw" }, From bcd8f2e94808fcddf6ef3af5f060a36820dcc432 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sun, 9 Oct 2016 13:23:27 +0800 Subject: [PATCH 044/884] scsi: Fix use-after-free This patch fixes one use-after-free report[1] by KASAN. In __scsi_scan_target(), when a type 31 device is probed, SCSI_SCAN_TARGET_PRESENT is returned and the target will be scanned again. Inside the following scsi_report_lun_scan(), one new scsi_device instance is allocated, and scsi_probe_and_add_lun() is called again to probe the target and still see type 31 device, finally __scsi_remove_device() is called to remove & free the device at the end of scsi_probe_and_add_lun(), so cause use-after-free in scsi_report_lun_scan(). And the following SCSI log can be observed: scsi 0:0:2:0: scsi scan: INQUIRY pass 1 length 36 scsi 0:0:2:0: scsi scan: INQUIRY successful with code 0x0 scsi 0:0:2:0: scsi scan: peripheral device type of 31, no device added scsi 0:0:2:0: scsi scan: Sending REPORT LUNS to (try 0) scsi 0:0:2:0: scsi scan: REPORT LUNS successful (try 0) result 0x0 scsi 0:0:2:0: scsi scan: REPORT LUN scan scsi 0:0:2:0: scsi scan: INQUIRY pass 1 length 36 scsi 0:0:2:0: scsi scan: INQUIRY successful with code 0x0 scsi 0:0:2:0: scsi scan: peripheral device type of 31, no device added BUG: KASAN: use-after-free in __scsi_scan_target+0xbf8/0xe40 at addr ffff88007b44a104 This patch fixes the issue by moving the putting reference at the end of scsi_report_lun_scan(). [1] KASAN report ================================================================== [ 3.274597] PM: Adding info for serio:serio1 [ 3.275127] BUG: KASAN: use-after-free in __scsi_scan_target+0xd87/0xdf0 at addr ffff880254d8c304 [ 3.275653] Read of size 4 by task kworker/u10:0/27 [ 3.275903] CPU: 3 PID: 27 Comm: kworker/u10:0 Not tainted 4.8.0 #2121 [ 3.276258] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 3.276797] Workqueue: events_unbound async_run_entry_fn [ 3.277083] ffff880254d8c380 ffff880259a37870 ffffffff94bbc6c1 ffff880078402d80 [ 3.277532] ffff880254d8bb80 ffff880259a37898 ffffffff9459fec1 ffff880259a37930 [ 3.277989] ffff880254d8bb80 ffff880078402d80 ffff880259a37920 ffffffff945a0165 [ 3.278436] Call Trace: [ 3.278528] [] dump_stack+0x65/0x84 [ 3.278797] [] kasan_object_err+0x21/0x70 [ 3.279063] device: 'psaux': device_add [ 3.279616] [] kasan_report_error+0x205/0x500 [ 3.279651] PM: Adding info for No Bus:psaux [ 3.280202] [] ? kfree_const+0x22/0x30 [ 3.280486] [] ? kobject_release+0x119/0x370 [ 3.280805] [] __asan_report_load4_noabort+0x43/0x50 [ 3.281170] [] ? __scsi_scan_target+0xd87/0xdf0 [ 3.281506] [] __scsi_scan_target+0xd87/0xdf0 [ 3.281848] [] ? scsi_add_device+0x30/0x30 [ 3.282156] [] ? pm_runtime_autosuspend_expiration+0x60/0x60 [ 3.282570] [] ? _raw_spin_lock+0x17/0x40 [ 3.282880] [] scsi_scan_channel+0x105/0x160 [ 3.283200] [] scsi_scan_host_selected+0x212/0x2f0 [ 3.283563] [] do_scsi_scan_host+0x1bc/0x250 [ 3.283882] [] do_scan_async+0x41/0x450 [ 3.284173] [] async_run_entry_fn+0xfe/0x610 [ 3.284492] [] ? pwq_dec_nr_in_flight+0x124/0x2a0 [ 3.284876] [] ? preempt_count_add+0x130/0x160 [ 3.285207] [] process_one_work+0x544/0x12d0 [ 3.285526] [] worker_thread+0xd9/0x12f0 [ 3.285844] [] ? process_one_work+0x12d0/0x12d0 [ 3.286182] [] kthread+0x1c5/0x260 [ 3.286443] [] ? __switch_to+0x88d/0x1430 [ 3.286745] [] ? kthread_worker_fn+0x5a0/0x5a0 [ 3.287085] [] ret_from_fork+0x1f/0x40 [ 3.287368] [] ? kthread_worker_fn+0x5a0/0x5a0 [ 3.287697] Object at ffff880254d8bb80, in cache kmalloc-2048 size: 2048 [ 3.288064] Allocated: [ 3.288147] PID = 27 [ 3.288218] [] save_stack_trace+0x2b/0x50 [ 3.288531] [] save_stack+0x46/0xd0 [ 3.288806] [] kasan_kmalloc+0xad/0xe0 [ 3.289098] [] __kmalloc+0x13e/0x250 [ 3.289378] [] scsi_alloc_sdev+0xea/0xcf0 [ 3.289701] [] __scsi_scan_target+0xa06/0xdf0 [ 3.290034] [] scsi_scan_channel+0x105/0x160 [ 3.290362] [] scsi_scan_host_selected+0x212/0x2f0 [ 3.290724] [] do_scsi_scan_host+0x1bc/0x250 [ 3.291055] [] do_scan_async+0x41/0x450 [ 3.291354] [] async_run_entry_fn+0xfe/0x610 [ 3.291695] [] process_one_work+0x544/0x12d0 [ 3.292022] [] worker_thread+0xd9/0x12f0 [ 3.292325] [] kthread+0x1c5/0x260 [ 3.292594] [] ret_from_fork+0x1f/0x40 [ 3.292886] Freed: [ 3.292945] PID = 27 [ 3.293016] [] save_stack_trace+0x2b/0x50 [ 3.293327] [] save_stack+0x46/0xd0 [ 3.293600] [] kasan_slab_free+0x71/0xb0 [ 3.293916] [] kfree+0xa2/0x1f0 [ 3.294168] [] scsi_device_dev_release_usercontext+0x50a/0x730 [ 3.294598] [] execute_in_process_context+0xda/0x130 [ 3.294974] [] scsi_device_dev_release+0x1c/0x20 [ 3.295322] [] device_release+0x76/0x1e0 [ 3.295626] [] kobject_release+0x107/0x370 [ 3.295942] [] kobject_put+0x4e/0xa0 [ 3.296222] [] put_device+0x17/0x20 [ 3.296497] [] scsi_device_put+0x7c/0xa0 [ 3.296801] [] __scsi_scan_target+0xd4c/0xdf0 [ 3.297132] [] scsi_scan_channel+0x105/0x160 [ 3.297458] [] scsi_scan_host_selected+0x212/0x2f0 [ 3.297829] [] do_scsi_scan_host+0x1bc/0x250 [ 3.298156] [] do_scan_async+0x41/0x450 [ 3.298453] [] async_run_entry_fn+0xfe/0x610 [ 3.298777] [] process_one_work+0x544/0x12d0 [ 3.299105] [] worker_thread+0xd9/0x12f0 [ 3.299408] [] kthread+0x1c5/0x260 [ 3.299676] [] ret_from_fork+0x1f/0x40 [ 3.299967] Memory state around the buggy address: [ 3.300209] ffff880254d8c200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 3.300608] ffff880254d8c280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 3.300986] >ffff880254d8c300: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 3.301408] ^ [ 3.301550] ffff880254d8c380: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 3.301987] ffff880254d8c400: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 3.302396] ================================================================== Cc: Christoph Hellwig Cc: stable@vger.kernel.org Signed-off-by: Ming Lei Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 212e98d940bc..bb9b58e21d95 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -1470,12 +1470,12 @@ retry: out_err: kfree(lun_data); out: - scsi_device_put(sdev); if (scsi_device_created(sdev)) /* * the sdev we used didn't appear in the report luns scan */ __scsi_remove_device(sdev); + scsi_device_put(sdev); return ret; } From 03eb6b8d314e89e94d4f79ee3d3e6596a75bc857 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 10 Oct 2016 23:25:33 +0800 Subject: [PATCH 045/884] scsi: Remove one useless stack variable The local variable of 'devname' in scsi_report_lun_scan() isn't used any more, so remove it. Cc: Christoph Hellwig Signed-off-by: Ming Lei Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_scan.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index bb9b58e21d95..6f7128f49c30 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -1307,7 +1307,6 @@ static void scsi_sequential_lun_scan(struct scsi_target *starget, static int scsi_report_lun_scan(struct scsi_target *starget, int bflags, enum scsi_scan_mode rescan) { - char devname[64]; unsigned char scsi_cmd[MAX_COMMAND_SIZE]; unsigned int length; u64 lun; @@ -1349,9 +1348,6 @@ static int scsi_report_lun_scan(struct scsi_target *starget, int bflags, } } - sprintf(devname, "host %d channel %d id %d", - shost->host_no, sdev->channel, sdev->id); - /* * Allocate enough to hold the header (the same size as one scsi_lun) * plus the number of luns we are requesting. 511 was the default From ea720935cf6686f72def9d322298bf7e9bd53377 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 5 Oct 2016 10:14:42 +0200 Subject: [PATCH 046/884] mac80211: discard multicast and 4-addr A-MSDUs In mac80211, multicast A-MSDUs are accepted in many cases that they shouldn't be accepted in: * drop A-MSDUs with a multicast A1 (RA), as required by the spec in 9.11 (802.11-2012 version) * drop A-MSDUs with a 4-addr header, since the fourth address can't actually be useful for them; unless 4-address frame format is actually requested, even though the fourth address is still not useful in this case, but ignored Accepting the first case, in particular, is very problematic since it allows anyone else with possession of a GTK to send unicast frames encapsulated in a multicast A-MSDU, even when the AP has client isolation enabled. Cc: stable@vger.kernel.org Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 6175db385ba7..3ac2f1cba317 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2308,16 +2308,22 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) if (!(status->rx_flags & IEEE80211_RX_AMSDU)) return RX_CONTINUE; - if (ieee80211_has_a4(hdr->frame_control) && - rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN && - !rx->sdata->u.vlan.sta) - return RX_DROP_UNUSABLE; + if (unlikely(ieee80211_has_a4(hdr->frame_control))) { + switch (rx->sdata->vif.type) { + case NL80211_IFTYPE_AP_VLAN: + if (!rx->sdata->u.vlan.sta) + return RX_DROP_UNUSABLE; + break; + case NL80211_IFTYPE_STATION: + if (!rx->sdata->u.mgd.use_4addr) + return RX_DROP_UNUSABLE; + break; + default: + return RX_DROP_UNUSABLE; + } + } - if (is_multicast_ether_addr(hdr->addr1) && - ((rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN && - rx->sdata->u.vlan.sta) || - (rx->sdata->vif.type == NL80211_IFTYPE_STATION && - rx->sdata->u.mgd.use_4addr))) + if (is_multicast_ether_addr(hdr->addr1)) return RX_DROP_UNUSABLE; skb->dev = dev; From 7f6990c830f3e8d703f13d7963acf51936c52ad2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 5 Oct 2016 15:29:49 +0200 Subject: [PATCH 047/884] cfg80211: let ieee80211_amsdu_to_8023s() take only header-less SKB There's only a single case where has_80211_header is passed as true, which is in mac80211. Given that there's only simple code that needs to be done before calling it, export that function from cfg80211 instead and let mac80211 call it itself. Signed-off-by: Johannes Berg --- .../wireless/marvell/mwifiex/11n_rxreorder.c | 2 +- include/net/cfg80211.h | 31 +++++++++++++------ net/mac80211/rx.c | 8 ++++- net/wireless/util.c | 24 +++----------- 4 files changed, 35 insertions(+), 30 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c index 94480123efa3..e9f462e3730b 100644 --- a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c +++ b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c @@ -45,7 +45,7 @@ static int mwifiex_11n_dispatch_amsdu_pkt(struct mwifiex_private *priv, skb_trim(skb, le16_to_cpu(local_rx_pd->rx_pkt_length)); ieee80211_amsdu_to_8023s(skb, &list, priv->curr_addr, - priv->wdev.iftype, 0, false); + priv->wdev.iftype, 0); while (!skb_queue_empty(&list)) { struct rx_packet_hdr *rx_hdr; diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index fe78f02a242e..f372eadd1963 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4039,6 +4039,18 @@ unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr); * that do not do the 802.11/802.3 conversion on the device. */ +/** + * ieee80211_data_to_8023_exthdr - convert an 802.11 data frame to 802.3 + * @skb: the 802.11 data frame + * @ehdr: pointer to a &struct ethhdr that will get the header, instead + * of it being pushed into the SKB + * @addr: the device MAC address + * @iftype: the virtual interface type + * Return: 0 on success. Non-zero on error. + */ +int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr, + const u8 *addr, enum nl80211_iftype iftype); + /** * ieee80211_data_to_8023 - convert an 802.11 data frame to 802.3 * @skb: the 802.11 data frame @@ -4046,8 +4058,11 @@ unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr); * @iftype: the virtual interface type * Return: 0 on success. Non-zero on error. */ -int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr, - enum nl80211_iftype iftype); +static inline int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr, + enum nl80211_iftype iftype) +{ + return ieee80211_data_to_8023_exthdr(skb, NULL, addr, iftype); +} /** * ieee80211_data_from_8023 - convert an 802.3 frame to 802.11 @@ -4065,22 +4080,20 @@ int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr, /** * ieee80211_amsdu_to_8023s - decode an IEEE 802.11n A-MSDU frame * - * Decode an IEEE 802.11n A-MSDU frame and convert it to a list of - * 802.3 frames. The @list will be empty if the decode fails. The - * @skb is consumed after the function returns. + * Decode an IEEE 802.11 A-MSDU and convert it to a list of 802.3 frames. + * The @list will be empty if the decode fails. The @skb must be fully + * header-less before being passed in here; it is freed in this function. * - * @skb: The input IEEE 802.11n A-MSDU frame. + * @skb: The input A-MSDU frame without any headers. * @list: The output list of 802.3 frames. It must be allocated and * initialized by by the caller. * @addr: The device MAC address. * @iftype: The device interface type. * @extra_headroom: The hardware extra headroom for SKBs in the @list. - * @has_80211_header: Set it true if SKB is with IEEE 802.11 header. */ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, const u8 *addr, enum nl80211_iftype iftype, - const unsigned int extra_headroom, - bool has_80211_header); + const unsigned int extra_headroom); /** * cfg80211_classify8021d - determine the 802.1p/1d tag for a data frame diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 3ac2f1cba317..de2d75fa5c51 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2298,6 +2298,7 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) __le16 fc = hdr->frame_control; struct sk_buff_head frame_list; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); + struct ethhdr ethhdr; if (unlikely(!ieee80211_is_data(fc))) return RX_CONTINUE; @@ -2329,9 +2330,14 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) skb->dev = dev; __skb_queue_head_init(&frame_list); + if (ieee80211_data_to_8023_exthdr(skb, ðhdr, + rx->sdata->vif.addr, + rx->sdata->vif.type)) + return RX_DROP_UNUSABLE; + ieee80211_amsdu_to_8023s(skb, &frame_list, dev->dev_addr, rx->sdata->vif.type, - rx->local->hw.extra_tx_headroom, true); + rx->local->hw.extra_tx_headroom); while (!skb_queue_empty(&frame_list)) { rx->skb = __skb_dequeue(&frame_list); diff --git a/net/wireless/util.c b/net/wireless/util.c index 8edce22d1b93..e36ede840b88 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -420,8 +420,8 @@ unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr) } EXPORT_SYMBOL(ieee80211_get_mesh_hdrlen); -static int __ieee80211_data_to_8023(struct sk_buff *skb, struct ethhdr *ehdr, - const u8 *addr, enum nl80211_iftype iftype) +int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr, + const u8 *addr, enum nl80211_iftype iftype) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct { @@ -525,13 +525,7 @@ static int __ieee80211_data_to_8023(struct sk_buff *skb, struct ethhdr *ehdr, return 0; } - -int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr, - enum nl80211_iftype iftype) -{ - return __ieee80211_data_to_8023(skb, NULL, addr, iftype); -} -EXPORT_SYMBOL(ieee80211_data_to_8023); +EXPORT_SYMBOL(ieee80211_data_to_8023_exthdr); int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr, enum nl80211_iftype iftype, @@ -745,25 +739,18 @@ __ieee80211_amsdu_copy(struct sk_buff *skb, unsigned int hlen, void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, const u8 *addr, enum nl80211_iftype iftype, - const unsigned int extra_headroom, - bool has_80211_header) + const unsigned int extra_headroom) { unsigned int hlen = ALIGN(extra_headroom, 4); struct sk_buff *frame = NULL; u16 ethertype; u8 *payload; - int offset = 0, remaining, err; + int offset = 0, remaining; struct ethhdr eth; bool reuse_frag = skb->head_frag && !skb_has_frag_list(skb); bool reuse_skb = false; bool last = false; - if (has_80211_header) { - err = __ieee80211_data_to_8023(skb, ð, addr, iftype); - if (err) - goto out; - } - while (!last) { unsigned int subframe_len; int len; @@ -819,7 +806,6 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, purge: __skb_queue_purge(list); - out: dev_kfree_skb(skb); } EXPORT_SYMBOL(ieee80211_amsdu_to_8023s); From 8b935ee2ea17db720d70f6420f77f594c0c93f75 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 5 Oct 2016 16:17:01 +0200 Subject: [PATCH 048/884] cfg80211: add ability to check DA/SA in A-MSDU decapsulation We should not accept arbitrary DA/SA inside A-MSDUs, it could be used to circumvent protections, like allowing a station to send frames and make them seem to come from somewhere else. Add the necessary infrastructure in cfg80211 to allow such checks, in further patches we'll start using them. Signed-off-by: Johannes Berg --- .../net/wireless/marvell/mwifiex/11n_rxreorder.c | 2 +- include/net/cfg80211.h | 5 ++++- net/mac80211/rx.c | 3 ++- net/wireless/util.c | 14 ++++++++++++-- 4 files changed, 19 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c index e9f462e3730b..274dd5a1574a 100644 --- a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c +++ b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c @@ -45,7 +45,7 @@ static int mwifiex_11n_dispatch_amsdu_pkt(struct mwifiex_private *priv, skb_trim(skb, le16_to_cpu(local_rx_pd->rx_pkt_length)); ieee80211_amsdu_to_8023s(skb, &list, priv->curr_addr, - priv->wdev.iftype, 0); + priv->wdev.iftype, 0, NULL, NULL); while (!skb_queue_empty(&list)) { struct rx_packet_hdr *rx_hdr; diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f372eadd1963..7df600c463eb 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4090,10 +4090,13 @@ int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr, * @addr: The device MAC address. * @iftype: The device interface type. * @extra_headroom: The hardware extra headroom for SKBs in the @list. + * @check_da: DA to check in the inner ethernet header, or NULL + * @check_sa: SA to check in the inner ethernet header, or NULL */ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, const u8 *addr, enum nl80211_iftype iftype, - const unsigned int extra_headroom); + const unsigned int extra_headroom, + const u8 *check_da, const u8 *check_sa); /** * cfg80211_classify8021d - determine the 802.1p/1d tag for a data frame diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index de2d75fa5c51..cf53fe1a0aa2 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2337,7 +2337,8 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) ieee80211_amsdu_to_8023s(skb, &frame_list, dev->dev_addr, rx->sdata->vif.type, - rx->local->hw.extra_tx_headroom); + rx->local->hw.extra_tx_headroom, + NULL, NULL); while (!skb_queue_empty(&frame_list)) { rx->skb = __skb_dequeue(&frame_list); diff --git a/net/wireless/util.c b/net/wireless/util.c index e36ede840b88..5ea12afc7706 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -739,7 +739,8 @@ __ieee80211_amsdu_copy(struct sk_buff *skb, unsigned int hlen, void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, const u8 *addr, enum nl80211_iftype iftype, - const unsigned int extra_headroom) + const unsigned int extra_headroom, + const u8 *check_da, const u8 *check_sa) { unsigned int hlen = ALIGN(extra_headroom, 4); struct sk_buff *frame = NULL; @@ -767,8 +768,17 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, goto purge; offset += sizeof(struct ethhdr); - /* reuse skb for the last subframe */ last = remaining <= subframe_len + padding; + + /* FIXME: should we really accept multicast DA? */ + if ((check_da && !is_multicast_ether_addr(eth.h_dest) && + !ether_addr_equal(check_da, eth.h_dest)) || + (check_sa && !ether_addr_equal(check_sa, eth.h_source))) { + offset += len + padding; + continue; + } + + /* reuse skb for the last subframe */ if (!skb_is_nonlinear(skb) && !reuse_frag && last) { skb_pull(skb, offset); frame = skb; From e2b5227faaf33109d9d00f8a99f7b4f45a9d9c9f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 5 Oct 2016 16:42:06 +0200 Subject: [PATCH 049/884] mac80211: validate DA/SA during A-MSDU decapsulation As pointed out by Michael Braun, we don't check inner L2 addresses during A-MSDU decapsulation, leading to the possibility that, for example, a station associated to an AP sends frames as though they came from somewhere else. Fix this problem by letting cfg80211 validate the addresses, as indicated by passing in the ones that need to be validated. Reported-by: Michael Braun Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index cf53fe1a0aa2..a47bbc973f2d 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2299,6 +2299,7 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) struct sk_buff_head frame_list; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); struct ethhdr ethhdr; + const u8 *check_da = ethhdr.h_dest, *check_sa = ethhdr.h_source; if (unlikely(!ieee80211_is_data(fc))) return RX_CONTINUE; @@ -2322,6 +2323,23 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) default: return RX_DROP_UNUSABLE; } + check_da = NULL; + check_sa = NULL; + } else switch (rx->sdata->vif.type) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + check_da = NULL; + break; + case NL80211_IFTYPE_STATION: + if (!rx->sta || + !test_sta_flag(rx->sta, WLAN_STA_TDLS_PEER)) + check_sa = NULL; + break; + case NL80211_IFTYPE_MESH_POINT: + check_sa = NULL; + break; + default: + break; } if (is_multicast_ether_addr(hdr->addr1)) @@ -2338,7 +2356,7 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) ieee80211_amsdu_to_8023s(skb, &frame_list, dev->dev_addr, rx->sdata->vif.type, rx->local->hw.extra_tx_headroom, - NULL, NULL); + check_da, check_sa); while (!skb_queue_empty(&frame_list)) { rx->skb = __skb_dequeue(&frame_list); From 1d4de2e222b41006007d7dbfce2abfe448217e49 Mon Sep 17 00:00:00 2001 From: Michael Braun Date: Mon, 3 Oct 2016 13:14:15 +0200 Subject: [PATCH 050/884] mac80211: fix CMD_FRAME for AP_VLAN When using IEEE 802.11r FT OVER-DS roaming with AP_VLAN, hostapd needs to send out a frame using CMD_FRAME for a station assigned to an AP_VLAN interface. Right now, the userspace needs to give the exact AP_VLAN interface index for CMD_FRAME; hostapd does not do this. Additionally, userspace cannot use GET_STATION to query the AP_VLAN ifidx, as while GET_STATION finds stations assigned to AP_VLAN even if the AP iface is queried, it does not return AP_VLAN ifidx (it returns the queried one). This breaks IEEE 802.11r over_ds with vlans, as the reply frame does not get out. This patch fixes this by using get_sta_bss for CMD_FRAME. Signed-off-by: Michael Braun Signed-off-by: Johannes Berg --- net/mac80211/offchannel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index c3f610bba3fe..eede5c6db8d5 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -820,7 +820,7 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, mgmt->u.action.category == WLAN_CATEGORY_SPECTRUM_MGMT) break; rcu_read_lock(); - sta = sta_info_get(sdata, mgmt->da); + sta = sta_info_get_bss(sdata, mgmt->da); rcu_read_unlock(); if (!sta) return -ENOLINK; From 40c30bbf3377babc4d6bb16b699184236a8bfa27 Mon Sep 17 00:00:00 2001 From: Brian Masney Date: Tue, 11 Oct 2016 19:28:02 -0400 Subject: [PATCH 051/884] platform/x86: ideapad-laptop: Add Lenovo Yoga 910-13IKB to no_hw_rfkill dmi list The Lenovo Yoga 910-13IKB does not have a hw rfkill switch, and trying to read the hw rfkill switch through the ideapad module causes it to always report as blocked. This commit adds the Lenovo Yoga 910-13IKB to the no_hw_rfkill dmi list, fixing the WiFI breakage. Signed-off-by: Brian Masney Signed-off-by: Darren Hart --- drivers/platform/x86/ideapad-laptop.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index d1a091b93192..a2323941e677 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -933,6 +933,13 @@ static const struct dmi_system_id no_hw_rfkill_list[] = { DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo YOGA 900"), }, }, + { + .ident = "Lenovo YOGA 910-13IKB", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo YOGA 910-13IKB"), + }, + }, {} }; From d5e84fd8d0634d056248b67463b42f6c85896a19 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 19 Sep 2016 10:57:40 +0100 Subject: [PATCH 052/884] Btrfs: fix incremental send failure caused by balance Commit 951555856b88 ("Btrfs: send, don't bug on inconsistent snapshots") removed some BUG_ON() statements (replacing them with returning errors to user space and logging error messages) when a snapshot is in an inconsistent state due to failures to update a delayed inode item (ENOMEM or ENOSPC) after adding/updating/deleting references, xattrs or file extent items. However there is a case, when no errors happen, where a file extent item can be modified without having the corresponding inode item updated. This case happens during balance under very specific timings, when relocation is in the stage where it updates data pointers and a leaf that contains file extent items is COWed. When that happens file extent items get their disk_bytenr field updated to a new value that reflects the post relocation logical address of the extent, without updating their respective inode items (as there is nothing that needs to be updated on them). This is performed at relocation.c:replace_file_extents() through relocation.c:btrfs_reloc_cow_block(). So make an incremental send deal with this case and don't do any processing for a file extent item that got its disk_bytenr field updated by relocation, since the extent's data is the same as the one pointed by the file extent item in the parent snapshot. After the recent commit mentioned above this case resulted in EIO errors returned to user space (and an error message logged to dmesg/syslog) when doing an incremental send, while before it, it resulted in hitting a BUG_ON leading to the following trace: [ 952.206705] ------------[ cut here ]------------ [ 952.206714] kernel BUG at ../fs/btrfs/send.c:5653! [ 952.206719] Internal error: Oops - BUG: 0 [#1] SMP [ 952.209854] Modules linked in: st dm_mod nls_utf8 isofs fuse nf_log_ipv6 xt_pkttype xt_physdev br_netfilter nf_log_ipv4 nf_log_common xt_LOG xt_limit ebtable_filter ebtables af_packet bridge stp llc ip6t_REJECT xt_tcpudp nf_conntrack_ipv6 nf_defrag_ipv6 ip6table_raw ipt_REJECT iptable_raw xt_CT iptable_filter ip6table_mangle nf_conntrack_netbios_ns nf_conntrack_broadcast nf_conntrack_ipv4 nf_defrag_ipv4 ip_tables xt_conntrack nf_conntrack ip6table_filter ip6_tables x_tables xfs libcrc32c nls_iso8859_1 nls_cp437 vfat fat joydev aes_ce_blk ablk_helper cryptd snd_intel8x0 aes_ce_cipher snd_ac97_codec ac97_bus snd_pcm ghash_ce sha2_ce sha1_ce snd_timer snd virtio_net soundcore btrfs xor sr_mod cdrom hid_generic usbhid raid6_pq virtio_blk virtio_scsi bochs_drm drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm virtio_mmio xhci_pci xhci_hcd usbcore usb_common virtio_pci virtio_ring virtio drm sg efivarfs [ 952.228333] Supported: Yes [ 952.228908] CPU: 0 PID: 12779 Comm: snapperd Not tainted 4.4.14-50-default #1 [ 952.230329] Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 [ 952.231683] task: ffff800058e94100 ti: ffff8000d866c000 task.ti: ffff8000d866c000 [ 952.233279] PC is at changed_cb+0x9f4/0xa48 [btrfs] [ 952.234375] LR is at changed_cb+0x58/0xa48 [btrfs] [ 952.236552] pc : [] lr : [] pstate: 80000145 [ 952.238049] sp : ffff8000d866fa20 [ 952.238732] x29: ffff8000d866fa20 x28: 0000000000000019 [ 952.239840] x27: 00000000000028d5 x26: 00000000000024a2 [ 952.241008] x25: 0000000000000002 x24: ffff8000e66e92f0 [ 952.242131] x23: ffff8000b8c76800 x22: ffff800092879140 [ 952.243238] x21: 0000000000000002 x20: ffff8000d866fb78 [ 952.244348] x19: ffff8000b8f8c200 x18: 0000000000002710 [ 952.245607] x17: 0000ffff90d42480 x16: ffff800000237dc0 [ 952.246719] x15: 0000ffff90de7510 x14: ab000c000a2faf08 [ 952.247835] x13: 0000000000577c2b x12: ab000c000b696665 [ 952.248981] x11: 2e65726f632f6966 x10: 652d34366d72612f [ 952.250101] x9 : 32627572672f746f x8 : ab000c00092f1671 [ 952.251352] x7 : 8000000000577c2b x6 : ffff800053eadf45 [ 952.252468] x5 : 0000000000000000 x4 : ffff80005e169494 [ 952.253582] x3 : 0000000000000004 x2 : ffff8000d866fb78 [ 952.254695] x1 : 000000000003e2a3 x0 : 000000000003e2a4 [ 952.255803] [ 952.256150] Process snapperd (pid: 12779, stack limit = 0xffff8000d866c020) [ 952.257516] Stack: (0xffff8000d866fa20 to 0xffff8000d8670000) [ 952.258654] fa20: ffff8000d866fae0 ffff7ffffc308fc0 ffff800092879140 ffff8000e66e92f0 [ 952.260219] fa40: 0000000000000035 ffff800055de6000 ffff8000b8c76800 ffff8000d866fb78 [ 952.261745] fa60: 0000000000000002 00000000000024a2 00000000000028d5 0000000000000019 [ 952.263269] fa80: ffff8000d866fae0 ffff7ffffc3090f0 ffff8000d866fae0 ffff7ffffc309128 [ 952.264797] faa0: ffff800092879140 ffff8000e66e92f0 0000000000000035 ffff800055de6000 [ 952.268261] fac0: ffff8000b8c76800 ffff8000d866fb78 0000000000000002 0000000000001000 [ 952.269822] fae0: ffff8000d866fbc0 ffff7ffffc39ecfc ffff8000b8f8c200 ffff8000b8f8c368 [ 952.271368] fb00: ffff8000b8f8c378 ffff800055de6000 0000000000000001 ffff8000ecb17500 [ 952.272893] fb20: ffff8000b8c76800 ffff800092879140 ffff800062b6d000 ffff80007a9e2470 [ 952.274420] fb40: ffff8000b8f8c208 0000000005784000 ffff8000580a8000 ffff8000b8f8c200 [ 952.276088] fb60: ffff7ffffc39d488 00000002b8f8c368 0000000000000000 000000000003e2a4 [ 952.280275] fb80: 000000000000006c ffff7ffffc39ec00 000000000003e2a4 000000000000006c [ 952.283219] fba0: ffff8000b8f8c300 0000000000000100 0000000000000001 ffff8000ecb17500 [ 952.286166] fbc0: ffff8000d866fcd0 ffff7ffffc3643c0 ffff8000f8842700 0000ffff8ffe9278 [ 952.289136] fbe0: 0000000040489426 ffff800055de6000 0000ffff8ffe9278 0000000040489426 [ 952.292083] fc00: 000000000000011d 000000000000001d ffff80007a9e4598 ffff80007a9e43e8 [ 952.294959] fc20: ffff8000b8c7693f 0000000000003b24 0000000000000019 ffff8000b8f8c218 [ 952.301161] fc40: 00000001d866fc70 ffff8000b8c76800 0000000000000128 ffffffffffffff84 [ 952.305749] fc60: ffff800058e941ff 0000000000003a58 ffff8000d866fcb0 ffff8000000f7390 [ 952.308875] fc80: 000000000000012a 0000000000010290 ffff8000d866fc00 000000000000007b [ 952.311915] fca0: 0000000000010290 ffff800046c1b100 74732d7366727462 000001006d616572 [ 952.314937] fcc0: ffff8000fffc4100 cb88537fdc8ba60e ffff8000d866fe10 ffff8000002499e8 [ 952.318008] fce0: 0000000040489426 ffff8000f8842700 0000ffff8ffe9278 ffff80007a9e4598 [ 952.321321] fd00: 0000ffff8ffe9278 0000000040489426 000000000000011d 000000000000001d [ 952.324280] fd20: ffff80000072c000 ffff8000d866c000 ffff8000d866fda0 ffff8000000e997c [ 952.327156] fd40: ffff8000fffc4180 00000000000031ed ffff8000fffc4180 ffff800046c1b7d4 [ 952.329895] fd60: 0000000000000140 0000ffff907ea170 000000000000011d 00000000000000dc [ 952.334641] fd80: ffff80000072c000 ffff8000d866c000 0000000000000000 0000000000000002 [ 952.338002] fda0: ffff8000d866fdd0 ffff8000000ebacc ffff800046c1b080 ffff800046c1b7d4 [ 952.340724] fdc0: ffff8000d866fdf0 ffff8000000db67c 0000000000000040 ffff800000e69198 [ 952.343415] fde0: 0000ffff8ffea790 00000000000031ed ffff8000d866fe20 ffff800000254000 [ 952.346101] fe00: 000000000000001d 0000000000000004 ffff8000d866fe90 ffff800000249d3c [ 952.348980] fe20: ffff8000f8842700 0000000000000000 ffff8000f8842701 0000000000000008 [ 952.351696] fe40: ffff8000d866fe70 0000000000000008 ffff8000d866fe90 ffff800000249cf8 [ 952.354387] fe60: ffff8000f8842700 0000ffff8ffe9170 ffff8000f8842701 0000000000000008 [ 952.357083] fe80: 0000ffff8ffe9278 ffff80008ff85500 0000ffff8ffe90c0 ffff800000085c84 [ 952.359800] fea0: 0000000000000000 0000ffff8ffe9170 ffffffffffffffff 0000ffff90d473bc [ 952.365351] fec0: 0000000000000000 0000000000000015 0000000000000008 0000000040489426 [ 952.369550] fee0: 0000ffff8ffe9278 0000ffff907ea790 0000ffff907ea170 0000ffff907ea790 [ 952.372416] ff00: 0000ffff907ea170 0000000000000000 000000000000001d 0000000000000004 [ 952.375223] ff20: 0000ffff90a32220 00000000003d0f00 0000ffff907ea0a0 0000ffff8ffe8f30 [ 952.378099] ff40: 0000ffff9100f554 0000ffff91147000 0000ffff91117bc0 0000ffff90d473b0 [ 952.381115] ff60: 0000ffff9100f620 0000ffff880069b0 0000ffff8ffe9170 0000ffff8ffe91a0 [ 952.384003] ff80: 0000ffff8ffe9160 0000ffff8ffe9140 0000ffff88006990 0000ffff8ffe9278 [ 952.386860] ffa0: 0000ffff88008a60 0000ffff8ffe9480 0000ffff88014ca0 0000ffff8ffe90c0 [ 952.389654] ffc0: 0000ffff910be8e8 0000ffff8ffe90c0 0000ffff90d473bc 0000000000000000 [ 952.410986] ffe0: 0000000000000008 000000000000001d 6e2079747265706f 72616d223d656d61 [ 952.415497] Call trace: [ 952.417403] [] changed_cb+0x9f4/0xa48 [btrfs] [ 952.420023] [] btrfs_compare_trees+0x500/0x6b0 [btrfs] [ 952.422759] [] btrfs_ioctl_send+0xb4c/0xe10 [btrfs] [ 952.425601] [] btrfs_ioctl+0x374/0x29a4 [btrfs] [ 952.428031] [] do_vfs_ioctl+0x33c/0x600 [ 952.430360] [] SyS_ioctl+0x90/0xa4 [ 952.432552] [] el0_svc_naked+0x38/0x3c [ 952.434803] Code: 2a1503e0 17fffdac b9404282 17ffff28 (d4210000) [ 952.437457] ---[ end trace 9afd7090c466cf15 ]--- Signed-off-by: Filipe Manana --- fs/btrfs/send.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 978796865bfc..0b4628999b77 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -5805,6 +5805,64 @@ static int changed_extent(struct send_ctx *sctx, int ret = 0; if (sctx->cur_ino != sctx->cmp_key->objectid) { + + if (result == BTRFS_COMPARE_TREE_CHANGED) { + struct extent_buffer *leaf_l; + struct extent_buffer *leaf_r; + struct btrfs_file_extent_item *ei_l; + struct btrfs_file_extent_item *ei_r; + + leaf_l = sctx->left_path->nodes[0]; + leaf_r = sctx->right_path->nodes[0]; + ei_l = btrfs_item_ptr(leaf_l, + sctx->left_path->slots[0], + struct btrfs_file_extent_item); + ei_r = btrfs_item_ptr(leaf_r, + sctx->right_path->slots[0], + struct btrfs_file_extent_item); + + /* + * We may have found an extent item that has changed + * only its disk_bytenr field and the corresponding + * inode item was not updated. This case happens due to + * very specific timings during relocation when a leaf + * that contains file extent items is COWed while + * relocation is ongoing and its in the stage where it + * updates data pointers. So when this happens we can + * safely ignore it since we know it's the same extent, + * but just at different logical and physical locations + * (when an extent is fully replaced with a new one, we + * know the generation number must have changed too, + * since snapshot creation implies committing the current + * transaction, and the inode item must have been updated + * as well). + * This replacement of the disk_bytenr happens at + * relocation.c:replace_file_extents() through + * relocation.c:btrfs_reloc_cow_block(). + */ + if (btrfs_file_extent_generation(leaf_l, ei_l) == + btrfs_file_extent_generation(leaf_r, ei_r) && + btrfs_file_extent_ram_bytes(leaf_l, ei_l) == + btrfs_file_extent_ram_bytes(leaf_r, ei_r) && + btrfs_file_extent_compression(leaf_l, ei_l) == + btrfs_file_extent_compression(leaf_r, ei_r) && + btrfs_file_extent_encryption(leaf_l, ei_l) == + btrfs_file_extent_encryption(leaf_r, ei_r) && + btrfs_file_extent_other_encoding(leaf_l, ei_l) == + btrfs_file_extent_other_encoding(leaf_r, ei_r) && + btrfs_file_extent_type(leaf_l, ei_l) == + btrfs_file_extent_type(leaf_r, ei_r) && + btrfs_file_extent_disk_bytenr(leaf_l, ei_l) != + btrfs_file_extent_disk_bytenr(leaf_r, ei_r) && + btrfs_file_extent_disk_num_bytes(leaf_l, ei_l) == + btrfs_file_extent_disk_num_bytes(leaf_r, ei_r) && + btrfs_file_extent_offset(leaf_l, ei_l) == + btrfs_file_extent_offset(leaf_r, ei_r) && + btrfs_file_extent_num_bytes(leaf_l, ei_l) == + btrfs_file_extent_num_bytes(leaf_r, ei_r)) + return 0; + } + inconsistent_snapshot_error(sctx, result, "extent"); return -EIO; } From 1fa9ce8d0e903449842943a77e8ba100169964be Mon Sep 17 00:00:00 2001 From: Tomasz Majchrzak Date: Wed, 12 Oct 2016 12:23:08 +0200 Subject: [PATCH 053/884] badblocks: fix overlapping check for clearing Current bad block clear implementation assumes the range to clear overlaps with at least one bad block already stored. If given range to clear precedes first bad block in a list, the first entry is incorrectly updated. Check not only if stored block end is past clear block end but also if stored block start is before clear block end. Signed-off-by: Tomasz Majchrzak Acked-by: NeilBrown Signed-off-by: Jens Axboe --- block/badblocks.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/block/badblocks.c b/block/badblocks.c index 7be53cb1cc3c..6610e282a03e 100644 --- a/block/badblocks.c +++ b/block/badblocks.c @@ -354,7 +354,8 @@ int badblocks_clear(struct badblocks *bb, sector_t s, int sectors) * current range. Earlier ranges could also overlap, * but only this one can overlap the end of the range. */ - if (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) { + if ((BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) && + (BB_OFFSET(p[lo]) < target)) { /* Partial overlap, leave the tail of this range */ int ack = BB_ACK(p[lo]); sector_t a = BB_OFFSET(p[lo]); @@ -377,7 +378,8 @@ int badblocks_clear(struct badblocks *bb, sector_t s, int sectors) lo--; } while (lo >= 0 && - BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) { + (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) && + (BB_OFFSET(p[lo]) < target)) { /* This range does overlap */ if (BB_OFFSET(p[lo]) < s) { /* Keep the early parts of this range. */ From 0df1e4f5e0e831670f43bd198623b303ba09cbc0 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 11 Oct 2016 13:31:58 -0400 Subject: [PATCH 054/884] nvme: Stop probing a removed device There is no reason the nvme controller can ever return all 1's from reading the CSTS register. This patch returns an error if we observe that status. Without this, we may incorrectly proceed with controller initialization and unnecessarilly rely on error handling to clean this. Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 329381a28edf..2a57f5ede386 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1086,6 +1086,8 @@ static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled) int ret; while ((ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) == 0) { + if (csts == ~0) + return -ENODEV; if ((csts & NVME_CSTS_RDY) == bit) break; From 7065906096273b39b90a512a7170a6697ed94b23 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 12 Oct 2016 09:22:16 -0600 Subject: [PATCH 055/884] nvme: Delete created IO queues on reset The driver was decrementing the online_queues prior to attempting to delete those IO queues, so the driver ended up not requesting the controller delete any. This patch saves the online_queues prior to suspending them, and adds that parameter for deleting io queues. Fixes: c21377f8 ("nvme: Suspend all queues before deletion") Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 68ef1875e8a8..94da3a47775c 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1510,9 +1510,9 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode) return 0; } -static void nvme_disable_io_queues(struct nvme_dev *dev) +static void nvme_disable_io_queues(struct nvme_dev *dev, int queues) { - int pass, queues = dev->online_queues - 1; + int pass; unsigned long timeout; u8 opcode = nvme_admin_delete_sq; @@ -1648,7 +1648,7 @@ static void nvme_pci_disable(struct nvme_dev *dev) static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) { - int i; + int i, queues; u32 csts = -1; del_timer_sync(&dev->watchdog_timer); @@ -1659,6 +1659,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) csts = readl(dev->bar + NVME_REG_CSTS); } + queues = dev->online_queues - 1; for (i = dev->queue_count - 1; i > 0; i--) nvme_suspend_queue(dev->queues[i]); @@ -1670,7 +1671,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) if (dev->queue_count) nvme_suspend_queue(dev->queues[0]); } else { - nvme_disable_io_queues(dev); + nvme_disable_io_queues(dev, queues); nvme_disable_admin_queue(dev, shutdown); } nvme_pci_disable(dev); From c5f6ce97c12104668784ee17fb927c52a944d3d8 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 5 Oct 2016 16:32:45 -0400 Subject: [PATCH 056/884] nvme: don't schedule multiple resets The queue_work only fails if the work is pending, but not yet running. If the work is running, the work item would get requeued, triggering a double reset. If the first reset fails for any reason, the second reset triggers: WARN_ON(dev->ctrl.state == NVME_CTRL_RESETTING) Hitting that schedules controller deletion for a second time, which potentially takes a reference on the device that is being deleted. If the reset occurs at the same time as a hot removal event, this causes a double-free. This patch has the reset helper function check if the work is busy prior to queueing, and changes all places that schedule resets to use this function. Since most users don't want to sync with that work, the "flush_work" is moved to the only caller that wants to sync. Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 94da3a47775c..12357d616eeb 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -892,7 +892,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) "I/O %d QID %d timeout, reset controller\n", req->tag, nvmeq->qid); nvme_dev_disable(dev, false); - queue_work(nvme_workq, &dev->reset_work); + nvme_reset(dev); /* * Mark the request as handled, since the inline shutdown @@ -1290,7 +1290,7 @@ static void nvme_watchdog_timer(unsigned long data) /* Skip controllers under certain specific conditions. */ if (nvme_should_reset(dev, csts)) { - if (queue_work(nvme_workq, &dev->reset_work)) + if (!nvme_reset(dev)) dev_warn(dev->dev, "Failed status: 0x%x, reset controller.\n", csts); @@ -1818,11 +1818,10 @@ static int nvme_reset(struct nvme_dev *dev) { if (!dev->ctrl.admin_q || blk_queue_dying(dev->ctrl.admin_q)) return -ENODEV; - + if (work_busy(&dev->reset_work)) + return -ENODEV; if (!queue_work(nvme_workq, &dev->reset_work)) return -EBUSY; - - flush_work(&dev->reset_work); return 0; } @@ -1846,7 +1845,12 @@ static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val) static int nvme_pci_reset_ctrl(struct nvme_ctrl *ctrl) { - return nvme_reset(to_nvme_dev(ctrl)); + struct nvme_dev *dev = to_nvme_dev(ctrl); + int ret = nvme_reset(dev); + + if (!ret) + flush_work(&dev->reset_work); + return ret; } static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = { @@ -1940,7 +1944,7 @@ static void nvme_reset_notify(struct pci_dev *pdev, bool prepare) if (prepare) nvme_dev_disable(dev, false); else - queue_work(nvme_workq, &dev->reset_work); + nvme_reset(dev); } static void nvme_shutdown(struct pci_dev *pdev) @@ -2009,7 +2013,7 @@ static int nvme_resume(struct device *dev) struct pci_dev *pdev = to_pci_dev(dev); struct nvme_dev *ndev = pci_get_drvdata(pdev); - queue_work(nvme_workq, &ndev->reset_work); + nvme_reset(ndev); return 0; } #endif @@ -2048,7 +2052,7 @@ static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev) dev_info(dev->ctrl.device, "restart after slot reset\n"); pci_restore_state(pdev); - queue_work(nvme_workq, &dev->reset_work); + nvme_reset(dev); return PCI_ERS_RESULT_RECOVERED; } From 202021c1a63c6ed69b3260e0fe10530c51f1e53e Mon Sep 17 00:00:00 2001 From: Stephen Bates Date: Wed, 5 Oct 2016 20:01:12 -0600 Subject: [PATCH 057/884] nvme : Add sysfs entry for NVMe CMBs when appropriate Add a sysfs attribute that contains salient information about the NVMe Controller Memory Buffer when one is present. For now, just display the information about the CMB available from the control registers. We attach the CMB attribute file to the existing nvme_ctrl sysfs group so it can handle the sysfs teardown. Reviewed-by: Sagi Grimberg Reviewed-by: Jay Freyensee Signed-off-by: Stephen Bates Acked-by Jon Derrick: Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 44 ++++++++++++++++++++++++++++++++--------- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 12357d616eeb..a7c6e9d74943 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -99,6 +99,7 @@ struct nvme_dev { dma_addr_t cmb_dma_addr; u64 cmb_size; u32 cmbsz; + u32 cmbloc; struct nvme_ctrl ctrl; struct completion ioq_wait; }; @@ -1330,28 +1331,37 @@ static int nvme_create_io_queues(struct nvme_dev *dev) return ret >= 0 ? 0 : ret; } +static ssize_t nvme_cmb_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct nvme_dev *ndev = to_nvme_dev(dev_get_drvdata(dev)); + + return snprintf(buf, PAGE_SIZE, "cmbloc : x%08x\ncmbsz : x%08x\n", + ndev->cmbloc, ndev->cmbsz); +} +static DEVICE_ATTR(cmb, S_IRUGO, nvme_cmb_show, NULL); + static void __iomem *nvme_map_cmb(struct nvme_dev *dev) { u64 szu, size, offset; - u32 cmbloc; resource_size_t bar_size; struct pci_dev *pdev = to_pci_dev(dev->dev); void __iomem *cmb; dma_addr_t dma_addr; - if (!use_cmb_sqes) - return NULL; - dev->cmbsz = readl(dev->bar + NVME_REG_CMBSZ); if (!(NVME_CMB_SZ(dev->cmbsz))) return NULL; + dev->cmbloc = readl(dev->bar + NVME_REG_CMBLOC); - cmbloc = readl(dev->bar + NVME_REG_CMBLOC); + if (!use_cmb_sqes) + return NULL; szu = (u64)1 << (12 + 4 * NVME_CMB_SZU(dev->cmbsz)); size = szu * NVME_CMB_SZ(dev->cmbsz); - offset = szu * NVME_CMB_OFST(cmbloc); - bar_size = pci_resource_len(pdev, NVME_CMB_BIR(cmbloc)); + offset = szu * NVME_CMB_OFST(dev->cmbloc); + bar_size = pci_resource_len(pdev, NVME_CMB_BIR(dev->cmbloc)); if (offset > bar_size) return NULL; @@ -1364,7 +1374,7 @@ static void __iomem *nvme_map_cmb(struct nvme_dev *dev) if (size > bar_size - offset) size = bar_size - offset; - dma_addr = pci_resource_start(pdev, NVME_CMB_BIR(cmbloc)) + offset; + dma_addr = pci_resource_start(pdev, NVME_CMB_BIR(dev->cmbloc)) + offset; cmb = ioremap_wc(dma_addr, size); if (!cmb) return NULL; @@ -1615,9 +1625,25 @@ static int nvme_pci_enable(struct nvme_dev *dev) dev->q_depth); } - if (readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 2)) + /* + * CMBs can currently only exist on >=1.2 PCIe devices. We only + * populate sysfs if a CMB is implemented. Note that we add the + * CMB attribute to the nvme_ctrl kobj which removes the need to remove + * it on exit. Since nvme_dev_attrs_group has no name we can pass + * NULL as final argument to sysfs_add_file_to_group. + */ + + if (readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 2)) { dev->cmb = nvme_map_cmb(dev); + if (dev->cmbsz) { + if (sysfs_add_file_to_group(&dev->ctrl.device->kobj, + &dev_attr_cmb.attr, NULL)) + dev_warn(dev->dev, + "failed to add sysfs attribute for CMB\n"); + } + } + pci_enable_pcie_error_reporting(pdev); pci_save_state(pdev); return 0; From ea893695ec1131a5fed0523ff8094bc6e8723bbe Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 12 Oct 2016 21:31:40 +0200 Subject: [PATCH 058/884] platform/x86: asus-wmi: add SERIO_I8042 dependency A recent bugfix added a call to i8042_install_filter but did not add the dependency, leading to possible link errors: drivers/platform/built-in.o: In function `asus_nb_wmi_quirks': asus-nb-wmi.c:(.text+0x23af): undefined reference to `i8042_install_filter' This adds a dependency on SERIO_I8042||SERIO_I8042=n to indicate that we can build the driver when the i8042 driver is disabled, but it cannot be built-in when that is a loadable module. Fixes: b5643539b825 ("platform/x86: asus-wmi: Filter buggy scan codes on ASUS Q500A") Signed-off-by: Arnd Bergmann Signed-off-by: Darren Hart --- drivers/platform/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 81b8dcca8891..b8a21d7b25d4 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -576,6 +576,7 @@ config ASUS_WMI config ASUS_NB_WMI tristate "Asus Notebook WMI Driver" depends on ASUS_WMI + depends on SERIO_I8042 || SERIO_I8042 = n ---help--- This is a driver for newer Asus notebooks. It adds extra features like wireless radio and bluetooth control, leds, hotkeys, backlight... From de0dcc40f6e24d6bac6b60e36eac4659bbbd3f00 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 12 Oct 2016 13:38:41 -0700 Subject: [PATCH 059/884] f2fs: fix wrong sum_page pointer in f2fs_gc This patch fixes using a wrong pointer for sum_page in f2fs_gc. Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 93985c64d8a8..6f14ee923acd 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -852,16 +852,16 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, for (segno = start_segno; segno < end_segno; segno++) { - if (get_valid_blocks(sbi, segno, 1) == 0 || - unlikely(f2fs_cp_error(sbi))) - goto next; - /* find segment summary of victim */ sum_page = find_get_page(META_MAPPING(sbi), GET_SUM_BLOCK(sbi, segno)); - f2fs_bug_on(sbi, !PageUptodate(sum_page)); f2fs_put_page(sum_page, 0); + if (get_valid_blocks(sbi, segno, 1) == 0 || + !PageUptodate(sum_page) || + unlikely(f2fs_cp_error(sbi))) + goto next; + sum = page_address(sum_page); f2fs_bug_on(sbi, type != GET_SUM_TYPE((&sum->footer))); From 651e1c3b1576c5ffda6df01db1ef535eeb8b1a37 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 12 Oct 2016 23:12:53 -0400 Subject: [PATCH 060/884] ext4: super.c: Update logging style using KERN_CONT Recent commit require line continuing printks to use PR_CONT. Update super.c to use KERN_CONT and use vsprintf extension %pV to avoid a printk/vprintk/printk("\n") sequence as well. Signed-off-by: Joe Perches Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara --- fs/ext4/super.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 6db81fbcbaa6..20da99da0a34 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -597,14 +597,15 @@ void __ext4_std_error(struct super_block *sb, const char *function, void __ext4_abort(struct super_block *sb, const char *function, unsigned int line, const char *fmt, ...) { + struct va_format vaf; va_list args; save_error_info(sb, function, line); va_start(args, fmt); - printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: ", sb->s_id, - function, line); - vprintk(fmt, args); - printk("\n"); + vaf.fmt = fmt; + vaf.va = &args; + printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: %pV\n", + sb->s_id, function, line, &vaf); va_end(args); if ((sb->s_flags & MS_RDONLY) == 0) { @@ -2715,12 +2716,12 @@ static void print_daily_error_info(unsigned long arg) es->s_first_error_func, le32_to_cpu(es->s_first_error_line)); if (es->s_first_error_ino) - printk(": inode %u", + printk(KERN_CONT ": inode %u", le32_to_cpu(es->s_first_error_ino)); if (es->s_first_error_block) - printk(": block %llu", (unsigned long long) + printk(KERN_CONT ": block %llu", (unsigned long long) le64_to_cpu(es->s_first_error_block)); - printk("\n"); + printk(KERN_CONT "\n"); } if (es->s_last_error_time) { printk(KERN_NOTICE "EXT4-fs (%s): last error at time %u: %.*s:%d", @@ -2729,12 +2730,12 @@ static void print_daily_error_info(unsigned long arg) es->s_last_error_func, le32_to_cpu(es->s_last_error_line)); if (es->s_last_error_ino) - printk(": inode %u", + printk(KERN_CONT ": inode %u", le32_to_cpu(es->s_last_error_ino)); if (es->s_last_error_block) - printk(": block %llu", (unsigned long long) + printk(KERN_CONT ": block %llu", (unsigned long long) le64_to_cpu(es->s_last_error_block)); - printk("\n"); + printk(KERN_CONT "\n"); } mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ); /* Once a day */ } From 559cce698eaf4ccecb2213b2519ea3a0413e5155 Mon Sep 17 00:00:00 2001 From: Taesoo Kim Date: Wed, 12 Oct 2016 23:19:18 -0400 Subject: [PATCH 061/884] jbd2: fix incorrect unlock on j_list_lock When 'jh->b_transaction == transaction' (asserted by below) J_ASSERT_JH(jh, (jh->b_transaction == transaction || ... 'journal->j_list_lock' will be incorrectly unlocked, since the the lock is aquired only at the end of if / else-if statements (missing the else case). Signed-off-by: Taesoo Kim Signed-off-by: Theodore Ts'o Reviewed-by: Andreas Dilger Fixes: 6e4862a5bb9d12be87e4ea5d9a60836ebed71d28 Cc: stable@vger.kernel.org # 3.14+ --- fs/jbd2/transaction.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 3d8246a9faa4..e1652665bd93 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1149,6 +1149,7 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh) JBUFFER_TRACE(jh, "file as BJ_Reserved"); spin_lock(&journal->j_list_lock); __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved); + spin_unlock(&journal->j_list_lock); } else if (jh->b_transaction == journal->j_committing_transaction) { /* first access by this transaction */ jh->b_modified = 0; @@ -1156,8 +1157,8 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh) JBUFFER_TRACE(jh, "set next transaction"); spin_lock(&journal->j_list_lock); jh->b_next_transaction = transaction; + spin_unlock(&journal->j_list_lock); } - spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); /* From c4704a4fbe834eee4109ca064131d440941f6235 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 12 Oct 2016 23:24:51 -0400 Subject: [PATCH 062/884] ext4: do not advertise encryption support when disabled The sysfs file /sys/fs/ext4/features/encryption was present on kernels compiled with CONFIG_EXT4_FS_ENCRYPTION=n. This was misleading because such kernels do not actually support ext4 encryption. Therefore, only provide this file on kernels compiled with CONFIG_EXT4_FS_ENCRYPTION=y. Note: since the ext4 feature files are all hardcoded to have a contents of "supported", it really is the presence or absence of the file that is significant, not the contents (and this change reflects that). Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/sysfs.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index 73bcfd41f5f2..42145be5c6b4 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -223,14 +223,18 @@ static struct attribute *ext4_attrs[] = { EXT4_ATTR_FEATURE(lazy_itable_init); EXT4_ATTR_FEATURE(batched_discard); EXT4_ATTR_FEATURE(meta_bg_resize); +#ifdef CONFIG_EXT4_FS_ENCRYPTION EXT4_ATTR_FEATURE(encryption); +#endif EXT4_ATTR_FEATURE(metadata_csum_seed); static struct attribute *ext4_feat_attrs[] = { ATTR_LIST(lazy_itable_init), ATTR_LIST(batched_discard), ATTR_LIST(meta_bg_resize), +#ifdef CONFIG_EXT4_FS_ENCRYPTION ATTR_LIST(encryption), +#endif ATTR_LIST(metadata_csum_seed), NULL, }; From fb4454376df9d820d95452d71dd83da6971f9338 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 12 Oct 2016 23:30:16 -0400 Subject: [PATCH 063/884] fscrypto: make XTS tweak initialization endian-independent The XTS tweak (or IV) was initialized differently on little endian and big endian systems. Because the ciphertext depends on the XTS tweak, it was not possible to use an encrypted filesystem created by a little endian system on a big endian system and vice versa, even if they shared the same PAGE_SIZE. Fix this by always using little endian. This will break hypothetical big endian users of ext4 or f2fs encryption. However, all users we are aware of are little endian, and it's believed that "real" big endian users are unlikely to exist yet. So this might as well be fixed now before it's too late. Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/crypto/crypto.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c index 61057b7dbddb..98f87fe8f186 100644 --- a/fs/crypto/crypto.c +++ b/fs/crypto/crypto.c @@ -151,7 +151,10 @@ static int do_page_crypto(struct inode *inode, struct page *src_page, struct page *dest_page, gfp_t gfp_flags) { - u8 xts_tweak[FS_XTS_TWEAK_SIZE]; + struct { + __le64 index; + u8 padding[FS_XTS_TWEAK_SIZE - sizeof(__le64)]; + } xts_tweak; struct skcipher_request *req = NULL; DECLARE_FS_COMPLETION_RESULT(ecr); struct scatterlist dst, src; @@ -171,17 +174,15 @@ static int do_page_crypto(struct inode *inode, req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, page_crypt_complete, &ecr); - BUILD_BUG_ON(FS_XTS_TWEAK_SIZE < sizeof(index)); - memcpy(xts_tweak, &index, sizeof(index)); - memset(&xts_tweak[sizeof(index)], 0, - FS_XTS_TWEAK_SIZE - sizeof(index)); + BUILD_BUG_ON(sizeof(xts_tweak) != FS_XTS_TWEAK_SIZE); + xts_tweak.index = cpu_to_le64(index); + memset(xts_tweak.padding, 0, sizeof(xts_tweak.padding)); sg_init_table(&dst, 1); sg_set_page(&dst, dest_page, PAGE_SIZE, 0); sg_init_table(&src, 1); sg_set_page(&src, src_page, PAGE_SIZE, 0); - skcipher_request_set_crypt(req, &src, &dst, PAGE_SIZE, - xts_tweak); + skcipher_request_set_crypt(req, &src, &dst, PAGE_SIZE, &xts_tweak); if (rw == FS_DECRYPT) res = crypto_skcipher_decrypt(req); else From 77da3da0b22a67508eb1cf2b241a1fe852a6cb1a Mon Sep 17 00:00:00 2001 From: Aaron Brice Date: Mon, 10 Oct 2016 11:39:52 -0700 Subject: [PATCH 064/884] mmc: sdhci-esdhc-imx: Correct two register accesses - The DMA error interrupt bit is in a different position as compared to the sdhci standard. This is accounted for in many cases, but not handled in the case of clearing the INT_STATUS register by writing a 1 to that location. - The HOST_CONTROL register is very different as compared to the sdhci standard. This is accounted for in the write case, but not when read back out (which it is in the sdhci code). Signed-off-by: Dave Russell Signed-off-by: Aaron Brice Acked-by: Dong Aisheng Acked-by: Adrian Hunter Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-esdhc-imx.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index 1f54fd8755c8..7123ef96ed18 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -346,7 +346,8 @@ static void esdhc_writel_le(struct sdhci_host *host, u32 val, int reg) struct pltfm_imx_data *imx_data = sdhci_pltfm_priv(pltfm_host); u32 data; - if (unlikely(reg == SDHCI_INT_ENABLE || reg == SDHCI_SIGNAL_ENABLE)) { + if (unlikely(reg == SDHCI_INT_ENABLE || reg == SDHCI_SIGNAL_ENABLE || + reg == SDHCI_INT_STATUS)) { if ((val & SDHCI_INT_CARD_INT) && !esdhc_is_usdhc(imx_data)) { /* * Clear and then set D3CD bit to avoid missing the @@ -555,6 +556,25 @@ static void esdhc_writew_le(struct sdhci_host *host, u16 val, int reg) esdhc_clrset_le(host, 0xffff, val, reg); } +static u8 esdhc_readb_le(struct sdhci_host *host, int reg) +{ + u8 ret; + u32 val; + + switch (reg) { + case SDHCI_HOST_CONTROL: + val = readl(host->ioaddr + reg); + + ret = val & SDHCI_CTRL_LED; + ret |= (val >> 5) & SDHCI_CTRL_DMA_MASK; + ret |= (val & ESDHC_CTRL_4BITBUS); + ret |= (val & ESDHC_CTRL_8BITBUS) << 3; + return ret; + } + + return readb(host->ioaddr + reg); +} + static void esdhc_writeb_le(struct sdhci_host *host, u8 val, int reg) { struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); @@ -947,6 +967,7 @@ static void esdhc_set_timeout(struct sdhci_host *host, struct mmc_command *cmd) static struct sdhci_ops sdhci_esdhc_ops = { .read_l = esdhc_readl_le, .read_w = esdhc_readw_le, + .read_b = esdhc_readb_le, .write_l = esdhc_writel_le, .write_w = esdhc_writew_le, .write_b = esdhc_writeb_le, From 5e2bd93b8fcac8c0cf83f189d996831fb21f2db3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20de=20Bretagne?= Date: Sun, 9 Oct 2016 15:51:05 +0200 Subject: [PATCH 065/884] Bluetooth: hci_bcm: Fix autosuspend PM for Lenovo ThinkPad 8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ACPI table for BCM2E55 of Lenovo ThinkPad 8 is not correct. Set correctly IRQ polarity for this device, fixing the issue of bluetooth never resuming after autosuspend PM. Signed-off-by: Jérôme de Bretagne Signed-off-by: Marcel Holtmann --- drivers/bluetooth/hci_bcm.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index 5ccb90ef0146..8f6c23c20c52 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -643,6 +643,14 @@ static const struct dmi_system_id bcm_wrong_irq_dmi_table[] = { }, .driver_data = &acpi_active_low, }, + { /* Handle ThinkPad 8 tablets with BCM2E55 chipset ACPI ID */ + .ident = "Lenovo ThinkPad 8", + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "ThinkPad 8"), + }, + .driver_data = &acpi_active_low, + }, { } }; From 233c9edcca0136f2b9b304fd4f32e9bb6ce88ea9 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 12 Oct 2016 09:09:44 +0300 Subject: [PATCH 066/884] afs: unmapping the wrong buffer We switched from kmap_atomic() to kmap() so the kunmap() calls need to be updated to match. Fixes: d001648ec7cf ('rxrpc: Don't expose skbs to in-kernel users [ver #2]') Signed-off-by: Dan Carpenter Signed-off-by: David Howells --- fs/afs/fsclient.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 96f4d764d1a6..31c616ab9b40 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -364,7 +364,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) buffer = kmap(page); ret = afs_extract_data(call, buffer, call->count, true); - kunmap(buffer); + kunmap(page); if (ret < 0) return ret; } @@ -397,7 +397,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) page = call->reply3; buffer = kmap(page); memset(buffer + call->count, 0, PAGE_SIZE - call->count); - kunmap(buffer); + kunmap(page); } _leave(" = 0 [done]"); From 54fde4234579d3b1311b3ed1a1e95526a7cfdcd7 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 13 Oct 2016 08:39:52 +0100 Subject: [PATCH 067/884] rxrpc: Fix checker warning by not passing always-zero value to ERR_PTR() Fix the following checker warning: net/rxrpc/call_object.c:279 rxrpc_new_client_call() warn: passing zero to 'ERR_PTR' where a value that's always zero is passed to ERR_PTR() so that it can be passed to a tracepoint in an auxiliary pointer field. Just pass NULL instead to the tracepoint. Fixes: a84a46d73050 ("rxrpc: Add some additional call tracing") Reported-by: Dan Carpenter Signed-off-by: David Howells --- net/rxrpc/call_object.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 4353a29f3b57..1ed18d8c9c9f 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -276,7 +276,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, goto error; trace_rxrpc_call(call, rxrpc_call_connected, atomic_read(&call->usage), - here, ERR_PTR(ret)); + here, NULL); spin_lock_bh(&call->conn->params.peer->lock); hlist_add_head(&call->error_link, From 07096f612fdf2bb5578cd1fecb2884bdbb1cde42 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 13 Oct 2016 08:43:17 +0100 Subject: [PATCH 068/884] rxrpc: Fix checking of error from ip6_route_output() ip6_route_output() doesn't return a negative error when it fails, rather the ->error field of the returned dst_entry struct needs to be checked. Reported-by: Dan Carpenter Fixes: 75b54cb57ca3 ("rxrpc: Add IPv6 support") Signed-off-by: David Howells --- net/rxrpc/peer_object.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 941b724d523b..862eea6b266c 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -193,8 +193,8 @@ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer) fl6->fl6_dport = htons(7001); fl6->fl6_sport = htons(7000); dst = ip6_route_output(&init_net, NULL, fl6); - if (IS_ERR(dst)) { - _leave(" [route err %ld]", PTR_ERR(dst)); + if (dst->error) { + _leave(" [route err %d]", dst->error); return; } break; From cf4747d7535a936105f0abe8d8109d3fe339162b Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Wed, 12 Oct 2016 13:54:04 -0500 Subject: [PATCH 069/884] rtlwifi: Fix regression caused by commit d86e64768859 In commit d86e64768859 ("rtlwifi: rtl818x: constify local structures"), the configuration struct for most of the drivers was changed to be constant. The problem is that five of the modified drivers need to be able to update the firmware name based on the exact model of the card. As the file names were stored in one of the members of that struct, these drivers would fail with a kernel BUG splat when they tried to update the firmware name. Rather than reverting the previous commit, I used a suggestion by Johannes Berg and made the firmware file name pointers be local to the routines that update the software variables. The configuration struct of rtl8192cu, which was not touched in the previous patch, is now constantfied. Fixes: d86e64768859 ("rtlwifi: rtl818x: constify local structures") Suggested-by: Johannes Berg Signed-off-by: Larry Finger Cc: Stable # 4.8 Cc: Julia Lawall Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/core.c | 2 +- .../wireless/realtek/rtlwifi/rtl8188ee/sw.c | 8 ++++---- .../wireless/realtek/rtlwifi/rtl8192ce/sw.c | 13 +++++-------- .../wireless/realtek/rtlwifi/rtl8192cu/sw.c | 12 ++++++------ .../wireless/realtek/rtlwifi/rtl8192de/sw.c | 6 +++--- .../wireless/realtek/rtlwifi/rtl8192ee/sw.c | 8 ++++---- .../wireless/realtek/rtlwifi/rtl8192se/sw.c | 9 +++++---- .../wireless/realtek/rtlwifi/rtl8723ae/sw.c | 12 +++++------- .../wireless/realtek/rtlwifi/rtl8723be/sw.c | 6 +++--- .../wireless/realtek/rtlwifi/rtl8821ae/sw.c | 18 +++++++++--------- drivers/net/wireless/realtek/rtlwifi/wifi.h | 2 -- 11 files changed, 45 insertions(+), 51 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c index f95760c13c56..8e7f23c11680 100644 --- a/drivers/net/wireless/realtek/rtlwifi/core.c +++ b/drivers/net/wireless/realtek/rtlwifi/core.c @@ -111,7 +111,7 @@ static void rtl_fw_do_work(const struct firmware *firmware, void *context, if (!err) goto found_alt; } - pr_err("Firmware %s not available\n", rtlpriv->cfg->fw_name); + pr_err("Selected firmware is not available\n"); rtlpriv->max_fw_size = 0; return; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c index e7b11b40e68d..f361808def47 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c @@ -86,6 +86,7 @@ int rtl88e_init_sw_vars(struct ieee80211_hw *hw) struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); u8 tid; + char *fw_name; rtl8188ee_bt_reg_init(hw); rtlpriv->dm.dm_initialgain_enable = 1; @@ -169,10 +170,10 @@ int rtl88e_init_sw_vars(struct ieee80211_hw *hw) return 1; } - rtlpriv->cfg->fw_name = "rtlwifi/rtl8188efw.bin"; + fw_name = "rtlwifi/rtl8188efw.bin"; rtlpriv->max_fw_size = 0x8000; - pr_info("Using firmware %s\n", rtlpriv->cfg->fw_name); - err = request_firmware_nowait(THIS_MODULE, 1, rtlpriv->cfg->fw_name, + pr_info("Using firmware %s\n", fw_name); + err = request_firmware_nowait(THIS_MODULE, 1, fw_name, rtlpriv->io.dev, GFP_KERNEL, hw, rtl_fw_cb); if (err) { @@ -284,7 +285,6 @@ static const struct rtl_hal_cfg rtl88ee_hal_cfg = { .bar_id = 2, .write_readback = true, .name = "rtl88e_pci", - .fw_name = "rtlwifi/rtl8188efw.bin", .ops = &rtl8188ee_hal_ops, .mod_params = &rtl88ee_mod_params, diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c index 87aa209ae325..8b6e37ce3f66 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c @@ -96,6 +96,7 @@ int rtl92c_init_sw_vars(struct ieee80211_hw *hw) struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); struct rtl_hal *rtlhal = rtl_hal(rtl_priv(hw)); + char *fw_name = "rtlwifi/rtl8192cfwU.bin"; rtl8192ce_bt_reg_init(hw); @@ -167,15 +168,12 @@ int rtl92c_init_sw_vars(struct ieee80211_hw *hw) } /* request fw */ - if (IS_VENDOR_UMC_A_CUT(rtlhal->version) && - !IS_92C_SERIAL(rtlhal->version)) - rtlpriv->cfg->fw_name = "rtlwifi/rtl8192cfwU.bin"; - else if (IS_81XXC_VENDOR_UMC_B_CUT(rtlhal->version)) - rtlpriv->cfg->fw_name = "rtlwifi/rtl8192cfwU_B.bin"; + if (IS_81XXC_VENDOR_UMC_B_CUT(rtlhal->version)) + fw_name = "rtlwifi/rtl8192cfwU_B.bin"; rtlpriv->max_fw_size = 0x4000; - pr_info("Using firmware %s\n", rtlpriv->cfg->fw_name); - err = request_firmware_nowait(THIS_MODULE, 1, rtlpriv->cfg->fw_name, + pr_info("Using firmware %s\n", fw_name); + err = request_firmware_nowait(THIS_MODULE, 1, fw_name, rtlpriv->io.dev, GFP_KERNEL, hw, rtl_fw_cb); if (err) { @@ -262,7 +260,6 @@ static const struct rtl_hal_cfg rtl92ce_hal_cfg = { .bar_id = 2, .write_readback = true, .name = "rtl92c_pci", - .fw_name = "rtlwifi/rtl8192cfw.bin", .ops = &rtl8192ce_hal_ops, .mod_params = &rtl92ce_mod_params, diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c index 7c6f7f0d18c6..f953320f0e23 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/sw.c @@ -59,6 +59,7 @@ static int rtl92cu_init_sw_vars(struct ieee80211_hw *hw) { struct rtl_priv *rtlpriv = rtl_priv(hw); int err; + char *fw_name; rtlpriv->dm.dm_initialgain_enable = true; rtlpriv->dm.dm_flag = 0; @@ -77,18 +78,18 @@ static int rtl92cu_init_sw_vars(struct ieee80211_hw *hw) } if (IS_VENDOR_UMC_A_CUT(rtlpriv->rtlhal.version) && !IS_92C_SERIAL(rtlpriv->rtlhal.version)) { - rtlpriv->cfg->fw_name = "rtlwifi/rtl8192cufw_A.bin"; + fw_name = "rtlwifi/rtl8192cufw_A.bin"; } else if (IS_81XXC_VENDOR_UMC_B_CUT(rtlpriv->rtlhal.version)) { - rtlpriv->cfg->fw_name = "rtlwifi/rtl8192cufw_B.bin"; + fw_name = "rtlwifi/rtl8192cufw_B.bin"; } else { - rtlpriv->cfg->fw_name = "rtlwifi/rtl8192cufw_TMSC.bin"; + fw_name = "rtlwifi/rtl8192cufw_TMSC.bin"; } /* provide name of alternative file */ rtlpriv->cfg->alt_fw_name = "rtlwifi/rtl8192cufw.bin"; - pr_info("Loading firmware %s\n", rtlpriv->cfg->fw_name); + pr_info("Loading firmware %s\n", fw_name); rtlpriv->max_fw_size = 0x4000; err = request_firmware_nowait(THIS_MODULE, 1, - rtlpriv->cfg->fw_name, rtlpriv->io.dev, + fw_name, rtlpriv->io.dev, GFP_KERNEL, hw, rtl_fw_cb); return err; } @@ -187,7 +188,6 @@ static struct rtl_hal_usbint_cfg rtl92cu_interface_cfg = { static struct rtl_hal_cfg rtl92cu_hal_cfg = { .name = "rtl92c_usb", - .fw_name = "rtlwifi/rtl8192cufw.bin", .ops = &rtl8192cu_hal_ops, .mod_params = &rtl92cu_mod_params, .usb_interface_cfg = &rtl92cu_interface_cfg, diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c index 0538a4d09568..1ebfee18882f 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c @@ -92,6 +92,7 @@ static int rtl92d_init_sw_vars(struct ieee80211_hw *hw) u8 tid; struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); + char *fw_name = "rtlwifi/rtl8192defw.bin"; rtlpriv->dm.dm_initialgain_enable = true; rtlpriv->dm.dm_flag = 0; @@ -181,10 +182,10 @@ static int rtl92d_init_sw_vars(struct ieee80211_hw *hw) rtlpriv->max_fw_size = 0x8000; pr_info("Driver for Realtek RTL8192DE WLAN interface\n"); - pr_info("Loading firmware file %s\n", rtlpriv->cfg->fw_name); + pr_info("Loading firmware file %s\n", fw_name); /* request fw */ - err = request_firmware_nowait(THIS_MODULE, 1, rtlpriv->cfg->fw_name, + err = request_firmware_nowait(THIS_MODULE, 1, fw_name, rtlpriv->io.dev, GFP_KERNEL, hw, rtl_fw_cb); if (err) { @@ -266,7 +267,6 @@ static const struct rtl_hal_cfg rtl92de_hal_cfg = { .bar_id = 2, .write_readback = true, .name = "rtl8192de", - .fw_name = "rtlwifi/rtl8192defw.bin", .ops = &rtl8192de_hal_ops, .mod_params = &rtl92de_mod_params, diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c index ac299cbe59b0..46b605de36e7 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c @@ -91,6 +91,7 @@ int rtl92ee_init_sw_vars(struct ieee80211_hw *hw) struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); int err = 0; + char *fw_name; rtl92ee_bt_reg_init(hw); rtlpci->msi_support = rtlpriv->cfg->mod_params->msi_support; @@ -170,11 +171,11 @@ int rtl92ee_init_sw_vars(struct ieee80211_hw *hw) } /* request fw */ - rtlpriv->cfg->fw_name = "rtlwifi/rtl8192eefw.bin"; + fw_name = "rtlwifi/rtl8192eefw.bin"; rtlpriv->max_fw_size = 0x8000; - pr_info("Using firmware %s\n", rtlpriv->cfg->fw_name); - err = request_firmware_nowait(THIS_MODULE, 1, rtlpriv->cfg->fw_name, + pr_info("Using firmware %s\n", fw_name); + err = request_firmware_nowait(THIS_MODULE, 1, fw_name, rtlpriv->io.dev, GFP_KERNEL, hw, rtl_fw_cb); if (err) { @@ -266,7 +267,6 @@ static const struct rtl_hal_cfg rtl92ee_hal_cfg = { .bar_id = 2, .write_readback = true, .name = "rtl92ee_pci", - .fw_name = "rtlwifi/rtl8192eefw.bin", .ops = &rtl8192ee_hal_ops, .mod_params = &rtl92ee_mod_params, diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c index 5e8e02d5de8a..3e1eaeac4fdc 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c @@ -89,12 +89,13 @@ static void rtl92se_fw_cb(const struct firmware *firmware, void *context) struct ieee80211_hw *hw = context; struct rtl_priv *rtlpriv = rtl_priv(hw); struct rt_firmware *pfirmware = NULL; + char *fw_name = "rtlwifi/rtl8192sefw.bin"; RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, "Firmware callback routine entered!\n"); complete(&rtlpriv->firmware_loading_complete); if (!firmware) { - pr_err("Firmware %s not available\n", rtlpriv->cfg->fw_name); + pr_err("Firmware %s not available\n", fw_name); rtlpriv->max_fw_size = 0; return; } @@ -117,6 +118,7 @@ static int rtl92s_init_sw_vars(struct ieee80211_hw *hw) struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); int err = 0; u16 earlyrxthreshold = 7; + char *fw_name = "rtlwifi/rtl8192sefw.bin"; rtlpriv->dm.dm_initialgain_enable = true; rtlpriv->dm.dm_flag = 0; @@ -214,9 +216,9 @@ static int rtl92s_init_sw_vars(struct ieee80211_hw *hw) rtlpriv->max_fw_size = RTL8190_MAX_FIRMWARE_CODE_SIZE*2 + sizeof(struct fw_hdr); pr_info("Driver for Realtek RTL8192SE/RTL8191SE\n" - "Loading firmware %s\n", rtlpriv->cfg->fw_name); + "Loading firmware %s\n", fw_name); /* request fw */ - err = request_firmware_nowait(THIS_MODULE, 1, rtlpriv->cfg->fw_name, + err = request_firmware_nowait(THIS_MODULE, 1, fw_name, rtlpriv->io.dev, GFP_KERNEL, hw, rtl92se_fw_cb); if (err) { @@ -310,7 +312,6 @@ static const struct rtl_hal_cfg rtl92se_hal_cfg = { .bar_id = 1, .write_readback = false, .name = "rtl92s_pci", - .fw_name = "rtlwifi/rtl8192sefw.bin", .ops = &rtl8192se_hal_ops, .mod_params = &rtl92se_mod_params, diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c index 89c828ad89f4..c51a9e8234e9 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c @@ -94,6 +94,7 @@ int rtl8723e_init_sw_vars(struct ieee80211_hw *hw) struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); struct rtl_hal *rtlhal = rtl_hal(rtl_priv(hw)); int err = 0; + char *fw_name = "rtlwifi/rtl8723fw.bin"; rtl8723e_bt_reg_init(hw); @@ -176,14 +177,12 @@ int rtl8723e_init_sw_vars(struct ieee80211_hw *hw) return 1; } - if (IS_VENDOR_8723_A_CUT(rtlhal->version)) - rtlpriv->cfg->fw_name = "rtlwifi/rtl8723fw.bin"; - else if (IS_81xxC_VENDOR_UMC_B_CUT(rtlhal->version)) - rtlpriv->cfg->fw_name = "rtlwifi/rtl8723fw_B.bin"; + if (IS_81xxC_VENDOR_UMC_B_CUT(rtlhal->version)) + fw_name = "rtlwifi/rtl8723fw_B.bin"; rtlpriv->max_fw_size = 0x6000; - pr_info("Using firmware %s\n", rtlpriv->cfg->fw_name); - err = request_firmware_nowait(THIS_MODULE, 1, rtlpriv->cfg->fw_name, + pr_info("Using firmware %s\n", fw_name); + err = request_firmware_nowait(THIS_MODULE, 1, fw_name, rtlpriv->io.dev, GFP_KERNEL, hw, rtl_fw_cb); if (err) { @@ -280,7 +279,6 @@ static const struct rtl_hal_cfg rtl8723e_hal_cfg = { .bar_id = 2, .write_readback = true, .name = "rtl8723e_pci", - .fw_name = "rtlwifi/rtl8723efw.bin", .ops = &rtl8723e_hal_ops, .mod_params = &rtl8723e_mod_params, .maps[SYS_ISO_CTRL] = REG_SYS_ISO_CTRL, diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c index 20b53f035483..847644d1f5f5 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c @@ -91,6 +91,7 @@ int rtl8723be_init_sw_vars(struct ieee80211_hw *hw) struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); struct rtl_mac *mac = rtl_mac(rtl_priv(hw)); + char *fw_name = "rtlwifi/rtl8723befw.bin"; rtl8723be_bt_reg_init(hw); rtlpriv->btcoexist.btc_ops = rtl_btc_get_ops_pointer(); @@ -184,8 +185,8 @@ int rtl8723be_init_sw_vars(struct ieee80211_hw *hw) } rtlpriv->max_fw_size = 0x8000; - pr_info("Using firmware %s\n", rtlpriv->cfg->fw_name); - err = request_firmware_nowait(THIS_MODULE, 1, rtlpriv->cfg->fw_name, + pr_info("Using firmware %s\n", fw_name); + err = request_firmware_nowait(THIS_MODULE, 1, fw_name, rtlpriv->io.dev, GFP_KERNEL, hw, rtl_fw_cb); if (err) { @@ -280,7 +281,6 @@ static const struct rtl_hal_cfg rtl8723be_hal_cfg = { .bar_id = 2, .write_readback = true, .name = "rtl8723be_pci", - .fw_name = "rtlwifi/rtl8723befw.bin", .ops = &rtl8723be_hal_ops, .mod_params = &rtl8723be_mod_params, .maps[SYS_ISO_CTRL] = REG_SYS_ISO_CTRL, diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c index 22f687b1f133..297938e0effd 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c @@ -93,6 +93,7 @@ int rtl8821ae_init_sw_vars(struct ieee80211_hw *hw) struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); struct rtl_mac *mac = rtl_mac(rtl_priv(hw)); struct rtl_hal *rtlhal = rtl_hal(rtl_priv(hw)); + char *fw_name, *wowlan_fw_name; rtl8821ae_bt_reg_init(hw); rtlpriv->btcoexist.btc_ops = rtl_btc_get_ops_pointer(); @@ -203,17 +204,17 @@ int rtl8821ae_init_sw_vars(struct ieee80211_hw *hw) } if (rtlhal->hw_type == HARDWARE_TYPE_RTL8812AE) { - rtlpriv->cfg->fw_name = "rtlwifi/rtl8812aefw.bin"; - rtlpriv->cfg->wowlan_fw_name = "rtlwifi/rtl8812aefw_wowlan.bin"; + fw_name = "rtlwifi/rtl8812aefw.bin"; + wowlan_fw_name = "rtlwifi/rtl8812aefw_wowlan.bin"; } else { - rtlpriv->cfg->fw_name = "rtlwifi/rtl8821aefw.bin"; - rtlpriv->cfg->wowlan_fw_name = "rtlwifi/rtl8821aefw_wowlan.bin"; + fw_name = "rtlwifi/rtl8821aefw.bin"; + wowlan_fw_name = "rtlwifi/rtl8821aefw_wowlan.bin"; } rtlpriv->max_fw_size = 0x8000; /*load normal firmware*/ - pr_info("Using firmware %s\n", rtlpriv->cfg->fw_name); - err = request_firmware_nowait(THIS_MODULE, 1, rtlpriv->cfg->fw_name, + pr_info("Using firmware %s\n", fw_name); + err = request_firmware_nowait(THIS_MODULE, 1, fw_name, rtlpriv->io.dev, GFP_KERNEL, hw, rtl_fw_cb); if (err) { @@ -222,9 +223,9 @@ int rtl8821ae_init_sw_vars(struct ieee80211_hw *hw) return 1; } /*load wowlan firmware*/ - pr_info("Using firmware %s\n", rtlpriv->cfg->wowlan_fw_name); + pr_info("Using firmware %s\n", wowlan_fw_name); err = request_firmware_nowait(THIS_MODULE, 1, - rtlpriv->cfg->wowlan_fw_name, + wowlan_fw_name, rtlpriv->io.dev, GFP_KERNEL, hw, rtl_wowlan_fw_cb); if (err) { @@ -320,7 +321,6 @@ static const struct rtl_hal_cfg rtl8821ae_hal_cfg = { .bar_id = 2, .write_readback = true, .name = "rtl8821ae_pci", - .fw_name = "rtlwifi/rtl8821aefw.bin", .ops = &rtl8821ae_hal_ops, .mod_params = &rtl8821ae_mod_params, .maps[SYS_ISO_CTRL] = REG_SYS_ISO_CTRL, diff --git a/drivers/net/wireless/realtek/rtlwifi/wifi.h b/drivers/net/wireless/realtek/rtlwifi/wifi.h index 595f7d5d091a..dafe486f8448 100644 --- a/drivers/net/wireless/realtek/rtlwifi/wifi.h +++ b/drivers/net/wireless/realtek/rtlwifi/wifi.h @@ -2278,9 +2278,7 @@ struct rtl_hal_cfg { u8 bar_id; bool write_readback; char *name; - char *fw_name; char *alt_fw_name; - char *wowlan_fw_name; struct rtl_hal_ops *ops; struct rtl_mod_params *mod_params; struct rtl_hal_usbint_cfg *usb_interface_cfg; From f67b107d4ceddcf7aa65b706aaaf50d68edb52a6 Mon Sep 17 00:00:00 2001 From: Marty Faltesek Date: Mon, 10 Oct 2016 19:00:04 +0300 Subject: [PATCH 070/884] ath10k: cache calibration data when the core is stopped Commit 0b8e3c4ca29f ("ath10k: move cal data len to hw_params") broke retrieving the calibration data from cal_data debugfs file. The length of file was always zero. The reason is: static ssize_t ath10k_debug_cal_data_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { struct ath10k *ar = file->private_data; void *buf = file->private_data; This is obviously bogus, private_data cannot contain both struct ath10k and the buffer. Fix it by caching calibration data to ar->debug.cal_data. This also allows it to be accessed when the device is not active (interface is down). The cal_data buffer is fixed size because during the first firmware probe we don't yet know what will be the lenght of the calibration data. It was simplest just to use a fixed length. There's a WARN_ON() in ath10k_debug_cal_data_fetch() if the buffer is too small. Tested with qca988x and firmware 10.2.4.70.56. Reported-by: Nikolay Martynov Fixes: 0b8e3c4ca29f ("ath10k: move cal data len to hw_params") Cc: stable@vger.kernel.org # 4.7+ Signed-off-by: Marty Faltesek [kvalo@qca.qualcomm.com: improve commit log and minor other changes] Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/core.h | 1 + drivers/net/wireless/ath/ath10k/debug.c | 79 +++++++++++++------------ 2 files changed, 42 insertions(+), 38 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h index dda49af1eb74..521f1c55c19e 100644 --- a/drivers/net/wireless/ath/ath10k/core.h +++ b/drivers/net/wireless/ath/ath10k/core.h @@ -450,6 +450,7 @@ struct ath10k_debug { u32 pktlog_filter; u32 reg_addr; u32 nf_cal_period; + void *cal_data; struct ath10k_fw_crash_data *fw_crash_data; }; diff --git a/drivers/net/wireless/ath/ath10k/debug.c b/drivers/net/wireless/ath/ath10k/debug.c index 832da6ed9f13..82a4c67f3672 100644 --- a/drivers/net/wireless/ath/ath10k/debug.c +++ b/drivers/net/wireless/ath/ath10k/debug.c @@ -30,6 +30,8 @@ /* ms */ #define ATH10K_DEBUG_HTT_STATS_INTERVAL 1000 +#define ATH10K_DEBUG_CAL_DATA_LEN 12064 + #define ATH10K_FW_CRASH_DUMP_VERSION 1 /** @@ -1451,56 +1453,51 @@ static const struct file_operations fops_fw_dbglog = { .llseek = default_llseek, }; -static int ath10k_debug_cal_data_open(struct inode *inode, struct file *file) +static int ath10k_debug_cal_data_fetch(struct ath10k *ar) { - struct ath10k *ar = inode->i_private; - void *buf; u32 hi_addr; __le32 addr; int ret; - mutex_lock(&ar->conf_mutex); + lockdep_assert_held(&ar->conf_mutex); - if (ar->state != ATH10K_STATE_ON && - ar->state != ATH10K_STATE_UTF) { - ret = -ENETDOWN; - goto err; - } - - buf = vmalloc(ar->hw_params.cal_data_len); - if (!buf) { - ret = -ENOMEM; - goto err; - } + if (WARN_ON(ar->hw_params.cal_data_len > ATH10K_DEBUG_CAL_DATA_LEN)) + return -EINVAL; hi_addr = host_interest_item_address(HI_ITEM(hi_board_data)); ret = ath10k_hif_diag_read(ar, hi_addr, &addr, sizeof(addr)); if (ret) { - ath10k_warn(ar, "failed to read hi_board_data address: %d\n", ret); - goto err_vfree; + ath10k_warn(ar, "failed to read hi_board_data address: %d\n", + ret); + return ret; } - ret = ath10k_hif_diag_read(ar, le32_to_cpu(addr), buf, + ret = ath10k_hif_diag_read(ar, le32_to_cpu(addr), ar->debug.cal_data, ar->hw_params.cal_data_len); if (ret) { ath10k_warn(ar, "failed to read calibration data: %d\n", ret); - goto err_vfree; + return ret; } - file->private_data = buf; + return 0; +} +static int ath10k_debug_cal_data_open(struct inode *inode, struct file *file) +{ + struct ath10k *ar = inode->i_private; + + mutex_lock(&ar->conf_mutex); + + if (ar->state == ATH10K_STATE_ON || + ar->state == ATH10K_STATE_UTF) { + ath10k_debug_cal_data_fetch(ar); + } + + file->private_data = ar; mutex_unlock(&ar->conf_mutex); return 0; - -err_vfree: - vfree(buf); - -err: - mutex_unlock(&ar->conf_mutex); - - return ret; } static ssize_t ath10k_debug_cal_data_read(struct file *file, @@ -1508,18 +1505,16 @@ static ssize_t ath10k_debug_cal_data_read(struct file *file, size_t count, loff_t *ppos) { struct ath10k *ar = file->private_data; - void *buf = file->private_data; - return simple_read_from_buffer(user_buf, count, ppos, - buf, ar->hw_params.cal_data_len); -} + mutex_lock(&ar->conf_mutex); -static int ath10k_debug_cal_data_release(struct inode *inode, - struct file *file) -{ - vfree(file->private_data); + count = simple_read_from_buffer(user_buf, count, ppos, + ar->debug.cal_data, + ar->hw_params.cal_data_len); - return 0; + mutex_unlock(&ar->conf_mutex); + + return count; } static ssize_t ath10k_write_ani_enable(struct file *file, @@ -1580,7 +1575,6 @@ static const struct file_operations fops_ani_enable = { static const struct file_operations fops_cal_data = { .open = ath10k_debug_cal_data_open, .read = ath10k_debug_cal_data_read, - .release = ath10k_debug_cal_data_release, .owner = THIS_MODULE, .llseek = default_llseek, }; @@ -1932,6 +1926,8 @@ void ath10k_debug_stop(struct ath10k *ar) { lockdep_assert_held(&ar->conf_mutex); + ath10k_debug_cal_data_fetch(ar); + /* Must not use _sync to avoid deadlock, we do that in * ath10k_debug_destroy(). The check for htt_stats_mask is to avoid * warning from del_timer(). */ @@ -2344,6 +2340,10 @@ int ath10k_debug_create(struct ath10k *ar) if (!ar->debug.fw_crash_data) return -ENOMEM; + ar->debug.cal_data = vzalloc(ATH10K_DEBUG_CAL_DATA_LEN); + if (!ar->debug.cal_data) + return -ENOMEM; + INIT_LIST_HEAD(&ar->debug.fw_stats.pdevs); INIT_LIST_HEAD(&ar->debug.fw_stats.vdevs); INIT_LIST_HEAD(&ar->debug.fw_stats.peers); @@ -2357,6 +2357,9 @@ void ath10k_debug_destroy(struct ath10k *ar) vfree(ar->debug.fw_crash_data); ar->debug.fw_crash_data = NULL; + vfree(ar->debug.cal_data); + ar->debug.cal_data = NULL; + ath10k_debug_fw_stats_reset(ar); kfree(ar->debug.tpc_stats); From 304e5ac118cc351eb047b6c433a89e13ea7259cf Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 11 Oct 2016 19:46:49 +0200 Subject: [PATCH 071/884] Revert "ath9k_hw: implement temperature compensation support for AR9003+" This reverts commit 171f6402e4aa ("ath9k_hw: implement temperature compensation support for AR9003+"). Some users report that this commit causes a regression in performance under some conditions. Fixes: 171f6402e4aa ("ath9k_hw: implement temperature compensation support for AR9003+") Cc: # 4.8 Signed-off-by: Felix Fietkau [kvalo@qca.qualcomm.com: improve commit log] Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath9k/ar9003_calib.c | 25 +++---------------- drivers/net/wireless/ath/ath9k/hw.h | 1 - 2 files changed, 3 insertions(+), 23 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/ar9003_calib.c b/drivers/net/wireless/ath/ath9k/ar9003_calib.c index b6f064a8d264..7e27a06e5df1 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_calib.c +++ b/drivers/net/wireless/ath/ath9k/ar9003_calib.c @@ -33,7 +33,6 @@ struct coeff { enum ar9003_cal_types { IQ_MISMATCH_CAL = BIT(0), - TEMP_COMP_CAL = BIT(1), }; static void ar9003_hw_setup_calibration(struct ath_hw *ah, @@ -59,12 +58,6 @@ static void ar9003_hw_setup_calibration(struct ath_hw *ah, /* Kick-off cal */ REG_SET_BIT(ah, AR_PHY_TIMING4, AR_PHY_TIMING4_DO_CAL); break; - case TEMP_COMP_CAL: - ath_dbg(common, CALIBRATE, - "starting Temperature Compensation Calibration\n"); - REG_SET_BIT(ah, AR_CH0_THERM, AR_CH0_THERM_LOCAL); - REG_SET_BIT(ah, AR_CH0_THERM, AR_CH0_THERM_START); - break; default: ath_err(common, "Invalid calibration type\n"); break; @@ -93,8 +86,7 @@ static bool ar9003_hw_per_calibration(struct ath_hw *ah, /* * Accumulate cal measures for active chains */ - if (cur_caldata->calCollect) - cur_caldata->calCollect(ah); + cur_caldata->calCollect(ah); ah->cal_samples++; if (ah->cal_samples >= cur_caldata->calNumSamples) { @@ -107,8 +99,7 @@ static bool ar9003_hw_per_calibration(struct ath_hw *ah, /* * Process accumulated data */ - if (cur_caldata->calPostProc) - cur_caldata->calPostProc(ah, numChains); + cur_caldata->calPostProc(ah, numChains); /* Calibration has finished. */ caldata->CalValid |= cur_caldata->calType; @@ -323,16 +314,9 @@ static const struct ath9k_percal_data iq_cal_single_sample = { ar9003_hw_iqcalibrate }; -static const struct ath9k_percal_data temp_cal_single_sample = { - TEMP_COMP_CAL, - MIN_CAL_SAMPLES, - PER_MAX_LOG_COUNT, -}; - static void ar9003_hw_init_cal_settings(struct ath_hw *ah) { ah->iq_caldata.calData = &iq_cal_single_sample; - ah->temp_caldata.calData = &temp_cal_single_sample; if (AR_SREV_9300_20_OR_LATER(ah)) { ah->enabled_cals |= TX_IQ_CAL; @@ -340,7 +324,7 @@ static void ar9003_hw_init_cal_settings(struct ath_hw *ah) ah->enabled_cals |= TX_IQ_ON_AGC_CAL; } - ah->supp_cals = IQ_MISMATCH_CAL | TEMP_COMP_CAL; + ah->supp_cals = IQ_MISMATCH_CAL; } #define OFF_UPPER_LT 24 @@ -1399,9 +1383,6 @@ static void ar9003_hw_init_cal_common(struct ath_hw *ah) INIT_CAL(&ah->iq_caldata); INSERT_CAL(ah, &ah->iq_caldata); - INIT_CAL(&ah->temp_caldata); - INSERT_CAL(ah, &ah->temp_caldata); - /* Initialize current pointer to first element in list */ ah->cal_list_curr = ah->cal_list; diff --git a/drivers/net/wireless/ath/ath9k/hw.h b/drivers/net/wireless/ath/ath9k/hw.h index 2a5d3ad1169c..9cbca1229bac 100644 --- a/drivers/net/wireless/ath/ath9k/hw.h +++ b/drivers/net/wireless/ath/ath9k/hw.h @@ -830,7 +830,6 @@ struct ath_hw { /* Calibration */ u32 supp_cals; struct ath9k_cal_list iq_caldata; - struct ath9k_cal_list temp_caldata; struct ath9k_cal_list adcgain_caldata; struct ath9k_cal_list adcdc_caldata; struct ath9k_cal_list *cal_list; From 1ea2643961b0d1b8d0e4a11af5aa69b0f92d0533 Mon Sep 17 00:00:00 2001 From: Adam Williamson Date: Wed, 12 Oct 2016 19:08:40 +0100 Subject: [PATCH 072/884] ath6kl: add Dell OEM SDIO I/O for the Venue 8 Pro SDIO ID 0271:0418 Signed-off-by: Alan Cox Bugzilla-ID: https://bugzilla.kernel.org/show_bug.cgi?id=67921 Reviewed-by: Steve deRosier Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath6kl/sdio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/ath/ath6kl/sdio.c b/drivers/net/wireless/ath/ath6kl/sdio.c index eab0ab976af2..76eb33679d4b 100644 --- a/drivers/net/wireless/ath/ath6kl/sdio.c +++ b/drivers/net/wireless/ath/ath6kl/sdio.c @@ -1401,6 +1401,7 @@ static const struct sdio_device_id ath6kl_sdio_devices[] = { {SDIO_DEVICE(MANUFACTURER_CODE, (MANUFACTURER_ID_AR6004_BASE | 0x0))}, {SDIO_DEVICE(MANUFACTURER_CODE, (MANUFACTURER_ID_AR6004_BASE | 0x1))}, {SDIO_DEVICE(MANUFACTURER_CODE, (MANUFACTURER_ID_AR6004_BASE | 0x2))}, + {SDIO_DEVICE(MANUFACTURER_CODE, (MANUFACTURER_ID_AR6004_BASE | 0x18))}, {}, }; From adf08d481b52824c87af028fc74dc692d179f7f4 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 13 Oct 2016 19:07:54 +0900 Subject: [PATCH 073/884] regmap: include from include/linux/regmap.h The readx_poll_timeout() macro calls usleep_range(), which is declared in . Make include/linux/regmap.h include , like include/linux/iopoll.h does. Otherwise, users of the macro will see "implicit declaration of function 'usleep_range'" error. Signed-off-by: Masahiro Yamada Signed-off-by: Mark Brown --- include/linux/regmap.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 9adc7b21903d..e5157e39ff22 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -15,6 +15,7 @@ #include #include +#include #include #include #include From 50a2c95381a7d0e453d7bdfde81d0c5f8351ba54 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 13 Oct 2016 08:27:10 +0100 Subject: [PATCH 074/884] afs: call->operation_ID sometimes used as __be32 sometimes as u32 call->operation_ID is sometimes being used as __be32 sometimes is being used as u32. Be consistent and settle on using as u32. Signed-off-by: David Howells operation_ID); + _enter("{CB.OP %u}", call->operation_ID); - _enter("{CB.OP %u}", operation_id); - - switch (operation_id) { + switch (call->operation_ID) { case CBCallBack: call->type = &afs_SRXCBCallBack; return true; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 5497c8496055..535a38d2c1d0 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -112,7 +112,7 @@ struct afs_call { bool need_attention; /* T if RxRPC poked us */ u16 service_id; /* RxRPC service ID to call */ __be16 port; /* target UDP port */ - __be32 operation_ID; /* operation ID for an incoming call */ + u32 operation_ID; /* operation ID for an incoming call */ u32 count; /* count for use in unmarshalling */ __be32 tmp; /* place to extract temporary data */ afs_dataversion_t store_version; /* updated version expected from store */ diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 477928b25940..25f05a8d21b1 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -676,10 +676,11 @@ static int afs_deliver_cm_op_id(struct afs_call *call) ASSERTCMP(call->offset, <, 4); /* the operation ID forms the first four bytes of the request data */ - ret = afs_extract_data(call, &call->operation_ID, 4, true); + ret = afs_extract_data(call, &call->tmp, 4, true); if (ret < 0) return ret; + call->operation_ID = ntohl(call->tmp); call->state = AFS_CALL_AWAIT_REQUEST; call->offset = 0; From f208b87b48d5af2ea483293eccb3998c467dfba1 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Sun, 9 Oct 2016 02:35:08 -0400 Subject: [PATCH 075/884] sh: support CPU_J2 when compiler lacks -mj2 Signed-off-by: Rich Felker --- arch/sh/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sh/Makefile b/arch/sh/Makefile index 00476662ac2c..336f33a419d9 100644 --- a/arch/sh/Makefile +++ b/arch/sh/Makefile @@ -31,7 +31,7 @@ isa-y := $(isa-y)-up endif cflags-$(CONFIG_CPU_SH2) := $(call cc-option,-m2,) -cflags-$(CONFIG_CPU_J2) := $(call cc-option,-mj2,) +cflags-$(CONFIG_CPU_J2) += $(call cc-option,-mj2,) cflags-$(CONFIG_CPU_SH2A) += $(call cc-option,-m2a,) \ $(call cc-option,-m2a-nofpu,) \ $(call cc-option,-m4-nofpu,) From a3f9d1b58a9ffce011ef4f074bfa36ae30eade28 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Tue, 11 Oct 2016 15:53:21 -0400 Subject: [PATCH 076/884] pnfs/blocklayout: fix last_write_offset incorrectly set to page boundary Commit 41963c10c47a35185e68cb9049f7a3493c94d2d7 sets the block layout's last written byte to the offset of the end of the extent rather than the end of the write which incorrectly updates the inode's size for partial-page writes. Fixes: 41963c10c47a ("pnfs/blocklayout: update last_write_offset atomically with extents") Signed-off-by: Benjamin Coddington Reviewed-by: Christoph Hellwig Tested-by: Christoph Hellwig Cc: stable@vger.kernel.org # 4.8+ Signed-off-by: Anna Schumaker --- fs/nfs/blocklayout/blocklayout.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 217847679f0e..2905479f214a 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -344,9 +344,10 @@ static void bl_write_cleanup(struct work_struct *work) u64 start = hdr->args.offset & (loff_t)PAGE_MASK; u64 end = (hdr->args.offset + hdr->args.count + PAGE_SIZE - 1) & (loff_t)PAGE_MASK; + u64 lwb = hdr->args.offset + hdr->args.count; ext_tree_mark_written(bl, start >> SECTOR_SHIFT, - (end - start) >> SECTOR_SHIFT, end); + (end - start) >> SECTOR_SHIFT, lwb); } pnfs_ld_write_done(hdr); From 68d00f332e0ba7f60f212be74ede290c9f873bc5 Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Tue, 11 Oct 2016 22:47:20 +0300 Subject: [PATCH 077/884] ip6_tunnel: fix ip6_tnl_lookup The commit ea3dc9601bda ("ip6_tunnel: Add support for wildcard tunnel endpoints.") introduces support for wildcards in tunnels endpoints, but in some rare circumstances ip6_tnl_lookup selects wrong tunnel interface relying only on source or destination address of the packet and not checking presence of wildcard in tunnels endpoints. Later in ip6_tnl_rcv this packets can be dicarded because of difference in ipproto even if fallback device have proper ipproto configuration. This patch adds checks of wildcard endpoint in tunnel avoiding such behavior Fixes: ea3dc9601bda ("ip6_tunnel: Add support for wildcard tunnel endpoints.") Signed-off-by: Vadim Fedorenko Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 6a66adba0c22..5692d6b8da95 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -157,6 +157,7 @@ ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_ hash = HASH(&any, local); for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { if (ipv6_addr_equal(local, &t->parms.laddr) && + ipv6_addr_any(&t->parms.raddr) && (t->dev->flags & IFF_UP)) return t; } @@ -164,6 +165,7 @@ ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_ hash = HASH(remote, &any); for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { if (ipv6_addr_equal(remote, &t->parms.raddr) && + ipv6_addr_any(&t->parms.laddr) && (t->dev->flags & IFF_UP)) return t; } From a220445f9f4382c36a53d8ef3e08165fa27f7e2c Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 12 Oct 2016 10:10:40 +0200 Subject: [PATCH 078/884] ipv6: correctly add local routes when lo goes up The goal of the patch is to fix this scenario: ip link add dummy1 type dummy ip link set dummy1 up ip link set lo down ; ip link set lo up After that sequence, the local route to the link layer address of dummy1 is not there anymore. When the loopback is set down, all local routes are deleted by addrconf_ifdown()/rt6_ifdown(). At this time, the rt6_info entry still exists, because the corresponding idev has a reference on it. After the rcu grace period, dst_rcu_free() is called, and thus ___dst_free(), which will set obsolete to DST_OBSOLETE_DEAD. In this case, init_loopback() is called before dst_rcu_free(), thus obsolete is still sets to something <= 0. So, the function doesn't add the route again. To avoid that race, let's check the rt6 refcnt instead. Fixes: 25fb6ca4ed9c ("net IPv6 : Fix broken IPv6 routing table after loopback down-up") Fixes: a881ae1f625c ("ipv6: don't call addrconf_dst_alloc again when enable lo") Fixes: 33d99113b110 ("ipv6: reallocate addrconf router for ipv6 address when lo device up") Reported-by: Francesco Santoro Reported-by: Samuel Gauthier CC: Balakumaran Kannan CC: Maruthi Thotad CC: Sabrina Dubroca CC: Hannes Frederic Sowa CC: Weilong Chen CC: Gao feng Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d8983e15f859..9faafe58516a 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3018,7 +3018,7 @@ static void init_loopback(struct net_device *dev) * lo device down, release this obsolete dst and * reallocate a new router for ifa. */ - if (sp_ifa->rt->dst.obsolete > 0) { + if (!atomic_read(&sp_ifa->rt->rt6i_ref)) { ip6_rt_put(sp_ifa->rt); sp_ifa->rt = NULL; } else { From 558c5eb58a5c029745b29558782d528b05aba8a5 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Wed, 12 Oct 2016 15:55:40 -0300 Subject: [PATCH 079/884] net: wan: slic_ds26522: add SPI device ID table to fix module autoload If the driver is built as a module, module alias information isn't filled so the module won't be autoloaded. Add a SPI device ID table and use the MODULE_DEVICE_TABLE() macro so the information is exported in the module. Before this patch: $ modinfo drivers/net/wan/slic_ds26522.ko | grep alias $ After this patch: $ modinfo drivers/net/wan/slic_ds26522.ko | grep alias alias: spi:ds26522 Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. Miller --- drivers/net/wan/slic_ds26522.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/wan/slic_ds26522.c b/drivers/net/wan/slic_ds26522.c index d06a887a2352..53366a2232f0 100644 --- a/drivers/net/wan/slic_ds26522.c +++ b/drivers/net/wan/slic_ds26522.c @@ -223,6 +223,12 @@ static int slic_ds26522_probe(struct spi_device *spi) return ret; } +static const struct spi_device_id slic_ds26522_id[] = { + { .name = "ds26522" }, + { /* sentinel */ }, +}; +MODULE_DEVICE_TABLE(spi, slic_ds26522_id); + static const struct of_device_id slic_ds26522_match[] = { { .compatible = "maxim,ds26522", @@ -239,6 +245,7 @@ static struct spi_driver slic_ds26522_driver = { }, .probe = slic_ds26522_probe, .remove = slic_ds26522_remove, + .id_table = slic_ds26522_id, }; static int __init slic_ds26522_init(void) From 485c9d43382172c1b2ecf42deb4476b5e37e1264 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Wed, 12 Oct 2016 15:55:41 -0300 Subject: [PATCH 080/884] net: wan: slic_ds26522: Export OF module alias information When the device is registered via OF, the OF table is used to match the driver instead of the SPI device ID table, but the entries in the later are used as aliasses to load the module if the driver was not built-in. This is because the SPI core always reports an SPI module alias instead of an OF one, but that could change so it's better to always export it. Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. Miller --- drivers/net/wan/slic_ds26522.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wan/slic_ds26522.c b/drivers/net/wan/slic_ds26522.c index 53366a2232f0..b776a0ab106c 100644 --- a/drivers/net/wan/slic_ds26522.c +++ b/drivers/net/wan/slic_ds26522.c @@ -235,6 +235,7 @@ static const struct of_device_id slic_ds26522_match[] = { }, {}, }; +MODULE_DEVICE_TABLE(of, slic_ds26522_match); static struct spi_driver slic_ds26522_driver = { .driver = { From 059f01410822b9a37fac0f4da8c2202c2d39e7a5 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Wed, 12 Oct 2016 16:05:59 -0300 Subject: [PATCH 081/884] net: wan: slic_ds26522: Allow driver to built if COMPILE_TEST is enabled The driver only has runtime but no build time dependency with FSL_SOC || ARCH_MXC || ARCH_LAYERSCAPE. So it can be built for testing purposes if the COMPILE_TEST option is enabled. This is useful to have more build coverage and make sure that the driver is not affected by changes that could cause build regressions. Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. Miller --- drivers/net/wan/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wan/Kconfig b/drivers/net/wan/Kconfig index 33ab3345d333..4e9fe75d7067 100644 --- a/drivers/net/wan/Kconfig +++ b/drivers/net/wan/Kconfig @@ -294,7 +294,7 @@ config FSL_UCC_HDLC config SLIC_DS26522 tristate "Slic Maxim ds26522 card support" depends on SPI - depends on FSL_SOC || ARCH_MXC || ARCH_LAYERSCAPE + depends on FSL_SOC || ARCH_MXC || ARCH_LAYERSCAPE || COMPILE_TEST help This module initializes and configures the slic maxim card in T1 or E1 mode. From f56f7d2e1cbe6a34dbda177d4d6245d8f8cb94bd Mon Sep 17 00:00:00 2001 From: Alexander Alemayhu Date: Thu, 13 Oct 2016 17:09:51 +0200 Subject: [PATCH 082/884] Documentation/networking: update git urls to use https over http This fixes the following errors when trying to clone the urls: Cloning into 'net'... fatal: repository 'http://git.kernel.org/cgit/linux/kernel/git/davem/net.git/' not found Cloning into 'net-next'... fatal: repository 'http://git.kernel.org/cgit/linux/kernel/git/davem/net-next.git/' not found Cloning into 'linux'... fatal: repository 'http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/' not found Cloning into 'stable-queue'... fatal: repository 'http://git.kernel.org/cgit/linux/kernel/git/stable/stable-queue.git/' not found Signed-off-by: Alexander Alemayhu Signed-off-by: David S. Miller --- Documentation/networking/netdev-FAQ.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/networking/netdev-FAQ.txt b/Documentation/networking/netdev-FAQ.txt index 0fe1c6e0dbcd..a20b2fae942b 100644 --- a/Documentation/networking/netdev-FAQ.txt +++ b/Documentation/networking/netdev-FAQ.txt @@ -29,8 +29,8 @@ A: There are always two trees (git repositories) in play. Both are driven Linus, and net-next is where the new code goes for the future release. You can find the trees here: - http://git.kernel.org/?p=linux/kernel/git/davem/net.git - http://git.kernel.org/?p=linux/kernel/git/davem/net-next.git + https://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git + https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git Q: How often do changes from these trees make it to the mainline Linus tree? @@ -76,7 +76,7 @@ Q: So where are we now in this cycle? A: Load the mainline (Linus) page here: - http://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git + https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git and note the top of the "tags" section. If it is rc1, it is early in the dev cycle. If it was tagged rc7 a week ago, then a release @@ -123,7 +123,7 @@ A: Normally Greg Kroah-Hartman collects stable commits himself, but It contains the patches which Dave has selected, but not yet handed off to Greg. If Greg already has the patch, then it will be here: - http://git.kernel.org/cgit/linux/kernel/git/stable/stable-queue.git + https://git.kernel.org/pub/scm/linux/kernel/git/stable/stable-queue.git A quick way to find whether the patch is in this stable-queue is to simply clone the repo, and then git grep the mainline commit ID, e.g. From fc791b6335152c5278dc4a4991bcb2d329f806f9 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 13 Oct 2016 18:26:56 +0200 Subject: [PATCH 083/884] IB/ipoib: move back IB LL address into the hard header After the commit 9207f9d45b0a ("net: preserve IP control block during GSO segmentation"), the GSO CB and the IPoIB CB conflict. That destroy the IPoIB address information cached there, causing a severe performance regression, as better described here: http://marc.info/?l=linux-kernel&m=146787279825501&w=2 This change moves the data cached by the IPoIB driver from the skb control lock into the IPoIB hard header, as done before the commit 936d7de3d736 ("IPoIB: Stop lying about hard_header_len and use skb->cb to stash LL addresses"). In order to avoid GRO issue, on packet reception, the IPoIB driver stash into the skb a dummy pseudo header, so that the received packets have actually a hard header matching the declared length. To avoid changing the connected mode maximum mtu, the allocated head buffer size is increased by the pseudo header length. After this commit, IPoIB performances are back to pre-regression value. v2 -> v3: rebased v1 -> v2: avoid changing the max mtu, increasing the head buf size Fixes: 9207f9d45b0a ("net: preserve IP control block during GSO segmentation") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- drivers/infiniband/ulp/ipoib/ipoib.h | 20 ++++--- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 15 +++--- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 12 ++--- drivers/infiniband/ulp/ipoib/ipoib_main.c | 54 +++++++++++-------- .../infiniband/ulp/ipoib/ipoib_multicast.c | 6 ++- 5 files changed, 64 insertions(+), 43 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 7b8d2d9e2263..da12717a3eb7 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -63,6 +63,8 @@ enum ipoib_flush_level { enum { IPOIB_ENCAP_LEN = 4, + IPOIB_PSEUDO_LEN = 20, + IPOIB_HARD_LEN = IPOIB_ENCAP_LEN + IPOIB_PSEUDO_LEN, IPOIB_UD_HEAD_SIZE = IB_GRH_BYTES + IPOIB_ENCAP_LEN, IPOIB_UD_RX_SG = 2, /* max buffer needed for 4K mtu */ @@ -134,15 +136,21 @@ struct ipoib_header { u16 reserved; }; -struct ipoib_cb { - struct qdisc_skb_cb qdisc_cb; - u8 hwaddr[INFINIBAND_ALEN]; +struct ipoib_pseudo_header { + u8 hwaddr[INFINIBAND_ALEN]; }; -static inline struct ipoib_cb *ipoib_skb_cb(const struct sk_buff *skb) +static inline void skb_add_pseudo_hdr(struct sk_buff *skb) { - BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct ipoib_cb)); - return (struct ipoib_cb *)skb->cb; + char *data = skb_push(skb, IPOIB_PSEUDO_LEN); + + /* + * only the ipoib header is present now, make room for a dummy + * pseudo header and set skb field accordingly + */ + memset(data, 0, IPOIB_PSEUDO_LEN); + skb_reset_mac_header(skb); + skb_pull(skb, IPOIB_HARD_LEN); } /* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */ diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 4ad297d3de89..339a1eecdfe3 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -63,6 +63,8 @@ MODULE_PARM_DESC(cm_data_debug_level, #define IPOIB_CM_RX_DELAY (3 * 256 * HZ) #define IPOIB_CM_RX_UPDATE_MASK (0x3) +#define IPOIB_CM_RX_RESERVE (ALIGN(IPOIB_HARD_LEN, 16) - IPOIB_ENCAP_LEN) + static struct ib_qp_attr ipoib_cm_err_attr = { .qp_state = IB_QPS_ERR }; @@ -146,15 +148,15 @@ static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev, struct sk_buff *skb; int i; - skb = dev_alloc_skb(IPOIB_CM_HEAD_SIZE + 12); + skb = dev_alloc_skb(ALIGN(IPOIB_CM_HEAD_SIZE + IPOIB_PSEUDO_LEN, 16)); if (unlikely(!skb)) return NULL; /* - * IPoIB adds a 4 byte header. So we need 12 more bytes to align the + * IPoIB adds a IPOIB_ENCAP_LEN byte header, this will align the * IP header to a multiple of 16. */ - skb_reserve(skb, 12); + skb_reserve(skb, IPOIB_CM_RX_RESERVE); mapping[0] = ib_dma_map_single(priv->ca, skb->data, IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE); @@ -624,9 +626,9 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) if (wc->byte_len < IPOIB_CM_COPYBREAK) { int dlen = wc->byte_len; - small_skb = dev_alloc_skb(dlen + 12); + small_skb = dev_alloc_skb(dlen + IPOIB_CM_RX_RESERVE); if (small_skb) { - skb_reserve(small_skb, 12); + skb_reserve(small_skb, IPOIB_CM_RX_RESERVE); ib_dma_sync_single_for_cpu(priv->ca, rx_ring[wr_id].mapping[0], dlen, DMA_FROM_DEVICE); skb_copy_from_linear_data(skb, small_skb->data, dlen); @@ -663,8 +665,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) copied: skb->protocol = ((struct ipoib_header *) skb->data)->proto; - skb_reset_mac_header(skb); - skb_pull(skb, IPOIB_ENCAP_LEN); + skb_add_pseudo_hdr(skb); ++dev->stats.rx_packets; dev->stats.rx_bytes += skb->len; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index be11d5d5b8c1..830fecb6934c 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -128,16 +128,15 @@ static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id) buf_size = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu); - skb = dev_alloc_skb(buf_size + IPOIB_ENCAP_LEN); + skb = dev_alloc_skb(buf_size + IPOIB_HARD_LEN); if (unlikely(!skb)) return NULL; /* - * IB will leave a 40 byte gap for a GRH and IPoIB adds a 4 byte - * header. So we need 4 more bytes to get to 48 and align the - * IP header to a multiple of 16. + * the IP header will be at IPOIP_HARD_LEN + IB_GRH_BYTES, that is + * 64 bytes aligned */ - skb_reserve(skb, 4); + skb_reserve(skb, sizeof(struct ipoib_pseudo_header)); mapping = priv->rx_ring[id].mapping; mapping[0] = ib_dma_map_single(priv->ca, skb->data, buf_size, @@ -253,8 +252,7 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) skb_pull(skb, IB_GRH_BYTES); skb->protocol = ((struct ipoib_header *) skb->data)->proto; - skb_reset_mac_header(skb); - skb_pull(skb, IPOIB_ENCAP_LEN); + skb_add_pseudo_hdr(skb); ++dev->stats.rx_packets; dev->stats.rx_bytes += skb->len; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 5636fc3da6b8..b58d9dca5c93 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -925,9 +925,12 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr, ipoib_neigh_free(neigh); goto err_drop; } - if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) + if (skb_queue_len(&neigh->queue) < + IPOIB_MAX_PATH_REC_QUEUE) { + /* put pseudoheader back on for next time */ + skb_push(skb, IPOIB_PSEUDO_LEN); __skb_queue_tail(&neigh->queue, skb); - else { + } else { ipoib_warn(priv, "queue length limit %d. Packet drop.\n", skb_queue_len(&neigh->queue)); goto err_drop; @@ -964,7 +967,7 @@ err_drop: } static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, - struct ipoib_cb *cb) + struct ipoib_pseudo_header *phdr) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_path *path; @@ -972,16 +975,18 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, spin_lock_irqsave(&priv->lock, flags); - path = __path_find(dev, cb->hwaddr + 4); + path = __path_find(dev, phdr->hwaddr + 4); if (!path || !path->valid) { int new_path = 0; if (!path) { - path = path_rec_create(dev, cb->hwaddr + 4); + path = path_rec_create(dev, phdr->hwaddr + 4); new_path = 1; } if (path) { if (skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) { + /* put pseudoheader back on for next time */ + skb_push(skb, IPOIB_PSEUDO_LEN); __skb_queue_tail(&path->queue, skb); } else { ++dev->stats.tx_dropped; @@ -1009,10 +1014,12 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, be16_to_cpu(path->pathrec.dlid)); spin_unlock_irqrestore(&priv->lock, flags); - ipoib_send(dev, skb, path->ah, IPOIB_QPN(cb->hwaddr)); + ipoib_send(dev, skb, path->ah, IPOIB_QPN(phdr->hwaddr)); return; } else if ((path->query || !path_rec_start(dev, path)) && skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) { + /* put pseudoheader back on for next time */ + skb_push(skb, IPOIB_PSEUDO_LEN); __skb_queue_tail(&path->queue, skb); } else { ++dev->stats.tx_dropped; @@ -1026,13 +1033,15 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_neigh *neigh; - struct ipoib_cb *cb = ipoib_skb_cb(skb); + struct ipoib_pseudo_header *phdr; struct ipoib_header *header; unsigned long flags; + phdr = (struct ipoib_pseudo_header *) skb->data; + skb_pull(skb, sizeof(*phdr)); header = (struct ipoib_header *) skb->data; - if (unlikely(cb->hwaddr[4] == 0xff)) { + if (unlikely(phdr->hwaddr[4] == 0xff)) { /* multicast, arrange "if" according to probability */ if ((header->proto != htons(ETH_P_IP)) && (header->proto != htons(ETH_P_IPV6)) && @@ -1045,13 +1054,13 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } /* Add in the P_Key for multicast*/ - cb->hwaddr[8] = (priv->pkey >> 8) & 0xff; - cb->hwaddr[9] = priv->pkey & 0xff; + phdr->hwaddr[8] = (priv->pkey >> 8) & 0xff; + phdr->hwaddr[9] = priv->pkey & 0xff; - neigh = ipoib_neigh_get(dev, cb->hwaddr); + neigh = ipoib_neigh_get(dev, phdr->hwaddr); if (likely(neigh)) goto send_using_neigh; - ipoib_mcast_send(dev, cb->hwaddr, skb); + ipoib_mcast_send(dev, phdr->hwaddr, skb); return NETDEV_TX_OK; } @@ -1060,16 +1069,16 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) case htons(ETH_P_IP): case htons(ETH_P_IPV6): case htons(ETH_P_TIPC): - neigh = ipoib_neigh_get(dev, cb->hwaddr); + neigh = ipoib_neigh_get(dev, phdr->hwaddr); if (unlikely(!neigh)) { - neigh_add_path(skb, cb->hwaddr, dev); + neigh_add_path(skb, phdr->hwaddr, dev); return NETDEV_TX_OK; } break; case htons(ETH_P_ARP): case htons(ETH_P_RARP): /* for unicast ARP and RARP should always perform path find */ - unicast_arp_send(skb, dev, cb); + unicast_arp_send(skb, dev, phdr); return NETDEV_TX_OK; default: /* ethertype not supported by IPoIB */ @@ -1086,11 +1095,13 @@ send_using_neigh: goto unref; } } else if (neigh->ah) { - ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(cb->hwaddr)); + ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(phdr->hwaddr)); goto unref; } if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) { + /* put pseudoheader back on for next time */ + skb_push(skb, sizeof(*phdr)); spin_lock_irqsave(&priv->lock, flags); __skb_queue_tail(&neigh->queue, skb); spin_unlock_irqrestore(&priv->lock, flags); @@ -1122,8 +1133,8 @@ static int ipoib_hard_header(struct sk_buff *skb, unsigned short type, const void *daddr, const void *saddr, unsigned len) { + struct ipoib_pseudo_header *phdr; struct ipoib_header *header; - struct ipoib_cb *cb = ipoib_skb_cb(skb); header = (struct ipoib_header *) skb_push(skb, sizeof *header); @@ -1132,12 +1143,13 @@ static int ipoib_hard_header(struct sk_buff *skb, /* * we don't rely on dst_entry structure, always stuff the - * destination address into skb->cb so we can figure out where + * destination address into skb hard header so we can figure out where * to send the packet later. */ - memcpy(cb->hwaddr, daddr, INFINIBAND_ALEN); + phdr = (struct ipoib_pseudo_header *) skb_push(skb, sizeof(*phdr)); + memcpy(phdr->hwaddr, daddr, INFINIBAND_ALEN); - return sizeof *header; + return IPOIB_HARD_LEN; } static void ipoib_set_mcast_list(struct net_device *dev) @@ -1759,7 +1771,7 @@ void ipoib_setup(struct net_device *dev) dev->flags |= IFF_BROADCAST | IFF_MULTICAST; - dev->hard_header_len = IPOIB_ENCAP_LEN; + dev->hard_header_len = IPOIB_HARD_LEN; dev->addr_len = INFINIBAND_ALEN; dev->type = ARPHRD_INFINIBAND; dev->tx_queue_len = ipoib_sendq_size * 2; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index d3394b6add24..1909dd252c94 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -796,9 +796,11 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb) __ipoib_mcast_add(dev, mcast); list_add_tail(&mcast->list, &priv->multicast_list); } - if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE) + if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE) { + /* put pseudoheader back on for next time */ + skb_push(skb, sizeof(struct ipoib_pseudo_header)); skb_queue_tail(&mcast->pkt_queue, skb); - else { + } else { ++dev->stats.tx_dropped; dev_kfree_skb_any(skb); } From 9d6280da39c04832d6abf5f4ecc8175c9aad91c0 Mon Sep 17 00:00:00 2001 From: Jiri Bohac Date: Thu, 13 Oct 2016 18:50:02 +0200 Subject: [PATCH 084/884] IPv6: Drop the temporary address regen_timer The randomized interface identifier (rndid) was periodically updated from the regen_timer timer. Simplify the code by updating the rndid only when needed by ipv6_try_regen_rndid(). This makes the follow-up DESYNC_FACTOR fix much simpler. Also it fixes a reference counting error in this error path, where an in6_dev_put was missing: err = addrconf_sysctl_register(ndev); if (err) { ipv6_mc_destroy_dev(ndev); - del_timer(&ndev->regen_timer); snmp6_unregister_dev(ndev); goto err_release; Signed-off-by: Jiri Bohac Signed-off-by: David S. Miller --- include/net/if_inet6.h | 1 - net/ipv6/addrconf.c | 61 +++++++----------------------------------- 2 files changed, 9 insertions(+), 53 deletions(-) diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index 515352c6280a..ae707352f463 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -191,7 +191,6 @@ struct inet6_dev { int dead; u8 rndid[8]; - struct timer_list regen_timer; struct list_head tempaddr_list; struct in6_addr token; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 9faafe58516a..58c3d73403a5 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -147,9 +147,8 @@ static inline void addrconf_sysctl_unregister(struct inet6_dev *idev) } #endif -static void __ipv6_regen_rndid(struct inet6_dev *idev); -static void __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr); -static void ipv6_regen_rndid(unsigned long data); +static void ipv6_regen_rndid(struct inet6_dev *idev); +static void ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr); static int ipv6_generate_eui64(u8 *eui, struct net_device *dev); static int ipv6_count_addresses(struct inet6_dev *idev); @@ -409,9 +408,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) goto err_release; } - /* One reference from device. We must do this before - * we invoke __ipv6_regen_rndid(). - */ + /* One reference from device. */ in6_dev_hold(ndev); if (dev->flags & (IFF_NOARP | IFF_LOOPBACK)) @@ -425,17 +422,14 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) #endif INIT_LIST_HEAD(&ndev->tempaddr_list); - setup_timer(&ndev->regen_timer, ipv6_regen_rndid, (unsigned long)ndev); if ((dev->flags&IFF_LOOPBACK) || dev->type == ARPHRD_TUNNEL || dev->type == ARPHRD_TUNNEL6 || dev->type == ARPHRD_SIT || dev->type == ARPHRD_NONE) { ndev->cnf.use_tempaddr = -1; - } else { - in6_dev_hold(ndev); - ipv6_regen_rndid((unsigned long) ndev); - } + } else + ipv6_regen_rndid(ndev); ndev->token = in6addr_any; @@ -447,7 +441,6 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) err = addrconf_sysctl_register(ndev); if (err) { ipv6_mc_destroy_dev(ndev); - del_timer(&ndev->regen_timer); snmp6_unregister_dev(ndev); goto err_release; } @@ -1222,7 +1215,7 @@ retry: } in6_ifa_hold(ifp); memcpy(addr.s6_addr, ifp->addr.s6_addr, 8); - __ipv6_try_regen_rndid(idev, tmpaddr); + ipv6_try_regen_rndid(idev, tmpaddr); memcpy(&addr.s6_addr[8], idev->rndid, 8); age = (now - ifp->tstamp) / HZ; tmp_valid_lft = min_t(__u32, @@ -2150,7 +2143,7 @@ static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev) } /* (re)generation of randomized interface identifier (RFC 3041 3.2, 3.5) */ -static void __ipv6_regen_rndid(struct inet6_dev *idev) +static void ipv6_regen_rndid(struct inet6_dev *idev) { regen: get_random_bytes(idev->rndid, sizeof(idev->rndid)); @@ -2179,43 +2172,10 @@ regen: } } -static void ipv6_regen_rndid(unsigned long data) -{ - struct inet6_dev *idev = (struct inet6_dev *) data; - unsigned long expires; - - rcu_read_lock_bh(); - write_lock_bh(&idev->lock); - - if (idev->dead) - goto out; - - __ipv6_regen_rndid(idev); - - expires = jiffies + - idev->cnf.temp_prefered_lft * HZ - - idev->cnf.regen_max_retry * idev->cnf.dad_transmits * - NEIGH_VAR(idev->nd_parms, RETRANS_TIME) - - idev->cnf.max_desync_factor * HZ; - if (time_before(expires, jiffies)) { - pr_warn("%s: too short regeneration interval; timer disabled for %s\n", - __func__, idev->dev->name); - goto out; - } - - if (!mod_timer(&idev->regen_timer, expires)) - in6_dev_hold(idev); - -out: - write_unlock_bh(&idev->lock); - rcu_read_unlock_bh(); - in6_dev_put(idev); -} - -static void __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr) +static void ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr) { if (tmpaddr && memcmp(idev->rndid, &tmpaddr->s6_addr[8], 8) == 0) - __ipv6_regen_rndid(idev); + ipv6_regen_rndid(idev); } /* @@ -3594,9 +3554,6 @@ restart: if (!how) idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY); - if (how && del_timer(&idev->regen_timer)) - in6_dev_put(idev); - /* Step 3: clear tempaddr list */ while (!list_empty(&idev->tempaddr_list)) { ifa = list_first_entry(&idev->tempaddr_list, From 76506a986dc31394fd1f2741db037d29c7e57843 Mon Sep 17 00:00:00 2001 From: Jiri Bohac Date: Thu, 13 Oct 2016 18:52:15 +0200 Subject: [PATCH 085/884] IPv6: fix DESYNC_FACTOR The IPv6 temporary address generation uses a variable called DESYNC_FACTOR to prevent hosts updating the addresses at the same time. Quoting RFC 4941: ... The value DESYNC_FACTOR is a random value (different for each client) that ensures that clients don't synchronize with each other and generate new addresses at exactly the same time ... DESYNC_FACTOR is defined as: DESYNC_FACTOR -- A random value within the range 0 - MAX_DESYNC_FACTOR. It is computed once at system start (rather than each time it is used) and must never be greater than (TEMP_VALID_LIFETIME - REGEN_ADVANCE). First, I believe the RFC has a typo in it and meant to say: "and must never be greater than (TEMP_PREFERRED_LIFETIME - REGEN_ADVANCE)" The reason is that at various places in the RFC, DESYNC_FACTOR is used in a calculation like (TEMP_PREFERRED_LIFETIME - DESYNC_FACTOR) or (TEMP_PREFERRED_LIFETIME - REGEN_ADVANCE - DESYNC_FACTOR). It needs to be smaller than (TEMP_PREFERRED_LIFETIME - REGEN_ADVANCE) for the result of these calculations to be larger than zero. It's never used in a calculation together with TEMP_VALID_LIFETIME. I already submitted an errata to the rfc-editor: https://www.rfc-editor.org/errata_search.php?rfc=4941 The Linux implementation of DESYNC_FACTOR is very wrong: max_desync_factor is used in places DESYNC_FACTOR should be used. max_desync_factor is initialized to the RFC-recommended value for MAX_DESYNC_FACTOR (600) but the whole point is to get a _random_ value. And nothing ensures that the value used is not greater than (TEMP_PREFERRED_LIFETIME - REGEN_ADVANCE), which leads to underflows. The effect can easily be observed when setting the temp_prefered_lft sysctl e.g. to 60. The preferred lifetime of the temporary addresses will be bogus. TEMP_PREFERRED_LIFETIME and REGEN_ADVANCE are not constants and can be influenced by these three sysctls: regen_max_retry, dad_transmits and temp_prefered_lft. Thus, the upper bound for desync_factor needs to be re-calculated each time a new address is generated and if desync_factor is larger than the new upper bound, a new random value needs to be re-generated. And since we already have max_desync_factor configurable per interface, we also need to calculate and store desync_factor per interface. Signed-off-by: Jiri Bohac Signed-off-by: David S. Miller --- include/net/if_inet6.h | 1 + net/ipv6/addrconf.c | 45 +++++++++++++++++++++++++++++++----------- 2 files changed, 35 insertions(+), 11 deletions(-) diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index ae707352f463..b0576cb2ab25 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -190,6 +190,7 @@ struct inet6_dev { __u32 if_flags; int dead; + u32 desync_factor; u8 rndid[8]; struct list_head tempaddr_list; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 58c3d73403a5..cc7c26d05f45 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -422,6 +422,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) #endif INIT_LIST_HEAD(&ndev->tempaddr_list); + ndev->desync_factor = U32_MAX; if ((dev->flags&IFF_LOOPBACK) || dev->type == ARPHRD_TUNNEL || dev->type == ARPHRD_TUNNEL6 || @@ -1183,6 +1184,7 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *i int ret = 0; u32 addr_flags; unsigned long now = jiffies; + long max_desync_factor; write_lock_bh(&idev->lock); if (ift) { @@ -1218,20 +1220,41 @@ retry: ipv6_try_regen_rndid(idev, tmpaddr); memcpy(&addr.s6_addr[8], idev->rndid, 8); age = (now - ifp->tstamp) / HZ; - tmp_valid_lft = min_t(__u32, - ifp->valid_lft, - idev->cnf.temp_valid_lft + age); - tmp_prefered_lft = min_t(__u32, - ifp->prefered_lft, - idev->cnf.temp_prefered_lft + age - - idev->cnf.max_desync_factor); - tmp_plen = ifp->prefix_len; - tmp_tstamp = ifp->tstamp; - spin_unlock_bh(&ifp->lock); regen_advance = idev->cnf.regen_max_retry * idev->cnf.dad_transmits * NEIGH_VAR(idev->nd_parms, RETRANS_TIME) / HZ; + + /* recalculate max_desync_factor each time and update + * idev->desync_factor if it's larger + */ + max_desync_factor = min_t(__u32, + idev->cnf.max_desync_factor, + idev->cnf.temp_prefered_lft - regen_advance); + + if (unlikely(idev->desync_factor > max_desync_factor)) { + if (max_desync_factor > 0) { + get_random_bytes(&idev->desync_factor, + sizeof(idev->desync_factor)); + idev->desync_factor %= max_desync_factor; + } else { + idev->desync_factor = 0; + } + } + + tmp_valid_lft = min_t(__u32, + ifp->valid_lft, + idev->cnf.temp_valid_lft + age); + tmp_prefered_lft = idev->cnf.temp_prefered_lft + age - + idev->desync_factor; + /* guard against underflow in case of concurrent updates to cnf */ + if (unlikely(tmp_prefered_lft < 0)) + tmp_prefered_lft = 0; + tmp_prefered_lft = min_t(__u32, ifp->prefered_lft, tmp_prefered_lft); + tmp_plen = ifp->prefix_len; + tmp_tstamp = ifp->tstamp; + spin_unlock_bh(&ifp->lock); + write_unlock_bh(&idev->lock); /* A temporary address is created only if this calculated Preferred @@ -2316,7 +2339,7 @@ static void manage_tempaddrs(struct inet6_dev *idev, max_valid = 0; max_prefered = idev->cnf.temp_prefered_lft - - idev->cnf.max_desync_factor - age; + idev->desync_factor - age; if (max_prefered < 0) max_prefered = 0; From ce6b04ee8b112cc9d5ef41ba697a3ffabc630f42 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Thu, 13 Oct 2016 22:57:01 +0300 Subject: [PATCH 086/884] qed: Fix static checker warning. Smatch compains about qed_roce_ll2_tx() dereference of the 'cdev' variable while testing its validity later. As the validation checking is an over-kill [variable would always be set], simply remove it. Reported-by: Dan Carpenter Fixes: abd49676c707 ("qed: Add RoCE ll2 & GSI support") Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_roce.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c index 76831a398bed..c73638ceb1ad 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.c +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c @@ -2809,11 +2809,6 @@ static int qed_roce_ll2_stop(struct qed_dev *cdev) struct qed_roce_ll2_info *roce_ll2 = hwfn->ll2; int rc; - if (!cdev) { - DP_ERR(cdev, "qed roce ll2 stop: invalid cdev\n"); - return -EINVAL; - } - if (roce_ll2->handle == QED_LL2_UNUSED_HANDLE) { DP_ERR(cdev, "qed roce ll2 stop: cannot stop an unused LL2\n"); return -EINVAL; @@ -2850,7 +2845,7 @@ static int qed_roce_ll2_tx(struct qed_dev *cdev, int rc; int i; - if (!cdev || !pkt || !params) { + if (!pkt || !params) { DP_ERR(cdev, "roce ll2 tx: failed tx because one of the following is NULL - drv=%p, pkt=%p, params=%p\n", cdev, pkt, params); From 0189efb8f4f830b9ac7a7c56c0c6e260859e950d Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Thu, 13 Oct 2016 22:57:02 +0300 Subject: [PATCH 087/884] qed*: Fix Kconfig dependencies with INFINIBAND_QEDR The qedr driver would require a tristate Kconfig option [to allow it to compile as a module], and toward that end we've added the INFINIBAND_QEDR option. But as we've made the compilation of the qed/qede infrastructure required for RoCE dependent on the option we'd be facing linking difficulties in case that QED=y or QEDE=y, and INFINIBAND_QEDR=m. To resolve this, we seperate between the INFINIBAND_QEDR option and the infrastructure support in qed/qede by introducing a new QED_RDMA option which would be selected by INFINIBAND_QEDR but would be a boolean instead of a tristate; Following that, the qed/qede is fixed based on this new option so that all config combinations would be supported. Fixes: cee9fbd8e2e9 ("qede: add qedr framework") Reported-by: Arnd Bergmann Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/Kconfig | 4 + drivers/net/ethernet/qlogic/qed/Makefile | 2 +- drivers/net/ethernet/qlogic/qed/qed_cxt.c | 7 +- drivers/net/ethernet/qlogic/qed/qed_dev.c | 14 ++-- drivers/net/ethernet/qlogic/qed/qed_ll2.c | 1 + drivers/net/ethernet/qlogic/qed/qed_ll2.h | 20 ----- drivers/net/ethernet/qlogic/qed/qed_main.c | 28 ++++--- drivers/net/ethernet/qlogic/qed/qed_roce.c | 91 +++++++++++----------- drivers/net/ethernet/qlogic/qed/qed_roce.h | 75 +++++++++++------- drivers/net/ethernet/qlogic/qed/qed_spq.c | 4 - drivers/net/ethernet/qlogic/qede/Makefile | 2 +- include/linux/qed/qede_roce.h | 2 +- 12 files changed, 120 insertions(+), 130 deletions(-) diff --git a/drivers/net/ethernet/qlogic/Kconfig b/drivers/net/ethernet/qlogic/Kconfig index 0df1391f9663..77567727528a 100644 --- a/drivers/net/ethernet/qlogic/Kconfig +++ b/drivers/net/ethernet/qlogic/Kconfig @@ -107,10 +107,14 @@ config QEDE ---help--- This enables the support for ... +config QED_RDMA + bool + config INFINIBAND_QEDR tristate "QLogic qede RoCE sources [debug]" depends on QEDE && 64BIT select QED_LL2 + select QED_RDMA default n ---help--- This provides a temporary node that allows the compilation diff --git a/drivers/net/ethernet/qlogic/qed/Makefile b/drivers/net/ethernet/qlogic/qed/Makefile index cda0af7fbc20..967acf322c09 100644 --- a/drivers/net/ethernet/qlogic/qed/Makefile +++ b/drivers/net/ethernet/qlogic/qed/Makefile @@ -5,4 +5,4 @@ qed-y := qed_cxt.o qed_dev.o qed_hw.o qed_init_fw_funcs.o qed_init_ops.o \ qed_selftest.o qed_dcbx.o qed_debug.o qed-$(CONFIG_QED_SRIOV) += qed_sriov.o qed_vf.o qed-$(CONFIG_QED_LL2) += qed_ll2.o -qed-$(CONFIG_INFINIBAND_QEDR) += qed_roce.o +qed-$(CONFIG_QED_RDMA) += qed_roce.o diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c index 82370a1a59ad..277db7831f7b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c @@ -47,13 +47,8 @@ #define TM_ALIGN BIT(TM_SHIFT) #define TM_ELEM_SIZE 4 -/* ILT constants */ -#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) /* For RoCE we configure to 64K to cover for RoCE max tasks 256K purpose. */ -#define ILT_DEFAULT_HW_P_SIZE 4 -#else -#define ILT_DEFAULT_HW_P_SIZE 3 -#endif +#define ILT_DEFAULT_HW_P_SIZE (IS_ENABLED(CONFIG_QED_RDMA) ? 4 : 3) #define ILT_PAGE_IN_BYTES(hw_p_size) (1U << ((hw_p_size) + 12)) #define ILT_CFG_REG(cli, reg) PSWRQ2_REG_ ## cli ## _ ## reg ## _RT_OFFSET diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 754f6a908858..21adf5208320 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -1422,19 +1422,19 @@ static void qed_hw_set_feat(struct qed_hwfn *p_hwfn) u32 *feat_num = p_hwfn->hw_info.feat_num; int num_features = 1; -#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) - /* Roce CNQ each requires: 1 status block + 1 CNQ. We divide the - * status blocks equally between L2 / RoCE but with consideration as - * to how many l2 queues / cnqs we have - */ - if (p_hwfn->hw_info.personality == QED_PCI_ETH_ROCE) { + if (IS_ENABLED(CONFIG_QED_RDMA) && + p_hwfn->hw_info.personality == QED_PCI_ETH_ROCE) { + /* Roce CNQ each requires: 1 status block + 1 CNQ. We divide + * the status blocks equally between L2 / RoCE but with + * consideration as to how many l2 queues / cnqs we have. + */ num_features++; feat_num[QED_RDMA_CNQ] = min_t(u32, RESC_NUM(p_hwfn, QED_SB) / num_features, RESC_NUM(p_hwfn, QED_RDMA_CNQ_RAM)); } -#endif + feat_num[QED_PF_L2_QUE] = min_t(u32, RESC_NUM(p_hwfn, QED_SB) / num_features, RESC_NUM(p_hwfn, QED_L2_QUEUE)); diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index 02a8be2faed7..7856e5241b52 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -38,6 +38,7 @@ #include "qed_mcp.h" #include "qed_reg_addr.h" #include "qed_sp.h" +#include "qed_roce.h" #define QED_LL2_RX_REGISTERED(ll2) ((ll2)->rx_queue.b_cb_registred) #define QED_LL2_TX_REGISTERED(ll2) ((ll2)->tx_queue.b_cb_registred) diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.h b/drivers/net/ethernet/qlogic/qed/qed_ll2.h index 80a5dc2d652d..4e3d62a16cab 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.h +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.h @@ -293,24 +293,4 @@ void qed_ll2_setup(struct qed_hwfn *p_hwfn, */ void qed_ll2_free(struct qed_hwfn *p_hwfn, struct qed_ll2_info *p_ll2_connections); -void qed_ll2b_complete_rx_gsi_packet(struct qed_hwfn *p_hwfn, - u8 connection_handle, - void *cookie, - dma_addr_t rx_buf_addr, - u16 data_length, - u8 data_length_error, - u16 parse_flags, - u16 vlan, - u32 src_mac_addr_hi, - u16 src_mac_addr_lo, bool b_last_packet); -void qed_ll2b_complete_tx_gsi_packet(struct qed_hwfn *p_hwfn, - u8 connection_handle, - void *cookie, - dma_addr_t first_frag_addr, - bool b_last_fragment, bool b_last_packet); -void qed_ll2b_release_tx_gsi_packet(struct qed_hwfn *p_hwfn, - u8 connection_handle, - void *cookie, - dma_addr_t first_frag_addr, - bool b_last_fragment, bool b_last_packet); #endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 4ee3151e80c2..6eb2401b9e22 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -33,10 +33,8 @@ #include "qed_hw.h" #include "qed_selftest.h" -#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) #define QED_ROCE_QPS (8192) #define QED_ROCE_DPIS (8) -#endif static char version[] = "QLogic FastLinQ 4xxxx Core Module qed " DRV_MODULE_VERSION "\n"; @@ -682,9 +680,7 @@ static int qed_slowpath_setup_int(struct qed_dev *cdev, enum qed_int_mode int_mode) { struct qed_sb_cnt_info sb_cnt_info; -#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) - int num_l2_queues; -#endif + int num_l2_queues = 0; int rc; int i; @@ -715,8 +711,9 @@ static int qed_slowpath_setup_int(struct qed_dev *cdev, cdev->int_params.fp_msix_cnt = cdev->int_params.out.num_vectors - cdev->num_hwfns; -#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) - num_l2_queues = 0; + if (!IS_ENABLED(CONFIG_QED_RDMA)) + return 0; + for_each_hwfn(cdev, i) num_l2_queues += FEAT_NUM(&cdev->hwfns[i], QED_PF_L2_QUE); @@ -738,7 +735,6 @@ static int qed_slowpath_setup_int(struct qed_dev *cdev, DP_VERBOSE(cdev, QED_MSG_RDMA, "roce_msix_cnt=%d roce_msix_base=%d\n", cdev->int_params.rdma_msix_cnt, cdev->int_params.rdma_msix_base); -#endif return 0; } @@ -843,18 +839,20 @@ static void qed_update_pf_params(struct qed_dev *cdev, { int i; -#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) - params->rdma_pf_params.num_qps = QED_ROCE_QPS; - params->rdma_pf_params.min_dpis = QED_ROCE_DPIS; - /* divide by 3 the MRs to avoid MF ILT overflow */ - params->rdma_pf_params.num_mrs = RDMA_MAX_TIDS; - params->rdma_pf_params.gl_pi = QED_ROCE_PROTOCOL_INDEX; -#endif for (i = 0; i < cdev->num_hwfns; i++) { struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; p_hwfn->pf_params = *params; } + + if (!IS_ENABLED(CONFIG_QED_RDMA)) + return; + + params->rdma_pf_params.num_qps = QED_ROCE_QPS; + params->rdma_pf_params.min_dpis = QED_ROCE_DPIS; + /* divide by 3 the MRs to avoid MF ILT overflow */ + params->rdma_pf_params.num_mrs = RDMA_MAX_TIDS; + params->rdma_pf_params.gl_pi = QED_ROCE_PROTOCOL_INDEX; } static int qed_slowpath_start(struct qed_dev *cdev, diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c index c73638ceb1ad..187df38542f7 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.c +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c @@ -129,17 +129,12 @@ static void qed_bmap_release_id(struct qed_hwfn *p_hwfn, } } -u32 qed_rdma_get_sb_id(void *p_hwfn, u32 rel_sb_id) +static u32 qed_rdma_get_sb_id(void *p_hwfn, u32 rel_sb_id) { /* First sb id for RoCE is after all the l2 sb */ return FEAT_NUM((struct qed_hwfn *)p_hwfn, QED_PF_L2_QUE) + rel_sb_id; } -u32 qed_rdma_query_cau_timer_res(void *rdma_cxt) -{ - return QED_CAU_DEF_RX_TIMER_RES; -} - static int qed_rdma_alloc(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, struct qed_rdma_start_in_params *params) @@ -275,7 +270,7 @@ free_rdma_info: return rc; } -void qed_rdma_resc_free(struct qed_hwfn *p_hwfn) +static void qed_rdma_resc_free(struct qed_hwfn *p_hwfn) { struct qed_rdma_info *p_rdma_info = p_hwfn->p_rdma_info; @@ -527,6 +522,26 @@ static int qed_rdma_start_fw(struct qed_hwfn *p_hwfn, return qed_spq_post(p_hwfn, p_ent, NULL); } +static int qed_rdma_alloc_tid(void *rdma_cxt, u32 *itid) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + int rc; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Allocate TID\n"); + + spin_lock_bh(&p_hwfn->p_rdma_info->lock); + rc = qed_rdma_bmap_alloc_id(p_hwfn, + &p_hwfn->p_rdma_info->tid_map, itid); + spin_unlock_bh(&p_hwfn->p_rdma_info->lock); + if (rc) + goto out; + + rc = qed_cxt_dynamic_ilt_alloc(p_hwfn, QED_ELEM_TASK, *itid); +out: + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Allocate TID - done, rc = %d\n", rc); + return rc; +} + static int qed_rdma_reserve_lkey(struct qed_hwfn *p_hwfn) { struct qed_rdma_device *dev = p_hwfn->p_rdma_info->dev; @@ -573,7 +588,7 @@ static int qed_rdma_setup(struct qed_hwfn *p_hwfn, return qed_rdma_start_fw(p_hwfn, params, p_ptt); } -int qed_rdma_stop(void *rdma_cxt) +static int qed_rdma_stop(void *rdma_cxt) { struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; struct rdma_close_func_ramrod_data *p_ramrod; @@ -629,8 +644,8 @@ out: return rc; } -int qed_rdma_add_user(void *rdma_cxt, - struct qed_rdma_add_user_out_params *out_params) +static int qed_rdma_add_user(void *rdma_cxt, + struct qed_rdma_add_user_out_params *out_params) { struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; u32 dpi_start_offset; @@ -664,7 +679,7 @@ int qed_rdma_add_user(void *rdma_cxt, return rc; } -struct qed_rdma_port *qed_rdma_query_port(void *rdma_cxt) +static struct qed_rdma_port *qed_rdma_query_port(void *rdma_cxt) { struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; struct qed_rdma_port *p_port = p_hwfn->p_rdma_info->port; @@ -680,7 +695,7 @@ struct qed_rdma_port *qed_rdma_query_port(void *rdma_cxt) return p_port; } -struct qed_rdma_device *qed_rdma_query_device(void *rdma_cxt) +static struct qed_rdma_device *qed_rdma_query_device(void *rdma_cxt) { struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; @@ -690,7 +705,7 @@ struct qed_rdma_device *qed_rdma_query_device(void *rdma_cxt) return p_hwfn->p_rdma_info->dev; } -void qed_rdma_free_tid(void *rdma_cxt, u32 itid) +static void qed_rdma_free_tid(void *rdma_cxt, u32 itid) { struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; @@ -701,27 +716,7 @@ void qed_rdma_free_tid(void *rdma_cxt, u32 itid) spin_unlock_bh(&p_hwfn->p_rdma_info->lock); } -int qed_rdma_alloc_tid(void *rdma_cxt, u32 *itid) -{ - struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; - int rc; - - DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Allocate TID\n"); - - spin_lock_bh(&p_hwfn->p_rdma_info->lock); - rc = qed_rdma_bmap_alloc_id(p_hwfn, - &p_hwfn->p_rdma_info->tid_map, itid); - spin_unlock_bh(&p_hwfn->p_rdma_info->lock); - if (rc) - goto out; - - rc = qed_cxt_dynamic_ilt_alloc(p_hwfn, QED_ELEM_TASK, *itid); -out: - DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Allocate TID - done, rc = %d\n", rc); - return rc; -} - -void qed_rdma_cnq_prod_update(void *rdma_cxt, u8 qz_offset, u16 prod) +static void qed_rdma_cnq_prod_update(void *rdma_cxt, u8 qz_offset, u16 prod) { struct qed_hwfn *p_hwfn; u16 qz_num; @@ -816,7 +811,7 @@ static int qed_rdma_get_int(struct qed_dev *cdev, struct qed_int_info *info) return 0; } -int qed_rdma_alloc_pd(void *rdma_cxt, u16 *pd) +static int qed_rdma_alloc_pd(void *rdma_cxt, u16 *pd) { struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; u32 returned_id; @@ -1985,9 +1980,9 @@ int qed_roce_destroy_qp(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp) return 0; } -int qed_rdma_query_qp(void *rdma_cxt, - struct qed_rdma_qp *qp, - struct qed_rdma_query_qp_out_params *out_params) +static int qed_rdma_query_qp(void *rdma_cxt, + struct qed_rdma_qp *qp, + struct qed_rdma_query_qp_out_params *out_params) { struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; int rc; @@ -2022,7 +2017,7 @@ int qed_rdma_query_qp(void *rdma_cxt, return rc; } -int qed_rdma_destroy_qp(void *rdma_cxt, struct qed_rdma_qp *qp) +static int qed_rdma_destroy_qp(void *rdma_cxt, struct qed_rdma_qp *qp) { struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; int rc = 0; @@ -2215,9 +2210,9 @@ static int qed_roce_modify_qp(struct qed_hwfn *p_hwfn, return rc; } -int qed_rdma_modify_qp(void *rdma_cxt, - struct qed_rdma_qp *qp, - struct qed_rdma_modify_qp_in_params *params) +static int qed_rdma_modify_qp(void *rdma_cxt, + struct qed_rdma_qp *qp, + struct qed_rdma_modify_qp_in_params *params) { struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; enum qed_roce_qp_state prev_state; @@ -2312,8 +2307,9 @@ int qed_rdma_modify_qp(void *rdma_cxt, return rc; } -int qed_rdma_register_tid(void *rdma_cxt, - struct qed_rdma_register_tid_in_params *params) +static int +qed_rdma_register_tid(void *rdma_cxt, + struct qed_rdma_register_tid_in_params *params) { struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; struct rdma_register_tid_ramrod_data *p_ramrod; @@ -2450,7 +2446,7 @@ int qed_rdma_register_tid(void *rdma_cxt, return rc; } -int qed_rdma_deregister_tid(void *rdma_cxt, u32 itid) +static int qed_rdma_deregister_tid(void *rdma_cxt, u32 itid) { struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; struct rdma_deregister_tid_ramrod_data *p_ramrod; @@ -2561,7 +2557,8 @@ void qed_rdma_dpm_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) qed_rdma_dpm_conf(p_hwfn, p_ptt); } -int qed_rdma_start(void *rdma_cxt, struct qed_rdma_start_in_params *params) +static int qed_rdma_start(void *rdma_cxt, + struct qed_rdma_start_in_params *params) { struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; struct qed_ptt *p_ptt; @@ -2601,7 +2598,7 @@ static int qed_rdma_init(struct qed_dev *cdev, return qed_rdma_start(QED_LEADING_HWFN(cdev), params); } -void qed_rdma_remove_user(void *rdma_cxt, u16 dpi) +static void qed_rdma_remove_user(void *rdma_cxt, u16 dpi) { struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.h b/drivers/net/ethernet/qlogic/qed/qed_roce.h index 2f091e8a0f40..691413176734 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.h +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.h @@ -181,36 +181,55 @@ struct qed_rdma_qp { dma_addr_t shared_queue_phys_addr; }; -int -qed_rdma_add_user(void *rdma_cxt, - struct qed_rdma_add_user_out_params *out_params); -int qed_rdma_alloc_pd(void *rdma_cxt, u16 *pd); -int qed_rdma_alloc_tid(void *rdma_cxt, u32 *tid); -int qed_rdma_deregister_tid(void *rdma_cxt, u32 tid); -void qed_rdma_free_tid(void *rdma_cxt, u32 tid); -struct qed_rdma_device *qed_rdma_query_device(void *rdma_cxt); -struct qed_rdma_port *qed_rdma_query_port(void *rdma_cxt); -int -qed_rdma_register_tid(void *rdma_cxt, - struct qed_rdma_register_tid_in_params *params); -void qed_rdma_remove_user(void *rdma_cxt, u16 dpi); -int qed_rdma_start(void *p_hwfn, struct qed_rdma_start_in_params *params); -int qed_rdma_stop(void *rdma_cxt); -u32 qed_rdma_get_sb_id(void *p_hwfn, u32 rel_sb_id); -u32 qed_rdma_query_cau_timer_res(void *p_hwfn); -void qed_rdma_cnq_prod_update(void *rdma_cxt, u8 cnq_index, u16 prod); -void qed_rdma_resc_free(struct qed_hwfn *p_hwfn); +#if IS_ENABLED(CONFIG_QED_RDMA) +void qed_rdma_dpm_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); void qed_async_roce_event(struct qed_hwfn *p_hwfn, struct event_ring_entry *p_eqe); -int qed_rdma_destroy_qp(void *rdma_cxt, struct qed_rdma_qp *qp); -int qed_rdma_modify_qp(void *rdma_cxt, struct qed_rdma_qp *qp, - struct qed_rdma_modify_qp_in_params *params); -int qed_rdma_query_qp(void *rdma_cxt, struct qed_rdma_qp *qp, - struct qed_rdma_query_qp_out_params *out_params); - -#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) -void qed_rdma_dpm_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); +void qed_ll2b_complete_tx_gsi_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + void *cookie, + dma_addr_t first_frag_addr, + bool b_last_fragment, bool b_last_packet); +void qed_ll2b_release_tx_gsi_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + void *cookie, + dma_addr_t first_frag_addr, + bool b_last_fragment, bool b_last_packet); +void qed_ll2b_complete_rx_gsi_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + void *cookie, + dma_addr_t rx_buf_addr, + u16 data_length, + u8 data_length_error, + u16 parse_flags, + u16 vlan, + u32 src_mac_addr_hi, + u16 src_mac_addr_lo, bool b_last_packet); #else -void qed_rdma_dpm_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) {} +static inline void qed_rdma_dpm_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) {} +static inline void qed_async_roce_event(struct qed_hwfn *p_hwfn, struct event_ring_entry *p_eqe) {} +static inline void qed_ll2b_complete_tx_gsi_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + void *cookie, + dma_addr_t first_frag_addr, + bool b_last_fragment, + bool b_last_packet) {} +static inline void qed_ll2b_release_tx_gsi_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + void *cookie, + dma_addr_t first_frag_addr, + bool b_last_fragment, + bool b_last_packet) {} +static inline void qed_ll2b_complete_rx_gsi_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + void *cookie, + dma_addr_t rx_buf_addr, + u16 data_length, + u8 data_length_error, + u16 parse_flags, + u16 vlan, + u32 src_mac_addr_hi, + u16 src_mac_addr_lo, + bool b_last_packet) {} #endif #endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c index caff41544898..9fbaf9429fd0 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_spq.c +++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c @@ -28,9 +28,7 @@ #include "qed_reg_addr.h" #include "qed_sp.h" #include "qed_sriov.h" -#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) #include "qed_roce.h" -#endif /*************************************************************************** * Structures & Definitions @@ -240,11 +238,9 @@ qed_async_event_completion(struct qed_hwfn *p_hwfn, struct event_ring_entry *p_eqe) { switch (p_eqe->protocol_id) { -#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) case PROTOCOLID_ROCE: qed_async_roce_event(p_hwfn, p_eqe); return 0; -#endif case PROTOCOLID_COMMON: return qed_sriov_eqe_event(p_hwfn, p_eqe->opcode, diff --git a/drivers/net/ethernet/qlogic/qede/Makefile b/drivers/net/ethernet/qlogic/qede/Makefile index 28dc58919c85..048a230c3ce0 100644 --- a/drivers/net/ethernet/qlogic/qede/Makefile +++ b/drivers/net/ethernet/qlogic/qede/Makefile @@ -2,4 +2,4 @@ obj-$(CONFIG_QEDE) := qede.o qede-y := qede_main.o qede_ethtool.o qede-$(CONFIG_DCB) += qede_dcbnl.o -qede-$(CONFIG_INFINIBAND_QEDR) += qede_roce.o +qede-$(CONFIG_QED_RDMA) += qede_roce.o diff --git a/include/linux/qed/qede_roce.h b/include/linux/qed/qede_roce.h index 99fbe6d55acb..f48d64b0e2fb 100644 --- a/include/linux/qed/qede_roce.h +++ b/include/linux/qed/qede_roce.h @@ -68,7 +68,7 @@ void qede_roce_unregister_driver(struct qedr_driver *drv); bool qede_roce_supported(struct qede_dev *dev); -#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) +#if IS_ENABLED(CONFIG_QED_RDMA) int qede_roce_dev_add(struct qede_dev *dev); void qede_roce_dev_event_open(struct qede_dev *dev); void qede_roce_dev_event_close(struct qede_dev *dev); From 8c93beaf5714b9ddfa4a0b4bcf89725d2021e903 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Thu, 13 Oct 2016 22:57:03 +0300 Subject: [PATCH 088/884] qed: Additional work toward cleaning C=1 This cleans many of the warnings that would arise in qed as a result of compilations with C=1; Most of those are the addition of missing 'static' to functions, although there are several other fixes as well. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_cxt.c | 8 +- drivers/net/ethernet/qlogic/qed/qed_debug.c | 53 ++++----- drivers/net/ethernet/qlogic/qed/qed_dev.c | 5 +- drivers/net/ethernet/qlogic/qed/qed_ll2.c | 15 +-- drivers/net/ethernet/qlogic/qed/qed_main.c | 2 +- drivers/net/ethernet/qlogic/qed/qed_roce.c | 118 +++----------------- drivers/net/ethernet/qlogic/qed/qed_roce.h | 20 ---- drivers/net/ethernet/qlogic/qed/qed_sp.h | 1 - 8 files changed, 58 insertions(+), 164 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c index 277db7831f7b..0c42c240b5cf 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c @@ -344,14 +344,14 @@ static struct qed_tid_seg *qed_cxt_tid_seg_info(struct qed_hwfn *p_hwfn, return NULL; } -void qed_cxt_set_srq_count(struct qed_hwfn *p_hwfn, u32 num_srqs) +static void qed_cxt_set_srq_count(struct qed_hwfn *p_hwfn, u32 num_srqs) { struct qed_cxt_mngr *p_mgr = p_hwfn->p_cxt_mngr; p_mgr->srq_count = num_srqs; } -u32 qed_cxt_get_srq_count(struct qed_hwfn *p_hwfn) +static u32 qed_cxt_get_srq_count(struct qed_hwfn *p_hwfn) { struct qed_cxt_mngr *p_mgr = p_hwfn->p_cxt_mngr; @@ -1799,8 +1799,8 @@ int qed_cxt_get_cid_info(struct qed_hwfn *p_hwfn, struct qed_cxt_info *p_info) return 0; } -void qed_rdma_set_pf_params(struct qed_hwfn *p_hwfn, - struct qed_rdma_pf_params *p_params) +static void qed_rdma_set_pf_params(struct qed_hwfn *p_hwfn, + struct qed_rdma_pf_params *p_params) { u32 num_cons, num_tasks, num_qps, num_mrs, num_srqs; enum protocol_type proto; diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c index 88e7d5bef909..68f19ca57f96 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_debug.c +++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c @@ -405,7 +405,7 @@ struct phy_defs { /***************************** Constant Arrays *******************************/ /* Debug arrays */ -static struct dbg_array s_dbg_arrays[MAX_BIN_DBG_BUFFER_TYPE] = { {0} }; +static struct dbg_array s_dbg_arrays[MAX_BIN_DBG_BUFFER_TYPE] = { {NULL} }; /* Chip constant definitions array */ static struct chip_defs s_chip_defs[MAX_CHIP_IDS] = { @@ -4028,10 +4028,10 @@ static enum dbg_status qed_mcp_trace_read_meta(struct qed_hwfn *p_hwfn, } /* Dump MCP Trace */ -enum dbg_status qed_mcp_trace_dump(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - u32 *dump_buf, - bool dump, u32 *num_dumped_dwords) +static enum dbg_status qed_mcp_trace_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + bool dump, u32 *num_dumped_dwords) { u32 trace_data_grc_addr, trace_data_size_bytes, trace_data_size_dwords; u32 trace_meta_size_dwords, running_bundle_id, offset = 0; @@ -4130,10 +4130,10 @@ enum dbg_status qed_mcp_trace_dump(struct qed_hwfn *p_hwfn, } /* Dump GRC FIFO */ -enum dbg_status qed_reg_fifo_dump(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - u32 *dump_buf, - bool dump, u32 *num_dumped_dwords) +static enum dbg_status qed_reg_fifo_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + bool dump, u32 *num_dumped_dwords) { u32 offset = 0, dwords_read, size_param_offset; bool fifo_has_data; @@ -4192,10 +4192,10 @@ enum dbg_status qed_reg_fifo_dump(struct qed_hwfn *p_hwfn, } /* Dump IGU FIFO */ -enum dbg_status qed_igu_fifo_dump(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - u32 *dump_buf, - bool dump, u32 *num_dumped_dwords) +static enum dbg_status qed_igu_fifo_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + bool dump, u32 *num_dumped_dwords) { u32 offset = 0, dwords_read, size_param_offset; bool fifo_has_data; @@ -4255,10 +4255,11 @@ enum dbg_status qed_igu_fifo_dump(struct qed_hwfn *p_hwfn, } /* Protection Override dump */ -enum dbg_status qed_protection_override_dump(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - u32 *dump_buf, - bool dump, u32 *num_dumped_dwords) +static enum dbg_status qed_protection_override_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + bool dump, + u32 *num_dumped_dwords) { u32 offset = 0, size_param_offset, override_window_dwords; @@ -6339,10 +6340,11 @@ enum dbg_status qed_print_fw_asserts_results(struct qed_hwfn *p_hwfn, } /* Wrapper for unifying the idle_chk and mcp_trace api */ -enum dbg_status qed_print_idle_chk_results_wrapper(struct qed_hwfn *p_hwfn, - u32 *dump_buf, - u32 num_dumped_dwords, - char *results_buf) +static enum dbg_status +qed_print_idle_chk_results_wrapper(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf) { u32 num_errors, num_warnnings; @@ -6413,8 +6415,8 @@ static void qed_dbg_print_feature(u8 *p_text_buf, u32 text_size) #define QED_RESULTS_BUF_MIN_SIZE 16 /* Generic function for decoding debug feature info */ -enum dbg_status format_feature(struct qed_hwfn *p_hwfn, - enum qed_dbg_features feature_idx) +static enum dbg_status format_feature(struct qed_hwfn *p_hwfn, + enum qed_dbg_features feature_idx) { struct qed_dbg_feature *feature = &p_hwfn->cdev->dbg_params.features[feature_idx]; @@ -6480,8 +6482,9 @@ enum dbg_status format_feature(struct qed_hwfn *p_hwfn, } /* Generic function for performing the dump of a debug feature. */ -enum dbg_status qed_dbg_dump(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, - enum qed_dbg_features feature_idx) +static enum dbg_status qed_dbg_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + enum qed_dbg_features feature_idx) { struct qed_dbg_feature *feature = &p_hwfn->cdev->dbg_params.features[feature_idx]; diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 21adf5208320..edae5fc5fccd 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -497,12 +497,13 @@ int qed_resc_alloc(struct qed_dev *cdev) if (p_hwfn->hw_info.personality == QED_PCI_ETH_ROCE) { num_cons = qed_cxt_get_proto_cid_count(p_hwfn, PROTOCOLID_ROCE, - 0) * 2; + NULL) * 2; n_eqes += num_cons + 2 * MAX_NUM_VFS_BB; } else if (p_hwfn->hw_info.personality == QED_PCI_ISCSI) { num_cons = qed_cxt_get_proto_cid_count(p_hwfn, - PROTOCOLID_ISCSI, 0); + PROTOCOLID_ISCSI, + NULL); n_eqes += 2 * num_cons; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index 7856e5241b52..ce171a80bdaf 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -141,11 +141,11 @@ static void qed_ll2_kill_buffers(struct qed_dev *cdev) qed_ll2_dealloc_buffer(cdev, buffer); } -void qed_ll2b_complete_rx_packet(struct qed_hwfn *p_hwfn, - u8 connection_handle, - struct qed_ll2_rx_packet *p_pkt, - struct core_rx_fast_path_cqe *p_cqe, - bool b_last_packet) +static void qed_ll2b_complete_rx_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + struct qed_ll2_rx_packet *p_pkt, + struct core_rx_fast_path_cqe *p_cqe, + bool b_last_packet) { u16 packet_length = le16_to_cpu(p_cqe->packet_length); struct qed_ll2_buffer *buffer = p_pkt->cookie; @@ -516,7 +516,7 @@ static int qed_ll2_rxq_completion(struct qed_hwfn *p_hwfn, void *cookie) return rc; } -void qed_ll2_rxq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle) +static void qed_ll2_rxq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle) { struct qed_ll2_info *p_ll2_conn = NULL; struct qed_ll2_rx_packet *p_pkt = NULL; @@ -1124,9 +1124,6 @@ static void qed_ll2_prepare_tx_packet_set_bd(struct qed_hwfn *p_hwfn, DMA_REGPAIR_LE(start_bd->addr, first_frag); start_bd->nbytes = cpu_to_le16(first_frag_len); - SET_FIELD(start_bd->bd_flags.as_bitfield, CORE_TX_BD_FLAGS_ROCE_FLAV, - type); - DP_VERBOSE(p_hwfn, (NETIF_MSG_TX_QUEUED | QED_MSG_LL2), "LL2 [q 0x%02x cid 0x%08x type 0x%08x] Tx Producer at [0x%04x] - set with a %04x bytes %02x BDs buffer at %08x:%08x\n", diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 6eb2401b9e22..8dc3f4670f64 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -1430,7 +1430,7 @@ static int qed_set_led(struct qed_dev *cdev, enum qed_led_mode mode) return status; } -struct qed_selftest_ops qed_selftest_ops_pass = { +static struct qed_selftest_ops qed_selftest_ops_pass = { .selftest_memory = &qed_selftest_memory, .selftest_interrupt = &qed_selftest_interrupt, .selftest_register = &qed_selftest_register, diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c index 187df38542f7..f3a825a8f8d5 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.c +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c @@ -157,7 +157,8 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn, p_hwfn->p_rdma_info = p_rdma_info; p_rdma_info->proto = PROTOCOLID_ROCE; - num_cons = qed_cxt_get_proto_cid_count(p_hwfn, p_rdma_info->proto, 0); + num_cons = qed_cxt_get_proto_cid_count(p_hwfn, p_rdma_info->proto, + NULL); p_rdma_info->num_qps = num_cons / 2; @@ -831,7 +832,7 @@ static int qed_rdma_alloc_pd(void *rdma_cxt, u16 *pd) return rc; } -void qed_rdma_free_pd(void *rdma_cxt, u16 pd) +static void qed_rdma_free_pd(void *rdma_cxt, u16 pd) { struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; @@ -868,8 +869,9 @@ qed_rdma_toggle_bit_create_resize_cq(struct qed_hwfn *p_hwfn, u16 icid) return toggle_bit; } -int qed_rdma_create_cq(void *rdma_cxt, - struct qed_rdma_create_cq_in_params *params, u16 *icid) +static int qed_rdma_create_cq(void *rdma_cxt, + struct qed_rdma_create_cq_in_params *params, + u16 *icid) { struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; struct qed_rdma_info *p_info = p_hwfn->p_rdma_info; @@ -952,98 +954,10 @@ err: return rc; } -int qed_rdma_resize_cq(void *rdma_cxt, - struct qed_rdma_resize_cq_in_params *in_params, - struct qed_rdma_resize_cq_out_params *out_params) -{ - struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; - struct rdma_resize_cq_output_params *p_ramrod_res; - struct rdma_resize_cq_ramrod_data *p_ramrod; - enum qed_rdma_toggle_bit toggle_bit; - struct qed_sp_init_data init_data; - struct qed_spq_entry *p_ent; - dma_addr_t ramrod_res_phys; - u8 fw_return_code; - int rc = -ENOMEM; - - DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", in_params->icid); - - p_ramrod_res = - (struct rdma_resize_cq_output_params *) - dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, - sizeof(struct rdma_resize_cq_output_params), - &ramrod_res_phys, GFP_KERNEL); - if (!p_ramrod_res) { - DP_NOTICE(p_hwfn, - "qed resize cq failed: cannot allocate memory (ramrod)\n"); - return rc; - } - - /* Get SPQ entry */ - memset(&init_data, 0, sizeof(init_data)); - init_data.cid = in_params->icid; - init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; - init_data.comp_mode = QED_SPQ_MODE_EBLOCK; - - rc = qed_sp_init_request(p_hwfn, &p_ent, - RDMA_RAMROD_RESIZE_CQ, - p_hwfn->p_rdma_info->proto, &init_data); - if (rc) - goto err; - - p_ramrod = &p_ent->ramrod.rdma_resize_cq; - - p_ramrod->flags = 0; - - /* toggle the bit for every resize or create cq for a given icid */ - toggle_bit = qed_rdma_toggle_bit_create_resize_cq(p_hwfn, - in_params->icid); - - SET_FIELD(p_ramrod->flags, - RDMA_RESIZE_CQ_RAMROD_DATA_TOGGLE_BIT, toggle_bit); - - SET_FIELD(p_ramrod->flags, - RDMA_RESIZE_CQ_RAMROD_DATA_IS_TWO_LEVEL_PBL, - in_params->pbl_two_level); - - p_ramrod->pbl_log_page_size = in_params->pbl_page_size_log - 12; - p_ramrod->pbl_num_pages = cpu_to_le16(in_params->pbl_num_pages); - p_ramrod->max_cqes = cpu_to_le32(in_params->cq_size); - DMA_REGPAIR_LE(p_ramrod->pbl_addr, in_params->pbl_ptr); - DMA_REGPAIR_LE(p_ramrod->output_params_addr, ramrod_res_phys); - - rc = qed_spq_post(p_hwfn, p_ent, &fw_return_code); - if (rc) - goto err; - - if (fw_return_code != RDMA_RETURN_OK) { - DP_NOTICE(p_hwfn, "fw_return_code = %d\n", fw_return_code); - rc = -EINVAL; - goto err; - } - - out_params->prod = le32_to_cpu(p_ramrod_res->old_cq_prod); - out_params->cons = le32_to_cpu(p_ramrod_res->old_cq_cons); - - dma_free_coherent(&p_hwfn->cdev->pdev->dev, - sizeof(struct rdma_resize_cq_output_params), - p_ramrod_res, ramrod_res_phys); - - DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Resized CQ, rc = %d\n", rc); - - return rc; - -err: dma_free_coherent(&p_hwfn->cdev->pdev->dev, - sizeof(struct rdma_resize_cq_output_params), - p_ramrod_res, ramrod_res_phys); - DP_NOTICE(p_hwfn, "Resized CQ, Failed - rc = %d\n", rc); - - return rc; -} - -int qed_rdma_destroy_cq(void *rdma_cxt, - struct qed_rdma_destroy_cq_in_params *in_params, - struct qed_rdma_destroy_cq_out_params *out_params) +static int +qed_rdma_destroy_cq(void *rdma_cxt, + struct qed_rdma_destroy_cq_in_params *in_params, + struct qed_rdma_destroy_cq_out_params *out_params) { struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; struct rdma_destroy_cq_output_params *p_ramrod_res; @@ -1164,7 +1078,7 @@ static enum roce_flavor qed_roce_mode_to_flavor(enum roce_mode roce_mode) return flavor; } -int qed_roce_alloc_cid(struct qed_hwfn *p_hwfn, u16 *cid) +static int qed_roce_alloc_cid(struct qed_hwfn *p_hwfn, u16 *cid) { struct qed_rdma_info *p_rdma_info = p_hwfn->p_rdma_info; u32 responder_icid; @@ -1788,9 +1702,9 @@ err: return rc; } -int qed_roce_query_qp(struct qed_hwfn *p_hwfn, - struct qed_rdma_qp *qp, - struct qed_rdma_query_qp_out_params *out_params) +static int qed_roce_query_qp(struct qed_hwfn *p_hwfn, + struct qed_rdma_qp *qp, + struct qed_rdma_query_qp_out_params *out_params) { struct roce_query_qp_resp_output_params *p_resp_ramrod_res; struct roce_query_qp_req_output_params *p_req_ramrod_res; @@ -1931,7 +1845,7 @@ err_resp: return rc; } -int qed_roce_destroy_qp(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp) +static int qed_roce_destroy_qp(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp) { u32 num_invalidated_mw = 0; u32 num_bound_mw = 0; @@ -2033,7 +1947,7 @@ static int qed_rdma_destroy_qp(void *rdma_cxt, struct qed_rdma_qp *qp) return rc; } -struct qed_rdma_qp * +static struct qed_rdma_qp * qed_rdma_create_qp(void *rdma_cxt, struct qed_rdma_create_qp_in_params *in_params, struct qed_rdma_create_qp_out_params *out_params) diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.h b/drivers/net/ethernet/qlogic/qed/qed_roce.h index 691413176734..279f342af8db 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.h +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.h @@ -95,26 +95,6 @@ struct qed_rdma_info { enum protocol_type proto; }; -struct qed_rdma_resize_cq_in_params { - u16 icid; - u32 cq_size; - bool pbl_two_level; - u64 pbl_ptr; - u16 pbl_num_pages; - u8 pbl_page_size_log; -}; - -struct qed_rdma_resize_cq_out_params { - u32 prod; - u32 cons; -}; - -struct qed_rdma_resize_cnq_in_params { - u32 cnq_id; - u32 pbl_page_size_log; - u64 pbl_ptr; -}; - struct qed_rdma_qp { struct regpair qp_handle; struct regpair qp_handle_async; diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp.h b/drivers/net/ethernet/qlogic/qed/qed_sp.h index 652c90819758..b2c08e4d2a9b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_sp.h @@ -80,7 +80,6 @@ union ramrod_data { struct roce_destroy_qp_resp_ramrod_data roce_destroy_qp_resp; struct roce_destroy_qp_req_ramrod_data roce_destroy_qp_req; struct rdma_create_cq_ramrod_data rdma_create_cq; - struct rdma_resize_cq_ramrod_data rdma_resize_cq; struct rdma_destroy_cq_ramrod_data rdma_destroy_cq; struct rdma_srq_create_ramrod_data rdma_create_srq; struct rdma_srq_destroy_ramrod_data rdma_destroy_srq; From 958b3d396d7f80755e2c2e6a8f873a669f38de10 Mon Sep 17 00:00:00 2001 From: Brenden Blanco Date: Thu, 13 Oct 2016 13:13:11 -0700 Subject: [PATCH 089/884] net/mlx4_en: fixup xdp tx irq to match rx In cases where the number of tx rings is not a multiple of the number of rx rings, the tx completion event will be handled on a different core from the transmit and population of the ring. Races on the ring will lead to a double-free of the page, and possibly other corruption. The rings are initialized by default with a valid multiple of rings, based on the number of cpus, therefore an invalid configuration requires ethtool to change the ring layout. For instance 'ethtool -L eth0 rx 9 tx 8' will cause packets received on rx0, and XDP_TX'd to tx48, to be completed on cpu3 (48 % 9 == 3). Resolve this discrepancy by shifting the irq for the xdp tx queues to start again from 0, modulo rx_ring_num. Fixes: 9ecc2d86171a ("net/mlx4_en: add xdp forwarding and data write support") Reported-by: Jesper Dangaard Brouer Signed-off-by: Brenden Blanco Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_cq.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index 132cea655920..e3be7e44ff51 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -127,7 +127,15 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, /* For TX we use the same irq per ring we assigned for the RX */ struct mlx4_en_cq *rx_cq; + int xdp_index; + /* The xdp tx irq must align with the rx ring that forwards to + * it, so reindex these from 0. This should only happen when + * tx_ring_num is not a multiple of rx_ring_num. + */ + xdp_index = (priv->xdp_ring_num - priv->tx_ring_num) + cq_idx; + if (xdp_index >= 0) + cq_idx = xdp_index; cq_idx = cq_idx % priv->rx_ring_num; rx_cq = priv->rx_cq[cq_idx]; cq->vector = rx_cq->vector; From c58b84ee467bfd08b39fbda56757ba19ac50980a Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 5 Oct 2016 06:46:49 -0300 Subject: [PATCH 090/884] [media] af9005: don't do DMA on stack The USB control messages require DMA to work. We cannot pass a stack-allocated buffer, as it is not warranted that the stack would be into a DMA enabled area. Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/dvb-usb/af9005.c | 317 ++++++++++++++++------------- 1 file changed, 180 insertions(+), 137 deletions(-) diff --git a/drivers/media/usb/dvb-usb/af9005.c b/drivers/media/usb/dvb-usb/af9005.c index efa782ed6e2d..b257780fb380 100644 --- a/drivers/media/usb/dvb-usb/af9005.c +++ b/drivers/media/usb/dvb-usb/af9005.c @@ -52,17 +52,16 @@ u8 regmask[8] = { 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff }; struct af9005_device_state { u8 sequence; int led_state; + unsigned char data[256]; + struct mutex data_mutex; }; static int af9005_generic_read_write(struct dvb_usb_device *d, u16 reg, int readwrite, int type, u8 * values, int len) { struct af9005_device_state *st = d->priv; - u8 obuf[16] = { 0 }; - u8 ibuf[17] = { 0 }; - u8 command; - int i; - int ret; + u8 command, seq; + int i, ret; if (len < 1) { err("generic read/write, less than 1 byte. Makes no sense."); @@ -73,16 +72,17 @@ static int af9005_generic_read_write(struct dvb_usb_device *d, u16 reg, return -EINVAL; } - obuf[0] = 14; /* rest of buffer length low */ - obuf[1] = 0; /* rest of buffer length high */ + mutex_lock(&st->data_mutex); + st->data[0] = 14; /* rest of buffer length low */ + st->data[1] = 0; /* rest of buffer length high */ - obuf[2] = AF9005_REGISTER_RW; /* register operation */ - obuf[3] = 12; /* rest of buffer length */ + st->data[2] = AF9005_REGISTER_RW; /* register operation */ + st->data[3] = 12; /* rest of buffer length */ - obuf[4] = st->sequence++; /* sequence number */ + st->data[4] = seq = st->sequence++; /* sequence number */ - obuf[5] = (u8) (reg >> 8); /* register address */ - obuf[6] = (u8) (reg & 0xff); + st->data[5] = (u8) (reg >> 8); /* register address */ + st->data[6] = (u8) (reg & 0xff); if (type == AF9005_OFDM_REG) { command = AF9005_CMD_OFDM_REG; @@ -96,51 +96,52 @@ static int af9005_generic_read_write(struct dvb_usb_device *d, u16 reg, command |= readwrite; if (readwrite == AF9005_CMD_WRITE) for (i = 0; i < len; i++) - obuf[8 + i] = values[i]; + st->data[8 + i] = values[i]; else if (type == AF9005_TUNER_REG) /* read command for tuner, the first byte contains the i2c address */ - obuf[8] = values[0]; - obuf[7] = command; + st->data[8] = values[0]; + st->data[7] = command; - ret = dvb_usb_generic_rw(d, obuf, 16, ibuf, 17, 0); + ret = dvb_usb_generic_rw(d, st->data, 16, st->data, 17, 0); if (ret) - return ret; + goto ret; /* sanity check */ - if (ibuf[2] != AF9005_REGISTER_RW_ACK) { + if (st->data[2] != AF9005_REGISTER_RW_ACK) { err("generic read/write, wrong reply code."); - return -EIO; + ret = -EIO; + goto ret; } - if (ibuf[3] != 0x0d) { + if (st->data[3] != 0x0d) { err("generic read/write, wrong length in reply."); - return -EIO; + ret = -EIO; + goto ret; } - if (ibuf[4] != obuf[4]) { + if (st->data[4] != seq) { err("generic read/write, wrong sequence in reply."); - return -EIO; + ret = -EIO; + goto ret; } /* - Windows driver doesn't check these fields, in fact sometimes - the register in the reply is different that what has been sent - - if (ibuf[5] != obuf[5] || ibuf[6] != obuf[6]) { - err("generic read/write, wrong register in reply."); - return -EIO; - } - if (ibuf[7] != command) { - err("generic read/write wrong command in reply."); - return -EIO; - } + * In thesis, both input and output buffers should have + * identical values for st->data[5] to st->data[8]. + * However, windows driver doesn't check these fields, in fact + * sometimes the register in the reply is different that what + * has been sent */ - if (ibuf[16] != 0x01) { + if (st->data[16] != 0x01) { err("generic read/write wrong status code in reply."); - return -EIO; + ret = -EIO; + goto ret; } + if (readwrite == AF9005_CMD_READ) for (i = 0; i < len; i++) - values[i] = ibuf[8 + i]; + values[i] = st->data[8 + i]; - return 0; +ret: + mutex_unlock(&st->data_mutex); + return ret; } @@ -464,8 +465,7 @@ int af9005_send_command(struct dvb_usb_device *d, u8 command, u8 * wbuf, struct af9005_device_state *st = d->priv; int ret, i, packet_len; - u8 buf[64]; - u8 ibuf[64]; + u8 seq; if (wlen < 0) { err("send command, wlen less than 0 bytes. Makes no sense."); @@ -480,94 +480,97 @@ int af9005_send_command(struct dvb_usb_device *d, u8 command, u8 * wbuf, return -EINVAL; } packet_len = wlen + 5; - buf[0] = (u8) (packet_len & 0xff); - buf[1] = (u8) ((packet_len & 0xff00) >> 8); - buf[2] = 0x26; /* packet type */ - buf[3] = wlen + 3; - buf[4] = st->sequence++; - buf[5] = command; - buf[6] = wlen; + mutex_lock(&st->data_mutex); + + st->data[0] = (u8) (packet_len & 0xff); + st->data[1] = (u8) ((packet_len & 0xff00) >> 8); + + st->data[2] = 0x26; /* packet type */ + st->data[3] = wlen + 3; + st->data[4] = seq = st->sequence++; + st->data[5] = command; + st->data[6] = wlen; for (i = 0; i < wlen; i++) - buf[7 + i] = wbuf[i]; - ret = dvb_usb_generic_rw(d, buf, wlen + 7, ibuf, rlen + 7, 0); - if (ret) - return ret; - if (ibuf[2] != 0x27) { + st->data[7 + i] = wbuf[i]; + ret = dvb_usb_generic_rw(d, st->data, wlen + 7, st->data, rlen + 7, 0); + if (st->data[2] != 0x27) { err("send command, wrong reply code."); - return -EIO; - } - if (ibuf[4] != buf[4]) { + ret = -EIO; + } else if (st->data[4] != seq) { err("send command, wrong sequence in reply."); - return -EIO; - } - if (ibuf[5] != 0x01) { + ret = -EIO; + } else if (st->data[5] != 0x01) { err("send command, wrong status code in reply."); - return -EIO; - } - if (ibuf[6] != rlen) { + ret = -EIO; + } else if (st->data[6] != rlen) { err("send command, invalid data length in reply."); - return -EIO; + ret = -EIO; } - for (i = 0; i < rlen; i++) - rbuf[i] = ibuf[i + 7]; - return 0; + if (!ret) { + for (i = 0; i < rlen; i++) + rbuf[i] = st->data[i + 7]; + } + + mutex_unlock(&st->data_mutex); + return ret; } int af9005_read_eeprom(struct dvb_usb_device *d, u8 address, u8 * values, int len) { struct af9005_device_state *st = d->priv; - u8 obuf[16], ibuf[14]; + u8 seq; int ret, i; - memset(obuf, 0, sizeof(obuf)); - memset(ibuf, 0, sizeof(ibuf)); + mutex_lock(&st->data_mutex); - obuf[0] = 14; /* length of rest of packet low */ - obuf[1] = 0; /* length of rest of packer high */ + memset(st->data, 0, sizeof(st->data)); - obuf[2] = 0x2a; /* read/write eeprom */ + st->data[0] = 14; /* length of rest of packet low */ + st->data[1] = 0; /* length of rest of packer high */ - obuf[3] = 12; /* size */ + st->data[2] = 0x2a; /* read/write eeprom */ - obuf[4] = st->sequence++; + st->data[3] = 12; /* size */ - obuf[5] = 0; /* read */ + st->data[4] = seq = st->sequence++; - obuf[6] = len; - obuf[7] = address; - ret = dvb_usb_generic_rw(d, obuf, 16, ibuf, 14, 0); - if (ret) - return ret; - if (ibuf[2] != 0x2b) { + st->data[5] = 0; /* read */ + + st->data[6] = len; + st->data[7] = address; + ret = dvb_usb_generic_rw(d, st->data, 16, st->data, 14, 0); + if (st->data[2] != 0x2b) { err("Read eeprom, invalid reply code"); - return -EIO; - } - if (ibuf[3] != 10) { + ret = -EIO; + } else if (st->data[3] != 10) { err("Read eeprom, invalid reply length"); - return -EIO; - } - if (ibuf[4] != obuf[4]) { + ret = -EIO; + } else if (st->data[4] != seq) { err("Read eeprom, wrong sequence in reply "); - return -EIO; - } - if (ibuf[5] != 1) { + ret = -EIO; + } else if (st->data[5] != 1) { err("Read eeprom, wrong status in reply "); - return -EIO; + ret = -EIO; } - for (i = 0; i < len; i++) { - values[i] = ibuf[6 + i]; + + if (!ret) { + for (i = 0; i < len; i++) + values[i] = st->data[6 + i]; } - return 0; + mutex_unlock(&st->data_mutex); + + return ret; } -static int af9005_boot_packet(struct usb_device *udev, int type, u8 * reply) +static int af9005_boot_packet(struct usb_device *udev, int type, u8 *reply, + u8 *buf, int size) { - u8 buf[FW_BULKOUT_SIZE + 2]; u16 checksum; int act_len, i, ret; - memset(buf, 0, sizeof(buf)); + + memset(buf, 0, size); buf[0] = (u8) (FW_BULKOUT_SIZE & 0xff); buf[1] = (u8) ((FW_BULKOUT_SIZE >> 8) & 0xff); switch (type) { @@ -720,15 +723,21 @@ static int af9005_download_firmware(struct usb_device *udev, const struct firmwa { int i, packets, ret, act_len; - u8 buf[FW_BULKOUT_SIZE + 2]; + u8 *buf; u8 reply; - ret = af9005_boot_packet(udev, FW_CONFIG, &reply); + buf = kmalloc(FW_BULKOUT_SIZE + 2, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + ret = af9005_boot_packet(udev, FW_CONFIG, &reply, buf, + FW_BULKOUT_SIZE + 2); if (ret) - return ret; + goto err; if (reply != 0x01) { err("before downloading firmware, FW_CONFIG expected 0x01, received 0x%x", reply); - return -EIO; + ret = -EIO; + goto err; } packets = fw->size / FW_BULKOUT_SIZE; buf[0] = (u8) (FW_BULKOUT_SIZE & 0xff); @@ -743,28 +752,35 @@ static int af9005_download_firmware(struct usb_device *udev, const struct firmwa buf, FW_BULKOUT_SIZE + 2, &act_len, 1000); if (ret) { err("firmware download failed at packet %d with code %d", i, ret); - return ret; + goto err; } } - ret = af9005_boot_packet(udev, FW_CONFIRM, &reply); + ret = af9005_boot_packet(udev, FW_CONFIRM, &reply, + buf, FW_BULKOUT_SIZE + 2); if (ret) - return ret; + goto err; if (reply != (u8) (packets & 0xff)) { err("after downloading firmware, FW_CONFIRM expected 0x%x, received 0x%x", packets & 0xff, reply); - return -EIO; + ret = -EIO; + goto err; } - ret = af9005_boot_packet(udev, FW_BOOT, &reply); + ret = af9005_boot_packet(udev, FW_BOOT, &reply, buf, + FW_BULKOUT_SIZE + 2); if (ret) - return ret; - ret = af9005_boot_packet(udev, FW_CONFIG, &reply); + goto err; + ret = af9005_boot_packet(udev, FW_CONFIG, &reply, buf, + FW_BULKOUT_SIZE + 2); if (ret) - return ret; + goto err; if (reply != 0x02) { err("after downloading firmware, FW_CONFIG expected 0x02, received 0x%x", reply); - return -EIO; + ret = -EIO; + goto err; } - return 0; +err: + kfree(buf); + return ret; } @@ -823,53 +839,59 @@ static int af9005_rc_query(struct dvb_usb_device *d, u32 * event, int *state) { struct af9005_device_state *st = d->priv; int ret, len; - - u8 obuf[5]; - u8 ibuf[256]; + u8 seq; *state = REMOTE_NO_KEY_PRESSED; if (rc_decode == NULL) { /* it shouldn't never come here */ return 0; } + + mutex_lock(&st->data_mutex); + /* deb_info("rc_query\n"); */ - obuf[0] = 3; /* rest of packet length low */ - obuf[1] = 0; /* rest of packet lentgh high */ - obuf[2] = 0x40; /* read remote */ - obuf[3] = 1; /* rest of packet length */ - obuf[4] = st->sequence++; /* sequence number */ - ret = dvb_usb_generic_rw(d, obuf, 5, ibuf, 256, 0); + st->data[0] = 3; /* rest of packet length low */ + st->data[1] = 0; /* rest of packet lentgh high */ + st->data[2] = 0x40; /* read remote */ + st->data[3] = 1; /* rest of packet length */ + st->data[4] = seq = st->sequence++; /* sequence number */ + ret = dvb_usb_generic_rw(d, st->data, 5, st->data, 256, 0); if (ret) { err("rc query failed"); - return ret; + goto ret; } - if (ibuf[2] != 0x41) { + if (st->data[2] != 0x41) { err("rc query bad header."); - return -EIO; - } - if (ibuf[4] != obuf[4]) { + ret = -EIO; + goto ret; + } else if (st->data[4] != seq) { err("rc query bad sequence."); - return -EIO; + ret = -EIO; + goto ret; } - len = ibuf[5]; + len = st->data[5]; if (len > 246) { err("rc query invalid length"); - return -EIO; + ret = -EIO; + goto ret; } if (len > 0) { deb_rc("rc data (%d) ", len); - debug_dump((ibuf + 6), len, deb_rc); - ret = rc_decode(d, &ibuf[6], len, event, state); + debug_dump((st->data + 6), len, deb_rc); + ret = rc_decode(d, &st->data[6], len, event, state); if (ret) { err("rc_decode failed"); - return ret; + goto ret; } else { deb_rc("rc_decode state %x event %x\n", *state, *event); if (*state == REMOTE_KEY_REPEAT) *event = d->last_event; } } - return 0; + +ret: + mutex_unlock(&st->data_mutex); + return ret; } static int af9005_power_ctrl(struct dvb_usb_device *d, int onoff) @@ -953,10 +975,16 @@ static int af9005_identify_state(struct usb_device *udev, int *cold) { int ret; - u8 reply; - ret = af9005_boot_packet(udev, FW_CONFIG, &reply); + u8 reply, *buf; + + buf = kmalloc(FW_BULKOUT_SIZE + 2, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + ret = af9005_boot_packet(udev, FW_CONFIG, &reply, + buf, FW_BULKOUT_SIZE + 2); if (ret) - return ret; + goto err; deb_info("result of FW_CONFIG in identify state %d\n", reply); if (reply == 0x01) *cold = 1; @@ -965,7 +993,10 @@ static int af9005_identify_state(struct usb_device *udev, else return -EIO; deb_info("Identify state cold = %d\n", *cold); - return 0; + +err: + kfree(buf); + return ret; } static struct dvb_usb_device_properties af9005_properties; @@ -973,8 +1004,20 @@ static struct dvb_usb_device_properties af9005_properties; static int af9005_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) { - return dvb_usb_device_init(intf, &af9005_properties, - THIS_MODULE, NULL, adapter_nr); + struct dvb_usb_device *d; + struct af9005_device_state *st; + int ret; + + ret = dvb_usb_device_init(intf, &af9005_properties, + THIS_MODULE, &d, adapter_nr); + + if (ret < 0) + return ret; + + st = d->priv; + mutex_init(&st->data_mutex); + + return 0; } enum af9005_usb_table_entry { From 5ef8ed0e5608f75805e41ef8abbe94a3597e6613 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 5 Oct 2016 06:02:19 -0300 Subject: [PATCH 091/884] [media] cinergyT2-core: don't do DMA on stack The USB control messages require DMA to work. We cannot pass a stack-allocated buffer, as it is not warranted that the stack would be into a DMA enabled area. Reviewed-by: Patrick Boettcher Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/dvb-usb/cinergyT2-core.c | 84 ++++++++++++++++------ 1 file changed, 61 insertions(+), 23 deletions(-) diff --git a/drivers/media/usb/dvb-usb/cinergyT2-core.c b/drivers/media/usb/dvb-usb/cinergyT2-core.c index 9fd1527494eb..d85c0c4d4042 100644 --- a/drivers/media/usb/dvb-usb/cinergyT2-core.c +++ b/drivers/media/usb/dvb-usb/cinergyT2-core.c @@ -41,6 +41,8 @@ DVB_DEFINE_MOD_OPT_ADAPTER_NR(adapter_nr); struct cinergyt2_state { u8 rc_counter; + unsigned char data[64]; + struct mutex data_mutex; }; /* We are missing a release hook with usb_device data */ @@ -50,33 +52,52 @@ static struct dvb_usb_device_properties cinergyt2_properties; static int cinergyt2_streaming_ctrl(struct dvb_usb_adapter *adap, int enable) { - char buf[] = { CINERGYT2_EP1_CONTROL_STREAM_TRANSFER, enable ? 1 : 0 }; - char result[64]; - return dvb_usb_generic_rw(adap->dev, buf, sizeof(buf), result, - sizeof(result), 0); + struct dvb_usb_device *d = adap->dev; + struct cinergyt2_state *st = d->priv; + int ret; + + mutex_lock(&st->data_mutex); + st->data[0] = CINERGYT2_EP1_CONTROL_STREAM_TRANSFER; + st->data[1] = enable ? 1 : 0; + + ret = dvb_usb_generic_rw(d, st->data, 2, st->data, 64, 0); + mutex_unlock(&st->data_mutex); + + return ret; } static int cinergyt2_power_ctrl(struct dvb_usb_device *d, int enable) { - char buf[] = { CINERGYT2_EP1_SLEEP_MODE, enable ? 0 : 1 }; - char state[3]; - return dvb_usb_generic_rw(d, buf, sizeof(buf), state, sizeof(state), 0); + struct cinergyt2_state *st = d->priv; + int ret; + + mutex_lock(&st->data_mutex); + st->data[0] = CINERGYT2_EP1_SLEEP_MODE; + st->data[1] = enable ? 0 : 1; + + ret = dvb_usb_generic_rw(d, st->data, 2, st->data, 3, 0); + mutex_unlock(&st->data_mutex); + + return ret; } static int cinergyt2_frontend_attach(struct dvb_usb_adapter *adap) { - char query[] = { CINERGYT2_EP1_GET_FIRMWARE_VERSION }; - char state[3]; + struct dvb_usb_device *d = adap->dev; + struct cinergyt2_state *st = d->priv; int ret; adap->fe_adap[0].fe = cinergyt2_fe_attach(adap->dev); - ret = dvb_usb_generic_rw(adap->dev, query, sizeof(query), state, - sizeof(state), 0); + mutex_lock(&st->data_mutex); + st->data[0] = CINERGYT2_EP1_GET_FIRMWARE_VERSION; + + ret = dvb_usb_generic_rw(d, st->data, 1, st->data, 3, 0); if (ret < 0) { deb_rc("cinergyt2_power_ctrl() Failed to retrieve sleep " "state info\n"); } + mutex_unlock(&st->data_mutex); /* Copy this pointer as we are gonna need it in the release phase */ cinergyt2_usb_device = adap->dev; @@ -141,13 +162,16 @@ static int repeatable_keys[] = { static int cinergyt2_rc_query(struct dvb_usb_device *d, u32 *event, int *state) { struct cinergyt2_state *st = d->priv; - u8 key[5] = {0, 0, 0, 0, 0}, cmd = CINERGYT2_EP1_GET_RC_EVENTS; int i; *state = REMOTE_NO_KEY_PRESSED; - dvb_usb_generic_rw(d, &cmd, 1, key, sizeof(key), 0); - if (key[4] == 0xff) { + mutex_lock(&st->data_mutex); + st->data[0] = CINERGYT2_EP1_GET_RC_EVENTS; + + dvb_usb_generic_rw(d, st->data, 1, st->data, 5, 0); + + if (st->data[4] == 0xff) { /* key repeat */ st->rc_counter++; if (st->rc_counter > RC_REPEAT_DELAY) { @@ -157,31 +181,45 @@ static int cinergyt2_rc_query(struct dvb_usb_device *d, u32 *event, int *state) *event = d->last_event; deb_rc("repeat key, event %x\n", *event); - return 0; + goto ret; } } deb_rc("repeated key (non repeatable)\n"); } - return 0; + goto ret; } /* hack to pass checksum on the custom field */ - key[2] = ~key[1]; - dvb_usb_nec_rc_key_to_event(d, key, event, state); - if (key[0] != 0) { + st->data[2] = ~st->data[1]; + dvb_usb_nec_rc_key_to_event(d, st->data, event, state); + if (st->data[0] != 0) { if (*event != d->last_event) st->rc_counter = 0; - deb_rc("key: %*ph\n", 5, key); + deb_rc("key: %*ph\n", 5, st->data); } - return 0; + +ret: + mutex_unlock(&st->data_mutex); + return ret; } static int cinergyt2_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) { - return dvb_usb_device_init(intf, &cinergyt2_properties, - THIS_MODULE, NULL, adapter_nr); + struct dvb_usb_device *d; + struct cinergyt2_state *st; + int ret; + + ret = dvb_usb_device_init(intf, &cinergyt2_properties, + THIS_MODULE, &d, adapter_nr); + if (ret < 0) + return ret; + + st = d->priv; + mutex_init(&st->data_mutex); + + return 0; } From 54d577a4c91ba5dfe634ddee26223bc9a39687a7 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 7 Oct 2016 13:54:33 -0300 Subject: [PATCH 092/884] [media] cinergyT2-core: handle error code on RC query There's no sense on decoding and generating a RC key code if there was an error on the URB control message. Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/dvb-usb/cinergyT2-core.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/media/usb/dvb-usb/cinergyT2-core.c b/drivers/media/usb/dvb-usb/cinergyT2-core.c index d85c0c4d4042..8ac825413d5a 100644 --- a/drivers/media/usb/dvb-usb/cinergyT2-core.c +++ b/drivers/media/usb/dvb-usb/cinergyT2-core.c @@ -102,7 +102,7 @@ static int cinergyt2_frontend_attach(struct dvb_usb_adapter *adap) /* Copy this pointer as we are gonna need it in the release phase */ cinergyt2_usb_device = adap->dev; - return 0; + return ret; } static struct rc_map_table rc_map_cinergyt2_table[] = { @@ -162,14 +162,16 @@ static int repeatable_keys[] = { static int cinergyt2_rc_query(struct dvb_usb_device *d, u32 *event, int *state) { struct cinergyt2_state *st = d->priv; - int i; + int i, ret; *state = REMOTE_NO_KEY_PRESSED; mutex_lock(&st->data_mutex); st->data[0] = CINERGYT2_EP1_GET_RC_EVENTS; - dvb_usb_generic_rw(d, st->data, 1, st->data, 5, 0); + ret = dvb_usb_generic_rw(d, st->data, 1, st->data, 5, 0); + if (ret < 0) + goto ret; if (st->data[4] == 0xff) { /* key repeat */ From c2730eef5fa180eec853f3775bebc4d74e6e6697 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 7 Oct 2016 06:07:36 -0300 Subject: [PATCH 093/884] [media] cinergyT2-fe: cache stats at cinergyt2_fe_read_status() Instead of sending USB commands for every stats call, collect them once, when status is updated. As the frontend kthread will call it on every few seconds, the stats will still be collected. Besides reducing the amount of USB/I2C transfers, this also warrants that all stats will be collected at the same time, and makes easier to convert it to DVBv5 stats in the future. Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/dvb-usb/cinergyT2-fe.c | 48 ++++-------------------- 1 file changed, 7 insertions(+), 41 deletions(-) diff --git a/drivers/media/usb/dvb-usb/cinergyT2-fe.c b/drivers/media/usb/dvb-usb/cinergyT2-fe.c index b3ec743a7a2e..fd8edcb56e61 100644 --- a/drivers/media/usb/dvb-usb/cinergyT2-fe.c +++ b/drivers/media/usb/dvb-usb/cinergyT2-fe.c @@ -139,6 +139,7 @@ static uint16_t compute_tps(struct dtv_frontend_properties *op) struct cinergyt2_fe_state { struct dvb_frontend fe; struct dvb_usb_device *d; + struct dvbt_get_status_msg status; }; static int cinergyt2_fe_read_status(struct dvb_frontend *fe, @@ -154,6 +155,8 @@ static int cinergyt2_fe_read_status(struct dvb_frontend *fe, if (ret < 0) return ret; + state->status = result; + *status = 0; if (0xffff - le16_to_cpu(result.gain) > 30) @@ -177,34 +180,16 @@ static int cinergyt2_fe_read_status(struct dvb_frontend *fe, static int cinergyt2_fe_read_ber(struct dvb_frontend *fe, u32 *ber) { struct cinergyt2_fe_state *state = fe->demodulator_priv; - struct dvbt_get_status_msg status; - char cmd[] = { CINERGYT2_EP1_GET_TUNER_STATUS }; - int ret; - ret = dvb_usb_generic_rw(state->d, cmd, sizeof(cmd), (char *)&status, - sizeof(status), 0); - if (ret < 0) - return ret; - - *ber = le32_to_cpu(status.viterbi_error_rate); + *ber = le32_to_cpu(state->status.viterbi_error_rate); return 0; } static int cinergyt2_fe_read_unc_blocks(struct dvb_frontend *fe, u32 *unc) { struct cinergyt2_fe_state *state = fe->demodulator_priv; - struct dvbt_get_status_msg status; - u8 cmd[] = { CINERGYT2_EP1_GET_TUNER_STATUS }; - int ret; - ret = dvb_usb_generic_rw(state->d, cmd, sizeof(cmd), (u8 *)&status, - sizeof(status), 0); - if (ret < 0) { - err("cinergyt2_fe_read_unc_blocks() Failed! (Error=%d)\n", - ret); - return ret; - } - *unc = le32_to_cpu(status.uncorrected_block_count); + *unc = le32_to_cpu(state->status.uncorrected_block_count); return 0; } @@ -212,35 +197,16 @@ static int cinergyt2_fe_read_signal_strength(struct dvb_frontend *fe, u16 *strength) { struct cinergyt2_fe_state *state = fe->demodulator_priv; - struct dvbt_get_status_msg status; - char cmd[] = { CINERGYT2_EP1_GET_TUNER_STATUS }; - int ret; - ret = dvb_usb_generic_rw(state->d, cmd, sizeof(cmd), (char *)&status, - sizeof(status), 0); - if (ret < 0) { - err("cinergyt2_fe_read_signal_strength() Failed!" - " (Error=%d)\n", ret); - return ret; - } - *strength = (0xffff - le16_to_cpu(status.gain)); + *strength = (0xffff - le16_to_cpu(state->status.gain)); return 0; } static int cinergyt2_fe_read_snr(struct dvb_frontend *fe, u16 *snr) { struct cinergyt2_fe_state *state = fe->demodulator_priv; - struct dvbt_get_status_msg status; - char cmd[] = { CINERGYT2_EP1_GET_TUNER_STATUS }; - int ret; - ret = dvb_usb_generic_rw(state->d, cmd, sizeof(cmd), (char *)&status, - sizeof(status), 0); - if (ret < 0) { - err("cinergyt2_fe_read_snr() Failed! (Error=%d)\n", ret); - return ret; - } - *snr = (status.snr << 8) | status.snr; + *snr = (state->status.snr << 8) | state->status.snr; return 0; } From 0d43c0ff8b7e0a76136c602625efa5e6919baf9c Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 7 Oct 2016 06:16:59 -0300 Subject: [PATCH 094/884] [media] cinergyT2-fe: don't do DMA on stack The USB control messages require DMA to work. We cannot pass a stack-allocated buffer, as it is not warranted that the stack would be into a DMA enabled area. Reviewed-by: Patrick Boettcher Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/dvb-usb/cinergyT2-fe.c | 56 ++++++++++++++---------- 1 file changed, 33 insertions(+), 23 deletions(-) diff --git a/drivers/media/usb/dvb-usb/cinergyT2-fe.c b/drivers/media/usb/dvb-usb/cinergyT2-fe.c index fd8edcb56e61..2d29b4174dba 100644 --- a/drivers/media/usb/dvb-usb/cinergyT2-fe.c +++ b/drivers/media/usb/dvb-usb/cinergyT2-fe.c @@ -139,6 +139,10 @@ static uint16_t compute_tps(struct dtv_frontend_properties *op) struct cinergyt2_fe_state { struct dvb_frontend fe; struct dvb_usb_device *d; + + unsigned char data[64]; + struct mutex data_mutex; + struct dvbt_get_status_msg status; }; @@ -146,28 +150,31 @@ static int cinergyt2_fe_read_status(struct dvb_frontend *fe, enum fe_status *status) { struct cinergyt2_fe_state *state = fe->demodulator_priv; - struct dvbt_get_status_msg result; - u8 cmd[] = { CINERGYT2_EP1_GET_TUNER_STATUS }; int ret; - ret = dvb_usb_generic_rw(state->d, cmd, sizeof(cmd), (u8 *)&result, - sizeof(result), 0); + mutex_lock(&state->data_mutex); + state->data[0] = CINERGYT2_EP1_GET_TUNER_STATUS; + + ret = dvb_usb_generic_rw(state->d, state->data, 1, + state->data, sizeof(state->status), 0); + if (!ret) + memcpy(&state->status, state->data, sizeof(state->status)); + mutex_unlock(&state->data_mutex); + if (ret < 0) return ret; - state->status = result; - *status = 0; - if (0xffff - le16_to_cpu(result.gain) > 30) + if (0xffff - le16_to_cpu(state->status.gain) > 30) *status |= FE_HAS_SIGNAL; - if (result.lock_bits & (1 << 6)) + if (state->status.lock_bits & (1 << 6)) *status |= FE_HAS_LOCK; - if (result.lock_bits & (1 << 5)) + if (state->status.lock_bits & (1 << 5)) *status |= FE_HAS_SYNC; - if (result.lock_bits & (1 << 4)) + if (state->status.lock_bits & (1 << 4)) *status |= FE_HAS_CARRIER; - if (result.lock_bits & (1 << 1)) + if (state->status.lock_bits & (1 << 1)) *status |= FE_HAS_VITERBI; if ((*status & (FE_HAS_CARRIER | FE_HAS_VITERBI | FE_HAS_SYNC)) != @@ -232,34 +239,36 @@ static int cinergyt2_fe_set_frontend(struct dvb_frontend *fe) { struct dtv_frontend_properties *fep = &fe->dtv_property_cache; struct cinergyt2_fe_state *state = fe->demodulator_priv; - struct dvbt_set_parameters_msg param; - char result[2]; + struct dvbt_set_parameters_msg *param; int err; - param.cmd = CINERGYT2_EP1_SET_TUNER_PARAMETERS; - param.tps = cpu_to_le16(compute_tps(fep)); - param.freq = cpu_to_le32(fep->frequency / 1000); - param.flags = 0; + mutex_lock(&state->data_mutex); + + param = (void *)state->data; + param->cmd = CINERGYT2_EP1_SET_TUNER_PARAMETERS; + param->tps = cpu_to_le16(compute_tps(fep)); + param->freq = cpu_to_le32(fep->frequency / 1000); + param->flags = 0; switch (fep->bandwidth_hz) { default: case 8000000: - param.bandwidth = 8; + param->bandwidth = 8; break; case 7000000: - param.bandwidth = 7; + param->bandwidth = 7; break; case 6000000: - param.bandwidth = 6; + param->bandwidth = 6; break; } - err = dvb_usb_generic_rw(state->d, - (char *)¶m, sizeof(param), - result, sizeof(result), 0); + err = dvb_usb_generic_rw(state->d, state->data, sizeof(*param), + state->data, 2, 0); if (err < 0) err("cinergyt2_fe_set_frontend() Failed! err=%d\n", err); + mutex_unlock(&state->data_mutex); return (err < 0) ? err : 0; } @@ -281,6 +290,7 @@ struct dvb_frontend *cinergyt2_fe_attach(struct dvb_usb_device *d) s->d = d; memcpy(&s->fe.ops, &cinergyt2_fe_ops, sizeof(struct dvb_frontend_ops)); s->fe.demodulator_priv = s; + mutex_init(&s->data_mutex); return &s->fe; } From 17ce039b4e5405c49d8c0d64e6d781cc6f4dc1ac Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 7 Oct 2016 06:30:27 -0300 Subject: [PATCH 095/884] [media] cxusb: don't do DMA on stack The USB control messages require DMA to work. We cannot pass a stack-allocated buffer, as it is not warranted that the stack would be into a DMA enabled area. Reviewed-by: Patrick Boettcher Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/dvb-usb/cxusb.c | 62 +++++++++++++++++-------------- drivers/media/usb/dvb-usb/cxusb.h | 6 +++ 2 files changed, 41 insertions(+), 27 deletions(-) diff --git a/drivers/media/usb/dvb-usb/cxusb.c b/drivers/media/usb/dvb-usb/cxusb.c index 907ac01ae297..39772812269d 100644 --- a/drivers/media/usb/dvb-usb/cxusb.c +++ b/drivers/media/usb/dvb-usb/cxusb.c @@ -45,9 +45,6 @@ #include "si2168.h" #include "si2157.h" -/* Max transfer size done by I2C transfer functions */ -#define MAX_XFER_SIZE 80 - /* debug */ static int dvb_usb_cxusb_debug; module_param_named(debug, dvb_usb_cxusb_debug, int, 0644); @@ -61,23 +58,27 @@ DVB_DEFINE_MOD_OPT_ADAPTER_NR(adapter_nr); static int cxusb_ctrl_msg(struct dvb_usb_device *d, u8 cmd, u8 *wbuf, int wlen, u8 *rbuf, int rlen) { - int wo = (rbuf == NULL || rlen == 0); /* write-only */ - u8 sndbuf[MAX_XFER_SIZE]; + struct cxusb_state *st = d->priv; + int ret, wo; - if (1 + wlen > sizeof(sndbuf)) { - warn("i2c wr: len=%d is too big!\n", - wlen); + if (1 + wlen > MAX_XFER_SIZE) { + warn("i2c wr: len=%d is too big!\n", wlen); return -EOPNOTSUPP; } - memset(sndbuf, 0, 1+wlen); + wo = (rbuf == NULL || rlen == 0); /* write-only */ - sndbuf[0] = cmd; - memcpy(&sndbuf[1], wbuf, wlen); + mutex_lock(&st->data_mutex); + st->data[0] = cmd; + memcpy(&st->data[1], wbuf, wlen); if (wo) - return dvb_usb_generic_write(d, sndbuf, 1+wlen); + ret = dvb_usb_generic_write(d, st->data, 1 + wlen); else - return dvb_usb_generic_rw(d, sndbuf, 1+wlen, rbuf, rlen, 0); + ret = dvb_usb_generic_rw(d, st->data, 1 + wlen, + rbuf, rlen, 0); + + mutex_unlock(&st->data_mutex); + return ret; } /* GPIO */ @@ -1460,36 +1461,43 @@ static struct dvb_usb_device_properties cxusb_mygica_t230_properties; static int cxusb_probe(struct usb_interface *intf, const struct usb_device_id *id) { + struct dvb_usb_device *d; + struct cxusb_state *st; + if (0 == dvb_usb_device_init(intf, &cxusb_medion_properties, - THIS_MODULE, NULL, adapter_nr) || + THIS_MODULE, &d, adapter_nr) || 0 == dvb_usb_device_init(intf, &cxusb_bluebird_lgh064f_properties, - THIS_MODULE, NULL, adapter_nr) || + THIS_MODULE, &d, adapter_nr) || 0 == dvb_usb_device_init(intf, &cxusb_bluebird_dee1601_properties, - THIS_MODULE, NULL, adapter_nr) || + THIS_MODULE, &d, adapter_nr) || 0 == dvb_usb_device_init(intf, &cxusb_bluebird_lgz201_properties, - THIS_MODULE, NULL, adapter_nr) || + THIS_MODULE, &d, adapter_nr) || 0 == dvb_usb_device_init(intf, &cxusb_bluebird_dtt7579_properties, - THIS_MODULE, NULL, adapter_nr) || + THIS_MODULE, &d, adapter_nr) || 0 == dvb_usb_device_init(intf, &cxusb_bluebird_dualdig4_properties, - THIS_MODULE, NULL, adapter_nr) || + THIS_MODULE, &d, adapter_nr) || 0 == dvb_usb_device_init(intf, &cxusb_bluebird_nano2_properties, - THIS_MODULE, NULL, adapter_nr) || + THIS_MODULE, &d, adapter_nr) || 0 == dvb_usb_device_init(intf, &cxusb_bluebird_nano2_needsfirmware_properties, - THIS_MODULE, NULL, adapter_nr) || + THIS_MODULE, &d, adapter_nr) || 0 == dvb_usb_device_init(intf, &cxusb_aver_a868r_properties, - THIS_MODULE, NULL, adapter_nr) || + THIS_MODULE, &d, adapter_nr) || 0 == dvb_usb_device_init(intf, &cxusb_bluebird_dualdig4_rev2_properties, - THIS_MODULE, NULL, adapter_nr) || + THIS_MODULE, &d, adapter_nr) || 0 == dvb_usb_device_init(intf, &cxusb_d680_dmb_properties, - THIS_MODULE, NULL, adapter_nr) || + THIS_MODULE, &d, adapter_nr) || 0 == dvb_usb_device_init(intf, &cxusb_mygica_d689_properties, - THIS_MODULE, NULL, adapter_nr) || + THIS_MODULE, &d, adapter_nr) || 0 == dvb_usb_device_init(intf, &cxusb_mygica_t230_properties, - THIS_MODULE, NULL, adapter_nr) || - 0) + THIS_MODULE, &d, adapter_nr) || + 0) { + st = d->priv; + mutex_init(&st->data_mutex); + return 0; + } return -EINVAL; } diff --git a/drivers/media/usb/dvb-usb/cxusb.h b/drivers/media/usb/dvb-usb/cxusb.h index 527ff7905e15..9f3ee0e47d5c 100644 --- a/drivers/media/usb/dvb-usb/cxusb.h +++ b/drivers/media/usb/dvb-usb/cxusb.h @@ -28,10 +28,16 @@ #define CMD_ANALOG 0x50 #define CMD_DIGITAL 0x51 +/* Max transfer size done by I2C transfer functions */ +#define MAX_XFER_SIZE 80 + struct cxusb_state { u8 gpio_write_state[3]; struct i2c_client *i2c_client_demod; struct i2c_client *i2c_client_tuner; + + unsigned char data[MAX_XFER_SIZE]; + struct mutex data_mutex; }; #endif From bd1f976cc95b0689f889c8d93434ee61dd09b08b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 7 Oct 2016 06:40:24 -0300 Subject: [PATCH 096/884] [media] dib0700: be sure that dib0700_ctrl_rd() users can do DMA dib0700_ctrl_rd() takes a RX and a TX pointer. Be sure that both will point to a memory allocated via kmalloc(). Reviewed-by: Patrick Boettcher Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/dvb-usb/dib0700_core.c | 4 +++- drivers/media/usb/dvb-usb/dib0700_devices.c | 25 +++++++++++---------- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/drivers/media/usb/dvb-usb/dib0700_core.c b/drivers/media/usb/dvb-usb/dib0700_core.c index f3196658fb70..515f89dba199 100644 --- a/drivers/media/usb/dvb-usb/dib0700_core.c +++ b/drivers/media/usb/dvb-usb/dib0700_core.c @@ -292,13 +292,15 @@ static int dib0700_i2c_xfer_legacy(struct i2c_adapter *adap, /* special thing in the current firmware: when length is zero the read-failed */ len = dib0700_ctrl_rd(d, st->buf, msg[i].len + 2, - msg[i+1].buf, msg[i+1].len); + st->buf, msg[i + 1].len); if (len <= 0) { deb_info("I2C read failed on address 0x%02x\n", msg[i].addr); break; } + memcpy(msg[i + 1].buf, st->buf, msg[i + 1].len); + msg[i+1].len = len; i++; diff --git a/drivers/media/usb/dvb-usb/dib0700_devices.c b/drivers/media/usb/dvb-usb/dib0700_devices.c index 0857b56e652c..ef1b8ee75c57 100644 --- a/drivers/media/usb/dvb-usb/dib0700_devices.c +++ b/drivers/media/usb/dvb-usb/dib0700_devices.c @@ -508,8 +508,6 @@ static int stk7700ph_tuner_attach(struct dvb_usb_adapter *adap) #define DEFAULT_RC_INTERVAL 50 -static u8 rc_request[] = { REQUEST_POLL_RC, 0 }; - /* * This function is used only when firmware is < 1.20 version. Newer * firmwares use bulk mode, with functions implemented at dib0700_core, @@ -517,7 +515,6 @@ static u8 rc_request[] = { REQUEST_POLL_RC, 0 }; */ static int dib0700_rc_query_old_firmware(struct dvb_usb_device *d) { - u8 key[4]; enum rc_type protocol; u32 scancode; u8 toggle; @@ -532,39 +529,43 @@ static int dib0700_rc_query_old_firmware(struct dvb_usb_device *d) return 0; } - i = dib0700_ctrl_rd(d, rc_request, 2, key, 4); + st->buf[0] = REQUEST_POLL_RC; + st->buf[1] = 0; + + i = dib0700_ctrl_rd(d, st->buf, 2, st->buf, 4); if (i <= 0) { err("RC Query Failed"); - return -1; + return -EIO; } /* losing half of KEY_0 events from Philipps rc5 remotes.. */ - if (key[0] == 0 && key[1] == 0 && key[2] == 0 && key[3] == 0) + if (st->buf[0] == 0 && st->buf[1] == 0 + && st->buf[2] == 0 && st->buf[3] == 0) return 0; - /* info("%d: %2X %2X %2X %2X",dvb_usb_dib0700_ir_proto,(int)key[3-2],(int)key[3-3],(int)key[3-1],(int)key[3]); */ + /* info("%d: %2X %2X %2X %2X",dvb_usb_dib0700_ir_proto,(int)st->buf[3 - 2],(int)st->buf[3 - 3],(int)st->buf[3 - 1],(int)st->buf[3]); */ dib0700_rc_setup(d, NULL); /* reset ir sensor data to prevent false events */ switch (d->props.rc.core.protocol) { case RC_BIT_NEC: /* NEC protocol sends repeat code as 0 0 0 FF */ - if ((key[3-2] == 0x00) && (key[3-3] == 0x00) && - (key[3] == 0xff)) { + if ((st->buf[3 - 2] == 0x00) && (st->buf[3 - 3] == 0x00) && + (st->buf[3] == 0xff)) { rc_repeat(d->rc_dev); return 0; } protocol = RC_TYPE_NEC; - scancode = RC_SCANCODE_NEC(key[3-2], key[3-3]); + scancode = RC_SCANCODE_NEC(st->buf[3 - 2], st->buf[3 - 3]); toggle = 0; break; default: /* RC-5 protocol changes toggle bit on new keypress */ protocol = RC_TYPE_RC5; - scancode = RC_SCANCODE_RC5(key[3-2], key[3-3]); - toggle = key[3-1]; + scancode = RC_SCANCODE_RC5(st->buf[3 - 2], st->buf[3 - 3]); + toggle = st->buf[3 - 1]; break; } From fa1ecd8dc454fe2d3075a964240f45ceb60cb9e0 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 7 Oct 2016 06:53:51 -0300 Subject: [PATCH 097/884] [media] dib0700_core: don't use stack on I2C reads Be sure that I2C reads won't use stack by passing a pointer to the state buffer, that we know it was allocated via kmalloc, instead of relying on the buffer allocated by an I2C client. Reviewed-by: Patrick Boettcher Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/dvb-usb/dib0700_core.c | 27 +++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/drivers/media/usb/dvb-usb/dib0700_core.c b/drivers/media/usb/dvb-usb/dib0700_core.c index 515f89dba199..92d5408684ac 100644 --- a/drivers/media/usb/dvb-usb/dib0700_core.c +++ b/drivers/media/usb/dvb-usb/dib0700_core.c @@ -213,7 +213,7 @@ static int dib0700_i2c_xfer_new(struct i2c_adapter *adap, struct i2c_msg *msg, usb_rcvctrlpipe(d->udev, 0), REQUEST_NEW_I2C_READ, USB_TYPE_VENDOR | USB_DIR_IN, - value, index, msg[i].buf, + value, index, st->buf, msg[i].len, USB_CTRL_GET_TIMEOUT); if (result < 0) { @@ -221,6 +221,14 @@ static int dib0700_i2c_xfer_new(struct i2c_adapter *adap, struct i2c_msg *msg, break; } + if (msg[i].len > sizeof(st->buf)) { + deb_info("buffer too small to fit %d bytes\n", + msg[i].len); + return -EIO; + } + + memcpy(msg[i].buf, st->buf, msg[i].len); + deb_data("<<< "); debug_dump(msg[i].buf, msg[i].len, deb_data); @@ -238,6 +246,13 @@ static int dib0700_i2c_xfer_new(struct i2c_adapter *adap, struct i2c_msg *msg, /* I2C ctrl + FE bus; */ st->buf[3] = ((gen_mode << 6) & 0xC0) | ((bus_mode << 4) & 0x30); + + if (msg[i].len > sizeof(st->buf) - 4) { + deb_info("i2c message to big: %d\n", + msg[i].len); + return -EIO; + } + /* The Actual i2c payload */ memcpy(&st->buf[4], msg[i].buf, msg[i].len); @@ -283,6 +298,11 @@ static int dib0700_i2c_xfer_legacy(struct i2c_adapter *adap, /* fill in the address */ st->buf[1] = msg[i].addr << 1; /* fill the buffer */ + if (msg[i].len > sizeof(st->buf) - 2) { + deb_info("i2c xfer to big: %d\n", + msg[i].len); + return -EIO; + } memcpy(&st->buf[2], msg[i].buf, msg[i].len); /* write/read request */ @@ -299,6 +319,11 @@ static int dib0700_i2c_xfer_legacy(struct i2c_adapter *adap, break; } + if (msg[i + 1].len > sizeof(st->buf)) { + deb_info("i2c xfer buffer to small for %d\n", + msg[i].len); + return -EIO; + } memcpy(msg[i + 1].buf, st->buf, msg[i + 1].len); msg[i+1].len = len; From 426398b15d9f18ce3e7b5c4e657b4c0103dd8389 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 7 Oct 2016 07:34:36 -0300 Subject: [PATCH 098/884] [media] dibusb: don't do DMA on stack The USB control messages require DMA to work. We cannot pass a stack-allocated buffer, as it is not warranted that the stack would be into a DMA enabled area. Reviewed-by: Patrick Boettcher Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/dvb-usb/dibusb-common.c | 109 ++++++++++++++++------ drivers/media/usb/dvb-usb/dibusb.h | 3 + 2 files changed, 86 insertions(+), 26 deletions(-) diff --git a/drivers/media/usb/dvb-usb/dibusb-common.c b/drivers/media/usb/dvb-usb/dibusb-common.c index 4b08c2a47ae2..951f3dac9082 100644 --- a/drivers/media/usb/dvb-usb/dibusb-common.c +++ b/drivers/media/usb/dvb-usb/dibusb-common.c @@ -63,72 +63,117 @@ EXPORT_SYMBOL(dibusb_pid_filter_ctrl); int dibusb_power_ctrl(struct dvb_usb_device *d, int onoff) { - u8 b[3]; + u8 *b; int ret; + + b = kmalloc(3, GFP_KERNEL); + if (!b) + return -ENOMEM; + b[0] = DIBUSB_REQ_SET_IOCTL; b[1] = DIBUSB_IOCTL_CMD_POWER_MODE; b[2] = onoff ? DIBUSB_IOCTL_POWER_WAKEUP : DIBUSB_IOCTL_POWER_SLEEP; - ret = dvb_usb_generic_write(d,b,3); + + ret = dvb_usb_generic_write(d, b, 3); + + kfree(b); + msleep(10); + return ret; } EXPORT_SYMBOL(dibusb_power_ctrl); int dibusb2_0_streaming_ctrl(struct dvb_usb_adapter *adap, int onoff) { - u8 b[3] = { 0 }; int ret; + u8 *b; + + b = kmalloc(3, GFP_KERNEL); + if (!b) + return -ENOMEM; if ((ret = dibusb_streaming_ctrl(adap,onoff)) < 0) - return ret; + goto ret; if (onoff) { b[0] = DIBUSB_REQ_SET_STREAMING_MODE; b[1] = 0x00; - if ((ret = dvb_usb_generic_write(adap->dev,b,2)) < 0) - return ret; + ret = dvb_usb_generic_write(adap->dev, b, 2); + if (ret < 0) + goto ret; } b[0] = DIBUSB_REQ_SET_IOCTL; b[1] = onoff ? DIBUSB_IOCTL_CMD_ENABLE_STREAM : DIBUSB_IOCTL_CMD_DISABLE_STREAM; - return dvb_usb_generic_write(adap->dev,b,3); + ret = dvb_usb_generic_write(adap->dev, b, 3); + +ret: + kfree(b); + return ret; } EXPORT_SYMBOL(dibusb2_0_streaming_ctrl); int dibusb2_0_power_ctrl(struct dvb_usb_device *d, int onoff) { - if (onoff) { - u8 b[3] = { DIBUSB_REQ_SET_IOCTL, DIBUSB_IOCTL_CMD_POWER_MODE, DIBUSB_IOCTL_POWER_WAKEUP }; - return dvb_usb_generic_write(d,b,3); - } else + u8 *b; + int ret; + + if (!onoff) return 0; + + b = kmalloc(3, GFP_KERNEL); + if (!b) + return -ENOMEM; + + b[0] = DIBUSB_REQ_SET_IOCTL; + b[1] = DIBUSB_IOCTL_CMD_POWER_MODE; + b[2] = DIBUSB_IOCTL_POWER_WAKEUP; + + ret = dvb_usb_generic_write(d, b, 3); + + kfree(b); + + return ret; } EXPORT_SYMBOL(dibusb2_0_power_ctrl); static int dibusb_i2c_msg(struct dvb_usb_device *d, u8 addr, u8 *wbuf, u16 wlen, u8 *rbuf, u16 rlen) { - u8 sndbuf[MAX_XFER_SIZE]; /* lead(1) devaddr,direction(1) addr(2) data(wlen) (len(2) (when reading)) */ - /* write only ? */ - int wo = (rbuf == NULL || rlen == 0), - len = 2 + wlen + (wo ? 0 : 2); + u8 *sndbuf; + int ret, wo, len; - if (4 + wlen > sizeof(sndbuf)) { + /* write only ? */ + wo = (rbuf == NULL || rlen == 0); + + len = 2 + wlen + (wo ? 0 : 2); + + sndbuf = kmalloc(MAX_XFER_SIZE, GFP_KERNEL); + if (!sndbuf) + return -ENOMEM; + + if (4 + wlen > MAX_XFER_SIZE) { warn("i2c wr: len=%d is too big!\n", wlen); - return -EOPNOTSUPP; + ret = -EOPNOTSUPP; + goto ret; } sndbuf[0] = wo ? DIBUSB_REQ_I2C_WRITE : DIBUSB_REQ_I2C_READ; sndbuf[1] = (addr << 1) | (wo ? 0 : 1); - memcpy(&sndbuf[2],wbuf,wlen); + memcpy(&sndbuf[2], wbuf, wlen); if (!wo) { - sndbuf[wlen+2] = (rlen >> 8) & 0xff; - sndbuf[wlen+3] = rlen & 0xff; + sndbuf[wlen + 2] = (rlen >> 8) & 0xff; + sndbuf[wlen + 3] = rlen & 0xff; } - return dvb_usb_generic_rw(d,sndbuf,len,rbuf,rlen,0); + ret = dvb_usb_generic_rw(d, sndbuf, len, rbuf, rlen, 0); + +ret: + kfree(sndbuf); + return ret; } /* @@ -320,11 +365,23 @@ EXPORT_SYMBOL(rc_map_dibusb_table); int dibusb_rc_query(struct dvb_usb_device *d, u32 *event, int *state) { - u8 key[5],cmd = DIBUSB_REQ_POLL_REMOTE; - dvb_usb_generic_rw(d,&cmd,1,key,5,0); - dvb_usb_nec_rc_key_to_event(d,key,event,state); - if (key[0] != 0) - deb_info("key: %*ph\n", 5, key); + u8 *buf; + + buf = kmalloc(5, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + buf[0] = DIBUSB_REQ_POLL_REMOTE; + + dvb_usb_generic_rw(d, buf, 1, buf, 5, 0); + + dvb_usb_nec_rc_key_to_event(d, buf, event, state); + + if (buf[0] != 0) + deb_info("key: %*ph\n", 5, buf); + + kfree(buf); + return 0; } EXPORT_SYMBOL(dibusb_rc_query); diff --git a/drivers/media/usb/dvb-usb/dibusb.h b/drivers/media/usb/dvb-usb/dibusb.h index 3f82163d8ab8..697be2a17ade 100644 --- a/drivers/media/usb/dvb-usb/dibusb.h +++ b/drivers/media/usb/dvb-usb/dibusb.h @@ -96,6 +96,9 @@ #define DIBUSB_IOCTL_CMD_ENABLE_STREAM 0x01 #define DIBUSB_IOCTL_CMD_DISABLE_STREAM 0x02 +/* Max transfer size done by I2C transfer functions */ +#define MAX_XFER_SIZE 64 + struct dibusb_state { struct dib_fe_xfer_ops ops; int mt2060_present; From ff1c123545d706c6e414132c9fc93e345f5141da Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 7 Oct 2016 13:40:48 -0300 Subject: [PATCH 099/884] [media] dibusb: handle error code on RC query There's no sense on decoding and generating a RC key code if there was an error on the URB control message. Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/dvb-usb/dibusb-common.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/media/usb/dvb-usb/dibusb-common.c b/drivers/media/usb/dvb-usb/dibusb-common.c index 951f3dac9082..b0fd9a609352 100644 --- a/drivers/media/usb/dvb-usb/dibusb-common.c +++ b/drivers/media/usb/dvb-usb/dibusb-common.c @@ -366,6 +366,7 @@ EXPORT_SYMBOL(rc_map_dibusb_table); int dibusb_rc_query(struct dvb_usb_device *d, u32 *event, int *state) { u8 *buf; + int ret; buf = kmalloc(5, GFP_KERNEL); if (!buf) @@ -373,7 +374,9 @@ int dibusb_rc_query(struct dvb_usb_device *d, u32 *event, int *state) buf[0] = DIBUSB_REQ_POLL_REMOTE; - dvb_usb_generic_rw(d, buf, 1, buf, 5, 0); + ret = dvb_usb_generic_rw(d, buf, 1, buf, 5, 0); + if (ret < 0) + goto ret; dvb_usb_nec_rc_key_to_event(d, buf, event, state); @@ -382,6 +385,7 @@ int dibusb_rc_query(struct dvb_usb_device *d, u32 *event, int *state) kfree(buf); - return 0; +ret: + return ret; } EXPORT_SYMBOL(dibusb_rc_query); From f0b0ada7184cca44bb5ae96903eca304cc20eec5 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 7 Oct 2016 07:55:38 -0300 Subject: [PATCH 100/884] [media] digitv: don't do DMA on stack The USB control messages require DMA to work. We cannot pass a stack-allocated buffer, as it is not warranted that the stack would be into a DMA enabled area. Reviewed-by: Patrick Boettcher Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/dvb-usb/digitv.c | 20 +++++++++++--------- drivers/media/usb/dvb-usb/digitv.h | 5 ++++- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/media/usb/dvb-usb/digitv.c b/drivers/media/usb/dvb-usb/digitv.c index 63134335c994..09f8c28bd4db 100644 --- a/drivers/media/usb/dvb-usb/digitv.c +++ b/drivers/media/usb/dvb-usb/digitv.c @@ -28,20 +28,22 @@ DVB_DEFINE_MOD_OPT_ADAPTER_NR(adapter_nr); static int digitv_ctrl_msg(struct dvb_usb_device *d, u8 cmd, u8 vv, u8 *wbuf, int wlen, u8 *rbuf, int rlen) { + struct digitv_state *st = d->priv; int wo = (rbuf == NULL || rlen == 0); /* write-only */ - u8 sndbuf[7],rcvbuf[7]; - memset(sndbuf,0,7); memset(rcvbuf,0,7); - sndbuf[0] = cmd; - sndbuf[1] = vv; - sndbuf[2] = wo ? wlen : rlen; + memset(st->sndbuf, 0, 7); + memset(st->rcvbuf, 0, 7); + + st->sndbuf[0] = cmd; + st->sndbuf[1] = vv; + st->sndbuf[2] = wo ? wlen : rlen; if (wo) { - memcpy(&sndbuf[3],wbuf,wlen); - dvb_usb_generic_write(d,sndbuf,7); + memcpy(&st->sndbuf[3], wbuf, wlen); + dvb_usb_generic_write(d, st->sndbuf, 7); } else { - dvb_usb_generic_rw(d,sndbuf,7,rcvbuf,7,10); - memcpy(rbuf,&rcvbuf[3],rlen); + dvb_usb_generic_rw(d, st->sndbuf, 7, st->rcvbuf, 7, 10); + memcpy(rbuf, &st->rcvbuf[3], rlen); } return 0; } diff --git a/drivers/media/usb/dvb-usb/digitv.h b/drivers/media/usb/dvb-usb/digitv.h index 908c09f4966b..581e09c25491 100644 --- a/drivers/media/usb/dvb-usb/digitv.h +++ b/drivers/media/usb/dvb-usb/digitv.h @@ -5,7 +5,10 @@ #include "dvb-usb.h" struct digitv_state { - int is_nxt6000; + int is_nxt6000; + + unsigned char sndbuf[7]; + unsigned char rcvbuf[7]; }; /* protocol (from usblogging and the SDK: From 208d8af509455ebf25f25f974c9907c0eb1a2b0a Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 10 Oct 2016 15:34:13 -0300 Subject: [PATCH 101/884] [media] dtt200u-fe: don't keep waiting for lock at set_frontend() It is up to the frontend kthread to wait for lock. Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/dvb-usb/dtt200u-fe.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/media/usb/dvb-usb/dtt200u-fe.c b/drivers/media/usb/dvb-usb/dtt200u-fe.c index c09332bd99cb..9bb15f7b48db 100644 --- a/drivers/media/usb/dvb-usb/dtt200u-fe.c +++ b/drivers/media/usb/dvb-usb/dtt200u-fe.c @@ -105,8 +105,6 @@ static int dtt200u_fe_set_frontend(struct dvb_frontend *fe) { struct dtv_frontend_properties *fep = &fe->dtv_property_cache; struct dtt200u_fe_state *state = fe->demodulator_priv; - int i; - enum fe_status st; u16 freq = fep->frequency / 250000; u8 bwbuf[2] = { SET_BANDWIDTH, 0 },freqbuf[3] = { SET_RF_FREQ, 0, 0 }; @@ -130,13 +128,6 @@ static int dtt200u_fe_set_frontend(struct dvb_frontend *fe) freqbuf[2] = (freq >> 8) & 0xff; dvb_usb_generic_write(state->d,freqbuf,3); - for (i = 0; i < 30; i++) { - msleep(20); - dtt200u_fe_read_status(fe, &st); - if (st & FE_TIMEDOUT) - continue; - } - return 0; } From 89919b518863e7dac98d4d6d4b08ed2113d91d27 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 7 Oct 2016 08:06:57 -0300 Subject: [PATCH 102/884] [media] dtt200u-fe: don't do DMA on stack The USB control messages require DMA to work. We cannot pass a stack-allocated buffer, as it is not warranted that the stack would be into a DMA enabled area. Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/dvb-usb/dtt200u-fe.c | 95 ++++++++++++++++++-------- 1 file changed, 65 insertions(+), 30 deletions(-) diff --git a/drivers/media/usb/dvb-usb/dtt200u-fe.c b/drivers/media/usb/dvb-usb/dtt200u-fe.c index 9bb15f7b48db..7f7f64be6353 100644 --- a/drivers/media/usb/dvb-usb/dtt200u-fe.c +++ b/drivers/media/usb/dvb-usb/dtt200u-fe.c @@ -18,17 +18,22 @@ struct dtt200u_fe_state { struct dtv_frontend_properties fep; struct dvb_frontend frontend; + + unsigned char data[80]; + struct mutex data_mutex; }; static int dtt200u_fe_read_status(struct dvb_frontend *fe, enum fe_status *stat) { struct dtt200u_fe_state *state = fe->demodulator_priv; - u8 st = GET_TUNE_STATUS, b[3]; - dvb_usb_generic_rw(state->d,&st,1,b,3,0); + mutex_lock(&state->data_mutex); + state->data[0] = GET_TUNE_STATUS; - switch (b[0]) { + dvb_usb_generic_rw(state->d, state->data, 1, state->data, 3, 0); + + switch (state->data[0]) { case 0x01: *stat = FE_HAS_SIGNAL | FE_HAS_CARRIER | FE_HAS_VITERBI | FE_HAS_SYNC | FE_HAS_LOCK; @@ -41,51 +46,75 @@ static int dtt200u_fe_read_status(struct dvb_frontend *fe, *stat = 0; break; } + mutex_unlock(&state->data_mutex); return 0; } static int dtt200u_fe_read_ber(struct dvb_frontend* fe, u32 *ber) { struct dtt200u_fe_state *state = fe->demodulator_priv; - u8 bw = GET_VIT_ERR_CNT,b[3]; - dvb_usb_generic_rw(state->d,&bw,1,b,3,0); - *ber = (b[0] << 16) | (b[1] << 8) | b[2]; + + mutex_lock(&state->data_mutex); + state->data[0] = GET_VIT_ERR_CNT; + + dvb_usb_generic_rw(state->d, state->data, 1, state->data, 3, 0); + *ber = (state->data[0] << 16) | (state->data[1] << 8) | state->data[2]; + + mutex_unlock(&state->data_mutex); return 0; } static int dtt200u_fe_read_unc_blocks(struct dvb_frontend* fe, u32 *unc) { struct dtt200u_fe_state *state = fe->demodulator_priv; - u8 bw = GET_RS_UNCOR_BLK_CNT,b[2]; - dvb_usb_generic_rw(state->d,&bw,1,b,2,0); - *unc = (b[0] << 8) | b[1]; - return 0; + mutex_lock(&state->data_mutex); + state->data[0] = GET_RS_UNCOR_BLK_CNT; + + dvb_usb_generic_rw(state->d, state->data, 1, state->data, 2, 0); + + mutex_unlock(&state->data_mutex); + return ret; } static int dtt200u_fe_read_signal_strength(struct dvb_frontend* fe, u16 *strength) { struct dtt200u_fe_state *state = fe->demodulator_priv; - u8 bw = GET_AGC, b; - dvb_usb_generic_rw(state->d,&bw,1,&b,1,0); - *strength = (b << 8) | b; - return 0; + + mutex_lock(&state->data_mutex); + state->data[0] = GET_AGC; + + dvb_usb_generic_rw(state->d, state->data, 1, state->data, 1, 0); + + mutex_unlock(&state->data_mutex); + return ret; } static int dtt200u_fe_read_snr(struct dvb_frontend* fe, u16 *snr) { struct dtt200u_fe_state *state = fe->demodulator_priv; - u8 bw = GET_SNR,br; - dvb_usb_generic_rw(state->d,&bw,1,&br,1,0); - *snr = ~((br << 8) | br); - return 0; + + mutex_lock(&state->data_mutex); + state->data[0] = GET_SNR; + + dvb_usb_generic_rw(state->d, state->data, 1, state->data, 1, 0); + + mutex_unlock(&state->data_mutex); + return ret; } static int dtt200u_fe_init(struct dvb_frontend* fe) { struct dtt200u_fe_state *state = fe->demodulator_priv; - u8 b = SET_INIT; - return dvb_usb_generic_write(state->d,&b,1); + int ret; + + mutex_lock(&state->data_mutex); + state->data[0] = SET_INIT; + + ret = dvb_usb_generic_write(state->d, state->data, 1); + mutex_unlock(&state->data_mutex); + + return ret; } static int dtt200u_fe_sleep(struct dvb_frontend* fe) @@ -106,29 +135,34 @@ static int dtt200u_fe_set_frontend(struct dvb_frontend *fe) struct dtv_frontend_properties *fep = &fe->dtv_property_cache; struct dtt200u_fe_state *state = fe->demodulator_priv; u16 freq = fep->frequency / 250000; - u8 bwbuf[2] = { SET_BANDWIDTH, 0 },freqbuf[3] = { SET_RF_FREQ, 0, 0 }; + mutex_lock(&state->data_mutex); + state->data[0] = SET_BANDWIDTH; switch (fep->bandwidth_hz) { case 8000000: - bwbuf[1] = 8; + state->data[1] = 8; break; case 7000000: - bwbuf[1] = 7; + state->data[1] = 7; break; case 6000000: - bwbuf[1] = 6; + state->data[1] = 6; break; default: - return -EINVAL; + ret = -EINVAL; + goto ret; } - dvb_usb_generic_write(state->d,bwbuf,2); + dvb_usb_generic_write(state->d, state->data, 2); - freqbuf[1] = freq & 0xff; - freqbuf[2] = (freq >> 8) & 0xff; - dvb_usb_generic_write(state->d,freqbuf,3); + state->data[0] = SET_RF_FREQ; + state->data[1] = freq & 0xff; + state->data[2] = (freq >> 8) & 0xff; + dvb_usb_generic_write(state->d, state->data, 3); - return 0; +ret: + mutex_unlock(&state->data_mutex); + return ret; } static int dtt200u_fe_get_frontend(struct dvb_frontend* fe, @@ -160,6 +194,7 @@ struct dvb_frontend* dtt200u_fe_attach(struct dvb_usb_device *d) deb_info("attaching frontend dtt200u\n"); state->d = d; + mutex_init(&state->data_mutex); memcpy(&state->frontend.ops,&dtt200u_fe_ops,sizeof(struct dvb_frontend_ops)); state->frontend.demodulator_priv = state; From ba705a629ceee28f7ee4a5f977da52278b683eb0 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 7 Oct 2016 13:44:09 -0300 Subject: [PATCH 103/884] [media] dtt200u-fe: handle errors on USB control messages If something goes wrong, return an error code, instead of assuming that everything went fine. Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/dvb-usb/dtt200u-fe.c | 40 ++++++++++++++++++++------ 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/drivers/media/usb/dvb-usb/dtt200u-fe.c b/drivers/media/usb/dvb-usb/dtt200u-fe.c index 7f7f64be6353..f5c042baa254 100644 --- a/drivers/media/usb/dvb-usb/dtt200u-fe.c +++ b/drivers/media/usb/dvb-usb/dtt200u-fe.c @@ -27,11 +27,17 @@ static int dtt200u_fe_read_status(struct dvb_frontend *fe, enum fe_status *stat) { struct dtt200u_fe_state *state = fe->demodulator_priv; + int ret; mutex_lock(&state->data_mutex); state->data[0] = GET_TUNE_STATUS; - dvb_usb_generic_rw(state->d, state->data, 1, state->data, 3, 0); + ret = dvb_usb_generic_rw(state->d, state->data, 1, state->data, 3, 0); + if (ret < 0) { + *stat = 0; + mutex_unlock(&state->data_mutex); + return ret; + } switch (state->data[0]) { case 0x01: @@ -53,25 +59,30 @@ static int dtt200u_fe_read_status(struct dvb_frontend *fe, static int dtt200u_fe_read_ber(struct dvb_frontend* fe, u32 *ber) { struct dtt200u_fe_state *state = fe->demodulator_priv; + int ret; mutex_lock(&state->data_mutex); state->data[0] = GET_VIT_ERR_CNT; - dvb_usb_generic_rw(state->d, state->data, 1, state->data, 3, 0); - *ber = (state->data[0] << 16) | (state->data[1] << 8) | state->data[2]; + ret = dvb_usb_generic_rw(state->d, state->data, 1, state->data, 3, 0); + if (ret >= 0) + *ber = (state->data[0] << 16) | (state->data[1] << 8) | state->data[2]; mutex_unlock(&state->data_mutex); - return 0; + return ret; } static int dtt200u_fe_read_unc_blocks(struct dvb_frontend* fe, u32 *unc) { struct dtt200u_fe_state *state = fe->demodulator_priv; + int ret; mutex_lock(&state->data_mutex); state->data[0] = GET_RS_UNCOR_BLK_CNT; - dvb_usb_generic_rw(state->d, state->data, 1, state->data, 2, 0); + ret = dvb_usb_generic_rw(state->d, state->data, 1, state->data, 2, 0); + if (ret >= 0) + *unc = (state->data[0] << 8) | state->data[1]; mutex_unlock(&state->data_mutex); return ret; @@ -80,11 +91,14 @@ static int dtt200u_fe_read_unc_blocks(struct dvb_frontend* fe, u32 *unc) static int dtt200u_fe_read_signal_strength(struct dvb_frontend* fe, u16 *strength) { struct dtt200u_fe_state *state = fe->demodulator_priv; + int ret; mutex_lock(&state->data_mutex); state->data[0] = GET_AGC; - dvb_usb_generic_rw(state->d, state->data, 1, state->data, 1, 0); + ret = dvb_usb_generic_rw(state->d, state->data, 1, state->data, 1, 0); + if (ret >= 0) + *strength = (state->data[0] << 8) | state->data[0]; mutex_unlock(&state->data_mutex); return ret; @@ -93,11 +107,14 @@ static int dtt200u_fe_read_signal_strength(struct dvb_frontend* fe, u16 *strengt static int dtt200u_fe_read_snr(struct dvb_frontend* fe, u16 *snr) { struct dtt200u_fe_state *state = fe->demodulator_priv; + int ret; mutex_lock(&state->data_mutex); state->data[0] = GET_SNR; - dvb_usb_generic_rw(state->d, state->data, 1, state->data, 1, 0); + ret = dvb_usb_generic_rw(state->d, state->data, 1, state->data, 1, 0); + if (ret >= 0) + *snr = ~((state->data[0] << 8) | state->data[0]); mutex_unlock(&state->data_mutex); return ret; @@ -134,6 +151,7 @@ static int dtt200u_fe_set_frontend(struct dvb_frontend *fe) { struct dtv_frontend_properties *fep = &fe->dtv_property_cache; struct dtt200u_fe_state *state = fe->demodulator_priv; + int ret; u16 freq = fep->frequency / 250000; mutex_lock(&state->data_mutex); @@ -153,12 +171,16 @@ static int dtt200u_fe_set_frontend(struct dvb_frontend *fe) goto ret; } - dvb_usb_generic_write(state->d, state->data, 2); + ret = dvb_usb_generic_write(state->d, state->data, 2); + if (ret < 0) + goto ret; state->data[0] = SET_RF_FREQ; state->data[1] = freq & 0xff; state->data[2] = (freq >> 8) & 0xff; - dvb_usb_generic_write(state->d, state->data, 3); + ret = dvb_usb_generic_write(state->d, state->data, 3); + if (ret < 0) + goto ret; ret: mutex_unlock(&state->data_mutex); From ab883e2de3fc58c0af6070ea067a607a6310772b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 7 Oct 2016 08:40:43 -0300 Subject: [PATCH 104/884] [media] dtt200u: don't do DMA on stack The USB control messages require DMA to work. We cannot pass a stack-allocated buffer, as it is not warranted that the stack would be into a DMA enabled area. Reviewed-by: Patrick Boettcher Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/dvb-usb/dtt200u.c | 104 ++++++++++++++++++++-------- 1 file changed, 75 insertions(+), 29 deletions(-) diff --git a/drivers/media/usb/dvb-usb/dtt200u.c b/drivers/media/usb/dvb-usb/dtt200u.c index d2a01b50af0d..7706938b5167 100644 --- a/drivers/media/usb/dvb-usb/dtt200u.c +++ b/drivers/media/usb/dvb-usb/dtt200u.c @@ -20,74 +20,103 @@ MODULE_PARM_DESC(debug, "set debugging level (1=info,xfer=2 (or-able))." DVB_USB DVB_DEFINE_MOD_OPT_ADAPTER_NR(adapter_nr); +struct dtt200u_state { + unsigned char data[80]; + struct mutex data_mutex; +}; + static int dtt200u_power_ctrl(struct dvb_usb_device *d, int onoff) { - u8 b = SET_INIT; + struct dtt200u_state *st = d->priv; + + mutex_lock(&st->data_mutex); + + st->data[0] = SET_INIT; if (onoff) - dvb_usb_generic_write(d,&b,2); + dvb_usb_generic_write(d, st->data, 2); + mutex_unlock(&st->data_mutex); return 0; } static int dtt200u_streaming_ctrl(struct dvb_usb_adapter *adap, int onoff) { - u8 b_streaming[2] = { SET_STREAMING, onoff }; - u8 b_rst_pid = RESET_PID_FILTER; + struct dtt200u_state *st = adap->dev->priv; - dvb_usb_generic_write(adap->dev, b_streaming, 2); + mutex_lock(&st->data_mutex); + st->data[0] = SET_STREAMING; + st->data[1] = onoff; + + dvb_usb_generic_write(adap->dev, st->data, 2); + + if (onoff) + return 0; + + st->data[0] = RESET_PID_FILTER; + dvb_usb_generic_write(adap->dev, st->data, 1); + + mutex_unlock(&st->data_mutex); - if (onoff == 0) - dvb_usb_generic_write(adap->dev, &b_rst_pid, 1); return 0; } static int dtt200u_pid_filter(struct dvb_usb_adapter *adap, int index, u16 pid, int onoff) { - u8 b_pid[4]; + struct dtt200u_state *st = adap->dev->priv; + int ret; + pid = onoff ? pid : 0; - b_pid[0] = SET_PID_FILTER; - b_pid[1] = index; - b_pid[2] = pid & 0xff; - b_pid[3] = (pid >> 8) & 0x1f; + mutex_lock(&st->data_mutex); + st->data[0] = SET_PID_FILTER; + st->data[1] = index; + st->data[2] = pid & 0xff; + st->data[3] = (pid >> 8) & 0x1f; - return dvb_usb_generic_write(adap->dev, b_pid, 4); + ret = dvb_usb_generic_write(adap->dev, st->data, 4); + mutex_unlock(&st->data_mutex); + + return ret; } static int dtt200u_rc_query(struct dvb_usb_device *d) { - u8 key[5],cmd = GET_RC_CODE; + struct dtt200u_state *st = d->priv; u32 scancode; - dvb_usb_generic_rw(d,&cmd,1,key,5,0); - if (key[0] == 1) { + mutex_lock(&st->data_mutex); + st->data[0] = GET_RC_CODE; + + dvb_usb_generic_rw(d, st->data, 1, st->data, 5, 0); + if (st->data[0] == 1) { enum rc_type proto = RC_TYPE_NEC; - scancode = key[1]; - if ((u8) ~key[1] != key[2]) { + scancode = st->data[1]; + if ((u8) ~st->data[1] != st->data[2]) { /* Extended NEC */ scancode = scancode << 8; - scancode |= key[2]; + scancode |= st->data[2]; proto = RC_TYPE_NECX; } scancode = scancode << 8; - scancode |= key[3]; + scancode |= st->data[3]; /* Check command checksum is ok */ - if ((u8) ~key[3] == key[4]) + if ((u8) ~st->data[3] == st->data[4]) rc_keydown(d->rc_dev, proto, scancode, 0); else rc_keyup(d->rc_dev); - } else if (key[0] == 2) { + } else if (st->data[0] == 2) { rc_repeat(d->rc_dev); } else { rc_keyup(d->rc_dev); } - if (key[0] != 0) - deb_info("key: %*ph\n", 5, key); + if (st->data[0] != 0) + deb_info("st->data: %*ph\n", 5, st->data); + mutex_unlock(&st->data_mutex); return 0; } @@ -106,17 +135,24 @@ static struct dvb_usb_device_properties wt220u_miglia_properties; static int dtt200u_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) { + struct dvb_usb_device *d; + struct dtt200u_state *st; + if (0 == dvb_usb_device_init(intf, &dtt200u_properties, - THIS_MODULE, NULL, adapter_nr) || + THIS_MODULE, &d, adapter_nr) || 0 == dvb_usb_device_init(intf, &wt220u_properties, - THIS_MODULE, NULL, adapter_nr) || + THIS_MODULE, &d, adapter_nr) || 0 == dvb_usb_device_init(intf, &wt220u_fc_properties, - THIS_MODULE, NULL, adapter_nr) || + THIS_MODULE, &d, adapter_nr) || 0 == dvb_usb_device_init(intf, &wt220u_zl0353_properties, - THIS_MODULE, NULL, adapter_nr) || + THIS_MODULE, &d, adapter_nr) || 0 == dvb_usb_device_init(intf, &wt220u_miglia_properties, - THIS_MODULE, NULL, adapter_nr)) + THIS_MODULE, &d, adapter_nr)) { + st = d->priv; + mutex_init(&st->data_mutex); + return 0; + } return -ENODEV; } @@ -140,6 +176,8 @@ static struct dvb_usb_device_properties dtt200u_properties = { .usb_ctrl = CYPRESS_FX2, .firmware = "dvb-usb-dtt200u-01.fw", + .size_of_priv = sizeof(struct dtt200u_state), + .num_adapters = 1, .adapter = { { @@ -190,6 +228,8 @@ static struct dvb_usb_device_properties wt220u_properties = { .usb_ctrl = CYPRESS_FX2, .firmware = "dvb-usb-wt220u-02.fw", + .size_of_priv = sizeof(struct dtt200u_state), + .num_adapters = 1, .adapter = { { @@ -240,6 +280,8 @@ static struct dvb_usb_device_properties wt220u_fc_properties = { .usb_ctrl = CYPRESS_FX2, .firmware = "dvb-usb-wt220u-fc03.fw", + .size_of_priv = sizeof(struct dtt200u_state), + .num_adapters = 1, .adapter = { { @@ -290,6 +332,8 @@ static struct dvb_usb_device_properties wt220u_zl0353_properties = { .usb_ctrl = CYPRESS_FX2, .firmware = "dvb-usb-wt220u-zl0353-01.fw", + .size_of_priv = sizeof(struct dtt200u_state), + .num_adapters = 1, .adapter = { { @@ -340,6 +384,8 @@ static struct dvb_usb_device_properties wt220u_miglia_properties = { .usb_ctrl = CYPRESS_FX2, .firmware = "dvb-usb-wt220u-miglia-01.fw", + .size_of_priv = sizeof(struct dtt200u_state), + .num_adapters = 1, .generic_bulk_ctrl_endpoint = 0x01, From a3f9f07e06ebf37261171e167e2110b758c2a147 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 7 Oct 2016 13:37:50 -0300 Subject: [PATCH 105/884] [media] dtt200u: handle USB control message errors If something bad happens while an USB control message is transfered, return an error code. Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/dvb-usb/dtt200u.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/media/usb/dvb-usb/dtt200u.c b/drivers/media/usb/dvb-usb/dtt200u.c index 7706938b5167..f88572c7ae7c 100644 --- a/drivers/media/usb/dvb-usb/dtt200u.c +++ b/drivers/media/usb/dvb-usb/dtt200u.c @@ -28,37 +28,42 @@ struct dtt200u_state { static int dtt200u_power_ctrl(struct dvb_usb_device *d, int onoff) { struct dtt200u_state *st = d->priv; + int ret = 0; mutex_lock(&st->data_mutex); st->data[0] = SET_INIT; if (onoff) - dvb_usb_generic_write(d, st->data, 2); + ret = dvb_usb_generic_write(d, st->data, 2); mutex_unlock(&st->data_mutex); - return 0; + return ret; } static int dtt200u_streaming_ctrl(struct dvb_usb_adapter *adap, int onoff) { struct dtt200u_state *st = adap->dev->priv; + int ret; mutex_lock(&st->data_mutex); st->data[0] = SET_STREAMING; st->data[1] = onoff; - dvb_usb_generic_write(adap->dev, st->data, 2); + ret = dvb_usb_generic_write(adap->dev, st->data, 2); + if (ret < 0) + goto ret; if (onoff) - return 0; + goto ret; st->data[0] = RESET_PID_FILTER; - dvb_usb_generic_write(adap->dev, st->data, 1); + ret = dvb_usb_generic_write(adap->dev, st->data, 1); +ret: mutex_unlock(&st->data_mutex); - return 0; + return ret; } static int dtt200u_pid_filter(struct dvb_usb_adapter *adap, int index, u16 pid, int onoff) @@ -84,11 +89,15 @@ static int dtt200u_rc_query(struct dvb_usb_device *d) { struct dtt200u_state *st = d->priv; u32 scancode; + int ret; mutex_lock(&st->data_mutex); st->data[0] = GET_RC_CODE; - dvb_usb_generic_rw(d, st->data, 1, st->data, 5, 0); + ret = dvb_usb_generic_rw(d, st->data, 1, st->data, 5, 0); + if (ret < 0) + goto ret; + if (st->data[0] == 1) { enum rc_type proto = RC_TYPE_NEC; @@ -116,8 +125,9 @@ static int dtt200u_rc_query(struct dvb_usb_device *d) if (st->data[0] != 0) deb_info("st->data: %*ph\n", 5, st->data); +ret: mutex_unlock(&st->data_mutex); - return 0; + return ret; } static int dtt200u_frontend_attach(struct dvb_usb_adapter *adap) From 8f306145df334e2797ef5aa63225cd5178e569c1 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 7 Oct 2016 08:51:23 -0300 Subject: [PATCH 106/884] [media] dtv5100: don't do DMA on stack The USB control messages require DMA to work. We cannot pass a stack-allocated buffer, as it is not warranted that the stack would be into a DMA enabled area. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/dvb-usb/dtv5100.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/media/usb/dvb-usb/dtv5100.c b/drivers/media/usb/dvb-usb/dtv5100.c index 3d11df41cac0..c60fb54f445f 100644 --- a/drivers/media/usb/dvb-usb/dtv5100.c +++ b/drivers/media/usb/dvb-usb/dtv5100.c @@ -31,9 +31,14 @@ module_param_named(debug, dvb_usb_dtv5100_debug, int, 0644); MODULE_PARM_DESC(debug, "set debugging level" DVB_USB_DEBUG_STATUS); DVB_DEFINE_MOD_OPT_ADAPTER_NR(adapter_nr); +struct dtv5100_state { + unsigned char data[80]; +}; + static int dtv5100_i2c_msg(struct dvb_usb_device *d, u8 addr, u8 *wbuf, u16 wlen, u8 *rbuf, u16 rlen) { + struct dtv5100_state *st = d->priv; u8 request; u8 type; u16 value; @@ -60,9 +65,10 @@ static int dtv5100_i2c_msg(struct dvb_usb_device *d, u8 addr, } index = (addr << 8) + wbuf[0]; + memcpy(st->data, rbuf, rlen); msleep(1); /* avoid I2C errors */ return usb_control_msg(d->udev, usb_rcvctrlpipe(d->udev, 0), request, - type, value, index, rbuf, rlen, + type, value, index, st->data, rlen, DTV5100_USB_TIMEOUT); } @@ -176,7 +182,7 @@ static struct dvb_usb_device_properties dtv5100_properties = { .caps = DVB_USB_IS_AN_I2C_ADAPTER, .usb_ctrl = DEVICE_SPECIFIC, - .size_of_priv = 0, + .size_of_priv = sizeof(struct dtv5100_state), .num_adapters = 1, .adapter = {{ From fa86c9a1fca5a50563cc63e97aa130a3c54f1d4c Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 7 Oct 2016 11:24:21 -0300 Subject: [PATCH 107/884] [media] gp8psk: don't do DMA on stack The USB control messages require DMA to work. We cannot pass a stack-allocated buffer, as it is not warranted that the stack would be into a DMA enabled area. Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/dvb-usb/gp8psk.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/media/usb/dvb-usb/gp8psk.c b/drivers/media/usb/dvb-usb/gp8psk.c index 5d0384dd45b5..807f5628a3bb 100644 --- a/drivers/media/usb/dvb-usb/gp8psk.c +++ b/drivers/media/usb/dvb-usb/gp8psk.c @@ -24,6 +24,10 @@ MODULE_PARM_DESC(debug, "set debugging level (1=info,xfer=2,rc=4 (or-able))." DV DVB_DEFINE_MOD_OPT_ADAPTER_NR(adapter_nr); +struct gp8psk_state { + unsigned char data[80]; +}; + static int gp8psk_get_fw_version(struct dvb_usb_device *d, u8 *fw_vers) { return (gp8psk_usb_in_op(d, GET_FW_VERS, 0, 0, fw_vers, 6)); @@ -53,17 +57,19 @@ static void gp8psk_info(struct dvb_usb_device *d) int gp8psk_usb_in_op(struct dvb_usb_device *d, u8 req, u16 value, u16 index, u8 *b, int blen) { + struct gp8psk_state *st = d->priv; int ret = 0,try = 0; if ((ret = mutex_lock_interruptible(&d->usb_mutex))) return ret; while (ret >= 0 && ret != blen && try < 3) { + memcpy(st->data, b, blen); ret = usb_control_msg(d->udev, usb_rcvctrlpipe(d->udev,0), req, USB_TYPE_VENDOR | USB_DIR_IN, - value,index,b,blen, + value, index, st->data, blen, 2000); deb_info("reading number %d (ret: %d)\n",try,ret); try++; @@ -86,6 +92,7 @@ int gp8psk_usb_in_op(struct dvb_usb_device *d, u8 req, u16 value, u16 index, u8 int gp8psk_usb_out_op(struct dvb_usb_device *d, u8 req, u16 value, u16 index, u8 *b, int blen) { + struct gp8psk_state *st = d->priv; int ret; deb_xfer("out: req. %x, val: %x, ind: %x, buffer: ",req,value,index); @@ -94,11 +101,12 @@ int gp8psk_usb_out_op(struct dvb_usb_device *d, u8 req, u16 value, if ((ret = mutex_lock_interruptible(&d->usb_mutex))) return ret; + memcpy(st->data, b, blen); if (usb_control_msg(d->udev, usb_sndctrlpipe(d->udev,0), req, USB_TYPE_VENDOR | USB_DIR_OUT, - value,index,b,blen, + value, index, st->data, blen, 2000) != blen) { warn("usb out operation failed."); ret = -EIO; @@ -265,6 +273,8 @@ static struct dvb_usb_device_properties gp8psk_properties = { .usb_ctrl = CYPRESS_FX2, .firmware = "dvb-usb-gp8psk-01.fw", + .size_of_priv = sizeof(struct gp8psk_state), + .num_adapters = 1, .adapter = { { From f1a503dddf4d4da2b7bbf6ae4e92527bdc265bb9 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 7 Oct 2016 14:12:48 -0300 Subject: [PATCH 108/884] [media] gp8psk: don't go past the buffer size Add checks to avoid going out of the buffer. Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/dvb-usb/gp8psk.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/media/usb/dvb-usb/gp8psk.c b/drivers/media/usb/dvb-usb/gp8psk.c index 807f5628a3bb..adfd76491451 100644 --- a/drivers/media/usb/dvb-usb/gp8psk.c +++ b/drivers/media/usb/dvb-usb/gp8psk.c @@ -60,6 +60,9 @@ int gp8psk_usb_in_op(struct dvb_usb_device *d, u8 req, u16 value, u16 index, u8 struct gp8psk_state *st = d->priv; int ret = 0,try = 0; + if (blen > sizeof(st->data)) + return -EIO; + if ((ret = mutex_lock_interruptible(&d->usb_mutex))) return ret; @@ -98,6 +101,9 @@ int gp8psk_usb_out_op(struct dvb_usb_device *d, u8 req, u16 value, deb_xfer("out: req. %x, val: %x, ind: %x, buffer: ",req,value,index); debug_dump(b,blen,deb_xfer); + if (blen > sizeof(st->data)) + return -EIO; + if ((ret = mutex_lock_interruptible(&d->usb_mutex))) return ret; @@ -151,6 +157,11 @@ static int gp8psk_load_bcm4500fw(struct dvb_usb_device *d) err("failed to load bcm4500 firmware."); goto out_free; } + if (buflen > 64) { + err("firmare chunk size bigger than 64 bytes."); + goto out_free; + } + memcpy(buf, ptr, buflen); if (dvb_usb_generic_write(d, buf, buflen)) { err("failed to load bcm4500 firmware."); From c3a66e9f465e7e792dff6ccd571bc657e6504c7b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 7 Oct 2016 11:38:46 -0300 Subject: [PATCH 109/884] [media] nova-t-usb2: don't do DMA on stack The USB control messages require DMA to work. We cannot pass a stack-allocated buffer, as it is not warranted that the stack would be into a DMA enabled area. Reviewed-by: Patrick Boettcher Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/dvb-usb/nova-t-usb2.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/media/usb/dvb-usb/nova-t-usb2.c b/drivers/media/usb/dvb-usb/nova-t-usb2.c index fc7569e2728d..26d7188a1163 100644 --- a/drivers/media/usb/dvb-usb/nova-t-usb2.c +++ b/drivers/media/usb/dvb-usb/nova-t-usb2.c @@ -74,22 +74,29 @@ static struct rc_map_table rc_map_haupp_table[] = { */ static int nova_t_rc_query(struct dvb_usb_device *d, u32 *event, int *state) { - u8 key[5],cmd[2] = { DIBUSB_REQ_POLL_REMOTE, 0x35 }, data,toggle,custom; + u8 *buf, data, toggle, custom; u16 raw; int i; struct dibusb_device_state *st = d->priv; - dvb_usb_generic_rw(d,cmd,2,key,5,0); + buf = kmalloc(5, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + buf[0] = DIBUSB_REQ_POLL_REMOTE; + buf[1] = 0x35; + dvb_usb_generic_rw(d, buf, 2, buf, 5, 0); *state = REMOTE_NO_KEY_PRESSED; - switch (key[0]) { + switch (buf[0]) { case DIBUSB_RC_HAUPPAUGE_KEY_PRESSED: - raw = ((key[1] << 8) | key[2]) >> 3; + raw = ((buf[1] << 8) | buf[2]) >> 3; toggle = !!(raw & 0x800); data = raw & 0x3f; custom = (raw >> 6) & 0x1f; - deb_rc("raw key code 0x%02x, 0x%02x, 0x%02x to c: %02x d: %02x toggle: %d\n",key[1],key[2],key[3],custom,data,toggle); + deb_rc("raw key code 0x%02x, 0x%02x, 0x%02x to c: %02x d: %02x toggle: %d\n", + buf[1], buf[2], buf[3], custom, data, toggle); for (i = 0; i < ARRAY_SIZE(rc_map_haupp_table); i++) { if (rc5_data(&rc_map_haupp_table[i]) == data && @@ -117,6 +124,7 @@ static int nova_t_rc_query(struct dvb_usb_device *d, u32 *event, int *state) break; } + kfree(buf); return 0; } From 73d5c5c864f4094e86708760fd4612fbf1e6e76c Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 7 Oct 2016 11:59:44 -0300 Subject: [PATCH 110/884] [media] pctv452e: don't do DMA on stack The USB control messages require DMA to work. We cannot pass a stack-allocated buffer, as it is not warranted that the stack would be into a DMA enabled area. Reviewed-by: Patrick Boettcher Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/dvb-usb/pctv452e.c | 129 +++++++++++++++------------ 1 file changed, 74 insertions(+), 55 deletions(-) diff --git a/drivers/media/usb/dvb-usb/pctv452e.c b/drivers/media/usb/dvb-usb/pctv452e.c index c05de1b088a4..58b685094904 100644 --- a/drivers/media/usb/dvb-usb/pctv452e.c +++ b/drivers/media/usb/dvb-usb/pctv452e.c @@ -97,13 +97,14 @@ struct pctv452e_state { u8 c; /* transaction counter, wraps around... */ u8 initialized; /* set to 1 if 0x15 has been sent */ u16 last_rc_key; + + unsigned char data[80]; }; static int tt3650_ci_msg(struct dvb_usb_device *d, u8 cmd, u8 *data, unsigned int write_len, unsigned int read_len) { struct pctv452e_state *state = (struct pctv452e_state *)d->priv; - u8 buf[64]; u8 id; unsigned int rlen; int ret; @@ -112,33 +113,36 @@ static int tt3650_ci_msg(struct dvb_usb_device *d, u8 cmd, u8 *data, BUG_ON(write_len > 64 - 4); BUG_ON(read_len > 64 - 4); + mutex_lock(&state->ca_mutex); id = state->c++; - buf[0] = SYNC_BYTE_OUT; - buf[1] = id; - buf[2] = cmd; - buf[3] = write_len; + state->data[0] = SYNC_BYTE_OUT; + state->data[1] = id; + state->data[2] = cmd; + state->data[3] = write_len; - memcpy(buf + 4, data, write_len); + memcpy(state->data + 4, data, write_len); rlen = (read_len > 0) ? 64 : 0; - ret = dvb_usb_generic_rw(d, buf, 4 + write_len, - buf, rlen, /* delay_ms */ 0); + ret = dvb_usb_generic_rw(d, state->data, 4 + write_len, + state->data, rlen, /* delay_ms */ 0); if (0 != ret) goto failed; ret = -EIO; - if (SYNC_BYTE_IN != buf[0] || id != buf[1]) + if (SYNC_BYTE_IN != state->data[0] || id != state->data[1]) goto failed; - memcpy(data, buf + 4, read_len); + memcpy(data, state->data + 4, read_len); + mutex_unlock(&state->ca_mutex); return 0; failed: err("CI error %d; %02X %02X %02X -> %*ph.", - ret, SYNC_BYTE_OUT, id, cmd, 3, buf); + ret, SYNC_BYTE_OUT, id, cmd, 3, state->data); + mutex_unlock(&state->ca_mutex); return ret; } @@ -405,52 +409,53 @@ static int pctv452e_i2c_msg(struct dvb_usb_device *d, u8 addr, u8 *rcv_buf, u8 rcv_len) { struct pctv452e_state *state = (struct pctv452e_state *)d->priv; - u8 buf[64]; u8 id; int ret; + mutex_lock(&state->ca_mutex); id = state->c++; ret = -EINVAL; if (snd_len > 64 - 7 || rcv_len > 64 - 7) goto failed; - buf[0] = SYNC_BYTE_OUT; - buf[1] = id; - buf[2] = PCTV_CMD_I2C; - buf[3] = snd_len + 3; - buf[4] = addr << 1; - buf[5] = snd_len; - buf[6] = rcv_len; + state->data[0] = SYNC_BYTE_OUT; + state->data[1] = id; + state->data[2] = PCTV_CMD_I2C; + state->data[3] = snd_len + 3; + state->data[4] = addr << 1; + state->data[5] = snd_len; + state->data[6] = rcv_len; - memcpy(buf + 7, snd_buf, snd_len); + memcpy(state->data + 7, snd_buf, snd_len); - ret = dvb_usb_generic_rw(d, buf, 7 + snd_len, - buf, /* rcv_len */ 64, + ret = dvb_usb_generic_rw(d, state->data, 7 + snd_len, + state->data, /* rcv_len */ 64, /* delay_ms */ 0); if (ret < 0) goto failed; /* TT USB protocol error. */ ret = -EIO; - if (SYNC_BYTE_IN != buf[0] || id != buf[1]) + if (SYNC_BYTE_IN != state->data[0] || id != state->data[1]) goto failed; /* I2C device didn't respond as expected. */ ret = -EREMOTEIO; - if (buf[5] < snd_len || buf[6] < rcv_len) + if (state->data[5] < snd_len || state->data[6] < rcv_len) goto failed; - memcpy(rcv_buf, buf + 7, rcv_len); + memcpy(rcv_buf, state->data + 7, rcv_len); + mutex_unlock(&state->ca_mutex); return rcv_len; failed: - err("I2C error %d; %02X %02X %02X %02X %02X -> " - "%02X %02X %02X %02X %02X.", + err("I2C error %d; %02X %02X %02X %02X %02X -> %*ph", ret, SYNC_BYTE_OUT, id, addr << 1, snd_len, rcv_len, - buf[0], buf[1], buf[4], buf[5], buf[6]); + 7, state->data); + mutex_unlock(&state->ca_mutex); return ret; } @@ -499,8 +504,7 @@ static u32 pctv452e_i2c_func(struct i2c_adapter *adapter) static int pctv452e_power_ctrl(struct dvb_usb_device *d, int i) { struct pctv452e_state *state = (struct pctv452e_state *)d->priv; - u8 b0[] = { 0xaa, 0, PCTV_CMD_RESET, 1, 0 }; - u8 rx[PCTV_ANSWER_LEN]; + u8 *rx; int ret; info("%s: %d\n", __func__, i); @@ -511,6 +515,11 @@ static int pctv452e_power_ctrl(struct dvb_usb_device *d, int i) if (state->initialized) return 0; + rx = kmalloc(PCTV_ANSWER_LEN, GFP_KERNEL); + if (!rx) + return -ENOMEM; + + mutex_lock(&state->ca_mutex); /* hmm where shoud this should go? */ ret = usb_set_interface(d->udev, 0, ISOC_INTERFACE_ALTERNATIVE); if (ret != 0) @@ -518,65 +527,75 @@ static int pctv452e_power_ctrl(struct dvb_usb_device *d, int i) __func__, ret); /* this is a one-time initialization, dont know where to put */ - b0[1] = state->c++; + state->data[0] = 0xaa; + state->data[1] = state->c++; + state->data[2] = PCTV_CMD_RESET; + state->data[3] = 1; + state->data[4] = 0; /* reset board */ - ret = dvb_usb_generic_rw(d, b0, sizeof(b0), rx, PCTV_ANSWER_LEN, 0); + ret = dvb_usb_generic_rw(d, state->data, 5, rx, PCTV_ANSWER_LEN, 0); if (ret) - return ret; + goto ret; - b0[1] = state->c++; - b0[4] = 1; + state->data[1] = state->c++; + state->data[4] = 1; /* reset board (again?) */ - ret = dvb_usb_generic_rw(d, b0, sizeof(b0), rx, PCTV_ANSWER_LEN, 0); + ret = dvb_usb_generic_rw(d, state->data, 5, rx, PCTV_ANSWER_LEN, 0); if (ret) - return ret; + goto ret; state->initialized = 1; - return 0; +ret: + mutex_unlock(&state->ca_mutex); + kfree(rx); + return ret; } static int pctv452e_rc_query(struct dvb_usb_device *d) { struct pctv452e_state *state = (struct pctv452e_state *)d->priv; - u8 b[CMD_BUFFER_SIZE]; - u8 rx[PCTV_ANSWER_LEN]; int ret, i; - u8 id = state->c++; + u8 id; + + mutex_lock(&state->ca_mutex); + id = state->c++; /* prepare command header */ - b[0] = SYNC_BYTE_OUT; - b[1] = id; - b[2] = PCTV_CMD_IR; - b[3] = 0; + state->data[0] = SYNC_BYTE_OUT; + state->data[1] = id; + state->data[2] = PCTV_CMD_IR; + state->data[3] = 0; /* send ir request */ - ret = dvb_usb_generic_rw(d, b, 4, rx, PCTV_ANSWER_LEN, 0); + ret = dvb_usb_generic_rw(d, state->data, 4, + state->data, PCTV_ANSWER_LEN, 0); if (ret != 0) - return ret; + goto ret; if (debug > 3) { - info("%s: read: %2d: %*ph: ", __func__, ret, 3, rx); - for (i = 0; (i < rx[3]) && ((i+3) < PCTV_ANSWER_LEN); i++) - info(" %02x", rx[i+3]); + info("%s: read: %2d: %*ph: ", __func__, ret, 3, state->data); + for (i = 0; (i < state->data[3]) && ((i + 3) < PCTV_ANSWER_LEN); i++) + info(" %02x", state->data[i + 3]); info("\n"); } - if ((rx[3] == 9) && (rx[12] & 0x01)) { + if ((state->data[3] == 9) && (state->data[12] & 0x01)) { /* got a "press" event */ - state->last_rc_key = RC_SCANCODE_RC5(rx[7], rx[6]); + state->last_rc_key = RC_SCANCODE_RC5(state->data[7], state->data[6]); if (debug > 2) info("%s: cmd=0x%02x sys=0x%02x\n", - __func__, rx[6], rx[7]); + __func__, state->data[6], state->data[7]); rc_keydown(d->rc_dev, RC_TYPE_RC5, state->last_rc_key, 0); } else if (state->last_rc_key) { rc_keyup(d->rc_dev); state->last_rc_key = 0; } - - return 0; +ret: + mutex_unlock(&state->ca_mutex); + return ret; } static int pctv452e_read_mac_address(struct dvb_usb_device *d, u8 mac[6]) From 5dfd2c8f263dfcaf614d24734f0af8c1c18a9ca8 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 7 Oct 2016 14:15:53 -0300 Subject: [PATCH 111/884] [media] pctv452e: don't call BUG_ON() on non-fatal error There are some conditions on this driver that are tested with BUG_ON() with are not serious enough to hang a machine. So, just return an error if this happens. Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/dvb-usb/pctv452e.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/media/usb/dvb-usb/pctv452e.c b/drivers/media/usb/dvb-usb/pctv452e.c index 58b685094904..7ad0006c5ae0 100644 --- a/drivers/media/usb/dvb-usb/pctv452e.c +++ b/drivers/media/usb/dvb-usb/pctv452e.c @@ -109,9 +109,10 @@ static int tt3650_ci_msg(struct dvb_usb_device *d, u8 cmd, u8 *data, unsigned int rlen; int ret; - BUG_ON(NULL == data && 0 != (write_len | read_len)); - BUG_ON(write_len > 64 - 4); - BUG_ON(read_len > 64 - 4); + if (!data || (write_len > 64 - 4) || (read_len > 64 - 4)) { + err("%s: transfer data invalid", __func__); + return -EIO; + }; mutex_lock(&state->ca_mutex); id = state->c++; From 88ca3619001380a3147246a22cb356f6065ad713 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 7 Oct 2016 12:04:38 -0300 Subject: [PATCH 112/884] [media] technisat-usb2: use DMA buffers for I2C transfers The USB control messages require DMA to work. We cannot pass a stack-allocated buffer, as it is not warranted that the stack would be into a DMA enabled area. On this driver, most of the transfers are OK, but the I2C one was using stack. Reviewed-by: Patrick Boettcher Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/dvb-usb/technisat-usb2.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/media/usb/dvb-usb/technisat-usb2.c b/drivers/media/usb/dvb-usb/technisat-usb2.c index d9f3262bf071..4706628a3ed5 100644 --- a/drivers/media/usb/dvb-usb/technisat-usb2.c +++ b/drivers/media/usb/dvb-usb/technisat-usb2.c @@ -89,9 +89,13 @@ struct technisat_usb2_state { static int technisat_usb2_i2c_access(struct usb_device *udev, u8 device_addr, u8 *tx, u8 txlen, u8 *rx, u8 rxlen) { - u8 b[64]; + u8 *b; int ret, actual_length; + b = kmalloc(64, GFP_KERNEL); + if (!b) + return -ENOMEM; + deb_i2c("i2c-access: %02x, tx: ", device_addr); debug_dump(tx, txlen, deb_i2c); deb_i2c(" "); @@ -123,7 +127,7 @@ static int technisat_usb2_i2c_access(struct usb_device *udev, if (ret < 0) { err("i2c-error: out failed %02x = %d", device_addr, ret); - return -ENODEV; + goto err; } ret = usb_bulk_msg(udev, @@ -131,7 +135,7 @@ static int technisat_usb2_i2c_access(struct usb_device *udev, b, 64, &actual_length, 1000); if (ret < 0) { err("i2c-error: in failed %02x = %d", device_addr, ret); - return -ENODEV; + goto err; } if (b[0] != I2C_STATUS_OK) { @@ -140,7 +144,7 @@ static int technisat_usb2_i2c_access(struct usb_device *udev, if (!(b[0] == I2C_STATUS_NAK && device_addr == 0x60 /* && device_is_technisat_usb2 */)) - return -ENODEV; + goto err; } deb_i2c("status: %d, ", b[0]); @@ -154,7 +158,9 @@ static int technisat_usb2_i2c_access(struct usb_device *udev, deb_i2c("\n"); - return 0; +err: + kfree(b); + return ret; } static int technisat_usb2_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msg, From e2296021081584bd6f0e2ea21e8b909c1d4b5a07 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 7 Oct 2016 13:50:21 -0300 Subject: [PATCH 113/884] [media] nova-t-usb2: handle error code on RC query There's no sense on decoding and generating a RC key code if there was an error on the URB control message. Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/dvb-usb/nova-t-usb2.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/media/usb/dvb-usb/nova-t-usb2.c b/drivers/media/usb/dvb-usb/nova-t-usb2.c index 26d7188a1163..1babd3341910 100644 --- a/drivers/media/usb/dvb-usb/nova-t-usb2.c +++ b/drivers/media/usb/dvb-usb/nova-t-usb2.c @@ -76,7 +76,7 @@ static int nova_t_rc_query(struct dvb_usb_device *d, u32 *event, int *state) { u8 *buf, data, toggle, custom; u16 raw; - int i; + int i, ret; struct dibusb_device_state *st = d->priv; buf = kmalloc(5, GFP_KERNEL); @@ -85,7 +85,9 @@ static int nova_t_rc_query(struct dvb_usb_device *d, u32 *event, int *state) buf[0] = DIBUSB_REQ_POLL_REMOTE; buf[1] = 0x35; - dvb_usb_generic_rw(d, buf, 2, buf, 5, 0); + ret = dvb_usb_generic_rw(d, buf, 2, buf, 5, 0); + if (ret < 0) + goto ret; *state = REMOTE_NO_KEY_PRESSED; switch (buf[0]) { @@ -124,8 +126,9 @@ static int nova_t_rc_query(struct dvb_usb_device *d, u32 *event, int *state) break; } +ret: kfree(buf); - return 0; + return ret; } static int nova_t_read_mac_address (struct dvb_usb_device *d, u8 mac[6]) From aa9efbc7b64aa1c3bf6682d06e6970b874d55d08 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 7 Oct 2016 13:55:53 -0300 Subject: [PATCH 114/884] [media] dw2102: return error if su3000_power_ctrl() fails Instead of silently ignoring the error, return it. Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/dvb-usb/dw2102.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/usb/dvb-usb/dw2102.c b/drivers/media/usb/dvb-usb/dw2102.c index 5fb0c650926e..2c720cb2fb00 100644 --- a/drivers/media/usb/dvb-usb/dw2102.c +++ b/drivers/media/usb/dvb-usb/dw2102.c @@ -852,7 +852,7 @@ static int su3000_power_ctrl(struct dvb_usb_device *d, int i) if (i && !state->initialized) { state->initialized = 1; /* reset board */ - dvb_usb_generic_rw(d, obuf, 2, NULL, 0, 0); + return dvb_usb_generic_rw(d, obuf, 2, NULL, 0, 0); } return 0; From 3dadf91cb830ac3a6992e539d82ec604fffe8671 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 7 Oct 2016 13:52:31 -0300 Subject: [PATCH 115/884] [media] digitv: handle error code on RC query There's no sense on decoding and generating a RC key code if there was an error on the URB control message. Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/dvb-usb/digitv.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/media/usb/dvb-usb/digitv.c b/drivers/media/usb/dvb-usb/digitv.c index 09f8c28bd4db..4284f6984dc1 100644 --- a/drivers/media/usb/dvb-usb/digitv.c +++ b/drivers/media/usb/dvb-usb/digitv.c @@ -29,7 +29,9 @@ static int digitv_ctrl_msg(struct dvb_usb_device *d, u8 cmd, u8 vv, u8 *wbuf, int wlen, u8 *rbuf, int rlen) { struct digitv_state *st = d->priv; - int wo = (rbuf == NULL || rlen == 0); /* write-only */ + int ret, wo; + + wo = (rbuf == NULL || rlen == 0); /* write-only */ memset(st->sndbuf, 0, 7); memset(st->rcvbuf, 0, 7); @@ -40,12 +42,12 @@ static int digitv_ctrl_msg(struct dvb_usb_device *d, if (wo) { memcpy(&st->sndbuf[3], wbuf, wlen); - dvb_usb_generic_write(d, st->sndbuf, 7); + ret = dvb_usb_generic_write(d, st->sndbuf, 7); } else { - dvb_usb_generic_rw(d, st->sndbuf, 7, st->rcvbuf, 7, 10); + ret = dvb_usb_generic_rw(d, st->sndbuf, 7, st->rcvbuf, 7, 10); memcpy(rbuf, &st->rcvbuf[3], rlen); } - return 0; + return ret; } /* I2C */ From 45ae4a5220a43ae79dacb69054a2c7928dd91c94 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 10 Oct 2016 08:28:26 -0300 Subject: [PATCH 116/884] [media] cpia2_usb: don't use stack for DMA The USB control messages require DMA to work. We cannot pass a stack-allocated buffer, as it is not warranted that the stack would be into a DMA enabled area. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/cpia2/cpia2_usb.c | 34 +++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/drivers/media/usb/cpia2/cpia2_usb.c b/drivers/media/usb/cpia2/cpia2_usb.c index 13620cdf0599..e9100a235831 100644 --- a/drivers/media/usb/cpia2/cpia2_usb.c +++ b/drivers/media/usb/cpia2/cpia2_usb.c @@ -545,18 +545,30 @@ static void free_sbufs(struct camera_data *cam) static int write_packet(struct usb_device *udev, u8 request, u8 * registers, u16 start, size_t size) { + unsigned char *buf; + int ret; + if (!registers || size <= 0) return -EINVAL; - return usb_control_msg(udev, + buf = kmalloc(size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + memcpy(buf, registers, size); + + ret = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), request, USB_TYPE_VENDOR | USB_RECIP_DEVICE, start, /* value */ 0, /* index */ - registers, /* buffer */ + buf, /* buffer */ size, HZ); + + kfree(buf); + return ret; } /**************************************************************************** @@ -567,18 +579,32 @@ static int write_packet(struct usb_device *udev, static int read_packet(struct usb_device *udev, u8 request, u8 * registers, u16 start, size_t size) { + unsigned char *buf; + int ret; + if (!registers || size <= 0) return -EINVAL; - return usb_control_msg(udev, + buf = kmalloc(size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + ret = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), request, USB_DIR_IN|USB_TYPE_VENDOR|USB_RECIP_DEVICE, start, /* value */ 0, /* index */ - registers, /* buffer */ + buf, /* buffer */ size, HZ); + + if (ret >= 0) + memcpy(registers, buf, size); + + kfree(buf); + + return ret; } /****************************************************************************** From db65c49e442cf9c9d9dc950f67daf109609f982a Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 10 Oct 2016 11:11:13 -0300 Subject: [PATCH 117/884] [media] s2255drv: don't use stack for DMA The USB control messages require DMA to work. We cannot pass a stack-allocated buffer, as it is not warranted that the stack would be into a DMA enabled area. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/s2255/s2255drv.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/media/usb/s2255/s2255drv.c b/drivers/media/usb/s2255/s2255drv.c index c3a0e87066eb..f7bb78c1873c 100644 --- a/drivers/media/usb/s2255/s2255drv.c +++ b/drivers/media/usb/s2255/s2255drv.c @@ -1901,19 +1901,30 @@ static long s2255_vendor_req(struct s2255_dev *dev, unsigned char Request, s32 TransferBufferLength, int bOut) { int r; + unsigned char *buf; + + buf = kmalloc(TransferBufferLength, GFP_KERNEL); + if (!buf) + return -ENOMEM; + if (!bOut) { r = usb_control_msg(dev->udev, usb_rcvctrlpipe(dev->udev, 0), Request, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, - Value, Index, TransferBuffer, + Value, Index, buf, TransferBufferLength, HZ * 5); + + if (r >= 0) + memcpy(TransferBuffer, buf, TransferBufferLength); } else { + memcpy(buf, TransferBuffer, TransferBufferLength); r = usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0), Request, USB_TYPE_VENDOR | USB_RECIP_DEVICE, - Value, Index, TransferBuffer, + Value, Index, buf, TransferBufferLength, HZ * 5); } + kfree(buf); return r; } From b5f93cb5133d3ad517afef541502c6c9aecf8e5d Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 10 Oct 2016 11:17:15 -0300 Subject: [PATCH 118/884] [media] stk-webcam: don't use stack for DMA The USB control messages require DMA to work. We cannot pass a stack-allocated buffer, as it is not warranted that the stack would be into a DMA enabled area. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/stkwebcam/stk-webcam.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/media/usb/stkwebcam/stk-webcam.c b/drivers/media/usb/stkwebcam/stk-webcam.c index db200c9d796d..22a9aae16291 100644 --- a/drivers/media/usb/stkwebcam/stk-webcam.c +++ b/drivers/media/usb/stkwebcam/stk-webcam.c @@ -147,20 +147,26 @@ int stk_camera_write_reg(struct stk_camera *dev, u16 index, u8 value) int stk_camera_read_reg(struct stk_camera *dev, u16 index, int *value) { struct usb_device *udev = dev->udev; + unsigned char *buf; int ret; + buf = kmalloc(sizeof(u8), GFP_KERNEL); + if (!buf) + return -ENOMEM; + ret = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), 0x00, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0x00, index, - (u8 *) value, + buf, sizeof(u8), 500); - if (ret < 0) - return ret; - else - return 0; + if (ret >= 0) + memcpy(value, buf, sizeof(u8)); + + kfree(buf); + return ret; } static int stk_start_stream(struct stk_camera *dev) From b430eaba0be5eb7140d0065df96982fa6b5ccc1d Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 10 Oct 2016 07:55:54 -0300 Subject: [PATCH 119/884] [media] flexcop-usb: don't use stack for DMA The USB control messages require DMA to work. We cannot pass a stack-allocated buffer, as it is not warranted that the stack would be into a DMA enabled area. While here, remove a dead function calling usb_control_msg(). Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/b2c2/flexcop-usb.c | 105 +++++++++++++++++---------- drivers/media/usb/b2c2/flexcop-usb.h | 4 + 2 files changed, 72 insertions(+), 37 deletions(-) diff --git a/drivers/media/usb/b2c2/flexcop-usb.c b/drivers/media/usb/b2c2/flexcop-usb.c index d4bdba60b0f7..52bc42da8a4c 100644 --- a/drivers/media/usb/b2c2/flexcop-usb.c +++ b/drivers/media/usb/b2c2/flexcop-usb.c @@ -73,23 +73,34 @@ static int flexcop_usb_readwrite_dw(struct flexcop_device *fc, u16 wRegOffsPCI, u8 request_type = (read ? USB_DIR_IN : USB_DIR_OUT) | USB_TYPE_VENDOR; u8 wAddress = B2C2_FLEX_PCIOFFSET_TO_INTERNALADDR(wRegOffsPCI) | (read ? 0x80 : 0); + int ret; - int len = usb_control_msg(fc_usb->udev, + mutex_lock(&fc_usb->data_mutex); + if (!read) + memcpy(fc_usb->data, val, sizeof(*val)); + + ret = usb_control_msg(fc_usb->udev, read ? B2C2_USB_CTRL_PIPE_IN : B2C2_USB_CTRL_PIPE_OUT, request, request_type, /* 0xc0 read or 0x40 write */ wAddress, 0, - val, + fc_usb->data, sizeof(u32), B2C2_WAIT_FOR_OPERATION_RDW * HZ); - if (len != sizeof(u32)) { + if (ret != sizeof(u32)) { err("error while %s dword from %d (%d).", read ? "reading" : "writing", wAddress, wRegOffsPCI); - return -EIO; + if (ret >= 0) + ret = -EIO; } - return 0; + + if (read && ret >= 0) + memcpy(val, fc_usb->data, sizeof(*val)); + mutex_unlock(&fc_usb->data_mutex); + + return ret; } /* * DKT 010817 - add support for V8 memory read/write and flash update @@ -100,9 +111,14 @@ static int flexcop_usb_v8_memory_req(struct flexcop_usb *fc_usb, { u8 request_type = USB_TYPE_VENDOR; u16 wIndex; - int nWaitTime, pipe, len; + int nWaitTime, pipe, ret; wIndex = page << 8; + if (buflen > sizeof(fc_usb->data)) { + err("Buffer size bigger than max URB control message\n"); + return -EIO; + } + switch (req) { case B2C2_USB_READ_V8_MEM: nWaitTime = B2C2_WAIT_FOR_OPERATION_V8READ; @@ -127,17 +143,32 @@ static int flexcop_usb_v8_memory_req(struct flexcop_usb *fc_usb, deb_v8("v8mem: %02x %02x %04x %04x, len: %d\n", request_type, req, wAddress, wIndex, buflen); - len = usb_control_msg(fc_usb->udev, pipe, + mutex_lock(&fc_usb->data_mutex); + + if ((request_type & USB_ENDPOINT_DIR_MASK) == USB_DIR_OUT) + memcpy(fc_usb->data, pbBuffer, buflen); + + ret = usb_control_msg(fc_usb->udev, pipe, req, request_type, wAddress, wIndex, - pbBuffer, + fc_usb->data, buflen, nWaitTime * HZ); + if (ret != buflen) + ret = -EIO; - debug_dump(pbBuffer, len, deb_v8); - return len == buflen ? 0 : -EIO; + if (ret >= 0) { + ret = 0; + if ((request_type & USB_ENDPOINT_DIR_MASK) == USB_DIR_IN) + memcpy(pbBuffer, fc_usb->data, buflen); + } + + mutex_unlock(&fc_usb->data_mutex); + + debug_dump(pbBuffer, ret, deb_v8); + return ret; } #define bytes_left_to_read_on_page(paddr,buflen) \ @@ -196,29 +227,6 @@ static int flexcop_usb_get_mac_addr(struct flexcop_device *fc, int extended) fc->dvb_adapter.proposed_mac, 6); } -#if 0 -static int flexcop_usb_utility_req(struct flexcop_usb *fc_usb, int set, - flexcop_usb_utility_function_t func, u8 extra, u16 wIndex, - u16 buflen, u8 *pvBuffer) -{ - u16 wValue; - u8 request_type = (set ? USB_DIR_OUT : USB_DIR_IN) | USB_TYPE_VENDOR; - int nWaitTime = 2, - pipe = set ? B2C2_USB_CTRL_PIPE_OUT : B2C2_USB_CTRL_PIPE_IN, len; - wValue = (func << 8) | extra; - - len = usb_control_msg(fc_usb->udev,pipe, - B2C2_USB_UTILITY, - request_type, - wValue, - wIndex, - pvBuffer, - buflen, - nWaitTime * HZ); - return len == buflen ? 0 : -EIO; -} -#endif - /* usb i2c stuff */ static int flexcop_usb_i2c_req(struct flexcop_i2c_adapter *i2c, flexcop_usb_request_t req, flexcop_usb_i2c_function_t func, @@ -226,9 +234,14 @@ static int flexcop_usb_i2c_req(struct flexcop_i2c_adapter *i2c, { struct flexcop_usb *fc_usb = i2c->fc->bus_specific; u16 wValue, wIndex; - int nWaitTime,pipe,len; + int nWaitTime, pipe, ret; u8 request_type = USB_TYPE_VENDOR; + if (buflen > sizeof(fc_usb->data)) { + err("Buffer size bigger than max URB control message\n"); + return -EIO; + } + switch (func) { case USB_FUNC_I2C_WRITE: case USB_FUNC_I2C_MULTIWRITE: @@ -257,15 +270,32 @@ static int flexcop_usb_i2c_req(struct flexcop_i2c_adapter *i2c, wValue & 0xff, wValue >> 8, wIndex & 0xff, wIndex >> 8); - len = usb_control_msg(fc_usb->udev,pipe, + mutex_lock(&fc_usb->data_mutex); + + if ((request_type & USB_ENDPOINT_DIR_MASK) == USB_DIR_OUT) + memcpy(fc_usb->data, buf, buflen); + + ret = usb_control_msg(fc_usb->udev, pipe, req, request_type, wValue, wIndex, - buf, + fc_usb->data, buflen, nWaitTime * HZ); - return len == buflen ? 0 : -EREMOTEIO; + + if (ret != buflen) + ret = -EIO; + + if (ret >= 0) { + ret = 0; + if ((request_type & USB_ENDPOINT_DIR_MASK) == USB_DIR_IN) + memcpy(buf, fc_usb->data, buflen); + } + + mutex_unlock(&fc_usb->data_mutex); + + return 0; } /* actual bus specific access functions, @@ -516,6 +546,7 @@ static int flexcop_usb_probe(struct usb_interface *intf, /* general flexcop init */ fc_usb = fc->bus_specific; fc_usb->fc_dev = fc; + mutex_init(&fc_usb->data_mutex); fc->read_ibi_reg = flexcop_usb_read_ibi_reg; fc->write_ibi_reg = flexcop_usb_write_ibi_reg; diff --git a/drivers/media/usb/b2c2/flexcop-usb.h b/drivers/media/usb/b2c2/flexcop-usb.h index 92529a9c4475..25ad43166e78 100644 --- a/drivers/media/usb/b2c2/flexcop-usb.h +++ b/drivers/media/usb/b2c2/flexcop-usb.h @@ -29,6 +29,10 @@ struct flexcop_usb { u8 tmp_buffer[1023+190]; int tmp_buffer_length; + + /* for URB control messages */ + u8 data[80]; + struct mutex data_mutex; }; #if 0 From 59dde8e70cfb7afbc1b647d7d3440dfb9a78b431 Mon Sep 17 00:00:00 2001 From: kbuild test robot Date: Tue, 11 Oct 2016 10:22:08 -0300 Subject: [PATCH 120/884] [media] pctv452e: fix semicolon.cocci warnings drivers/media/usb/dvb-usb/pctv452e.c:115:2-3: Unneeded semicolon Remove unneeded semicolon. Generated by: scripts/coccinelle/misc/semicolon.cocci Signed-off-by: Fengguang Wu Signed-off-by: Mauro Carvalho Chehab --- drivers/media/usb/dvb-usb/pctv452e.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/usb/dvb-usb/pctv452e.c b/drivers/media/usb/dvb-usb/pctv452e.c index 7ad0006c5ae0..07fa08be9e99 100644 --- a/drivers/media/usb/dvb-usb/pctv452e.c +++ b/drivers/media/usb/dvb-usb/pctv452e.c @@ -112,7 +112,7 @@ static int tt3650_ci_msg(struct dvb_usb_device *d, u8 cmd, u8 *data, if (!data || (write_len > 64 - 4) || (read_len > 64 - 4)) { err("%s: transfer data invalid", __func__); return -EIO; - }; + } mutex_lock(&state->ca_mutex); id = state->c++; From dcb2ff56417362c31f6b430c3c531a84581e8721 Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Mon, 10 Oct 2016 17:58:32 +0200 Subject: [PATCH 121/884] dm mirror: fix read error on recovery after default leg failure If a default leg has failed, any read will cause a new operational default leg to be selected and the read is resubmitted. But until now the read will return failure even though it was successful due to resubmission. The reason for this is bio->bi_error was not being cleared before resubmitting the bio. Fix by clearing bio->bi_error before resubmission. Fixes: 4246a0b63bd8 ("block: add a bi_error field to struct bio") Cc: stable@vger.kernel.org # 4.3+ Signed-off-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer --- drivers/md/dm-raid1.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index bdf1606f67bc..7a6254d54baf 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -1292,6 +1292,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error) dm_bio_restore(bd, bio); bio_record->details.bi_bdev = NULL; + bio->bi_error = 0; queue_bio(ms, bio, rw); return DM_ENDIO_INCOMPLETE; From 12a7cf5ba6c776a2621d8972c7d42e8d3d959d20 Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Mon, 10 Oct 2016 18:48:06 +0200 Subject: [PATCH 122/884] dm mirror: use all available legs on multiple failures When any leg(s) have failed, any read will cause a new operational default leg to be selected and the read is resubmitted to it. If that new default leg fails the read too, no other still accessible legs are used to resubmit the read again -- thus failing the io. Fix by allowing the read to get resubmitted until all operational legs have been exhausted. Also, remove any details.bi_dev use as a flag. Signed-off-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer --- drivers/md/dm-raid1.c | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 7a6254d54baf..9a8b71067c6e 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -145,7 +145,6 @@ static void dispatch_bios(void *context, struct bio_list *bio_list) struct dm_raid1_bio_record { struct mirror *m; - /* if details->bi_bdev == NULL, details were not saved */ struct dm_bio_details details; region_t write_region; }; @@ -1200,8 +1199,6 @@ static int mirror_map(struct dm_target *ti, struct bio *bio) struct dm_raid1_bio_record *bio_record = dm_per_bio_data(bio, sizeof(struct dm_raid1_bio_record)); - bio_record->details.bi_bdev = NULL; - if (rw == WRITE) { /* Save region for mirror_end_io() handler */ bio_record->write_region = dm_rh_bio_to_region(ms->rh, bio); @@ -1260,22 +1257,12 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error) } if (error == -EOPNOTSUPP) - goto out; + return error; if ((error == -EWOULDBLOCK) && (bio->bi_opf & REQ_RAHEAD)) - goto out; + return error; if (unlikely(error)) { - if (!bio_record->details.bi_bdev) { - /* - * There wasn't enough memory to record necessary - * information for a retry or there was no other - * mirror in-sync. - */ - DMERR_LIMIT("Mirror read failed."); - return -EIO; - } - m = bio_record->m; DMERR("Mirror read failed from %s. Trying alternative device.", @@ -1291,7 +1278,6 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error) bd = &bio_record->details; dm_bio_restore(bd, bio); - bio_record->details.bi_bdev = NULL; bio->bi_error = 0; queue_bio(ms, bio, rw); @@ -1300,9 +1286,6 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error) DMERR("All replicated volumes dead, failing I/O"); } -out: - bio_record->details.bi_bdev = NULL; - return error; } From 85a624403c77c3f074931aefdfced59f61b668cb Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Thu, 13 Oct 2016 16:13:55 -0700 Subject: [PATCH 123/884] ethtool: silence warning on bit loss Sparse was complaining when we went to prototype some code using ethtool_cmd_speed_set and SPEED_100000, which uses the upper 16 bits of __u32 speed for the first time. CHECK ... .../uapi/linux/ethtool.h:123:28: warning: cast truncates bits from constant value (186a0 becomes 86a0) The warning is actually bogus, as no bits are really lost, but we can get rid of the sparse warning with this one small change. Reported-by: Preethi Banala Signed-off-by: Jesse Brandeburg Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 099a4200732c..8e547231c1b7 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -119,8 +119,7 @@ struct ethtool_cmd { static inline void ethtool_cmd_speed_set(struct ethtool_cmd *ep, __u32 speed) { - - ep->speed = (__u16)speed; + ep->speed = (__u16)(speed & 0xFFFF); ep->speed_hi = (__u16)(speed >> 16); } From 610df1d2342d639ec87a8fb5290ca4799d91d59b Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Thu, 13 Oct 2016 16:43:16 -0700 Subject: [PATCH 124/884] net: asix: Avoid looping when the device does not respond Check answers from USB stack and avoid re-sending the request multiple times if the device does not respond. This fixes the following problem, observed with a probably flaky adapter. [62108.732707] usb 1-3: new high-speed USB device number 5 using xhci_hcd [62108.914421] usb 1-3: New USB device found, idVendor=0b95, idProduct=7720 [62108.914463] usb 1-3: New USB device strings: Mfr=1, Product=2, SerialNumber=3 [62108.914476] usb 1-3: Product: AX88x72A [62108.914486] usb 1-3: Manufacturer: ASIX Elec. Corp. [62108.914495] usb 1-3: SerialNumber: 000001 [62114.109109] asix 1-3:1.0 (unnamed net_device) (uninitialized): Failed to write reg index 0x0000: -110 [62114.109139] asix 1-3:1.0 (unnamed net_device) (uninitialized): Failed to send software reset: ffffff92 [62119.109048] asix 1-3:1.0 (unnamed net_device) (uninitialized): Failed to write reg index 0x0000: -110 ... Since the USB timeout is 5 seconds, and the operation is retried 30 times, this results in [62278.180353] INFO: task mtpd:1725 blocked for more than 120 seconds. [62278.180373] Tainted: G W 3.18.0-13298-g94ace9e #1 [62278.180383] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. ... [62278.180957] kworker/2:0 D 0000000000000000 0 5744 2 0x00000000 [62278.180978] Workqueue: usb_hub_wq hub_event [62278.181029] ffff880177f833b8 0000000000000046 ffff88017fd00000 ffff88017b126d80 [62278.181048] ffff880177f83fd8 ffff880065a71b60 0000000000013340 ffff880065a71b60 [62278.181065] 0000000000000286 0000000103b1c199 0000000000001388 0000000000000002 [62278.181081] Call Trace: [62278.181092] [] ? console_conditional_schedule+0x2c/0x2c [62278.181105] [] schedule+0x69/0x6b [62278.181117] [] schedule_timeout+0xe3/0x11d [62278.181133] [] ? trace_timer_start+0x51/0x51 [62278.181146] [] do_wait_for_common+0x12f/0x16c [62278.181162] [] ? wake_up_process+0x39/0x39 [62278.181174] [] wait_for_common+0x52/0x6d [62278.181187] [] wait_for_completion_timeout+0x13/0x15 [62278.181201] [] usb_start_wait_urb+0x93/0xf1 [62278.181214] [] usb_control_msg+0xe1/0x11d [62278.181230] [] usbnet_write_cmd+0x9c/0xc6 [usbnet] [62278.181286] [] asix_write_cmd+0x4e/0x7e [asix] [62278.181300] [] asix_set_sw_mii+0x25/0x4e [asix] [62278.181314] [] asix_mdio_read+0x51/0x109 [asix] ... Signed-off-by: Guenter Roeck Signed-off-by: David S. Miller --- drivers/net/usb/asix_common.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c index f79eb12c326a..125cff57c759 100644 --- a/drivers/net/usb/asix_common.c +++ b/drivers/net/usb/asix_common.c @@ -433,13 +433,13 @@ int asix_mdio_read(struct net_device *netdev, int phy_id, int loc) mutex_lock(&dev->phy_mutex); do { ret = asix_set_sw_mii(dev, 0); - if (ret == -ENODEV) + if (ret == -ENODEV || ret == -ETIMEDOUT) break; usleep_range(1000, 1100); ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, 0, 0, 1, &smsr, 0); } while (!(smsr & AX_HOST_EN) && (i++ < 30) && (ret != -ENODEV)); - if (ret == -ENODEV) { + if (ret == -ENODEV || ret == -ETIMEDOUT) { mutex_unlock(&dev->phy_mutex); return ret; } @@ -497,13 +497,13 @@ int asix_mdio_read_nopm(struct net_device *netdev, int phy_id, int loc) mutex_lock(&dev->phy_mutex); do { ret = asix_set_sw_mii(dev, 1); - if (ret == -ENODEV) + if (ret == -ENODEV || ret == -ETIMEDOUT) break; usleep_range(1000, 1100); ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, 0, 0, 1, &smsr, 1); } while (!(smsr & AX_HOST_EN) && (i++ < 30) && (ret != -ENODEV)); - if (ret == -ENODEV) { + if (ret == -ENODEV || ret == -ETIMEDOUT) { mutex_unlock(&dev->phy_mutex); return ret; } From e7cb08e894a0b876443ef8fdb0706575dc00a5d2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 14 Oct 2016 16:18:39 -0400 Subject: [PATCH 125/884] scsi: zfcp: spin_lock_irqsave() is not nestable We accidentally overwrite the original saved value of "flags" so that we can't re-enable IRQs at the end of the function. Presumably this function is mostly called with IRQs disabled or it would be obvious in testing. Fixes: aceeffbb59bb ("zfcp: trace full payload of all SAN records (req,resp,iels)") Cc: #2.6.38+ Signed-off-by: Dan Carpenter Signed-off-by: Steffen Maier Signed-off-by: Martin K. Petersen --- drivers/s390/scsi/zfcp_dbf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/scsi/zfcp_dbf.c b/drivers/s390/scsi/zfcp_dbf.c index 637cf8973c9e..581001989937 100644 --- a/drivers/s390/scsi/zfcp_dbf.c +++ b/drivers/s390/scsi/zfcp_dbf.c @@ -384,7 +384,7 @@ void zfcp_dbf_san(char *tag, struct zfcp_dbf *dbf, /* if (len > rec_len): * dump data up to cap_len ignoring small duplicate in rec->payload */ - spin_lock_irqsave(&dbf->pay_lock, flags); + spin_lock(&dbf->pay_lock); memset(payload, 0, sizeof(*payload)); memcpy(payload->area, paytag, ZFCP_DBF_TAG_LEN); payload->fsf_req_id = req_id; From 8a4236a2c7868768943a24dc7b1e2ff495836880 Mon Sep 17 00:00:00 2001 From: Brian King Date: Thu, 13 Oct 2016 14:45:24 -0500 Subject: [PATCH 126/884] scsi: ipr: Fix async error WARN_ON Commit afc3f83cb4a5 ("scsi: ipr: Add asynchronous error notification") introduced the warn on shown below. To fix this, rather than attempting to send the KOBJ_CHANGE uevent from interrupt context, which is what is causing the WARN_ON, just wake the ipr worker thread which will send a KOBJ_CHANGE uevent. [ 142.278120] WARNING: CPU: 15 PID: 0 at kernel/softirq.c:161 __local_bh_enable_ip+0x7c/0xd0 [ 142.278124] Modules linked in: ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 ipt_REJECT nf_reject_ipv4 xt_conntrack ebtable_nat ebtable_broute bridge stp llc ebtable_filter ebtables ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw ip6table_filter ip6_tables iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw iptable_filter ses enclosure scsi_transport_sas sg pseries_rng nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c sr_mod sd_mod cdrom ipr libata ibmvscsi scsi_transport_srp ibmveth dm_mirror dm_region_hash dm_log dm_mod [ 142.278208] CPU: 15 PID: 0 Comm: swapper/15 Not tainted 4.8.0.ipr+ #21 [ 142.278213] task: c00000010cf24480 task.stack: c00000010cfec000 [ 142.278217] NIP: c0000000000c0c7c LR: c000000000881778 CTR: c0000000003c5bf0 [ 142.278221] REGS: c00000010cfef080 TRAP: 0700 Not tainted (4.8.0.ipr+) [ 142.278224] MSR: 8000000000029033 CR: 28008022 XER: 2000000f [ 142.278236] CFAR: c0000000000c0c20 SOFTE: 0 GPR00: c000000000706c78 c00000010cfef300 c000000000f91d00 c000000000706c78 GPR04: 0000000000000200 c000000000f7bc80 0000000000000000 00000000024000c0 GPR08: 0000000000000000 0000000000000001 c000000000ee1d00 c000000000a9bdd0 GPR12: c0000000003c5bf0 c00000000eb22d00 c000000100ca3880 c00000020ed38400 GPR16: 0000000000000000 0000000000000000 c000000100940508 0000000000000000 GPR20: 0000000000000000 0000000000000000 0000000000000000 00000000024000c0 GPR24: c0000000004588e0 c00000010863bd00 c00000010863bd00 c0000000013773f8 GPR28: c000000000f7bc80 0000000000000000 ffffffffffffffff c000000000f7bcd8 [ 142.278290] NIP [c0000000000c0c7c] __local_bh_enable_ip+0x7c/0xd0 [ 142.278296] LR [c000000000881778] _raw_spin_unlock_bh+0x38/0x60 [ 142.278299] Call Trace: [ 142.278303] [c00000010cfef300] [c000000000f7bc80] init_net+0x0/0x1900 (unreliable) [ 142.278310] [c00000010cfef320] [c000000000706c78] peernet2id+0x58/0x80 [ 142.278316] [c00000010cfef370] [c00000000075caec] netlink_broadcast_filtered+0x30c/0x550 [ 142.278323] [c00000010cfef430] [c000000000459078] kobject_uevent_env+0x588/0x780 [ 142.278331] [c00000010cfef510] [d000000003163a6c] ipr_process_error+0x11c/0x240 [ipr] [ 142.278337] [c00000010cfef5c0] [d000000003152298] ipr_fail_all_ops+0x108/0x220 [ipr] [ 142.278343] [c00000010cfef670] [d0000000031643f8] ipr_reset_restore_cfg_space+0xa8/0x240 [ipr] [ 142.278350] [c00000010cfef6f0] [d000000003158a00] ipr_reset_ioa_job+0x80/0xe0 [ipr] [ 142.278356] [c00000010cfef720] [d000000003153f78] ipr_reset_timer_done+0xa8/0xe0 [ipr] [ 142.278363] [c00000010cfef770] [c000000000149c88] call_timer_fn+0x58/0x1c0 [ 142.278368] [c00000010cfef800] [c000000000149f60] expire_timers+0x140/0x200 [ 142.278373] [c00000010cfef870] [c00000000014a0e8] run_timer_softirq+0xc8/0x230 [ 142.278379] [c00000010cfef900] [c0000000000c0844] __do_softirq+0x164/0x3c0 [ 142.278384] [c00000010cfef9f0] [c0000000000c0f18] irq_exit+0x1a8/0x1c0 [ 142.278389] [c00000010cfefa20] [c000000000020b54] timer_interrupt+0xa4/0xe0 [ 142.278394] [c00000010cfefa50] [c000000000002414] decrementer_common+0x114/0x180 Signed-off-by: Brian King Signed-off-by: Martin K. Petersen --- drivers/scsi/ipr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index a8762a3efeef..532474109624 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -2586,7 +2586,6 @@ static void ipr_process_error(struct ipr_cmnd *ipr_cmd) struct ipr_hostrcb *hostrcb = ipr_cmd->u.hostrcb; u32 ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc); u32 fd_ioasc; - char *envp[] = { "ASYNC_ERR_LOG=1", NULL }; if (ioa_cfg->sis64) fd_ioasc = be32_to_cpu(hostrcb->hcam.u.error64.fd_ioasc); @@ -2607,8 +2606,8 @@ static void ipr_process_error(struct ipr_cmnd *ipr_cmd) } list_add_tail(&hostrcb->queue, &ioa_cfg->hostrcb_report_q); + schedule_work(&ioa_cfg->work_q); hostrcb = ipr_get_free_hostrcb(ioa_cfg); - kobject_uevent_env(&ioa_cfg->host->shost_dev.kobj, KOBJ_CHANGE, envp); ipr_send_hcam(ioa_cfg, IPR_HCAM_CDB_OP_CODE_LOG_DATA, hostrcb); } From 199625098a18a5522b424dea9b122b254c022fc5 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 15 Oct 2016 09:39:31 -0400 Subject: [PATCH 127/884] ext4: correct endianness conversion in __xattr_check_inode() It should be cpu_to_le32(), not le32_to_cpu(). No change in behavior. Found with sparse, and this was the only endianness warning in fs/ext4/. Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara --- fs/ext4/xattr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index c15d63389957..e90c5cd04d89 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -241,7 +241,7 @@ __xattr_check_inode(struct inode *inode, struct ext4_xattr_ibody_header *header, int error = -EFSCORRUPTED; if (((void *) header >= end) || - (header->h_magic != le32_to_cpu(EXT4_XATTR_MAGIC))) + (header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC))) goto errout; error = ext4_xattr_check_names(entry, end, entry); errout: From 8906a8223ad4909b391c5628f7991ebceda30e52 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 15 Oct 2016 09:48:50 -0400 Subject: [PATCH 128/884] fscrypto: lock inode while setting encryption policy i_rwsem needs to be acquired while setting an encryption policy so that concurrent calls to FS_IOC_SET_ENCRYPTION_POLICY are correctly serialized (especially the ->get_context() + ->set_context() pair), and so that new files cannot be created in the directory during or after the ->empty_dir() check. Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o Reviewed-by: Richard Weinberger Cc: stable@vger.kernel.org --- fs/crypto/policy.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index ed115acb5dee..6865663aac69 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -109,6 +109,8 @@ int fscrypt_process_policy(struct file *filp, if (ret) return ret; + inode_lock(inode); + if (!inode_has_encryption_context(inode)) { if (!S_ISDIR(inode->i_mode)) ret = -EINVAL; @@ -127,6 +129,8 @@ int fscrypt_process_policy(struct file *filp, ret = -EINVAL; } + inode_unlock(inode); + mnt_drop_write_file(filp); return ret; } From d74f3d25289aa9722cf777a7482eeee2eacdf46e Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sat, 15 Oct 2016 09:57:31 -0400 Subject: [PATCH 129/884] ext4: add missing KERN_CONT to a few more debugging uses Recent commits require line continuing printks to always use pr_cont or KERN_CONT. Add these markings to a few more printks. Miscellaneaous: o Integrate the ea_idebug and ea_bdebug macros to use a single call to printk(KERN_DEBUG instead of 3 separate printks o Use the more common varargs macro style Signed-off-by: Joe Perches Signed-off-by: Theodore Ts'o Reviewed-by: Andreas Dilger --- fs/ext4/block_validity.c | 4 ++-- fs/ext4/mballoc.h | 17 ++++++++--------- fs/ext4/namei.c | 18 ++++++++++-------- fs/ext4/xattr.c | 18 ++++++------------ 4 files changed, 26 insertions(+), 31 deletions(-) diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index 02ddec6d8a7d..fdb19543af1e 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -128,12 +128,12 @@ static void debug_print_tree(struct ext4_sb_info *sbi) node = rb_first(&sbi->system_blks); while (node) { entry = rb_entry(node, struct ext4_system_zone, node); - printk("%s%llu-%llu", first ? "" : ", ", + printk(KERN_CONT "%s%llu-%llu", first ? "" : ", ", entry->start_blk, entry->start_blk + entry->count - 1); first = 0; node = rb_next(node); } - printk("\n"); + printk(KERN_CONT "\n"); } int ext4_setup_system_zone(struct super_block *sb) diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index 3ef1df6ae9ec..1aba469f8220 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h @@ -27,16 +27,15 @@ #ifdef CONFIG_EXT4_DEBUG extern ushort ext4_mballoc_debug; -#define mb_debug(n, fmt, a...) \ - do { \ - if ((n) <= ext4_mballoc_debug) { \ - printk(KERN_DEBUG "(%s, %d): %s: ", \ - __FILE__, __LINE__, __func__); \ - printk(fmt, ## a); \ - } \ - } while (0) +#define mb_debug(n, fmt, ...) \ +do { \ + if ((n) <= ext4_mballoc_debug) { \ + printk(KERN_DEBUG "(%s, %d): %s: " fmt, \ + __FILE__, __LINE__, __func__, ##__VA_ARGS__); \ + } \ +} while (0) #else -#define mb_debug(n, fmt, a...) no_printk(fmt, ## a) +#define mb_debug(n, fmt, ...) no_printk(fmt, ##__VA_ARGS__) #endif #define EXT4_MB_HISTORY_ALLOC 1 /* allocation */ diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index f92f10d4f66a..104f8bfba718 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -577,12 +577,13 @@ static inline unsigned dx_node_limit(struct inode *dir) static void dx_show_index(char * label, struct dx_entry *entries) { int i, n = dx_get_count (entries); - printk(KERN_DEBUG "%s index ", label); + printk(KERN_DEBUG "%s index", label); for (i = 0; i < n; i++) { - printk("%x->%lu ", i ? dx_get_hash(entries + i) : - 0, (unsigned long)dx_get_block(entries + i)); + printk(KERN_CONT " %x->%lu", + i ? dx_get_hash(entries + i) : 0, + (unsigned long)dx_get_block(entries + i)); } - printk("\n"); + printk(KERN_CONT "\n"); } struct stats @@ -679,7 +680,7 @@ static struct stats dx_show_leaf(struct inode *dir, } de = ext4_next_entry(de, size); } - printk("(%i)\n", names); + printk(KERN_CONT "(%i)\n", names); return (struct stats) { names, space, 1 }; } @@ -798,7 +799,7 @@ dx_probe(struct ext4_filename *fname, struct inode *dir, q = entries + count - 1; while (p <= q) { m = p + (q - p) / 2; - dxtrace(printk(".")); + dxtrace(printk(KERN_CONT ".")); if (dx_get_hash(m) > hash) q = m - 1; else @@ -810,7 +811,7 @@ dx_probe(struct ext4_filename *fname, struct inode *dir, at = entries; while (n--) { - dxtrace(printk(",")); + dxtrace(printk(KERN_CONT ",")); if (dx_get_hash(++at) > hash) { at--; @@ -821,7 +822,8 @@ dx_probe(struct ext4_filename *fname, struct inode *dir, } at = p - 1; - dxtrace(printk(" %x->%u\n", at == entries ? 0 : dx_get_hash(at), + dxtrace(printk(KERN_CONT " %x->%u\n", + at == entries ? 0 : dx_get_hash(at), dx_get_block(at))); frame->entries = entries; frame->at = at; diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index e90c5cd04d89..d77be9e9f535 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -61,18 +61,12 @@ #include "acl.h" #ifdef EXT4_XATTR_DEBUG -# define ea_idebug(inode, f...) do { \ - printk(KERN_DEBUG "inode %s:%lu: ", \ - inode->i_sb->s_id, inode->i_ino); \ - printk(f); \ - printk("\n"); \ - } while (0) -# define ea_bdebug(bh, f...) do { \ - printk(KERN_DEBUG "block %pg:%lu: ", \ - bh->b_bdev, (unsigned long) bh->b_blocknr); \ - printk(f); \ - printk("\n"); \ - } while (0) +# define ea_idebug(inode, fmt, ...) \ + printk(KERN_DEBUG "inode %s:%lu: " fmt "\n", \ + inode->i_sb->s_id, inode->i_ino, ##__VA_ARGS__) +# define ea_bdebug(bh, fmt, ...) \ + printk(KERN_DEBUG "block %pg:%lu: " fmt "\n", \ + bh->b_bdev, (unsigned long)bh->b_blocknr, ##__VA_ARGS__) #else # define ea_idebug(inode, fmt, ...) no_printk(fmt, ##__VA_ARGS__) # define ea_bdebug(bh, fmt, ...) no_printk(fmt, ##__VA_ARGS__) From 87c0fded852ae20bddb7833da6ead082404de86a Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 29 Sep 2016 13:41:05 +0200 Subject: [PATCH 130/884] rbd: don't wait for the lock forever if blacklisted -EBLACKLISTED from __rbd_register_watch() means that our ceph_client got blacklisted - we won't be able to restore the watch and reacquire the lock. Wake up and fail all outstanding requests waiting for the lock and arrange for all new requests that require the lock to fail immediately. Signed-off-by: Ilya Dryomov Tested-by: Mike Christie --- drivers/block/rbd.c | 50 ++++++++++++++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 17 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index abb71628ab61..633e8c2ea120 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -415,15 +415,15 @@ struct rbd_device { }; /* - * Flag bits for rbd_dev->flags. If atomicity is required, - * rbd_dev->lock is used to protect access. - * - * Currently, only the "removing" flag (which is coupled with the - * "open_count" field) requires atomic access. + * Flag bits for rbd_dev->flags: + * - REMOVING (which is coupled with rbd_dev->open_count) is protected + * by rbd_dev->lock + * - BLACKLISTED is protected by rbd_dev->lock_rwsem */ enum rbd_dev_flags { RBD_DEV_FLAG_EXISTS, /* mapped snapshot has not been deleted */ RBD_DEV_FLAG_REMOVING, /* this mapping is being removed */ + RBD_DEV_FLAG_BLACKLISTED, /* our ceph_client is blacklisted */ }; static DEFINE_MUTEX(client_mutex); /* Serialize client creation */ @@ -3926,6 +3926,7 @@ static void rbd_reregister_watch(struct work_struct *work) struct rbd_device *rbd_dev = container_of(to_delayed_work(work), struct rbd_device, watch_dwork); bool was_lock_owner = false; + bool need_to_wake = false; int ret; dout("%s rbd_dev %p\n", __func__, rbd_dev); @@ -3935,19 +3936,27 @@ static void rbd_reregister_watch(struct work_struct *work) was_lock_owner = rbd_release_lock(rbd_dev); mutex_lock(&rbd_dev->watch_mutex); - if (rbd_dev->watch_state != RBD_WATCH_STATE_ERROR) - goto fail_unlock; + if (rbd_dev->watch_state != RBD_WATCH_STATE_ERROR) { + mutex_unlock(&rbd_dev->watch_mutex); + goto out; + } ret = __rbd_register_watch(rbd_dev); if (ret) { rbd_warn(rbd_dev, "failed to reregister watch: %d", ret); - if (ret != -EBLACKLISTED) + if (ret == -EBLACKLISTED) { + set_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags); + need_to_wake = true; + } else { queue_delayed_work(rbd_dev->task_wq, &rbd_dev->watch_dwork, RBD_RETRY_DELAY); - goto fail_unlock; + } + mutex_unlock(&rbd_dev->watch_mutex); + goto out; } + need_to_wake = true; rbd_dev->watch_state = RBD_WATCH_STATE_REGISTERED; rbd_dev->watch_cookie = rbd_dev->watch_handle->linger_id; mutex_unlock(&rbd_dev->watch_mutex); @@ -3963,13 +3972,10 @@ static void rbd_reregister_watch(struct work_struct *work) ret); } +out: up_write(&rbd_dev->lock_rwsem); - wake_requests(rbd_dev, true); - return; - -fail_unlock: - mutex_unlock(&rbd_dev->watch_mutex); - up_write(&rbd_dev->lock_rwsem); + if (need_to_wake) + wake_requests(rbd_dev, true); } /* @@ -4074,7 +4080,9 @@ static void rbd_wait_state_locked(struct rbd_device *rbd_dev) up_read(&rbd_dev->lock_rwsem); schedule(); down_read(&rbd_dev->lock_rwsem); - } while (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED); + } while (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED && + !test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)); + finish_wait(&rbd_dev->lock_waitq, &wait); } @@ -4166,8 +4174,16 @@ static void rbd_queue_workfn(struct work_struct *work) if (must_be_locked) { down_read(&rbd_dev->lock_rwsem); - if (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED) + if (rbd_dev->lock_state != RBD_LOCK_STATE_LOCKED && + !test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) rbd_wait_state_locked(rbd_dev); + + WARN_ON((rbd_dev->lock_state == RBD_LOCK_STATE_LOCKED) ^ + !test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)); + if (test_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags)) { + result = -EBLACKLISTED; + goto err_unlock; + } } img_request = rbd_img_request_create(rbd_dev, offset, length, op_type, From 4d73644bc3d76dd161a84e3849c6f2c9c01c4ba7 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 29 Sep 2016 14:23:12 +0200 Subject: [PATCH 131/884] rbd: don't retry watch reregistration if header object is gone If the header object gets deleted (perhaps along with the entire pool), there is no point in attempting to reregister the watch. Treat this the same as blacklisting: fail all pending and new I/Os requiring the lock. Signed-off-by: Ilya Dryomov --- drivers/block/rbd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 633e8c2ea120..7b274ff4632c 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -3944,7 +3944,7 @@ static void rbd_reregister_watch(struct work_struct *work) ret = __rbd_register_watch(rbd_dev); if (ret) { rbd_warn(rbd_dev, "failed to reregister watch: %d", ret); - if (ret == -EBLACKLISTED) { + if (ret == -EBLACKLISTED || ret == -ENOENT) { set_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags); need_to_wake = true; } else { From 0d7718f666be181fda1ba2d08f137d87c1419347 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 10 Oct 2016 15:38:18 +0300 Subject: [PATCH 132/884] ceph: fix error handling in ceph_read_iter In case __ceph_do_getattr returns an error and the retry_op in ceph_read_iter is not READ_INLINE, then it's possible to invoke __free_page on a page which is NULL, this naturally leads to a crash. This can happen when, for example, a process waiting on a MDS reply receives sigterm. Fix this by explicitly checking whether the page is set or not. Cc: stable@vger.kernel.org # 3.19+ Signed-off-by: Nikolay Borisov Reviewed-by: Yan, Zheng Signed-off-by: Ilya Dryomov --- fs/ceph/file.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 7bf08825cc11..18630e800208 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -1272,7 +1272,8 @@ again: statret = __ceph_do_getattr(inode, page, CEPH_STAT_CAP_INLINE_DATA, !!page); if (statret < 0) { - __free_page(page); + if (page) + __free_page(page); if (statret == -ENODATA) { BUG_ON(retry_op != READ_INLINE); goto again; From f0076436136751359e0886f3302a2a0b3a28ba6e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 14 Oct 2016 14:40:33 +0100 Subject: [PATCH 133/884] r8169: set coherent DMA mask as well as streaming DMA mask PCI devices that are 64-bit DMA capable should set the coherent DMA mask as well as the streaming DMA mask. On some architectures, these are managed separately, and so the coherent DMA mask will be left at its default value of 32 if it is not set explicitly. This results in errors such as r8169 Gigabit Ethernet driver 2.3LK-NAPI loaded hwdev DMA mask = 0x00000000ffffffff, dev_addr = 0x00000080fbfff000 swiotlb: coherent allocation failed for device 0000:02:00.0 size=4096 CPU: 0 PID: 1062 Comm: systemd-udevd Not tainted 4.8.0+ #35 Hardware name: AMD Seattle/Seattle, BIOS 10:53:24 Oct 13 2016 on systems without memory that is 32-bit addressable by PCI devices. Signed-off-by: Ard Biesheuvel Acked-by: Francois Romieu Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index e55638c7505a..bf000d819a21 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -8273,7 +8273,8 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if ((sizeof(dma_addr_t) > 4) && (use_dac == 1 || (use_dac == -1 && pci_is_pcie(pdev) && tp->mac_version >= RTL_GIGA_MAC_VER_18)) && - !pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) { + !pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) && + !pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64))) { /* CPlusCmd Dual Access Cycle is only needed for non-PCIe */ if (!pci_is_pcie(pdev)) From 93966b715b32a783a1641f5a385901bbfab04733 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Fri, 14 Oct 2016 14:14:35 -0500 Subject: [PATCH 134/884] net: qcom/emac: disable interrupts before calling phy_disconnect There is a race condition that can occur if EMAC interrupts are enabled when phy_disconnect() is called. phy_disconnect() sets adjust_link to NULL. When an interrupt occurs, the ISR might call phy_mac_interrupt(), which wakes up the workqueue function phy_state_machine(). This function might reference adjust_link, thereby causing a null pointer exception. Signed-off-by: Timur Tabi Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/emac/emac-mac.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c index e97968ed4b8f..6fb3bee904d3 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac-mac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c @@ -1021,14 +1021,18 @@ void emac_mac_down(struct emac_adapter *adpt) napi_disable(&adpt->rx_q.napi); phy_stop(adpt->phydev); - phy_disconnect(adpt->phydev); - /* disable mac irq */ + /* Interrupts must be disabled before the PHY is disconnected, to + * avoid a race condition where adjust_link is null when we get + * an interrupt. + */ writel(DIS_INT, adpt->base + EMAC_INT_STATUS); writel(0, adpt->base + EMAC_INT_MASK); synchronize_irq(adpt->irq.irq); free_irq(adpt->irq.irq, &adpt->irq); + phy_disconnect(adpt->phydev); + emac_mac_reset(adpt); emac_tx_q_descs_free(adpt); From 50756ebecf69276b8a908409fa32c123bb12420b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 14 Oct 2016 22:26:11 +0300 Subject: [PATCH 135/884] stmmac: fix an error code in stmmac_ptp_register() PTR_ERR(NULL) is success. We have to preserve the error code earlier. Fixes: 7086605a6ab5 ("stmmac: fix error check when init ptp") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index 289d52725a6c..5d61fb27219a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -185,8 +185,10 @@ int stmmac_ptp_register(struct stmmac_priv *priv) priv->ptp_clock = ptp_clock_register(&priv->ptp_clock_ops, priv->device); if (IS_ERR(priv->ptp_clock)) { + int ret = PTR_ERR(priv->ptp_clock); + priv->ptp_clock = NULL; - return PTR_ERR(priv->ptp_clock); + return ret; } spin_lock_init(&priv->ptp_lock); From fb5c6cfaec126d9a96b9dd471d4711bf4c737a6f Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Sat, 15 Oct 2016 00:01:20 +0300 Subject: [PATCH 136/884] vmxnet3: avoid assumption about invalid dma_pa in vmxnet3_set_mc() vmxnet3_set_mc() checks new_table_pa returned by dma_map_single() with dma_mapping_error(), but even there it assumes zero is invalid pa (it assumes dma_mapping_error(...,0) returns true if new_table is NULL). The patch adds an explicit variable to track status of new_table_pa. Found by Linux Driver Verification project (linuxtesting.org). v2: use "bool" and "true"/"false" for boolean variables. Signed-off-by: Alexey Khoroshilov Signed-off-by: David S. Miller --- drivers/net/vmxnet3/vmxnet3_drv.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index b5554f2ebee4..ef83ae3b0a44 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -2279,6 +2279,7 @@ vmxnet3_set_mc(struct net_device *netdev) &adapter->shared->devRead.rxFilterConf; u8 *new_table = NULL; dma_addr_t new_table_pa = 0; + bool new_table_pa_valid = false; u32 new_mode = VMXNET3_RXM_UCAST; if (netdev->flags & IFF_PROMISC) { @@ -2307,13 +2308,15 @@ vmxnet3_set_mc(struct net_device *netdev) new_table, sz, PCI_DMA_TODEVICE); + if (!dma_mapping_error(&adapter->pdev->dev, + new_table_pa)) { + new_mode |= VMXNET3_RXM_MCAST; + new_table_pa_valid = true; + rxConf->mfTablePA = cpu_to_le64( + new_table_pa); + } } - - if (!dma_mapping_error(&adapter->pdev->dev, - new_table_pa)) { - new_mode |= VMXNET3_RXM_MCAST; - rxConf->mfTablePA = cpu_to_le64(new_table_pa); - } else { + if (!new_table_pa_valid) { netdev_info(netdev, "failed to copy mcast list, setting ALL_MULTI\n"); new_mode |= VMXNET3_RXM_ALL_MULTI; @@ -2338,7 +2341,7 @@ vmxnet3_set_mc(struct net_device *netdev) VMXNET3_CMD_UPDATE_MAC_FILTERS); spin_unlock_irqrestore(&adapter->cmd_lock, flags); - if (new_table_pa) + if (new_table_pa_valid) dma_unmap_single(&adapter->pdev->dev, new_table_pa, rxConf->mfTableLen, PCI_DMA_TODEVICE); kfree(new_table); From 0362fcc9d6834034bc0cdb1d9b02a1b9baf96a2a Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Thu, 22 Sep 2016 12:02:19 +0800 Subject: [PATCH 137/884] arm64: dts: rockchip: remove always-on and boot-on from vcc_sd Please don't add these for vcc_sd, and mmc-core/driver will control it. Otherwise, it will waste energy even without sdmmc in slot. Moreover, it will causes a bug: If we insert/remove sd card, we could see [9.337271] mmc0: new ultra high speed SDR25 SDHC card at address 0007 [9.345144] mmcblk0: mmc0:0007 SD32G 29.3 GiB This is okay for normal sd insert/remove test, but when I debug some issues for sdmmc, I did unbind/bind test. And there is a interesting phenomenon when we bind the driver again: [58.314069] mmc0: new high speed SDHC card at address 0007 [58.320282] mmcblk0: mmc0:0007 SD32G 29.3 GiB So the sd card could just support high speed without power cycle since the vcc_sd is always on, which makes the sd card fail to reinit its internal ocr mask. Signed-off-by: Shawn Lin Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3368-geekbox.dts | 2 -- arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts | 2 -- 2 files changed, 4 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3368-geekbox.dts b/arch/arm64/boot/dts/rockchip/rk3368-geekbox.dts index 46cdddfcea6c..353314ca7426 100644 --- a/arch/arm64/boot/dts/rockchip/rk3368-geekbox.dts +++ b/arch/arm64/boot/dts/rockchip/rk3368-geekbox.dts @@ -258,8 +258,6 @@ }; vcc_sd: SWITCH_REG1 { - regulator-always-on; - regulator-boot-on; regulator-name = "vcc_sd"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts b/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts index 5797933ef80e..13b7f1edad10 100644 --- a/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts +++ b/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts @@ -152,8 +152,6 @@ gpio = <&gpio3 11 GPIO_ACTIVE_LOW>; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <3300000>; - regulator-always-on; - regulator-boot-on; vin-supply = <&vcc_io>; }; From 3ce0fefc51bd56381b1b9a92835cf8f9db3f2ef8 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 29 Sep 2016 10:00:14 -0700 Subject: [PATCH 138/884] ARCv2: intc: untangle SMP, MCIP and IDU The IDU intc is technically part of MCIP (Multi-core IP) hence historically was only available in a SMP hardware build (and thus only in a SMP kernel build). Now that hardware restriction has been lifted, so a UP kernel needs to support it. This requires breaking mcip.c into parts which are strictly SMP (inter-core interrupts) and IDU which in reality is just another intc and thus has no bearing on SMP. This change allows IDU in UP builds and with a suitable device tree, we can have the cascaded intc system ARCv2 core intc <---> ARCv2 IDU intc <---> periperals Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 17 +++++++++-------- arch/arc/include/asm/mcip.h | 16 ++++++++++++++++ arch/arc/kernel/mcip.c | 31 +++++++++++-------------------- 3 files changed, 36 insertions(+), 28 deletions(-) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index ecd12379e2cd..6f67895cd9c4 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -186,14 +186,6 @@ if SMP config ARC_HAS_COH_CACHES def_bool n -config ARC_MCIP - bool "ARConnect Multicore IP (MCIP) Support " - depends on ISA_ARCV2 - help - This IP block enables SMP in ARC-HS38 cores. - It provides for cross-core interrupts, multi-core debug - hardware semaphores, shared memory,.... - config NR_CPUS int "Maximum number of CPUs (2-4096)" range 2 4096 @@ -211,6 +203,15 @@ config ARC_SMP_HALT_ON_RESET endif #SMP +config ARC_MCIP + bool "ARConnect Multicore IP (MCIP) Support " + depends on ISA_ARCV2 + default y if SMP + help + This IP block enables SMP in ARC-HS38 cores. + It provides for cross-core interrupts, multi-core debug + hardware semaphores, shared memory,.... + menuconfig ARC_CACHE bool "Enable Cache Support" default y diff --git a/arch/arc/include/asm/mcip.h b/arch/arc/include/asm/mcip.h index 847e3bbe387f..c8fbe4114bad 100644 --- a/arch/arc/include/asm/mcip.h +++ b/arch/arc/include/asm/mcip.h @@ -55,6 +55,22 @@ struct mcip_cmd { #define IDU_M_DISTRI_DEST 0x2 }; +struct mcip_bcr { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad3:8, + idu:1, llm:1, num_cores:6, + iocoh:1, gfrc:1, dbg:1, pad2:1, + msg:1, sem:1, ipi:1, pad:1, + ver:8; +#else + unsigned int ver:8, + pad:1, ipi:1, sem:1, msg:1, + pad2:1, dbg:1, gfrc:1, iocoh:1, + num_cores:6, llm:1, idu:1, + pad3:8; +#endif +}; + /* * MCIP programming model * diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c index 72f9179b1a24..c424d5abc318 100644 --- a/arch/arc/kernel/mcip.c +++ b/arch/arc/kernel/mcip.c @@ -15,11 +15,12 @@ #include #include -static char smp_cpuinfo_buf[128]; -static int idu_detected; - static DEFINE_RAW_SPINLOCK(mcip_lock); +#ifdef CONFIG_SMP + +static char smp_cpuinfo_buf[128]; + static void mcip_setup_per_cpu(int cpu) { smp_ipi_irq_setup(cpu, IPI_IRQ); @@ -86,21 +87,7 @@ static void mcip_ipi_clear(int irq) static void mcip_probe_n_setup(void) { - struct mcip_bcr { -#ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int pad3:8, - idu:1, llm:1, num_cores:6, - iocoh:1, gfrc:1, dbg:1, pad2:1, - msg:1, sem:1, ipi:1, pad:1, - ver:8; -#else - unsigned int ver:8, - pad:1, ipi:1, sem:1, msg:1, - pad2:1, dbg:1, gfrc:1, iocoh:1, - num_cores:6, llm:1, idu:1, - pad3:8; -#endif - } mp; + struct mcip_bcr mp; READ_BCR(ARC_REG_MCIP_BCR, mp); @@ -114,7 +101,6 @@ static void mcip_probe_n_setup(void) IS_AVAIL1(mp.gfrc, "GFRC")); cpuinfo_arc700[0].extn.gfrc = mp.gfrc; - idu_detected = mp.idu; if (mp.dbg) { __mcip_cmd_data(CMD_DEBUG_SET_SELECT, 0, 0xf); @@ -130,6 +116,8 @@ struct plat_smp_ops plat_smp_ops = { .ipi_clear = mcip_ipi_clear, }; +#endif + /*************************************************************************** * ARCv2 Interrupt Distribution Unit (IDU) * @@ -295,8 +283,11 @@ idu_of_init(struct device_node *intc, struct device_node *parent) /* Read IDU BCR to confirm nr_irqs */ int nr_irqs = of_irq_count(intc); int i, irq; + struct mcip_bcr mp; - if (!idu_detected) + READ_BCR(ARC_REG_MCIP_BCR, mp); + + if (!mp.idu) panic("IDU not detected, but DeviceTree using it"); pr_info("MCIP: IDU referenced from Devicetree %d irqs\n", nr_irqs); From 27f3d2a3b59f573a398c9acc810c16ebca07be78 Mon Sep 17 00:00:00 2001 From: Daniel Mentz Date: Tue, 4 Oct 2016 16:34:27 -0700 Subject: [PATCH 139/884] ARC: [build] Support gz, lzma compressed uImage Add support for lzma compressed uImage. Support for gzip was already available but could not be enabled because we were missing CONFIG_HAVE_KERNEL_GZIP in arch/arc/Kconfig. Signed-off-by: Daniel Mentz Cc: linux-snps-arc@lists.infradead.org Cc: Vineet Gupta Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 2 ++ arch/arc/boot/Makefile | 16 ++++++++++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 6f67895cd9c4..ac0b309aced5 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -41,6 +41,8 @@ config ARC select PERF_USE_VMALLOC select HAVE_DEBUG_STACKOVERFLOW select HAVE_GENERIC_DMA_COHERENT + select HAVE_KERNEL_GZIP + select HAVE_KERNEL_LZMA config MIGHT_HAVE_PCI bool diff --git a/arch/arc/boot/Makefile b/arch/arc/boot/Makefile index e597cb34c16a..f94cf151e06a 100644 --- a/arch/arc/boot/Makefile +++ b/arch/arc/boot/Makefile @@ -14,9 +14,15 @@ UIMAGE_ENTRYADDR = $(LINUX_START_TEXT) suffix-y := bin suffix-$(CONFIG_KERNEL_GZIP) := gz +suffix-$(CONFIG_KERNEL_LZMA) := lzma -targets += uImage uImage.bin uImage.gz -extra-y += vmlinux.bin vmlinux.bin.gz +targets += uImage +targets += uImage.bin +targets += uImage.gz +targets += uImage.lzma +extra-y += vmlinux.bin +extra-y += vmlinux.bin.gz +extra-y += vmlinux.bin.lzma $(obj)/vmlinux.bin: vmlinux FORCE $(call if_changed,objcopy) @@ -24,12 +30,18 @@ $(obj)/vmlinux.bin: vmlinux FORCE $(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE $(call if_changed,gzip) +$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin FORCE + $(call if_changed,lzma) + $(obj)/uImage.bin: $(obj)/vmlinux.bin FORCE $(call if_changed,uimage,none) $(obj)/uImage.gz: $(obj)/vmlinux.bin.gz FORCE $(call if_changed,uimage,gzip) +$(obj)/uImage.lzma: $(obj)/vmlinux.bin.lzma FORCE + $(call if_changed,uimage,lzma) + $(obj)/uImage: $(obj)/uImage.$(suffix-y) @ln -sf $(notdir $<) $@ @echo ' Image $@ is ready' From 3e3affe549ebd228501d160886c1b3999d99e0c6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 22 Sep 2016 11:50:52 +0200 Subject: [PATCH 140/884] drm/imx: hide an unused label The imx_drm_bind function causes a warning in linux-next when CONFIG_DRM_FBDEV_EMULATION is not set: drivers/gpu/drm/imx/imx-drm-core.c: In function 'imx_drm_bind': drivers/gpu/drm/imx/imx-drm-core.c:441:1: error: label 'err_unbind' defined but not used [-Werror=unused-label] I don't understand why the warning only showed up now, as the code has not been modified recently, but there is an obvious fix in adding another #if for the symbol. Signed-off-by: Arnd Bergmann Fixes: c1ff5a7aa3c3 ("drm/imx: Remove local fbdev emulation Kconfig option") Signed-off-by: Philipp Zabel --- drivers/gpu/drm/imx/imx-drm-core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/imx/imx-drm-core.c b/drivers/gpu/drm/imx/imx-drm-core.c index 98df09c2b388..b084c571b23f 100644 --- a/drivers/gpu/drm/imx/imx-drm-core.c +++ b/drivers/gpu/drm/imx/imx-drm-core.c @@ -436,9 +436,11 @@ static int imx_drm_bind(struct device *dev) err_fbhelper: drm_kms_helper_poll_fini(drm); +#if IS_ENABLED(CONFIG_DRM_FBDEV_EMULATION) if (imxdrm->fbhelper) drm_fbdev_cma_fini(imxdrm->fbhelper); err_unbind: +#endif component_unbind_all(drm->dev, drm); err_vblank: drm_vblank_cleanup(drm); From 4ad3e92c28057950c30ca19e32530eff9ee111bb Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 21 Sep 2016 15:12:24 +0000 Subject: [PATCH 141/884] gpu: ipu-v3: Use ERR_CAST instead of ERR_PTR(PTR_ERR()) Use ERR_CAST inlined function instead of ERR_PTR(PTR_ERR(...)). Generated by: scripts/coccinelle/api/err_cast.cocci Signed-off-by: Wei Yongjun Signed-off-by: Philipp Zabel --- drivers/gpu/ipu-v3/ipu-image-convert.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/ipu-v3/ipu-image-convert.c b/drivers/gpu/ipu-v3/ipu-image-convert.c index 2ba7d437a2af..805b6fa7b5f4 100644 --- a/drivers/gpu/ipu-v3/ipu-image-convert.c +++ b/drivers/gpu/ipu-v3/ipu-image-convert.c @@ -1617,7 +1617,7 @@ ipu_image_convert(struct ipu_soc *ipu, enum ipu_ic_task ic_task, ctx = ipu_image_convert_prepare(ipu, ic_task, in, out, rot_mode, complete, complete_context); if (IS_ERR(ctx)) - return ERR_PTR(PTR_ERR(ctx)); + return ERR_CAST(ctx); run = kzalloc(sizeof(*run), GFP_KERNEL); if (!run) { From 039bea844016ae85eaf195bf25266b5eb028a319 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 12 Oct 2016 08:02:21 +0530 Subject: [PATCH 142/884] Staging: greybus: gpio: Use gbphy_dev->dev instead of bundle->dev Some of the print messages are using the incorrect device pointer, fix them. Signed-off-by: Viresh Kumar Acked-by: Johan Hovold Reviewed-by: Rui Miguel Silva Signed-off-by: Greg Kroah-Hartman --- drivers/staging/greybus/gpio.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/staging/greybus/gpio.c b/drivers/staging/greybus/gpio.c index 5e06e4229e42..250caa00de5e 100644 --- a/drivers/staging/greybus/gpio.c +++ b/drivers/staging/greybus/gpio.c @@ -702,15 +702,13 @@ static int gb_gpio_probe(struct gbphy_device *gbphy_dev, ret = gb_gpio_irqchip_add(gpio, irqc, 0, handle_level_irq, IRQ_TYPE_NONE); if (ret) { - dev_err(&connection->bundle->dev, - "failed to add irq chip: %d\n", ret); + dev_err(&gbphy_dev->dev, "failed to add irq chip: %d\n", ret); goto exit_line_free; } ret = gpiochip_add(gpio); if (ret) { - dev_err(&connection->bundle->dev, - "failed to add gpio chip: %d\n", ret); + dev_err(&gbphy_dev->dev, "failed to add gpio chip: %d\n", ret); goto exit_gpio_irqchip_remove; } From 4fa589126f23243bd998a464cb6158d343eb6a89 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 12 Oct 2016 08:02:22 +0530 Subject: [PATCH 143/884] Staging: greybus: uart: Use gbphy_dev->dev instead of bundle->dev Some of the print messages are using the incorrect device pointer, fix them. Signed-off-by: Viresh Kumar Acked-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/staging/greybus/uart.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/greybus/uart.c b/drivers/staging/greybus/uart.c index 5ee7954bd9f9..2633d2bfb1b4 100644 --- a/drivers/staging/greybus/uart.c +++ b/drivers/staging/greybus/uart.c @@ -888,7 +888,7 @@ static int gb_uart_probe(struct gbphy_device *gbphy_dev, minor = alloc_minor(gb_tty); if (minor < 0) { if (minor == -ENOSPC) { - dev_err(&connection->bundle->dev, + dev_err(&gbphy_dev->dev, "no more free minor numbers\n"); retval = -ENODEV; } else { From 0047b6e5f1b45b391244d78097631eb09a960202 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 12 Oct 2016 09:20:22 +0300 Subject: [PATCH 144/884] staging: android/ion: testing the wrong variable We're testing "pdev" but we intended to test "heap_pdev". This is a static checker fix and it's unlikely that anyone is affected by this bug. Fixes: 13439479c7de ('staging: ion: Add files for parsing the devicetree') Signed-off-by: Dan Carpenter Acked-by: Laura Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/ion/ion_of.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/android/ion/ion_of.c b/drivers/staging/android/ion/ion_of.c index 15bac92b7f04..46b2bb99bfd6 100644 --- a/drivers/staging/android/ion/ion_of.c +++ b/drivers/staging/android/ion/ion_of.c @@ -107,7 +107,7 @@ struct ion_platform_data *ion_parse_dt(struct platform_device *pdev, heap_pdev = of_platform_device_create(node, heaps[i].name, &pdev->dev); - if (!pdev) + if (!heap_pdev) return ERR_PTR(-ENOMEM); heap_pdev->dev.platform_data = &heaps[i]; From 1d4f1d53e1e2d5e38f4d3ca3bf60f8be5025540f Mon Sep 17 00:00:00 2001 From: Aditya Shankar Date: Fri, 7 Oct 2016 09:45:03 +0530 Subject: [PATCH 145/884] Staging: wilc1000: Fix kernel Oops on opening the device Commit 2518ac59eb27 ("staging: wilc1000: Replace kthread with workqueue for host interface") adds an unconditional destroy_workqueue() on the wilc's "hif_workqueue" soon after its creation thereby rendering it unusable. It then further attempts to queue work onto this non-existing hif_worqueue and results in: Unable to handle kernel NULL pointer dereference at virtual address 00000010 pgd = de478000 [00000010] *pgd=3eec0831, *pte=00000000, *ppte=00000000 Internal error: Oops: 17 [#1] ARM Modules linked in: wilc1000_sdio(C) wilc1000(C) CPU: 0 PID: 825 Comm: ifconfig Tainted: G C 4.8.0-rc8+ #37 Hardware name: Atmel SAMA5 task: df56f800 task.stack: deeb0000 PC is at __queue_work+0x90/0x284 LR is at __queue_work+0x58/0x284 pc : [] lr : [] psr: 600f0093 sp : deeb1aa0 ip : def22d78 fp : deea6000 r10: 00000000 r9 : c0a08150 r8 : c0a2f058 r7 : 00000001 r6 : dee9b600 r5 : def22d74 r4 : 00000000 r3 : 00000000 r2 : def22d74 r1 : 07ffffff r0 : 00000000 Flags: nZCv IRQs off FIQs on Mode SVC_32 ISA ARM Segment none ... [] (__queue_work) from [] (queue_work_on+0x34/0x40) [] (queue_work_on) from [] (wilc_enqueue_cmd+0x54/0x64 [wilc1000]) [] (wilc_enqueue_cmd [wilc1000]) from [] (wilc_set_wfi_drv_handler+0x48/0x70 [wilc1000]) [] (wilc_set_wfi_drv_handler [wilc1000]) from [] (wilc_mac_open+0x214/0x250 [wilc1000]) [] (wilc_mac_open [wilc1000]) from [] (__dev_open+0xb8/0x11c) [] (__dev_open) from [] (__dev_change_flags+0x94/0x158) [] (__dev_change_flags) from [] (dev_change_flags+0x18/0x48) [] (dev_change_flags) from [] (devinet_ioctl+0x6b4/0x788) [] (devinet_ioctl) from [] (sock_ioctl+0x154/0x2cc) [] (sock_ioctl) from [] (do_vfs_ioctl+0x9c/0x878) [] (do_vfs_ioctl) from [] (SyS_ioctl+0x34/0x5c) [] (SyS_ioctl) from [] (ret_fast_syscall+0x0/0x3c) Code: e5932004 e1520006 01a04003 0affffff (e5943010) ---[ end trace b612328adaa6bf20 ]--- This fix removes the unnecessary call to destroy_workqueue() while opening the device to avoid the above kernel panic. The deinit routine already does a good job of terminating the workqueue when no longer needed. Reported-by: Nicolas Ferre Fixes: 2518ac59eb27 ("staging: wilc1000: Replace kthread with workqueue for host interface") Cc: stable@vger.kernel.org # 4.8+ Signed-off-by: Aditya Shankar Signed-off-by: Greg Kroah-Hartman --- drivers/staging/wilc1000/host_interface.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/staging/wilc1000/host_interface.c b/drivers/staging/wilc1000/host_interface.c index 78f5613e9467..6ab7443eabde 100644 --- a/drivers/staging/wilc1000/host_interface.c +++ b/drivers/staging/wilc1000/host_interface.c @@ -3388,7 +3388,6 @@ int wilc_init(struct net_device *dev, struct host_if_drv **hif_drv_handler) clients_count++; - destroy_workqueue(hif_workqueue); _fail_: return result; } From c89d98e224b4858f42a9fec0f16766b3d7669ba3 Mon Sep 17 00:00:00 2001 From: Oleg Drokin Date: Sun, 16 Oct 2016 13:16:50 -0400 Subject: [PATCH 146/884] staging/lustre/llite: Move unstable_stats from sysfs to debugfs It's multiple values per file, so it has no business being in sysfs, besides it was assuming seqfile anyway. Fixes: d806f30e639b ("staging: lustre: osc: revise unstable pages accounting") Signed-off-by: Oleg Drokin Signed-off-by: Greg Kroah-Hartman --- .../staging/lustre/lustre/llite/lproc_llite.c | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/staging/lustre/lustre/llite/lproc_llite.c b/drivers/staging/lustre/lustre/llite/lproc_llite.c index 6eae60595905..23fda9d98bff 100644 --- a/drivers/staging/lustre/lustre/llite/lproc_llite.c +++ b/drivers/staging/lustre/lustre/llite/lproc_llite.c @@ -871,12 +871,10 @@ static ssize_t xattr_cache_store(struct kobject *kobj, } LUSTRE_RW_ATTR(xattr_cache); -static ssize_t unstable_stats_show(struct kobject *kobj, - struct attribute *attr, - char *buf) +static int ll_unstable_stats_seq_show(struct seq_file *m, void *v) { - struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info, - ll_kobj); + struct super_block *sb = m->private; + struct ll_sb_info *sbi = ll_s2sbi(sb); struct cl_client_cache *cache = sbi->ll_cache; long pages; int mb; @@ -884,19 +882,21 @@ static ssize_t unstable_stats_show(struct kobject *kobj, pages = atomic_long_read(&cache->ccc_unstable_nr); mb = (pages * PAGE_SIZE) >> 20; - return sprintf(buf, "unstable_check: %8d\n" - "unstable_pages: %12ld\n" - "unstable_mb: %8d\n", - cache->ccc_unstable_check, pages, mb); + seq_printf(m, + "unstable_check: %8d\n" + "unstable_pages: %12ld\n" + "unstable_mb: %8d\n", + cache->ccc_unstable_check, pages, mb); + + return 0; } -static ssize_t unstable_stats_store(struct kobject *kobj, - struct attribute *attr, - const char *buffer, - size_t count) +static ssize_t ll_unstable_stats_seq_write(struct file *file, + const char __user *buffer, + size_t count, loff_t *off) { - struct ll_sb_info *sbi = container_of(kobj, struct ll_sb_info, - ll_kobj); + struct super_block *sb = ((struct seq_file *)file->private_data)->private; + struct ll_sb_info *sbi = ll_s2sbi(sb); char kernbuf[128]; int val, rc; @@ -922,7 +922,7 @@ static ssize_t unstable_stats_store(struct kobject *kobj, return count; } -LUSTRE_RW_ATTR(unstable_stats); +LPROC_SEQ_FOPS(ll_unstable_stats); static ssize_t root_squash_show(struct kobject *kobj, struct attribute *attr, char *buf) @@ -995,6 +995,7 @@ static struct lprocfs_vars lprocfs_llite_obd_vars[] = { /* { "filegroups", lprocfs_rd_filegroups, 0, 0 }, */ { "max_cached_mb", &ll_max_cached_mb_fops, NULL }, { "statahead_stats", &ll_statahead_stats_fops, NULL, 0 }, + { "unstable_stats", &ll_unstable_stats_fops, NULL }, { "sbi_flags", &ll_sbi_flags_fops, NULL, 0 }, { .name = "nosquash_nids", .fops = &ll_nosquash_nids_fops }, @@ -1026,7 +1027,6 @@ static struct attribute *llite_attrs[] = { &lustre_attr_max_easize.attr, &lustre_attr_default_easize.attr, &lustre_attr_xattr_cache.attr, - &lustre_attr_unstable_stats.attr, &lustre_attr_root_squash.attr, NULL, }; From bbe097f092b0d13e9736bd2794d0ab24547d0e5d Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 15 Sep 2016 17:07:22 +0200 Subject: [PATCH 147/884] usb: gadget: udc: atmel: fix endpoint name Since commit c32b5bcfa3c4 ("ARM: dts: at91: Fix USB endpoint nodes"), atmel_usba_udc fails with: ------------[ cut here ]------------ WARNING: CPU: 0 PID: 0 at include/linux/usb/gadget.h:405 ecm_do_notify+0x188/0x1a0 Modules linked in: CPU: 0 PID: 0 Comm: swapper Not tainted 4.7.0+ #15 Hardware name: Atmel SAMA5 [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (__warn+0xe4/0xfc) [] (__warn) from [] (warn_slowpath_null+0x20/0x28) [] (warn_slowpath_null) from [] (ecm_do_notify+0x188/0x1a0) [] (ecm_do_notify) from [] (ecm_set_alt+0x74/0x1ac) [] (ecm_set_alt) from [] (composite_setup+0xfc0/0x19f8) [] (composite_setup) from [] (usba_udc_irq+0x8f4/0xd9c) [] (usba_udc_irq) from [] (handle_irq_event_percpu+0x9c/0x158) [] (handle_irq_event_percpu) from [] (handle_irq_event+0x28/0x3c) [] (handle_irq_event) from [] (handle_fasteoi_irq+0xa0/0x168) [] (handle_fasteoi_irq) from [] (generic_handle_irq+0x24/0x34) [] (generic_handle_irq) from [] (__handle_domain_irq+0x54/0xa8) [] (__handle_domain_irq) from [] (__irq_svc+0x54/0x70) [] (__irq_svc) from [] (arch_cpu_idle+0x38/0x3c) [] (arch_cpu_idle) from [] (cpu_startup_entry+0x9c/0xdc) [] (cpu_startup_entry) from [] (start_kernel+0x354/0x360) [] (start_kernel) from [<20008078>] (0x20008078) ---[ end trace e7cf9dcebf4815a6 ]--- Fixes: c32b5bcfa3c4 ("ARM: dts: at91: Fix USB endpoint nodes") Cc: Reported-by: Richard Genoud Acked-by: Nicolas Ferre Signed-off-by: Alexandre Belloni Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/atmel_usba_udc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/atmel_usba_udc.c b/drivers/usb/gadget/udc/atmel_usba_udc.c index bb1f6c8f0f01..45bc997d0711 100644 --- a/drivers/usb/gadget/udc/atmel_usba_udc.c +++ b/drivers/usb/gadget/udc/atmel_usba_udc.c @@ -1978,7 +1978,7 @@ static struct usba_ep * atmel_udc_of_init(struct platform_device *pdev, dev_err(&pdev->dev, "of_probe: name error(%d)\n", ret); goto err; } - ep->ep.name = name; + ep->ep.name = kasprintf(GFP_KERNEL, "ep%d", ep->index); ep->ep_regs = udc->regs + USBA_EPT_BASE(i); ep->dma_regs = udc->regs + USBA_DMA_BASE(i); From 6c83f77278f17a7679001027e9231291c20f0d8a Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Tue, 4 Oct 2016 15:14:43 +0300 Subject: [PATCH 148/884] usb: gadget: function: u_ether: don't starve tx request queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we don't guarantee that we will always get an interrupt at least when we're queueing our very last request, we could fall into situation where we queue every request with 'no_interrupt' set. This will cause the link to get stuck. The behavior above has been triggered with g_ether and dwc3. Cc: Reported-by: Ville Syrjälä Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/u_ether.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c index 9c8c9ed1dc9e..fe1811650dbc 100644 --- a/drivers/usb/gadget/function/u_ether.c +++ b/drivers/usb/gadget/function/u_ether.c @@ -590,8 +590,9 @@ static netdev_tx_t eth_start_xmit(struct sk_buff *skb, /* throttle high/super speed IRQ rate back slightly */ if (gadget_is_dualspeed(dev->gadget)) - req->no_interrupt = (dev->gadget->speed == USB_SPEED_HIGH || - dev->gadget->speed == USB_SPEED_SUPER) + req->no_interrupt = (((dev->gadget->speed == USB_SPEED_HIGH || + dev->gadget->speed == USB_SPEED_SUPER)) && + !list_empty(&dev->tx_reqs)) ? ((atomic_read(&dev->tx_qlen) % dev->qmult) != 0) : 0; From a9c3ca5fae6bf73770f0576eaf57d5f1305ef4b3 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Wed, 5 Oct 2016 14:24:37 +0300 Subject: [PATCH 149/884] usb: dwc3: gadget: properly account queued requests Some requests could be accounted for multiple times. Let's fix that so each and every requests is accounted for only once. Cc: # v4.8 Fixes: 55a0237f8f47 ("usb: dwc3: gadget: use allocated/queued reqs for LST bit") Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/gadget.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 07cc8929f271..3c3ced128c77 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -783,6 +783,7 @@ static void dwc3_prepare_one_trb(struct dwc3_ep *dep, req->trb = trb; req->trb_dma = dwc3_trb_dma_offset(dep, trb); req->first_trb_index = dep->trb_enqueue; + dep->queued_requests++; } dwc3_ep_inc_enq(dep); @@ -833,8 +834,6 @@ static void dwc3_prepare_one_trb(struct dwc3_ep *dep, trb->ctrl |= DWC3_TRB_CTRL_HWO; - dep->queued_requests++; - trace_dwc3_prepare_trb(dep, trb); } @@ -1861,8 +1860,11 @@ static int __dwc3_cleanup_done_trbs(struct dwc3 *dwc, struct dwc3_ep *dep, unsigned int s_pkt = 0; unsigned int trb_status; - dep->queued_requests--; dwc3_ep_inc_deq(dep); + + if (req->trb == trb) + dep->queued_requests--; + trace_dwc3_complete_trb(dep, trb); /* From d889c23ce4e3159e3d737f55f9d686a030a7ba87 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Thu, 29 Sep 2016 15:44:29 +0300 Subject: [PATCH 150/884] usb: dwc3: gadget: never pre-start Isochronous endpoints We cannot pre-start isochronous endpoints because we rely on the micro-frame number passed via XferNotReady command for proper Isochronous scheduling. Fixes: 08a36b543803 ("usb: dwc3: gadget: simplify __dwc3_gadget_ep_queue()") Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/gadget.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 3c3ced128c77..f15147f79d14 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1073,9 +1073,17 @@ static int __dwc3_gadget_ep_queue(struct dwc3_ep *dep, struct dwc3_request *req) list_add_tail(&req->list, &dep->pending_list); - if (usb_endpoint_xfer_isoc(dep->endpoint.desc) && - dep->flags & DWC3_EP_PENDING_REQUEST) { - if (list_empty(&dep->started_list)) { + /* + * NOTICE: Isochronous endpoints should NEVER be prestarted. We must + * wait for a XferNotReady event so we will know what's the current + * (micro-)frame number. + * + * Without this trick, we are very, very likely gonna get Bus Expiry + * errors which will force us issue EndTransfer command. + */ + if (usb_endpoint_xfer_isoc(dep->endpoint.desc)) { + if ((dep->flags & DWC3_EP_PENDING_REQUEST) && + list_empty(&dep->started_list)) { dwc3_stop_active_transfer(dwc, dep->number, true); dep->flags = DWC3_EP_ENABLED; } From 0f02c4e749bc79975dd23ddcc9bfe38bf76afb67 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 9 Sep 2016 16:28:43 +0200 Subject: [PATCH 151/884] s390/dasd: avoid undefined behaviour the mdc value can be quite big (like 65535), so we are in undefined territory when doing the multiplication with the (also signed) FCX_MAX_DATA_FACTOR as outlined by UBSAN: UBSAN: Undefined behaviour in drivers/s390/block/dasd_eckd.c:1678:14 signed integer overflow: 65535 * 65536 cannot be represented in type 'int' CPU: 5 PID: 183 Comm: kworker/u512:1 Not tainted 4.7.0+ #150 Workqueue: events_unbound async_run_entry_fn 000000fb8b59f900 000000fb8b59f990 0000000000000002 0000000000000000 000000fb8b59fa30 000000fb8b59f9a8 000000fb8b59f9a8 000000000011732e 00000000000000a4 0000000000a309e2 0000000000a4c072 000000000000000b 000000fb8b59f9f0 000000fb8b59f990 0000000000000000 0000000000000000 0400000000d83238 000000000011732e 000000fb8b59f990 000000fb8b59f9f0 Call Trace: ([<0000000000117260>] show_trace+0x98/0xa8) ([<00000000001172e0>] show_stack+0x70/0xf0) ([<000000000053ac96>] dump_stack+0x86/0xb8) ([<000000000057f5f8>] ubsan_epilogue+0x28/0x70) ([<000000000057fe9e>] handle_overflow+0xde/0xf0) ([<00000000006c322a>] dasd_eckd_check_characteristics+0x50a/0x550) ([<00000000006b42ca>] dasd_generic_set_online+0xba/0x380) ([<0000000000693d82>] ccw_device_set_online+0x192/0x550) ([<00000000006ac1ae>] dasd_generic_auto_online+0x2e/0x70) ([<0000000000172130>] async_run_entry_fn+0x70/0x270) ([<0000000000165a72>] process_one_work+0x26a/0x638) ([<0000000000165e8a>] worker_thread+0x4a/0x658) ([<000000000016dd9c>] kthread+0x10c/0x110) ([<00000000008963ae>] kernel_thread_starter+0x6/0xc) ([<00000000008963a8>] kernel_thread_starter+0x0/0xc) As this is a runtime value there is actually no risk of any sane compiler to detect and (ab)use this undefinedness, but let's make the multiplication defined by making mdc unsigned. Signed-off-by: Christian Borntraeger Acked-by: Stefan Haberland Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd_eckd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 831935af7389..a7a88476e215 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -1205,7 +1205,7 @@ static int verify_fcx_max_data(struct dasd_device *device, __u8 lpm) mdc, lpm); return mdc; } - fcx_max_data = mdc * FCX_MAX_DATA_FACTOR; + fcx_max_data = (u32)mdc * FCX_MAX_DATA_FACTOR; if (fcx_max_data < private->fcx_max_data) { dev_warn(&device->cdev->dev, "The maximum data size for zHPF requests %u " @@ -1675,7 +1675,7 @@ static u32 get_fcx_max_data(struct dasd_device *device) " data size for zHPF requests failed\n"); return 0; } else - return mdc * FCX_MAX_DATA_FACTOR; + return (u32)mdc * FCX_MAX_DATA_FACTOR; } /* From 12e721964e7feb555c3ee499a3f85c194d3d36ea Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 12 Oct 2016 13:43:38 +0200 Subject: [PATCH 152/884] s390: ignore pkey system calls Ignore the pkey systems calls since they don't make any sense on s390. In addition any user could trigger a warning if issueing the pkey_free system call, if it would be wired up on a system without pkey support. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/unistd.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h index 02613bad8bbb..3066031a73fe 100644 --- a/arch/s390/include/asm/unistd.h +++ b/arch/s390/include/asm/unistd.h @@ -9,6 +9,9 @@ #include #define __IGNORE_time +#define __IGNORE_pkey_mprotect +#define __IGNORE_pkey_alloc +#define __IGNORE_pkey_free #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_SYS_ALARM From 179a98cba11b057d9f1cc70cd2a8831f9e9a06e6 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Wed, 12 Oct 2016 11:14:31 +0200 Subject: [PATCH 153/884] s390/cio: don't register chpids in reserved state During IPL we register all chpids that are not in the unrecognized state. This includes chpids that are not usable and chpids for which the state could not be obtained. Change that to only register chpids in the configured (usable) or standby (usable after a configure operation) state. All other chpids could only be made available by external control for which we would receive machine checks. Signed-off-by: Sebastian Ott Reviewed-by: Peter Oberparleiter Signed-off-by: Martin Schwidefsky --- drivers/s390/cio/chp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c index 46be25c7461e..876c7e6e3a99 100644 --- a/drivers/s390/cio/chp.c +++ b/drivers/s390/cio/chp.c @@ -780,7 +780,7 @@ static int cfg_wait_idle(void) static int __init chp_init(void) { struct chp_id chpid; - int ret; + int state, ret; ret = crw_register_handler(CRW_RSC_CPATH, chp_process_crw); if (ret) @@ -791,7 +791,9 @@ static int __init chp_init(void) return 0; /* Register available channel-paths. */ chp_id_for_each(&chpid) { - if (chp_info_get_status(chpid) != CHP_STATUS_NOT_RECOGNIZED) + state = chp_info_get_status(chpid); + if (state == CHP_STATUS_CONFIGURED || + state == CHP_STATUS_STANDBY) chp_new(chpid); } From b5003b5f0a19b6b37ab32b1f0c6b5da2cb3f0903 Mon Sep 17 00:00:00 2001 From: Shyam Saini Date: Thu, 13 Oct 2016 21:50:07 +0530 Subject: [PATCH 154/884] s390/mm: use hugetlb_bad_size() Update setup_hugepagesz() to call hugetlb_bad_size() when unsupported hugepage size is found. Signed-off-by: Shyam Saini Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/mm/hugetlbpage.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index cd404aa3931c..4a0c5bce3552 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -217,6 +217,7 @@ static __init int setup_hugepagesz(char *opt) } else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE) { hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); } else { + hugetlb_bad_size(); pr_err("hugepagesz= specifies an unsupported page size %s\n", string); return 0; From a07ce8d34eb3d9c6cec3aa25f7713e6aafad2260 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Fri, 14 Oct 2016 10:47:24 -0700 Subject: [PATCH 155/884] usb: dwc2: Add msleep for host-only Although a host-only controller should not have any associated delay, some rockchip SOC platforms will not show the correct host-values of registers until after a delay. So add a 50 ms sleep when in host-only mode. Signed-off-by: John Youn Signed-off-by: Heiko Stuebner Signed-off-by: Felipe Balbi --- drivers/usb/dwc2/core.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc2/core.c b/drivers/usb/dwc2/core.c index fa9b26b91507..4c0fa0b17353 100644 --- a/drivers/usb/dwc2/core.c +++ b/drivers/usb/dwc2/core.c @@ -463,9 +463,18 @@ static void dwc2_clear_force_mode(struct dwc2_hsotg *hsotg) */ void dwc2_force_dr_mode(struct dwc2_hsotg *hsotg) { + bool ret; + switch (hsotg->dr_mode) { case USB_DR_MODE_HOST: - dwc2_force_mode(hsotg, true); + ret = dwc2_force_mode(hsotg, true); + /* + * NOTE: This is required for some rockchip soc based + * platforms on their host-only dwc2. + */ + if (!ret) + msleep(50); + break; case USB_DR_MODE_PERIPHERAL: dwc2_force_mode(hsotg, false); From 454915dde06a51133750c6745f0ba57361ba209d Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Tue, 4 Oct 2016 02:07:33 +0200 Subject: [PATCH 156/884] usb: gadget: f_fs: edit epfile->ep under lock epfile->ep is protected by ffs->eps_lock (not epfile->mutex) so clear it while holding the spin lock. Tested-by: John Stultz Tested-by: Chen Yu Signed-off-by: Michal Nazarewicz Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_fs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 54ad100af35b..b31aa9572723 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -1725,17 +1725,17 @@ static void ffs_func_eps_disable(struct ffs_function *func) unsigned long flags; do { - if (epfile) - mutex_lock(&epfile->mutex); spin_lock_irqsave(&func->ffs->eps_lock, flags); /* pending requests get nuked */ if (likely(ep->ep)) usb_ep_disable(ep->ep); ++ep; + if (epfile) + epfile->ep = NULL; spin_unlock_irqrestore(&func->ffs->eps_lock, flags); if (epfile) { - epfile->ep = NULL; + mutex_lock(&epfile->mutex); kfree(epfile->read_buffer); epfile->read_buffer = NULL; mutex_unlock(&epfile->mutex); From a9e6f83c2df199187a5248f824f31b6787ae23ae Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Tue, 4 Oct 2016 02:07:34 +0200 Subject: [PATCH 157/884] usb: gadget: f_fs: stop sleeping in ffs_func_eps_disable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ffs_func_eps_disable is called from atomic context so it cannot sleep thus cannot grab a mutex. Change the handling of epfile->read_buffer to use non-sleeping synchronisation method. Reported-by: Chen Yu Signed-off-by: Michał Nazarewicz Fixes: 9353afbbfa7b ("buffer data from ‘oversized’ OUT requests") Tested-by: John Stultz Tested-by: Chen Yu Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_fs.c | 109 ++++++++++++++++++++++++----- 1 file changed, 93 insertions(+), 16 deletions(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index b31aa9572723..e40d47d47d82 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -136,8 +136,60 @@ struct ffs_epfile { /* * Buffer for holding data from partial reads which may happen since * we’re rounding user read requests to a multiple of a max packet size. + * + * The pointer is initialised with NULL value and may be set by + * __ffs_epfile_read_data function to point to a temporary buffer. + * + * In normal operation, calls to __ffs_epfile_read_buffered will consume + * data from said buffer and eventually free it. Importantly, while the + * function is using the buffer, it sets the pointer to NULL. This is + * all right since __ffs_epfile_read_data and __ffs_epfile_read_buffered + * can never run concurrently (they are synchronised by epfile->mutex) + * so the latter will not assign a new value to the pointer. + * + * Meanwhile ffs_func_eps_disable frees the buffer (if the pointer is + * valid) and sets the pointer to READ_BUFFER_DROP value. This special + * value is crux of the synchronisation between ffs_func_eps_disable and + * __ffs_epfile_read_data. + * + * Once __ffs_epfile_read_data is about to finish it will try to set the + * pointer back to its old value (as described above), but seeing as the + * pointer is not-NULL (namely READ_BUFFER_DROP) it will instead free + * the buffer. + * + * == State transitions == + * + * • ptr == NULL: (initial state) + * ◦ __ffs_epfile_read_buffer_free: go to ptr == DROP + * ◦ __ffs_epfile_read_buffered: nop + * ◦ __ffs_epfile_read_data allocates temp buffer: go to ptr == buf + * ◦ reading finishes: n/a, not in ‘and reading’ state + * • ptr == DROP: + * ◦ __ffs_epfile_read_buffer_free: nop + * ◦ __ffs_epfile_read_buffered: go to ptr == NULL + * ◦ __ffs_epfile_read_data allocates temp buffer: free buf, nop + * ◦ reading finishes: n/a, not in ‘and reading’ state + * • ptr == buf: + * ◦ __ffs_epfile_read_buffer_free: free buf, go to ptr == DROP + * ◦ __ffs_epfile_read_buffered: go to ptr == NULL and reading + * ◦ __ffs_epfile_read_data: n/a, __ffs_epfile_read_buffered + * is always called first + * ◦ reading finishes: n/a, not in ‘and reading’ state + * • ptr == NULL and reading: + * ◦ __ffs_epfile_read_buffer_free: go to ptr == DROP and reading + * ◦ __ffs_epfile_read_buffered: n/a, mutex is held + * ◦ __ffs_epfile_read_data: n/a, mutex is held + * ◦ reading finishes and … + * … all data read: free buf, go to ptr == NULL + * … otherwise: go to ptr == buf and reading + * • ptr == DROP and reading: + * ◦ __ffs_epfile_read_buffer_free: nop + * ◦ __ffs_epfile_read_buffered: n/a, mutex is held + * ◦ __ffs_epfile_read_data: n/a, mutex is held + * ◦ reading finishes: free buf, go to ptr == DROP */ - struct ffs_buffer *read_buffer; /* P: epfile->mutex */ + struct ffs_buffer *read_buffer; +#define READ_BUFFER_DROP ((struct ffs_buffer *)ERR_PTR(-ESHUTDOWN)) char name[5]; @@ -736,25 +788,47 @@ static void ffs_epfile_async_io_complete(struct usb_ep *_ep, schedule_work(&io_data->work); } +static void __ffs_epfile_read_buffer_free(struct ffs_epfile *epfile) +{ + /* + * See comment in struct ffs_epfile for full read_buffer pointer + * synchronisation story. + */ + struct ffs_buffer *buf = xchg(&epfile->read_buffer, READ_BUFFER_DROP); + if (buf && buf != READ_BUFFER_DROP) + kfree(buf); +} + /* Assumes epfile->mutex is held. */ static ssize_t __ffs_epfile_read_buffered(struct ffs_epfile *epfile, struct iov_iter *iter) { - struct ffs_buffer *buf = epfile->read_buffer; + /* + * Null out epfile->read_buffer so ffs_func_eps_disable does not free + * the buffer while we are using it. See comment in struct ffs_epfile + * for full read_buffer pointer synchronisation story. + */ + struct ffs_buffer *buf = xchg(&epfile->read_buffer, NULL); ssize_t ret; - if (!buf) + if (!buf || buf == READ_BUFFER_DROP) return 0; ret = copy_to_iter(buf->data, buf->length, iter); if (buf->length == ret) { kfree(buf); - epfile->read_buffer = NULL; - } else if (unlikely(iov_iter_count(iter))) { + return ret; + } + + if (unlikely(iov_iter_count(iter))) { ret = -EFAULT; } else { buf->length -= ret; buf->data += ret; } + + if (cmpxchg(&epfile->read_buffer, NULL, buf)) + kfree(buf); + return ret; } @@ -783,7 +857,15 @@ static ssize_t __ffs_epfile_read_data(struct ffs_epfile *epfile, buf->length = data_len; buf->data = buf->storage; memcpy(buf->storage, data + ret, data_len); - epfile->read_buffer = buf; + + /* + * At this point read_buffer is NULL or READ_BUFFER_DROP (if + * ffs_func_eps_disable has been called in the meanwhile). See comment + * in struct ffs_epfile for full read_buffer pointer synchronisation + * story. + */ + if (unlikely(cmpxchg(&epfile->read_buffer, NULL, buf))) + kfree(buf); return ret; } @@ -1097,8 +1179,7 @@ ffs_epfile_release(struct inode *inode, struct file *file) ENTER(); - kfree(epfile->read_buffer); - epfile->read_buffer = NULL; + __ffs_epfile_read_buffer_free(epfile); ffs_data_closed(epfile->ffs); return 0; @@ -1724,24 +1805,20 @@ static void ffs_func_eps_disable(struct ffs_function *func) unsigned count = func->ffs->eps_count; unsigned long flags; + spin_lock_irqsave(&func->ffs->eps_lock, flags); do { - spin_lock_irqsave(&func->ffs->eps_lock, flags); /* pending requests get nuked */ if (likely(ep->ep)) usb_ep_disable(ep->ep); ++ep; - if (epfile) - epfile->ep = NULL; - spin_unlock_irqrestore(&func->ffs->eps_lock, flags); if (epfile) { - mutex_lock(&epfile->mutex); - kfree(epfile->read_buffer); - epfile->read_buffer = NULL; - mutex_unlock(&epfile->mutex); + epfile->ep = NULL; + __ffs_epfile_read_buffer_free(epfile); ++epfile; } } while (--count); + spin_unlock_irqrestore(&func->ffs->eps_lock, flags); } static int ffs_func_eps_enable(struct ffs_function *func) From 51fbc7c06c8900370c6da5fc4a4685add8fa4fb0 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 7 Oct 2016 22:12:39 +0200 Subject: [PATCH 158/884] usb: dwc3: Fix size used in dma_free_coherent() In commit 2abd9d5fa60f9 ("usb: dwc3: ep0: Add chained TRB support"), the size of the memory allocated with 'dma_alloc_coherent()' has been modified but the corresponding calls to 'dma_free_coherent()' have not been updated accordingly. This has been spotted with coccinelle, using the following script: //////////////////// @r@ expression x0, x1, y0, y1, z0, z1, t0, t1, ret; @@ * ret = dma_alloc_coherent(x0, y0, z0, t0); ... * dma_free_coherent(x1, y1, ret, t1); @script:python@ y0 << r.y0; y1 << r.y1; @@ if y1.find(y0) == -1: print "WARNING: sizes look different: '%s' vs '%s'" % (y0, y1) //////////////////// Fixes: 2abd9d5fa60f9 ("usb: dwc3: ep0: Add chained TRB support") Signed-off-by: Christophe JAILLET Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/gadget.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index f15147f79d14..1dfa56a5f1c5 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2990,7 +2990,7 @@ err3: kfree(dwc->setup_buf); err2: - dma_free_coherent(dwc->dev, sizeof(*dwc->ep0_trb), + dma_free_coherent(dwc->dev, sizeof(*dwc->ep0_trb) * 2, dwc->ep0_trb, dwc->ep0_trb_addr); err1: @@ -3015,7 +3015,7 @@ void dwc3_gadget_exit(struct dwc3 *dwc) kfree(dwc->setup_buf); kfree(dwc->zlp_buf); - dma_free_coherent(dwc->dev, sizeof(*dwc->ep0_trb), + dma_free_coherent(dwc->dev, sizeof(*dwc->ep0_trb) * 2, dwc->ep0_trb, dwc->ep0_trb_addr); dma_free_coherent(dwc->dev, sizeof(*dwc->ctrl_req), From a19b882c07a63174f09d1f7c036cc2aab7f04ad3 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 6 Oct 2016 10:25:38 -0700 Subject: [PATCH 159/884] wusb: Stop using the stack for sg crypto scratch space Pointing an sg list at the stack is verboten and, with CONFIG_VMAP_STACK=y, will malfunction. Use kmalloc for the wusb crypto stack space instead. Untested -- I'm not entirely convinced that this hardware exists in the wild. Signed-off-by: Andy Lutomirski Signed-off-by: Greg Kroah-Hartman --- drivers/usb/wusbcore/crypto.c | 59 ++++++++++++++++++++++------------- 1 file changed, 37 insertions(+), 22 deletions(-) diff --git a/drivers/usb/wusbcore/crypto.c b/drivers/usb/wusbcore/crypto.c index 79b2b628066d..de089f3a82f3 100644 --- a/drivers/usb/wusbcore/crypto.c +++ b/drivers/usb/wusbcore/crypto.c @@ -133,6 +133,13 @@ static void bytewise_xor(void *_bo, const void *_bi1, const void *_bi2, bo[itr] = bi1[itr] ^ bi2[itr]; } +/* Scratch space for MAC calculations. */ +struct wusb_mac_scratch { + struct aes_ccm_b0 b0; + struct aes_ccm_b1 b1; + struct aes_ccm_a ax; +}; + /* * CC-MAC function WUSB1.0[6.5] * @@ -197,16 +204,15 @@ static void bytewise_xor(void *_bo, const void *_bi1, const void *_bi2, * what sg[4] is for. Maybe there is a smarter way to do this. */ static int wusb_ccm_mac(struct crypto_skcipher *tfm_cbc, - struct crypto_cipher *tfm_aes, void *mic, + struct crypto_cipher *tfm_aes, + struct wusb_mac_scratch *scratch, + void *mic, const struct aes_ccm_nonce *n, const struct aes_ccm_label *a, const void *b, size_t blen) { int result = 0; SKCIPHER_REQUEST_ON_STACK(req, tfm_cbc); - struct aes_ccm_b0 b0; - struct aes_ccm_b1 b1; - struct aes_ccm_a ax; struct scatterlist sg[4], sg_dst; void *dst_buf; size_t dst_size; @@ -218,16 +224,17 @@ static int wusb_ccm_mac(struct crypto_skcipher *tfm_cbc, * These checks should be compile time optimized out * ensure @a fills b1's mac_header and following fields */ - WARN_ON(sizeof(*a) != sizeof(b1) - sizeof(b1.la)); - WARN_ON(sizeof(b0) != sizeof(struct aes_ccm_block)); - WARN_ON(sizeof(b1) != sizeof(struct aes_ccm_block)); - WARN_ON(sizeof(ax) != sizeof(struct aes_ccm_block)); + WARN_ON(sizeof(*a) != sizeof(scratch->b1) - sizeof(scratch->b1.la)); + WARN_ON(sizeof(scratch->b0) != sizeof(struct aes_ccm_block)); + WARN_ON(sizeof(scratch->b1) != sizeof(struct aes_ccm_block)); + WARN_ON(sizeof(scratch->ax) != sizeof(struct aes_ccm_block)); result = -ENOMEM; zero_padding = blen % sizeof(struct aes_ccm_block); if (zero_padding) zero_padding = sizeof(struct aes_ccm_block) - zero_padding; - dst_size = blen + sizeof(b0) + sizeof(b1) + zero_padding; + dst_size = blen + sizeof(scratch->b0) + sizeof(scratch->b1) + + zero_padding; dst_buf = kzalloc(dst_size, GFP_KERNEL); if (!dst_buf) goto error_dst_buf; @@ -235,9 +242,9 @@ static int wusb_ccm_mac(struct crypto_skcipher *tfm_cbc, memset(iv, 0, sizeof(iv)); /* Setup B0 */ - b0.flags = 0x59; /* Format B0 */ - b0.ccm_nonce = *n; - b0.lm = cpu_to_be16(0); /* WUSB1.0[6.5] sez l(m) is 0 */ + scratch->b0.flags = 0x59; /* Format B0 */ + scratch->b0.ccm_nonce = *n; + scratch->b0.lm = cpu_to_be16(0); /* WUSB1.0[6.5] sez l(m) is 0 */ /* Setup B1 * @@ -246,12 +253,12 @@ static int wusb_ccm_mac(struct crypto_skcipher *tfm_cbc, * 14'--after clarification, it means to use A's contents * for MAC Header, EO, sec reserved and padding. */ - b1.la = cpu_to_be16(blen + 14); - memcpy(&b1.mac_header, a, sizeof(*a)); + scratch->b1.la = cpu_to_be16(blen + 14); + memcpy(&scratch->b1.mac_header, a, sizeof(*a)); sg_init_table(sg, ARRAY_SIZE(sg)); - sg_set_buf(&sg[0], &b0, sizeof(b0)); - sg_set_buf(&sg[1], &b1, sizeof(b1)); + sg_set_buf(&sg[0], &scratch->b0, sizeof(scratch->b0)); + sg_set_buf(&sg[1], &scratch->b1, sizeof(scratch->b1)); sg_set_buf(&sg[2], b, blen); /* 0 if well behaved :) */ sg_set_buf(&sg[3], bzero, zero_padding); @@ -276,11 +283,12 @@ static int wusb_ccm_mac(struct crypto_skcipher *tfm_cbc, * POS Crypto API: size is assumed to be AES's block size. * Thanks for documenting it -- tip taken from airo.c */ - ax.flags = 0x01; /* as per WUSB 1.0 spec */ - ax.ccm_nonce = *n; - ax.counter = 0; - crypto_cipher_encrypt_one(tfm_aes, (void *)&ax, (void *)&ax); - bytewise_xor(mic, &ax, iv, 8); + scratch->ax.flags = 0x01; /* as per WUSB 1.0 spec */ + scratch->ax.ccm_nonce = *n; + scratch->ax.counter = 0; + crypto_cipher_encrypt_one(tfm_aes, (void *)&scratch->ax, + (void *)&scratch->ax); + bytewise_xor(mic, &scratch->ax, iv, 8); result = 8; error_cbc_crypt: kfree(dst_buf); @@ -303,6 +311,7 @@ ssize_t wusb_prf(void *out, size_t out_size, struct aes_ccm_nonce n = *_n; struct crypto_skcipher *tfm_cbc; struct crypto_cipher *tfm_aes; + struct wusb_mac_scratch *scratch; u64 sfn = 0; __le64 sfn_le; @@ -329,17 +338,23 @@ ssize_t wusb_prf(void *out, size_t out_size, printk(KERN_ERR "E: can't set AES key: %d\n", (int)result); goto error_setkey_aes; } + scratch = kmalloc(sizeof(*scratch), GFP_KERNEL); + if (!scratch) + goto error_alloc_scratch; for (bitr = 0; bitr < (len + 63) / 64; bitr++) { sfn_le = cpu_to_le64(sfn++); memcpy(&n.sfn, &sfn_le, sizeof(n.sfn)); /* n.sfn++... */ - result = wusb_ccm_mac(tfm_cbc, tfm_aes, out + bytes, + result = wusb_ccm_mac(tfm_cbc, tfm_aes, scratch, out + bytes, &n, a, b, blen); if (result < 0) goto error_ccm_mac; bytes += result; } result = bytes; + + kfree(scratch); +error_alloc_scratch: error_ccm_mac: error_setkey_aes: crypto_free_cipher(tfm_aes); From 589ce5f447b8610df0dbd6935b56d5cda17b9cec Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 14 Oct 2016 15:13:07 +0100 Subject: [PATCH 160/884] irqchip/gic-v3-its: Fix 64bit GIC{R,ITS}_TYPER accesses The GICv3 architecture specification mentions that a 64bit register can be accessed using two 32bit accesses. What it doesn't mention is that this is only guaranteed on a system that implements AArch32, and a pure AArch64 system is allowed not to support this. This causes issues with the GICR_TYPER and GITS_TYPER registers, which are both RO 64bit registers. In order to solve this, this patch switches the TYPER accesses to the gic_read_typer macro already used in other parts of the driver. This makes sure that we always use a 64bit access on 64bit systems, and two 32bit accesses on 32bit system. Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 003495d91f9c..c5dee300e8a3 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -1023,7 +1023,7 @@ static void its_free_tables(struct its_node *its) static int its_alloc_tables(struct its_node *its) { - u64 typer = readq_relaxed(its->base + GITS_TYPER); + u64 typer = gic_read_typer(its->base + GITS_TYPER); u32 ids = GITS_TYPER_DEVBITS(typer); u64 shr = GITS_BASER_InnerShareable; u64 cache = GITS_BASER_WaWb; @@ -1198,7 +1198,7 @@ static void its_cpu_init_collection(void) * We now have to bind each collection to its target * redistributor. */ - if (readq_relaxed(its->base + GITS_TYPER) & GITS_TYPER_PTA) { + if (gic_read_typer(its->base + GITS_TYPER) & GITS_TYPER_PTA) { /* * This ITS wants the physical address of the * redistributor. @@ -1208,7 +1208,7 @@ static void its_cpu_init_collection(void) /* * This ITS wants a linear CPU number. */ - target = readq_relaxed(gic_data_rdist_rd_base() + GICR_TYPER); + target = gic_read_typer(gic_data_rdist_rd_base() + GICR_TYPER); target = GICR_TYPER_CPU_NUMBER(target) << 16; } @@ -1691,7 +1691,7 @@ static int __init its_probe_one(struct resource *res, INIT_LIST_HEAD(&its->its_device_list); its->base = its_base; its->phys_base = res->start; - its->ite_size = ((readl_relaxed(its_base + GITS_TYPER) >> 4) & 0xf) + 1; + its->ite_size = ((gic_read_typer(its_base + GITS_TYPER) >> 4) & 0xf) + 1; its->numa_node = numa_node; its->cmd_base = kzalloc(ITS_CMD_QUEUE_SZ, GFP_KERNEL); @@ -1763,7 +1763,7 @@ out_unmap: static bool gic_rdists_supports_plpis(void) { - return !!(readl_relaxed(gic_data_rdist_rd_base() + GICR_TYPER) & GICR_TYPER_PLPIS); + return !!(gic_read_typer(gic_data_rdist_rd_base() + GICR_TYPER) & GICR_TYPER_PLPIS); } int its_cpu_init(void) From 9c0e83c371cf4696926c95f9c8c77cd6ea803426 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 13 Oct 2016 17:42:09 +0100 Subject: [PATCH 161/884] arm64: kaslr: fix breakage with CONFIG_MODVERSIONS=y As it turns out, the KASLR code breaks CONFIG_MODVERSIONS, since the kcrctab has an absolute address field that is relocated at runtime when the kernel offset is randomized. This has been fixed already for PowerPC in the past, so simply wire up the existing code dealing with this issue. Cc: Fixes: f80fb3a3d508 ("arm64: add support for kernel ASLR") Tested-by: Timur Tabi Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/include/asm/module.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index e12af6754634..06ff7fd9e81f 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h @@ -17,6 +17,7 @@ #define __ASM_MODULE_H #include +#include #define MODULE_ARCH_VERMAGIC "aarch64" @@ -32,6 +33,10 @@ u64 module_emit_plt_entry(struct module *mod, const Elf64_Rela *rela, Elf64_Sym *sym); #ifdef CONFIG_RANDOMIZE_BASE +#ifdef CONFIG_MODVERSIONS +#define ARCH_RELOCATES_KCRCTAB +#define reloc_start (kimage_vaddr - KIMAGE_VADDR) +#endif extern u64 module_alloc_base; #else #define module_alloc_base ((u64)_etext - MODULES_VSIZE) From d0679cfaf54c9e42e3ba4f8d5e67969932040d16 Mon Sep 17 00:00:00 2001 From: Hoan Tran Date: Fri, 14 Oct 2016 16:20:21 -0700 Subject: [PATCH 162/884] mailbox: PCC: Fix return value of pcc_mbox_request_channel() When CONFIG_PCC is disabled, pcc_mbox_request_channel() needs to return ERR_PTR(-ENODEV), not a NULL pointer, as the callers of this function use IS_ERR() to check for error code. Signed-off-by: Duc Dang Signed-off-by: Hoan Tran Signed-off-by: Rafael J. Wysocki --- include/acpi/pcc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/acpi/pcc.h b/include/acpi/pcc.h index 17a940a14477..8caa79c61703 100644 --- a/include/acpi/pcc.h +++ b/include/acpi/pcc.h @@ -21,7 +21,7 @@ extern void pcc_mbox_free_channel(struct mbox_chan *chan); static inline struct mbox_chan *pcc_mbox_request_channel(struct mbox_client *cl, int subspace_id) { - return NULL; + return ERR_PTR(-ENODEV); } static inline void pcc_mbox_free_channel(struct mbox_chan *chan) { } #endif From d0208639dbc6fe97a25054df44faa2d19aca9380 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 17 Oct 2016 11:08:31 +0200 Subject: [PATCH 163/884] s390/dumpstack: restore reliable indicator for call traces Before merging all different stack tracers the call traces printed had an indicator if an entry can be considered reliable or not. Unreliable entries were put in braces, reliable not. Currently all lines contain these extra braces. This patch restores the old behaviour by adding an extra "reliable" parameter to the callback functions. Only show_trace makes currently use of it. Before: [ 0.804751] Call Trace: [ 0.804753] ([<000000000017d0e0>] try_to_wake_up+0x318/0x5e0) [ 0.804756] ([<0000000000161d64>] create_worker+0x174/0x1c0) After: [ 0.804751] Call Trace: [ 0.804753] ([<000000000017d0e0>] try_to_wake_up+0x318/0x5e0) [ 0.804756] [<0000000000161d64>] create_worker+0x174/0x1c0 Fixes: 758d39ebd3d5 ("s390/dumpstack: merge all four stack tracers") Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/processor.h | 2 +- arch/s390/kernel/dumpstack.c | 17 +++++++++++------ arch/s390/kernel/perf_event.c | 2 +- arch/s390/kernel/stacktrace.c | 4 ++-- arch/s390/oprofile/init.c | 2 +- 5 files changed, 16 insertions(+), 11 deletions(-) diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 03323175de30..602af692efdc 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -192,7 +192,7 @@ struct task_struct; struct mm_struct; struct seq_file; -typedef int (*dump_trace_func_t)(void *data, unsigned long address); +typedef int (*dump_trace_func_t)(void *data, unsigned long address, int reliable); void dump_trace(dump_trace_func_t func, void *data, struct task_struct *task, unsigned long sp); diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 6693383bc01b..518f615ad0a2 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -38,10 +38,10 @@ __dump_trace(dump_trace_func_t func, void *data, unsigned long sp, if (sp < low || sp > high - sizeof(*sf)) return sp; sf = (struct stack_frame *) sp; + if (func(data, sf->gprs[8], 0)) + return sp; /* Follow the backchain. */ while (1) { - if (func(data, sf->gprs[8])) - return sp; low = sp; sp = sf->back_chain; if (!sp) @@ -49,6 +49,8 @@ __dump_trace(dump_trace_func_t func, void *data, unsigned long sp, if (sp <= low || sp > high - sizeof(*sf)) return sp; sf = (struct stack_frame *) sp; + if (func(data, sf->gprs[8], 1)) + return sp; } /* Zero backchain detected, check for interrupt frame. */ sp = (unsigned long) (sf + 1); @@ -56,7 +58,7 @@ __dump_trace(dump_trace_func_t func, void *data, unsigned long sp, return sp; regs = (struct pt_regs *) sp; if (!user_mode(regs)) { - if (func(data, regs->psw.addr)) + if (func(data, regs->psw.addr, 1)) return sp; } low = sp; @@ -90,7 +92,7 @@ struct return_address_data { int depth; }; -static int __return_address(void *data, unsigned long address) +static int __return_address(void *data, unsigned long address, int reliable) { struct return_address_data *rd = data; @@ -109,9 +111,12 @@ unsigned long return_address(int depth) } EXPORT_SYMBOL_GPL(return_address); -static int show_address(void *data, unsigned long address) +static int show_address(void *data, unsigned long address, int reliable) { - printk("([<%016lx>] %pSR)\n", address, (void *)address); + if (reliable) + printk(" [<%016lx>] %pSR \n", address, (void *)address); + else + printk("([<%016lx>] %pSR)\n", address, (void *)address); return 0; } diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index 17431f63de00..955a7b6fa0a4 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -222,7 +222,7 @@ static int __init service_level_perf_register(void) } arch_initcall(service_level_perf_register); -static int __perf_callchain_kernel(void *data, unsigned long address) +static int __perf_callchain_kernel(void *data, unsigned long address, int reliable) { struct perf_callchain_entry_ctx *entry = data; diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index 44f84b23d4e5..355db9db8210 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -27,12 +27,12 @@ static int __save_address(void *data, unsigned long address, int nosched) return 1; } -static int save_address(void *data, unsigned long address) +static int save_address(void *data, unsigned long address, int reliable) { return __save_address(data, address, 0); } -static int save_address_nosched(void *data, unsigned long address) +static int save_address_nosched(void *data, unsigned long address, int reliable) { return __save_address(data, address, 1); } diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c index 16f4c3960b87..9a4de4599c7b 100644 --- a/arch/s390/oprofile/init.c +++ b/arch/s390/oprofile/init.c @@ -13,7 +13,7 @@ #include #include -static int __s390_backtrace(void *data, unsigned long address) +static int __s390_backtrace(void *data, unsigned long address, int reliable) { unsigned int *depth = data; From a790634544f5f98364b0aafe9d7e669810d96360 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 17 Oct 2016 11:59:58 +0200 Subject: [PATCH 164/884] s390/dumpstack: use pr_cont where appropriate Use pr_cont instead of simple printk calls when lines will be continued. This fixes the kernel output of various lines printed on e.g. a warning: Before: [ 0.840604] Krnl PSW : 0404c00180000000 000000000017d1d2 [ 0.840606] (try_to_wake_up+0x382/0x5e0) [ 0.840610] R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 [ 0.840611] RI:0 EA:3 After: [ 0.831772] Krnl PSW : 0404c00180000000 000000000017d14a (try_to_wake_up+0x382/0x5e0) [ 0.831776] R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3 Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/dumpstack.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 518f615ad0a2..4bebe72b7780 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -168,13 +168,13 @@ void show_registers(struct pt_regs *regs) mode = user_mode(regs) ? "User" : "Krnl"; printk("%s PSW : %p %p", mode, (void *)regs->psw.mask, (void *)regs->psw.addr); if (!user_mode(regs)) - printk(" (%pSR)", (void *)regs->psw.addr); - printk("\n"); + pr_cont(" (%pSR)", (void *)regs->psw.addr); + pr_cont("\n"); printk(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x " "P:%x AS:%x CC:%x PM:%x", psw->r, psw->t, psw->i, psw->e, psw->key, psw->m, psw->w, psw->p, psw->as, psw->cc, psw->pm); - printk(" RI:%x EA:%x", psw->ri, psw->eaba); - printk("\n%s GPRS: %016lx %016lx %016lx %016lx\n", mode, + pr_cont(" RI:%x EA:%x\n", psw->ri, psw->eaba); + printk("%s GPRS: %016lx %016lx %016lx %016lx\n", mode, regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]); printk(" %016lx %016lx %016lx %016lx\n", regs->gprs[4], regs->gprs[5], regs->gprs[6], regs->gprs[7]); From 4d062487f3431f124e3a2420c0da0b7a2388dc80 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 17 Oct 2016 12:07:35 +0200 Subject: [PATCH 165/884] s390/disassambler: use pr_cont where appropriate Just like for dumpstack use pr_cont instead of simple printk calls to fix the output when disassembling a piece of code. Before: [ 0.840627] Krnl Code: 000000000017d1c6: a77400f7 brc 7,17d3b4 [ 0.840630] 000000000017d1ca: 92015000 mvi 0(%r5),1 [ 0.840634] #000000000017d1ce: a7f40001 brc 15,17d1d0 After: [ 0.831792] Krnl Code: 000000000017d13e: a77400f7 brc 7,17d32c 000000000017d142: 92015000 mvi 0(%r5),1 #000000000017d146: a7f40001 brc 15,17d148 Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/dis.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 43446fa2a4e5..c74c59236f44 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -2014,12 +2014,12 @@ void show_code(struct pt_regs *regs) *ptr++ = '\t'; ptr += print_insn(ptr, code + start, addr); start += opsize; - printk("%s", buffer); + pr_cont("%s", buffer); ptr = buffer; ptr += sprintf(ptr, "\n "); hops++; } - printk("\n"); + pr_cont("\n"); } void print_fn_code(unsigned char *code, unsigned long len) From dcddba96cdbc5d0e4d4a17bf22cfd9b2f038a4ca Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 17 Oct 2016 13:07:46 +0200 Subject: [PATCH 166/884] s390/dumpstack: get rid of return_address again With commit ef6000b4c670 ("Disable the __builtin_return_address() warning globally after all)" the kernel does not warn at all again if __builtin_return_address(n) is called with n > 0. Besides the fact that this was a false warning on s390 anyway, due to the always present backchain, we can now revert commit 5606330627ab ("s390/dumpstack: implement and use return_address()") again, to simplify the code again. After all I shouldn't have had return_address() implememted at all to workaround this issue. So get rid of this again. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/ftrace.h | 4 +--- arch/s390/kernel/dumpstack.c | 24 ------------------------ 2 files changed, 1 insertion(+), 27 deletions(-) diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index 64053d9ac3f2..836c56290499 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -12,9 +12,7 @@ #ifndef __ASSEMBLY__ -unsigned long return_address(int depth); - -#define ftrace_return_address(n) return_address(n) +#define ftrace_return_address(n) __builtin_return_address(n) void _mcount(void); void ftrace_caller(void); diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 4bebe72b7780..34345c0a3c46 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -87,30 +87,6 @@ void dump_trace(dump_trace_func_t func, void *data, struct task_struct *task, } EXPORT_SYMBOL_GPL(dump_trace); -struct return_address_data { - unsigned long address; - int depth; -}; - -static int __return_address(void *data, unsigned long address, int reliable) -{ - struct return_address_data *rd = data; - - if (rd->depth--) - return 0; - rd->address = address; - return 1; -} - -unsigned long return_address(int depth) -{ - struct return_address_data rd = { .depth = depth + 2 }; - - dump_trace(__return_address, &rd, NULL, current_stack_pointer()); - return rd.address; -} -EXPORT_SYMBOL_GPL(return_address); - static int show_address(void *data, unsigned long address, int reliable) { if (reliable) From dabe7ecc572447c6f8a052cc77e220998c7f9c38 Mon Sep 17 00:00:00 2001 From: Joao Pinto Date: Thu, 13 Oct 2016 18:26:13 +0100 Subject: [PATCH 167/884] PCI: designware-plat: Change maintainer to Jose Abreu Change designware-plat maintainer to Jose Abreu. Signed-off-by: Joao Pinto Signed-off-by: Bjorn Helgaas --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 1cd38a7e0064..906e96948520 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9299,7 +9299,7 @@ S: Maintained F: drivers/pci/host/*designware* PCI DRIVER FOR SYNOPSYS PROTOTYPING DEVICE -M: Joao Pinto +M: Jose Abreu L: linux-pci@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/pci/designware-pcie.txt From 15480f3ab7a8b5333e170d7168ce8a1012cc735e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 17 Oct 2016 11:39:32 +0100 Subject: [PATCH 168/884] PCI: layerscape: Fix drvdata usage before assignment Commit fefe6733e516 ("PCI: layerscape: Move struct pcie_port setup to probe function") changed the init ordering of the pcie structure, but started to use the pcie->drvdata field before initializing it. Mayhem follows. Fix this by moving the drvdata assignment right before the first use. Tested on LS2085a. Fixes: efe6733e516 ("PCI: layerscape: Move struct pcie_port setup to probe function") Signed-off-by: Marc Zyngier Signed-off-by: Bjorn Helgaas --- drivers/pci/host/pci-layerscape.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/host/pci-layerscape.c b/drivers/pci/host/pci-layerscape.c index 2cb7315e26d0..653707996342 100644 --- a/drivers/pci/host/pci-layerscape.c +++ b/drivers/pci/host/pci-layerscape.c @@ -247,6 +247,7 @@ static int __init ls_pcie_probe(struct platform_device *pdev) pp = &pcie->pp; pp->dev = dev; + pcie->drvdata = match->data; pp->ops = pcie->drvdata->ops; dbi_base = platform_get_resource_byname(pdev, IORESOURCE_MEM, "regs"); @@ -256,7 +257,6 @@ static int __init ls_pcie_probe(struct platform_device *pdev) return PTR_ERR(pcie->pp.dbi_base); } - pcie->drvdata = match->data; pcie->lut = pcie->pp.dbi_base + pcie->drvdata->lut_offset; if (!ls_pcie_is_bridge(pcie)) From 02265cd60335a2c1417abae4192611e1fc05a6e5 Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Mon, 17 Oct 2016 10:18:37 +0200 Subject: [PATCH 169/884] mmc: sdhci: cast unsigned int to unsigned long long to avoid unexpeted error Potentially overflowing expression 1000000 * data->timeout_clks with type unsigned int is evaluated using 32-bit arithmetic, and then used in a context that expects an expression of type unsigned long long. To avoid overflow, cast 1000000U to type unsigned long long. Special thanks to Coverity. Fixes: 7f05538af71c ("mmc: sdhci: fix data timeout (part 2)") Signed-off-by: Haibo Chen Cc: stable@vger.kernel.org # v3.15+ Acked-by: Adrian Hunter Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 223a91e039dc..71654b90227f 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -687,7 +687,7 @@ static u8 sdhci_calc_timeout(struct sdhci_host *host, struct mmc_command *cmd) * host->clock is in Hz. target_timeout is in us. * Hence, us = 1000000 * cycles / Hz. Round up. */ - val = 1000000 * data->timeout_clks; + val = 1000000ULL * data->timeout_clks; if (do_div(val, host->clock)) target_timeout++; target_timeout += val; From 31cf742f515c275d22843c4c756e048d2b6d716c Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 27 Sep 2016 08:44:33 -0700 Subject: [PATCH 170/884] mmc: rtsx_usb_sdmmc: Avoid keeping the device runtime resumed when unused The rtsx_usb_sdmmc driver may bail out in its ->set_ios() callback when no SD card is inserted. This is wrong, as it could cause the device to remain runtime resumed when it's unused. Fix this behaviour. Tested-by: Ritesh Raj Sarraf Cc: Cc: Alan Stern Signed-off-by: Ulf Hansson --- drivers/mmc/host/rtsx_usb_sdmmc.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/mmc/host/rtsx_usb_sdmmc.c b/drivers/mmc/host/rtsx_usb_sdmmc.c index 4106295527b9..e0b85904c9bb 100644 --- a/drivers/mmc/host/rtsx_usb_sdmmc.c +++ b/drivers/mmc/host/rtsx_usb_sdmmc.c @@ -1138,11 +1138,6 @@ static void sdmmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) dev_dbg(sdmmc_dev(host), "%s\n", __func__); mutex_lock(&ucr->dev_mutex); - if (rtsx_usb_card_exclusive_check(ucr, RTSX_USB_SD_CARD)) { - mutex_unlock(&ucr->dev_mutex); - return; - } - sd_set_power_mode(host, ios->power_mode); sd_set_bus_width(host, ios->bus_width); sd_set_timing(host, ios->timing, &host->ddr_mode); From 4f48aa7a11bfed9502a7c85a5b68cd40ea827f73 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 15 Sep 2016 14:46:21 +0200 Subject: [PATCH 171/884] mmc: rtsx_usb_sdmmc: Handle runtime PM while changing the led Accesses of the rtsx sdmmc's parent device, which is the rtsx usb device, must be done when it's runtime resumed. Currently this isn't case when changing the led, so let's fix this by adding a pm_runtime_get_sync() and a pm_runtime_put() around those operations. Reported-by: Ritesh Raj Sarraf Tested-by: Ritesh Raj Sarraf Cc: Cc: Alan Stern Signed-off-by: Ulf Hansson --- drivers/mmc/host/rtsx_usb_sdmmc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mmc/host/rtsx_usb_sdmmc.c b/drivers/mmc/host/rtsx_usb_sdmmc.c index e0b85904c9bb..6e9c0f8fddb1 100644 --- a/drivers/mmc/host/rtsx_usb_sdmmc.c +++ b/drivers/mmc/host/rtsx_usb_sdmmc.c @@ -1309,6 +1309,7 @@ static void rtsx_usb_update_led(struct work_struct *work) container_of(work, struct rtsx_usb_sdmmc, led_work); struct rtsx_ucr *ucr = host->ucr; + pm_runtime_get_sync(sdmmc_dev(host)); mutex_lock(&ucr->dev_mutex); if (host->led.brightness == LED_OFF) @@ -1317,6 +1318,7 @@ static void rtsx_usb_update_led(struct work_struct *work) rtsx_usb_turn_on_led(ucr); mutex_unlock(&ucr->dev_mutex); + pm_runtime_put(sdmmc_dev(host)); } #endif From 796aa46adf1d90eab36ae06a42e6d3f10b28a75c Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 26 Sep 2016 15:45:41 -0700 Subject: [PATCH 172/884] memstick: rtsx_usb_ms: Runtime resume the device when polling for cards Accesses to the rtsx usb device, which is the parent of the rtsx memstick device, must not be done unless it's runtime resumed. Therefore when the rtsx_usb_ms driver polls for inserted memstick cards, let's add pm_runtime_get|put*() to make sure accesses is done when the rtsx usb device is runtime resumed. Reported-by: Ritesh Raj Sarraf Tested-by: Ritesh Raj Sarraf Signed-off-by: Alan Stern Cc: Signed-off-by: Ulf Hansson --- drivers/memstick/host/rtsx_usb_ms.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/memstick/host/rtsx_usb_ms.c b/drivers/memstick/host/rtsx_usb_ms.c index d34bc3530385..1b994897f0ff 100644 --- a/drivers/memstick/host/rtsx_usb_ms.c +++ b/drivers/memstick/host/rtsx_usb_ms.c @@ -681,6 +681,7 @@ static int rtsx_usb_detect_ms_card(void *__host) int err; for (;;) { + pm_runtime_get_sync(ms_dev(host)); mutex_lock(&ucr->dev_mutex); /* Check pending MS card changes */ @@ -703,6 +704,7 @@ static int rtsx_usb_detect_ms_card(void *__host) } poll_again: + pm_runtime_put(ms_dev(host)); if (host->eject) break; From 9158cb29e7c2f10dd325eb1589f0fe745a271257 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 28 Sep 2016 11:33:28 -0700 Subject: [PATCH 173/884] memstick: rtsx_usb_ms: Manage runtime PM when accessing the device Accesses to the rtsx usb device, which is the parent of the rtsx memstick device, must not be done unless it's runtime resumed. This is currently not the case and it could trigger various errors. Fix this by properly deal with runtime PM in this regards. This means making sure the device is runtime resumed, when serving requests via the ->request() callback or changing settings via the ->set_param() callbacks. Cc: Cc: Ritesh Raj Sarraf Cc: Alan Stern Signed-off-by: Ulf Hansson --- drivers/memstick/host/rtsx_usb_ms.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/memstick/host/rtsx_usb_ms.c b/drivers/memstick/host/rtsx_usb_ms.c index 1b994897f0ff..2e3cf012ef48 100644 --- a/drivers/memstick/host/rtsx_usb_ms.c +++ b/drivers/memstick/host/rtsx_usb_ms.c @@ -524,6 +524,7 @@ static void rtsx_usb_ms_handle_req(struct work_struct *work) int rc; if (!host->req) { + pm_runtime_get_sync(ms_dev(host)); do { rc = memstick_next_req(msh, &host->req); dev_dbg(ms_dev(host), "next req %d\n", rc); @@ -544,6 +545,7 @@ static void rtsx_usb_ms_handle_req(struct work_struct *work) host->req->error); } } while (!rc); + pm_runtime_put(ms_dev(host)); } } @@ -570,6 +572,7 @@ static int rtsx_usb_ms_set_param(struct memstick_host *msh, dev_dbg(ms_dev(host), "%s: param = %d, value = %d\n", __func__, param, value); + pm_runtime_get_sync(ms_dev(host)); mutex_lock(&ucr->dev_mutex); err = rtsx_usb_card_exclusive_check(ucr, RTSX_USB_MS_CARD); @@ -635,6 +638,7 @@ static int rtsx_usb_ms_set_param(struct memstick_host *msh, } out: mutex_unlock(&ucr->dev_mutex); + pm_runtime_put(ms_dev(host)); /* power-on delay */ if (param == MEMSTICK_POWER && value == MEMSTICK_POWER_ON) From 82bb095ee9dcd9380b6ab608b5fa9f64e65f3a3b Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Mon, 17 Oct 2016 12:19:38 +0200 Subject: [PATCH 174/884] MAINTAINERS: mmc: Move the mmc tree to kernel.org Signed-off-by: Ulf Hansson --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index a009e004f8f7..e6857248d3a8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8164,7 +8164,7 @@ F: include/linux/mfd/ MULTIMEDIA CARD (MMC), SECURE DIGITAL (SD) AND SDIO SUBSYSTEM M: Ulf Hansson L: linux-mmc@vger.kernel.org -T: git git://git.linaro.org/people/ulf.hansson/mmc.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/ulfh/mmc.git S: Maintained F: Documentation/devicetree/bindings/mmc/ F: drivers/mmc/ From 4547f4d8ffd63ba4ac129f9136027bd14b729101 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Fri, 23 Sep 2016 14:05:04 -0700 Subject: [PATCH 175/884] Btrfs: kill BUG_ON in do_relocation While updating btree, we try to push items between sibling nodes/leaves in order to keep height as low as possible. But we don't memset the original places with zero when pushing items so that we could end up leaving stale content in nodes/leaves. One may read the above stale content by increasing btree blocks' @nritems. One case I've come across is that in fs tree, a leaf has two parent nodes, hence running balance ends up with processing this leaf with two parent nodes, but it can only reach the valid parent node through btrfs_search_slot, so it'd be like, do_relocation for P in all parent nodes of block A: if !P->eb: btrfs_search_slot(key); --> get path from P to A. if lowest: BUG_ON(A->bytenr != bytenr of A recorded in P); btrfs_cow_block(P, A); --> change A's bytenr in P. After btrfs_cow_block, P has the new bytenr of A, but with the same @key, we get the same path again, and get panic by BUG_ON. Note that this is only happening in a corrupted fs, for a regular fs in which we have correct @nritems so that we won't read stale content in any case. Reviewed-by: Josef Bacik Signed-off-by: Liu Bo Signed-off-by: David Sterba --- fs/btrfs/relocation.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 0ec8ffa37ab0..c4af0cdb783d 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2728,7 +2728,14 @@ static int do_relocation(struct btrfs_trans_handle *trans, bytenr = btrfs_node_blockptr(upper->eb, slot); if (lowest) { - BUG_ON(bytenr != node->bytenr); + if (bytenr != node->bytenr) { + btrfs_err(root->fs_info, + "lowest leaf/node mismatch: bytenr %llu node->bytenr %llu slot %d upper %llu", + bytenr, node->bytenr, slot, + upper->eb->start); + err = -EIO; + goto next; + } } else { if (node->eb->start == bytenr) goto next; From 9ca488dd53088d4fcc97258aeeccf21f63b7da1e Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Thu, 29 Sep 2016 17:22:58 +0200 Subject: [PATCH 176/884] batman-adv: Modify neigh_list only with rcu-list functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The batadv_hard_iface::neigh_list is accessed via rcu based primitives. Thus all operations done on it have to fulfill the requirements by RCU. So using non-RCU mechanisms like hlist_add_head is not allowed because it misses the barriers required to protect concurrent readers when accessing the data behind the pointer. Fixes: cef63419f7db ("batman-adv: add list of unique single hop neighbors per hard-interface") Signed-off-by: Sven Eckelmann Acked-by: Linus Lüssing Signed-off-by: Simon Wunderlich --- net/batman-adv/originator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 5f3bfc41aeb1..7c8d16086f0f 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -544,7 +544,7 @@ batadv_hardif_neigh_create(struct batadv_hard_iface *hard_iface, if (bat_priv->algo_ops->neigh.hardif_init) bat_priv->algo_ops->neigh.hardif_init(hardif_neigh); - hlist_add_head(&hardif_neigh->list, &hard_iface->neigh_list); + hlist_add_head_rcu(&hardif_neigh->list, &hard_iface->neigh_list); out: spin_unlock_bh(&hard_iface->neigh_list_lock); From 611b975b5c868639b7cb0e89e22d1c8c561bf3a1 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 16 Jul 2016 21:30:20 +0200 Subject: [PATCH 177/884] batman-adv: Add BATADV_DBG_TP_METER to BATADV_DBG_ALL The BATADV_DBG_ALL has to contain the bit of BATADV_DBG_TP_METER to really support all available debug messages. Fixes: 33a3bb4a3345 ("batman-adv: throughput meter implementation") Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/log.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/log.h b/net/batman-adv/log.h index e0e1a88c3e58..d2905a855d1b 100644 --- a/net/batman-adv/log.h +++ b/net/batman-adv/log.h @@ -63,7 +63,7 @@ enum batadv_dbg_level { BATADV_DBG_NC = BIT(5), BATADV_DBG_MCAST = BIT(6), BATADV_DBG_TP_METER = BIT(7), - BATADV_DBG_ALL = 127, + BATADV_DBG_ALL = 255, }; #ifdef CONFIG_BATMAN_ADV_DEBUG From 1aeb5b615cd10db7324d4e4d167c4916dfeca311 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 12 Oct 2016 08:21:43 -0300 Subject: [PATCH 178/884] [media] radio-bcm2048: don't ignore errors Remove this warning: drivers/staging/media/bcm2048/radio-bcm2048.c: In function 'bcm2048_set_rds_no_lock': drivers/staging/media/bcm2048/radio-bcm2048.c:467:6: warning: variable 'err' set but not used [-Wunused-but-set-variable] int err; ^~~ By returning the error code. Signed-off-by: Mauro Carvalho Chehab --- drivers/staging/media/bcm2048/radio-bcm2048.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/staging/media/bcm2048/radio-bcm2048.c b/drivers/staging/media/bcm2048/radio-bcm2048.c index 8dade197f053..0f088405e186 100644 --- a/drivers/staging/media/bcm2048/radio-bcm2048.c +++ b/drivers/staging/media/bcm2048/radio-bcm2048.c @@ -482,6 +482,8 @@ static int bcm2048_set_rds_no_lock(struct bcm2048_device *bdev, u8 rds_on) flags); memset(&bdev->rds_info, 0, sizeof(bdev->rds_info)); } + if (err) + return err; err = bcm2048_send_command(bdev, BCM2048_I2C_FM_RDS_SYSTEM, bdev->cache_fm_rds_system); From f4a067f9ffca603b45f7e82ddd2ba50e5904cea3 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 17 Oct 2016 15:05:33 +0100 Subject: [PATCH 179/884] mac80211: move struct aead_req off the stack Some crypto implementations (such as the generic CCM wrapper in crypto/) use scatterlists to map fields of private data in their struct aead_req. This means these data structures cannot live in the vmalloc area, which means that they cannot live on the stack (with CONFIG_VMAP_STACK.) This currently occurs only with the generic software implementation, but the private data and usage is implementation specific, so move the whole data structures off the stack into heap by allocating every time we need to use them. In addition, take care not to put any of our own stack allocations into scatterlists. This involves reserving some extra room when allocating the aead_request structures, and referring to those allocations in the scatter- lists (while copying the data from the stack before the crypto operation) Signed-off-by: Ard Biesheuvel Signed-off-by: Johannes Berg --- net/mac80211/aes_ccm.c | 46 +++++++++++++++++++++++++++-------------- net/mac80211/aes_ccm.h | 8 ++++--- net/mac80211/aes_gcm.c | 43 ++++++++++++++++++++++++-------------- net/mac80211/aes_gcm.h | 6 ++++-- net/mac80211/aes_gmac.c | 26 +++++++++++------------ net/mac80211/aes_gmac.h | 4 ++++ net/mac80211/wpa.c | 22 ++++++++------------ 7 files changed, 93 insertions(+), 62 deletions(-) diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aes_ccm.c index 7663c28ba353..a4e0d59a40dd 100644 --- a/net/mac80211/aes_ccm.c +++ b/net/mac80211/aes_ccm.c @@ -18,21 +18,24 @@ #include "key.h" #include "aes_ccm.h" -void ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, - u8 *data, size_t data_len, u8 *mic, - size_t mic_len) +int ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, + u8 *data, size_t data_len, u8 *mic, + size_t mic_len) { struct scatterlist sg[3]; + struct aead_request *aead_req; + int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm); + u8 *__aad; - char aead_req_data[sizeof(struct aead_request) + - crypto_aead_reqsize(tfm)] - __aligned(__alignof__(struct aead_request)); - struct aead_request *aead_req = (void *) aead_req_data; + aead_req = kzalloc(reqsize + CCM_AAD_LEN, GFP_ATOMIC); + if (!aead_req) + return -ENOMEM; - memset(aead_req, 0, sizeof(aead_req_data)); + __aad = (u8 *)aead_req + reqsize; + memcpy(__aad, aad, CCM_AAD_LEN); sg_init_table(sg, 3); - sg_set_buf(&sg[0], &aad[2], be16_to_cpup((__be16 *)aad)); + sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad)); sg_set_buf(&sg[1], data, data_len); sg_set_buf(&sg[2], mic, mic_len); @@ -41,6 +44,9 @@ void ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, aead_request_set_ad(aead_req, sg[0].length); crypto_aead_encrypt(aead_req); + kzfree(aead_req); + + return 0; } int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, @@ -48,18 +54,23 @@ int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, size_t mic_len) { struct scatterlist sg[3]; - char aead_req_data[sizeof(struct aead_request) + - crypto_aead_reqsize(tfm)] - __aligned(__alignof__(struct aead_request)); - struct aead_request *aead_req = (void *) aead_req_data; + struct aead_request *aead_req; + int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm); + u8 *__aad; + int err; if (data_len == 0) return -EINVAL; - memset(aead_req, 0, sizeof(aead_req_data)); + aead_req = kzalloc(reqsize + CCM_AAD_LEN, GFP_ATOMIC); + if (!aead_req) + return -ENOMEM; + + __aad = (u8 *)aead_req + reqsize; + memcpy(__aad, aad, CCM_AAD_LEN); sg_init_table(sg, 3); - sg_set_buf(&sg[0], &aad[2], be16_to_cpup((__be16 *)aad)); + sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad)); sg_set_buf(&sg[1], data, data_len); sg_set_buf(&sg[2], mic, mic_len); @@ -67,7 +78,10 @@ int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, aead_request_set_crypt(aead_req, sg, sg, data_len + mic_len, b_0); aead_request_set_ad(aead_req, sg[0].length); - return crypto_aead_decrypt(aead_req); + err = crypto_aead_decrypt(aead_req); + kzfree(aead_req); + + return err; } struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[], diff --git a/net/mac80211/aes_ccm.h b/net/mac80211/aes_ccm.h index 6a73d1e4d186..fcd3254c5cf0 100644 --- a/net/mac80211/aes_ccm.h +++ b/net/mac80211/aes_ccm.h @@ -12,12 +12,14 @@ #include +#define CCM_AAD_LEN 32 + struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[], size_t key_len, size_t mic_len); -void ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, - u8 *data, size_t data_len, u8 *mic, - size_t mic_len); +int ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, + u8 *data, size_t data_len, u8 *mic, + size_t mic_len); int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, u8 *data, size_t data_len, u8 *mic, size_t mic_len); diff --git a/net/mac80211/aes_gcm.c b/net/mac80211/aes_gcm.c index 3afe361fd27c..8a4397cc1b08 100644 --- a/net/mac80211/aes_gcm.c +++ b/net/mac80211/aes_gcm.c @@ -15,20 +15,23 @@ #include "key.h" #include "aes_gcm.h" -void ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, - u8 *data, size_t data_len, u8 *mic) +int ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, + u8 *data, size_t data_len, u8 *mic) { struct scatterlist sg[3]; + struct aead_request *aead_req; + int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm); + u8 *__aad; - char aead_req_data[sizeof(struct aead_request) + - crypto_aead_reqsize(tfm)] - __aligned(__alignof__(struct aead_request)); - struct aead_request *aead_req = (void *)aead_req_data; + aead_req = kzalloc(reqsize + GCM_AAD_LEN, GFP_ATOMIC); + if (!aead_req) + return -ENOMEM; - memset(aead_req, 0, sizeof(aead_req_data)); + __aad = (u8 *)aead_req + reqsize; + memcpy(__aad, aad, GCM_AAD_LEN); sg_init_table(sg, 3); - sg_set_buf(&sg[0], &aad[2], be16_to_cpup((__be16 *)aad)); + sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad)); sg_set_buf(&sg[1], data, data_len); sg_set_buf(&sg[2], mic, IEEE80211_GCMP_MIC_LEN); @@ -37,24 +40,31 @@ void ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, aead_request_set_ad(aead_req, sg[0].length); crypto_aead_encrypt(aead_req); + kzfree(aead_req); + return 0; } int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, u8 *data, size_t data_len, u8 *mic) { struct scatterlist sg[3]; - char aead_req_data[sizeof(struct aead_request) + - crypto_aead_reqsize(tfm)] - __aligned(__alignof__(struct aead_request)); - struct aead_request *aead_req = (void *)aead_req_data; + struct aead_request *aead_req; + int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm); + u8 *__aad; + int err; if (data_len == 0) return -EINVAL; - memset(aead_req, 0, sizeof(aead_req_data)); + aead_req = kzalloc(reqsize + GCM_AAD_LEN, GFP_ATOMIC); + if (!aead_req) + return -ENOMEM; + + __aad = (u8 *)aead_req + reqsize; + memcpy(__aad, aad, GCM_AAD_LEN); sg_init_table(sg, 3); - sg_set_buf(&sg[0], &aad[2], be16_to_cpup((__be16 *)aad)); + sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad)); sg_set_buf(&sg[1], data, data_len); sg_set_buf(&sg[2], mic, IEEE80211_GCMP_MIC_LEN); @@ -63,7 +73,10 @@ int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, data_len + IEEE80211_GCMP_MIC_LEN, j_0); aead_request_set_ad(aead_req, sg[0].length); - return crypto_aead_decrypt(aead_req); + err = crypto_aead_decrypt(aead_req); + kzfree(aead_req); + + return err; } struct crypto_aead *ieee80211_aes_gcm_key_setup_encrypt(const u8 key[], diff --git a/net/mac80211/aes_gcm.h b/net/mac80211/aes_gcm.h index 1347fda6b76a..55aed5352494 100644 --- a/net/mac80211/aes_gcm.h +++ b/net/mac80211/aes_gcm.h @@ -11,8 +11,10 @@ #include -void ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, - u8 *data, size_t data_len, u8 *mic); +#define GCM_AAD_LEN 32 + +int ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, + u8 *data, size_t data_len, u8 *mic); int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, u8 *data, size_t data_len, u8 *mic); struct crypto_aead *ieee80211_aes_gcm_key_setup_encrypt(const u8 key[], diff --git a/net/mac80211/aes_gmac.c b/net/mac80211/aes_gmac.c index 3ddd927aaf30..bd72a862ddb7 100644 --- a/net/mac80211/aes_gmac.c +++ b/net/mac80211/aes_gmac.c @@ -17,28 +17,27 @@ #include "key.h" #include "aes_gmac.h" -#define GMAC_MIC_LEN 16 -#define GMAC_NONCE_LEN 12 -#define AAD_LEN 20 - int ieee80211_aes_gmac(struct crypto_aead *tfm, const u8 *aad, u8 *nonce, const u8 *data, size_t data_len, u8 *mic) { struct scatterlist sg[4]; - char aead_req_data[sizeof(struct aead_request) + - crypto_aead_reqsize(tfm)] - __aligned(__alignof__(struct aead_request)); - struct aead_request *aead_req = (void *)aead_req_data; - u8 zero[GMAC_MIC_LEN], iv[AES_BLOCK_SIZE]; + u8 *zero, *__aad, iv[AES_BLOCK_SIZE]; + struct aead_request *aead_req; + int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm); if (data_len < GMAC_MIC_LEN) return -EINVAL; - memset(aead_req, 0, sizeof(aead_req_data)); + aead_req = kzalloc(reqsize + GMAC_MIC_LEN + GMAC_AAD_LEN, GFP_ATOMIC); + if (!aead_req) + return -ENOMEM; + + zero = (u8 *)aead_req + reqsize; + __aad = zero + GMAC_MIC_LEN; + memcpy(__aad, aad, GMAC_AAD_LEN); - memset(zero, 0, GMAC_MIC_LEN); sg_init_table(sg, 4); - sg_set_buf(&sg[0], aad, AAD_LEN); + sg_set_buf(&sg[0], __aad, GMAC_AAD_LEN); sg_set_buf(&sg[1], data, data_len - GMAC_MIC_LEN); sg_set_buf(&sg[2], zero, GMAC_MIC_LEN); sg_set_buf(&sg[3], mic, GMAC_MIC_LEN); @@ -49,9 +48,10 @@ int ieee80211_aes_gmac(struct crypto_aead *tfm, const u8 *aad, u8 *nonce, aead_request_set_tfm(aead_req, tfm); aead_request_set_crypt(aead_req, sg, sg, 0, iv); - aead_request_set_ad(aead_req, AAD_LEN + data_len); + aead_request_set_ad(aead_req, GMAC_AAD_LEN + data_len); crypto_aead_encrypt(aead_req); + kzfree(aead_req); return 0; } diff --git a/net/mac80211/aes_gmac.h b/net/mac80211/aes_gmac.h index d328204d73a8..32e6442c95be 100644 --- a/net/mac80211/aes_gmac.h +++ b/net/mac80211/aes_gmac.h @@ -11,6 +11,10 @@ #include +#define GMAC_AAD_LEN 20 +#define GMAC_MIC_LEN 16 +#define GMAC_NONCE_LEN 12 + struct crypto_aead *ieee80211_aes_gmac_key_setup(const u8 key[], size_t key_len); int ieee80211_aes_gmac(struct crypto_aead *tfm, const u8 *aad, u8 *nonce, diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index b48c1e13e281..42ce9bd4426f 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -405,7 +405,7 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb, u8 *pos; u8 pn[6]; u64 pn64; - u8 aad[2 * AES_BLOCK_SIZE]; + u8 aad[CCM_AAD_LEN]; u8 b_0[AES_BLOCK_SIZE]; if (info->control.hw_key && @@ -461,10 +461,8 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb, pos += IEEE80211_CCMP_HDR_LEN; ccmp_special_blocks(skb, pn, b_0, aad); - ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, b_0, aad, pos, len, - skb_put(skb, mic_len), mic_len); - - return 0; + return ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, b_0, aad, pos, len, + skb_put(skb, mic_len), mic_len); } @@ -639,7 +637,7 @@ static int gcmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) u8 *pos; u8 pn[6]; u64 pn64; - u8 aad[2 * AES_BLOCK_SIZE]; + u8 aad[GCM_AAD_LEN]; u8 j_0[AES_BLOCK_SIZE]; if (info->control.hw_key && @@ -696,10 +694,8 @@ static int gcmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) pos += IEEE80211_GCMP_HDR_LEN; gcmp_special_blocks(skb, pn, j_0, aad); - ieee80211_aes_gcm_encrypt(key->u.gcmp.tfm, j_0, aad, pos, len, - skb_put(skb, IEEE80211_GCMP_MIC_LEN)); - - return 0; + return ieee80211_aes_gcm_encrypt(key->u.gcmp.tfm, j_0, aad, pos, len, + skb_put(skb, IEEE80211_GCMP_MIC_LEN)); } ieee80211_tx_result @@ -1123,9 +1119,9 @@ ieee80211_crypto_aes_gmac_encrypt(struct ieee80211_tx_data *tx) struct ieee80211_key *key = tx->key; struct ieee80211_mmie_16 *mmie; struct ieee80211_hdr *hdr; - u8 aad[20]; + u8 aad[GMAC_AAD_LEN]; u64 pn64; - u8 nonce[12]; + u8 nonce[GMAC_NONCE_LEN]; if (WARN_ON(skb_queue_len(&tx->skbs) != 1)) return TX_DROP; @@ -1171,7 +1167,7 @@ ieee80211_crypto_aes_gmac_decrypt(struct ieee80211_rx_data *rx) struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_key *key = rx->key; struct ieee80211_mmie_16 *mmie; - u8 aad[20], mic[16], ipn[6], nonce[12]; + u8 aad[GMAC_AAD_LEN], mic[GMAC_MIC_LEN], ipn[6], nonce[GMAC_NONCE_LEN]; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; if (!ieee80211_is_mgmt(hdr->frame_control)) From a04a480d4392ea6efd117be2de564117b2a009c0 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sun, 16 Oct 2016 20:02:52 -0700 Subject: [PATCH 180/884] net: Require exact match for TCP socket lookups if dif is l3mdev Currently, socket lookups for l3mdev (vrf) use cases can match a socket that is bound to a port but not a device (ie., a global socket). If the sysctl tcp_l3mdev_accept is not set this leads to ack packets going out based on the main table even though the packet came in from an L3 domain. The end result is that the connection does not establish creating confusion for users since the service is running and a socket shows in ss output. Fix by requiring an exact dif to sk_bound_dev_if match if the skb came through an interface enslaved to an l3mdev device and the tcp_l3mdev_accept is not set. skb's through an l3mdev interface are marked by setting a flag in inet{6}_skb_parm. The IPv6 variant is already set; this patch adds the flag for IPv4. Using an skb flag avoids a device lookup on the dif. The flag is set in the VRF driver using the IP{6}CB macros. For IPv4, the inet_skb_parm struct is moved in the cb per commit 971f10eca186, so the match function in the TCP stack needs to use TCP_SKB_CB. For IPv6, the move is done after the socket lookup, so IP6CB is used. The flags field in inet_skb_parm struct needs to be increased to add another flag. There is currently a 1-byte hole following the flags, so it can be expanded to u16 without increasing the size of the struct. Fixes: 193125dbd8eb ("net: Introduce VRF device driver") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- drivers/net/vrf.c | 2 ++ include/linux/ipv6.h | 17 ++++++++++++++--- include/net/ip.h | 8 +++++++- include/net/tcp.h | 13 ++++++++++++- net/ipv4/inet_hashtables.c | 8 +++++--- net/ipv6/inet6_hashtables.c | 7 ++++--- 6 files changed, 44 insertions(+), 11 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 85c271c70d42..820de6a9ddde 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -956,6 +956,7 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, if (skb->pkt_type == PACKET_LOOPBACK) { skb->dev = vrf_dev; skb->skb_iif = vrf_dev->ifindex; + IP6CB(skb)->flags |= IP6SKB_L3SLAVE; skb->pkt_type = PACKET_HOST; goto out; } @@ -996,6 +997,7 @@ static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev, { skb->dev = vrf_dev; skb->skb_iif = vrf_dev->ifindex; + IPCB(skb)->flags |= IPSKB_L3SLAVE; /* loopback traffic; do not push through packet taps again. * Reset pkt_type for upper layers to process skb diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 7e9a789be5e0..ca1ad9ebbc92 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -123,12 +123,12 @@ struct inet6_skb_parm { }; #if defined(CONFIG_NET_L3_MASTER_DEV) -static inline bool skb_l3mdev_slave(__u16 flags) +static inline bool ipv6_l3mdev_skb(__u16 flags) { return flags & IP6SKB_L3SLAVE; } #else -static inline bool skb_l3mdev_slave(__u16 flags) +static inline bool ipv6_l3mdev_skb(__u16 flags) { return false; } @@ -139,11 +139,22 @@ static inline bool skb_l3mdev_slave(__u16 flags) static inline int inet6_iif(const struct sk_buff *skb) { - bool l3_slave = skb_l3mdev_slave(IP6CB(skb)->flags); + bool l3_slave = ipv6_l3mdev_skb(IP6CB(skb)->flags); return l3_slave ? skb->skb_iif : IP6CB(skb)->iif; } +/* can not be used in TCP layer after tcp_v6_fill_cb */ +static inline bool inet6_exact_dif_match(struct net *net, struct sk_buff *skb) +{ +#if defined(CONFIG_NET_L3_MASTER_DEV) + if (!net->ipv4.sysctl_tcp_l3mdev_accept && + ipv6_l3mdev_skb(IP6CB(skb)->flags)) + return true; +#endif + return false; +} + struct tcp6_request_sock { struct tcp_request_sock tcp6rsk_tcp; }; diff --git a/include/net/ip.h b/include/net/ip.h index bc43c0fcae12..c9d07988911e 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -38,7 +38,7 @@ struct sock; struct inet_skb_parm { int iif; struct ip_options opt; /* Compiled IP options */ - unsigned char flags; + u16 flags; #define IPSKB_FORWARDED BIT(0) #define IPSKB_XFRM_TUNNEL_SIZE BIT(1) @@ -48,10 +48,16 @@ struct inet_skb_parm { #define IPSKB_DOREDIRECT BIT(5) #define IPSKB_FRAG_PMTU BIT(6) #define IPSKB_FRAG_SEGS BIT(7) +#define IPSKB_L3SLAVE BIT(8) u16 frag_max_size; }; +static inline bool ipv4_l3mdev_skb(u16 flags) +{ + return !!(flags & IPSKB_L3SLAVE); +} + static inline unsigned int ip_hdrlen(const struct sk_buff *skb) { return ip_hdr(skb)->ihl * 4; diff --git a/include/net/tcp.h b/include/net/tcp.h index f83b7f220a65..5b82d4d94834 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -794,12 +794,23 @@ struct tcp_skb_cb { */ static inline int tcp_v6_iif(const struct sk_buff *skb) { - bool l3_slave = skb_l3mdev_slave(TCP_SKB_CB(skb)->header.h6.flags); + bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags); return l3_slave ? skb->skb_iif : TCP_SKB_CB(skb)->header.h6.iif; } #endif +/* TCP_SKB_CB reference means this can not be used from early demux */ +static inline bool inet_exact_dif_match(struct net *net, struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) + if (!net->ipv4.sysctl_tcp_l3mdev_accept && + ipv4_l3mdev_skb(TCP_SKB_CB(skb)->header.h4.flags)) + return true; +#endif + return false; +} + /* Due to TSO, an SKB can be composed of multiple actual * packets. To keep these tracked properly, we use this. */ diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 77c20a489218..ca97835bfec4 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -25,6 +25,7 @@ #include #include #include +#include #include static u32 inet_ehashfn(const struct net *net, const __be32 laddr, @@ -172,7 +173,7 @@ EXPORT_SYMBOL_GPL(__inet_inherit_port); static inline int compute_score(struct sock *sk, struct net *net, const unsigned short hnum, const __be32 daddr, - const int dif) + const int dif, bool exact_dif) { int score = -1; struct inet_sock *inet = inet_sk(sk); @@ -186,7 +187,7 @@ static inline int compute_score(struct sock *sk, struct net *net, return -1; score += 4; } - if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if || exact_dif) { if (sk->sk_bound_dev_if != dif) return -1; score += 4; @@ -215,11 +216,12 @@ struct sock *__inet_lookup_listener(struct net *net, unsigned int hash = inet_lhashfn(net, hnum); struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; int score, hiscore = 0, matches = 0, reuseport = 0; + bool exact_dif = inet_exact_dif_match(net, skb); struct sock *sk, *result = NULL; u32 phash = 0; sk_for_each_rcu(sk, &ilb->head) { - score = compute_score(sk, net, hnum, daddr, dif); + score = compute_score(sk, net, hnum, daddr, dif, exact_dif); if (score > hiscore) { reuseport = sk->sk_reuseport; if (reuseport) { diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 00cf28ad4565..2fd0374a35b1 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -96,7 +96,7 @@ EXPORT_SYMBOL(__inet6_lookup_established); static inline int compute_score(struct sock *sk, struct net *net, const unsigned short hnum, const struct in6_addr *daddr, - const int dif) + const int dif, bool exact_dif) { int score = -1; @@ -109,7 +109,7 @@ static inline int compute_score(struct sock *sk, struct net *net, return -1; score++; } - if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if || exact_dif) { if (sk->sk_bound_dev_if != dif) return -1; score++; @@ -131,11 +131,12 @@ struct sock *inet6_lookup_listener(struct net *net, unsigned int hash = inet_lhashfn(net, hnum); struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; int score, hiscore = 0, matches = 0, reuseport = 0; + bool exact_dif = inet6_exact_dif_match(net, skb); struct sock *sk, *result = NULL; u32 phash = 0; sk_for_each(sk, &ilb->head) { - score = compute_score(sk, net, hnum, daddr, dif); + score = compute_score(sk, net, hnum, daddr, dif, exact_dif); if (score > hiscore) { reuseport = sk->sk_reuseport; if (reuseport) { From ca006f785fbfd7a5c901900bd3fe2b26e946a1ee Mon Sep 17 00:00:00 2001 From: Stefan Tauner Date: Thu, 6 Oct 2016 18:40:11 +0200 Subject: [PATCH 181/884] USB: serial: ftdi_sio: add support for Infineon TriBoard TC2X7 This adds support to ftdi_sio for the Infineon TriBoard TC2X7 engineering board for first-generation Aurix SoCs with Tricore CPUs. Mere addition of the device IDs does the job. Signed-off-by: Stefan Tauner Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/ftdi_sio.c | 3 ++- drivers/usb/serial/ftdi_sio_ids.h | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index b2d767e743fc..0ff7f38d7800 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -986,7 +986,8 @@ static const struct usb_device_id id_table_combined[] = { /* ekey Devices */ { USB_DEVICE(FTDI_VID, FTDI_EKEY_CONV_USB_PID) }, /* Infineon Devices */ - { USB_DEVICE_INTERFACE_NUMBER(INFINEON_VID, INFINEON_TRIBOARD_PID, 1) }, + { USB_DEVICE_INTERFACE_NUMBER(INFINEON_VID, INFINEON_TRIBOARD_TC1798_PID, 1) }, + { USB_DEVICE_INTERFACE_NUMBER(INFINEON_VID, INFINEON_TRIBOARD_TC2X7_PID, 1) }, /* GE Healthcare devices */ { USB_DEVICE(GE_HEALTHCARE_VID, GE_HEALTHCARE_NEMO_TRACKER_PID) }, /* Active Research (Actisense) devices */ diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index f87a938cf005..21011c0a4c64 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -626,8 +626,9 @@ /* * Infineon Technologies */ -#define INFINEON_VID 0x058b -#define INFINEON_TRIBOARD_PID 0x0028 /* DAS JTAG TriBoard TC1798 V1.0 */ +#define INFINEON_VID 0x058b +#define INFINEON_TRIBOARD_TC1798_PID 0x0028 /* DAS JTAG TriBoard TC1798 V1.0 */ +#define INFINEON_TRIBOARD_TC2X7_PID 0x0043 /* DAS JTAG TriBoard TC2X7 V1.0 */ /* * Acton Research Corp. From baa5567c18d17843815d1d9150424d31f238e363 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Mon, 17 Oct 2016 15:18:48 +0100 Subject: [PATCH 182/884] arm64: kernel: numa: fix ACPI boot cpu numa node mapping Commit 7ba5f605f3a0 ("arm64/numa: remove the limitation that cpu0 must bind to node0") removed the numa cpu<->node mapping restriction whereby logical cpu 0 always corresponds to numa node 0; removing the restriction was correct, in that it does not really exist in practice but the commit only updated the early mapping of logical cpu 0 to its real numa node for the DT boot path, missing the ACPI one, leading to boot failures on ACPI systems owing to missing node<->cpu map for logical cpu 0. Fix the issue by updating the ACPI boot path with code that carries out the early cpu<->node mapping also for the boot cpu (ie cpu 0), mirroring what is currently done in the DT boot path. Fixes: 7ba5f605f3a0 ("arm64/numa: remove the limitation that cpu0 must bind to node0") Signed-off-by: Lorenzo Pieralisi Tested-by: Laszlo Ersek Reported-by: Laszlo Ersek Cc: Will Deacon Cc: Laszlo Ersek Cc: Hanjun Guo Cc: Andrew Jones Cc: Zhen Lei Cc: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/kernel/smp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index d3f151cfd4a1..8507703dabe4 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -544,6 +544,7 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor) return; } bootcpu_valid = true; + early_map_cpu_to_node(0, acpi_numa_get_nid(0, hwid)); return; } From 9a1a1f404be55b07aea64864f98d7306cc493360 Mon Sep 17 00:00:00 2001 From: Tai Nguyen Date: Thu, 13 Oct 2016 11:09:16 -0700 Subject: [PATCH 183/884] perf: xgene: Remove bogus IS_ERR() check In acpi_get_pmu_hw_inf we pass the address of a local variable to IS_ERR(), which doesn't make sense, as the pointer must be a real, valid pointer. This doesn't cause a functional problem, as IS_ERR() will evaluate as false, but the check is bogus and causes static checkers to complain. Remove the bogus check. The bug is reported by Dan Carpenter in [1] [1] https://www.spinics.net/lists/arm-kernel/msg535957.html Signed-off-by: Tai Nguyen Acked-by: Mark Rutland Signed-off-by: Will Deacon --- drivers/perf/xgene_pmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c index c2ac7646b99f..a8ac4bcef2c0 100644 --- a/drivers/perf/xgene_pmu.c +++ b/drivers/perf/xgene_pmu.c @@ -1011,7 +1011,7 @@ xgene_pmu_dev_ctx *acpi_get_pmu_hw_inf(struct xgene_pmu *xgene_pmu, rc = acpi_dev_get_resources(adev, &resource_list, acpi_pmu_dev_add_resource, &res); acpi_dev_free_resource_list(&resource_list); - if (rc < 0 || IS_ERR(&res)) { + if (rc < 0) { dev_err(dev, "PMU type %d: No resource address found\n", type); goto err; } From 9a0b1e8ba4061778897b544afc898de2163382f7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 15 Oct 2016 17:50:49 +0200 Subject: [PATCH 184/884] net: pktgen: remove rcu locking in pktgen_change_name() After Jesper commit back in linux-3.18, we trigger a lockdep splat in proc_create_data() while allocating memory from pktgen_change_name(). This patch converts t->if_lock to a mutex, since it is now only used from control path, and adds proper locking to pktgen_change_name() 1) pktgen_thread_lock to protect the outer loop (iterating threads) 2) t->if_lock to protect the inner loop (iterating devices) Note that before Jesper patch, pktgen_change_name() was lacking proper protection, but lockdep was not able to detect the problem. Fixes: 8788370a1d4b ("pktgen: RCU-ify "if_list" to remove lock in next_to_run()") Reported-by: John Sperbeck Signed-off-by: Eric Dumazet Cc: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- net/core/pktgen.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 5219a9e2127a..306b8f0e03c1 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -216,8 +216,8 @@ #define M_QUEUE_XMIT 2 /* Inject packet into qdisc */ /* If lock -- protects updating of if_list */ -#define if_lock(t) spin_lock(&(t->if_lock)); -#define if_unlock(t) spin_unlock(&(t->if_lock)); +#define if_lock(t) mutex_lock(&(t->if_lock)); +#define if_unlock(t) mutex_unlock(&(t->if_lock)); /* Used to help with determining the pkts on receive */ #define PKTGEN_MAGIC 0xbe9be955 @@ -423,7 +423,7 @@ struct pktgen_net { }; struct pktgen_thread { - spinlock_t if_lock; /* for list of devices */ + struct mutex if_lock; /* for list of devices */ struct list_head if_list; /* All device here */ struct list_head th_list; struct task_struct *tsk; @@ -2010,11 +2010,13 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d { struct pktgen_thread *t; + mutex_lock(&pktgen_thread_lock); + list_for_each_entry(t, &pn->pktgen_threads, th_list) { struct pktgen_dev *pkt_dev; - rcu_read_lock(); - list_for_each_entry_rcu(pkt_dev, &t->if_list, list) { + if_lock(t); + list_for_each_entry(pkt_dev, &t->if_list, list) { if (pkt_dev->odev != dev) continue; @@ -2029,8 +2031,9 @@ static void pktgen_change_name(const struct pktgen_net *pn, struct net_device *d dev->name); break; } - rcu_read_unlock(); + if_unlock(t); } + mutex_unlock(&pktgen_thread_lock); } static int pktgen_device_event(struct notifier_block *unused, @@ -3762,7 +3765,7 @@ static int __net_init pktgen_create_thread(int cpu, struct pktgen_net *pn) return -ENOMEM; } - spin_lock_init(&t->if_lock); + mutex_init(&t->if_lock); t->cpu = cpu; INIT_LIST_HEAD(&t->if_list); From 850540351bb1a4fa5f192e5ce55b89928cc57f42 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 17 Oct 2016 13:47:34 +0100 Subject: [PATCH 185/884] arm64: kernel: Init MDCR_EL2 even in the absence of a PMU Commit f436b2ac90a0 ("arm64: kernel: fix architected PMU registers unconditional access") made sure we wouldn't access unimplemented PMU registers, but also left MDCR_EL2 uninitialized in that case, leading to trap bits being potentially left set. Make sure we always write something in that register. Fixes: f436b2ac90a0 ("arm64: kernel: fix architected PMU registers unconditional access") Cc: Lorenzo Pieralisi Cc: Will Deacon Cc: Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm64/kernel/head.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 427f6d3f084c..332e33193ccf 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -586,8 +586,9 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems b.lt 4f // Skip if no PMU present mrs x0, pmcr_el0 // Disable debug access traps ubfx x0, x0, #11, #5 // to EL2 and allow access to - msr mdcr_el2, x0 // all PMU counters from EL1 4: + csel x0, xzr, x0, lt // all PMU counters from EL1 + msr mdcr_el2, x0 // (if they exist) /* Stage-2 translation */ msr vttbr_el2, xzr From 8fe88a4145cdeee486af60e61f5d5a14f804fa45 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 17 Oct 2016 16:18:39 +0100 Subject: [PATCH 186/884] arm64: kaslr: keep modules close to the kernel when DYNAMIC_FTRACE=y The RANDOMIZE_MODULE_REGION_FULL Kconfig option allows KASLR to be configured in such a way that kernel modules and the core kernel are allocated completely independently, which implies that modules are likely to require branches via PLT entries to reach the core kernel. The dynamic ftrace code does not expect that, and assumes that it can patch module code to perform a relative branch to anywhere in the core kernel. This may result in errors such as branch_imm_common: offset out of range ------------[ cut here ]------------ WARNING: CPU: 3 PID: 196 at kernel/trace/ftrace.c:1995 ftrace_bug+0x220/0x2e8 Modules linked in: CPU: 3 PID: 196 Comm: systemd-udevd Not tainted 4.8.0-22-generic #24 Hardware name: AMD Seattle/Seattle, BIOS 10:34:40 Oct 6 2016 task: ffff8d1bef7dde80 task.stack: ffff8d1bef6b0000 PC is at ftrace_bug+0x220/0x2e8 LR is at ftrace_process_locs+0x330/0x430 So make RANDOMIZE_MODULE_REGION_FULL mutually exclusive with DYNAMIC_FTRACE at the Kconfig level. Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 30398dbc940a..969ef880d234 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -915,7 +915,7 @@ config RANDOMIZE_BASE config RANDOMIZE_MODULE_REGION_FULL bool "Randomize the module region independently from the core kernel" - depends on RANDOMIZE_BASE + depends on RANDOMIZE_BASE && !DYNAMIC_FTRACE default y help Randomizes the location of the module region without considering the From 67b11e2ea704f99953f18585609df17b6e189e53 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 16 Oct 2016 23:54:03 +0100 Subject: [PATCH 187/884] cxgb4: fix memory leak of qe on error exit path A memory leak of qe occurs when t4_sched_queue_unbind fails, so fix this by free'ing qe on the error exit path. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/sched.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/sched.c b/drivers/net/ethernet/chelsio/cxgb4/sched.c index 539de764bbd3..cbd68a8fe2e4 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sched.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sched.c @@ -210,8 +210,10 @@ static int t4_sched_queue_bind(struct port_info *pi, struct ch_sched_queue *p) /* Unbind queue from any existing class */ err = t4_sched_queue_unbind(pi, p); - if (err) + if (err) { + t4_free_mem(qe); goto out; + } /* Bind queue to specified class */ memset(qe, 0, sizeof(*qe)); From 1b203c138c5a0d78aa34d1c4346ff6b32ac74869 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 6 Oct 2016 15:40:14 +0200 Subject: [PATCH 188/884] netfilter: xt_hashlimit: Add missing ULL suffixes for 64-bit constants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On 32-bit (e.g. with m68k-linux-gnu-gcc-4.1): net/netfilter/xt_hashlimit.c: In function ‘user2credits’: net/netfilter/xt_hashlimit.c:476: warning: integer constant is too large for ‘long’ type ... net/netfilter/xt_hashlimit.c:478: warning: integer constant is too large for ‘long’ type ... net/netfilter/xt_hashlimit.c:480: warning: integer constant is too large for ‘long’ type ... net/netfilter/xt_hashlimit.c: In function ‘rateinfo_recalc’: net/netfilter/xt_hashlimit.c:513: warning: integer constant is too large for ‘long’ type Fixes: 11d5f15723c9f39d ("netfilter: xt_hashlimit: Create revision 2 to support higher pps rates") Signed-off-by: Geert Uytterhoeven Acked-by: Vishwanath Pai Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_hashlimit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 2fab0c65aa94..b89b688e9d01 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -431,7 +431,7 @@ static void htable_put(struct xt_hashlimit_htable *hinfo) CREDITS_PER_JIFFY*HZ*60*60*24 < 2^32 ie. */ #define MAX_CPJ_v1 (0xFFFFFFFF / (HZ*60*60*24)) -#define MAX_CPJ (0xFFFFFFFFFFFFFFFF / (HZ*60*60*24)) +#define MAX_CPJ (0xFFFFFFFFFFFFFFFFULL / (HZ*60*60*24)) /* Repeated shift and or gives us all 1s, final shift and add 1 gives * us the power of 2 below the theoretical max, so GCC simply does a @@ -473,7 +473,7 @@ static u64 user2credits(u64 user, int revision) return div64_u64(user * HZ * CREDITS_PER_JIFFY_v1, XT_HASHLIMIT_SCALE); } else { - if (user > 0xFFFFFFFFFFFFFFFF / (HZ*CREDITS_PER_JIFFY)) + if (user > 0xFFFFFFFFFFFFFFFFULL / (HZ*CREDITS_PER_JIFFY)) return div64_u64(user, XT_HASHLIMIT_SCALE_v2) * HZ * CREDITS_PER_JIFFY; From 9224eb77e63f70f16c0b6b7a20ca7d395f3bc077 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Mon, 17 Oct 2016 16:00:46 +0100 Subject: [PATCH 189/884] irqchip/gic-v3-its: Fix entry size mask for GITS_BASER Entry Size in GITS_BASER occupies 5 bits [52:48], but we mask out 8 bits. Fixes: cc2d3216f53c ("irqchip: GICv3: ITS command queue") Cc: stable@vger.kernel.org Signed-off-by: Vladimir Murzin Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v3.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 8361c8d3edd1..b7e34313cdfe 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -290,7 +290,7 @@ #define GITS_BASER_TYPE_SHIFT (56) #define GITS_BASER_TYPE(r) (((r) >> GITS_BASER_TYPE_SHIFT) & 7) #define GITS_BASER_ENTRY_SIZE_SHIFT (48) -#define GITS_BASER_ENTRY_SIZE(r) ((((r) >> GITS_BASER_ENTRY_SIZE_SHIFT) & 0xff) + 1) +#define GITS_BASER_ENTRY_SIZE(r) ((((r) >> GITS_BASER_ENTRY_SIZE_SHIFT) & 0x1f) + 1) #define GITS_BASER_SHAREABILITY_SHIFT (10) #define GITS_BASER_InnerShareable \ GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable) From a8b1e36d0d1d6f51490e7adce35367ed6adb10e7 Mon Sep 17 00:00:00 2001 From: "Anders K. Pedersen" Date: Sun, 9 Oct 2016 13:49:02 +0000 Subject: [PATCH 190/884] netfilter: nft_dynset: fix element timeout for HZ != 1000 With HZ=100 element timeout in dynamic sets (i.e. flow tables) is 10 times higher than configured. Add proper conversion to/from jiffies, when interacting with userspace. I tested this on Linux 4.8.1, and it applies cleanly to current nf and nf-next trees. Fixes: 22fe54d5fefc ("netfilter: nf_tables: add support for dynamic set updates") Signed-off-by: Anders K. Pedersen Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_dynset.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index e3b83c31da2e..517f08767a3c 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -158,7 +158,8 @@ static int nft_dynset_init(const struct nft_ctx *ctx, if (tb[NFTA_DYNSET_TIMEOUT] != NULL) { if (!(set->flags & NFT_SET_TIMEOUT)) return -EINVAL; - timeout = be64_to_cpu(nla_get_be64(tb[NFTA_DYNSET_TIMEOUT])); + timeout = msecs_to_jiffies(be64_to_cpu(nla_get_be64( + tb[NFTA_DYNSET_TIMEOUT]))); } priv->sreg_key = nft_parse_register(tb[NFTA_DYNSET_SREG_KEY]); @@ -246,7 +247,8 @@ static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr) goto nla_put_failure; if (nla_put_string(skb, NFTA_DYNSET_SET_NAME, priv->set->name)) goto nla_put_failure; - if (nla_put_be64(skb, NFTA_DYNSET_TIMEOUT, cpu_to_be64(priv->timeout), + if (nla_put_be64(skb, NFTA_DYNSET_TIMEOUT, + cpu_to_be64(jiffies_to_msecs(priv->timeout)), NFTA_DYNSET_PAD)) goto nla_put_failure; if (priv->expr && nft_expr_dump(skb, NFTA_DYNSET_EXPR, priv->expr)) From 4f76de5f237615643d2243582cee331815312ad0 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 10 Oct 2016 12:18:23 +0200 Subject: [PATCH 191/884] netfilter: conntrack: remove obsolete sysctl (nf_conntrack_events_retry_timeout) This entry has been removed in commit 9500507c6138. Fixes: 9500507c6138 ("netfilter: conntrack: remove timer from ecache extension") Signed-off-by: Nicolas Dichtel Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- .../networking/nf_conntrack-sysctl.txt | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/Documentation/networking/nf_conntrack-sysctl.txt b/Documentation/networking/nf_conntrack-sysctl.txt index 4fb51d32fccc..399e4e866a9c 100644 --- a/Documentation/networking/nf_conntrack-sysctl.txt +++ b/Documentation/networking/nf_conntrack-sysctl.txt @@ -33,24 +33,6 @@ nf_conntrack_events - BOOLEAN If this option is enabled, the connection tracking code will provide userspace with connection tracking events via ctnetlink. -nf_conntrack_events_retry_timeout - INTEGER (seconds) - default 15 - - This option is only relevant when "reliable connection tracking - events" are used. Normally, ctnetlink is "lossy", that is, - events are normally dropped when userspace listeners can't keep up. - - Userspace can request "reliable event mode". When this mode is - active, the conntrack will only be destroyed after the event was - delivered. If event delivery fails, the kernel periodically - re-tries to send the event to userspace. - - This is the maximum interval the kernel should use when re-trying - to deliver the destroy event. - - A higher number means there will be fewer delivery retries and it - will take longer for a backlog to be processed. - nf_conntrack_expect_max - INTEGER Maximum size of expectation table. Default value is nf_conntrack_buckets / 256. Minimum is 1. From 6d19375b58763fefc2f215fb45117d3353ced888 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Tue, 11 Oct 2016 21:03:45 +0800 Subject: [PATCH 192/884] netfilter: xt_NFLOG: fix unexpected truncated packet Justin and Chris spotted that iptables NFLOG target was broken when they upgraded the kernel to 4.8: "ulogd-2.0.5- IPs are no longer logged" or "results in segfaults in ulogd-2.0.5". Because "struct nf_loginfo li;" is a local variable, and flags will be filled with garbage value, not inited to zero. So if it contains 0x1, packets will not be logged to the userspace anymore. Fixes: 7643507fe8b5 ("netfilter: xt_NFLOG: nflog-range does not truncate packets") Reported-by: Justin Piszcz Reported-by: Chris Caputo Tested-by: Chris Caputo Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_NFLOG.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c index 018eed7e1ff1..8668a5c18dc3 100644 --- a/net/netfilter/xt_NFLOG.c +++ b/net/netfilter/xt_NFLOG.c @@ -32,6 +32,7 @@ nflog_tg(struct sk_buff *skb, const struct xt_action_param *par) li.u.ulog.copy_len = info->len; li.u.ulog.group = info->group; li.u.ulog.qthreshold = info->threshold; + li.u.ulog.flags = 0; if (info->flags & XT_NFLOG_F_COPY_LEN) li.u.ulog.flags |= NF_LOG_F_COPY_LEN; From f434ed0a00b232efc9843c869d05048c7cd79bb7 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Wed, 12 Oct 2016 21:09:22 +0800 Subject: [PATCH 193/884] netfilter: xt_ipcomp: add "ip[6]t_ipcomp" module alias name Otherwise, user cannot add related rules if xt_ipcomp.ko is not loaded: # iptables -A OUTPUT -p 108 -m ipcomp --ipcompspi 1 iptables: No chain/target/match by that name. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_ipcomp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/xt_ipcomp.c b/net/netfilter/xt_ipcomp.c index 89d53104c6b3..000e70377f85 100644 --- a/net/netfilter/xt_ipcomp.c +++ b/net/netfilter/xt_ipcomp.c @@ -26,6 +26,8 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Fan Du "); MODULE_DESCRIPTION("Xtables: IPv4/6 IPsec-IPComp SPI match"); +MODULE_ALIAS("ipt_ipcomp"); +MODULE_ALIAS("ip6t_ipcomp"); /* Returns 1 if the spi is matched by the range, 0 otherwise */ static inline bool From 5751e175c60be1b4113d1e9bfa011b58ec01f104 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Wed, 12 Oct 2016 21:10:45 +0800 Subject: [PATCH 194/884] netfilter: nft_hash: add missing NFTA_HASH_OFFSET's nla_policy Missing the nla_policy description will also miss the validation check in kernel. Fixes: 70ca767ea1b2 ("netfilter: nft_hash: Add hash offset value") Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_hash.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 09473b415b95..baf694de3935 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -44,6 +44,7 @@ static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = { [NFTA_HASH_LEN] = { .type = NLA_U32 }, [NFTA_HASH_MODULUS] = { .type = NLA_U32 }, [NFTA_HASH_SEED] = { .type = NLA_U32 }, + [NFTA_HASH_OFFSET] = { .type = NLA_U32 }, }; static int nft_hash_init(const struct nft_ctx *ctx, From 09525a09ad3099efd9ba49b0b90bddc350d6b53a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 12 Oct 2016 12:14:29 +0300 Subject: [PATCH 195/884] netfilter: nf_tables: underflow in nft_parse_u32_check() We don't want to allow negatives here. Fixes: 36b701fae12a ('netfilter: nf_tables: validate maximum value of u32 netlink attributes') Signed-off-by: Dan Carpenter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index b70d3ea1430e..24db22257586 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4423,7 +4423,7 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, */ unsigned int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest) { - int val; + u32 val; val = ntohl(nla_get_be32(attr)); if (val > max) From 21a9e0f1568eaa0aad970c06e4cc8d77de8d9fa1 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 12 Oct 2016 09:09:12 +0300 Subject: [PATCH 196/884] netfilter: nft_exthdr: fix error handling in nft_exthdr_init() "err" needs to be signed for the error handling to work. Fixes: 36b701fae12a ('netfilter: nf_tables: validate maximum value of u32 netlink attributes') Signed-off-by: Dan Carpenter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_exthdr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index a84cf3d66056..47beb3abcc9d 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -59,7 +59,8 @@ static int nft_exthdr_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_exthdr *priv = nft_expr_priv(expr); - u32 offset, len, err; + u32 offset, len; + int err; if (tb[NFTA_EXTHDR_DREG] == NULL || tb[NFTA_EXTHDR_TYPE] == NULL || From ccca6607c545534e5b74ef04e0f2f6ba11344be4 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 13 Oct 2016 08:42:17 +0200 Subject: [PATCH 197/884] netfilter: nft_range: validate operation netlink attribute Use nft_parse_u32_check() to make sure we don't get a value over the unsigned 8-bit integer. Moreover, make sure this value doesn't go over the two supported range comparison modes. Fixes: 9286c2eb1fda ("netfilter: nft_range: validate operation netlink attribute") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_range.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c index c6d5358482d1..9bc4586c3006 100644 --- a/net/netfilter/nft_range.c +++ b/net/netfilter/nft_range.c @@ -59,6 +59,7 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr struct nft_range_expr *priv = nft_expr_priv(expr); struct nft_data_desc desc_from, desc_to; int err; + u32 op; err = nft_data_init(NULL, &priv->data_from, sizeof(priv->data_from), &desc_from, tb[NFTA_RANGE_FROM_DATA]); @@ -80,7 +81,20 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr if (err < 0) goto err2; - priv->op = ntohl(nla_get_be32(tb[NFTA_RANGE_OP])); + err = nft_parse_u32_check(tb[NFTA_RANGE_OP], U8_MAX, &op); + if (err < 0) + goto err2; + + switch (op) { + case NFT_RANGE_EQ: + case NFT_RANGE_NEQ: + break; + default: + err = -EINVAL; + goto err2; + } + + priv->op = op; priv->len = desc_from.len; return 0; err2: From fc971a2f2383950ddedb1e4896c5ead5d2357029 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 17 Oct 2016 11:05:40 -0300 Subject: [PATCH 198/884] net: nps_enet: Fix module autoload If the driver is built as a module, autoload won't work because the module alias information is not filled. So user-space can't match the registered device with the corresponding module. Export the module alias information using the MODULE_DEVICE_TABLE() macro. Before this patch: $ modinfo drivers/net/ethernet/ezchip/nps_enet.ko | grep alias $ After this patch: $ modinfo drivers/net/ethernet/ezchip/nps_enet.ko | grep alias alias: of:N*T*Cezchip,nps-mgt-enetC* alias: of:N*T*Cezchip,nps-mgt-enet Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. Miller --- drivers/net/ethernet/ezchip/nps_enet.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/ezchip/nps_enet.c b/drivers/net/ethernet/ezchip/nps_enet.c index f928e6f79c89..223f35cc034c 100644 --- a/drivers/net/ethernet/ezchip/nps_enet.c +++ b/drivers/net/ethernet/ezchip/nps_enet.c @@ -669,6 +669,7 @@ static const struct of_device_id nps_enet_dt_ids[] = { { .compatible = "ezchip,nps-mgt-enet" }, { /* Sentinel */ } }; +MODULE_DEVICE_TABLE(of, nps_enet_dt_ids); static struct platform_driver nps_enet_driver = { .probe = nps_enet_probe, From 2fa3e317e6c89fc89e96a48308f6791f74fcea3c Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 17 Oct 2016 11:05:41 -0300 Subject: [PATCH 199/884] net: ethernet: nb8800: Fix module autoload If the driver is built as a module, autoload won't work because the module alias information is not filled. So user-space can't match the registered device with the corresponding module. Export the module alias information using the MODULE_DEVICE_TABLE() macro. Before this patch: $ $ modinfo drivers/net/ethernet/aurora/nb8800.ko | grep alias $ After this patch: $ modinfo drivers/net/ethernet/aurora/nb8800.ko | grep alias alias: of:N*T*Csigma,smp8734-ethernetC* alias: of:N*T*Csigma,smp8734-ethernet alias: of:N*T*Csigma,smp8642-ethernetC* alias: of:N*T*Csigma,smp8642-ethernet alias: of:N*T*Caurora,nb8800C* alias: of:N*T*Caurora,nb8800 Signed-off-by: Javier Martinez Canillas Acked-by: Mans Rullgard Signed-off-by: David S. Miller --- drivers/net/ethernet/aurora/nb8800.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/aurora/nb8800.c b/drivers/net/ethernet/aurora/nb8800.c index b047fd607b83..00c38bf151e6 100644 --- a/drivers/net/ethernet/aurora/nb8800.c +++ b/drivers/net/ethernet/aurora/nb8800.c @@ -1358,6 +1358,7 @@ static const struct of_device_id nb8800_dt_ids[] = { }, { } }; +MODULE_DEVICE_TABLE(of, nb8800_dt_ids); static int nb8800_probe(struct platform_device *pdev) { From a7deb924d33b5688b11e34e84c88f2591f933e3a Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 17 Oct 2016 11:05:42 -0300 Subject: [PATCH 200/884] net: hns: Fix hns_dsaf module autoload for OF registration If the driver is built as a module, autoload won't work because the module alias information is not filled. So user-space can't match the registered device with the corresponding module. Export the module alias information using the MODULE_DEVICE_TABLE() macro. Before this patch: $ modinfo drivers/net/ethernet/hisilicon/hns/hns_dsaf.ko | grep alias alias: acpi*:HISI00B2:* alias: acpi*:HISI00B1:* After this patch: $ modinfo drivers/net/ethernet/hisilicon/hns/hns_dsaf.ko | grep alias alias: acpi*:HISI00B2:* alias: acpi*:HISI00B1:* alias: of:N*T*Chisilicon,hns-dsaf-v2C* alias: of:N*T*Chisilicon,hns-dsaf-v2 alias: of:N*T*Chisilicon,hns-dsaf-v1C* alias: of:N*T*Chisilicon,hns-dsaf-v1 Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c index 8e5b3f51b47b..66b99d2be3fe 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@ -2761,6 +2761,7 @@ static const struct of_device_id g_dsaf_match[] = { {.compatible = "hisilicon,hns-dsaf-v2"}, {} }; +MODULE_DEVICE_TABLE(of, g_dsaf_match); static struct platform_driver g_dsaf_driver = { .probe = hns_dsaf_probe, From 7097268509a7d6756d14ab422af7446f9bf53221 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 17 Oct 2016 11:05:43 -0300 Subject: [PATCH 201/884] net: qcom/emac: Fix module autoload for OF registration If the driver is built as a module, autoload won't work because the module alias information is not filled. So user-space can't match the registered device with the corresponding module. Export the module alias information using the MODULE_DEVICE_TABLE() macro. Before this patch: $ modinfo drivers/net/ethernet/qualcomm/emac/qcom-emac.ko | grep alias alias: platform:qcom-emac After this patch: $ modinfo drivers/net/ethernet/qualcomm/emac/qcom-emac.ko | grep alias alias: platform:qcom-emac alias: of:N*T*Cqcom,fsm9900-emacC* alias: of:N*T*Cqcom,fsm9900-emac Signed-off-by: Javier Martinez Canillas Acked-by: Timur Tabi Signed-off-by: David S. Miller --- drivers/net/ethernet/qualcomm/emac/emac.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c index 9bf3b2b82e95..4fede4b86538 100644 --- a/drivers/net/ethernet/qualcomm/emac/emac.c +++ b/drivers/net/ethernet/qualcomm/emac/emac.c @@ -575,6 +575,7 @@ static const struct of_device_id emac_dt_match[] = { }, {} }; +MODULE_DEVICE_TABLE(of, emac_dt_match); #if IS_ENABLED(CONFIG_ACPI) static const struct acpi_device_id emac_acpi_match[] = { From af40097e3eafc97f6f856c085360a0e696e1f319 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 17 Oct 2016 11:05:44 -0300 Subject: [PATCH 202/884] net: hisilicon: Fix hns_mdio module autoload for OF registration If the driver is built as a module, autoload won't work because the module alias information is not filled. So user-space can't match the registered device with the corresponding module. Export the module alias information using the MODULE_DEVICE_TABLE() macro. Before this patch: $ modinfo drivers/net/ethernet/hisilicon//hns_mdio.ko | grep alias alias: platform:Hi-HNS_MDIO alias: acpi*:HISI0141:* After this patch: $ modinfo drivers/net/ethernet/hisilicon//hns_mdio.ko | grep alias alias: platform:Hi-HNS_MDIO alias: of:N*T*Chisilicon,hns-mdioC* alias: of:N*T*Chisilicon,hns-mdio alias: of:N*T*Chisilicon,mdioC* alias: of:N*T*Chisilicon,mdio alias: acpi*:HISI0141:* Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns_mdio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/hisilicon/hns_mdio.c b/drivers/net/ethernet/hisilicon/hns_mdio.c index 33f4c483af0f..501eb2090ca6 100644 --- a/drivers/net/ethernet/hisilicon/hns_mdio.c +++ b/drivers/net/ethernet/hisilicon/hns_mdio.c @@ -563,6 +563,7 @@ static const struct of_device_id hns_mdio_match[] = { {.compatible = "hisilicon,hns-mdio"}, {} }; +MODULE_DEVICE_TABLE(of, hns_mdio_match); static const struct acpi_device_id hns_mdio_acpi_match[] = { { "HISI0141", 0 }, From 03eaae52537f4bbe2e565535f2aa38d80f92e994 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 17 Oct 2016 11:05:45 -0300 Subject: [PATCH 203/884] net: dsa: b53: Fix module autoload If the driver is built as a module, autoload won't work because the module alias information is not filled. So user-space can't match the registered device with the corresponding module. Export the module alias information using the MODULE_DEVICE_TABLE() macro. Before this patch: $ modinfo drivers/net/dsa/b53/b53_mmap.ko | grep alias $ After this patch: $ modinfo drivers/net/dsa/b53/b53_mmap.ko | grep alias alias: of:N*T*Cbrcm,bcm63xx-switchC* alias: of:N*T*Cbrcm,bcm63xx-switch alias: of:N*T*Cbrcm,bcm6368-switchC* alias: of:N*T*Cbrcm,bcm6368-switch alias: of:N*T*Cbrcm,bcm6328-switchC* alias: of:N*T*Cbrcm,bcm6328-switch alias: of:N*T*Cbrcm,bcm3384-switchC* alias: of:N*T*Cbrcm,bcm3384-switch Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. Miller --- drivers/net/dsa/b53/b53_mmap.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/dsa/b53/b53_mmap.c b/drivers/net/dsa/b53/b53_mmap.c index 76fb8552c9d9..ef63d24fef81 100644 --- a/drivers/net/dsa/b53/b53_mmap.c +++ b/drivers/net/dsa/b53/b53_mmap.c @@ -256,6 +256,7 @@ static const struct of_device_id b53_mmap_of_table[] = { { .compatible = "brcm,bcm63xx-switch" }, { /* sentinel */ }, }; +MODULE_DEVICE_TABLE(of, b53_mmap_of_table); static struct platform_driver b53_mmap_driver = { .probe = b53_mmap_probe, From 0822b43e4f08af3619a5dbf59f5036872c3ea323 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Mon, 17 Oct 2016 11:05:46 -0300 Subject: [PATCH 204/884] net: dsa: bcm_sf2: Fix module autoload for OF registration If the driver is built as a module, autoload won't work because the module alias information is not filled. So user-space can't match the registered device with the corresponding module. Export the module alias information using the MODULE_DEVICE_TABLE() macro. Before this patch: $ modinfo drivers/net/dsa/bcm_sf2.ko | grep alias alias: platform:brcm-sf2 After this patch: $ modinfo drivers/net/dsa/bcm_sf2.ko | grep alias alias: platform:brcm-sf2 alias: of:N*T*Cbrcm,bcm7445-switch-v4.0C* alias: of:N*T*Cbrcm,bcm7445-switch-v4.0 Signed-off-by: Javier Martinez Canillas Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index e218887f18b7..0427009bc924 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -1158,6 +1158,7 @@ static const struct of_device_id bcm_sf2_of_match[] = { { .compatible = "brcm,bcm7445-switch-v4.0" }, { /* sentinel */ }, }; +MODULE_DEVICE_TABLE(of, bcm_sf2_of_match); static struct platform_driver bcm_sf2_driver = { .probe = bcm_sf2_sw_probe, From 667f4bab81ea8357d260aa14bb6fb1a4834248d5 Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Wed, 5 Oct 2016 10:40:54 +1300 Subject: [PATCH 205/884] hwmon: (adm9240) handle temperature readings below 0 Unlike the temperature thresholds the temperature data is a 9-bit signed value. This allows and additional 0.5 degrees of precision on the reading but makes handling negative values slightly harder. In order to have sign-extension applied correctly the 9-bit value is stored in the upper bits of a signed 16-bit value. When presenting this in sysfs the value is shifted and scaled appropriately. Signed-off-by: Chris Packham Signed-off-by: Guenter Roeck --- drivers/hwmon/adm9240.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hwmon/adm9240.c b/drivers/hwmon/adm9240.c index 98114cef1e43..2fe1828bd10b 100644 --- a/drivers/hwmon/adm9240.c +++ b/drivers/hwmon/adm9240.c @@ -194,10 +194,10 @@ static struct adm9240_data *adm9240_update_device(struct device *dev) * 0.5'C per two measurement cycles thus ignore possible * but unlikely aliasing error on lsb reading. --Grant */ - data->temp = ((i2c_smbus_read_byte_data(client, + data->temp = (i2c_smbus_read_byte_data(client, ADM9240_REG_TEMP) << 8) | i2c_smbus_read_byte_data(client, - ADM9240_REG_TEMP_CONF)) / 128; + ADM9240_REG_TEMP_CONF); for (i = 0; i < 2; i++) { /* read fans */ data->fan[i] = i2c_smbus_read_byte_data(client, @@ -263,7 +263,7 @@ static ssize_t show_temp(struct device *dev, struct device_attribute *dummy, char *buf) { struct adm9240_data *data = adm9240_update_device(dev); - return sprintf(buf, "%d\n", data->temp * 500); /* 9-bit value */ + return sprintf(buf, "%d\n", data->temp / 128 * 500); /* 9-bit value */ } static ssize_t show_max(struct device *dev, struct device_attribute *devattr, From 94cdc5608b5561aeda80edda9c9223608a1da6fc Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 12 Oct 2016 09:24:52 +0300 Subject: [PATCH 206/884] hwmon: (max31790) potential ERR_PTR dereference We should only dereference "data" after we check if it is an error pointer. Fixes: 54187ff9d766 ('hwmon: (max31790) Convert to use new hwmon registration API') Signed-off-by: Dan Carpenter Signed-off-by: Guenter Roeck --- drivers/hwmon/max31790.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/max31790.c b/drivers/hwmon/max31790.c index bef84e085973..c1b9275978f9 100644 --- a/drivers/hwmon/max31790.c +++ b/drivers/hwmon/max31790.c @@ -268,11 +268,13 @@ static int max31790_read_pwm(struct device *dev, u32 attr, int channel, long *val) { struct max31790_data *data = max31790_update_device(dev); - u8 fan_config = data->fan_config[channel]; + u8 fan_config; if (IS_ERR(data)) return PTR_ERR(data); + fan_config = data->fan_config[channel]; + switch (attr) { case hwmon_pwm_input: *val = data->pwm[channel] >> 8; From 4fa507992f0a1063d7326abaf705f9408548349e Mon Sep 17 00:00:00 2001 From: Jitendra Bhivare Date: Thu, 13 Oct 2016 12:08:48 +0530 Subject: [PATCH 207/884] scsi: libiscsi: Fix locking in __iscsi_conn_send_pdu The code at free_task label in __iscsi_conn_send_pdu can get executed from blk_timeout_work which takes queue_lock using spin_lock_irq. back_lock taken with spin_unlock_bh will cause WARN_ON_ONCE. The code gets executed either with bottom half or IRQ disabled hence using spin_lock/spin_unlock for back_lock is safe. Signed-off-by: Jitendra Bhivare Reviewed-by: Hannes Reinecke Reviewed-by: Chris Leech Signed-off-by: Martin K. Petersen --- drivers/scsi/libiscsi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index c051694bfcb0..f9b6fba689ff 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -791,9 +791,9 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr, free_task: /* regular RX path uses back_lock */ - spin_lock_bh(&session->back_lock); + spin_lock(&session->back_lock); __iscsi_put_task(task); - spin_unlock_bh(&session->back_lock); + spin_unlock(&session->back_lock); return NULL; } From 7d2c0d643244311d0ce04fde373cd371ad1f1cad Mon Sep 17 00:00:00 2001 From: Jitendra Bhivare Date: Thu, 13 Oct 2016 12:08:49 +0530 Subject: [PATCH 208/884] scsi: be2iscsi: Replace _bh with _irqsave/irqrestore [ 3843.132217] WARNING: CPU: 20 PID: 1227 at kernel/softirq.c:150 __local_bh_enable_ip+0x6b/0x90 [ 3843.142815] Modules linked in: ... [ 3843.294328] CPU: 20 PID: 1227 Comm: kworker/20:1H Tainted: G E 4.8.0-rc1+ #3 [ 3843.304944] Hardware name: Dell Inc. PowerEdge R720/0X6H47, BIOS 1.4.8 10/25/2012 [ 3843.314798] Workqueue: kblockd blk_timeout_work [ 3843.321350] 0000000000000086 00000000a32f4533 ffff8802216d7bd8 ffffffff8135c3cf [ 3843.331146] 0000000000000000 0000000000000000 ffff8802216d7c18 ffffffff8108d661 [ 3843.340918] 00000096216d7c50 0000000000000200 ffff8802d07cc828 ffff8801b3632550 [ 3843.350687] Call Trace: [ 3843.354866] [] dump_stack+0x63/0x84 [ 3843.362061] [] __warn+0xd1/0xf0 [ 3843.368851] [] warn_slowpath_null+0x1d/0x20 [ 3843.376791] [] __local_bh_enable_ip+0x6b/0x90 [ 3843.384903] [] _raw_spin_unlock_bh+0x1e/0x20 [ 3843.392940] [] beiscsi_alloc_pdu+0x2f0/0x6e0 [be2iscsi] [ 3843.402076] [] __iscsi_conn_send_pdu+0xf8/0x370 [libiscsi] [ 3843.411549] [] iscsi_send_nopout+0xbe/0x110 [libiscsi] [ 3843.420639] [] iscsi_eh_cmd_timed_out+0x29b/0x2b0 [libiscsi] [ 3843.430339] [] scsi_times_out+0x5e/0x250 [ 3843.438119] [] blk_rq_timed_out+0x1f/0x60 [ 3843.446009] [] blk_timeout_work+0xad/0x150 [ 3843.454010] [] process_one_work+0x152/0x400 [ 3843.462114] [] worker_thread+0x125/0x4b0 [ 3843.469961] [] ? rescuer_thread+0x380/0x380 [ 3843.478116] [] kthread+0xd8/0xf0 [ 3843.485212] [] ret_from_fork+0x1f/0x40 [ 3843.492908] [] ? kthread_park+0x60/0x60 [ 3843.500715] ---[ end trace 57ec0a1d8f0dd3a0 ]--- [ 3852.328667] NMI watchdog: Watchdog detected hard LOCKUP on cpu 1Kernel panic - not syncing: Hard LOCKUP blk_timeout_work takes queue_lock spin_lock with interrupts disabled before invoking iscsi_eh_cmd_timed_out. This causes a WARN_ON_ONCE in spin_unlock_bh for wrb_lock/io_sgl_lock/mgmt_sgl_lock. CPU was kept busy in lot of bottom half work with interrupts disabled thus causing hard lock up. Signed-off-by: Jitendra Bhivare Reviewed-by: Hannes Reinecke Reviewed-by: Chris Leech Signed-off-by: Martin K. Petersen --- drivers/scsi/be2iscsi/be_main.c | 37 ++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c index 6a6906f847db..75bfa14ed5ba 100644 --- a/drivers/scsi/be2iscsi/be_main.c +++ b/drivers/scsi/be2iscsi/be_main.c @@ -900,8 +900,9 @@ void hwi_ring_cq_db(struct beiscsi_hba *phba, static struct sgl_handle *alloc_io_sgl_handle(struct beiscsi_hba *phba) { struct sgl_handle *psgl_handle; + unsigned long flags; - spin_lock_bh(&phba->io_sgl_lock); + spin_lock_irqsave(&phba->io_sgl_lock, flags); if (phba->io_sgl_hndl_avbl) { beiscsi_log(phba, KERN_INFO, BEISCSI_LOG_IO, "BM_%d : In alloc_io_sgl_handle," @@ -919,14 +920,16 @@ static struct sgl_handle *alloc_io_sgl_handle(struct beiscsi_hba *phba) phba->io_sgl_alloc_index++; } else psgl_handle = NULL; - spin_unlock_bh(&phba->io_sgl_lock); + spin_unlock_irqrestore(&phba->io_sgl_lock, flags); return psgl_handle; } static void free_io_sgl_handle(struct beiscsi_hba *phba, struct sgl_handle *psgl_handle) { - spin_lock_bh(&phba->io_sgl_lock); + unsigned long flags; + + spin_lock_irqsave(&phba->io_sgl_lock, flags); beiscsi_log(phba, KERN_INFO, BEISCSI_LOG_IO, "BM_%d : In free_,io_sgl_free_index=%d\n", phba->io_sgl_free_index); @@ -941,7 +944,7 @@ free_io_sgl_handle(struct beiscsi_hba *phba, struct sgl_handle *psgl_handle) "value there=%p\n", phba->io_sgl_free_index, phba->io_sgl_hndl_base [phba->io_sgl_free_index]); - spin_unlock_bh(&phba->io_sgl_lock); + spin_unlock_irqrestore(&phba->io_sgl_lock, flags); return; } phba->io_sgl_hndl_base[phba->io_sgl_free_index] = psgl_handle; @@ -950,7 +953,7 @@ free_io_sgl_handle(struct beiscsi_hba *phba, struct sgl_handle *psgl_handle) phba->io_sgl_free_index = 0; else phba->io_sgl_free_index++; - spin_unlock_bh(&phba->io_sgl_lock); + spin_unlock_irqrestore(&phba->io_sgl_lock, flags); } static inline struct wrb_handle * @@ -958,15 +961,16 @@ beiscsi_get_wrb_handle(struct hwi_wrb_context *pwrb_context, unsigned int wrbs_per_cxn) { struct wrb_handle *pwrb_handle; + unsigned long flags; - spin_lock_bh(&pwrb_context->wrb_lock); + spin_lock_irqsave(&pwrb_context->wrb_lock, flags); pwrb_handle = pwrb_context->pwrb_handle_base[pwrb_context->alloc_index]; pwrb_context->wrb_handles_available--; if (pwrb_context->alloc_index == (wrbs_per_cxn - 1)) pwrb_context->alloc_index = 0; else pwrb_context->alloc_index++; - spin_unlock_bh(&pwrb_context->wrb_lock); + spin_unlock_irqrestore(&pwrb_context->wrb_lock, flags); if (pwrb_handle) memset(pwrb_handle->pwrb, 0, sizeof(*pwrb_handle->pwrb)); @@ -1001,14 +1005,16 @@ beiscsi_put_wrb_handle(struct hwi_wrb_context *pwrb_context, struct wrb_handle *pwrb_handle, unsigned int wrbs_per_cxn) { - spin_lock_bh(&pwrb_context->wrb_lock); + unsigned long flags; + + spin_lock_irqsave(&pwrb_context->wrb_lock, flags); pwrb_context->pwrb_handle_base[pwrb_context->free_index] = pwrb_handle; pwrb_context->wrb_handles_available++; if (pwrb_context->free_index == (wrbs_per_cxn - 1)) pwrb_context->free_index = 0; else pwrb_context->free_index++; - spin_unlock_bh(&pwrb_context->wrb_lock); + spin_unlock_irqrestore(&pwrb_context->wrb_lock, flags); } /** @@ -1037,8 +1043,9 @@ free_wrb_handle(struct beiscsi_hba *phba, struct hwi_wrb_context *pwrb_context, static struct sgl_handle *alloc_mgmt_sgl_handle(struct beiscsi_hba *phba) { struct sgl_handle *psgl_handle; + unsigned long flags; - spin_lock_bh(&phba->mgmt_sgl_lock); + spin_lock_irqsave(&phba->mgmt_sgl_lock, flags); if (phba->eh_sgl_hndl_avbl) { psgl_handle = phba->eh_sgl_hndl_base[phba->eh_sgl_alloc_index]; phba->eh_sgl_hndl_base[phba->eh_sgl_alloc_index] = NULL; @@ -1056,14 +1063,16 @@ static struct sgl_handle *alloc_mgmt_sgl_handle(struct beiscsi_hba *phba) phba->eh_sgl_alloc_index++; } else psgl_handle = NULL; - spin_unlock_bh(&phba->mgmt_sgl_lock); + spin_unlock_irqrestore(&phba->mgmt_sgl_lock, flags); return psgl_handle; } void free_mgmt_sgl_handle(struct beiscsi_hba *phba, struct sgl_handle *psgl_handle) { - spin_lock_bh(&phba->mgmt_sgl_lock); + unsigned long flags; + + spin_lock_irqsave(&phba->mgmt_sgl_lock, flags); beiscsi_log(phba, KERN_INFO, BEISCSI_LOG_CONFIG, "BM_%d : In free_mgmt_sgl_handle," "eh_sgl_free_index=%d\n", @@ -1078,7 +1087,7 @@ free_mgmt_sgl_handle(struct beiscsi_hba *phba, struct sgl_handle *psgl_handle) "BM_%d : Double Free in eh SGL ," "eh_sgl_free_index=%d\n", phba->eh_sgl_free_index); - spin_unlock_bh(&phba->mgmt_sgl_lock); + spin_unlock_irqrestore(&phba->mgmt_sgl_lock, flags); return; } phba->eh_sgl_hndl_base[phba->eh_sgl_free_index] = psgl_handle; @@ -1088,7 +1097,7 @@ free_mgmt_sgl_handle(struct beiscsi_hba *phba, struct sgl_handle *psgl_handle) phba->eh_sgl_free_index = 0; else phba->eh_sgl_free_index++; - spin_unlock_bh(&phba->mgmt_sgl_lock); + spin_unlock_irqrestore(&phba->mgmt_sgl_lock, flags); } static void From 77f18a87186a87cab2a027335758a7244896084c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 11 Oct 2016 11:23:23 +0200 Subject: [PATCH 209/884] scsi: NCR5380: no longer mark irq probing as __init The g_NCR5380 has been converted to more regular probing, which means its probe function can now be invoked after the __init section is discarded, as pointed out by this kbuild warning: WARNING: drivers/scsi/built-in.o(.text+0x3a105): Section mismatch in reference from the function generic_NCR5380_isa_match() to the function .init.text:probe_intr() WARNING: drivers/scsi/built-in.o(.text+0x3a145): Section mismatch in reference from the function generic_NCR5380_isa_match() to the variable .init.data:probe_irq To make sure this works correctly in all cases, let's remove the __init and __initdata annotations. Fixes: a8cfbcaec0c1 ("scsi: g_NCR5380: Stop using scsi_module.c") Signed-off-by: Arnd Bergmann Acked-by: Finn Thain Signed-off-by: Martin K. Petersen --- drivers/scsi/NCR5380.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/NCR5380.c b/drivers/scsi/NCR5380.c index db2739079cbb..790babc5ef66 100644 --- a/drivers/scsi/NCR5380.c +++ b/drivers/scsi/NCR5380.c @@ -353,7 +353,7 @@ static void NCR5380_print_phase(struct Scsi_Host *instance) #endif -static int probe_irq __initdata; +static int probe_irq; /** * probe_intr - helper for IRQ autoprobe @@ -365,7 +365,7 @@ static int probe_irq __initdata; * used by the IRQ probe code. */ -static irqreturn_t __init probe_intr(int irq, void *dev_id) +static irqreturn_t probe_intr(int irq, void *dev_id) { probe_irq = irq; return IRQ_HANDLED; @@ -380,7 +380,7 @@ static irqreturn_t __init probe_intr(int irq, void *dev_id) * and then looking to see what interrupt actually turned up. */ -static int __init __maybe_unused NCR5380_probe_irq(struct Scsi_Host *instance, +static int __maybe_unused NCR5380_probe_irq(struct Scsi_Host *instance, int possible) { struct NCR5380_hostdata *hostdata = shost_priv(instance); From 91cb163e4d141c74e99639fbee7c2a6332c92901 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 17 Oct 2016 13:38:14 +0100 Subject: [PATCH 210/884] arm64: sysreg: Fix use of XZR in write_sysreg_s Commit 8a71f0c656e0 ("arm64: sysreg: replace open-coded mrs_s/msr_s with {read,write}_sysreg_s") introduced a write_sysreg_s macro for writing to system registers that are not supported by binutils. Unfortunately, this was implemented with the wrong template (%0 vs %x0), so in the case that we are writing a constant 0, we will generate invalid instruction syntax and bail with a cryptic assembler error: | Error: constant expression required This patch fixes the template. Acked-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index e8d46e8e6079..6c80b3699cb8 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -286,7 +286,7 @@ asm( #define write_sysreg_s(v, r) do { \ u64 __val = (u64)v; \ - asm volatile("msr_s " __stringify(r) ", %0" : : "rZ" (__val)); \ + asm volatile("msr_s " __stringify(r) ", %x0" : : "rZ" (__val)); \ } while (0) static inline void config_sctlr_el1(u32 clear, u32 set) From b052b07c39d593c9954a84d5bbe1563999483f38 Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Mon, 17 Oct 2016 21:20:07 +0200 Subject: [PATCH 211/884] dm raid: fix activation of existing raid4/10 devices dm-raid 1.9.0 fails to activate existing RAID4/10 devices that have the old superblock format (which does not have takeover/reshaping support that was added via commit 33e53f06850f). Fix validation path for old superblocks by reverting to the old raid4 layout and basing checks on mddev->new_{level,layout,...} members in super_init_validation(). Cc: stable@vger.kernel.org # 4.8 Signed-off-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer --- Documentation/device-mapper/dm-raid.txt | 1 + drivers/md/dm-raid.c | 12 +++++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/Documentation/device-mapper/dm-raid.txt b/Documentation/device-mapper/dm-raid.txt index e5b6497116f4..c75b64a85859 100644 --- a/Documentation/device-mapper/dm-raid.txt +++ b/Documentation/device-mapper/dm-raid.txt @@ -309,3 +309,4 @@ Version History with a reshape in progress. 1.9.0 Add support for RAID level takeover/reshape/region size and set size reduction. +1.9.1 Fix activation of existing RAID 4/10 mapped devices diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 2a3970097991..6d53810963f7 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -266,7 +266,7 @@ static struct raid_type { {"raid10_offset", "raid10 offset (striped mirrors)", 0, 2, 10, ALGORITHM_RAID10_OFFSET}, {"raid10_near", "raid10 near (striped mirrors)", 0, 2, 10, ALGORITHM_RAID10_NEAR}, {"raid10", "raid10 (striped mirrors)", 0, 2, 10, ALGORITHM_RAID10_DEFAULT}, - {"raid4", "raid4 (dedicated last parity disk)", 1, 2, 4, ALGORITHM_PARITY_N}, /* raid4 layout = raid5_n */ + {"raid4", "raid4 (dedicated first parity disk)", 1, 2, 5, ALGORITHM_PARITY_0}, /* raid4 layout = raid5_0 */ {"raid5_n", "raid5 (dedicated last parity disk)", 1, 2, 5, ALGORITHM_PARITY_N}, {"raid5_ls", "raid5 (left symmetric)", 1, 2, 5, ALGORITHM_LEFT_SYMMETRIC}, {"raid5_rs", "raid5 (right symmetric)", 1, 2, 5, ALGORITHM_RIGHT_SYMMETRIC}, @@ -2087,11 +2087,11 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev) /* * No takeover/reshaping, because we don't have the extended v1.9.0 metadata */ - if (le32_to_cpu(sb->level) != mddev->level) { + if (le32_to_cpu(sb->level) != mddev->new_level) { DMERR("Reshaping/takeover raid sets not yet supported. (raid level/stripes/size change)"); return -EINVAL; } - if (le32_to_cpu(sb->layout) != mddev->layout) { + if (le32_to_cpu(sb->layout) != mddev->new_layout) { DMERR("Reshaping raid sets not yet supported. (raid layout change)"); DMERR(" 0x%X vs 0x%X", le32_to_cpu(sb->layout), mddev->layout); DMERR(" Old layout: %s w/ %d copies", @@ -2102,7 +2102,7 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev) raid10_md_layout_to_copies(mddev->layout)); return -EINVAL; } - if (le32_to_cpu(sb->stripe_sectors) != mddev->chunk_sectors) { + if (le32_to_cpu(sb->stripe_sectors) != mddev->new_chunk_sectors) { DMERR("Reshaping raid sets not yet supported. (stripe sectors change)"); return -EINVAL; } @@ -2115,6 +2115,8 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev) return -EINVAL; } + DMINFO("Discovered old metadata format; upgrading to extended metadata format"); + /* Table line is checked vs. authoritative superblock */ rs_set_new(rs); } @@ -3647,7 +3649,7 @@ static void raid_resume(struct dm_target *ti) static struct target_type raid_target = { .name = "raid", - .version = {1, 9, 0}, + .version = {1, 9, 1}, .module = THIS_MODULE, .ctr = raid_ctr, .dtr = raid_dtr, From 1b283eea6228880b765bc40fe4e555416437ce58 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 7 Oct 2016 10:52:17 +0200 Subject: [PATCH 212/884] ARM: dts: fix the SD card on the Snowball This fixes a very annoying regression on the Snowball SD card that has been around for a while. It turns out that the device tree does not configure the direction pins properly, nor sets up the pins for the voltage converter properly at boot. Unless all things are correctly set up, the feedback clock will not work, and makes the driver spew messages in the console (but it works, very slowly): root@Ux500:/ mount /dev/mmcblk0p2 /mnt/ [ 9.953460] mmci-pl18x 80126000.sdi0_per1: error during DMA transfer! [ 9.960296] mmcblk0: error -110 sending status command, retrying [ 9.966461] mmcblk0: error -110 sending status command, retrying [ 9.972534] mmcblk0: error -110 sending status command, aborting Fix this by rectifying the device tree to correspond to that of the Ux500 HREF boards plus the DAT31DIR setting that is unique for the Snowball, and things start working smoothly. Add in the SDR12 and SDR25 modes which this host can do without any problems. I don't know if this has ever been correct, sadly. It works after this patch. Cc: stable@vger.kernel.org Reported-by: Daniel Lezcano Cc: Ulf Hansson Signed-off-by: Linus Walleij Signed-off-by: Olof Johansson --- arch/arm/boot/dts/ste-snowball.dts | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/ste-snowball.dts b/arch/arm/boot/dts/ste-snowball.dts index b3df1c60d465..386eee6de232 100644 --- a/arch/arm/boot/dts/ste-snowball.dts +++ b/arch/arm/boot/dts/ste-snowball.dts @@ -239,14 +239,25 @@ arm,primecell-periphid = <0x10480180>; max-frequency = <100000000>; bus-width = <4>; + cap-sd-highspeed; cap-mmc-highspeed; + sd-uhs-sdr12; + sd-uhs-sdr25; + /* All direction control is used */ + st,sig-dir-cmd; + st,sig-dir-dat0; + st,sig-dir-dat2; + st,sig-dir-dat31; + st,sig-pin-fbclk; + full-pwr-cycle; vmmc-supply = <&ab8500_ldo_aux3_reg>; vqmmc-supply = <&vmmci>; pinctrl-names = "default", "sleep"; pinctrl-0 = <&sdi0_default_mode>; pinctrl-1 = <&sdi0_sleep_mode>; - cd-gpios = <&gpio6 26 GPIO_ACTIVE_LOW>; // 218 + /* GPIO218 MMC_CD */ + cd-gpios = <&gpio6 26 GPIO_ACTIVE_LOW>; status = "okay"; }; @@ -549,7 +560,7 @@ /* VMMCI level-shifter enable */ snowball_cfg3 { pins = "GPIO217_AH12"; - ste,config = <&gpio_out_lo>; + ste,config = <&gpio_out_hi>; }; /* VMMCI level-shifter voltage select */ snowball_cfg4 { From 5fac7e8405a951f536dfcea09c6f6adb904a08a8 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 4 Oct 2016 13:56:19 +0200 Subject: [PATCH 213/884] bus: qcom-ebi2: depend on ARCH_QCOM or COMPILE_TEST This hides the option for people who do not want their Kconfig vision cluttered (i.e. x86) and enables compile testing apart from the supported main arch. Cc: Stephen Boyd Cc: Arnd Bergmann Signed-off-by: Linus Walleij Signed-off-by: Olof Johansson --- drivers/bus/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bus/Kconfig b/drivers/bus/Kconfig index 7010dcac9328..78751057164a 100644 --- a/drivers/bus/Kconfig +++ b/drivers/bus/Kconfig @@ -111,6 +111,7 @@ config OMAP_OCP2SCP config QCOM_EBI2 bool "Qualcomm External Bus Interface 2 (EBI2)" depends on HAS_IOMEM + depends on ARCH_QCOM || COMPILE_TEST help Say y here to enable support for the Qualcomm External Bus Interface 2, which can be used to connect things like NAND Flash, From 70e1a28fe1e3fe1bddcb09e8eee25d8be1610b49 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Thu, 29 Sep 2016 18:51:20 +0800 Subject: [PATCH 214/884] MAINTAINERS: add myself as Marvell berlin SoC maintainer I would like to take maintainership for Marvell berlin SoCs. Signed-off-by: Jisheng Zhang Acked-by: Sebastian Hesselbarth Signed-off-by: Olof Johansson --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 1cd38a7e0064..25f543c1814d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1442,6 +1442,7 @@ F: drivers/cpufreq/mvebu-cpufreq.c F: arch/arm/configs/mvebu_*_defconfig ARM/Marvell Berlin SoC support +M: Jisheng Zhang M: Sebastian Hesselbarth L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained From 2723605169d8f58ebc2881d2b8a59e6ee6fe074a Mon Sep 17 00:00:00 2001 From: Scott Branden Date: Sat, 8 Oct 2016 13:41:04 -0700 Subject: [PATCH 215/884] ARM: multi_v7_defconfig: Enable Intel e1000e driver Enable support for the Intel e1000e driver Signed-off-by: Ray Jui Signed-off-by: Scott Branden Signed-off-by: Olof Johansson --- arch/arm/configs/multi_v7_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index 437d0740dec6..11f37ed1dbff 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -850,6 +850,7 @@ CONFIG_PWM_SUN4I=y CONFIG_PWM_TEGRA=y CONFIG_PWM_VT8500=y CONFIG_PHY_HIX5HD2_SATA=y +CONFIG_E1000E=y CONFIG_PWM_STI=y CONFIG_PWM_BCM2835=y CONFIG_PWM_BRCMSTB=m From 8236d9ac4cf2e51e4355c411612cfbaec8d31942 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 17 Oct 2016 00:11:14 +0900 Subject: [PATCH 216/884] clk: uniphier: add system clock support for sLD3 SoC I do not know why, but I missed to add this compatible string in the initial commit of this driver. Signed-off-by: Masahiro Yamada Signed-off-by: Stephen Boyd --- drivers/clk/uniphier/clk-uniphier-core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/clk/uniphier/clk-uniphier-core.c b/drivers/clk/uniphier/clk-uniphier-core.c index 5ffb898d0839..f4e0f6be5f33 100644 --- a/drivers/clk/uniphier/clk-uniphier-core.c +++ b/drivers/clk/uniphier/clk-uniphier-core.c @@ -110,6 +110,10 @@ static int uniphier_clk_remove(struct platform_device *pdev) static const struct of_device_id uniphier_clk_match[] = { /* System clock */ + { + .compatible = "socionext,uniphier-sld3-clock", + .data = uniphier_sld3_sys_clk_data, + }, { .compatible = "socionext,uniphier-ld4-clock", .data = uniphier_ld4_sys_clk_data, From c0ce317f0c8863d309e7cfb6f6c0fe86e38c6d86 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 17 Oct 2016 00:25:55 +0900 Subject: [PATCH 217/884] clk: uniphier: fix type of variable passed to regmap_read() The 3rd argument of regmap_read() takes a pointer to unsigned int. This driver is saved just because u32 happens to be typedef'ed as unsigned int, but we should not rely on that fact. Change the variable type just in case. Signed-off-by: Masahiro Yamada Signed-off-by: Stephen Boyd --- drivers/clk/uniphier/clk-uniphier-mux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/uniphier/clk-uniphier-mux.c b/drivers/clk/uniphier/clk-uniphier-mux.c index 15a2f2cbe0d9..2c243a894f3b 100644 --- a/drivers/clk/uniphier/clk-uniphier-mux.c +++ b/drivers/clk/uniphier/clk-uniphier-mux.c @@ -42,7 +42,7 @@ static u8 uniphier_clk_mux_get_parent(struct clk_hw *hw) struct uniphier_clk_mux *mux = to_uniphier_clk_mux(hw); int num_parents = clk_hw_get_num_parents(hw); int ret; - u32 val; + unsigned int val; u8 i; ret = regmap_read(mux->regmap, mux->reg, &val); From 34b89b2967f284937be6759936ef3dc4d3aff2d0 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Sun, 16 Oct 2016 10:45:07 -0300 Subject: [PATCH 218/884] clk: samsung: clk-exynos-audss: Fix module autoload If the driver is built as a module, autoload won't work because the module alias information is not filled. So user-space can't match the registered device with the corresponding module. Export the module alias information using the MODULE_DEVICE_TABLE() macro. Before this patch: $ modinfo drivers/clk/samsung/clk-exynos-audss.ko | grep alias alias: platform:exynos-audss-clk After this patch: $ modinfo drivers/clk/samsung/clk-exynos-audss.ko | grep alias alias: platform:exynos-audss-clk alias: of:N*T*Csamsung,exynos5420-audss-clockC* alias: of:N*T*Csamsung,exynos5420-audss-clock alias: of:N*T*Csamsung,exynos5410-audss-clockC* alias: of:N*T*Csamsung,exynos5410-audss-clock alias: of:N*T*Csamsung,exynos5250-audss-clockC* alias: of:N*T*Csamsung,exynos5250-audss-clock alias: of:N*T*Csamsung,exynos4210-audss-clockC* alias: of:N*T*Csamsung,exynos4210-audss-clock Fixes: 4d252fd5719b ("clk: samsung: Allow modular build of the Audio Subsystem CLKCON driver") Signed-off-by: Javier Martinez Canillas Reviewed-by: Krzysztof Kozlowski Tested-by: Krzysztof Kozlowski Signed-off-by: Stephen Boyd --- drivers/clk/samsung/clk-exynos-audss.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/samsung/clk-exynos-audss.c b/drivers/clk/samsung/clk-exynos-audss.c index 51d152f735cc..17e68a724945 100644 --- a/drivers/clk/samsung/clk-exynos-audss.c +++ b/drivers/clk/samsung/clk-exynos-audss.c @@ -106,6 +106,7 @@ static const struct of_device_id exynos_audss_clk_of_match[] = { }, { }, }; +MODULE_DEVICE_TABLE(of, exynos_audss_clk_of_match); static void exynos_audss_clk_teardown(void) { From 234d511d8c158d62f73f1a818eb4dd494a13a6e3 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Fri, 14 Oct 2016 14:44:13 +0200 Subject: [PATCH 219/884] clk: mediatek: Add hardware dependency Only propose the mediatek clock drivers on this platform, unless build-testing. Signed-off-by: Jean Delvare Cc: Shunli Wang Cc: James Liao Cc: Erin Lo Cc: Matthias Brugger Cc: Michael Turquette Reviewed-by: Matthias Brugger Signed-off-by: Stephen Boyd --- drivers/clk/mediatek/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/mediatek/Kconfig b/drivers/clk/mediatek/Kconfig index 380c372d528e..f042bd2a6a99 100644 --- a/drivers/clk/mediatek/Kconfig +++ b/drivers/clk/mediatek/Kconfig @@ -8,6 +8,7 @@ config COMMON_CLK_MEDIATEK config COMMON_CLK_MT8135 bool "Clock driver for Mediatek MT8135" + depends on ARCH_MEDIATEK || COMPILE_TEST select COMMON_CLK_MEDIATEK default ARCH_MEDIATEK ---help--- @@ -15,6 +16,7 @@ config COMMON_CLK_MT8135 config COMMON_CLK_MT8173 bool "Clock driver for Mediatek MT8173" + depends on ARCH_MEDIATEK || COMPILE_TEST select COMMON_CLK_MEDIATEK default ARCH_MEDIATEK ---help--- From 339e1e54891c339b30023c9cc8a005cbf65a3c0c Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Sat, 8 Oct 2016 16:59:38 +0800 Subject: [PATCH 220/884] clk: core: add __init decoration for CLK_OF_DECLARE_DRIVER function The new introduced macro CLK_OF_DECLARE_DRIVER is usually used to declare clock driver init functions, which are mostly decorated with __init. Add __init decoration for CLK_OF_DECLARE_DRIVER function to avoid causing section mismatch warnings on client clock drivers. Signed-off-by: Shawn Guo Fixes: c7296c51ce5d ("clk: core: New macro CLK_OF_DECLARE_DRIVER") Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index af596381fa0f..a428aec36ace 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -785,7 +785,7 @@ extern struct of_device_id __clk_of_table; * routines, one at of_clk_init(), and one at platform device probe */ #define CLK_OF_DECLARE_DRIVER(name, compat, fn) \ - static void name##_of_clk_init_driver(struct device_node *np) \ + static void __init name##_of_clk_init_driver(struct device_node *np) \ { \ of_node_clear_flag(np, OF_POPULATED); \ fn(np); \ From 981e1bea55e56abdb16505502e4a69ff868e87d3 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Thu, 29 Sep 2016 16:28:55 +0200 Subject: [PATCH 221/884] clk: mvebu: armada-37xx-periph: Fix the clock provider registration While trying using a peripheral clock on a driver, I saw that the clock pointer returned by the provider was NULL. The problem was a missing indirection. It was the pointer stored in the hws array which needed to be updated not the value it contains. Signed-off-by: Gregory CLEMENT Fixes: 8ca4746a78ab ("clk: mvebu: Add the peripheral clock driver for Armada 3700") Signed-off-by: Stephen Boyd --- drivers/clk/mvebu/armada-37xx-periph.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/clk/mvebu/armada-37xx-periph.c b/drivers/clk/mvebu/armada-37xx-periph.c index 45905fc0d75b..d5dfbad4ceab 100644 --- a/drivers/clk/mvebu/armada-37xx-periph.c +++ b/drivers/clk/mvebu/armada-37xx-periph.c @@ -305,7 +305,7 @@ static const struct of_device_id armada_3700_periph_clock_of_match[] = { }; static int armada_3700_add_composite_clk(const struct clk_periph_data *data, void __iomem *reg, spinlock_t *lock, - struct device *dev, struct clk_hw *hw) + struct device *dev, struct clk_hw **hw) { const struct clk_ops *mux_ops = NULL, *gate_ops = NULL, *rate_ops = NULL; @@ -353,13 +353,13 @@ static int armada_3700_add_composite_clk(const struct clk_periph_data *data, } } - hw = clk_hw_register_composite(dev, data->name, data->parent_names, + *hw = clk_hw_register_composite(dev, data->name, data->parent_names, data->num_parents, mux_hw, mux_ops, rate_hw, rate_ops, gate_hw, gate_ops, CLK_IGNORE_UNUSED); - if (IS_ERR(hw)) - return PTR_ERR(hw); + if (IS_ERR(*hw)) + return PTR_ERR(*hw); return 0; } @@ -400,7 +400,7 @@ static int armada_3700_periph_clock_probe(struct platform_device *pdev) spin_lock_init(&driver_data->lock); for (i = 0; i < num_periph; i++) { - struct clk_hw *hw = driver_data->hw_data->hws[i]; + struct clk_hw **hw = &driver_data->hw_data->hws[i]; if (armada_3700_add_composite_clk(&data[i], reg, &driver_data->lock, dev, hw)) From 1c7032258d568f9a7aeb4c541786699d9a219a2a Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Thu, 6 Oct 2016 11:59:59 -0300 Subject: [PATCH 222/884] clk: max77686: fix number of clocks setup for clk_hw based registration The commit 9b4cac33adc7 ("clk: max77686: Migrate to clk_hw based OF and registration APIs") converted the driver to use the new provider API to register clocks using clk_hw. But unfortunately, in the conversion it missed to set the num_clks value which lead to the following error when trying to register a clk provider: [ 1.963782] of_clk_max77686_get: invalid index 0 [ 1.967460] ERROR: could not get clock /rtc@10070000:rtc_src(1) [ 1.973638] s3c-rtc 10070000.rtc: failed to find rtc source clock Fix it by correctly set the max77686_clk_driver_data num_clks member. Fixes: 9b4cac33adc7 ("clk: max77686: Migrate to clk_hw based OF and registration APIs") Reported-by: Markus Reichl Suggested-by: Tobias Jakobi Signed-off-by: Javier Martinez Canillas Tested-by: Markus Reichl Reviewed-by: Chanwoo Choi Reviewed-by: Krzysztof Kozlowski Signed-off-by: Stephen Boyd --- drivers/clk/clk-max77686.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/clk-max77686.c b/drivers/clk/clk-max77686.c index b637f5979023..eb953d3b0b69 100644 --- a/drivers/clk/clk-max77686.c +++ b/drivers/clk/clk-max77686.c @@ -216,6 +216,7 @@ static int max77686_clk_probe(struct platform_device *pdev) return -EINVAL; } + drv_data->num_clks = num_clks; drv_data->max_clk_data = devm_kcalloc(dev, num_clks, sizeof(*drv_data->max_clk_data), GFP_KERNEL); From c4e634ce412d97f0e61223b2a5b3f8f9600cd4dc Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 30 Sep 2016 10:07:27 -0700 Subject: [PATCH 223/884] clk: bcm2835: Clamp the PLL's requested rate to the hardware limits. Fixes setting low-resolution video modes on HDMI. Now the PLLH_PIX divider adjusts itself until the PLLH is within bounds. Signed-off-by: Eric Anholt Signed-off-by: Stephen Boyd --- drivers/clk/bcm/clk-bcm2835.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/clk/bcm/clk-bcm2835.c b/drivers/clk/bcm/clk-bcm2835.c index b68bf573dcfb..8c7763fd9efc 100644 --- a/drivers/clk/bcm/clk-bcm2835.c +++ b/drivers/clk/bcm/clk-bcm2835.c @@ -502,8 +502,12 @@ static long bcm2835_pll_rate_from_divisors(unsigned long parent_rate, static long bcm2835_pll_round_rate(struct clk_hw *hw, unsigned long rate, unsigned long *parent_rate) { + struct bcm2835_pll *pll = container_of(hw, struct bcm2835_pll, hw); + const struct bcm2835_pll_data *data = pll->data; u32 ndiv, fdiv; + rate = clamp(rate, data->min_rate, data->max_rate); + bcm2835_pll_choose_ndiv_and_fdiv(rate, *parent_rate, &ndiv, &fdiv); return bcm2835_pll_rate_from_divisors(*parent_rate, ndiv, fdiv, 1); @@ -608,13 +612,6 @@ static int bcm2835_pll_set_rate(struct clk_hw *hw, u32 ana[4]; int i; - if (rate < data->min_rate || rate > data->max_rate) { - dev_err(cprman->dev, "%s: rate out of spec: %lu vs (%lu, %lu)\n", - clk_hw_get_name(hw), rate, - data->min_rate, data->max_rate); - return -EINVAL; - } - if (rate > data->max_fb_rate) { use_fb_prediv = true; rate /= 2; From 4aa6c99d31c0cc471b7f243f5d314391a1abcaf3 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Fri, 30 Sep 2016 10:33:59 +0200 Subject: [PATCH 224/884] clk: mvebu: armada-37xx-periph: Fix the clock gate flag For the gate part of the peripheral clock setting the bit disables the clock and clearing it enables the clock. This is not the default behavior of clk_gate component, so we need to use the CLK_GATE_SET_TO_DISABLE flag. Signed-off-by: Gregory CLEMENT Fixes: 8ca4746a78ab ("clk: mvebu: Add the peripheral clock driver for Armada 3700") Signed-off-by: Stephen Boyd --- drivers/clk/mvebu/armada-37xx-periph.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clk/mvebu/armada-37xx-periph.c b/drivers/clk/mvebu/armada-37xx-periph.c index d5dfbad4ceab..cecb0fdfaef6 100644 --- a/drivers/clk/mvebu/armada-37xx-periph.c +++ b/drivers/clk/mvebu/armada-37xx-periph.c @@ -329,6 +329,7 @@ static int armada_3700_add_composite_clk(const struct clk_periph_data *data, gate->lock = lock; gate_ops = gate_hw->init->ops; gate->reg = reg + (u64)gate->reg; + gate->flags = CLK_GATE_SET_TO_DISABLE; } if (data->rate_hw) { From d3397484bb5b8534289a630c1a78500ff4f2fbf4 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Sat, 8 Oct 2016 21:38:12 +0800 Subject: [PATCH 225/884] clk: hi6220: use CLK_OF_DECLARE_DRIVER for sysctrl and mediactrl clock init The hi6220-sysctrl and hi6220-mediactrl are not only clock provider but also reset controller. It worked fine that single sysctrl/mediactrl device node in DT can be used to initialize clock driver and populate platform device for reset controller. But it stops working after commit 989eafd0b609 ("clk: core: Avoid double initialization of clocks") gets merged. The commit sets flag OF_POPULATED during clock initialization to skip the platform device populating for the same device node. On hi6220, it effectively makes hi6220-sysctrl reset driver not probe any more. The patch changes hi6220 sysctrl and mediactrl clock init macro from CLK_OF_DECLARE to CLK_OF_DECLARE_DRIVER, so that the reset driver using the same hardware block can continue working. Signed-off-by: Shawn Guo Tested-by: John Stultz Signed-off-by: Stephen Boyd --- drivers/clk/hisilicon/clk-hi6220.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/hisilicon/clk-hi6220.c b/drivers/clk/hisilicon/clk-hi6220.c index fe364e63f8de..c0e8e1f196aa 100644 --- a/drivers/clk/hisilicon/clk-hi6220.c +++ b/drivers/clk/hisilicon/clk-hi6220.c @@ -195,7 +195,7 @@ static void __init hi6220_clk_sys_init(struct device_node *np) hi6220_clk_register_divider(hi6220_div_clks_sys, ARRAY_SIZE(hi6220_div_clks_sys), clk_data); } -CLK_OF_DECLARE(hi6220_clk_sys, "hisilicon,hi6220-sysctrl", hi6220_clk_sys_init); +CLK_OF_DECLARE_DRIVER(hi6220_clk_sys, "hisilicon,hi6220-sysctrl", hi6220_clk_sys_init); /* clocks in media controller */ @@ -252,7 +252,7 @@ static void __init hi6220_clk_media_init(struct device_node *np) hi6220_clk_register_divider(hi6220_div_clks_media, ARRAY_SIZE(hi6220_div_clks_media), clk_data); } -CLK_OF_DECLARE(hi6220_clk_media, "hisilicon,hi6220-mediactrl", hi6220_clk_media_init); +CLK_OF_DECLARE_DRIVER(hi6220_clk_media, "hisilicon,hi6220-mediactrl", hi6220_clk_media_init); /* clocks in pmctrl */ From 43daa01323da37a3692cabe1579ef5c2c4372e06 Mon Sep 17 00:00:00 2001 From: Liu Ying Date: Mon, 10 Oct 2016 14:50:06 +0800 Subject: [PATCH 226/884] drm/imx: ipuv3-plane: Switch EBA buffer only when we don't need modeset We added active plane reconfiguration support by forcing a full modeset operation. So, looking at old_plane_state->fb to determine whether we need to switch EBA buffer(for hardware double buffering) in ipu_plane_atomic_set_base() or not is no more correct. Instead, we should do that only when we don't need modeset, otherwise, we initialize the two EBA buffers with the buffer address. Fixes: c6c1f9bc798b ("drm/imx: Add active plane reconfiguration support") Cc: stable@vger.kernel.org # 4.8 Signed-off-by: Liu Ying Signed-off-by: Philipp Zabel --- drivers/gpu/drm/imx/ipuv3-plane.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c index ce22d0a0ddc8..f5861d9b0df7 100644 --- a/drivers/gpu/drm/imx/ipuv3-plane.c +++ b/drivers/gpu/drm/imx/ipuv3-plane.c @@ -108,6 +108,7 @@ static void ipu_plane_atomic_set_base(struct ipu_plane *ipu_plane, { struct drm_plane *plane = &ipu_plane->base; struct drm_plane_state *state = plane->state; + struct drm_crtc_state *crtc_state = state->crtc->state; struct drm_framebuffer *fb = state->fb; unsigned long eba, ubo, vbo; int active; @@ -149,7 +150,7 @@ static void ipu_plane_atomic_set_base(struct ipu_plane *ipu_plane, break; } - if (old_state->fb) { + if (!drm_atomic_crtc_needs_modeset(crtc_state)) { active = ipu_idmac_get_current_buffer(ipu_plane->ipu_ch); ipu_cpmem_set_buffer(ipu_plane->ipu_ch, !active, eba); ipu_idmac_select_buffer(ipu_plane->ipu_ch, !active); From 81d553545a1510ff7c7c00cbc9b57d6172d411a4 Mon Sep 17 00:00:00 2001 From: Liu Ying Date: Mon, 10 Oct 2016 14:50:07 +0800 Subject: [PATCH 227/884] drm/imx: ipuv3-plane: Skip setting u/vbo only when we don't need modeset We added active plane reconfiguration support by forcing a full modeset operation. So, looking at old_plane_state->fb to determine whether we need to set u/v offset(aka, u/vbo for IPUv3) in ipu_plane_atomic_set_base() or not is no more correct. Instead, we should do that only when we don't need modeset. Fixes: c6c1f9bc798b ("drm/imx: Add active plane reconfiguration support") Cc: stable@vger.kernel.org # 4.8 Signed-off-by: Liu Ying Signed-off-by: Philipp Zabel --- drivers/gpu/drm/imx/ipuv3-plane.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c index f5861d9b0df7..cfb34b595d15 100644 --- a/drivers/gpu/drm/imx/ipuv3-plane.c +++ b/drivers/gpu/drm/imx/ipuv3-plane.c @@ -103,8 +103,7 @@ drm_plane_state_to_vbo(struct drm_plane_state *state) (state->src_x >> 16) / 2 - eba; } -static void ipu_plane_atomic_set_base(struct ipu_plane *ipu_plane, - struct drm_plane_state *old_state) +static void ipu_plane_atomic_set_base(struct ipu_plane *ipu_plane) { struct drm_plane *plane = &ipu_plane->base; struct drm_plane_state *state = plane->state; @@ -118,7 +117,7 @@ static void ipu_plane_atomic_set_base(struct ipu_plane *ipu_plane, switch (fb->pixel_format) { case DRM_FORMAT_YUV420: case DRM_FORMAT_YVU420: - if (old_state->fb) + if (!drm_atomic_crtc_needs_modeset(crtc_state)) break; /* @@ -393,7 +392,7 @@ static void ipu_plane_atomic_update(struct drm_plane *plane, struct drm_crtc_state *crtc_state = state->crtc->state; if (!drm_atomic_crtc_needs_modeset(crtc_state)) { - ipu_plane_atomic_set_base(ipu_plane, old_state); + ipu_plane_atomic_set_base(ipu_plane); return; } } @@ -438,7 +437,7 @@ static void ipu_plane_atomic_update(struct drm_plane *plane, ipu_cpmem_set_high_priority(ipu_plane->ipu_ch); ipu_idmac_set_double_buffer(ipu_plane->ipu_ch, 1); ipu_cpmem_set_stride(ipu_plane->ipu_ch, state->fb->pitches[0]); - ipu_plane_atomic_set_base(ipu_plane, old_state); + ipu_plane_atomic_set_base(ipu_plane); ipu_plane_enable(ipu_plane); } From 8cca354816adacb91280e19c50f81ffddc0460a4 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 13 Oct 2016 11:53:21 +0300 Subject: [PATCH 228/884] drm/imx: drm_dev_alloc() returns error pointers We are checking for NULL here, when we should be checking for error pointers. Fixes: 54db5decce17 ("drm/imx: drop deprecated load/unload drm_driver ops") Signed-off-by: Dan Carpenter Reviewed-by: Lucas Stach Signed-off-by: Philipp Zabel --- drivers/gpu/drm/imx/imx-drm-core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/imx/imx-drm-core.c b/drivers/gpu/drm/imx/imx-drm-core.c index b084c571b23f..9672b579f950 100644 --- a/drivers/gpu/drm/imx/imx-drm-core.c +++ b/drivers/gpu/drm/imx/imx-drm-core.c @@ -357,8 +357,8 @@ static int imx_drm_bind(struct device *dev) int ret; drm = drm_dev_alloc(&imx_drm_driver, dev); - if (!drm) - return -ENOMEM; + if (IS_ERR(drm)) + return PTR_ERR(drm); imxdrm = devm_kzalloc(dev, sizeof(*imxdrm), GFP_KERNEL); if (!imxdrm) { From e73aca5184ad9fc948ba22b4d35dce11db35bb25 Mon Sep 17 00:00:00 2001 From: Liu Ying Date: Tue, 18 Oct 2016 16:44:03 +0800 Subject: [PATCH 229/884] drm/imx: ipuv3-plane: Access old u/vbo properly in ->atomic_check for YU12/YV12 Before accessing the u/v offset(aka, u/vbo for IPUv3) of the old plane state's relevant fb, we should make sure the fb is in YU12 or YV12 pixel format(which are the two YUV pixel formats we support only), otherwise, we are likely to trigger BUG_ON() in drm_plane_state_to_u/vbo() since the fb's pixel format is probably not YU12 or YV12. Link: https://bugs.freedesktop.org/show_bug.cgi?id=98150 Fixes: c6c1f9bc798b ("drm/imx: Add active plane reconfiguration support") Cc: stable@vger.kernel.org # 4.8 Signed-off-by: Liu Ying Signed-off-by: Philipp Zabel --- drivers/gpu/drm/imx/ipuv3-plane.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c index cfb34b595d15..5c342997863e 100644 --- a/drivers/gpu/drm/imx/ipuv3-plane.c +++ b/drivers/gpu/drm/imx/ipuv3-plane.c @@ -355,7 +355,9 @@ static int ipu_plane_atomic_check(struct drm_plane *plane, if ((ubo > 0xfffff8) || (vbo > 0xfffff8)) return -EINVAL; - if (old_fb) { + if (old_fb && + (old_fb->pixel_format == DRM_FORMAT_YUV420 || + old_fb->pixel_format == DRM_FORMAT_YVU420)) { old_ubo = drm_plane_state_to_ubo(old_state); old_vbo = drm_plane_state_to_vbo(old_state); if (ubo != old_ubo || vbo != old_vbo) From 3ab7511eafdd5c4f40d2832f09554478dfbea170 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 17 Oct 2016 17:23:59 +0100 Subject: [PATCH 230/884] ALSA: hda - allow 40 bit DMA mask for NVidia devices Commit 49d9e77e72cf ("ALSA: hda - Fix system panic when DMA > 40 bits for Nvidia audio controllers") simply disabled any DMA exceeding 32 bits for NVidia devices, even though they are capable of performing DMA up to 40 bits. On some architectures (such as arm64), system memory is not guaranteed to be 32-bit addressable by PCI devices, and so this change prevents NVidia devices from working on platforms such as AMD Seattle. Since the original commit already mentioned that up to 40 bits of DMA is supported, and given that the code has been updated in the meantime to support a 40 bit DMA mask on other devices, revert commit 49d9e77e72cf and explicitly set the DMA mask to 40 bits for NVidia devices. Fixes: 49d9e77e72cf ('ALSA: hda - Fix system panic when DMA > 40 bits...') Signed-off-by: Ard Biesheuvel Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index c3469f756ec2..c64d986009a9 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -341,8 +341,7 @@ enum { /* quirks for Nvidia */ #define AZX_DCAPS_PRESET_NVIDIA \ - (AZX_DCAPS_NO_MSI | /*AZX_DCAPS_ALIGN_BUFSIZE |*/ \ - AZX_DCAPS_NO_64BIT | AZX_DCAPS_CORBRP_SELF_CLEAR |\ + (AZX_DCAPS_NO_MSI | AZX_DCAPS_CORBRP_SELF_CLEAR |\ AZX_DCAPS_SNOOP_TYPE(NVIDIA)) #define AZX_DCAPS_PRESET_CTHDA \ @@ -1716,6 +1715,10 @@ static int azx_first_init(struct azx *chip) } } + /* NVidia hardware normally only supports up to 40 bits of DMA */ + if (chip->pci->vendor == PCI_VENDOR_ID_NVIDIA) + dma_bits = 40; + /* disable 64bit DMA address on some devices */ if (chip->driver_caps & AZX_DCAPS_NO_64BIT) { dev_dbg(card->dev, "Disabling 64bit DMA\n"); From a2ed0b391dd9c3ef1d64c7c3e370f4a5ffcd324a Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 4 Oct 2016 13:44:06 +0200 Subject: [PATCH 231/884] isofs: Do not return EACCES for unknown filesystems When isofs_mount() is called to mount a device read-write, it returns EACCES even before it checks that the device actually contains an isofs filesystem. This may confuse mount(8) which then tries to mount all subsequent filesystem types in read-only mode. Fix the problem by returning EACCES only once we verify that the device indeed contains an iso9660 filesystem. CC: stable@vger.kernel.org Fixes: 17b7f7cf58926844e1dd40f5eb5348d481deca6a Reported-by: Kent Overstreet Reported-by: Karel Zak Signed-off-by: Jan Kara --- fs/isofs/inode.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index ad0c745ebad7..871c8b392099 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -687,6 +687,11 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent) pri_bh = NULL; root_found: + /* We don't support read-write mounts */ + if (!(s->s_flags & MS_RDONLY)) { + error = -EACCES; + goto out_freebh; + } if (joliet_level && (pri == NULL || !opt.rock)) { /* This is the case of Joliet with the norock mount flag. @@ -1501,9 +1506,6 @@ struct inode *__isofs_iget(struct super_block *sb, static struct dentry *isofs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - /* We don't support read-write mounts */ - if (!(flags & MS_RDONLY)) - return ERR_PTR(-EACCES); return mount_bdev(fs_type, flags, dev_name, data, isofs_fill_super); } From e952813e210b3addaea063da64ef68c3f30c0aa2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 18 Oct 2016 00:05:34 +0200 Subject: [PATCH 232/884] ext2: avoid bogus -Wmaybe-uninitialized warning On ARM, we get this false-positive warning since the rework of the ext2_get_blocks interface: fs/ext2/inode.c: In function 'ext2_get_block': include/linux/buffer_head.h:340:16: error: 'bno' may be used uninitialized in this function [-Werror=maybe-uninitialized] The calling conventions for this function are rather complex, and it's not surprising that the compiler gets this wrong, I spent a long time trying to understand how it all fits together myself. This change to avoid the warning makes sure the compiler sees that we always set 'bno' pointer whenever we have a positive return code. The transformation is correct because we always arrive at the 'got_it' label with a positive count that gets used as the return value, while any branch to the 'cleanup' label has a negative or zero 'err'. Fixes: 6750ad71986d ("ext2: stop passing buffer_head to ext2_get_blocks") Signed-off-by: Arnd Bergmann Reviewed-by: Christoph Hellwig Cc: Dave Chinner Signed-off-by: Jan Kara --- fs/ext2/inode.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index d831e24dc885..41b8b44a391c 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -622,7 +622,7 @@ static int ext2_get_blocks(struct inode *inode, u32 *bno, bool *new, bool *boundary, int create) { - int err = -EIO; + int err; int offsets[4]; Indirect chain[4]; Indirect *partial; @@ -639,7 +639,7 @@ static int ext2_get_blocks(struct inode *inode, depth = ext2_block_to_path(inode,iblock,offsets,&blocks_to_boundary); if (depth == 0) - return (err); + return -EIO; partial = ext2_get_branch(inode, depth, offsets, chain, &err); /* Simplest case - block found, no allocation needed */ @@ -761,7 +761,6 @@ static int ext2_get_blocks(struct inode *inode, ext2_splice_branch(inode, iblock, partial, indirect_blks, count); mutex_unlock(&ei->truncate_mutex); got_it: - *bno = le32_to_cpu(chain[depth-1].key); if (count > blocks_to_boundary) *boundary = true; err = count; @@ -772,6 +771,8 @@ cleanup: brelse(partial->bh); partial--; } + if (err > 0) + *bno = le32_to_cpu(chain[depth-1].key); return err; } From f771d5bb71d4df9573d12386400540516672208b Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Tue, 18 Oct 2016 10:59:09 +0800 Subject: [PATCH 233/884] ALSA: hda - Adding a new group of pin cfg into ALC295 pin quirk table We have a new Dell laptop model which uses ALC295, the pin definition is different from the existing ones in the pin quirk table, to fix the headset mic detection and mic mute led's problem, we need to add the new pin defintion into the pin quirk table. Cc: stable@vger.kernel.org Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index b58e8c76346a..8483fc20f635 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5811,8 +5811,6 @@ static const struct hda_model_fixup alc269_fixup_models[] = { #define ALC295_STANDARD_PINS \ {0x12, 0xb7a60130}, \ {0x14, 0x90170110}, \ - {0x17, 0x21014020}, \ - {0x18, 0x21a19030}, \ {0x21, 0x04211020} #define ALC298_STANDARD_PINS \ @@ -6039,7 +6037,13 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { ALC292_STANDARD_PINS, {0x13, 0x90a60140}), SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, - ALC295_STANDARD_PINS), + ALC295_STANDARD_PINS, + {0x17, 0x21014020}, + {0x18, 0x21a19030}), + SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL1_MIC_NO_PRESENCE, + ALC295_STANDARD_PINS, + {0x17, 0x21014040}, + {0x18, 0x21a19050}), SND_HDA_PIN_QUIRK(0x10ec0298, 0x1028, "Dell", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE, ALC298_STANDARD_PINS, {0x17, 0x90170110}), From f72f94555aa32b2c4374dde5ad9b960cc4ff32e2 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Wed, 12 Oct 2016 14:48:28 +0800 Subject: [PATCH 234/884] ceph: fix readdir vs fragmentation race following sequence of events tigger the race - client readdir frag 0* -> got item 'A' - MDS merges frag 0* and frag 1* - client send readdir request (frag 1*, offset 2, readdir_start 'A') - MDS reply items (that are after item 'A') in frag * Link: http://tracker.ceph.com/issues/17286 Signed-off-by: Yan, Zheng --- fs/ceph/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index bca1b49c1c4b..ef4d04647325 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1511,7 +1511,8 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, ceph_fill_dirfrag(d_inode(parent), rinfo->dir_dir); } - if (ceph_frag_is_leftmost(frag) && req->r_readdir_offset == 2) { + if (ceph_frag_is_leftmost(frag) && req->r_readdir_offset == 2 && + !(rinfo->hash_order && req->r_path2)) { /* note dir version at start of readdir so we can tell * if any dentries get dropped */ req->r_dir_release_cnt = atomic64_read(&ci->i_release_count); From 31ca58781019de191c7f520f0626ca76a88c1f6e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 13 Oct 2016 17:15:37 +0200 Subject: [PATCH 235/884] ceph: fix uninitialized dentry pointer in ceph_real_mount() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fs/ceph/super.c: In function ‘ceph_real_mount’: fs/ceph/super.c:818: warning: ‘root’ may be used uninitialized in this function If s_root is already valid, dentry pointer root is never initialized, and returned by ceph_real_mount(). This will cause a crash later when the caller dereferences the pointer. Fixes: ce2728aaa82bbeba ("ceph: avoid accessing / when mounting a subpath") Signed-off-by: Geert Uytterhoeven Signed-off-by: Yan, Zheng --- fs/ceph/super.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ceph/super.c b/fs/ceph/super.c index a29ffce98187..b382e5910eea 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -845,6 +845,8 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc) err = ceph_fs_debugfs_init(fsc); if (err < 0) goto fail; + } else { + root = dget(fsc->sb->s_root); } fsc->mount_state = CEPH_MOUNT_MOUNTED; From 5130ccea7cf4646a24c005be1309b7f86f1e91c9 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 17 Oct 2016 15:11:29 +0000 Subject: [PATCH 236/884] ceph: fix non static symbol warning Fixes the following sparse warning: fs/ceph/xattr.c:19:28: warning: symbol 'ceph_other_xattr_handler' was not declared. Should it be static? Signed-off-by: Wei Yongjun Signed-off-by: Ilya Dryomov --- fs/ceph/xattr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 40b703217977..febc28f9e2c2 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -16,7 +16,7 @@ static int __remove_xattr(struct ceph_inode_info *ci, struct ceph_inode_xattr *xattr); -const struct xattr_handler ceph_other_xattr_handler; +static const struct xattr_handler ceph_other_xattr_handler; /* * List of handlers for synthetic system.* attributes. Other @@ -1086,7 +1086,7 @@ static int ceph_set_xattr_handler(const struct xattr_handler *handler, return __ceph_setxattr(inode, name, value, size, flags); } -const struct xattr_handler ceph_other_xattr_handler = { +static const struct xattr_handler ceph_other_xattr_handler = { .prefix = "", /* match any name => handlers called with full name */ .get = ceph_get_xattr_handler, .set = ceph_set_xattr_handler, From 2317eacd9cf9dc1beee74ddb453bdd7552a64a27 Mon Sep 17 00:00:00 2001 From: John Youn Date: Mon, 17 Oct 2016 17:36:23 -0700 Subject: [PATCH 237/884] Revert "usb: dwc2: gadget: change variable name to more meaningful" This reverts commit ba48eab8866c ("usb: dwc2: gadget: change variable name to more meaningful"). This is needed to cleanly revert commit aa381a7259c3 ("usb: dwc2: gadget: fix TX FIFO size and address initialization") which may cause regressions on some platforms. Signed-off-by: John Youn Cc: Robert Baldyga Cc: Stefan Wahren Signed-off-by: Felipe Balbi --- drivers/usb/dwc2/gadget.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index 4cd6403a7566..aac4af3ea68f 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -186,7 +186,7 @@ static void dwc2_hsotg_ctrl_epint(struct dwc2_hsotg *hsotg, */ static void dwc2_hsotg_init_fifo(struct dwc2_hsotg *hsotg) { - unsigned int fifo; + unsigned int ep; unsigned int addr; int timeout; u32 dptxfsizn; @@ -217,8 +217,8 @@ static void dwc2_hsotg_init_fifo(struct dwc2_hsotg *hsotg) * them to endpoints dynamically according to maxpacket size value of * given endpoint. */ - for (fifo = 1; fifo < MAX_EPS_CHANNELS; fifo++) { - dptxfsizn = dwc2_readl(hsotg->regs + DPTXFSIZN(fifo)); + for (ep = 1; ep < MAX_EPS_CHANNELS; ep++) { + dptxfsizn = dwc2_readl(hsotg->regs + DPTXFSIZN(ep)); val = (dptxfsizn & FIFOSIZE_DEPTH_MASK) | addr; addr += dptxfsizn >> FIFOSIZE_DEPTH_SHIFT; @@ -226,7 +226,7 @@ static void dwc2_hsotg_init_fifo(struct dwc2_hsotg *hsotg) if (addr > hsotg->fifo_mem) break; - dwc2_writel(val, hsotg->regs + DPTXFSIZN(fifo)); + dwc2_writel(val, hsotg->regs + DPTXFSIZN(ep)); } /* From 3fa9538539ac737096c81f3315a14670b1609092 Mon Sep 17 00:00:00 2001 From: John Youn Date: Mon, 17 Oct 2016 17:36:25 -0700 Subject: [PATCH 238/884] Revert "usb: dwc2: gadget: fix TX FIFO size and address initialization" This reverts commit aa381a7259c3 ("usb: dwc2: gadget: fix TX FIFO size and address initialization"). The original commit removed the FIFO size programming per endpoint. The DPTXFSIZn register is also used for DIEPTXFn and the SIZE field is r/w in dedicated fifo mode. So it isn't appropriate to simply remove this initialization as it might break existing behavior. Also, some cores might not have enough fifo space to handle the programming method used in the reverted patch, resulting in fifo initialization failure. Signed-off-by: John Youn Cc: Robert Baldyga Cc: Stefan Wahren Signed-off-by: Felipe Balbi --- drivers/usb/dwc2/core.h | 7 ++++++ drivers/usb/dwc2/gadget.c | 47 ++++++++++++++++++++++++++++++++------- 2 files changed, 46 insertions(+), 8 deletions(-) diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h index aad4107ef927..2a21a0414b1d 100644 --- a/drivers/usb/dwc2/core.h +++ b/drivers/usb/dwc2/core.h @@ -259,6 +259,13 @@ enum dwc2_lx_state { DWC2_L3, /* Off state */ }; +/* + * Gadget periodic tx fifo sizes as used by legacy driver + * EP0 is not included + */ +#define DWC2_G_P_LEGACY_TX_FIFO_SIZE {256, 256, 256, 256, 768, 768, 768, \ + 768, 0, 0, 0, 0, 0, 0, 0} + /* Gadget ep0 states */ enum dwc2_ep0_state { DWC2_EP0_SETUP, diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index aac4af3ea68f..24fbebc9b409 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -189,7 +189,6 @@ static void dwc2_hsotg_init_fifo(struct dwc2_hsotg *hsotg) unsigned int ep; unsigned int addr; int timeout; - u32 dptxfsizn; u32 val; /* Reset fifo map if not correctly cleared during previous session */ @@ -218,13 +217,13 @@ static void dwc2_hsotg_init_fifo(struct dwc2_hsotg *hsotg) * given endpoint. */ for (ep = 1; ep < MAX_EPS_CHANNELS; ep++) { - dptxfsizn = dwc2_readl(hsotg->regs + DPTXFSIZN(ep)); - - val = (dptxfsizn & FIFOSIZE_DEPTH_MASK) | addr; - addr += dptxfsizn >> FIFOSIZE_DEPTH_SHIFT; - - if (addr > hsotg->fifo_mem) - break; + if (!hsotg->g_tx_fifo_sz[ep]) + continue; + val = addr; + val |= hsotg->g_tx_fifo_sz[ep] << FIFOSIZE_DEPTH_SHIFT; + WARN_ONCE(addr + hsotg->g_tx_fifo_sz[ep] > hsotg->fifo_mem, + "insufficient fifo memory"); + addr += hsotg->g_tx_fifo_sz[ep]; dwc2_writel(val, hsotg->regs + DPTXFSIZN(ep)); } @@ -3807,10 +3806,36 @@ static void dwc2_hsotg_dump(struct dwc2_hsotg *hsotg) static void dwc2_hsotg_of_probe(struct dwc2_hsotg *hsotg) { struct device_node *np = hsotg->dev->of_node; + u32 len = 0; + u32 i = 0; /* Enable dma if requested in device tree */ hsotg->g_using_dma = of_property_read_bool(np, "g-use-dma"); + /* + * Register TX periodic fifo size per endpoint. + * EP0 is excluded since it has no fifo configuration. + */ + if (!of_find_property(np, "g-tx-fifo-size", &len)) + goto rx_fifo; + + len /= sizeof(u32); + + /* Read tx fifo sizes other than ep0 */ + if (of_property_read_u32_array(np, "g-tx-fifo-size", + &hsotg->g_tx_fifo_sz[1], len)) + goto rx_fifo; + + /* Add ep0 */ + len++; + + /* Make remaining TX fifos unavailable */ + if (len < MAX_EPS_CHANNELS) { + for (i = len; i < MAX_EPS_CHANNELS; i++) + hsotg->g_tx_fifo_sz[i] = 0; + } + +rx_fifo: /* Register RX fifo size */ of_property_read_u32(np, "g-rx-fifo-size", &hsotg->g_rx_fifo_sz); @@ -3832,10 +3857,13 @@ int dwc2_gadget_init(struct dwc2_hsotg *hsotg, int irq) struct device *dev = hsotg->dev; int epnum; int ret; + int i; + u32 p_tx_fifo[] = DWC2_G_P_LEGACY_TX_FIFO_SIZE; /* Initialize to legacy fifo configuration values */ hsotg->g_rx_fifo_sz = 2048; hsotg->g_np_g_tx_fifo_sz = 1024; + memcpy(&hsotg->g_tx_fifo_sz[1], p_tx_fifo, sizeof(p_tx_fifo)); /* Device tree specific probe */ dwc2_hsotg_of_probe(hsotg); @@ -3853,6 +3881,9 @@ int dwc2_gadget_init(struct dwc2_hsotg *hsotg, int irq) dev_dbg(dev, "NonPeriodic TXFIFO size: %d\n", hsotg->g_np_g_tx_fifo_sz); dev_dbg(dev, "RXFIFO size: %d\n", hsotg->g_rx_fifo_sz); + for (i = 0; i < MAX_EPS_CHANNELS; i++) + dev_dbg(dev, "Periodic TXFIFO%2d size: %d\n", i, + hsotg->g_tx_fifo_sz[i]); hsotg->gadget.max_speed = USB_SPEED_HIGH; hsotg->gadget.ops = &dwc2_hsotg_gadget_ops; From a1aa8cf6471b17c0fa7132ea5eeef0ae07ca07cd Mon Sep 17 00:00:00 2001 From: John Youn Date: Mon, 17 Oct 2016 17:36:28 -0700 Subject: [PATCH 239/884] Revert "Documentation: devicetree: dwc2: Deprecate g-tx-fifo-size" This binding was deprecated due to commit aa381a7259c3 ("usb: dwc2: gadget: fix TX FIFO size and address initialization"). However that commit is now reverted, so also revert this commit. The binding is valid and shouldn't be deprecated. This reverts commit 65e1ff7f4b5b ("Documentation: devicetree: dwc2: Deprecate g-tx-fifo-size"). Signed-off-by: John Youn Acked-by: Rob Herring Signed-off-by: Felipe Balbi --- Documentation/devicetree/bindings/usb/dwc2.txt | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/usb/dwc2.txt b/Documentation/devicetree/bindings/usb/dwc2.txt index 455f2c310a1b..2c30a5479069 100644 --- a/Documentation/devicetree/bindings/usb/dwc2.txt +++ b/Documentation/devicetree/bindings/usb/dwc2.txt @@ -28,10 +28,7 @@ Refer to phy/phy-bindings.txt for generic phy consumer properties - g-use-dma: enable dma usage in gadget driver. - g-rx-fifo-size: size of rx fifo size in gadget mode. - g-np-tx-fifo-size: size of non-periodic tx fifo size in gadget mode. - -Deprecated properties: -- g-tx-fifo-size: size of periodic tx fifo per endpoint (except ep0) - in gadget mode. +- g-tx-fifo-size: size of periodic tx fifo per endpoint (except ep0) in gadget mode. Example: From d69bb92e402ff948bdcd39f19c9067874fb86873 Mon Sep 17 00:00:00 2001 From: Vlad Tsyrklevich Date: Thu, 13 Oct 2016 14:36:41 +0200 Subject: [PATCH 240/884] ALSA: asihpi: fix kernel memory disclosure Some elements in hr are not cleared before being copied to user space, leaking kernel heap memory to user space. For example, this happens in the error handling code for the HPI_ADAPTER_DELETE case. Zero the memory before it's copied. Signed-off-by: Vlad Tsyrklevich Signed-off-by: Takashi Iwai --- sound/pci/asihpi/hpioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/asihpi/hpioctl.c b/sound/pci/asihpi/hpioctl.c index d17937b92331..7e3aa50b21f9 100644 --- a/sound/pci/asihpi/hpioctl.c +++ b/sound/pci/asihpi/hpioctl.c @@ -111,7 +111,7 @@ long asihpi_hpi_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return -EINVAL; hm = kmalloc(sizeof(*hm), GFP_KERNEL); - hr = kmalloc(sizeof(*hr), GFP_KERNEL); + hr = kzalloc(sizeof(*hr), GFP_KERNEL); if (!hm || !hr) { err = -ENOMEM; goto out; From 5366f1460c447f8ecb7d13e99e5ccb4bcfc21927 Mon Sep 17 00:00:00 2001 From: Andrew Jeffery Date: Wed, 28 Sep 2016 00:20:13 +0930 Subject: [PATCH 241/884] pinctrl: aspeed: "Not enabled" is a significant mux state Consider a scenario with one pin P that has two signals A and B, where A is defined to be higher priority than B: That is, if the mux IP is in a state that would consider both A and B to be active on P, then A will be the active signal. To instead configure B as the active signal we must configure the mux so that A is inactive. The mux state for signals can be described by logical operations on one or more bits from one or more registers (a "signal expression"), which in some cases leads to aliased mux states for a particular signal. Further, signals described by multi-bit bitfields often do not only need to record the states that would make them active (the "enable" expressions), but also the states that makes them inactive (the "disable" expressions). All of this combined leads to four possible states for a signal: 1. A signal is active with respect to an "enable" expression 2. A signal is not active with respect to an "enable" expression 3. A signal is inactive with respect to a "disable" expression 4. A signal is not inactive with respect to a "disable" expression In the case of P, if we are looking to activate B without explicitly having configured A it's enough to consider A inactive if all of A's "enable" signal expressions evaluate to "not active". If any evaluate to "active" then the corresponding "disable" states must be applied so it becomes inactive. For example, on the AST2400 the pins composing GPIO bank H provide signals ROMD8 through ROMD15 (high priority) and those for UART6 (low priority). The mux states for ROMD8 through ROMD15 are aliased, i.e. there are two mux states that result in the respective signals being configured: A. SCU90[6]=1 B. Strap[4,1:0]=100 Further, the second mux state is a 3-bit bitfield that explicitly defines the enabled state but the disabled state is implicit, i.e. if Strap[4,1:0] is not exactly "100" then ROMD8 through ROMD15 are not considered active. This requires the mux function evaluation logic to use approach 2. above, however the existing code was using approach 3. The problem was brought to light on the Palmetto machines where the strap register value is 0x120ce416, and prevented GPIO requests in bank H from succeeding despite the hardware being in a position to allow them. Fixes: 318398c09a8d ("pinctrl: Add core pinctrl support for Aspeed SoCs") Signed-off-by: Andrew Jeffery Reviewed-by: Joel Stanley Signed-off-by: Linus Walleij --- drivers/pinctrl/aspeed/pinctrl-aspeed.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed.c b/drivers/pinctrl/aspeed/pinctrl-aspeed.c index 0391f9f13f3e..49aeba912531 100644 --- a/drivers/pinctrl/aspeed/pinctrl-aspeed.c +++ b/drivers/pinctrl/aspeed/pinctrl-aspeed.c @@ -166,13 +166,9 @@ static bool aspeed_sig_expr_set(const struct aspeed_sig_expr *expr, bool enable, struct regmap *map) { int i; - bool ret; - - ret = aspeed_sig_expr_eval(expr, enable, map); - if (ret) - return ret; for (i = 0; i < expr->ndescs; i++) { + bool ret; const struct aspeed_sig_desc *desc = &expr->descs[i]; u32 pattern = enable ? desc->enable : desc->disable; @@ -199,12 +195,18 @@ static bool aspeed_sig_expr_set(const struct aspeed_sig_expr *expr, static bool aspeed_sig_expr_enable(const struct aspeed_sig_expr *expr, struct regmap *map) { + if (aspeed_sig_expr_eval(expr, true, map)) + return true; + return aspeed_sig_expr_set(expr, true, map); } static bool aspeed_sig_expr_disable(const struct aspeed_sig_expr *expr, struct regmap *map) { + if (!aspeed_sig_expr_eval(expr, true, map)) + return true; + return aspeed_sig_expr_set(expr, false, map); } From 97e8c3f5e76582d63026585c79d39c1d1fb960e5 Mon Sep 17 00:00:00 2001 From: Andrew Jeffery Date: Wed, 28 Sep 2016 00:20:14 +0930 Subject: [PATCH 242/884] pinctrl: aspeed-g5: Fix names of GPID2 pins Fixes simple typos in the initial commit. There is no behavioural change. Fixes: 56e57cb6c07f (pinctrl: Add pinctrl-aspeed-g5 driver) Reported-by: Xo Wang Signed-off-by: Andrew Jeffery Reviewed-by: Joel Stanley Signed-off-by: Linus Walleij --- drivers/pinctrl/aspeed/pinctrl-aspeed-g5.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed-g5.c b/drivers/pinctrl/aspeed/pinctrl-aspeed-g5.c index e1ab864e1a7f..14639834a5eb 100644 --- a/drivers/pinctrl/aspeed/pinctrl-aspeed-g5.c +++ b/drivers/pinctrl/aspeed/pinctrl-aspeed-g5.c @@ -151,21 +151,21 @@ FUNC_GROUP_DECL(GPID0, F19, E21); #define GPID2_DESC SIG_DESC_SET(SCU8C, 9) -#define D20 26 +#define F20 26 SIG_EXPR_LIST_DECL_SINGLE(SD2DAT0, SD2, SD2_DESC); SIG_EXPR_DECL(GPID2IN, GPID2, GPID2_DESC); SIG_EXPR_DECL(GPID2IN, GPID, GPID_DESC); SIG_EXPR_LIST_DECL_DUAL(GPID2IN, GPID2, GPID); -MS_PIN_DECL(D20, GPIOD2, SD2DAT0, GPID2IN); +MS_PIN_DECL(F20, GPIOD2, SD2DAT0, GPID2IN); -#define D21 27 +#define D20 27 SIG_EXPR_LIST_DECL_SINGLE(SD2DAT1, SD2, SD2_DESC); SIG_EXPR_DECL(GPID2OUT, GPID2, GPID2_DESC); SIG_EXPR_DECL(GPID2OUT, GPID, GPID_DESC); SIG_EXPR_LIST_DECL_DUAL(GPID2OUT, GPID2, GPID); -MS_PIN_DECL(D21, GPIOD3, SD2DAT1, GPID2OUT); +MS_PIN_DECL(D20, GPIOD3, SD2DAT1, GPID2OUT); -FUNC_GROUP_DECL(GPID2, D20, D21); +FUNC_GROUP_DECL(GPID2, F20, D20); #define GPIE_DESC SIG_DESC_SET(HW_STRAP1, 21) #define GPIE0_DESC SIG_DESC_SET(SCU8C, 12) @@ -614,7 +614,6 @@ static struct pinctrl_pin_desc aspeed_g5_pins[ASPEED_G5_NR_PINS] = { ASPEED_PINCTRL_PIN(D10), ASPEED_PINCTRL_PIN(D2), ASPEED_PINCTRL_PIN(D20), - ASPEED_PINCTRL_PIN(D21), ASPEED_PINCTRL_PIN(D4), ASPEED_PINCTRL_PIN(D5), ASPEED_PINCTRL_PIN(D6), @@ -630,6 +629,7 @@ static struct pinctrl_pin_desc aspeed_g5_pins[ASPEED_G5_NR_PINS] = { ASPEED_PINCTRL_PIN(E7), ASPEED_PINCTRL_PIN(E9), ASPEED_PINCTRL_PIN(F19), + ASPEED_PINCTRL_PIN(F20), ASPEED_PINCTRL_PIN(F9), ASPEED_PINCTRL_PIN(H20), ASPEED_PINCTRL_PIN(L1), From d3dbabe9848092d26d14076cdf4d52734f998580 Mon Sep 17 00:00:00 2001 From: Andrew Jeffery Date: Wed, 28 Sep 2016 00:20:15 +0930 Subject: [PATCH 243/884] pinctrl: aspeed-g5: Fix GPIOE1 typo This prevented C20 from successfully being muxed as GPIO. Fixes: 56e57cb6c07f (pinctrl: Add pinctrl-aspeed-g5 driver) Signed-off-by: Andrew Jeffery Reviewed-by: Joel Stanley Signed-off-by: Linus Walleij --- drivers/pinctrl/aspeed/pinctrl-aspeed-g5.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed-g5.c b/drivers/pinctrl/aspeed/pinctrl-aspeed-g5.c index 14639834a5eb..235d929e74fd 100644 --- a/drivers/pinctrl/aspeed/pinctrl-aspeed-g5.c +++ b/drivers/pinctrl/aspeed/pinctrl-aspeed-g5.c @@ -182,7 +182,7 @@ SIG_EXPR_LIST_DECL_SINGLE(NDCD3, NDCD3, SIG_DESC_SET(SCU80, 17)); SIG_EXPR_DECL(GPIE0OUT, GPIE0, GPIE0_DESC); SIG_EXPR_DECL(GPIE0OUT, GPIE, GPIE_DESC); SIG_EXPR_LIST_DECL_DUAL(GPIE0OUT, GPIE0, GPIE); -MS_PIN_DECL(C20, GPIE0, NDCD3, GPIE0OUT); +MS_PIN_DECL(C20, GPIOE1, NDCD3, GPIE0OUT); FUNC_GROUP_DECL(GPIE0, B20, C20); From 8eb37aff76f4d97db39e62a838cd37c4d341d673 Mon Sep 17 00:00:00 2001 From: Andrew Jeffery Date: Wed, 28 Sep 2016 00:20:16 +0930 Subject: [PATCH 244/884] pinctrl: aspeed-g5: Fix pin association of SPI1 function The SPI1 function was associated with the wrong pins: The functions that those pins provide is either an SPI debug or passthrough function coupled to SPI1. Make the SPI1 mux function configure the relevant pins and associate new SPI1DEBUG and SPI1PASSTHRU functions with the pins that were already defined. The notation used in the datasheet's multi-function pin table for the SoC is often creative: in this case the SYS* signals are enabled by a single bit, which is nothing unusual on its own, but in this case the bit was also participating in a multi-bit bitfield and therefore represented multiple functions. This fact was overlooked in the original patch. Fixes: 56e57cb6c07f (pinctrl: Add pinctrl-aspeed-g5 driver) Signed-off-by: Andrew Jeffery Reviewed-by: Joel Stanley Acked-by: Rob Herring Signed-off-by: Linus Walleij --- .../bindings/pinctrl/pinctrl-aspeed.txt | 4 +- drivers/pinctrl/aspeed/pinctrl-aspeed-g5.c | 86 +++++++++++++++++-- 2 files changed, 81 insertions(+), 9 deletions(-) diff --git a/Documentation/devicetree/bindings/pinctrl/pinctrl-aspeed.txt b/Documentation/devicetree/bindings/pinctrl/pinctrl-aspeed.txt index 5e60ad18f147..2ad18c4ea55c 100644 --- a/Documentation/devicetree/bindings/pinctrl/pinctrl-aspeed.txt +++ b/Documentation/devicetree/bindings/pinctrl/pinctrl-aspeed.txt @@ -43,7 +43,9 @@ aspeed,ast2500-pinctrl, aspeed,g5-pinctrl: GPID0 GPID2 GPIE0 I2C10 I2C11 I2C12 I2C13 I2C14 I2C3 I2C4 I2C5 I2C6 I2C7 I2C8 I2C9 MAC1LINK MDIO1 MDIO2 OSCCLK PEWAKE PWM0 PWM1 PWM2 PWM3 PWM4 PWM5 PWM6 PWM7 -RGMII1 RGMII2 RMII1 RMII2 SD1 SPI1 TIMER4 TIMER5 TIMER6 TIMER7 TIMER8 +RGMII1 RGMII2 RMII1 RMII2 SD1 SPI1 SPI1DEBUG SPI1PASSTHRU TIMER4 TIMER5 TIMER6 +TIMER7 TIMER8 VGABIOSROM + Examples: diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed-g5.c b/drivers/pinctrl/aspeed/pinctrl-aspeed-g5.c index 235d929e74fd..c8c72e8259d3 100644 --- a/drivers/pinctrl/aspeed/pinctrl-aspeed-g5.c +++ b/drivers/pinctrl/aspeed/pinctrl-aspeed-g5.c @@ -186,24 +186,84 @@ MS_PIN_DECL(C20, GPIOE1, NDCD3, GPIE0OUT); FUNC_GROUP_DECL(GPIE0, B20, C20); -#define SPI1_DESC SIG_DESC_SET(HW_STRAP1, 13) +#define SPI1_DESC { HW_STRAP1, GENMASK(13, 12), 1, 0 } +#define SPI1DEBUG_DESC { HW_STRAP1, GENMASK(13, 12), 2, 0 } +#define SPI1PASSTHRU_DESC { HW_STRAP1, GENMASK(13, 12), 3, 0 } + #define C18 64 -SIG_EXPR_LIST_DECL_SINGLE(SYSCS, SPI1, COND1, SPI1_DESC); +SIG_EXPR_DECL(SYSCS, SPI1DEBUG, COND1, SPI1DEBUG_DESC); +SIG_EXPR_DECL(SYSCS, SPI1PASSTHRU, COND1, SPI1PASSTHRU_DESC); +SIG_EXPR_LIST_DECL_DUAL(SYSCS, SPI1DEBUG, SPI1PASSTHRU); SS_PIN_DECL(C18, GPIOI0, SYSCS); #define E15 65 -SIG_EXPR_LIST_DECL_SINGLE(SYSCK, SPI1, COND1, SPI1_DESC); +SIG_EXPR_DECL(SYSCK, SPI1DEBUG, COND1, SPI1DEBUG_DESC); +SIG_EXPR_DECL(SYSCK, SPI1PASSTHRU, COND1, SPI1PASSTHRU_DESC); +SIG_EXPR_LIST_DECL_DUAL(SYSCK, SPI1DEBUG, SPI1PASSTHRU); SS_PIN_DECL(E15, GPIOI1, SYSCK); -#define A14 66 -SIG_EXPR_LIST_DECL_SINGLE(SYSMOSI, SPI1, COND1, SPI1_DESC); -SS_PIN_DECL(A14, GPIOI2, SYSMOSI); +#define B16 66 +SIG_EXPR_DECL(SYSMOSI, SPI1DEBUG, COND1, SPI1DEBUG_DESC); +SIG_EXPR_DECL(SYSMOSI, SPI1PASSTHRU, COND1, SPI1PASSTHRU_DESC); +SIG_EXPR_LIST_DECL_DUAL(SYSMOSI, SPI1DEBUG, SPI1PASSTHRU); +SS_PIN_DECL(B16, GPIOI2, SYSMOSI); #define C16 67 -SIG_EXPR_LIST_DECL_SINGLE(SYSMISO, SPI1, COND1, SPI1_DESC); +SIG_EXPR_DECL(SYSMISO, SPI1DEBUG, COND1, SPI1DEBUG_DESC); +SIG_EXPR_DECL(SYSMISO, SPI1PASSTHRU, COND1, SPI1PASSTHRU_DESC); +SIG_EXPR_LIST_DECL_DUAL(SYSMISO, SPI1DEBUG, SPI1PASSTHRU); SS_PIN_DECL(C16, GPIOI3, SYSMISO); -FUNC_GROUP_DECL(SPI1, C18, E15, A14, C16); +#define VB_DESC SIG_DESC_SET(HW_STRAP1, 5) + +#define B15 68 +SIG_EXPR_DECL(SPI1CS0, SPI1, COND1, SPI1_DESC); +SIG_EXPR_DECL(SPI1CS0, SPI1DEBUG, COND1, SPI1DEBUG_DESC); +SIG_EXPR_DECL(SPI1CS0, SPI1PASSTHRU, COND1, SPI1PASSTHRU_DESC); +SIG_EXPR_LIST_DECL(SPI1CS0, SIG_EXPR_PTR(SPI1CS0, SPI1), + SIG_EXPR_PTR(SPI1CS0, SPI1DEBUG), + SIG_EXPR_PTR(SPI1CS0, SPI1PASSTHRU)); +SIG_EXPR_LIST_DECL_SINGLE(VBCS, VGABIOSROM, COND1, VB_DESC); +MS_PIN_DECL(B15, GPIOI4, SPI1CS0, VBCS); + +#define C15 69 +SIG_EXPR_DECL(SPI1CK, SPI1, COND1, SPI1_DESC); +SIG_EXPR_DECL(SPI1CK, SPI1DEBUG, COND1, SPI1DEBUG_DESC); +SIG_EXPR_DECL(SPI1CK, SPI1PASSTHRU, COND1, SPI1PASSTHRU_DESC); +SIG_EXPR_LIST_DECL(SPI1CK, SIG_EXPR_PTR(SPI1CK, SPI1), + SIG_EXPR_PTR(SPI1CK, SPI1DEBUG), + SIG_EXPR_PTR(SPI1CK, SPI1PASSTHRU)); +SIG_EXPR_LIST_DECL_SINGLE(VBCK, VGABIOSROM, COND1, VB_DESC); +MS_PIN_DECL(C15, GPIOI5, SPI1CK, VBCK); + +#define A14 70 +SIG_EXPR_DECL(SPI1MOSI, SPI1, COND1, SPI1_DESC); +SIG_EXPR_DECL(SPI1MOSI, SPI1DEBUG, COND1, SPI1DEBUG_DESC); +SIG_EXPR_DECL(SPI1MOSI, SPI1PASSTHRU, COND1, SPI1PASSTHRU_DESC); +SIG_EXPR_LIST_DECL(SPI1MOSI, SIG_EXPR_PTR(SPI1MOSI, SPI1), + SIG_EXPR_PTR(SPI1MOSI, SPI1DEBUG), + SIG_EXPR_PTR(SPI1MOSI, SPI1PASSTHRU)); +SIG_EXPR_LIST_DECL_SINGLE(VBMOSI, VGABIOSROM, COND1, VB_DESC); +MS_PIN_DECL(A14, GPIOI6, SPI1MOSI, VBMOSI); + +#define A15 71 +SIG_EXPR_DECL(SPI1MISO, SPI1, COND1, SPI1_DESC); +SIG_EXPR_DECL(SPI1MISO, SPI1DEBUG, COND1, SPI1DEBUG_DESC); +SIG_EXPR_DECL(SPI1MISO, SPI1PASSTHRU, COND1, SPI1PASSTHRU_DESC); +SIG_EXPR_LIST_DECL(SPI1MISO, SIG_EXPR_PTR(SPI1MISO, SPI1), + SIG_EXPR_PTR(SPI1MISO, SPI1DEBUG), + SIG_EXPR_PTR(SPI1MISO, SPI1PASSTHRU)); +SIG_EXPR_LIST_DECL_SINGLE(VBMISO, VGABIOSROM, COND1, VB_DESC); +MS_PIN_DECL(A15, GPIOI7, SPI1MISO, VBMISO); + +FUNC_GROUP_DECL(SPI1, B15, C15, A14, A15); +FUNC_GROUP_DECL(SPI1DEBUG, C18, E15, B16, C16, B15, C15, A14, A15); +FUNC_GROUP_DECL(SPI1PASSTHRU, C18, E15, B16, C16, B15, C15, A14, A15); +FUNC_GROUP_DECL(VGABIOSROM, B15, C15, A14, A15); + +#define R2 72 +SIG_EXPR_LIST_DECL_SINGLE(SGPMCK, SGPM, SIG_DESC_SET(SCU84, 8)); +SS_PIN_DECL(R2, GPIOJ0, SGPMCK); #define L2 73 SIG_EXPR_LIST_DECL_SINGLE(SGPMLD, SGPM, SIG_DESC_SET(SCU84, 9)); @@ -580,6 +640,7 @@ static struct pinctrl_pin_desc aspeed_g5_pins[ASPEED_G5_NR_PINS] = { ASPEED_PINCTRL_PIN(A12), ASPEED_PINCTRL_PIN(A13), ASPEED_PINCTRL_PIN(A14), + ASPEED_PINCTRL_PIN(A15), ASPEED_PINCTRL_PIN(A2), ASPEED_PINCTRL_PIN(A3), ASPEED_PINCTRL_PIN(A4), @@ -592,6 +653,8 @@ static struct pinctrl_pin_desc aspeed_g5_pins[ASPEED_G5_NR_PINS] = { ASPEED_PINCTRL_PIN(B12), ASPEED_PINCTRL_PIN(B13), ASPEED_PINCTRL_PIN(B14), + ASPEED_PINCTRL_PIN(B15), + ASPEED_PINCTRL_PIN(B16), ASPEED_PINCTRL_PIN(B2), ASPEED_PINCTRL_PIN(B20), ASPEED_PINCTRL_PIN(B3), @@ -603,6 +666,7 @@ static struct pinctrl_pin_desc aspeed_g5_pins[ASPEED_G5_NR_PINS] = { ASPEED_PINCTRL_PIN(C12), ASPEED_PINCTRL_PIN(C13), ASPEED_PINCTRL_PIN(C14), + ASPEED_PINCTRL_PIN(C15), ASPEED_PINCTRL_PIN(C16), ASPEED_PINCTRL_PIN(C18), ASPEED_PINCTRL_PIN(C2), @@ -691,11 +755,14 @@ static const struct aspeed_pin_group aspeed_g5_groups[] = { ASPEED_PINCTRL_GROUP(RMII2), ASPEED_PINCTRL_GROUP(SD1), ASPEED_PINCTRL_GROUP(SPI1), + ASPEED_PINCTRL_GROUP(SPI1DEBUG), + ASPEED_PINCTRL_GROUP(SPI1PASSTHRU), ASPEED_PINCTRL_GROUP(TIMER4), ASPEED_PINCTRL_GROUP(TIMER5), ASPEED_PINCTRL_GROUP(TIMER6), ASPEED_PINCTRL_GROUP(TIMER7), ASPEED_PINCTRL_GROUP(TIMER8), + ASPEED_PINCTRL_GROUP(VGABIOSROM), }; static const struct aspeed_pin_function aspeed_g5_functions[] = { @@ -733,11 +800,14 @@ static const struct aspeed_pin_function aspeed_g5_functions[] = { ASPEED_PINCTRL_FUNC(RMII2), ASPEED_PINCTRL_FUNC(SD1), ASPEED_PINCTRL_FUNC(SPI1), + ASPEED_PINCTRL_FUNC(SPI1DEBUG), + ASPEED_PINCTRL_FUNC(SPI1PASSTHRU), ASPEED_PINCTRL_FUNC(TIMER4), ASPEED_PINCTRL_FUNC(TIMER5), ASPEED_PINCTRL_FUNC(TIMER6), ASPEED_PINCTRL_FUNC(TIMER7), ASPEED_PINCTRL_FUNC(TIMER8), + ASPEED_PINCTRL_FUNC(VGABIOSROM), }; static struct aspeed_pinctrl_data aspeed_g5_pinctrl_data = { From a171bc51fa697021e1b2082d7e95c12a363bc0a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 3 Oct 2016 17:56:55 +0300 Subject: [PATCH 245/884] pinctrl: baytrail: Fix lockdep MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Initialize the spinlock before using it. INFO: trying to register non-static key. the code is fine but needs lockdep annotation. turning off the locking correctness validator. CPU: 2 PID: 1 Comm: swapper/0 Not tainted 4.8.0-dwc-bisect #4 Hardware name: Intel Corp. VALLEYVIEW C0 PLATFORM/BYT-T FFD8, BIOS BLAKFF81.X64.0088.R10.1403240443 FFD8_X64_R_2014_13_1_00 03/24/2014 0000000000000000 ffff8800788ff770 ffffffff8133d597 0000000000000000 0000000000000000 ffff8800788ff7e0 ffffffff810cfb9e 0000000000000002 ffff8800788ff7d0 ffffffff8205b600 0000000000000002 ffff8800788ff7f0 Call Trace: [] dump_stack+0x67/0x90 [] register_lock_class+0x52e/0x540 [] __lock_acquire+0x81/0x16b0 [] ? save_trace+0x41/0xd0 [] ? __lock_acquire+0x13b2/0x16b0 [] ? __lock_is_held+0x4a/0x70 [] lock_acquire+0xba/0x220 [] ? byt_gpio_get_direction+0x3e/0x80 [] _raw_spin_lock_irqsave+0x47/0x60 [] ? byt_gpio_get_direction+0x3e/0x80 [] byt_gpio_get_direction+0x3e/0x80 [] gpiochip_add_data+0x319/0x7d0 [] ? _raw_spin_unlock_irqrestore+0x43/0x70 [] byt_pinctrl_probe+0x2fb/0x620 [] platform_drv_probe+0x3c/0xa0 ... Based on the diff it looks like the problem was introduced in commit 71e6ca61e826 ("pinctrl: baytrail: Register pin control handling") but I wasn't able to verify that empirically as the parent commit just oopsed when I tried to boot it. Cc: Cristina Ciocan Cc: stable@vger.kernel.org Fixes: 71e6ca61e826 ("pinctrl: baytrail: Register pin control handling") Signed-off-by: Ville Syrjälä Acked-by: Mika Westerberg Signed-off-by: Linus Walleij --- drivers/pinctrl/intel/pinctrl-baytrail.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/intel/pinctrl-baytrail.c b/drivers/pinctrl/intel/pinctrl-baytrail.c index d22a9fe2e6df..71bbeb9321ba 100644 --- a/drivers/pinctrl/intel/pinctrl-baytrail.c +++ b/drivers/pinctrl/intel/pinctrl-baytrail.c @@ -1808,6 +1808,8 @@ static int byt_pinctrl_probe(struct platform_device *pdev) return PTR_ERR(vg->pctl_dev); } + raw_spin_lock_init(&vg->lock); + ret = byt_gpio_probe(vg); if (ret) { pinctrl_unregister(vg->pctl_dev); @@ -1815,7 +1817,6 @@ static int byt_pinctrl_probe(struct platform_device *pdev) } platform_set_drvdata(pdev, vg); - raw_spin_lock_init(&vg->lock); pm_runtime_enable(&pdev->dev); return 0; From c538b9436751a0be2e1246b48353bc23156bdbcc Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 10 Oct 2016 16:39:31 +0300 Subject: [PATCH 246/884] pinctrl: intel: Only restore pins that are used by the driver Dell XPS 13 (and maybe some others) uses a GPIO (CPU_GP_1) during suspend to explicitly disable USB touchscreen interrupt. This is done to prevent situation where the lid is closed the touchscreen is left functional. The pinctrl driver (wrongly) assumes it owns all pins which are owned by host and not locked down. It is perfectly fine for BIOS to use those pins as it is also considered as host in this context. What happens is that when the lid of Dell XPS 13 is closed, the BIOS configures CPU_GP_1 low disabling the touchscreen interrupt. During resume we restore all host owned pins to the known state which includes CPU_GP_1 and this overwrites what the BIOS has programmed there causing the touchscreen to fail as no interrupts are reaching the CPU anymore. Fix this by restoring only those pins we know are explicitly requested by the kernel one way or other. Cc: stable@vger.kernel.org Link: https://bugzilla.kernel.org/show_bug.cgi?id=176361 Reported-by: AceLan Kao Tested-by: AceLan Kao Signed-off-by: Mika Westerberg Signed-off-by: Linus Walleij --- drivers/pinctrl/intel/pinctrl-intel.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c index 63387a40b973..01443762e570 100644 --- a/drivers/pinctrl/intel/pinctrl-intel.c +++ b/drivers/pinctrl/intel/pinctrl-intel.c @@ -19,6 +19,7 @@ #include #include +#include "../core.h" #include "pinctrl-intel.h" /* Offset from regs */ @@ -1056,6 +1057,26 @@ int intel_pinctrl_remove(struct platform_device *pdev) EXPORT_SYMBOL_GPL(intel_pinctrl_remove); #ifdef CONFIG_PM_SLEEP +static bool intel_pinctrl_should_save(struct intel_pinctrl *pctrl, unsigned pin) +{ + const struct pin_desc *pd = pin_desc_get(pctrl->pctldev, pin); + + if (!pd || !intel_pad_usable(pctrl, pin)) + return false; + + /* + * Only restore the pin if it is actually in use by the kernel (or + * by userspace). It is possible that some pins are used by the + * BIOS during resume and those are not always locked down so leave + * them alone. + */ + if (pd->mux_owner || pd->gpio_owner || + gpiochip_line_is_irq(&pctrl->chip, pin)) + return true; + + return false; +} + int intel_pinctrl_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); @@ -1069,7 +1090,7 @@ int intel_pinctrl_suspend(struct device *dev) const struct pinctrl_pin_desc *desc = &pctrl->soc->pins[i]; u32 val; - if (!intel_pad_usable(pctrl, desc->number)) + if (!intel_pinctrl_should_save(pctrl, desc->number)) continue; val = readl(intel_get_padcfg(pctrl, desc->number, PADCFG0)); @@ -1130,7 +1151,7 @@ int intel_pinctrl_resume(struct device *dev) void __iomem *padcfg; u32 val; - if (!intel_pad_usable(pctrl, desc->number)) + if (!intel_pinctrl_should_save(pctrl, desc->number)) continue; padcfg = intel_get_padcfg(pctrl, desc->number, PADCFG0); From 6bc80629eefc3731f5881092bed610d994e05763 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Tue, 18 Oct 2016 08:16:03 +0200 Subject: [PATCH 247/884] bnx2: fix locking when netconsole is used Functions bnx2_reg_rd_ind(), bnx2_reg_wr_ind() and bnx2_ctx_wr() can be called with IRQs disabled when netconsole is enabled. So they should use spin_{,un}lock_irq{save,restore} instead of _bh variants. Example call flow: bnx2_poll() ->bnx2_poll_link() ->bnx2_phy_int() ->bnx2_set_remote_link() ->bnx2_shmem_rd() ->bnx2_reg_rd_ind() -> spin_lock_bh(&bp->indirect_lock); spin_unlock_bh(&bp->indirect_lock); ... -> __local_bh_enable_ip static inline void __local_bh_enable_ip(unsigned long ip) WARN_ON_ONCE(in_irq() || irqs_disabled()); <<<<<< WARN Cc: Sony Chacko Cc: Dept-HSGLinuxNICDev@qlogic.com Signed-off-by: Ivan Vecera Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index 27f11a5d5fe2..b3791b394715 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -271,22 +271,25 @@ static inline u32 bnx2_tx_avail(struct bnx2 *bp, struct bnx2_tx_ring_info *txr) static u32 bnx2_reg_rd_ind(struct bnx2 *bp, u32 offset) { + unsigned long flags; u32 val; - spin_lock_bh(&bp->indirect_lock); + spin_lock_irqsave(&bp->indirect_lock, flags); BNX2_WR(bp, BNX2_PCICFG_REG_WINDOW_ADDRESS, offset); val = BNX2_RD(bp, BNX2_PCICFG_REG_WINDOW); - spin_unlock_bh(&bp->indirect_lock); + spin_unlock_irqrestore(&bp->indirect_lock, flags); return val; } static void bnx2_reg_wr_ind(struct bnx2 *bp, u32 offset, u32 val) { - spin_lock_bh(&bp->indirect_lock); + unsigned long flags; + + spin_lock_irqsave(&bp->indirect_lock, flags); BNX2_WR(bp, BNX2_PCICFG_REG_WINDOW_ADDRESS, offset); BNX2_WR(bp, BNX2_PCICFG_REG_WINDOW, val); - spin_unlock_bh(&bp->indirect_lock); + spin_unlock_irqrestore(&bp->indirect_lock, flags); } static void @@ -304,8 +307,10 @@ bnx2_shmem_rd(struct bnx2 *bp, u32 offset) static void bnx2_ctx_wr(struct bnx2 *bp, u32 cid_addr, u32 offset, u32 val) { + unsigned long flags; + offset += cid_addr; - spin_lock_bh(&bp->indirect_lock); + spin_lock_irqsave(&bp->indirect_lock, flags); if (BNX2_CHIP(bp) == BNX2_CHIP_5709) { int i; @@ -322,7 +327,7 @@ bnx2_ctx_wr(struct bnx2 *bp, u32 cid_addr, u32 offset, u32 val) BNX2_WR(bp, BNX2_CTX_DATA_ADR, offset); BNX2_WR(bp, BNX2_CTX_DATA, val); } - spin_unlock_bh(&bp->indirect_lock); + spin_unlock_irqrestore(&bp->indirect_lock, flags); } #ifdef BCM_CNIC From 8d324fd9e1bee59274d67a0a08eac5f7b1573db7 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Tue, 18 Oct 2016 09:07:29 +0200 Subject: [PATCH 248/884] net/hsr: Remove unused but set variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the unused but set variable master_dev in check_local_dest to fix the following GCC warning when building with 'W=1': net/hsr/hsr_forward.c: In function ‘check_local_dest’: net/hsr/hsr_forward.c:303:21: warning: variable ‘master_dev’ set but not used [-Wunused-but-set-variable] Signed-off-by: Tobias Klauser Signed-off-by: David S. Miller --- net/hsr/hsr_forward.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index 5ee1d43f1310..4ebe2aa3e7d3 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -300,10 +300,6 @@ static void hsr_forward_do(struct hsr_frame_info *frame) static void check_local_dest(struct hsr_priv *hsr, struct sk_buff *skb, struct hsr_frame_info *frame) { - struct net_device *master_dev; - - master_dev = hsr_port_get_hsr(hsr, HSR_PT_MASTER)->dev; - if (hsr_addr_is_self(hsr, eth_hdr(skb)->h_dest)) { frame->is_local_exclusive = true; skb->pkt_type = PACKET_HOST; From 902943c0d759a090490b2bf6c91a49395b353653 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Tue, 18 Oct 2016 09:20:33 +0200 Subject: [PATCH 249/884] dwc_eth_qos: do not clear pause flags from phy_device->supported phy_device->supported is originally set by the PHY driver. The ethernet driver should filter phy_device->supported to only contain flags supported by the IP. The IP supports setting rx and tx flow control independently, therefore SUPPORTED_Pause and SUPPORTED_Asym_Pause should not be cleared. If the flags are cleared, pause frames cannot be enabled (even if they are supported by the PHY). Signed-off-by: Niklas Cassel Signed-off-by: Jesper Nilsson Acked-by: Lars Persson Signed-off-by: David S. Miller --- drivers/net/ethernet/synopsys/dwc_eth_qos.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/synopsys/dwc_eth_qos.c b/drivers/net/ethernet/synopsys/dwc_eth_qos.c index 0d0053128542..d775729648ef 100644 --- a/drivers/net/ethernet/synopsys/dwc_eth_qos.c +++ b/drivers/net/ethernet/synopsys/dwc_eth_qos.c @@ -982,7 +982,8 @@ static int dwceqos_mii_probe(struct net_device *ndev) if (netif_msg_probe(lp)) phy_attached_info(phydev); - phydev->supported &= PHY_GBIT_FEATURES; + phydev->supported &= PHY_GBIT_FEATURES | SUPPORTED_Pause | + SUPPORTED_Asym_Pause; lp->link = 0; lp->speed = 0; From 1826277802d83b47d77e6a3e876aec07777aa271 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Tue, 18 Oct 2016 09:20:55 +0200 Subject: [PATCH 250/884] dwc_eth_qos: enable flow control by default Allow autoneg to enable flow control by default. The behavior when autoneg is off has not changed. Signed-off-by: Niklas Cassel Signed-off-by: Jesper Nilsson Acked-by: Lars Persson Signed-off-by: David S. Miller --- drivers/net/ethernet/synopsys/dwc_eth_qos.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/synopsys/dwc_eth_qos.c b/drivers/net/ethernet/synopsys/dwc_eth_qos.c index d775729648ef..5eedac495077 100644 --- a/drivers/net/ethernet/synopsys/dwc_eth_qos.c +++ b/drivers/net/ethernet/synopsys/dwc_eth_qos.c @@ -988,6 +988,7 @@ static int dwceqos_mii_probe(struct net_device *ndev) lp->link = 0; lp->speed = 0; lp->duplex = DUPLEX_UNKNOWN; + lp->flowcontrol.autoneg = AUTONEG_ENABLE; return 0; } From 7e1670c15c9b18d614b43a674e086eb3e239c8f7 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Tue, 18 Oct 2016 09:40:20 +0200 Subject: [PATCH 251/884] ipv4: Remove unused but set variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the unused but set variable dev in ip_do_fragment to fix the following GCC warning when building with 'W=1': net/ipv4/ip_output.c: In function ‘ip_do_fragment’: net/ipv4/ip_output.c:541:21: warning: variable ‘dev’ set but not used [-Wunused-but-set-variable] Signed-off-by: Tobias Klauser Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 05d105832bdb..03e7f7310423 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -538,7 +538,6 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, { struct iphdr *iph; int ptr; - struct net_device *dev; struct sk_buff *skb2; unsigned int mtu, hlen, left, len, ll_rs; int offset; @@ -546,8 +545,6 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, struct rtable *rt = skb_rtable(skb); int err = 0; - dev = rt->dst.dev; - /* for offloaded checksums cleanup checksum before fragmentation */ if (skb->ip_summed == CHECKSUM_PARTIAL && (err = skb_checksum_help(skb))) From a56177e18f2e44499a8bf5bc03dbe896dbec657d Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Tue, 18 Oct 2016 14:21:25 +0530 Subject: [PATCH 252/884] cxgb4: Fix number of queue sets corssing the limit Do not let number of offload queue sets to go more than MAX_OFLD_QSETS, which would otherwise crash the driver on machines with cores more than MAX_OFLD_QSETS. Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index f320497368f4..57eb4e1345cb 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -4057,7 +4057,7 @@ static void cfg_queues(struct adapter *adap) * capped by the number of available cores. */ if (n10g) { - i = num_online_cpus(); + i = min_t(int, MAX_OFLD_QSETS, num_online_cpus()); s->ofldqsets = roundup(i, adap->params.nports); } else { s->ofldqsets = adap->params.nports; From 7ab488951aa536d52feb3690d204a693edf4f433 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Tue, 18 Oct 2016 11:22:54 +0200 Subject: [PATCH 253/884] tcp: Remove unused but set variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the unused but set variable icsk in listening_get_next to fix the following GCC warning when building with 'W=1': net/ipv4/tcp_ipv4.c: In function ‘listening_get_next’: net/ipv4/tcp_ipv4.c:1890:31: warning: variable ‘icsk’ set but not used [-Wunused-but-set-variable] Signed-off-by: Tobias Klauser Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index bd5e8d10893f..79d55eb3ec3f 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1887,7 +1887,6 @@ static void *listening_get_next(struct seq_file *seq, void *cur) struct tcp_iter_state *st = seq->private; struct net *net = seq_file_net(seq); struct inet_listen_hashbucket *ilb; - struct inet_connection_sock *icsk; struct sock *sk = cur; if (!sk) { @@ -1909,7 +1908,6 @@ get_sk: continue; if (sk->sk_family == st->family) return sk; - icsk = inet_csk(sk); } spin_unlock_bh(&ilb->lock); st->offset = 0; From 6026ed2fe258b61ea5aadd91a95c4f36a6dbe167 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 18 Oct 2016 15:33:11 +0100 Subject: [PATCH 254/884] MAINTAINERS: Add myself as EFI maintainer At the request of Matt, I am taking up co-maintainership of the EFI subsystem. So add my name to the EFI section in MAINTAINERS, and change the SCM tree reference to point to the new shared Git repo. Signed-off-by: Ard Biesheuvel Acked-by: Will Deacon Acked-by: Matt Fleming Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20161018143318.15673-2-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 1cd38a7e0064..6847ba844ef9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4620,8 +4620,9 @@ F: sound/usb/misc/ua101.c EXTENSIBLE FIRMWARE INTERFACE (EFI) M: Matt Fleming +M: Ard Biesheuvel L: linux-efi@vger.kernel.org -T: git git://git.kernel.org/pub/scm/linux/kernel/git/mfleming/efi.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git S: Maintained F: Documentation/efi-stub.txt F: arch/ia64/kernel/efi.c From d2e4d593516e877f1f6fb40031eb495f36606e16 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 18 Oct 2016 00:05:30 +0200 Subject: [PATCH 255/884] netfilter: nf_tables: avoid uninitialized variable warning The newly added nft_range_eval() function handles the two possible nft range operations, but as the compiler warning points out, any unexpected value would lead to the 'mismatch' variable being used without being initialized: net/netfilter/nft_range.c: In function 'nft_range_eval': net/netfilter/nft_range.c:45:5: error: 'mismatch' may be used uninitialized in this function [-Werror=maybe-uninitialized] This removes the variable in question and instead moves the condition into the switch itself, which is potentially more efficient than adding a bogus 'default' clause as in my first approach, and is nicer than using the 'uninitialized_var' macro. Fixes: 0f3cd9b36977 ("netfilter: nf_tables: add range expression") Link: http://patchwork.ozlabs.org/patch/677114/ Signed-off-by: Arnd Bergmann Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_range.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c index 9bc4586c3006..fbc88009ca2e 100644 --- a/net/netfilter/nft_range.c +++ b/net/netfilter/nft_range.c @@ -28,22 +28,20 @@ static void nft_range_eval(const struct nft_expr *expr, const struct nft_pktinfo *pkt) { const struct nft_range_expr *priv = nft_expr_priv(expr); - bool mismatch; int d1, d2; d1 = memcmp(®s->data[priv->sreg], &priv->data_from, priv->len); d2 = memcmp(®s->data[priv->sreg], &priv->data_to, priv->len); switch (priv->op) { case NFT_RANGE_EQ: - mismatch = (d1 < 0 || d2 > 0); + if (d1 < 0 || d2 > 0) + regs->verdict.code = NFT_BREAK; break; case NFT_RANGE_NEQ: - mismatch = (d1 >= 0 && d2 <= 0); + if (d1 >= 0 && d2 <= 0) + regs->verdict.code = NFT_BREAK; break; } - - if (mismatch) - regs->verdict.code = NFT_BREAK; } static const struct nla_policy nft_range_policy[NFTA_RANGE_MAX + 1] = { From d09960b0032174eb493c4c13be5b9c9ef36dc9a7 Mon Sep 17 00:00:00 2001 From: Tahsin Erdogan Date: Mon, 10 Oct 2016 05:35:19 -0700 Subject: [PATCH 256/884] dm: free io_barrier after blk_cleanup_queue call dm_old_request_fn() has paths that access md->io_barrier. The party destroying io_barrier should ensure that no future execution of dm_old_request_fn() is possible. Move io_barrier destruction to below blk_cleanup_queue() to ensure this and avoid a NULL pointer crash during request-based DM device shutdown. Cc: stable@vger.kernel.org # 4.3+ Signed-off-by: Tahsin Erdogan Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index be35258324c1..ec513ee864f2 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1423,8 +1423,6 @@ static void cleanup_mapped_device(struct mapped_device *md) if (md->bs) bioset_free(md->bs); - cleanup_srcu_struct(&md->io_barrier); - if (md->disk) { spin_lock(&_minor_lock); md->disk->private_data = NULL; @@ -1436,6 +1434,8 @@ static void cleanup_mapped_device(struct mapped_device *md) if (md->queue) blk_cleanup_queue(md->queue); + cleanup_srcu_struct(&md->io_barrier); + if (md->bdev) { bdput(md->bdev); md->bdev = NULL; From 7cb3f9214dfa443c1ccc2be637dcc6344cc203f0 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Tue, 18 Oct 2016 18:09:48 +0200 Subject: [PATCH 257/884] bridge: multicast: restore perm router ports on multicast enable Satish reported a problem with the perm multicast router ports not getting reenabled after some series of events, in particular if it happens that the multicast snooping has been disabled and the port goes to disabled state then it will be deleted from the router port list, but if it moves into non-disabled state it will not be re-added because the mcast snooping is still disabled, and enabling snooping later does nothing. Here are the steps to reproduce, setup br0 with snooping enabled and eth1 added as a perm router (multicast_router = 2): 1. $ echo 0 > /sys/class/net/br0/bridge/multicast_snooping 2. $ ip l set eth1 down ^ This step deletes the interface from the router list 3. $ ip l set eth1 up ^ This step does not add it again because mcast snooping is disabled 4. $ echo 1 > /sys/class/net/br0/bridge/multicast_snooping 5. $ bridge -d -s mdb show At this point we have mcast enabled and eth1 as a perm router (value = 2) but it is not in the router list which is incorrect. After this change: 1. $ echo 0 > /sys/class/net/br0/bridge/multicast_snooping 2. $ ip l set eth1 down ^ This step deletes the interface from the router list 3. $ ip l set eth1 up ^ This step does not add it again because mcast snooping is disabled 4. $ echo 1 > /sys/class/net/br0/bridge/multicast_snooping 5. $ bridge -d -s mdb show router ports on br0: eth1 Note: we can directly do br_multicast_enable_port for all because the querier timer already has checks for the port state and will simply expire if it's in blocking/disabled. See the comment added by commit 9aa66382163e7 ("bridge: multicast: add a comment to br_port_state_selection about blocking state") Fixes: 561f1103a2b7 ("bridge: Add multicast_snooping sysfs toggle") Reported-by: Satish Ashok Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index c5fea9393946..2136e45f5277 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -972,13 +972,12 @@ static void br_multicast_enable(struct bridge_mcast_own_query *query) mod_timer(&query->timer, jiffies); } -void br_multicast_enable_port(struct net_bridge_port *port) +static void __br_multicast_enable_port(struct net_bridge_port *port) { struct net_bridge *br = port->br; - spin_lock(&br->multicast_lock); if (br->multicast_disabled || !netif_running(br->dev)) - goto out; + return; br_multicast_enable(&port->ip4_own_query); #if IS_ENABLED(CONFIG_IPV6) @@ -987,8 +986,14 @@ void br_multicast_enable_port(struct net_bridge_port *port) if (port->multicast_router == MDB_RTR_TYPE_PERM && hlist_unhashed(&port->rlist)) br_multicast_add_router(br, port); +} -out: +void br_multicast_enable_port(struct net_bridge_port *port) +{ + struct net_bridge *br = port->br; + + spin_lock(&br->multicast_lock); + __br_multicast_enable_port(port); spin_unlock(&br->multicast_lock); } @@ -1994,8 +1999,9 @@ static void br_multicast_start_querier(struct net_bridge *br, int br_multicast_toggle(struct net_bridge *br, unsigned long val) { - int err = 0; struct net_bridge_mdb_htable *mdb; + struct net_bridge_port *port; + int err = 0; spin_lock_bh(&br->multicast_lock); if (br->multicast_disabled == !val) @@ -2023,10 +2029,9 @@ rollback: goto rollback; } - br_multicast_start_querier(br, &br->ip4_own_query); -#if IS_ENABLED(CONFIG_IPV6) - br_multicast_start_querier(br, &br->ip6_own_query); -#endif + br_multicast_open(br); + list_for_each_entry(port, &br->port_list, list) + __br_multicast_enable_port(port); unlock: spin_unlock_bh(&br->multicast_lock); From 937fa62e8a00d0b4bc2c0a40567d7c88ab2b2e8d Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 18 Oct 2016 14:02:04 -0400 Subject: [PATCH 258/884] dm rq: clear kworker_task if kthread_run() returned an error cleanup_mapped_device() calls kthread_stop() if kworker_task is non-NULL. Currently the assigned value could be a valid task struct or an error code (e.g -ENOMEM). Reset md->kworker_task to NULL if kthread_run() returned an erorr. Fixes: 7193a9defc ("dm rq: check kthread_run return for .request_fn request-based DM") Cc: stable@vger.kernel.org # 4.8 Reported-by: Tahsin Erdogan Signed-off-by: Mike Snitzer --- drivers/md/dm-rq.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 5eacce1ef88b..63e43f261cce 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -856,8 +856,11 @@ int dm_old_init_request_queue(struct mapped_device *md) init_kthread_worker(&md->kworker); md->kworker_task = kthread_run(kthread_worker_fn, &md->kworker, "kdmwork-%s", dm_device_name(md)); - if (IS_ERR(md->kworker_task)) - return PTR_ERR(md->kworker_task); + if (IS_ERR(md->kworker_task)) { + int error = PTR_ERR(md->kworker_task); + md->kworker_task = NULL; + return error; + } elv_register_queue(md->queue); From 7c6273194445fe1316084d3096f9311c3dfa4da6 Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Tue, 18 Oct 2016 20:52:28 +0800 Subject: [PATCH 259/884] arm64: dts: rockchip: remove the abuse of keep-power-in-suspend It was invented for sdio only, and should not be used for sdmmc or emmc. Remove it. Signed-off-by: Shawn Lin Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3368-geekbox.dts | 1 - arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts | 2 -- 2 files changed, 3 deletions(-) diff --git a/arch/arm64/boot/dts/rockchip/rk3368-geekbox.dts b/arch/arm64/boot/dts/rockchip/rk3368-geekbox.dts index 353314ca7426..e5eeca2c2456 100644 --- a/arch/arm64/boot/dts/rockchip/rk3368-geekbox.dts +++ b/arch/arm64/boot/dts/rockchip/rk3368-geekbox.dts @@ -116,7 +116,6 @@ cap-mmc-highspeed; clock-frequency = <150000000>; disable-wp; - keep-power-in-suspend; non-removable; num-slots = <1>; vmmc-supply = <&vcc_io>; diff --git a/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts b/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts index 13b7f1edad10..ea0a8eceefd4 100644 --- a/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts +++ b/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts @@ -199,7 +199,6 @@ bus-width = <8>; cap-mmc-highspeed; disable-wp; - keep-power-in-suspend; mmc-pwrseq = <&emmc_pwrseq>; mmc-hs200-1_2v; mmc-hs200-1_8v; @@ -348,7 +347,6 @@ clock-freq-min-max = <400000 50000000>; cap-sd-highspeed; card-detect-delay = <200>; - keep-power-in-suspend; num-slots = <1>; pinctrl-names = "default"; pinctrl-0 = <&sdmmc_clk &sdmmc_cmd &sdmmc_cd &sdmmc_bus4>; From 9fa2f2cca21b14d17b1c1a37babb639a48a5a29f Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Mon, 17 Oct 2016 15:56:29 -0500 Subject: [PATCH 260/884] ibmvnic: Driver Version 1.0.1 Increment driver version to reflect features that have been added since release. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index bfc84c7d0e11..878e2c059f5c 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -27,7 +27,7 @@ /**************************************************************************/ #define IBMVNIC_NAME "ibmvnic" -#define IBMVNIC_DRIVER_VERSION "1.0" +#define IBMVNIC_DRIVER_VERSION "1.0.1" #define IBMVNIC_INVALID_MAP -1 #define IBMVNIC_STATS_TIMEOUT 1 /* basic structures plus 100 2k buffers */ From 12608c260d2fe36746508cb4fa20b6e9a5f9c241 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Mon, 17 Oct 2016 15:28:09 -0500 Subject: [PATCH 261/884] ibmvnic: Fix GFP_KERNEL allocation in interrupt context Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index bfe17d9c022d..928bf8a5a567 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1190,7 +1190,7 @@ static struct ibmvnic_sub_crq_queue *init_sub_crq_queue(struct ibmvnic_adapter if (!scrq) return NULL; - scrq->msgs = (union sub_crq *)__get_free_pages(GFP_KERNEL, 2); + scrq->msgs = (union sub_crq *)__get_free_pages(GFP_ATOMIC, 2); memset(scrq->msgs, 0, 4 * PAGE_SIZE); if (!scrq->msgs) { dev_warn(dev, "Couldn't allocate crq queue messages page\n"); From 87737f8810db445db171ca81ca4cc43bd5b067ce Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Mon, 17 Oct 2016 15:28:10 -0500 Subject: [PATCH 262/884] ibmvnic: Update MTU after device initialization It is possible for the MTU to be changed during the initialization process with the VNIC Server. Ensure that the net device is updated to reflect the new MTU. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 928bf8a5a567..213162df1a9b 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -3654,6 +3654,7 @@ static void handle_crq_init_rsp(struct work_struct *work) goto task_failed; netdev->real_num_tx_queues = adapter->req_tx_queues; + netdev->mtu = adapter->req_mtu; if (adapter->failover) { adapter->failover = false; @@ -3792,6 +3793,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) } netdev->real_num_tx_queues = adapter->req_tx_queues; + netdev->mtu = adapter->req_mtu; rc = register_netdev(netdev); if (rc) { From 41ee9c557ef5de992843b6dac35a199e651525cf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 17 Oct 2016 14:22:48 -0700 Subject: [PATCH 263/884] soreuseport: do not export reuseport_add_sock() reuseport_add_sock() is not used from a module, no need to export it. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/sock_reuseport.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index e92b759d906c..9a1a352fd1eb 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c @@ -129,7 +129,6 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2) return 0; } -EXPORT_SYMBOL(reuseport_add_sock); static void reuseport_free_rcu(struct rcu_head *head) { From 4b75ca5a7a9e01dda3ea70bc17b1826fd679af61 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 18 Oct 2016 00:16:08 +0200 Subject: [PATCH 264/884] net: bcm63xx: avoid referencing uninitialized variable gcc found a reference to an uninitialized variable in the error handling of bcm_enet_open, introduced by a recent cleanup: drivers/net/ethernet/broadcom/bcm63xx_enet.c: In function 'bcm_enet_open' drivers/net/ethernet/broadcom/bcm63xx_enet.c:1129:2: warning: 'phydev' may be used uninitialized in this function [-Wmaybe-uninitialized] This makes the use of that variable conditional, so we only reference it here after it has been used before. Unlike my normal patches, I have not build-tested this one, as I don't currently have mips test in my randconfig setup. Fixes: 625eb8667d6f ("net: ethernet: broadcom: bcm63xx: use phydev from struct net_device") Cc: Philippe Reynes Reported-by: kbuild test robot Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcm63xx_enet.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index ae364c74baf3..537090952c45 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -1126,7 +1126,8 @@ out_freeirq: free_irq(dev->irq, dev); out_phy_disconnect: - phy_disconnect(phydev); + if (priv->has_phy) + phy_disconnect(phydev); return ret; } From 52ccd6318481a467d8ad54ea40f60c61e957d58f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 18 Oct 2016 00:16:09 +0200 Subject: [PATCH 265/884] net/hyperv: avoid uninitialized variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hdr_offset variable is only if we deal with a TCP or UDP packet, but as the check surrounding its usage tests for skb_is_gso() instead, the compiler has no idea if the variable is initialized or not at that point: drivers/net/hyperv/netvsc_drv.c: In function ‘netvsc_start_xmit’: drivers/net/hyperv/netvsc_drv.c:494:42: error: ‘hdr_offset’ may be used uninitialized in this function [-Werror=maybe-uninitialized] This adds an additional check for the transport type, which tells the compiler that this path cannot happen. Since the get_net_transport_info() function should always be inlined here, I don't expect this to result in additional runtime checks. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index f0919bd3a563..5d6e75a40d38 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -447,7 +447,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) * Setup the sendside checksum offload only if this is not a * GSO packet. */ - if (skb_is_gso(skb)) { + if ((net_trans_info & (INFO_TCP | INFO_UDP)) && skb_is_gso(skb)) { struct ndis_tcp_lso_info *lso_info; rndis_msg_size += NDIS_LSO_PPI_SIZE; From ecf244f753e09486707843f2b7b1874f33027038 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 18 Oct 2016 00:16:15 +0200 Subject: [PATCH 266/884] rocker: fix maybe-uninitialized warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In some rare configurations, we get a warning about the 'index' variable being used without an initialization: drivers/net/ethernet/rocker/rocker_ofdpa.c: In function ‘ofdpa_port_fib_ipv4.isra.16.constprop’: drivers/net/ethernet/rocker/rocker_ofdpa.c:2425:92: warning: ‘index’ may be used uninitialized in this function [-Wmaybe-uninitialized] This is a false positive, the logic is just a bit too complex for gcc to follow here. Moving the intialization of 'index' a little further down makes it clear to gcc that the function always returns an error if it is not initialized. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/rocker/rocker_ofdpa.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c index 431a60804272..4ca461322d60 100644 --- a/drivers/net/ethernet/rocker/rocker_ofdpa.c +++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c @@ -1493,8 +1493,6 @@ static int ofdpa_port_ipv4_nh(struct ofdpa_port *ofdpa_port, spin_lock_irqsave(&ofdpa->neigh_tbl_lock, lock_flags); found = ofdpa_neigh_tbl_find(ofdpa, ip_addr); - if (found) - *index = found->index; updating = found && adding; removing = found && !adding; @@ -1508,9 +1506,11 @@ static int ofdpa_port_ipv4_nh(struct ofdpa_port *ofdpa_port, resolved = false; } else if (removing) { ofdpa_neigh_del(trans, found); + *index = found->index; } else if (updating) { ofdpa_neigh_update(found, trans, NULL, false); resolved = !is_zero_ether_addr(found->eth_dst); + *index = found->index; } else { err = -ENOENT; } From b4f0fd4baa90ecce798e0d26d1cce8f4457f2028 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 17 Oct 2016 15:17:51 +0000 Subject: [PATCH 267/884] qed: Use list_move_tail instead of list_del/list_add_tail Using list_move_tail() instead of list_del() + list_add_tail(). Signed-off-by: Wei Yongjun Acked-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_ll2.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index ce171a80bdaf..63e1a1b0ef8e 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -538,8 +538,7 @@ static void qed_ll2_rxq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle) if (!p_pkt) break; - list_del(&p_pkt->list_entry); - list_add_tail(&p_pkt->list_entry, &p_rx->free_descq); + list_move_tail(&p_pkt->list_entry, &p_rx->free_descq); rx_buf_addr = p_pkt->rx_buf_addr; cookie = p_pkt->cookie; @@ -993,9 +992,8 @@ static void qed_ll2_post_rx_buffer_notify_fw(struct qed_hwfn *p_hwfn, p_posting_packet = list_first_entry(&p_rx->posting_descq, struct qed_ll2_rx_packet, list_entry); - list_del(&p_posting_packet->list_entry); - list_add_tail(&p_posting_packet->list_entry, - &p_rx->active_descq); + list_move_tail(&p_posting_packet->list_entry, + &p_rx->active_descq); b_notify_fw = true; } @@ -1186,8 +1184,7 @@ static void qed_ll2_tx_packet_notify(struct qed_hwfn *p_hwfn, if (!p_pkt) break; - list_del(&p_pkt->list_entry); - list_add_tail(&p_pkt->list_entry, &p_tx->active_descq); + list_move_tail(&p_pkt->list_entry, &p_tx->active_descq); } SET_FIELD(db_msg.params, CORE_DB_DATA_DEST, DB_DEST_XCM); From 19be0eaffa3ac7d8eb6784ad9bdbc7d67ed8e619 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 13 Oct 2016 13:07:36 -0700 Subject: [PATCH 268/884] mm: remove gup_flags FOLL_WRITE games from __get_user_pages() This is an ancient bug that was actually attempted to be fixed once (badly) by me eleven years ago in commit 4ceb5db9757a ("Fix get_user_pages() race for write access") but that was then undone due to problems on s390 by commit f33ea7f404e5 ("fix get_user_pages bug"). In the meantime, the s390 situation has long been fixed, and we can now fix it by checking the pte_dirty() bit properly (and do it better). The s390 dirty bit was implemented in abf09bed3cce ("s390/mm: implement software dirty bits") which made it into v3.9. Earlier kernels will have to look at the page state itself. Also, the VM has become more scalable, and what used a purely theoretical race back then has become easier to trigger. To fix it, we introduce a new internal FOLL_COW flag to mark the "yes, we already did a COW" rather than play racy games with FOLL_WRITE that is very fundamental, and then use the pte dirty flag to validate that the FOLL_COW flag is still valid. Reported-and-tested-by: Phil "not Paul" Oester Acked-by: Hugh Dickins Reviewed-by: Michal Hocko Cc: Andy Lutomirski Cc: Kees Cook Cc: Oleg Nesterov Cc: Willy Tarreau Cc: Nick Piggin Cc: Greg Thelen Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 + mm/gup.c | 14 ++++++++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index e9caec6a51e9..ed85879f47f5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2232,6 +2232,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma, #define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */ #define FOLL_MLOCK 0x1000 /* lock present pages */ #define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */ +#define FOLL_COW 0x4000 /* internal GUP flag */ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, void *data); diff --git a/mm/gup.c b/mm/gup.c index 96b2b2fd0fbd..22cc22e7432f 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -60,6 +60,16 @@ static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address, return -EEXIST; } +/* + * FOLL_FORCE can write to even unwritable pte's, but only + * after we've gone through a COW cycle and they are dirty. + */ +static inline bool can_follow_write_pte(pte_t pte, unsigned int flags) +{ + return pte_write(pte) || + ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte)); +} + static struct page *follow_page_pte(struct vm_area_struct *vma, unsigned long address, pmd_t *pmd, unsigned int flags) { @@ -95,7 +105,7 @@ retry: } if ((flags & FOLL_NUMA) && pte_protnone(pte)) goto no_page; - if ((flags & FOLL_WRITE) && !pte_write(pte)) { + if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, flags)) { pte_unmap_unlock(ptep, ptl); return NULL; } @@ -412,7 +422,7 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma, * reCOWed by userspace write). */ if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE)) - *flags &= ~FOLL_WRITE; + *flags |= FOLL_COW; return 0; } From 859110d7497cdd0e6b21010d6f777049d676382c Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Thu, 13 Oct 2016 01:20:11 +0100 Subject: [PATCH 269/884] mm: remove write/force parameters from __get_user_pages_locked() This removes the redundant 'write' and 'force' parameters from __get_user_pages_locked() to make the use of FOLL_FORCE explicit in callers as use of this flag can result in surprising behaviour (and hence bugs) within the mm subsystem. Signed-off-by: Lorenzo Stoakes Reviewed-by: Jan Kara Acked-by: Michal Hocko Signed-off-by: Linus Torvalds --- mm/gup.c | 47 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/mm/gup.c b/mm/gup.c index 22cc22e7432f..720eb9385204 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -739,7 +739,6 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, struct vm_area_struct **vmas, int *locked, bool notify_drop, @@ -757,10 +756,6 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk, if (pages) flags |= FOLL_GET; - if (write) - flags |= FOLL_WRITE; - if (force) - flags |= FOLL_FORCE; pages_done = 0; lock_dropped = false; @@ -856,9 +851,15 @@ long get_user_pages_locked(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, int *locked) { + unsigned int flags = FOLL_TOUCH; + + if (write) + flags |= FOLL_WRITE; + if (force) + flags |= FOLL_FORCE; + return __get_user_pages_locked(current, current->mm, start, nr_pages, - write, force, pages, NULL, locked, true, - FOLL_TOUCH); + pages, NULL, locked, true, flags); } EXPORT_SYMBOL(get_user_pages_locked); @@ -879,9 +880,15 @@ __always_inline long __get_user_pages_unlocked(struct task_struct *tsk, struct m { long ret; int locked = 1; + + if (write) + gup_flags |= FOLL_WRITE; + if (force) + gup_flags |= FOLL_FORCE; + down_read(&mm->mmap_sem); - ret = __get_user_pages_locked(tsk, mm, start, nr_pages, write, force, - pages, NULL, &locked, false, gup_flags); + ret = __get_user_pages_locked(tsk, mm, start, nr_pages, pages, NULL, + &locked, false, gup_flags); if (locked) up_read(&mm->mmap_sem); return ret; @@ -973,9 +980,15 @@ long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, int write, int force, struct page **pages, struct vm_area_struct **vmas) { - return __get_user_pages_locked(tsk, mm, start, nr_pages, write, force, - pages, vmas, NULL, false, - FOLL_TOUCH | FOLL_REMOTE); + unsigned int flags = FOLL_TOUCH | FOLL_REMOTE; + + if (write) + flags |= FOLL_WRITE; + if (force) + flags |= FOLL_FORCE; + + return __get_user_pages_locked(tsk, mm, start, nr_pages, pages, vmas, + NULL, false, flags); } EXPORT_SYMBOL(get_user_pages_remote); @@ -989,9 +1002,15 @@ long get_user_pages(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, struct vm_area_struct **vmas) { + unsigned int flags = FOLL_TOUCH; + + if (write) + flags |= FOLL_WRITE; + if (force) + flags |= FOLL_FORCE; + return __get_user_pages_locked(current, current->mm, start, nr_pages, - write, force, pages, vmas, NULL, false, - FOLL_TOUCH); + pages, vmas, NULL, false, flags); } EXPORT_SYMBOL(get_user_pages); From d4944b0ecec0af882483fe44b66729316e575208 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Thu, 13 Oct 2016 01:20:12 +0100 Subject: [PATCH 270/884] mm: remove write/force parameters from __get_user_pages_unlocked() This removes the redundant 'write' and 'force' parameters from __get_user_pages_unlocked() to make the use of FOLL_FORCE explicit in callers as use of this flag can result in surprising behaviour (and hence bugs) within the mm subsystem. Signed-off-by: Lorenzo Stoakes Acked-by: Paolo Bonzini Reviewed-by: Jan Kara Acked-by: Michal Hocko Signed-off-by: Linus Torvalds --- include/linux/mm.h | 3 +-- mm/gup.c | 17 +++++++++-------- mm/nommu.c | 12 +++++++++--- mm/process_vm_access.c | 7 +++++-- virt/kvm/async_pf.c | 3 ++- virt/kvm/kvm_main.c | 11 ++++++++--- 6 files changed, 34 insertions(+), 19 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index ed85879f47f5..bcdea1f4e98c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1285,8 +1285,7 @@ long get_user_pages_locked(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, int *locked); long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, - unsigned int gup_flags); + struct page **pages, unsigned int gup_flags); long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages); int get_user_pages_fast(unsigned long start, int nr_pages, int write, diff --git a/mm/gup.c b/mm/gup.c index 720eb9385204..e997f545b059 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -875,17 +875,11 @@ EXPORT_SYMBOL(get_user_pages_locked); */ __always_inline long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, - unsigned int gup_flags) + struct page **pages, unsigned int gup_flags) { long ret; int locked = 1; - if (write) - gup_flags |= FOLL_WRITE; - if (force) - gup_flags |= FOLL_FORCE; - down_read(&mm->mmap_sem); ret = __get_user_pages_locked(tsk, mm, start, nr_pages, pages, NULL, &locked, false, gup_flags); @@ -915,8 +909,15 @@ EXPORT_SYMBOL(__get_user_pages_unlocked); long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages) { + unsigned int flags = FOLL_TOUCH; + + if (write) + flags |= FOLL_WRITE; + if (force) + flags |= FOLL_FORCE; + return __get_user_pages_unlocked(current, current->mm, start, nr_pages, - write, force, pages, FOLL_TOUCH); + pages, flags); } EXPORT_SYMBOL(get_user_pages_unlocked); diff --git a/mm/nommu.c b/mm/nommu.c index 95daf81a4855..925dcc1fa2f3 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -185,8 +185,7 @@ EXPORT_SYMBOL(get_user_pages_locked); long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, - unsigned int gup_flags) + struct page **pages, unsigned int gup_flags) { long ret; down_read(&mm->mmap_sem); @@ -200,8 +199,15 @@ EXPORT_SYMBOL(__get_user_pages_unlocked); long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages) { + unsigned int flags = 0; + + if (write) + flags |= FOLL_WRITE; + if (force) + flags |= FOLL_FORCE; + return __get_user_pages_unlocked(current, current->mm, start, nr_pages, - write, force, pages, 0); + pages, flags); } EXPORT_SYMBOL(get_user_pages_unlocked); diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c index 07514d41ebcc..be8dc8d1edb9 100644 --- a/mm/process_vm_access.c +++ b/mm/process_vm_access.c @@ -88,12 +88,16 @@ static int process_vm_rw_single_vec(unsigned long addr, ssize_t rc = 0; unsigned long max_pages_per_loop = PVM_MAX_KMALLOC_PAGES / sizeof(struct pages *); + unsigned int flags = FOLL_REMOTE; /* Work out address and page range required */ if (len == 0) return 0; nr_pages = (addr + len - 1) / PAGE_SIZE - addr / PAGE_SIZE + 1; + if (vm_write) + flags |= FOLL_WRITE; + while (!rc && nr_pages && iov_iter_count(iter)) { int pages = min(nr_pages, max_pages_per_loop); size_t bytes; @@ -104,8 +108,7 @@ static int process_vm_rw_single_vec(unsigned long addr, * current/current->mm */ pages = __get_user_pages_unlocked(task, mm, pa, pages, - vm_write, 0, process_pages, - FOLL_REMOTE); + process_pages, flags); if (pages <= 0) return -EFAULT; diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index db9668869f6f..8035cc1eb955 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -84,7 +84,8 @@ static void async_pf_execute(struct work_struct *work) * mm and might be done in another context, so we must * use FOLL_REMOTE. */ - __get_user_pages_unlocked(NULL, mm, addr, 1, 1, 0, NULL, FOLL_REMOTE); + __get_user_pages_unlocked(NULL, mm, addr, 1, NULL, + FOLL_WRITE | FOLL_REMOTE); kvm_async_page_present_sync(vcpu, apf); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 81dfc73d3df3..28510e72618a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1416,10 +1416,15 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault, down_read(¤t->mm->mmap_sem); npages = get_user_page_nowait(addr, write_fault, page); up_read(¤t->mm->mmap_sem); - } else + } else { + unsigned int flags = FOLL_TOUCH | FOLL_HWPOISON; + + if (write_fault) + flags |= FOLL_WRITE; + npages = __get_user_pages_unlocked(current, current->mm, addr, 1, - write_fault, 0, page, - FOLL_TOUCH|FOLL_HWPOISON); + page, flags); + } if (npages != 1) return npages; From c164154f66f0c9b02673f07aa4f044f1d9c70274 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Thu, 13 Oct 2016 01:20:13 +0100 Subject: [PATCH 271/884] mm: replace get_user_pages_unlocked() write/force parameters with gup_flags This removes the 'write' and 'force' use from get_user_pages_unlocked() and replaces them with 'gup_flags' to make the use of FOLL_FORCE explicit in callers as use of this flag can result in surprising behaviour (and hence bugs) within the mm subsystem. Signed-off-by: Lorenzo Stoakes Reviewed-by: Jan Kara Acked-by: Michal Hocko Signed-off-by: Linus Torvalds --- arch/mips/mm/gup.c | 2 +- arch/s390/mm/gup.c | 3 ++- arch/sh/mm/gup.c | 3 ++- arch/sparc/mm/gup.c | 3 ++- arch/x86/mm/gup.c | 2 +- drivers/media/pci/ivtv/ivtv-udma.c | 4 ++-- drivers/media/pci/ivtv/ivtv-yuv.c | 5 +++-- drivers/scsi/st.c | 5 ++--- drivers/video/fbdev/pvr2fb.c | 4 ++-- include/linux/mm.h | 2 +- mm/gup.c | 14 ++++---------- mm/nommu.c | 11 ++--------- mm/util.c | 3 ++- net/ceph/pagevec.c | 2 +- 14 files changed, 27 insertions(+), 36 deletions(-) diff --git a/arch/mips/mm/gup.c b/arch/mips/mm/gup.c index 42d124fb6474..d8c3c159289a 100644 --- a/arch/mips/mm/gup.c +++ b/arch/mips/mm/gup.c @@ -287,7 +287,7 @@ slow_irqon: pages += nr; ret = get_user_pages_unlocked(start, (end - start) >> PAGE_SHIFT, - write, 0, pages); + pages, write ? FOLL_WRITE : 0); /* Have to be a bit careful with return values */ if (nr > 0) { diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index adb0c34bf431..18d4107e10ee 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -266,7 +266,8 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, /* Try to get the remaining pages with get_user_pages */ start += nr << PAGE_SHIFT; pages += nr; - ret = get_user_pages_unlocked(start, nr_pages - nr, write, 0, pages); + ret = get_user_pages_unlocked(start, nr_pages - nr, pages, + write ? FOLL_WRITE : 0); /* Have to be a bit careful with return values */ if (nr > 0) ret = (ret < 0) ? nr : ret + nr; diff --git a/arch/sh/mm/gup.c b/arch/sh/mm/gup.c index 40fa6c8adc43..063c298ba56c 100644 --- a/arch/sh/mm/gup.c +++ b/arch/sh/mm/gup.c @@ -258,7 +258,8 @@ slow_irqon: pages += nr; ret = get_user_pages_unlocked(start, - (end - start) >> PAGE_SHIFT, write, 0, pages); + (end - start) >> PAGE_SHIFT, pages, + write ? FOLL_WRITE : 0); /* Have to be a bit careful with return values */ if (nr > 0) { diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c index 4e06750a5d29..cd0e32bbcb1d 100644 --- a/arch/sparc/mm/gup.c +++ b/arch/sparc/mm/gup.c @@ -238,7 +238,8 @@ slow: pages += nr; ret = get_user_pages_unlocked(start, - (end - start) >> PAGE_SHIFT, write, 0, pages); + (end - start) >> PAGE_SHIFT, pages, + write ? FOLL_WRITE : 0); /* Have to be a bit careful with return values */ if (nr > 0) { diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index b8b6a60b32cf..0d4fb3ebbbac 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -435,7 +435,7 @@ slow_irqon: ret = get_user_pages_unlocked(start, (end - start) >> PAGE_SHIFT, - write, 0, pages); + pages, write ? FOLL_WRITE : 0); /* Have to be a bit careful with return values */ if (nr > 0) { diff --git a/drivers/media/pci/ivtv/ivtv-udma.c b/drivers/media/pci/ivtv/ivtv-udma.c index 4769469fe842..2c9232ef7baa 100644 --- a/drivers/media/pci/ivtv/ivtv-udma.c +++ b/drivers/media/pci/ivtv/ivtv-udma.c @@ -124,8 +124,8 @@ int ivtv_udma_setup(struct ivtv *itv, unsigned long ivtv_dest_addr, } /* Get user pages for DMA Xfer */ - err = get_user_pages_unlocked(user_dma.uaddr, user_dma.page_count, 0, - 1, dma->map); + err = get_user_pages_unlocked(user_dma.uaddr, user_dma.page_count, + dma->map, FOLL_FORCE); if (user_dma.page_count != err) { IVTV_DEBUG_WARN("failed to map user pages, returned %d instead of %d\n", diff --git a/drivers/media/pci/ivtv/ivtv-yuv.c b/drivers/media/pci/ivtv/ivtv-yuv.c index b094054cda6e..f7299d3d8244 100644 --- a/drivers/media/pci/ivtv/ivtv-yuv.c +++ b/drivers/media/pci/ivtv/ivtv-yuv.c @@ -76,11 +76,12 @@ static int ivtv_yuv_prep_user_dma(struct ivtv *itv, struct ivtv_user_dma *dma, /* Get user pages for DMA Xfer */ y_pages = get_user_pages_unlocked(y_dma.uaddr, - y_dma.page_count, 0, 1, &dma->map[0]); + y_dma.page_count, &dma->map[0], FOLL_FORCE); uv_pages = 0; /* silence gcc. value is set and consumed only if: */ if (y_pages == y_dma.page_count) { uv_pages = get_user_pages_unlocked(uv_dma.uaddr, - uv_dma.page_count, 0, 1, &dma->map[y_pages]); + uv_dma.page_count, &dma->map[y_pages], + FOLL_FORCE); } if (y_pages != y_dma.page_count || uv_pages != uv_dma.page_count) { diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index 7af5226aa55b..618422ea3a41 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -4922,9 +4922,8 @@ static int sgl_map_user_pages(struct st_buffer *STbp, res = get_user_pages_unlocked( uaddr, nr_pages, - rw == READ, - 0, /* don't force */ - pages); + pages, + rw == READ ? FOLL_WRITE : 0); /* don't force */ /* Errors and no page mapped should return here */ if (res < nr_pages) diff --git a/drivers/video/fbdev/pvr2fb.c b/drivers/video/fbdev/pvr2fb.c index 3b1ca4411073..a2564ab91e62 100644 --- a/drivers/video/fbdev/pvr2fb.c +++ b/drivers/video/fbdev/pvr2fb.c @@ -686,8 +686,8 @@ static ssize_t pvr2fb_write(struct fb_info *info, const char *buf, if (!pages) return -ENOMEM; - ret = get_user_pages_unlocked((unsigned long)buf, nr_pages, WRITE, - 0, pages); + ret = get_user_pages_unlocked((unsigned long)buf, nr_pages, pages, + FOLL_WRITE); if (ret < nr_pages) { nr_pages = ret; diff --git a/include/linux/mm.h b/include/linux/mm.h index bcdea1f4e98c..abd53f2eb74e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1287,7 +1287,7 @@ long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, struct page **pages, unsigned int gup_flags); long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages); + struct page **pages, unsigned int gup_flags); int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages); diff --git a/mm/gup.c b/mm/gup.c index e997f545b059..373d1ec006e4 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -907,17 +907,10 @@ EXPORT_SYMBOL(__get_user_pages_unlocked); * "force" parameter). */ long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages) + struct page **pages, unsigned int gup_flags) { - unsigned int flags = FOLL_TOUCH; - - if (write) - flags |= FOLL_WRITE; - if (force) - flags |= FOLL_FORCE; - return __get_user_pages_unlocked(current, current->mm, start, nr_pages, - pages, flags); + pages, gup_flags | FOLL_TOUCH); } EXPORT_SYMBOL(get_user_pages_unlocked); @@ -1535,7 +1528,8 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, start += nr << PAGE_SHIFT; pages += nr; - ret = get_user_pages_unlocked(start, nr_pages - nr, write, 0, pages); + ret = get_user_pages_unlocked(start, nr_pages - nr, pages, + write ? FOLL_WRITE : 0); /* Have to be a bit careful with return values */ if (nr > 0) { diff --git a/mm/nommu.c b/mm/nommu.c index 925dcc1fa2f3..7e27add39f7e 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -197,17 +197,10 @@ long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, EXPORT_SYMBOL(__get_user_pages_unlocked); long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages) + struct page **pages, unsigned int gup_flags) { - unsigned int flags = 0; - - if (write) - flags |= FOLL_WRITE; - if (force) - flags |= FOLL_FORCE; - return __get_user_pages_unlocked(current, current->mm, start, nr_pages, - pages, flags); + pages, gup_flags); } EXPORT_SYMBOL(get_user_pages_unlocked); diff --git a/mm/util.c b/mm/util.c index 662cddf914af..4c685bde5ebc 100644 --- a/mm/util.c +++ b/mm/util.c @@ -283,7 +283,8 @@ EXPORT_SYMBOL_GPL(__get_user_pages_fast); int __weak get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages) { - return get_user_pages_unlocked(start, nr_pages, write, 0, pages); + return get_user_pages_unlocked(start, nr_pages, pages, + write ? FOLL_WRITE : 0); } EXPORT_SYMBOL_GPL(get_user_pages_fast); diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c index 00d2601407c5..1a7c9a79a53c 100644 --- a/net/ceph/pagevec.c +++ b/net/ceph/pagevec.c @@ -26,7 +26,7 @@ struct page **ceph_get_direct_page_vector(const void __user *data, while (got < num_pages) { rc = get_user_pages_unlocked( (unsigned long)data + ((unsigned long)got * PAGE_SIZE), - num_pages - got, write_page, 0, pages + got); + num_pages - got, pages + got, write_page ? FOLL_WRITE : 0); if (rc < 0) break; BUG_ON(rc == 0); From db766b0a25c9520b7c585bcdb2725dcc0e490f4a Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Thu, 13 Oct 2016 15:51:47 -0400 Subject: [PATCH 272/884] sh: add Kconfig option for J-Core SoC core drivers Signed-off-by: Rich Felker --- arch/sh/boards/Kconfig | 10 ++++++++++ arch/sh/configs/j2_defconfig | 1 + 2 files changed, 11 insertions(+) diff --git a/arch/sh/boards/Kconfig b/arch/sh/boards/Kconfig index e9c2c42031fe..4e21949593cf 100644 --- a/arch/sh/boards/Kconfig +++ b/arch/sh/boards/Kconfig @@ -22,6 +22,16 @@ config SH_DEVICE_TREE have sufficient driver coverage to use this option; do not select it if you are using original SuperH hardware. +config SH_JCORE_SOC + bool "J-Core SoC" + depends on SH_DEVICE_TREE && (CPU_SH2 || CPU_J2) + select CLKSRC_JCORE_PIT + select JCORE_AIC + default y if CPU_J2 + help + Select this option to include drivers core components of the + J-Core SoC, including interrupt controllers and timers. + config SH_SOLUTION_ENGINE bool "SolutionEngine" select SOLUTION_ENGINE diff --git a/arch/sh/configs/j2_defconfig b/arch/sh/configs/j2_defconfig index 94d1eca52f72..530e66e163aa 100644 --- a/arch/sh/configs/j2_defconfig +++ b/arch/sh/configs/j2_defconfig @@ -8,6 +8,7 @@ CONFIG_MEMORY_START=0x10000000 CONFIG_MEMORY_SIZE=0x04000000 CONFIG_CPU_BIG_ENDIAN=y CONFIG_SH_DEVICE_TREE=y +CONFIG_SH_JCORE_SOC=y CONFIG_HZ_100=y CONFIG_CMDLINE_OVERWRITE=y CONFIG_CMDLINE="console=ttyUL0 earlycon" From 99f453f875a5e207f38e92ced6ce3427433d5be2 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Tue, 18 Oct 2016 18:54:56 -0400 Subject: [PATCH 273/884] sh: add earlycon support to j2_defconfig Signed-off-by: Rich Felker --- arch/sh/configs/j2_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/sh/configs/j2_defconfig b/arch/sh/configs/j2_defconfig index 530e66e163aa..2eb81ebe3888 100644 --- a/arch/sh/configs/j2_defconfig +++ b/arch/sh/configs/j2_defconfig @@ -21,6 +21,7 @@ CONFIG_INET=y CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_NETDEVICES=y +CONFIG_SERIAL_EARLYCON=y CONFIG_SERIAL_UARTLITE=y CONFIG_SERIAL_UARTLITE_CONSOLE=y CONFIG_I2C=y From d852b5f35e84e60c930589eeb14a6df21ea9b1cb Mon Sep 17 00:00:00 2001 From: James Hogan Date: Wed, 19 Oct 2016 00:24:27 +0100 Subject: [PATCH 274/884] KVM: MIPS: Add missing uaccess.h include MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MIPS KVM uses user memory accessors but mips.c doesn't directly include uaccess.h, so include it now. This wasn't too much of a problem before v4.9-rc1 as asm/module.h included asm/uaccess.h, however since commit 29abfbd9cbba ("mips: separate extable.h, switch module.h to it") this is no longer the case. This resulted in build failures when trace points were disabled, as trace/define_trace.h includes trace/trace_events.h only ifdef TRACEPOINTS_ENABLED, which goes on to include asm/uaccess.h via a couple of other headers. Fixes: 29abfbd9cbba ("mips: separate extable.h, switch module.h to it") Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: "Radim Krčmář" Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org --- arch/mips/kvm/mips.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index ce961495b5e1..622037d851a3 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include From 6d4952d9d9d4dc2bb9c0255d95a09405a1e958f7 Mon Sep 17 00:00:00 2001 From: Andrew Lutomirski Date: Mon, 17 Oct 2016 10:06:27 -0700 Subject: [PATCH 275/884] hwrng: core - Don't use a stack buffer in add_early_randomness() hw_random carefully avoids using a stack buffer except in add_early_randomness(). This causes a crash in virtio_rng if CONFIG_VMAP_STACK=y. Reported-by: Matt Mullins Tested-by: Matt Mullins Fixes: d3cc7996473a ("hwrng: fetch randomness only after device init") Signed-off-by: Andy Lutomirski Signed-off-by: Herbert Xu --- drivers/char/hw_random/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index 482794526e8c..d2d2c89de5b4 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -84,14 +84,14 @@ static size_t rng_buffer_size(void) static void add_early_randomness(struct hwrng *rng) { - unsigned char bytes[16]; int bytes_read; + size_t size = min_t(size_t, 16, rng_buffer_size()); mutex_lock(&reading_mutex); - bytes_read = rng_get_data(rng, bytes, sizeof(bytes), 1); + bytes_read = rng_get_data(rng, rng_buffer, size, 1); mutex_unlock(&reading_mutex); if (bytes_read > 0) - add_device_randomness(bytes, bytes_read); + add_device_randomness(rng_buffer, bytes_read); } static inline void cleanup_rng(struct kref *kref) From 04946fb60fb157faafa01658dff3131d49f49ccb Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 18 Oct 2016 10:24:49 +0100 Subject: [PATCH 276/884] ARM: fix oops when using older ARMv4T CPUs Alexander Shiyan reports that CLPS711x fails at boot time in the data exception handler due to a NULL pointer dereference. This is caused by the late-v4t abort handler overwriting R9 (which becomes zero). Fix this by making the abort handler save and restore R9. Unable to handle kernel NULL pointer dereference at virtual address 00000008 pgd = c3b58000 [00000008] *pgd=800000000, *pte=00000000, *ppte=feff4140 Internal error: Oops: 63c11817 [#1] PREEMPT ARM CPU: 0 PID: 448 Comm: ash Not tainted 4.8.1+ #1 Hardware name: Cirrus Logic CLPS711X (Device Tree Support) task: c39e03a0 ti: c3b4e000 task.ti: c3b4e000 PC is at __dabt_svc+0x4c/0x60 LR is at do_page_fault+0x144/0x2ac pc : [] lr : [] psr: 60000093 sp : c3b4fe6c ip : 00000001 fp : b6f1bf88 r10: c387a5a0 r9 : 00000000 r8 : e4e0e001 r7 : bee3ef83 r6 : 00100000 r5 : 80000013 r4 : c022fcf8 r3 : 00000000 r2 : 00000008 r1 : bf000000 r0 : 00000000 Flags: nZCv IRQs off FIQs on Mode SVC_32 ISA ARM Segment user Control: 0000217f Table: c3b58055 DAC: 00000055 Process ash (pid: 448, stack limit = 0xc3b4e190) Stack: (0xc3b4fe6c to 0xc3b50000) fe60: bee3ef83 c05168d1 ffffffff 00000000 c3adfe80 fe80: c3a03300 00000000 c3b4fed0 c3a03400 bee3ef83 c387a5a0 b6f1bf88 00000001 fea0: c3b4febc 00000076 c022fcf8 80000013 ffffffff 0000003f bf000000 bee3ef83 fec0: 00000004 00000000 c3adfe80 c00e432c 00000812 00000005 00000001 00000006 fee0: b6f1b000 00000000 00010000 0003c944 0004d000 0004d439 00010000 b6f1b000 ff00: 00000005 00000000 00015ecc c3b4fed0 0000000a 00000000 00000000 c00a1dc0 ff20: befff000 c3a03300 c3b4e000 c0507cd8 c0508024 fffffff8 c3a03300 00000000 ff40: c0516a58 c00a35bc c39e03a0 000001c0 bea84ce8 0004e008 c3b3a000 c00a3ac0 ff60: c3b40374 c3b3a000 bea84d11 00000000 c0500188 bea84d11 bea84ce8 00000001 ff80: 0000000b c000a304 c3b4e000 00000000 bea84ce4 c00a3cd0 00000000 bea84d11 ffa0: bea84ce8 c000a160 bea84d11 bea84ce8 bea84d11 bea84ce8 0004e008 0004d450 ffc0: bea84d11 bea84ce8 00000001 0000000b b6f45ee4 00000000 b6f5ff70 bea84ce4 ffe0: b6f2f130 bea84cb0 b6f2f194 b6ef29f4 a0000010 bea84d11 02c7cffa 02c7cffd [] (__dabt_svc) from [] (__copy_to_user_std+0xf8/0x330) [] (__copy_to_user_std) from [] +(load_elf_binary+0x920/0x107c) [] (load_elf_binary) from [] +(search_binary_handler+0x80/0x16c) [] (search_binary_handler) from [] +(do_execveat_common+0x418/0x600) [] (do_execveat_common) from [] (do_execve+0x28/0x30) [] (do_execve) from [] (ret_fast_syscall+0x0/0x30) Code: e1a0200d eb00136b e321f093 e59d104c (e5891008) ---[ end trace 4b4f8086ebef98c5 ]--- Fixes: e6978e4bf181 ("ARM: save and reset the address limit when entering an exception") Reported-by: Alexander Shiyan Tested-by: Alexander Shiyan Signed-off-by: Russell King --- arch/arm/mm/abort-lv4t.S | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/arch/arm/mm/abort-lv4t.S b/arch/arm/mm/abort-lv4t.S index 6d8e8e3365d1..4cdfab31a0b6 100644 --- a/arch/arm/mm/abort-lv4t.S +++ b/arch/arm/mm/abort-lv4t.S @@ -7,7 +7,7 @@ * : r4 = aborted context pc * : r5 = aborted context psr * - * Returns : r4-r5, r10-r11, r13 preserved + * Returns : r4-r5, r9-r11, r13 preserved * * Purpose : obtain information about current aborted instruction. * Note: we read user space. This means we might cause a data @@ -48,7 +48,10 @@ ENTRY(v4t_late_abort) /* c */ b do_DataAbort @ ldc rd, [rn], #m @ Same as ldr rd, [rn], #m /* d */ b do_DataAbort @ ldc rd, [rn, #m] /* e */ b .data_unknown -/* f */ +/* f */ b .data_unknown + +.data_unknown_r9: + ldr r9, [sp], #4 .data_unknown: @ Part of jumptable mov r0, r4 mov r1, r8 @@ -57,6 +60,7 @@ ENTRY(v4t_late_abort) .data_arm_ldmstm: tst r8, #1 << 21 @ check writeback bit beq do_DataAbort @ no writeback -> no fixup + str r9, [sp, #-4]! mov r7, #0x11 orr r7, r7, #0x1100 and r6, r8, r7 @@ -75,12 +79,14 @@ ENTRY(v4t_late_abort) subne r7, r7, r6, lsl #2 @ Undo increment addeq r7, r7, r6, lsl #2 @ Undo decrement str r7, [r2, r9, lsr #14] @ Put register 'Rn' + ldr r9, [sp], #4 b do_DataAbort .data_arm_lateldrhpre: tst r8, #1 << 21 @ Check writeback bit beq do_DataAbort @ No writeback -> no fixup .data_arm_lateldrhpost: + str r9, [sp, #-4]! and r9, r8, #0x00f @ get Rm / low nibble of immediate value tst r8, #1 << 22 @ if (immediate offset) andne r6, r8, #0xf00 @ { immediate high nibble @@ -93,6 +99,7 @@ ENTRY(v4t_late_abort) subne r7, r7, r6 @ Undo incrmenet addeq r7, r7, r6 @ Undo decrement str r7, [r2, r9, lsr #14] @ Put register 'Rn' + ldr r9, [sp], #4 b do_DataAbort .data_arm_lateldrpreconst: @@ -101,12 +108,14 @@ ENTRY(v4t_late_abort) .data_arm_lateldrpostconst: movs r6, r8, lsl #20 @ Get offset beq do_DataAbort @ zero -> no fixup + str r9, [sp, #-4]! and r9, r8, #15 << 16 @ Extract 'n' from instruction ldr r7, [r2, r9, lsr #14] @ Get register 'Rn' tst r8, #1 << 23 @ Check U bit subne r7, r7, r6, lsr #20 @ Undo increment addeq r7, r7, r6, lsr #20 @ Undo decrement str r7, [r2, r9, lsr #14] @ Put register 'Rn' + ldr r9, [sp], #4 b do_DataAbort .data_arm_lateldrprereg: @@ -115,6 +124,7 @@ ENTRY(v4t_late_abort) .data_arm_lateldrpostreg: and r7, r8, #15 @ Extract 'm' from instruction ldr r6, [r2, r7, lsl #2] @ Get register 'Rm' + str r9, [sp, #-4]! mov r9, r8, lsr #7 @ get shift count ands r9, r9, #31 and r7, r8, #0x70 @ get shift type @@ -126,33 +136,33 @@ ENTRY(v4t_late_abort) b .data_arm_apply_r6_and_rn b .data_arm_apply_r6_and_rn @ 1: LSL #0 nop - b .data_unknown @ 2: MUL? + b .data_unknown_r9 @ 2: MUL? nop - b .data_unknown @ 3: MUL? + b .data_unknown_r9 @ 3: MUL? nop mov r6, r6, lsr r9 @ 4: LSR #!0 b .data_arm_apply_r6_and_rn mov r6, r6, lsr #32 @ 5: LSR #32 b .data_arm_apply_r6_and_rn - b .data_unknown @ 6: MUL? + b .data_unknown_r9 @ 6: MUL? nop - b .data_unknown @ 7: MUL? + b .data_unknown_r9 @ 7: MUL? nop mov r6, r6, asr r9 @ 8: ASR #!0 b .data_arm_apply_r6_and_rn mov r6, r6, asr #32 @ 9: ASR #32 b .data_arm_apply_r6_and_rn - b .data_unknown @ A: MUL? + b .data_unknown_r9 @ A: MUL? nop - b .data_unknown @ B: MUL? + b .data_unknown_r9 @ B: MUL? nop mov r6, r6, ror r9 @ C: ROR #!0 b .data_arm_apply_r6_and_rn mov r6, r6, rrx @ D: RRX b .data_arm_apply_r6_and_rn - b .data_unknown @ E: MUL? + b .data_unknown_r9 @ E: MUL? nop - b .data_unknown @ F: MUL? + b .data_unknown_r9 @ F: MUL? .data_thumb_abort: ldrh r8, [r4] @ read instruction @@ -190,6 +200,7 @@ ENTRY(v4t_late_abort) .data_thumb_pushpop: tst r8, #1 << 10 beq .data_unknown + str r9, [sp, #-4]! and r6, r8, #0x55 @ hweight8(r8) + R bit and r9, r8, #0xaa add r6, r6, r9, lsr #1 @@ -204,9 +215,11 @@ ENTRY(v4t_late_abort) addeq r7, r7, r6, lsl #2 @ increment SP if PUSH subne r7, r7, r6, lsl #2 @ decrement SP if POP str r7, [r2, #13 << 2] + ldr r9, [sp], #4 b do_DataAbort .data_thumb_ldmstm: + str r9, [sp, #-4]! and r6, r8, #0x55 @ hweight8(r8) and r9, r8, #0xaa add r6, r6, r9, lsr #1 @@ -219,4 +232,5 @@ ENTRY(v4t_late_abort) and r6, r6, #15 @ number of regs to transfer sub r7, r7, r6, lsl #2 @ always decrement str r7, [r2, r9, lsr #6] + ldr r9, [sp], #4 b do_DataAbort From 6127d124ee4eb9c39983813cc9803f3654ab7e16 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 18 Oct 2016 21:46:18 +0100 Subject: [PATCH 277/884] ARM: wire up new pkey syscalls Wire up the new pkey syscalls for ARM. Signed-off-by: Russell King --- arch/arm/include/asm/unistd.h | 2 +- arch/arm/include/uapi/asm/unistd.h | 3 +++ arch/arm/kernel/calls.S | 3 +++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index 194b69923389..ada0d29a660f 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h @@ -19,7 +19,7 @@ * This may need to be greater than __NR_last_syscall+1 in order to * account for the padding in the syscall table */ -#define __NR_syscalls (396) +#define __NR_syscalls (400) #define __ARCH_WANT_STAT64 #define __ARCH_WANT_SYS_GETHOSTNAME diff --git a/arch/arm/include/uapi/asm/unistd.h b/arch/arm/include/uapi/asm/unistd.h index 2cb9dc770e1d..314100a06ccb 100644 --- a/arch/arm/include/uapi/asm/unistd.h +++ b/arch/arm/include/uapi/asm/unistd.h @@ -420,6 +420,9 @@ #define __NR_copy_file_range (__NR_SYSCALL_BASE+391) #define __NR_preadv2 (__NR_SYSCALL_BASE+392) #define __NR_pwritev2 (__NR_SYSCALL_BASE+393) +#define __NR_pkey_mprotect (__NR_SYSCALL_BASE+394) +#define __NR_pkey_alloc (__NR_SYSCALL_BASE+395) +#define __NR_pkey_free (__NR_SYSCALL_BASE+396) /* * The following SWIs are ARM private. diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S index 703fa0f3cd8f..08030b18f10a 100644 --- a/arch/arm/kernel/calls.S +++ b/arch/arm/kernel/calls.S @@ -403,6 +403,9 @@ CALL(sys_copy_file_range) CALL(sys_preadv2) CALL(sys_pwritev2) + CALL(sys_pkey_mprotect) +/* 395 */ CALL(sys_pkey_alloc) + CALL(sys_pkey_free) #ifndef syscalls_counted .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls #define syscalls_counted From d2cf909cda5f8c5609cb7ed6cda816c3e15528c7 Mon Sep 17 00:00:00 2001 From: Frederic Barrat Date: Fri, 17 Jun 2016 18:53:28 +0200 Subject: [PATCH 278/884] powerpc/mm: Prevent unlikely crash in copro_calculate_slb() If a cxl adapter faults on an invalid address for a kernel context, we may enter copro_calculate_slb() with a NULL mm pointer (kernel context) and an effective address which looks like a user address. Which will cause a crash when dereferencing mm. It is clearly an AFU bug, but there's no reason to crash either. So return an error, so that cxl can ack the interrupt with an address error. Fixes: 73d16a6e0e51 ("powerpc/cell: Move data segment faulting code out of cell platform") Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Frederic Barrat Acked-by: Ian Munsie Signed-off-by: Michael Ellerman --- arch/powerpc/mm/copro_fault.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c index bb0354222b11..362954f98029 100644 --- a/arch/powerpc/mm/copro_fault.c +++ b/arch/powerpc/mm/copro_fault.c @@ -106,6 +106,8 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb) switch (REGION_ID(ea)) { case USER_REGION_ID: pr_devel("%s: 0x%llx -- USER_REGION_ID\n", __func__, ea); + if (mm == NULL) + return 1; psize = get_slice_psize(mm, ea); ssize = user_segment_size(ea); vsid = get_vsid(mm->context.id, ea, ssize); From 65bc3ece84ef6340cbd80eec10ab9be3426e1149 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 12 Oct 2016 21:00:43 +0200 Subject: [PATCH 279/884] powerpc/boot: Fix boot on systems with uncompressed kernel image This commit broke boot on systems with an uncompressed kernel image, namely systems using a cuImage. On such systems the compressed boot image (boot wrapper, uncompressed kernel image, ..) is decompressed by u-boot already, therefore the boot wrapper code sees an uncompressed kernel image. The old decompression code silently assumed an uncompressed kernel image if it found no valid gzip signature, whilst the new code bailed out in this case. Fix this by re-introducing such a fallback if no valid compressed image is found. Fixes: 1b7898ee276b ("Use the pre-boot decompression API") Signed-off-by: Heiner Kallweit Signed-off-by: Michael Ellerman --- arch/powerpc/boot/main.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/boot/main.c b/arch/powerpc/boot/main.c index f7a184b6c35b..57d42d129033 100644 --- a/arch/powerpc/boot/main.c +++ b/arch/powerpc/boot/main.c @@ -32,9 +32,16 @@ static struct addr_range prep_kernel(void) void *addr = 0; struct elf_info ei; long len; + int uncompressed_image = 0; - partial_decompress(vmlinuz_addr, vmlinuz_size, + len = partial_decompress(vmlinuz_addr, vmlinuz_size, elfheader, sizeof(elfheader), 0); + /* assume uncompressed data if -1 is returned */ + if (len == -1) { + uncompressed_image = 1; + memcpy(elfheader, vmlinuz_addr, sizeof(elfheader)); + printf("No valid compressed data found, assume uncompressed data\n\r"); + } if (!parse_elf64(elfheader, &ei) && !parse_elf32(elfheader, &ei)) fatal("Error: not a valid PPC32 or PPC64 ELF file!\n\r"); @@ -67,6 +74,13 @@ static struct addr_range prep_kernel(void) "device tree\n\r"); } + if (uncompressed_image) { + memcpy(addr, vmlinuz_addr + ei.elfoffset, ei.loadsize); + printf("0x%lx bytes of uncompressed data copied\n\r", + ei.loadsize); + goto out; + } + /* Finally, decompress the kernel */ printf("Decompressing (0x%p <- 0x%p:0x%p)...\n\r", addr, vmlinuz_addr, vmlinuz_addr+vmlinuz_size); @@ -82,7 +96,7 @@ static struct addr_range prep_kernel(void) len, ei.loadsize); printf("Done! Decompressed 0x%lx bytes\n\r", len); - +out: flush_cache(addr, ei.loadsize); return (struct addr_range){addr, ei.memsize}; From 70b565bbdb911023373e035225ab10077e4ab937 Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Fri, 14 Oct 2016 15:08:36 +0530 Subject: [PATCH 280/884] cxl: Prevent adapter reset if an active context exists This patch prevents resetting the cxl adapter via sysfs in presence of one or more active cxl_context on it. This protects against an unrecoverable error caused by PSL owning a dirty cache line even after reset and host tries to touch the same cache line. In case a force reset of the card is required irrespective of any active contexts, the int value -1 can be stored in the 'reset' sysfs attribute of the card. The patch introduces a new atomic_t member named contexts_num inside struct cxl that holds the number of active context attached to the card , which is checked against '0' before proceeding with the reset. To prevent against a race condition where a context is activated just after reset check is performed, the contexts_num is atomically set to '-1' after reset-check to indicate that no more contexts can be activated on the card anymore. Before activating a context we atomically test if contexts_num is non-negative and if so, increment its value by one. In case the value of contexts_num is negative then it indicates that the card is about to be reset and context activation is error-ed out at that point. Fixes: 62fa19d4b4fd ("cxl: Add ability to reset the card") Cc: stable@vger.kernel.org # v4.0+ Acked-by: Frederic Barrat Reviewed-by: Andrew Donnellan Signed-off-by: Vaibhav Jain Signed-off-by: Michael Ellerman --- Documentation/ABI/testing/sysfs-class-cxl | 7 ++-- drivers/misc/cxl/api.c | 9 +++++ drivers/misc/cxl/context.c | 3 ++ drivers/misc/cxl/cxl.h | 24 +++++++++++++ drivers/misc/cxl/file.c | 11 ++++++ drivers/misc/cxl/guest.c | 3 ++ drivers/misc/cxl/main.c | 42 ++++++++++++++++++++++- drivers/misc/cxl/pci.c | 2 ++ drivers/misc/cxl/sysfs.c | 27 ++++++++++++--- 9 files changed, 121 insertions(+), 7 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-class-cxl b/Documentation/ABI/testing/sysfs-class-cxl index 4ba0a2a61926..640f65e79ef1 100644 --- a/Documentation/ABI/testing/sysfs-class-cxl +++ b/Documentation/ABI/testing/sysfs-class-cxl @@ -220,8 +220,11 @@ What: /sys/class/cxl//reset Date: October 2014 Contact: linuxppc-dev@lists.ozlabs.org Description: write only - Writing 1 will issue a PERST to card which may cause the card - to reload the FPGA depending on load_image_on_perst. + Writing 1 will issue a PERST to card provided there are no + contexts active on any one of the card AFUs. This may cause + the card to reload the FPGA depending on load_image_on_perst. + Writing -1 will do a force PERST irrespective of any active + contexts on the card AFUs. Users: https://github.com/ibm-capi/libcxl What: /sys/class/cxl//perst_reloads_same_image (not in a guest) diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c index f3d34b941f85..af23d7dfe752 100644 --- a/drivers/misc/cxl/api.c +++ b/drivers/misc/cxl/api.c @@ -229,6 +229,14 @@ int cxl_start_context(struct cxl_context *ctx, u64 wed, if (ctx->status == STARTED) goto out; /* already started */ + /* + * Increment the mapped context count for adapter. This also checks + * if adapter_context_lock is taken. + */ + rc = cxl_adapter_context_get(ctx->afu->adapter); + if (rc) + goto out; + if (task) { ctx->pid = get_task_pid(task, PIDTYPE_PID); ctx->glpid = get_task_pid(task->group_leader, PIDTYPE_PID); @@ -240,6 +248,7 @@ int cxl_start_context(struct cxl_context *ctx, u64 wed, if ((rc = cxl_ops->attach_process(ctx, kernel, wed, 0))) { put_pid(ctx->pid); + cxl_adapter_context_put(ctx->afu->adapter); cxl_ctx_put(); goto out; } diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c index c466ee2b0c97..5e506c19108a 100644 --- a/drivers/misc/cxl/context.c +++ b/drivers/misc/cxl/context.c @@ -238,6 +238,9 @@ int __detach_context(struct cxl_context *ctx) put_pid(ctx->glpid); cxl_ctx_put(); + + /* Decrease the attached context count on the adapter */ + cxl_adapter_context_put(ctx->afu->adapter); return 0; } diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index 01d372aba131..a144073593fa 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -618,6 +618,14 @@ struct cxl { bool perst_select_user; bool perst_same_image; bool psl_timebase_synced; + + /* + * number of contexts mapped on to this card. Possible values are: + * >0: Number of contexts mapped and new one can be mapped. + * 0: No active contexts and new ones can be mapped. + * -1: No contexts mapped and new ones cannot be mapped. + */ + atomic_t contexts_num; }; int cxl_pci_alloc_one_irq(struct cxl *adapter); @@ -944,4 +952,20 @@ bool cxl_pci_is_vphb_device(struct pci_dev *dev); /* decode AFU error bits in the PSL register PSL_SERR_An */ void cxl_afu_decode_psl_serr(struct cxl_afu *afu, u64 serr); + +/* + * Increments the number of attached contexts on an adapter. + * In case an adapter_context_lock is taken the return -EBUSY. + */ +int cxl_adapter_context_get(struct cxl *adapter); + +/* Decrements the number of attached contexts on an adapter */ +void cxl_adapter_context_put(struct cxl *adapter); + +/* If no active contexts then prevents contexts from being attached */ +int cxl_adapter_context_lock(struct cxl *adapter); + +/* Unlock the contexts-lock if taken. Warn and force unlock otherwise */ +void cxl_adapter_context_unlock(struct cxl *adapter); + #endif diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c index 5fb9894b157f..d0b421f49b39 100644 --- a/drivers/misc/cxl/file.c +++ b/drivers/misc/cxl/file.c @@ -205,11 +205,22 @@ static long afu_ioctl_start_work(struct cxl_context *ctx, ctx->pid = get_task_pid(current, PIDTYPE_PID); ctx->glpid = get_task_pid(current->group_leader, PIDTYPE_PID); + /* + * Increment the mapped context count for adapter. This also checks + * if adapter_context_lock is taken. + */ + rc = cxl_adapter_context_get(ctx->afu->adapter); + if (rc) { + afu_release_irqs(ctx, ctx); + goto out; + } + trace_cxl_attach(ctx, work.work_element_descriptor, work.num_interrupts, amr); if ((rc = cxl_ops->attach_process(ctx, false, work.work_element_descriptor, amr))) { afu_release_irqs(ctx, ctx); + cxl_adapter_context_put(ctx->afu->adapter); goto out; } diff --git a/drivers/misc/cxl/guest.c b/drivers/misc/cxl/guest.c index 9aa58a77a24d..3e102cd6ed91 100644 --- a/drivers/misc/cxl/guest.c +++ b/drivers/misc/cxl/guest.c @@ -1152,6 +1152,9 @@ struct cxl *cxl_guest_init_adapter(struct device_node *np, struct platform_devic if ((rc = cxl_sysfs_adapter_add(adapter))) goto err_put1; + /* release the context lock as the adapter is configured */ + cxl_adapter_context_unlock(adapter); + return adapter; err_put1: diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c index d9be23b24aa3..62e0dfb5f15b 100644 --- a/drivers/misc/cxl/main.c +++ b/drivers/misc/cxl/main.c @@ -243,8 +243,10 @@ struct cxl *cxl_alloc_adapter(void) if (dev_set_name(&adapter->dev, "card%i", adapter->adapter_num)) goto err2; - return adapter; + /* start with context lock taken */ + atomic_set(&adapter->contexts_num, -1); + return adapter; err2: cxl_remove_adapter_nr(adapter); err1: @@ -286,6 +288,44 @@ int cxl_afu_select_best_mode(struct cxl_afu *afu) return 0; } +int cxl_adapter_context_get(struct cxl *adapter) +{ + int rc; + + rc = atomic_inc_unless_negative(&adapter->contexts_num); + return rc >= 0 ? 0 : -EBUSY; +} + +void cxl_adapter_context_put(struct cxl *adapter) +{ + atomic_dec_if_positive(&adapter->contexts_num); +} + +int cxl_adapter_context_lock(struct cxl *adapter) +{ + int rc; + /* no active contexts -> contexts_num == 0 */ + rc = atomic_cmpxchg(&adapter->contexts_num, 0, -1); + return rc ? -EBUSY : 0; +} + +void cxl_adapter_context_unlock(struct cxl *adapter) +{ + int val = atomic_cmpxchg(&adapter->contexts_num, -1, 0); + + /* + * contexts lock taken -> contexts_num == -1 + * If not true then show a warning and force reset the lock. + * This will happen when context_unlock was requested without + * doing a context_lock. + */ + if (val != -1) { + atomic_set(&adapter->contexts_num, 0); + WARN(1, "Adapter context unlocked with %d active contexts", + val); + } +} + static int __init init_cxl(void) { int rc = 0; diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 7afad8477ad5..e96be9ca4e60 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -1487,6 +1487,8 @@ static int cxl_configure_adapter(struct cxl *adapter, struct pci_dev *dev) if ((rc = cxl_native_register_psl_err_irq(adapter))) goto err; + /* Release the context lock as adapter is configured */ + cxl_adapter_context_unlock(adapter); return 0; err: diff --git a/drivers/misc/cxl/sysfs.c b/drivers/misc/cxl/sysfs.c index b043c20f158f..a8b6d6a635e9 100644 --- a/drivers/misc/cxl/sysfs.c +++ b/drivers/misc/cxl/sysfs.c @@ -75,12 +75,31 @@ static ssize_t reset_adapter_store(struct device *device, int val; rc = sscanf(buf, "%i", &val); - if ((rc != 1) || (val != 1)) + if ((rc != 1) || (val != 1 && val != -1)) return -EINVAL; - if ((rc = cxl_ops->adapter_reset(adapter))) - return rc; - return count; + /* + * See if we can lock the context mapping that's only allowed + * when there are no contexts attached to the adapter. Once + * taken this will also prevent any context from getting activated. + */ + if (val == 1) { + rc = cxl_adapter_context_lock(adapter); + if (rc) + goto out; + + rc = cxl_ops->adapter_reset(adapter); + /* In case reset failed release context lock */ + if (rc) + cxl_adapter_context_unlock(adapter); + + } else if (val == -1) { + /* Perform a forced adapter reset */ + rc = cxl_ops->adapter_reset(adapter); + } + +out: + return rc ? rc : count; } static ssize_t load_image_on_perst_show(struct device *device, From 08b5e79ebdb58868cbb6976ba0e3898029394e6d Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 13 Oct 2016 16:27:30 +1100 Subject: [PATCH 281/884] powerpc/mm: Drop dump_numa_memory_topology() At boot we dump the NUMA memory topology in dump_numa_memory_topology(), at KERN_DEBUG level, resulting in output like: Node 0 Memory: 0x0-0x100000000 Node 1 Memory: 0x100000000-0x200000000 Which is nice enough, but immediately after that we iterate over each node and call setup_node_data(), which also prints out the node ranges, at KERN_INFO, giving eg: numa: Initmem setup node 0 [mem 0x00000000-0xffffffff] numa: Initmem setup node 1 [mem 0x100000000-0x1ffffffff] Additionally dump_numa_memory_topology() does not use KERN_CONT correctly, resulting in split output lines on recent kernels. So drop dump_numa_memory_topology() as superfluous chatter. Signed-off-by: Michael Ellerman Acked-by: Balbir Singh Signed-off-by: Michael Ellerman --- arch/powerpc/mm/numa.c | 36 ------------------------------------ 1 file changed, 36 deletions(-) diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 75b9cd6150cc..db5fc2b54c5a 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -871,40 +871,6 @@ void __init dump_numa_cpu_topology(void) } } -static void __init dump_numa_memory_topology(void) -{ - unsigned int node; - unsigned int count; - - if (min_common_depth == -1 || !numa_enabled) - return; - - for_each_online_node(node) { - unsigned long i; - - printk(KERN_DEBUG "Node %d Memory:", node); - - count = 0; - - for (i = 0; i < memblock_end_of_DRAM(); - i += (1 << SECTION_SIZE_BITS)) { - if (early_pfn_to_nid(i >> PAGE_SHIFT) == node) { - if (count == 0) - printk(" 0x%lx", i); - ++count; - } else { - if (count > 0) - printk("-0x%lx", i); - count = 0; - } - } - - if (count > 0) - printk("-0x%lx", i); - printk("\n"); - } -} - /* Initialize NODE_DATA for a node on the local memory */ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn) { @@ -947,8 +913,6 @@ void __init initmem_init(void) if (parse_numa_properties()) setup_nonnuma(); - else - dump_numa_memory_topology(); memblock_dump_all(); From 8467801cc8744511bd2664fae7d72ab704816844 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 18 Oct 2016 14:22:14 +0530 Subject: [PATCH 282/884] powerpc: Fix numa topology console print With recent update to printk, we get console output like below: [ 0.550639] Brought up 160 CPUs [ 0.550718] Node 0 CPUs: [ 0.550721] 0 [ 0.550754] -39 [ 0.550794] Node 1 CPUs: [ 0.550798] 40 [ 0.550817] -79 [ 0.550856] Node 16 CPUs: [ 0.550860] 80 [ 0.550880] -119 [ 0.550917] Node 17 CPUs: [ 0.550923] 120 [ 0.550942] -159 Fix this by properly using pr_cont(), ie. KERN_CONT. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/mm/numa.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index db5fc2b54c5a..a51c188b81f3 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -845,7 +845,7 @@ void __init dump_numa_cpu_topology(void) return; for_each_online_node(node) { - printk(KERN_DEBUG "Node %d CPUs:", node); + pr_info("Node %d CPUs:", node); count = 0; /* @@ -856,18 +856,18 @@ void __init dump_numa_cpu_topology(void) if (cpumask_test_cpu(cpu, node_to_cpumask_map[node])) { if (count == 0) - printk(" %u", cpu); + pr_cont(" %u", cpu); ++count; } else { if (count > 1) - printk("-%u", cpu - 1); + pr_cont("-%u", cpu - 1); count = 0; } } if (count > 1) - printk("-%u", nr_cpu_ids - 1); - printk("\n"); + pr_cont("-%u", nr_cpu_ids - 1); + pr_cont("\n"); } } From 78914ff0843623ee6dbeae92fa0bb8761828684e Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 18 Oct 2016 09:33:44 +1100 Subject: [PATCH 283/884] powerpc: Ignore the pkey system calls for now Eliminates warning messages: :1316:2: warning: #warning syscall pkey_mprotect not implemented [-Wcpp] :1319:2: warning: #warning syscall pkey_alloc not implemented [-Wcpp] :1322:2: warning: #warning syscall pkey_free not implemented [-Wcpp] Hopefully we will remember to revert this commit if we ever implement them. Signed-off-by: Stephen Rothwell Acked-by: Balbir Singh Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/unistd.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index cf12c580f6b2..e8cdfec8d512 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -16,6 +16,10 @@ #define __NR__exit __NR_exit +#define __IGNORE_pkey_mprotect +#define __IGNORE_pkey_alloc +#define __IGNORE_pkey_free + #ifndef __ASSEMBLY__ #include From dd1dafcdf071d94cb53917612c66a169ce741461 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 17 Oct 2016 14:26:57 +0000 Subject: [PATCH 284/884] irqchip/eznps: Drop pointless static qualifier in nps400_of_init() There is no need to have the 'struct irq_domain *nps400_root_domain' variable static since new value is always assigned before use. Fixes: 44df427c894a ("irqchip: add nps Internal and external irqchips") Signed-off-by: Wei Yongjun Cc: Marc Zyngier Cc: Vineet Gupta Cc: Jason Cooper Link: http://lkml.kernel.org/r/1476714417-12095-1-git-send-email-weiyj.lk@gmail.com Signed-off-by: Thomas Gleixner --- drivers/irqchip/irq-eznps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-eznps.c b/drivers/irqchip/irq-eznps.c index ebc2b0b15f67..2a7a38830a8d 100644 --- a/drivers/irqchip/irq-eznps.c +++ b/drivers/irqchip/irq-eznps.c @@ -135,7 +135,7 @@ static const struct irq_domain_ops nps400_irq_ops = { static int __init nps400_of_init(struct device_node *node, struct device_node *parent) { - static struct irq_domain *nps400_root_domain; + struct irq_domain *nps400_root_domain; if (parent) { pr_err("DeviceTree incore ic not a root irq controller\n"); From b0dddf6c147e6fe61374d625c4bb2b7c52018639 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 18 Oct 2016 16:53:11 +0100 Subject: [PATCH 285/884] efi/arm: Fix absolute relocation detection for older toolchains When building the ARM kernel with CONFIG_EFI=y, the following build error may occur when using a less recent version of binutils (2.23 or older): STUBCPY drivers/firmware/efi/libstub/lib-sort.stub.o 00000000 R_ARM_ABS32 sort 00000004 R_ARM_ABS32 __ksymtab_strings drivers/firmware/efi/libstub/lib-sort.stub.o: absolute symbol references not allowed in the EFI stub (and when building with debug symbols, the list above is much longer, and contains all the internal references between the .debug sections and the actual code) This issue is caused by the fact that objcopy v2.23 or earlier does not support wildcards in its -R and -j options, which means the following line from the Makefile: STUBCOPY_FLAGS-y := -R .debug* -R *ksymtab* -R *kcrctab* fails to take effect, leaving harmless absolute relocations in the binary that are indistinguishable from relocations that may cause crashes at runtime due to the fact that these relocations are resolved at link time using the virtual address of the kernel, which is always different from the address at which the EFI firmware loads and invokes the stub. So, as a workaround, disable debug symbols explicitly when building the stub for ARM, and strip the ksymtab and kcrctab symbols for the only exported symbol we currently reuse in the stub, which is 'sort'. Tested-by: Jon Hunter Signed-off-by: Ard Biesheuvel Reviewed-by: Matt Fleming Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1476805991-7160-2-git-send-email-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- drivers/firmware/efi/libstub/Makefile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index c06945160a41..5e23e2d305e7 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -11,7 +11,7 @@ cflags-$(CONFIG_X86) += -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2 \ -mno-mmx -mno-sse cflags-$(CONFIG_ARM64) := $(subst -pg,,$(KBUILD_CFLAGS)) -cflags-$(CONFIG_ARM) := $(subst -pg,,$(KBUILD_CFLAGS)) \ +cflags-$(CONFIG_ARM) := $(subst -pg,,$(KBUILD_CFLAGS)) -g0 \ -fno-builtin -fpic -mno-single-pic-base cflags-$(CONFIG_EFI_ARMSTUB) += -I$(srctree)/scripts/dtc/libfdt @@ -79,5 +79,6 @@ quiet_cmd_stubcopy = STUBCPY $@ # decompressor. So move our .data to .data.efistub, which is preserved # explicitly by the decompressor linker script. # -STUBCOPY_FLAGS-$(CONFIG_ARM) += --rename-section .data=.data.efistub +STUBCOPY_FLAGS-$(CONFIG_ARM) += --rename-section .data=.data.efistub \ + -R ___ksymtab+sort -R ___kcrctab+sort STUBCOPY_RELOC-$(CONFIG_ARM) := R_ARM_ABS From b5a9b340789b2b24c6896bcf7a065c31a4db671c Mon Sep 17 00:00:00 2001 From: Vincent Guittot Date: Wed, 19 Oct 2016 14:45:23 +0200 Subject: [PATCH 286/884] sched/fair: Fix incorrect task group ->load_avg A scheduler performance regression has been reported by Joseph Salisbury, which he bisected back to: 3d30544f0212 ("sched/fair: Apply more PELT fixes) The regression triggers when several levels of task groups are involved (read: SystemD) and cpu_possible_mask != cpu_present_mask. The root cause is that group entity's load (tg_child->se[i]->avg.load_avg) is initialized to scale_load_down(se->load.weight). During the creation of a child task group, its group entities on possible CPUs are attached to parent's cfs_rq (tg_parent) and their loads are added to the parent's load (tg_parent->load_avg) with update_tg_load_avg(). But only the load on online CPUs will then be updated to reflect real load, whereas load on other CPUs will stay at the initial value. The result is a tg_parent->load_avg that is higher than the real load, the weight of group entities (tg_parent->se[i]->load.weight) on online CPUs is smaller than it should be, and the task group gets a less running time than what it could expect. ( This situation can be detected with /proc/sched_debug. The ".tg_load_avg" of the task group will be much higher than sum of ".tg_load_avg_contrib" of online cfs_rqs of the task group. ) The load of group entities don't have to be intialized to something else than 0 because their load will increase when an entity is attached. Reported-by: Joseph Salisbury Tested-by: Dietmar Eggemann Signed-off-by: Vincent Guittot Acked-by: Peter Zijlstra Cc: # 4.8.x Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: joonwoop@codeaurora.org Fixes: 3d30544f0212 ("sched/fair: Apply more PELT fixes) Link: http://lkml.kernel.org/r/1476881123-10159-1-git-send-email-vincent.guittot@linaro.org Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 76ee7de1859d..d941c97dfbc3 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -690,7 +690,14 @@ void init_entity_runnable_average(struct sched_entity *se) * will definitely be update (after enqueue). */ sa->period_contrib = 1023; - sa->load_avg = scale_load_down(se->load.weight); + /* + * Tasks are intialized with full load to be seen as heavy tasks until + * they get a chance to stabilize to their real load level. + * Group entities are intialized with zero load to reflect the fact that + * nothing has been attached to the task group yet. + */ + if (entity_is_task(se)) + sa->load_avg = scale_load_down(se->load.weight); sa->load_sum = sa->load_avg * LOAD_AVG_MAX; /* * At this point, util_avg won't be used in select_task_rq_fair anyway From 1ee1710cd6bbf49853688e97d6e1d98b48f28586 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 19 Oct 2016 13:19:02 +0000 Subject: [PATCH 287/884] wusb: fix error return code in wusb_prf() Fix to return error code -ENOMEM from the kmalloc() error handling case instead of 0, as done elsewhere in this function. Fixes: a19b882c07a6 ("wusb: Stop using the stack for sg crypto scratch space") Signed-off-by: Wei Yongjun Signed-off-by: Greg Kroah-Hartman --- drivers/usb/wusbcore/crypto.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/wusbcore/crypto.c b/drivers/usb/wusbcore/crypto.c index de089f3a82f3..79451f7ef1b7 100644 --- a/drivers/usb/wusbcore/crypto.c +++ b/drivers/usb/wusbcore/crypto.c @@ -339,8 +339,10 @@ ssize_t wusb_prf(void *out, size_t out_size, goto error_setkey_aes; } scratch = kmalloc(sizeof(*scratch), GFP_KERNEL); - if (!scratch) + if (!scratch) { + result = -ENOMEM; goto error_alloc_scratch; + } for (bitr = 0; bitr < (len + 63) / 64; bitr++) { sfn_le = cpu_to_le64(sfn++); From 889882bce2a5f69242c1f3acd840983f467499b9 Mon Sep 17 00:00:00 2001 From: Lukasz Odzioba Date: Tue, 4 Oct 2016 18:26:26 +0200 Subject: [PATCH 288/884] perf/x86/intel/cstate: Add C-state residency events for Knights Landing Although KNL does support C1,C6,PC2,PC3,PC6 states, the patch only supports C6,PC2,PC3,PC6, because there is no counter for C1. C6 residency counter MSR on KNL has a different address than other platforms which is handled as a new quirk flag. Signed-off-by: Lukasz Odzioba Acked-by: Peter Zijlstra Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Rafael J. Wysocki Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: bp@suse.de Cc: dave.hansen@linux.intel.com Cc: kan.liang@intel.com Link: http://lkml.kernel.org/r/1475598386-19597-1-git-send-email-lukasz.odzioba@intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/cstate.c | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 3ca87b5a8677..4f5ac726335f 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -48,7 +48,8 @@ * Scope: Core * MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter * perf code: 0x02 - * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,SKL + * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW + * SKL,KNL * Scope: Core * MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter * perf code: 0x03 @@ -56,15 +57,16 @@ * Scope: Core * MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter. * perf code: 0x00 - * Available model: SNB,IVB,HSW,BDW,SKL + * Available model: SNB,IVB,HSW,BDW,SKL,KNL * Scope: Package (physical package) * MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter. * perf code: 0x01 - * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL + * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL * Scope: Package (physical package) * MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter. * perf code: 0x02 - * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,SKL + * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW + * SKL,KNL * Scope: Package (physical package) * MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter. * perf code: 0x03 @@ -118,6 +120,7 @@ struct cstate_model { /* Quirk flags */ #define SLM_PKG_C6_USE_C7_MSR (1UL << 0) +#define KNL_CORE_C6_MSR (1UL << 1) struct perf_cstate_msr { u64 msr; @@ -488,6 +491,18 @@ static const struct cstate_model slm_cstates __initconst = { .quirks = SLM_PKG_C6_USE_C7_MSR, }; + +static const struct cstate_model knl_cstates __initconst = { + .core_events = BIT(PERF_CSTATE_CORE_C6_RES), + + .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) | + BIT(PERF_CSTATE_PKG_C3_RES) | + BIT(PERF_CSTATE_PKG_C6_RES), + .quirks = KNL_CORE_C6_MSR, +}; + + + #define X86_CSTATES_MODEL(model, states) \ { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long) &(states) } @@ -523,6 +538,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_MOBILE, snb_cstates), X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates), + + X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNL, knl_cstates), { }, }; MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match); @@ -558,6 +575,11 @@ static int __init cstate_probe(const struct cstate_model *cm) if (cm->quirks & SLM_PKG_C6_USE_C7_MSR) pkg_msr[PERF_CSTATE_PKG_C6_RES].msr = MSR_PKG_C7_RESIDENCY; + /* KNL has different MSR for CORE C6 */ + if (cm->quirks & KNL_CORE_C6_MSR) + pkg_msr[PERF_CSTATE_CORE_C6_RES].msr = MSR_KNL_CORE_C6_RESIDENCY; + + has_cstate_core = cstate_probe_msr(cm->core_events, PERF_CSTATE_CORE_EVENT_MAX, core_msr, core_events_attrs); From 3805a938a6c24284863732fd45cec5a04609a224 Mon Sep 17 00:00:00 2001 From: Eric Garver Date: Mon, 17 Oct 2016 16:30:12 -0400 Subject: [PATCH 289/884] flow_dissector: Check skb for VLAN only if skb specified. Fixes a panic when calling eth_get_headlen(). Noticed on i40e driver. Fixes: d5709f7ab776 ("flow_dissector: For stripped vlan, get vlan info from skb->vlan_tci") Signed-off-by: Eric Garver Reviewed-by: Jakub Sitnicki Acked-by: Amir Vadai Signed-off-by: David S. Miller --- net/core/flow_dissector.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 1a7b80f73376..44e6ba9d3a6b 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -247,12 +247,10 @@ ipv6: case htons(ETH_P_8021Q): { const struct vlan_hdr *vlan; - if (skb_vlan_tag_present(skb)) + if (skb && skb_vlan_tag_present(skb)) proto = skb->protocol; - if (!skb_vlan_tag_present(skb) || - proto == cpu_to_be16(ETH_P_8021Q) || - proto == cpu_to_be16(ETH_P_8021AD)) { + if (eth_type_vlan(proto)) { struct vlan_hdr _vlan; vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan), From 1c5b51dfb7b4564008e0cadec5381a69e88b0d21 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 4 Jul 2016 16:59:43 +0100 Subject: [PATCH 290/884] arm64: swp emulation: bound LL/SC retries before rescheduling If a CPU does not implement a global monitor for certain memory types, then userspace can attempt a kernel DoS by issuing SWP instructions targetting the problematic memory (for example, a framebuffer mapped with non-cacheable attributes). The SWP emulation code protects against these sorts of attacks by checking for pending signals and potentially rescheduling when the STXR instruction fails during the emulation. Whilst this is good for avoiding livelock, it harms emulation of legitimate SWP instructions on CPUs where forward progress is not guaranteed if there are memory accesses to the same reservation granule (up to 2k) between the failing STXR and the retry of the LDXR. This patch solves the problem by retrying the STXR a bounded number of times (4) before breaking out of the LL/SC loop and looking for something else to do. Cc: Fixes: bd35a4adc413 ("arm64: Port SWP/SWPB emulation support from arm") Reviewed-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/kernel/armv8_deprecated.c | 36 +++++++++++++++++----------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index 42ffdb54e162..b0988bb1bf64 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -280,35 +280,43 @@ static void __init register_insn_emulation_sysctl(struct ctl_table *table) /* * Error-checking SWP macros implemented using ldxr{b}/stxr{b} */ -#define __user_swpX_asm(data, addr, res, temp, B) \ + +/* Arbitrary constant to ensure forward-progress of the LL/SC loop */ +#define __SWP_LL_SC_LOOPS 4 + +#define __user_swpX_asm(data, addr, res, temp, temp2, B) \ __asm__ __volatile__( \ + " mov %w3, %w7\n" \ ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, \ CONFIG_ARM64_PAN) \ - "0: ldxr"B" %w2, [%3]\n" \ - "1: stxr"B" %w0, %w1, [%3]\n" \ + "0: ldxr"B" %w2, [%4]\n" \ + "1: stxr"B" %w0, %w1, [%4]\n" \ " cbz %w0, 2f\n" \ - " mov %w0, %w4\n" \ + " sub %w3, %w3, #1\n" \ + " cbnz %w3, 0b\n" \ + " mov %w0, %w5\n" \ " b 3f\n" \ "2:\n" \ " mov %w1, %w2\n" \ "3:\n" \ " .pushsection .fixup,\"ax\"\n" \ " .align 2\n" \ - "4: mov %w0, %w5\n" \ + "4: mov %w0, %w6\n" \ " b 3b\n" \ " .popsection" \ _ASM_EXTABLE(0b, 4b) \ _ASM_EXTABLE(1b, 4b) \ ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \ CONFIG_ARM64_PAN) \ - : "=&r" (res), "+r" (data), "=&r" (temp) \ - : "r" (addr), "i" (-EAGAIN), "i" (-EFAULT) \ + : "=&r" (res), "+r" (data), "=&r" (temp), "=&r" (temp2) \ + : "r" (addr), "i" (-EAGAIN), "i" (-EFAULT), \ + "i" (__SWP_LL_SC_LOOPS) \ : "memory") -#define __user_swp_asm(data, addr, res, temp) \ - __user_swpX_asm(data, addr, res, temp, "") -#define __user_swpb_asm(data, addr, res, temp) \ - __user_swpX_asm(data, addr, res, temp, "b") +#define __user_swp_asm(data, addr, res, temp, temp2) \ + __user_swpX_asm(data, addr, res, temp, temp2, "") +#define __user_swpb_asm(data, addr, res, temp, temp2) \ + __user_swpX_asm(data, addr, res, temp, temp2, "b") /* * Bit 22 of the instruction encoding distinguishes between @@ -328,12 +336,12 @@ static int emulate_swpX(unsigned int address, unsigned int *data, } while (1) { - unsigned long temp; + unsigned long temp, temp2; if (type == TYPE_SWPB) - __user_swpb_asm(*data, address, res, temp); + __user_swpb_asm(*data, address, res, temp, temp2); else - __user_swp_asm(*data, address, res, temp); + __user_swp_asm(*data, address, res, temp, temp2); if (likely(res != -EAGAIN) || signal_pending(current)) break; From 1e6e57d9b34a9075d5f9e2048ea7b09756590d11 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 4 Jul 2016 17:44:48 +0100 Subject: [PATCH 291/884] arm64: percpu: rewrite ll/sc loops in assembly Writing the outer loop of an LL/SC sequence using do {...} while constructs potentially allows the compiler to hoist memory accesses between the STXR and the branch back to the LDXR. On CPUs that do not guarantee forward progress of LL/SC loops when faced with memory accesses to the same ERG (up to 2k) between the failed STXR and the branch back, we may end up livelocking. This patch avoids this issue in our percpu atomics by rewriting the outer loop as part of the LL/SC inline assembly block. Cc: Fixes: f97fc810798c ("arm64: percpu: Implement this_cpu operations") Reviewed-by: Mark Rutland Tested-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/include/asm/percpu.h | 120 +++++++++++++++----------------- 1 file changed, 56 insertions(+), 64 deletions(-) diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h index 2fee2f59288c..5394c8405e66 100644 --- a/arch/arm64/include/asm/percpu.h +++ b/arch/arm64/include/asm/percpu.h @@ -44,48 +44,44 @@ static inline unsigned long __percpu_##op(void *ptr, \ \ switch (size) { \ case 1: \ - do { \ - asm ("//__per_cpu_" #op "_1\n" \ - "ldxrb %w[ret], %[ptr]\n" \ + asm ("//__per_cpu_" #op "_1\n" \ + "1: ldxrb %w[ret], %[ptr]\n" \ #asm_op " %w[ret], %w[ret], %w[val]\n" \ - "stxrb %w[loop], %w[ret], %[ptr]\n" \ - : [loop] "=&r" (loop), [ret] "=&r" (ret), \ - [ptr] "+Q"(*(u8 *)ptr) \ - : [val] "Ir" (val)); \ - } while (loop); \ + " stxrb %w[loop], %w[ret], %[ptr]\n" \ + " cbnz %w[loop], 1b" \ + : [loop] "=&r" (loop), [ret] "=&r" (ret), \ + [ptr] "+Q"(*(u8 *)ptr) \ + : [val] "Ir" (val)); \ break; \ case 2: \ - do { \ - asm ("//__per_cpu_" #op "_2\n" \ - "ldxrh %w[ret], %[ptr]\n" \ + asm ("//__per_cpu_" #op "_2\n" \ + "1: ldxrh %w[ret], %[ptr]\n" \ #asm_op " %w[ret], %w[ret], %w[val]\n" \ - "stxrh %w[loop], %w[ret], %[ptr]\n" \ - : [loop] "=&r" (loop), [ret] "=&r" (ret), \ - [ptr] "+Q"(*(u16 *)ptr) \ - : [val] "Ir" (val)); \ - } while (loop); \ + " stxrh %w[loop], %w[ret], %[ptr]\n" \ + " cbnz %w[loop], 1b" \ + : [loop] "=&r" (loop), [ret] "=&r" (ret), \ + [ptr] "+Q"(*(u16 *)ptr) \ + : [val] "Ir" (val)); \ break; \ case 4: \ - do { \ - asm ("//__per_cpu_" #op "_4\n" \ - "ldxr %w[ret], %[ptr]\n" \ + asm ("//__per_cpu_" #op "_4\n" \ + "1: ldxr %w[ret], %[ptr]\n" \ #asm_op " %w[ret], %w[ret], %w[val]\n" \ - "stxr %w[loop], %w[ret], %[ptr]\n" \ - : [loop] "=&r" (loop), [ret] "=&r" (ret), \ - [ptr] "+Q"(*(u32 *)ptr) \ - : [val] "Ir" (val)); \ - } while (loop); \ + " stxr %w[loop], %w[ret], %[ptr]\n" \ + " cbnz %w[loop], 1b" \ + : [loop] "=&r" (loop), [ret] "=&r" (ret), \ + [ptr] "+Q"(*(u32 *)ptr) \ + : [val] "Ir" (val)); \ break; \ case 8: \ - do { \ - asm ("//__per_cpu_" #op "_8\n" \ - "ldxr %[ret], %[ptr]\n" \ + asm ("//__per_cpu_" #op "_8\n" \ + "1: ldxr %[ret], %[ptr]\n" \ #asm_op " %[ret], %[ret], %[val]\n" \ - "stxr %w[loop], %[ret], %[ptr]\n" \ - : [loop] "=&r" (loop), [ret] "=&r" (ret), \ - [ptr] "+Q"(*(u64 *)ptr) \ - : [val] "Ir" (val)); \ - } while (loop); \ + " stxr %w[loop], %[ret], %[ptr]\n" \ + " cbnz %w[loop], 1b" \ + : [loop] "=&r" (loop), [ret] "=&r" (ret), \ + [ptr] "+Q"(*(u64 *)ptr) \ + : [val] "Ir" (val)); \ break; \ default: \ BUILD_BUG(); \ @@ -150,44 +146,40 @@ static inline unsigned long __percpu_xchg(void *ptr, unsigned long val, switch (size) { case 1: - do { - asm ("//__percpu_xchg_1\n" - "ldxrb %w[ret], %[ptr]\n" - "stxrb %w[loop], %w[val], %[ptr]\n" - : [loop] "=&r"(loop), [ret] "=&r"(ret), - [ptr] "+Q"(*(u8 *)ptr) - : [val] "r" (val)); - } while (loop); + asm ("//__percpu_xchg_1\n" + "1: ldxrb %w[ret], %[ptr]\n" + " stxrb %w[loop], %w[val], %[ptr]\n" + " cbnz %w[loop], 1b" + : [loop] "=&r"(loop), [ret] "=&r"(ret), + [ptr] "+Q"(*(u8 *)ptr) + : [val] "r" (val)); break; case 2: - do { - asm ("//__percpu_xchg_2\n" - "ldxrh %w[ret], %[ptr]\n" - "stxrh %w[loop], %w[val], %[ptr]\n" - : [loop] "=&r"(loop), [ret] "=&r"(ret), - [ptr] "+Q"(*(u16 *)ptr) - : [val] "r" (val)); - } while (loop); + asm ("//__percpu_xchg_2\n" + "1: ldxrh %w[ret], %[ptr]\n" + " stxrh %w[loop], %w[val], %[ptr]\n" + " cbnz %w[loop], 1b" + : [loop] "=&r"(loop), [ret] "=&r"(ret), + [ptr] "+Q"(*(u16 *)ptr) + : [val] "r" (val)); break; case 4: - do { - asm ("//__percpu_xchg_4\n" - "ldxr %w[ret], %[ptr]\n" - "stxr %w[loop], %w[val], %[ptr]\n" - : [loop] "=&r"(loop), [ret] "=&r"(ret), - [ptr] "+Q"(*(u32 *)ptr) - : [val] "r" (val)); - } while (loop); + asm ("//__percpu_xchg_4\n" + "1: ldxr %w[ret], %[ptr]\n" + " stxr %w[loop], %w[val], %[ptr]\n" + " cbnz %w[loop], 1b" + : [loop] "=&r"(loop), [ret] "=&r"(ret), + [ptr] "+Q"(*(u32 *)ptr) + : [val] "r" (val)); break; case 8: - do { - asm ("//__percpu_xchg_8\n" - "ldxr %[ret], %[ptr]\n" - "stxr %w[loop], %[val], %[ptr]\n" - : [loop] "=&r"(loop), [ret] "=&r"(ret), - [ptr] "+Q"(*(u64 *)ptr) - : [val] "r" (val)); - } while (loop); + asm ("//__percpu_xchg_8\n" + "1: ldxr %[ret], %[ptr]\n" + " stxr %w[loop], %[val], %[ptr]\n" + " cbnz %w[loop], 1b" + : [loop] "=&r"(loop), [ret] "=&r"(ret), + [ptr] "+Q"(*(u64 *)ptr) + : [val] "r" (val)); break; default: BUILD_BUG(); From e4961b0768852d9eb7383e1a5df178eacb714656 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 19 Oct 2016 16:57:08 +0300 Subject: [PATCH 292/884] net: core: Correctly iterate over lower adjacency list Tamir reported the following trace when processing ARP requests received via a vlan device on top of a VLAN-aware bridge: NMI watchdog: BUG: soft lockup - CPU#1 stuck for 22s! [swapper/1:0] [...] CPU: 1 PID: 0 Comm: swapper/1 Tainted: G W 4.8.0-rc7 #1 Hardware name: Mellanox Technologies Ltd. "MSN2100-CB2F"/"SA001017", BIOS 5.6.5 06/07/2016 task: ffff88017edfea40 task.stack: ffff88017ee10000 RIP: 0010:[] [] netdev_all_lower_get_next_rcu+0x33/0x60 [...] Call Trace: [] mlxsw_sp_port_lower_dev_hold+0x5a/0xa0 [mlxsw_spectrum] [] mlxsw_sp_router_netevent_event+0x80/0x150 [mlxsw_spectrum] [] notifier_call_chain+0x4a/0x70 [] atomic_notifier_call_chain+0x1a/0x20 [] call_netevent_notifiers+0x1b/0x20 [] neigh_update+0x306/0x740 [] neigh_event_ns+0x4e/0xb0 [] arp_process+0x66f/0x700 [] ? common_interrupt+0x8c/0x8c [] arp_rcv+0x139/0x1d0 [] ? vlan_do_receive+0xda/0x320 [] __netif_receive_skb_core+0x524/0xab0 [] ? dev_queue_xmit+0x10/0x20 [] ? br_forward_finish+0x3d/0xc0 [bridge] [] ? br_handle_vlan+0xf6/0x1b0 [bridge] [] __netif_receive_skb+0x18/0x60 [] netif_receive_skb_internal+0x40/0xb0 [] netif_receive_skb+0x1c/0x70 [] br_pass_frame_up+0xc6/0x160 [bridge] [] ? deliver_clone+0x37/0x50 [bridge] [] ? br_flood+0xcc/0x160 [bridge] [] br_handle_frame_finish+0x224/0x4f0 [bridge] [] br_handle_frame+0x174/0x300 [bridge] [] __netif_receive_skb_core+0x329/0xab0 [] ? find_next_bit+0x15/0x20 [] ? cpumask_next_and+0x32/0x50 [] ? load_balance+0x178/0x9b0 [] __netif_receive_skb+0x18/0x60 [] netif_receive_skb_internal+0x40/0xb0 [] netif_receive_skb+0x1c/0x70 [] mlxsw_sp_rx_listener_func+0x61/0xb0 [mlxsw_spectrum] [] mlxsw_core_skb_receive+0x187/0x200 [mlxsw_core] [] mlxsw_pci_cq_tasklet+0x63a/0x9b0 [mlxsw_pci] [] tasklet_action+0xf6/0x110 [] __do_softirq+0xf6/0x280 [] irq_exit+0xdf/0xf0 [] do_IRQ+0x54/0xd0 [] common_interrupt+0x8c/0x8c The problem is that netdev_all_lower_get_next_rcu() never advances the iterator, thereby causing the loop over the lower adjacency list to run forever. Fix this by advancing the iterator and avoid the infinite loop. Fixes: 7ce856aaaf13 ("mlxsw: spectrum: Add couple of lower device helper functions") Signed-off-by: Ido Schimmel Reported-by: Tamir Winetroub Reviewed-by: Jiri Pirko Acked-by: David Ahern Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- net/core/dev.c | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 136ae6bbe81e..465e128699a1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3877,7 +3877,7 @@ struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev, ldev = netdev_all_lower_get_next(dev, &(iter))) #define netdev_for_each_all_lower_dev_rcu(dev, ldev, iter) \ - for (iter = (dev)->all_adj_list.lower.next, \ + for (iter = &(dev)->all_adj_list.lower, \ ldev = netdev_all_lower_get_next_rcu(dev, &(iter)); \ ldev; \ ldev = netdev_all_lower_get_next_rcu(dev, &(iter))) diff --git a/net/core/dev.c b/net/core/dev.c index f1fe26f66458..b09ac57f4348 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5511,10 +5511,14 @@ struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev, { struct netdev_adjacent *lower; - lower = list_first_or_null_rcu(&dev->all_adj_list.lower, - struct netdev_adjacent, list); + lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); - return lower ? lower->dev : NULL; + if (&lower->list == &dev->all_adj_list.lower) + return NULL; + + *iter = &lower->list; + + return lower->dev; } EXPORT_SYMBOL(netdev_all_lower_get_next_rcu); From 97c242902c209e7d46e365335db5202634484dcb Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 18 Oct 2016 18:50:23 +0200 Subject: [PATCH 293/884] switchdev: Execute bridge ndos only for bridge ports We recently got the following warning after setting up a vlan device on top of an offloaded bridge and executing 'bridge link': WARNING: CPU: 0 PID: 18566 at drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c:81 mlxsw_sp_port_orig_get.part.9+0x55/0x70 [mlxsw_spectrum] [...] CPU: 0 PID: 18566 Comm: bridge Not tainted 4.8.0-rc7 #1 Hardware name: Mellanox Technologies Ltd. Mellanox switch/Mellanox switch, BIOS 4.6.5 05/21/2015 0000000000000286 00000000e64ab94f ffff880406e6f8f0 ffffffff8135eaa3 0000000000000000 0000000000000000 ffff880406e6f930 ffffffff8108c43b 0000005106e6f988 ffff8803df398840 ffff880403c60108 ffff880406e6f990 Call Trace: [] dump_stack+0x63/0x90 [] __warn+0xcb/0xf0 [] warn_slowpath_null+0x1d/0x20 [] mlxsw_sp_port_orig_get.part.9+0x55/0x70 [mlxsw_spectrum] [] mlxsw_sp_port_attr_get+0xa5/0xb0 [mlxsw_spectrum] [] switchdev_port_attr_get+0x4f/0x140 [] switchdev_port_attr_get+0x100/0x140 [] switchdev_port_attr_get+0x100/0x140 [] switchdev_port_bridge_getlink+0x5b/0xc0 [] ? switchdev_port_fdb_dump+0x90/0x90 [] rtnl_bridge_getlink+0xe7/0x190 [] netlink_dump+0x122/0x290 [] __netlink_dump_start+0x15f/0x190 [] ? rtnl_bridge_dellink+0x230/0x230 [] rtnetlink_rcv_msg+0x1a6/0x220 [] ? __kmalloc_node_track_caller+0x208/0x2c0 [] ? rtnl_bridge_dellink+0x230/0x230 [] ? rtnl_newlink+0x890/0x890 [] netlink_rcv_skb+0xa4/0xc0 [] rtnetlink_rcv+0x28/0x30 [] netlink_unicast+0x18c/0x240 [] netlink_sendmsg+0x2fb/0x3a0 [] sock_sendmsg+0x38/0x50 [] SYSC_sendto+0x101/0x190 [] ? __sys_recvmsg+0x51/0x90 [] SyS_sendto+0xe/0x10 [] entry_SYSCALL_64_fastpath+0x1a/0xa4 The problem is that the 8021q module propagates the call to ndo_bridge_getlink() via switchdev ops, but the switch driver doesn't recognize the netdev, as it's not offloaded. While we can ignore calls being made to non-bridge ports inside the driver, a better fix would be to push this check up to the switchdev layer. Note that these ndos can be called for non-bridged netdev, but this only happens in certain PF drivers which don't call the corresponding switchdev functions anyway. Fixes: 99f44bb3527b ("mlxsw: spectrum: Enable L3 interfaces on top of bridge devices") Signed-off-by: Ido Schimmel Reported-by: Tamir Winetroub Tested-by: Tamir Winetroub Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/switchdev/switchdev.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 02beb35f577f..3b95fe980fa2 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -771,6 +771,9 @@ int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, u32 mask = BR_LEARNING | BR_LEARNING_SYNC | BR_FLOOD; int err; + if (!netif_is_bridge_port(dev)) + return -EOPNOTSUPP; + err = switchdev_port_attr_get(dev, &attr); if (err && err != -EOPNOTSUPP) return err; @@ -926,6 +929,9 @@ int switchdev_port_bridge_setlink(struct net_device *dev, struct nlattr *afspec; int err = 0; + if (!netif_is_bridge_port(dev)) + return -EOPNOTSUPP; + protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_PROTINFO); if (protinfo) { @@ -959,6 +965,9 @@ int switchdev_port_bridge_dellink(struct net_device *dev, { struct nlattr *afspec; + if (!netif_is_bridge_port(dev)) + return -EOPNOTSUPP; + afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); if (afspec) From 85dda4e5b0ee1f5b4e8cc93d39e475006bc61ccd Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 18 Oct 2016 18:59:34 +0200 Subject: [PATCH 294/884] rtnetlink: Add rtnexthop offload flag to compare mask The offload flag is a status flag and should not be used by FIB semantics for comparison. Fixes: 37ed9493699c ("rtnetlink: add RTNH_F_EXTERNAL flag for fib offload") Signed-off-by: Jiri Pirko Reviewed-by: Andy Gospodarek Signed-off-by: David S. Miller --- include/uapi/linux/rtnetlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 262f0379d83a..5a78be518101 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -350,7 +350,7 @@ struct rtnexthop { #define RTNH_F_OFFLOAD 8 /* offloaded route */ #define RTNH_F_LINKDOWN 16 /* carrier-down on nexthop */ -#define RTNH_COMPARE_MASK (RTNH_F_DEAD | RTNH_F_LINKDOWN) +#define RTNH_COMPARE_MASK (RTNH_F_DEAD | RTNH_F_LINKDOWN | RTNH_F_OFFLOAD) /* Macros to handle hexthops */ From 3b913179c3fa89dd0e304193fa0c746fc0481447 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Thu, 13 Oct 2016 01:20:14 +0100 Subject: [PATCH 295/884] mm: replace get_user_pages_locked() write/force parameters with gup_flags This removes the 'write' and 'force' use from get_user_pages_locked() and replaces them with 'gup_flags' to make the use of FOLL_FORCE explicit in callers as use of this flag can result in surprising behaviour (and hence bugs) within the mm subsystem. Signed-off-by: Lorenzo Stoakes Acked-by: Michal Hocko Reviewed-by: Jan Kara Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- mm/frame_vector.c | 8 +++++++- mm/gup.c | 12 +++--------- mm/nommu.c | 5 ++++- 4 files changed, 15 insertions(+), 12 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index abd53f2eb74e..9fe9b0438169 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1282,7 +1282,7 @@ long get_user_pages(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, struct vm_area_struct **vmas); long get_user_pages_locked(unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, int *locked); + unsigned int gup_flags, struct page **pages, int *locked); long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, struct page **pages, unsigned int gup_flags); diff --git a/mm/frame_vector.c b/mm/frame_vector.c index 381bb07ed14f..81b67498bb9c 100644 --- a/mm/frame_vector.c +++ b/mm/frame_vector.c @@ -41,10 +41,16 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames, int ret = 0; int err; int locked; + unsigned int gup_flags = 0; if (nr_frames == 0) return 0; + if (write) + gup_flags |= FOLL_WRITE; + if (force) + gup_flags |= FOLL_FORCE; + if (WARN_ON_ONCE(nr_frames > vec->nr_allocated)) nr_frames = vec->nr_allocated; @@ -59,7 +65,7 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames, vec->got_ref = true; vec->is_pfns = false; ret = get_user_pages_locked(start, nr_frames, - write, force, (struct page **)(vec->ptrs), &locked); + gup_flags, (struct page **)(vec->ptrs), &locked); goto out; } diff --git a/mm/gup.c b/mm/gup.c index 373d1ec006e4..3cfb55ef0cf4 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -848,18 +848,12 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk, * up_read(&mm->mmap_sem); */ long get_user_pages_locked(unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, + unsigned int gup_flags, struct page **pages, int *locked) { - unsigned int flags = FOLL_TOUCH; - - if (write) - flags |= FOLL_WRITE; - if (force) - flags |= FOLL_FORCE; - return __get_user_pages_locked(current, current->mm, start, nr_pages, - pages, NULL, locked, true, flags); + pages, NULL, locked, true, + gup_flags | FOLL_TOUCH); } EXPORT_SYMBOL(get_user_pages_locked); diff --git a/mm/nommu.c b/mm/nommu.c index 7e27add39f7e..842cfdd1a31e 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -176,9 +176,12 @@ long get_user_pages(unsigned long start, unsigned long nr_pages, EXPORT_SYMBOL(get_user_pages); long get_user_pages_locked(unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, + unsigned int gup_flags, struct page **pages, int *locked) { + int write = gup_flags & FOLL_WRITE; + int force = gup_flags & FOLL_FORCE; + return get_user_pages(start, nr_pages, write, force, pages, NULL); } EXPORT_SYMBOL(get_user_pages_locked); From 7f23b3504a0df63b724180262c5f3f117f21bcae Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Thu, 13 Oct 2016 01:20:15 +0100 Subject: [PATCH 296/884] mm: replace get_vaddr_frames() write/force parameters with gup_flags This removes the 'write' and 'force' from get_vaddr_frames() and replaces them with 'gup_flags' to make the use of FOLL_FORCE explicit in callers as use of this flag can result in surprising behaviour (and hence bugs) within the mm subsystem. Signed-off-by: Lorenzo Stoakes Acked-by: Michal Hocko Reviewed-by: Jan Kara Signed-off-by: Linus Torvalds --- drivers/gpu/drm/exynos/exynos_drm_g2d.c | 3 ++- drivers/media/platform/omap/omap_vout.c | 2 +- drivers/media/v4l2-core/videobuf2-memops.c | 6 +++++- include/linux/mm.h | 2 +- mm/frame_vector.c | 13 ++----------- 5 files changed, 11 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c index aa92decf4233..fbd13fabdf2d 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c +++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c @@ -488,7 +488,8 @@ static dma_addr_t *g2d_userptr_get_dma_addr(struct drm_device *drm_dev, goto err_free; } - ret = get_vaddr_frames(start, npages, true, true, g2d_userptr->vec); + ret = get_vaddr_frames(start, npages, FOLL_FORCE | FOLL_WRITE, + g2d_userptr->vec); if (ret != npages) { DRM_ERROR("failed to get user pages from userptr.\n"); if (ret < 0) diff --git a/drivers/media/platform/omap/omap_vout.c b/drivers/media/platform/omap/omap_vout.c index e668dde6d857..a31b95cb3b09 100644 --- a/drivers/media/platform/omap/omap_vout.c +++ b/drivers/media/platform/omap/omap_vout.c @@ -214,7 +214,7 @@ static int omap_vout_get_userptr(struct videobuf_buffer *vb, u32 virtp, if (!vec) return -ENOMEM; - ret = get_vaddr_frames(virtp, 1, true, false, vec); + ret = get_vaddr_frames(virtp, 1, FOLL_WRITE, vec); if (ret != 1) { frame_vector_destroy(vec); return -EINVAL; diff --git a/drivers/media/v4l2-core/videobuf2-memops.c b/drivers/media/v4l2-core/videobuf2-memops.c index 3c3b517f1d1c..1cd322e939c7 100644 --- a/drivers/media/v4l2-core/videobuf2-memops.c +++ b/drivers/media/v4l2-core/videobuf2-memops.c @@ -42,6 +42,10 @@ struct frame_vector *vb2_create_framevec(unsigned long start, unsigned long first, last; unsigned long nr; struct frame_vector *vec; + unsigned int flags = FOLL_FORCE; + + if (write) + flags |= FOLL_WRITE; first = start >> PAGE_SHIFT; last = (start + length - 1) >> PAGE_SHIFT; @@ -49,7 +53,7 @@ struct frame_vector *vb2_create_framevec(unsigned long start, vec = frame_vector_create(nr); if (!vec) return ERR_PTR(-ENOMEM); - ret = get_vaddr_frames(start & PAGE_MASK, nr, write, true, vec); + ret = get_vaddr_frames(start & PAGE_MASK, nr, flags, vec); if (ret < 0) goto out_destroy; /* We accept only complete set of PFNs */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 9fe9b0438169..91cc923ce985 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1305,7 +1305,7 @@ struct frame_vector { struct frame_vector *frame_vector_create(unsigned int nr_frames); void frame_vector_destroy(struct frame_vector *vec); int get_vaddr_frames(unsigned long start, unsigned int nr_pfns, - bool write, bool force, struct frame_vector *vec); + unsigned int gup_flags, struct frame_vector *vec); void put_vaddr_frames(struct frame_vector *vec); int frame_vector_to_pages(struct frame_vector *vec); void frame_vector_to_pfns(struct frame_vector *vec); diff --git a/mm/frame_vector.c b/mm/frame_vector.c index 81b67498bb9c..db77dcb38afd 100644 --- a/mm/frame_vector.c +++ b/mm/frame_vector.c @@ -11,10 +11,7 @@ * get_vaddr_frames() - map virtual addresses to pfns * @start: starting user address * @nr_frames: number of pages / pfns from start to map - * @write: whether pages will be written to by the caller - * @force: whether to force write access even if user mapping is - * readonly. See description of the same argument of - get_user_pages(). + * @gup_flags: flags modifying lookup behaviour * @vec: structure which receives pages / pfns of the addresses mapped. * It should have space for at least nr_frames entries. * @@ -34,23 +31,17 @@ * This function takes care of grabbing mmap_sem as necessary. */ int get_vaddr_frames(unsigned long start, unsigned int nr_frames, - bool write, bool force, struct frame_vector *vec) + unsigned int gup_flags, struct frame_vector *vec) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; int ret = 0; int err; int locked; - unsigned int gup_flags = 0; if (nr_frames == 0) return 0; - if (write) - gup_flags |= FOLL_WRITE; - if (force) - gup_flags |= FOLL_FORCE; - if (WARN_ON_ONCE(nr_frames > vec->nr_allocated)) nr_frames = vec->nr_allocated; From 768ae309a96103ed02eb1e111e838c87854d8b51 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Thu, 13 Oct 2016 01:20:16 +0100 Subject: [PATCH 297/884] mm: replace get_user_pages() write/force parameters with gup_flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This removes the 'write' and 'force' from get_user_pages() and replaces them with 'gup_flags' to make the use of FOLL_FORCE explicit in callers as use of this flag can result in surprising behaviour (and hence bugs) within the mm subsystem. Signed-off-by: Lorenzo Stoakes Acked-by: Christian König Acked-by: Jesper Nilsson Acked-by: Michal Hocko Reviewed-by: Jan Kara Signed-off-by: Linus Torvalds --- arch/cris/arch-v32/drivers/cryptocop.c | 4 +--- arch/ia64/kernel/err_inject.c | 2 +- arch/x86/mm/mpx.c | 5 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 7 +++++-- drivers/gpu/drm/radeon/radeon_ttm.c | 3 ++- drivers/gpu/drm/via/via_dmablit.c | 4 ++-- drivers/infiniband/core/umem.c | 6 +++++- drivers/infiniband/hw/mthca/mthca_memfree.c | 2 +- drivers/infiniband/hw/qib/qib_user_pages.c | 3 ++- drivers/infiniband/hw/usnic/usnic_uiom.c | 5 ++++- drivers/media/v4l2-core/videobuf-dma-sg.c | 7 +++++-- drivers/misc/mic/scif/scif_rma.c | 3 +-- drivers/misc/sgi-gru/grufault.c | 2 +- drivers/platform/goldfish/goldfish_pipe.c | 3 ++- drivers/rapidio/devices/rio_mport_cdev.c | 3 ++- .../interface/vchiq_arm/vchiq_2835_arm.c | 3 +-- .../interface/vchiq_arm/vchiq_arm.c | 3 +-- drivers/virt/fsl_hypervisor.c | 4 ++-- include/linux/mm.h | 2 +- mm/gup.c | 12 +++--------- mm/mempolicy.c | 2 +- mm/nommu.c | 18 ++++-------------- 22 files changed, 49 insertions(+), 54 deletions(-) diff --git a/arch/cris/arch-v32/drivers/cryptocop.c b/arch/cris/arch-v32/drivers/cryptocop.c index b5698c876fcc..099e170a93ee 100644 --- a/arch/cris/arch-v32/drivers/cryptocop.c +++ b/arch/cris/arch-v32/drivers/cryptocop.c @@ -2722,7 +2722,6 @@ static int cryptocop_ioctl_process(struct inode *inode, struct file *filp, unsig err = get_user_pages((unsigned long int)(oper.indata + prev_ix), noinpages, 0, /* read access only for in data */ - 0, /* no force */ inpages, NULL); @@ -2736,8 +2735,7 @@ static int cryptocop_ioctl_process(struct inode *inode, struct file *filp, unsig if (oper.do_cipher){ err = get_user_pages((unsigned long int)oper.cipher_outdata, nooutpages, - 1, /* write access for out data */ - 0, /* no force */ + FOLL_WRITE, /* write access for out data */ outpages, NULL); up_read(¤t->mm->mmap_sem); diff --git a/arch/ia64/kernel/err_inject.c b/arch/ia64/kernel/err_inject.c index 09f845793d12..5ed0ea92c5bf 100644 --- a/arch/ia64/kernel/err_inject.c +++ b/arch/ia64/kernel/err_inject.c @@ -142,7 +142,7 @@ store_virtual_to_phys(struct device *dev, struct device_attribute *attr, u64 virt_addr=simple_strtoull(buf, NULL, 16); int ret; - ret = get_user_pages(virt_addr, 1, VM_READ, 0, NULL, NULL); + ret = get_user_pages(virt_addr, 1, FOLL_WRITE, NULL, NULL); if (ret<=0) { #ifdef ERR_INJ_DEBUG printk("Virtual address %lx is not existing.\n",virt_addr); diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index 80476878eb4c..e4f800999b32 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -544,10 +544,9 @@ static int mpx_resolve_fault(long __user *addr, int write) { long gup_ret; int nr_pages = 1; - int force = 0; - gup_ret = get_user_pages((unsigned long)addr, nr_pages, write, - force, NULL, NULL); + gup_ret = get_user_pages((unsigned long)addr, nr_pages, + write ? FOLL_WRITE : 0, NULL, NULL); /* * get_user_pages() returns number of pages gotten. * 0 means we failed to fault in and get anything, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 887483b8b818..dcaf691f56b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -555,10 +555,13 @@ struct amdgpu_ttm_tt { int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) { struct amdgpu_ttm_tt *gtt = (void *)ttm; - int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY); + unsigned int flags = 0; unsigned pinned = 0; int r; + if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY)) + flags |= FOLL_WRITE; + if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) { /* check that we only use anonymous memory to prevent problems with writeback */ @@ -581,7 +584,7 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) list_add(&guptask.list, >t->guptasks); spin_unlock(>t->guptasklock); - r = get_user_pages(userptr, num_pages, write, 0, p, NULL); + r = get_user_pages(userptr, num_pages, flags, p, NULL); spin_lock(>t->guptasklock); list_del(&guptask.list); diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 455268214b89..3de5e6e21662 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -566,7 +566,8 @@ static int radeon_ttm_tt_pin_userptr(struct ttm_tt *ttm) uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE; struct page **pages = ttm->pages + pinned; - r = get_user_pages(userptr, num_pages, write, 0, pages, NULL); + r = get_user_pages(userptr, num_pages, write ? FOLL_WRITE : 0, + pages, NULL); if (r < 0) goto release_pages; diff --git a/drivers/gpu/drm/via/via_dmablit.c b/drivers/gpu/drm/via/via_dmablit.c index 7e2a12c4fed2..1a3ad769f8c8 100644 --- a/drivers/gpu/drm/via/via_dmablit.c +++ b/drivers/gpu/drm/via/via_dmablit.c @@ -241,8 +241,8 @@ via_lock_all_dma_pages(drm_via_sg_info_t *vsg, drm_via_dmablit_t *xfer) down_read(¤t->mm->mmap_sem); ret = get_user_pages((unsigned long)xfer->mem_addr, vsg->num_pages, - (vsg->direction == DMA_FROM_DEVICE), - 0, vsg->pages, NULL); + (vsg->direction == DMA_FROM_DEVICE) ? FOLL_WRITE : 0, + vsg->pages, NULL); up_read(¤t->mm->mmap_sem); if (ret != vsg->num_pages) { diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index c68746ce6624..224ad274ea0b 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -94,6 +94,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, unsigned long dma_attrs = 0; struct scatterlist *sg, *sg_list_start; int need_release = 0; + unsigned int gup_flags = FOLL_WRITE; if (dmasync) dma_attrs |= DMA_ATTR_WRITE_BARRIER; @@ -183,6 +184,9 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, if (ret) goto out; + if (!umem->writable) + gup_flags |= FOLL_FORCE; + need_release = 1; sg_list_start = umem->sg_head.sgl; @@ -190,7 +194,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, ret = get_user_pages(cur_base, min_t(unsigned long, npages, PAGE_SIZE / sizeof (struct page *)), - 1, !umem->writable, page_list, vma_list); + gup_flags, page_list, vma_list); if (ret < 0) goto out; diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c index 6c00d04b8b28..c6fe89d79248 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.c +++ b/drivers/infiniband/hw/mthca/mthca_memfree.c @@ -472,7 +472,7 @@ int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar, goto out; } - ret = get_user_pages(uaddr & PAGE_MASK, 1, 1, 0, pages, NULL); + ret = get_user_pages(uaddr & PAGE_MASK, 1, FOLL_WRITE, pages, NULL); if (ret < 0) goto out; diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c index 2d2b94fd3633..75f08624ac05 100644 --- a/drivers/infiniband/hw/qib/qib_user_pages.c +++ b/drivers/infiniband/hw/qib/qib_user_pages.c @@ -67,7 +67,8 @@ static int __qib_get_user_pages(unsigned long start_page, size_t num_pages, for (got = 0; got < num_pages; got += ret) { ret = get_user_pages(start_page + got * PAGE_SIZE, - num_pages - got, 1, 1, + num_pages - got, + FOLL_WRITE | FOLL_FORCE, p + got, NULL); if (ret < 0) goto bail_release; diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index a0b6ebee4d8a..1ccee6ea5bc3 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -111,6 +111,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, int i; int flags; dma_addr_t pa; + unsigned int gup_flags; if (!can_do_mlock()) return -EPERM; @@ -135,6 +136,8 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, flags = IOMMU_READ | IOMMU_CACHE; flags |= (writable) ? IOMMU_WRITE : 0; + gup_flags = FOLL_WRITE; + gup_flags |= (writable) ? 0 : FOLL_FORCE; cur_base = addr & PAGE_MASK; ret = 0; @@ -142,7 +145,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, ret = get_user_pages(cur_base, min_t(unsigned long, npages, PAGE_SIZE / sizeof(struct page *)), - 1, !writable, page_list, NULL); + gup_flags, page_list, NULL); if (ret < 0) goto out; diff --git a/drivers/media/v4l2-core/videobuf-dma-sg.c b/drivers/media/v4l2-core/videobuf-dma-sg.c index f300f060b3f3..1db0af6c7f94 100644 --- a/drivers/media/v4l2-core/videobuf-dma-sg.c +++ b/drivers/media/v4l2-core/videobuf-dma-sg.c @@ -156,6 +156,7 @@ static int videobuf_dma_init_user_locked(struct videobuf_dmabuf *dma, { unsigned long first, last; int err, rw = 0; + unsigned int flags = FOLL_FORCE; dma->direction = direction; switch (dma->direction) { @@ -178,12 +179,14 @@ static int videobuf_dma_init_user_locked(struct videobuf_dmabuf *dma, if (NULL == dma->pages) return -ENOMEM; + if (rw == READ) + flags |= FOLL_WRITE; + dprintk(1, "init user [0x%lx+0x%lx => %d pages]\n", data, size, dma->nr_pages); err = get_user_pages(data & PAGE_MASK, dma->nr_pages, - rw == READ, 1, /* force */ - dma->pages, NULL); + flags, dma->pages, NULL); if (err != dma->nr_pages) { dma->nr_pages = (err >= 0) ? err : 0; diff --git a/drivers/misc/mic/scif/scif_rma.c b/drivers/misc/mic/scif/scif_rma.c index e0203b1a20fd..f806a4471eb9 100644 --- a/drivers/misc/mic/scif/scif_rma.c +++ b/drivers/misc/mic/scif/scif_rma.c @@ -1396,8 +1396,7 @@ retry: pinned_pages->nr_pages = get_user_pages( (u64)addr, nr_pages, - !!(prot & SCIF_PROT_WRITE), - 0, + (prot & SCIF_PROT_WRITE) ? FOLL_WRITE : 0, pinned_pages->pages, NULL); up_write(&mm->mmap_sem); diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c index a2d97b9b17e3..6fb773dbcd0c 100644 --- a/drivers/misc/sgi-gru/grufault.c +++ b/drivers/misc/sgi-gru/grufault.c @@ -198,7 +198,7 @@ static int non_atomic_pte_lookup(struct vm_area_struct *vma, #else *pageshift = PAGE_SHIFT; #endif - if (get_user_pages(vaddr, 1, write, 0, &page, NULL) <= 0) + if (get_user_pages(vaddr, 1, write ? FOLL_WRITE : 0, &page, NULL) <= 0) return -EFAULT; *paddr = page_to_phys(page); put_page(page); diff --git a/drivers/platform/goldfish/goldfish_pipe.c b/drivers/platform/goldfish/goldfish_pipe.c index 07462d79d040..1aba2c74160e 100644 --- a/drivers/platform/goldfish/goldfish_pipe.c +++ b/drivers/platform/goldfish/goldfish_pipe.c @@ -309,7 +309,8 @@ static ssize_t goldfish_pipe_read_write(struct file *filp, char __user *buffer, * much memory to the process. */ down_read(¤t->mm->mmap_sem); - ret = get_user_pages(address, 1, !is_write, 0, &page, NULL); + ret = get_user_pages(address, 1, is_write ? 0 : FOLL_WRITE, + &page, NULL); up_read(¤t->mm->mmap_sem); if (ret < 0) break; diff --git a/drivers/rapidio/devices/rio_mport_cdev.c b/drivers/rapidio/devices/rio_mport_cdev.c index 436dfe871d32..9013a585507e 100644 --- a/drivers/rapidio/devices/rio_mport_cdev.c +++ b/drivers/rapidio/devices/rio_mport_cdev.c @@ -892,7 +892,8 @@ rio_dma_transfer(struct file *filp, u32 transfer_mode, down_read(¤t->mm->mmap_sem); pinned = get_user_pages( (unsigned long)xfer->loc_addr & PAGE_MASK, - nr_pages, dir == DMA_FROM_DEVICE, 0, + nr_pages, + dir == DMA_FROM_DEVICE ? FOLL_WRITE : 0, page_list, NULL); up_read(¤t->mm->mmap_sem); diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c index c29040fdf9a7..1091b9f1dd07 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_2835_arm.c @@ -423,8 +423,7 @@ create_pagelist(char __user *buf, size_t count, unsigned short type, actual_pages = get_user_pages(task, task->mm, (unsigned long)buf & ~(PAGE_SIZE - 1), num_pages, - (type == PAGELIST_READ) /*Write */ , - 0 /*Force */ , + (type == PAGELIST_READ) ? FOLL_WRITE : 0, pages, NULL /*vmas */); up_read(&task->mm->mmap_sem); diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c index e11c0e07471b..7b6cd4d80621 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c @@ -1477,8 +1477,7 @@ dump_phys_mem(void *virt_addr, uint32_t num_bytes) current->mm, /* mm */ (unsigned long)virt_addr, /* start */ num_pages, /* len */ - 0, /* write */ - 0, /* force */ + 0, /* gup_flags */ pages, /* pages (array of page pointers) */ NULL); /* vmas */ up_read(¤t->mm->mmap_sem); diff --git a/drivers/virt/fsl_hypervisor.c b/drivers/virt/fsl_hypervisor.c index 60bdad3a689b..150ce2abf6c8 100644 --- a/drivers/virt/fsl_hypervisor.c +++ b/drivers/virt/fsl_hypervisor.c @@ -245,8 +245,8 @@ static long ioctl_memcpy(struct fsl_hv_ioctl_memcpy __user *p) /* Get the physical addresses of the source buffer */ down_read(¤t->mm->mmap_sem); num_pinned = get_user_pages(param.local_vaddr - lb_offset, - num_pages, (param.source == -1) ? READ : WRITE, - 0, pages, NULL); + num_pages, (param.source == -1) ? 0 : FOLL_WRITE, + pages, NULL); up_read(¤t->mm->mmap_sem); if (num_pinned != num_pages) { diff --git a/include/linux/mm.h b/include/linux/mm.h index 91cc923ce985..30bb5d9631bb 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1279,7 +1279,7 @@ long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, int write, int force, struct page **pages, struct vm_area_struct **vmas); long get_user_pages(unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, + unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas); long get_user_pages_locked(unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, int *locked); diff --git a/mm/gup.c b/mm/gup.c index 3cfb55ef0cf4..bbc2e76cdd77 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -987,18 +987,12 @@ EXPORT_SYMBOL(get_user_pages_remote); * obviously don't pass FOLL_REMOTE in here. */ long get_user_pages(unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, + unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas) { - unsigned int flags = FOLL_TOUCH; - - if (write) - flags |= FOLL_WRITE; - if (force) - flags |= FOLL_FORCE; - return __get_user_pages_locked(current, current->mm, start, nr_pages, - pages, vmas, NULL, false, flags); + pages, vmas, NULL, false, + gup_flags | FOLL_TOUCH); } EXPORT_SYMBOL(get_user_pages); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index ad1c96ac313c..0b859af06b87 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -850,7 +850,7 @@ static int lookup_node(unsigned long addr) struct page *p; int err; - err = get_user_pages(addr & PAGE_MASK, 1, 0, 0, &p, NULL); + err = get_user_pages(addr & PAGE_MASK, 1, 0, &p, NULL); if (err >= 0) { err = page_to_nid(p); put_page(p); diff --git a/mm/nommu.c b/mm/nommu.c index 842cfdd1a31e..70cb844dfd95 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -160,18 +160,11 @@ finish_or_fault: * - don't permit access to VMAs that don't support it, such as I/O mappings */ long get_user_pages(unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, + unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas) { - int flags = 0; - - if (write) - flags |= FOLL_WRITE; - if (force) - flags |= FOLL_FORCE; - - return __get_user_pages(current, current->mm, start, nr_pages, flags, - pages, vmas, NULL); + return __get_user_pages(current, current->mm, start, nr_pages, + gup_flags, pages, vmas, NULL); } EXPORT_SYMBOL(get_user_pages); @@ -179,10 +172,7 @@ long get_user_pages_locked(unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, int *locked) { - int write = gup_flags & FOLL_WRITE; - int force = gup_flags & FOLL_FORCE; - - return get_user_pages(start, nr_pages, write, force, pages, NULL); + return get_user_pages(start, nr_pages, gup_flags, pages, NULL); } EXPORT_SYMBOL(get_user_pages_locked); From 9beae1ea89305a9667ceaab6d0bf46a045ad71e7 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Thu, 13 Oct 2016 01:20:17 +0100 Subject: [PATCH 298/884] mm: replace get_user_pages_remote() write/force parameters with gup_flags This removes the 'write' and 'force' from get_user_pages_remote() and replaces them with 'gup_flags' to make the use of FOLL_FORCE explicit in callers as use of this flag can result in surprising behaviour (and hence bugs) within the mm subsystem. Signed-off-by: Lorenzo Stoakes Acked-by: Michal Hocko Reviewed-by: Jan Kara Signed-off-by: Linus Torvalds --- drivers/gpu/drm/etnaviv/etnaviv_gem.c | 7 +++++-- drivers/gpu/drm/i915/i915_gem_userptr.c | 6 +++++- drivers/infiniband/core/umem_odp.c | 7 +++++-- fs/exec.c | 9 +++++++-- include/linux/mm.h | 2 +- kernel/events/uprobes.c | 6 ++++-- mm/gup.c | 22 +++++++--------------- mm/memory.c | 6 +++++- security/tomoyo/domain.c | 2 +- 9 files changed, 40 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c index 5ce3603e6eac..0370b842d9cc 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c @@ -748,19 +748,22 @@ static struct page **etnaviv_gem_userptr_do_get_pages( int ret = 0, pinned, npages = etnaviv_obj->base.size >> PAGE_SHIFT; struct page **pvec; uintptr_t ptr; + unsigned int flags = 0; pvec = drm_malloc_ab(npages, sizeof(struct page *)); if (!pvec) return ERR_PTR(-ENOMEM); + if (!etnaviv_obj->userptr.ro) + flags |= FOLL_WRITE; + pinned = 0; ptr = etnaviv_obj->userptr.ptr; down_read(&mm->mmap_sem); while (pinned < npages) { ret = get_user_pages_remote(task, mm, ptr, npages - pinned, - !etnaviv_obj->userptr.ro, 0, - pvec + pinned, NULL); + flags, pvec + pinned, NULL); if (ret < 0) break; diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index e537930c64b5..c6f780f5abc9 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -508,6 +508,10 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work) pvec = drm_malloc_gfp(npages, sizeof(struct page *), GFP_TEMPORARY); if (pvec != NULL) { struct mm_struct *mm = obj->userptr.mm->mm; + unsigned int flags = 0; + + if (!obj->userptr.read_only) + flags |= FOLL_WRITE; ret = -EFAULT; if (atomic_inc_not_zero(&mm->mm_users)) { @@ -517,7 +521,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work) (work->task, mm, obj->userptr.ptr + pinned * PAGE_SIZE, npages - pinned, - !obj->userptr.read_only, 0, + flags, pvec + pinned, NULL); if (ret < 0) break; diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index 75077a018675..1f0fe3217f23 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -527,6 +527,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt, u64 off; int j, k, ret = 0, start_idx, npages = 0; u64 base_virt_addr; + unsigned int flags = 0; if (access_mask == 0) return -EINVAL; @@ -556,6 +557,9 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt, goto out_put_task; } + if (access_mask & ODP_WRITE_ALLOWED_BIT) + flags |= FOLL_WRITE; + start_idx = (user_virt - ib_umem_start(umem)) >> PAGE_SHIFT; k = start_idx; @@ -574,8 +578,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt, */ npages = get_user_pages_remote(owning_process, owning_mm, user_virt, gup_num_pages, - access_mask & ODP_WRITE_ALLOWED_BIT, - 0, local_page_list, NULL); + flags, local_page_list, NULL); up_read(&owning_mm->mmap_sem); if (npages < 0) diff --git a/fs/exec.c b/fs/exec.c index 6fcfb3f7b137..4e497b9ee71e 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -191,6 +191,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, { struct page *page; int ret; + unsigned int gup_flags = FOLL_FORCE; #ifdef CONFIG_STACK_GROWSUP if (write) { @@ -199,12 +200,16 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, return NULL; } #endif + + if (write) + gup_flags |= FOLL_WRITE; + /* * We are doing an exec(). 'current' is the process * doing the exec and bprm->mm is the new process's mm. */ - ret = get_user_pages_remote(current, bprm->mm, pos, 1, write, - 1, &page, NULL); + ret = get_user_pages_remote(current, bprm->mm, pos, 1, gup_flags, + &page, NULL); if (ret <= 0) return NULL; diff --git a/include/linux/mm.h b/include/linux/mm.h index 30bb5d9631bb..ecc4be7b67e0 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1276,7 +1276,7 @@ long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, struct vm_area_struct **vmas, int *nonblocking); long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, + unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas); long get_user_pages(unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index d4129bb05e5d..f9ec9add2164 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -300,7 +300,8 @@ int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, retry: /* Read the page with vaddr into memory */ - ret = get_user_pages_remote(NULL, mm, vaddr, 1, 0, 1, &old_page, &vma); + ret = get_user_pages_remote(NULL, mm, vaddr, 1, FOLL_FORCE, &old_page, + &vma); if (ret <= 0) return ret; @@ -1710,7 +1711,8 @@ static int is_trap_at_addr(struct mm_struct *mm, unsigned long vaddr) * but we treat this as a 'remote' access since it is * essentially a kernel access to the memory. */ - result = get_user_pages_remote(NULL, mm, vaddr, 1, 0, 1, &page, NULL); + result = get_user_pages_remote(NULL, mm, vaddr, 1, FOLL_FORCE, &page, + NULL); if (result < 0) return result; diff --git a/mm/gup.c b/mm/gup.c index bbc2e76cdd77..7aa113c2d373 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -915,9 +915,7 @@ EXPORT_SYMBOL(get_user_pages_unlocked); * @mm: mm_struct of target mm * @start: starting user address * @nr_pages: number of pages from start to pin - * @write: whether pages will be written to by the caller - * @force: whether to force access even when user mapping is currently - * protected (but never forces write access to shared mapping). + * @gup_flags: flags modifying lookup behaviour * @pages: array that receives pointers to the pages pinned. * Should be at least nr_pages long. Or NULL, if caller * only intends to ensure the pages are faulted in. @@ -946,9 +944,9 @@ EXPORT_SYMBOL(get_user_pages_unlocked); * or similar operation cannot guarantee anything stronger anyway because * locks can't be held over the syscall boundary. * - * If write=0, the page must not be written to. If the page is written to, - * set_page_dirty (or set_page_dirty_lock, as appropriate) must be called - * after the page is finished with, and before put_page is called. + * If gup_flags & FOLL_WRITE == 0, the page must not be written to. If the page + * is written to, set_page_dirty (or set_page_dirty_lock, as appropriate) must + * be called after the page is finished with, and before put_page is called. * * get_user_pages is typically used for fewer-copy IO operations, to get a * handle on the memory by some means other than accesses via the user virtual @@ -965,18 +963,12 @@ EXPORT_SYMBOL(get_user_pages_unlocked); */ long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, + unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas) { - unsigned int flags = FOLL_TOUCH | FOLL_REMOTE; - - if (write) - flags |= FOLL_WRITE; - if (force) - flags |= FOLL_FORCE; - return __get_user_pages_locked(tsk, mm, start, nr_pages, pages, vmas, - NULL, false, flags); + NULL, false, + gup_flags | FOLL_TOUCH | FOLL_REMOTE); } EXPORT_SYMBOL(get_user_pages_remote); diff --git a/mm/memory.c b/mm/memory.c index fc1987dfd8cc..20a9adb7b36e 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3873,6 +3873,10 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, { struct vm_area_struct *vma; void *old_buf = buf; + unsigned int flags = FOLL_FORCE; + + if (write) + flags |= FOLL_WRITE; down_read(&mm->mmap_sem); /* ignore errors, just check how much was successfully transferred */ @@ -3882,7 +3886,7 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, struct page *page = NULL; ret = get_user_pages_remote(tsk, mm, addr, 1, - write, 1, &page, &vma); + flags, &page, &vma); if (ret <= 0) { #ifndef CONFIG_HAVE_IOREMAP_PROT break; diff --git a/security/tomoyo/domain.c b/security/tomoyo/domain.c index ade7c6cad172..682b73af7766 100644 --- a/security/tomoyo/domain.c +++ b/security/tomoyo/domain.c @@ -881,7 +881,7 @@ bool tomoyo_dump_page(struct linux_binprm *bprm, unsigned long pos, * the execve(). */ if (get_user_pages_remote(current, bprm->mm, pos, 1, - 0, 1, &page, NULL) <= 0) + FOLL_FORCE, &page, NULL) <= 0) return false; #else page = bprm->page[pos / PAGE_SIZE]; From 442486ec1096781c50227b73f721a63974b0fdda Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Thu, 13 Oct 2016 01:20:18 +0100 Subject: [PATCH 299/884] mm: replace __access_remote_vm() write parameter with gup_flags This removes the 'write' argument from __access_remote_vm() and replaces it with 'gup_flags' as use of this function previously silently implied FOLL_FORCE, whereas after this patch callers explicitly pass this flag. We make this explicit as use of FOLL_FORCE can result in surprising behaviour (and hence bugs) within the mm subsystem. Signed-off-by: Lorenzo Stoakes Acked-by: Michal Hocko Signed-off-by: Linus Torvalds --- mm/memory.c | 23 +++++++++++++++-------- mm/nommu.c | 9 ++++++--- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 20a9adb7b36e..79ebed3a4c2b 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3869,14 +3869,11 @@ EXPORT_SYMBOL_GPL(generic_access_phys); * given task for page fault accounting. */ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, - unsigned long addr, void *buf, int len, int write) + unsigned long addr, void *buf, int len, unsigned int gup_flags) { struct vm_area_struct *vma; void *old_buf = buf; - unsigned int flags = FOLL_FORCE; - - if (write) - flags |= FOLL_WRITE; + int write = gup_flags & FOLL_WRITE; down_read(&mm->mmap_sem); /* ignore errors, just check how much was successfully transferred */ @@ -3886,7 +3883,7 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, struct page *page = NULL; ret = get_user_pages_remote(tsk, mm, addr, 1, - flags, &page, &vma); + gup_flags, &page, &vma); if (ret <= 0) { #ifndef CONFIG_HAVE_IOREMAP_PROT break; @@ -3945,7 +3942,12 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, int access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf, int len, int write) { - return __access_remote_vm(NULL, mm, addr, buf, len, write); + unsigned int flags = FOLL_FORCE; + + if (write) + flags |= FOLL_WRITE; + + return __access_remote_vm(NULL, mm, addr, buf, len, flags); } /* @@ -3958,12 +3960,17 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, { struct mm_struct *mm; int ret; + unsigned int flags = FOLL_FORCE; mm = get_task_mm(tsk); if (!mm) return 0; - ret = __access_remote_vm(tsk, mm, addr, buf, len, write); + if (write) + flags |= FOLL_WRITE; + + ret = __access_remote_vm(tsk, mm, addr, buf, len, flags); + mmput(mm); return ret; diff --git a/mm/nommu.c b/mm/nommu.c index 70cb844dfd95..bde7df35118b 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1809,9 +1809,10 @@ void filemap_map_pages(struct fault_env *fe, EXPORT_SYMBOL(filemap_map_pages); static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, - unsigned long addr, void *buf, int len, int write) + unsigned long addr, void *buf, int len, unsigned int gup_flags) { struct vm_area_struct *vma; + int write = gup_flags & FOLL_WRITE; down_read(&mm->mmap_sem); @@ -1853,7 +1854,8 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, int access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf, int len, int write) { - return __access_remote_vm(NULL, mm, addr, buf, len, write); + return __access_remote_vm(NULL, mm, addr, buf, len, + write ? FOLL_WRITE : 0); } /* @@ -1871,7 +1873,8 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in if (!mm) return 0; - len = __access_remote_vm(tsk, mm, addr, buf, len, write); + len = __access_remote_vm(tsk, mm, addr, buf, len, + write ? FOLL_WRITE : 0); mmput(mm); return len; From 6347e8d5bcce33fc36e651901efefbe2c93a43ef Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Thu, 13 Oct 2016 01:20:19 +0100 Subject: [PATCH 300/884] mm: replace access_remote_vm() write parameter with gup_flags This removes the 'write' argument from access_remote_vm() and replaces it with 'gup_flags' as use of this function previously silently implied FOLL_FORCE, whereas after this patch callers explicitly pass this flag. We make this explicit as use of FOLL_FORCE can result in surprising behaviour (and hence bugs) within the mm subsystem. Signed-off-by: Lorenzo Stoakes Acked-by: Michal Hocko Signed-off-by: Linus Torvalds --- fs/proc/base.c | 19 +++++++++++++------ include/linux/mm.h | 2 +- mm/memory.c | 11 +++-------- mm/nommu.c | 7 +++---- 4 files changed, 20 insertions(+), 19 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index c2964d890c9a..8e654468ab67 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -252,7 +252,7 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, * Inherently racy -- command line shares address space * with code and data. */ - rv = access_remote_vm(mm, arg_end - 1, &c, 1, 0); + rv = access_remote_vm(mm, arg_end - 1, &c, 1, FOLL_FORCE); if (rv <= 0) goto out_free_page; @@ -270,7 +270,8 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, int nr_read; _count = min3(count, len, PAGE_SIZE); - nr_read = access_remote_vm(mm, p, page, _count, 0); + nr_read = access_remote_vm(mm, p, page, _count, + FOLL_FORCE); if (nr_read < 0) rv = nr_read; if (nr_read <= 0) @@ -305,7 +306,8 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, bool final; _count = min3(count, len, PAGE_SIZE); - nr_read = access_remote_vm(mm, p, page, _count, 0); + nr_read = access_remote_vm(mm, p, page, _count, + FOLL_FORCE); if (nr_read < 0) rv = nr_read; if (nr_read <= 0) @@ -354,7 +356,8 @@ skip_argv: bool final; _count = min3(count, len, PAGE_SIZE); - nr_read = access_remote_vm(mm, p, page, _count, 0); + nr_read = access_remote_vm(mm, p, page, _count, + FOLL_FORCE); if (nr_read < 0) rv = nr_read; if (nr_read <= 0) @@ -832,6 +835,7 @@ static ssize_t mem_rw(struct file *file, char __user *buf, unsigned long addr = *ppos; ssize_t copied; char *page; + unsigned int flags = FOLL_FORCE; if (!mm) return 0; @@ -844,6 +848,9 @@ static ssize_t mem_rw(struct file *file, char __user *buf, if (!atomic_inc_not_zero(&mm->mm_users)) goto free; + if (write) + flags |= FOLL_WRITE; + while (count > 0) { int this_len = min_t(int, count, PAGE_SIZE); @@ -852,7 +859,7 @@ static ssize_t mem_rw(struct file *file, char __user *buf, break; } - this_len = access_remote_vm(mm, addr, page, this_len, write); + this_len = access_remote_vm(mm, addr, page, this_len, flags); if (!this_len) { if (!copied) copied = -EIO; @@ -965,7 +972,7 @@ static ssize_t environ_read(struct file *file, char __user *buf, this_len = min(max_len, this_len); retval = access_remote_vm(mm, (env_start + src), - page, this_len, 0); + page, this_len, FOLL_FORCE); if (retval <= 0) { ret = retval; diff --git a/include/linux/mm.h b/include/linux/mm.h index ecc4be7b67e0..f31bf9058587 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1268,7 +1268,7 @@ static inline int fixup_user_fault(struct task_struct *tsk, extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write); extern int access_remote_vm(struct mm_struct *mm, unsigned long addr, - void *buf, int len, int write); + void *buf, int len, unsigned int gup_flags); long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, diff --git a/mm/memory.c b/mm/memory.c index 79ebed3a4c2b..bac2d994850e 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3935,19 +3935,14 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, * @addr: start address to access * @buf: source or destination buffer * @len: number of bytes to transfer - * @write: whether the access is a write + * @gup_flags: flags modifying lookup behaviour * * The caller must hold a reference on @mm. */ int access_remote_vm(struct mm_struct *mm, unsigned long addr, - void *buf, int len, int write) + void *buf, int len, unsigned int gup_flags) { - unsigned int flags = FOLL_FORCE; - - if (write) - flags |= FOLL_WRITE; - - return __access_remote_vm(NULL, mm, addr, buf, len, flags); + return __access_remote_vm(NULL, mm, addr, buf, len, gup_flags); } /* diff --git a/mm/nommu.c b/mm/nommu.c index bde7df35118b..93d5bb53fc63 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1847,15 +1847,14 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, * @addr: start address to access * @buf: source or destination buffer * @len: number of bytes to transfer - * @write: whether the access is a write + * @gup_flags: flags modifying lookup behaviour * * The caller must hold a reference on @mm. */ int access_remote_vm(struct mm_struct *mm, unsigned long addr, - void *buf, int len, int write) + void *buf, int len, unsigned int gup_flags) { - return __access_remote_vm(NULL, mm, addr, buf, len, - write ? FOLL_WRITE : 0); + return __access_remote_vm(NULL, mm, addr, buf, len, gup_flags); } /* From 82454581d76ccb95535ba6898b39ac5baa123633 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 18 Oct 2016 13:24:07 -0700 Subject: [PATCH 301/884] tcp: do not export sysctl_tcp_low_latency Since commit b2fb4f54ecd4 ("tcp: uninline tcp_prequeue()") we no longer access sysctl_tcp_low_latency from a module. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 79d55eb3ec3f..61b7be303eec 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -86,7 +86,6 @@ int sysctl_tcp_tw_reuse __read_mostly; int sysctl_tcp_low_latency __read_mostly; -EXPORT_SYMBOL(sysctl_tcp_low_latency); #ifdef CONFIG_TCP_MD5SIG static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, From 137baabe351e0554d06c6d5c84059fe343e2791e Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 17 Oct 2016 15:47:39 +0000 Subject: [PATCH 302/884] mm/numa: Remove duplicated include from mprotect.c Signed-off-by: Wei Yongjun Cc: Dave Hansen Cc: linux-mm@kvack.org Cc: Andrew Morton Cc: Mel Gorman Cc: "Kirill A. Shutemov" Link: http://lkml.kernel.org/r/1476719259-6214-1-git-send-email-weiyj.lk@gmail.com Signed-off-by: Thomas Gleixner --- mm/mprotect.c | 1 - 1 file changed, 1 deletion(-) diff --git a/mm/mprotect.c b/mm/mprotect.c index bcdbe62f3e6d..11936526b08b 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include From f307ab6dcea03f9d8e4d70508fd7d1ca57cfa7f9 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Thu, 13 Oct 2016 01:20:20 +0100 Subject: [PATCH 303/884] mm: replace access_process_vm() write parameter with gup_flags This removes the 'write' argument from access_process_vm() and replaces it with 'gup_flags' as use of this function previously silently implied FOLL_FORCE, whereas after this patch callers explicitly pass this flag. We make this explicit as use of FOLL_FORCE can result in surprising behaviour (and hence bugs) within the mm subsystem. Signed-off-by: Lorenzo Stoakes Acked-by: Jesper Nilsson Acked-by: Michal Hocko Acked-by: Michael Ellerman Signed-off-by: Linus Torvalds --- arch/alpha/kernel/ptrace.c | 9 ++++++--- arch/blackfin/kernel/ptrace.c | 5 +++-- arch/cris/arch-v32/kernel/ptrace.c | 4 ++-- arch/ia64/kernel/ptrace.c | 14 +++++++++----- arch/m32r/kernel/ptrace.c | 15 ++++++++++----- arch/mips/kernel/ptrace32.c | 5 +++-- arch/powerpc/kernel/ptrace32.c | 5 +++-- arch/score/kernel/ptrace.c | 10 ++++++---- arch/sparc/kernel/ptrace_64.c | 24 ++++++++++++++++-------- arch/x86/kernel/step.c | 3 ++- arch/x86/um/ptrace_32.c | 3 ++- arch/x86/um/ptrace_64.c | 3 ++- include/linux/mm.h | 3 ++- kernel/ptrace.c | 16 ++++++++++------ mm/memory.c | 8 ++------ mm/nommu.c | 6 +++--- mm/util.c | 5 +++-- 17 files changed, 84 insertions(+), 54 deletions(-) diff --git a/arch/alpha/kernel/ptrace.c b/arch/alpha/kernel/ptrace.c index d9ee81769899..940dfb406591 100644 --- a/arch/alpha/kernel/ptrace.c +++ b/arch/alpha/kernel/ptrace.c @@ -157,14 +157,16 @@ put_reg(struct task_struct *task, unsigned long regno, unsigned long data) static inline int read_int(struct task_struct *task, unsigned long addr, int * data) { - int copied = access_process_vm(task, addr, data, sizeof(int), 0); + int copied = access_process_vm(task, addr, data, sizeof(int), + FOLL_FORCE); return (copied == sizeof(int)) ? 0 : -EIO; } static inline int write_int(struct task_struct *task, unsigned long addr, int data) { - int copied = access_process_vm(task, addr, &data, sizeof(int), 1); + int copied = access_process_vm(task, addr, &data, sizeof(int), + FOLL_FORCE | FOLL_WRITE); return (copied == sizeof(int)) ? 0 : -EIO; } @@ -281,7 +283,8 @@ long arch_ptrace(struct task_struct *child, long request, /* When I and D space are separate, these will need to be fixed. */ case PTRACE_PEEKTEXT: /* read word at location addr. */ case PTRACE_PEEKDATA: - copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0); + copied = access_process_vm(child, addr, &tmp, sizeof(tmp), + FOLL_FORCE); ret = -EIO; if (copied != sizeof(tmp)) break; diff --git a/arch/blackfin/kernel/ptrace.c b/arch/blackfin/kernel/ptrace.c index 8b8fe671b1a6..8d79286ee4e8 100644 --- a/arch/blackfin/kernel/ptrace.c +++ b/arch/blackfin/kernel/ptrace.c @@ -271,7 +271,7 @@ long arch_ptrace(struct task_struct *child, long request, case BFIN_MEM_ACCESS_CORE: case BFIN_MEM_ACCESS_CORE_ONLY: copied = access_process_vm(child, addr, &tmp, - to_copy, 0); + to_copy, FOLL_FORCE); if (copied) break; @@ -324,7 +324,8 @@ long arch_ptrace(struct task_struct *child, long request, case BFIN_MEM_ACCESS_CORE: case BFIN_MEM_ACCESS_CORE_ONLY: copied = access_process_vm(child, addr, &data, - to_copy, 1); + to_copy, + FOLL_FORCE | FOLL_WRITE); break; case BFIN_MEM_ACCESS_DMA: if (safe_dma_memcpy(paddr, &data, to_copy)) diff --git a/arch/cris/arch-v32/kernel/ptrace.c b/arch/cris/arch-v32/kernel/ptrace.c index f085229cf870..f0df654ac6fc 100644 --- a/arch/cris/arch-v32/kernel/ptrace.c +++ b/arch/cris/arch-v32/kernel/ptrace.c @@ -147,7 +147,7 @@ long arch_ptrace(struct task_struct *child, long request, /* The trampoline page is globally mapped, no page table to traverse.*/ tmp = *(unsigned long*)addr; } else { - copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0); + copied = access_process_vm(child, addr, &tmp, sizeof(tmp), FOLL_FORCE); if (copied != sizeof(tmp)) break; @@ -279,7 +279,7 @@ static int insn_size(struct task_struct *child, unsigned long pc) int opsize = 0; /* Read the opcode at pc (do what PTRACE_PEEKTEXT would do). */ - copied = access_process_vm(child, pc, &opcode, sizeof(opcode), 0); + copied = access_process_vm(child, pc, &opcode, sizeof(opcode), FOLL_FORCE); if (copied != sizeof(opcode)) return 0; diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 6f54d511cc50..31aa8c0f68e1 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -453,7 +453,7 @@ ia64_peek (struct task_struct *child, struct switch_stack *child_stack, return 0; } } - copied = access_process_vm(child, addr, &ret, sizeof(ret), 0); + copied = access_process_vm(child, addr, &ret, sizeof(ret), FOLL_FORCE); if (copied != sizeof(ret)) return -EIO; *val = ret; @@ -489,7 +489,8 @@ ia64_poke (struct task_struct *child, struct switch_stack *child_stack, *ia64_rse_skip_regs(krbs, regnum) = val; } } - } else if (access_process_vm(child, addr, &val, sizeof(val), 1) + } else if (access_process_vm(child, addr, &val, sizeof(val), + FOLL_FORCE | FOLL_WRITE) != sizeof(val)) return -EIO; return 0; @@ -543,7 +544,8 @@ ia64_sync_user_rbs (struct task_struct *child, struct switch_stack *sw, ret = ia64_peek(child, sw, user_rbs_end, addr, &val); if (ret < 0) return ret; - if (access_process_vm(child, addr, &val, sizeof(val), 1) + if (access_process_vm(child, addr, &val, sizeof(val), + FOLL_FORCE | FOLL_WRITE) != sizeof(val)) return -EIO; } @@ -559,7 +561,8 @@ ia64_sync_kernel_rbs (struct task_struct *child, struct switch_stack *sw, /* now copy word for word from user rbs to kernel rbs: */ for (addr = user_rbs_start; addr < user_rbs_end; addr += 8) { - if (access_process_vm(child, addr, &val, sizeof(val), 0) + if (access_process_vm(child, addr, &val, sizeof(val), + FOLL_FORCE) != sizeof(val)) return -EIO; @@ -1156,7 +1159,8 @@ arch_ptrace (struct task_struct *child, long request, case PTRACE_PEEKTEXT: case PTRACE_PEEKDATA: /* read word at location addr */ - if (access_process_vm(child, addr, &data, sizeof(data), 0) + if (access_process_vm(child, addr, &data, sizeof(data), + FOLL_FORCE) != sizeof(data)) return -EIO; /* ensure return value is not mistaken for error code */ diff --git a/arch/m32r/kernel/ptrace.c b/arch/m32r/kernel/ptrace.c index 51f5e9aa4901..c145605a981f 100644 --- a/arch/m32r/kernel/ptrace.c +++ b/arch/m32r/kernel/ptrace.c @@ -493,7 +493,8 @@ unregister_all_debug_traps(struct task_struct *child) int i; for (i = 0; i < p->nr_trap; i++) - access_process_vm(child, p->addr[i], &p->insn[i], sizeof(p->insn[i]), 1); + access_process_vm(child, p->addr[i], &p->insn[i], sizeof(p->insn[i]), + FOLL_FORCE | FOLL_WRITE); p->nr_trap = 0; } @@ -537,7 +538,8 @@ embed_debug_trap(struct task_struct *child, unsigned long next_pc) unsigned long next_insn, code; unsigned long addr = next_pc & ~3; - if (access_process_vm(child, addr, &next_insn, sizeof(next_insn), 0) + if (access_process_vm(child, addr, &next_insn, sizeof(next_insn), + FOLL_FORCE) != sizeof(next_insn)) { return -1; /* error */ } @@ -546,7 +548,8 @@ embed_debug_trap(struct task_struct *child, unsigned long next_pc) if (register_debug_trap(child, next_pc, next_insn, &code)) { return -1; /* error */ } - if (access_process_vm(child, addr, &code, sizeof(code), 1) + if (access_process_vm(child, addr, &code, sizeof(code), + FOLL_FORCE | FOLL_WRITE) != sizeof(code)) { return -1; /* error */ } @@ -562,7 +565,8 @@ withdraw_debug_trap(struct pt_regs *regs) addr = (regs->bpc - 2) & ~3; regs->bpc -= 2; if (unregister_debug_trap(current, addr, &code)) { - access_process_vm(current, addr, &code, sizeof(code), 1); + access_process_vm(current, addr, &code, sizeof(code), + FOLL_FORCE | FOLL_WRITE); invalidate_cache(); } } @@ -589,7 +593,8 @@ void user_enable_single_step(struct task_struct *child) /* Compute next pc. */ pc = get_stack_long(child, PT_BPC); - if (access_process_vm(child, pc&~3, &insn, sizeof(insn), 0) + if (access_process_vm(child, pc&~3, &insn, sizeof(insn), + FOLL_FORCE) != sizeof(insn)) return; diff --git a/arch/mips/kernel/ptrace32.c b/arch/mips/kernel/ptrace32.c index 283b5a1967d1..7e71a4e0281b 100644 --- a/arch/mips/kernel/ptrace32.c +++ b/arch/mips/kernel/ptrace32.c @@ -70,7 +70,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, break; copied = access_process_vm(child, (u64)addrOthers, &tmp, - sizeof(tmp), 0); + sizeof(tmp), FOLL_FORCE); if (copied != sizeof(tmp)) break; ret = put_user(tmp, (u32 __user *) (unsigned long) data); @@ -179,7 +179,8 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, break; ret = 0; if (access_process_vm(child, (u64)addrOthers, &data, - sizeof(data), 1) == sizeof(data)) + sizeof(data), + FOLL_FORCE | FOLL_WRITE) == sizeof(data)) break; ret = -EIO; break; diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c index f52b7db327c8..010b7b310237 100644 --- a/arch/powerpc/kernel/ptrace32.c +++ b/arch/powerpc/kernel/ptrace32.c @@ -74,7 +74,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, break; copied = access_process_vm(child, (u64)addrOthers, &tmp, - sizeof(tmp), 0); + sizeof(tmp), FOLL_FORCE); if (copied != sizeof(tmp)) break; ret = put_user(tmp, (u32 __user *)data); @@ -179,7 +179,8 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, break; ret = 0; if (access_process_vm(child, (u64)addrOthers, &tmp, - sizeof(tmp), 1) == sizeof(tmp)) + sizeof(tmp), + FOLL_FORCE | FOLL_WRITE) == sizeof(tmp)) break; ret = -EIO; break; diff --git a/arch/score/kernel/ptrace.c b/arch/score/kernel/ptrace.c index 55836188b217..4f7314d5f334 100644 --- a/arch/score/kernel/ptrace.c +++ b/arch/score/kernel/ptrace.c @@ -131,7 +131,7 @@ read_tsk_long(struct task_struct *child, { int copied; - copied = access_process_vm(child, addr, res, sizeof(*res), 0); + copied = access_process_vm(child, addr, res, sizeof(*res), FOLL_FORCE); return copied != sizeof(*res) ? -EIO : 0; } @@ -142,7 +142,7 @@ read_tsk_short(struct task_struct *child, { int copied; - copied = access_process_vm(child, addr, res, sizeof(*res), 0); + copied = access_process_vm(child, addr, res, sizeof(*res), FOLL_FORCE); return copied != sizeof(*res) ? -EIO : 0; } @@ -153,7 +153,8 @@ write_tsk_short(struct task_struct *child, { int copied; - copied = access_process_vm(child, addr, &val, sizeof(val), 1); + copied = access_process_vm(child, addr, &val, sizeof(val), + FOLL_FORCE | FOLL_WRITE); return copied != sizeof(val) ? -EIO : 0; } @@ -164,7 +165,8 @@ write_tsk_long(struct task_struct *child, { int copied; - copied = access_process_vm(child, addr, &val, sizeof(val), 1); + copied = access_process_vm(child, addr, &val, sizeof(val), + FOLL_FORCE | FOLL_WRITE); return copied != sizeof(val) ? -EIO : 0; } diff --git a/arch/sparc/kernel/ptrace_64.c b/arch/sparc/kernel/ptrace_64.c index 9ddc4928a089..ac082dd8c67d 100644 --- a/arch/sparc/kernel/ptrace_64.c +++ b/arch/sparc/kernel/ptrace_64.c @@ -127,7 +127,8 @@ static int get_from_target(struct task_struct *target, unsigned long uaddr, if (copy_from_user(kbuf, (void __user *) uaddr, len)) return -EFAULT; } else { - int len2 = access_process_vm(target, uaddr, kbuf, len, 0); + int len2 = access_process_vm(target, uaddr, kbuf, len, + FOLL_FORCE); if (len2 != len) return -EFAULT; } @@ -141,7 +142,8 @@ static int set_to_target(struct task_struct *target, unsigned long uaddr, if (copy_to_user((void __user *) uaddr, kbuf, len)) return -EFAULT; } else { - int len2 = access_process_vm(target, uaddr, kbuf, len, 1); + int len2 = access_process_vm(target, uaddr, kbuf, len, + FOLL_FORCE | FOLL_WRITE); if (len2 != len) return -EFAULT; } @@ -505,7 +507,8 @@ static int genregs32_get(struct task_struct *target, if (access_process_vm(target, (unsigned long) ®_window[pos], - k, sizeof(*k), 0) + k, sizeof(*k), + FOLL_FORCE) != sizeof(*k)) return -EFAULT; k++; @@ -531,12 +534,14 @@ static int genregs32_get(struct task_struct *target, if (access_process_vm(target, (unsigned long) ®_window[pos], - ®, sizeof(reg), 0) + ®, sizeof(reg), + FOLL_FORCE) != sizeof(reg)) return -EFAULT; if (access_process_vm(target, (unsigned long) u, - ®, sizeof(reg), 1) + ®, sizeof(reg), + FOLL_FORCE | FOLL_WRITE) != sizeof(reg)) return -EFAULT; pos++; @@ -615,7 +620,8 @@ static int genregs32_set(struct task_struct *target, (unsigned long) ®_window[pos], (void *) k, - sizeof(*k), 1) + sizeof(*k), + FOLL_FORCE | FOLL_WRITE) != sizeof(*k)) return -EFAULT; k++; @@ -642,13 +648,15 @@ static int genregs32_set(struct task_struct *target, if (access_process_vm(target, (unsigned long) u, - ®, sizeof(reg), 0) + ®, sizeof(reg), + FOLL_FORCE) != sizeof(reg)) return -EFAULT; if (access_process_vm(target, (unsigned long) ®_window[pos], - ®, sizeof(reg), 1) + ®, sizeof(reg), + FOLL_FORCE | FOLL_WRITE) != sizeof(reg)) return -EFAULT; pos++; diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index c9a073866ca7..a23ce84a3f6c 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -57,7 +57,8 @@ static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs) unsigned char opcode[15]; unsigned long addr = convert_ip_to_linear(child, regs); - copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0); + copied = access_process_vm(child, addr, opcode, sizeof(opcode), + FOLL_FORCE); for (i = 0; i < copied; i++) { switch (opcode[i]) { /* popf and iret */ diff --git a/arch/x86/um/ptrace_32.c b/arch/x86/um/ptrace_32.c index 5766ead6fdb9..60a5a5a85505 100644 --- a/arch/x86/um/ptrace_32.c +++ b/arch/x86/um/ptrace_32.c @@ -36,7 +36,8 @@ int is_syscall(unsigned long addr) * slow, but that doesn't matter, since it will be called only * in case of singlestepping, if copy_from_user failed. */ - n = access_process_vm(current, addr, &instr, sizeof(instr), 0); + n = access_process_vm(current, addr, &instr, sizeof(instr), + FOLL_FORCE); if (n != sizeof(instr)) { printk(KERN_ERR "is_syscall : failed to read " "instruction from 0x%lx\n", addr); diff --git a/arch/x86/um/ptrace_64.c b/arch/x86/um/ptrace_64.c index 0b5c184dd5b3..e30202b1716e 100644 --- a/arch/x86/um/ptrace_64.c +++ b/arch/x86/um/ptrace_64.c @@ -212,7 +212,8 @@ int is_syscall(unsigned long addr) * slow, but that doesn't matter, since it will be called only * in case of singlestepping, if copy_from_user failed. */ - n = access_process_vm(current, addr, &instr, sizeof(instr), 0); + n = access_process_vm(current, addr, &instr, sizeof(instr), + FOLL_FORCE); if (n != sizeof(instr)) { printk("is_syscall : failed to read instruction from " "0x%lx\n", addr); diff --git a/include/linux/mm.h b/include/linux/mm.h index f31bf9058587..ffbd72979ee7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1266,7 +1266,8 @@ static inline int fixup_user_fault(struct task_struct *tsk, } #endif -extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write); +extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, + unsigned int gup_flags); extern int access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf, int len, unsigned int gup_flags); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 2a99027312a6..e6474f7272ec 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -537,7 +537,7 @@ int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst int this_len, retval; this_len = (len > sizeof(buf)) ? sizeof(buf) : len; - retval = access_process_vm(tsk, src, buf, this_len, 0); + retval = access_process_vm(tsk, src, buf, this_len, FOLL_FORCE); if (!retval) { if (copied) break; @@ -564,7 +564,8 @@ int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long ds this_len = (len > sizeof(buf)) ? sizeof(buf) : len; if (copy_from_user(buf, src, this_len)) return -EFAULT; - retval = access_process_vm(tsk, dst, buf, this_len, 1); + retval = access_process_vm(tsk, dst, buf, this_len, + FOLL_FORCE | FOLL_WRITE); if (!retval) { if (copied) break; @@ -1127,7 +1128,7 @@ int generic_ptrace_peekdata(struct task_struct *tsk, unsigned long addr, unsigned long tmp; int copied; - copied = access_process_vm(tsk, addr, &tmp, sizeof(tmp), 0); + copied = access_process_vm(tsk, addr, &tmp, sizeof(tmp), FOLL_FORCE); if (copied != sizeof(tmp)) return -EIO; return put_user(tmp, (unsigned long __user *)data); @@ -1138,7 +1139,8 @@ int generic_ptrace_pokedata(struct task_struct *tsk, unsigned long addr, { int copied; - copied = access_process_vm(tsk, addr, &data, sizeof(data), 1); + copied = access_process_vm(tsk, addr, &data, sizeof(data), + FOLL_FORCE | FOLL_WRITE); return (copied == sizeof(data)) ? 0 : -EIO; } @@ -1155,7 +1157,8 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request, switch (request) { case PTRACE_PEEKTEXT: case PTRACE_PEEKDATA: - ret = access_process_vm(child, addr, &word, sizeof(word), 0); + ret = access_process_vm(child, addr, &word, sizeof(word), + FOLL_FORCE); if (ret != sizeof(word)) ret = -EIO; else @@ -1164,7 +1167,8 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request, case PTRACE_POKETEXT: case PTRACE_POKEDATA: - ret = access_process_vm(child, addr, &data, sizeof(data), 1); + ret = access_process_vm(child, addr, &data, sizeof(data), + FOLL_FORCE | FOLL_WRITE); ret = (ret != sizeof(data) ? -EIO : 0); break; diff --git a/mm/memory.c b/mm/memory.c index bac2d994850e..e18c57bdc75c 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3951,20 +3951,16 @@ int access_remote_vm(struct mm_struct *mm, unsigned long addr, * Do not walk the page table directly, use get_user_pages */ int access_process_vm(struct task_struct *tsk, unsigned long addr, - void *buf, int len, int write) + void *buf, int len, unsigned int gup_flags) { struct mm_struct *mm; int ret; - unsigned int flags = FOLL_FORCE; mm = get_task_mm(tsk); if (!mm) return 0; - if (write) - flags |= FOLL_WRITE; - - ret = __access_remote_vm(tsk, mm, addr, buf, len, flags); + ret = __access_remote_vm(tsk, mm, addr, buf, len, gup_flags); mmput(mm); diff --git a/mm/nommu.c b/mm/nommu.c index 93d5bb53fc63..db5fd1795298 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1861,7 +1861,8 @@ int access_remote_vm(struct mm_struct *mm, unsigned long addr, * Access another process' address space. * - source/target buffer must be kernel space */ -int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write) +int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, + unsigned int gup_flags) { struct mm_struct *mm; @@ -1872,8 +1873,7 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in if (!mm) return 0; - len = __access_remote_vm(tsk, mm, addr, buf, len, - write ? FOLL_WRITE : 0); + len = __access_remote_vm(tsk, mm, addr, buf, len, gup_flags); mmput(mm); return len; diff --git a/mm/util.c b/mm/util.c index 4c685bde5ebc..952cbe7ad7b7 100644 --- a/mm/util.c +++ b/mm/util.c @@ -624,7 +624,7 @@ int get_cmdline(struct task_struct *task, char *buffer, int buflen) if (len > buflen) len = buflen; - res = access_process_vm(task, arg_start, buffer, len, 0); + res = access_process_vm(task, arg_start, buffer, len, FOLL_FORCE); /* * If the nul at the end of args has been overwritten, then @@ -639,7 +639,8 @@ int get_cmdline(struct task_struct *task, char *buffer, int buflen) if (len > buflen - res) len = buflen - res; res += access_process_vm(task, env_start, - buffer+res, len, 0); + buffer+res, len, + FOLL_FORCE); res = strnlen(buffer, res); } } From 854dd54245f7f1b1175b1bada613929396a571be Mon Sep 17 00:00:00 2001 From: Renat Valiullin Date: Thu, 13 Oct 2016 11:45:39 -0700 Subject: [PATCH 304/884] x86/vmware: Skip timer_irq_works() check on VMware The timer_irq_works() boot check may sometimes fail in a VM, when the Host is overcommitted or when the Guest is running nested. Since the intended check is unnecessary on VMware's virtual hardware, by-pass it. Signed-off-by: Renat Valiullin Acked-by: Alok N Kataria Cc: virtualization@lists.linux-foundation.org Link: http://lkml.kernel.org/r/20161013184539.GA11497@rvaliullin-vm Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/vmware.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 81160578b91a..5130985b758b 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #define CPUID_VMWARE_INFO_LEAF 0x40000000 @@ -94,6 +95,10 @@ static void __init vmware_platform_setup(void) } else { pr_warn("Failed to get TSC freq from the hypervisor\n"); } + +#ifdef CONFIG_X86_IO_APIC + no_timer_check = 1; +#endif } /* From 8214899342981dbd49ae24aadbbd19e9e7830684 Mon Sep 17 00:00:00 2001 From: Piotr Luc Date: Tue, 18 Oct 2016 17:01:11 +0200 Subject: [PATCH 305/884] x86/cpufeature: Add AVX512_4VNNIW and AVX512_4FMAPS features AVX512_4VNNIW - Vector instructions for deep learning enhanced word variable precision. AVX512_4FMAPS - Vector instructions for deep learning floating-point single precision. These new instructions are to be used in future Intel Xeon & Xeon Phi processors. The bits 2&3 of CPUID[level:0x07, EDX] inform that new instructions are supported by a processor. The spec can be found in the Intel Software Developer Manual (SDM) or in the Instruction Set Extensions Programming Reference (ISE). Define new feature flags to enumerate the new instructions in /proc/cpuinfo accordingly to CPUID bits and add the required xsave extensions which are required for proper operation. Signed-off-by: Piotr Luc Cc: Denys Vlasenko Cc: Peter Zijlstra Cc: Brian Gerst Cc: Dave Hansen Cc: Borislav Petkov Cc: Andy Lutomirski Cc: Josh Poimboeuf Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20161018150111.29926-1-piotr.luc@intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/cpufeatures.h | 2 ++ arch/x86/kernel/cpu/scattered.c | 2 ++ arch/x86/kernel/fpu/xstate.c | 2 ++ tools/arch/x86/include/asm/cpufeatures.h | 2 ++ 4 files changed, 8 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 1188bc849ee3..a39629206864 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -194,6 +194,8 @@ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ +#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ +#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 8cb57df9398d..1db8dc490b66 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -32,6 +32,8 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c) static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_INTEL_PT, CR_EBX,25, 0x00000007, 0 }, + { X86_FEATURE_AVX512_4VNNIW, CR_EDX, 2, 0x00000007, 0 }, + { X86_FEATURE_AVX512_4FMAPS, CR_EDX, 3, 0x00000007, 0 }, { X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 }, { X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 }, { X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 }, diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 124aa5c593f8..095ef7ddd6ae 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -74,6 +74,8 @@ void fpu__xstate_clear_all_cpu_caps(void) setup_clear_cpu_cap(X86_FEATURE_MPX); setup_clear_cpu_cap(X86_FEATURE_XGETBV1); setup_clear_cpu_cap(X86_FEATURE_PKU); + setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW); + setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS); } /* diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 1188bc849ee3..a39629206864 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -194,6 +194,8 @@ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ +#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ +#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ From 8835ca59dac2bc1e0136791abf3ccd51588803ce Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 19 Oct 2016 09:11:24 -0700 Subject: [PATCH 306/884] printk: suppress empty continuation lines We have a fairly common pattern where you print several things as continuations on one single line in a loop, and then at the end you do printk(KERN_CONT "\n"); to flush the buffered output. But if the output was flushed by something else (concurrent printk activity, or just system logging), we don't want that final flushing to just print an empty line. So just suppress empty continuation lines when they couldn't be merged into the line they are a continuation of. Signed-off-by: Linus Torvalds --- kernel/printk/printk.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index d5e397315473..de08fc90baaf 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1769,6 +1769,10 @@ static size_t log_output(int facility, int level, enum log_flags lflags, const c cont_flush(); } + /* Skip empty continuation lines that couldn't be added - they just flush */ + if (!text_len && (lflags & LOG_CONT)) + return 0; + /* If it doesn't end in a newline, try to buffer the current line */ if (!(lflags & LOG_NEWLINE)) { if (cont_add(facility, level, lflags, text, text_len)) From 1ecc281ec2f52d715dfc8cb67b6820ea813b9e52 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 17 Oct 2016 21:50:23 +0200 Subject: [PATCH 307/884] netfilter: x_tables: suppress kmemcheck warning Markus Trippelsdorf reports: WARNING: kmemcheck: Caught 64-bit read from uninitialized memory (ffff88001e605480) 4055601e0088ffff000000000000000090686d81ffffffff0000000000000000 u u u u u u u u u u u u u u u u i i i i i i i i u u u u u u u u ^ |RIP: 0010:[] [] nf_register_net_hook+0x51/0x160 [..] [] nf_register_net_hook+0x51/0x160 [] nf_register_net_hooks+0x3f/0xa0 [] ipt_register_table+0xe5/0x110 [..] This warning is harmless; we copy 'uninitialized' data from the hook ops but it will not be used. Long term the structures keeping run-time data should be disentangled from those only containing config-time data (such as where in the list to insert a hook), but thats -next material. Reported-by: Markus Trippelsdorf Suggested-by: Al Viro Signed-off-by: Florian Westphal Acked-by: Aaron Conole Signed-off-by: Pablo Neira Ayuso --- net/netfilter/x_tables.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index e0aa7c1d0224..fc4977456c30 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1513,7 +1513,7 @@ xt_hook_ops_alloc(const struct xt_table *table, nf_hookfn *fn) if (!num_hooks) return ERR_PTR(-EINVAL); - ops = kmalloc(sizeof(*ops) * num_hooks, GFP_KERNEL); + ops = kcalloc(num_hooks, sizeof(*ops), GFP_KERNEL); if (ops == NULL) return ERR_PTR(-ENOMEM); From f61851f64b171a684f5a1fa78325756dbbaadadc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Narajowski?= Date: Wed, 19 Oct 2016 10:20:27 +0200 Subject: [PATCH 308/884] Bluetooth: Fix append max 11 bytes of name to scan rsp data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Append maximum of 10 + 1 bytes of name to scan response data. Complete name is appended only if exists and is <= 10 characters. Else append short name if exists or shorten complete name if not. This makes sure name is consistent across multiple advertising instances. Signed-off-by: Michał Narajowski Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_request.c | 51 +++++++++++++++++-------------------- net/bluetooth/hci_request.h | 2 ++ net/bluetooth/mgmt.c | 26 ++++++++++++------- 3 files changed, 43 insertions(+), 36 deletions(-) diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index e2288421fe6b..1015d9c8d97d 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -969,41 +969,38 @@ void __hci_req_enable_advertising(struct hci_request *req) hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable); } -static u8 append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len) +u8 append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len) { - size_t complete_len; size_t short_len; - int max_len; + size_t complete_len; - max_len = HCI_MAX_AD_LENGTH - ad_len - 2; + /* no space left for name (+ NULL + type + len) */ + if ((HCI_MAX_AD_LENGTH - ad_len) < HCI_MAX_SHORT_NAME_LENGTH + 3) + return ad_len; + + /* use complete name if present and fits */ complete_len = strlen(hdev->dev_name); - short_len = strlen(hdev->short_name); - - /* no space left for name */ - if (max_len < 1) - return ad_len; - - /* no name set */ - if (!complete_len) - return ad_len; - - /* complete name fits and is eq to max short name len or smaller */ - if (complete_len <= max_len && - complete_len <= HCI_MAX_SHORT_NAME_LENGTH) { + if (complete_len && complete_len <= HCI_MAX_SHORT_NAME_LENGTH) return eir_append_data(ptr, ad_len, EIR_NAME_COMPLETE, - hdev->dev_name, complete_len); - } + hdev->dev_name, complete_len + 1); - /* short name set and fits */ - if (short_len && short_len <= max_len) { + /* use short name if present */ + short_len = strlen(hdev->short_name); + if (short_len) return eir_append_data(ptr, ad_len, EIR_NAME_SHORT, - hdev->short_name, short_len); - } + hdev->short_name, short_len + 1); - /* no short name set so shorten complete name */ - if (!short_len) { - return eir_append_data(ptr, ad_len, EIR_NAME_SHORT, - hdev->dev_name, max_len); + /* use shortened full name if present, we already know that name + * is longer then HCI_MAX_SHORT_NAME_LENGTH + */ + if (complete_len) { + u8 name[HCI_MAX_SHORT_NAME_LENGTH + 1]; + + memcpy(name, hdev->dev_name, HCI_MAX_SHORT_NAME_LENGTH); + name[HCI_MAX_SHORT_NAME_LENGTH] = '\0'; + + return eir_append_data(ptr, ad_len, EIR_NAME_SHORT, name, + sizeof(name)); } return ad_len; diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 6b06629245a8..dde77bd59f91 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -106,6 +106,8 @@ static inline void hci_update_background_scan(struct hci_dev *hdev) void hci_request_setup(struct hci_dev *hdev); void hci_request_cancel_all(struct hci_dev *hdev); +u8 append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len); + static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, u8 *data, u8 data_len) { diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 736038085feb..1fba2a03f8ae 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -6017,7 +6017,15 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev, return err; } -static u8 tlv_data_max_len(u32 adv_flags, bool is_adv_data) +static u8 calculate_name_len(struct hci_dev *hdev) +{ + u8 buf[HCI_MAX_SHORT_NAME_LENGTH + 3]; + + return append_local_name(hdev, buf, 0); +} + +static u8 tlv_data_max_len(struct hci_dev *hdev, u32 adv_flags, + bool is_adv_data) { u8 max_len = HCI_MAX_AD_LENGTH; @@ -6030,9 +6038,8 @@ static u8 tlv_data_max_len(u32 adv_flags, bool is_adv_data) if (adv_flags & MGMT_ADV_FLAG_TX_POWER) max_len -= 3; } else { - /* at least 1 byte of name should fit in */ if (adv_flags & MGMT_ADV_FLAG_LOCAL_NAME) - max_len -= 3; + max_len -= calculate_name_len(hdev); if (adv_flags & (MGMT_ADV_FLAG_APPEARANCE)) max_len -= 4; @@ -6063,12 +6070,13 @@ static bool appearance_managed(u32 adv_flags) return adv_flags & MGMT_ADV_FLAG_APPEARANCE; } -static bool tlv_data_is_valid(u32 adv_flags, u8 *data, u8 len, bool is_adv_data) +static bool tlv_data_is_valid(struct hci_dev *hdev, u32 adv_flags, u8 *data, + u8 len, bool is_adv_data) { int i, cur_len; u8 max_len; - max_len = tlv_data_max_len(adv_flags, is_adv_data); + max_len = tlv_data_max_len(hdev, adv_flags, is_adv_data); if (len > max_len) return false; @@ -6215,8 +6223,8 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev, goto unlock; } - if (!tlv_data_is_valid(flags, cp->data, cp->adv_data_len, true) || - !tlv_data_is_valid(flags, cp->data + cp->adv_data_len, + if (!tlv_data_is_valid(hdev, flags, cp->data, cp->adv_data_len, true) || + !tlv_data_is_valid(hdev, flags, cp->data + cp->adv_data_len, cp->scan_rsp_len, false)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, MGMT_STATUS_INVALID_PARAMS); @@ -6429,8 +6437,8 @@ static int get_adv_size_info(struct sock *sk, struct hci_dev *hdev, rp.instance = cp->instance; rp.flags = cp->flags; - rp.max_adv_data_len = tlv_data_max_len(flags, true); - rp.max_scan_rsp_len = tlv_data_max_len(flags, false); + rp.max_adv_data_len = tlv_data_max_len(hdev, flags, true); + rp.max_scan_rsp_len = tlv_data_max_len(hdev, flags, false); err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_ADV_SIZE_INFO, MGMT_STATUS_SUCCESS, &rp, sizeof(rp)); From 3da43104d3187184d7417569cb3360511229f761 Mon Sep 17 00:00:00 2001 From: Noam Camus Date: Wed, 19 Oct 2016 14:25:03 +0300 Subject: [PATCH 309/884] ARC: Adjust cpuinfo for non-continuous cpu ids num_possible_cpus() returns how many CPUs may be present on system. However we want the highest possible CPU number. This may be differ in a sparsed possible CPUs map. Such map achived by OF for plat-eznps. For example if we have: possible cpus mask 0,3 Then: num_possible_cpus() is equal 2 while nr_cpu_ids is equal 4. Only for value 4 c_start() will provide correct cpuinfo at procfs. Signed-off-by: Noam Camus Signed-off-by: Vineet Gupta --- arch/arc/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 3df7f9c72f42..75e540972135 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -507,7 +507,7 @@ static void *c_start(struct seq_file *m, loff_t *pos) * way to pass it w/o having to kmalloc/free a 2 byte string. * Encode cpu-id as 0xFFcccc, which is decoded by show routine. */ - return *pos < num_possible_cpus() ? cpu_to_ptr(*pos) : NULL; + return *pos < nr_cpu_ids ? cpu_to_ptr(*pos) : NULL; } static void *c_next(struct seq_file *m, void *v, loff_t *pos) From 17a51f12cfbd2814fd35966a069b242569c53e27 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 18 Oct 2016 09:00:52 +0200 Subject: [PATCH 310/884] ahci: only try to use multi-MSI mode if there is more than 1 port We should only try to allocate multiple MSI or MSI-X vectors if the device actually has multiple ports. Otherwise pci_alloc_irq_vectors will return a single vector due to n_ports = 1, in which case we shouldn't set the AHCI_HFLAG_MULTI_MSI flag. Signed-off-by: Christoph Hellwig Fixes: 0b9e2988 ("ahci: use pci_alloc_irq_vectors") Reported-by: Emmanuel Benisty Tested-by: Emmanuel Benisty Signed-off-by: Tejun Heo --- drivers/ata/ahci.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index ba5f11cebee2..ed311a040fed 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -1418,21 +1418,24 @@ static int ahci_init_msi(struct pci_dev *pdev, unsigned int n_ports, * Message mode could be enforced. In this case assume that advantage * of multipe MSIs is negated and use single MSI mode instead. */ - nvec = pci_alloc_irq_vectors(pdev, n_ports, INT_MAX, - PCI_IRQ_MSIX | PCI_IRQ_MSI); - if (nvec > 0) { - if (!(readl(hpriv->mmio + HOST_CTL) & HOST_MRSM)) { - hpriv->get_irq_vector = ahci_get_irq_vector; - hpriv->flags |= AHCI_HFLAG_MULTI_MSI; - return nvec; - } + if (n_ports > 1) { + nvec = pci_alloc_irq_vectors(pdev, n_ports, INT_MAX, + PCI_IRQ_MSIX | PCI_IRQ_MSI); + if (nvec > 0) { + if (!(readl(hpriv->mmio + HOST_CTL) & HOST_MRSM)) { + hpriv->get_irq_vector = ahci_get_irq_vector; + hpriv->flags |= AHCI_HFLAG_MULTI_MSI; + return nvec; + } - /* - * Fallback to single MSI mode if the controller enforced MRSM - * mode. - */ - printk(KERN_INFO "ahci: MRSM is on, fallback to single MSI\n"); - pci_free_irq_vectors(pdev); + /* + * Fallback to single MSI mode if the controller + * enforced MRSM mode. + */ + printk(KERN_INFO + "ahci: MRSM is on, fallback to single MSI\n"); + pci_free_irq_vectors(pdev); + } } /* From 75d29713b792da4782cadfaa87e802183440694e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 12 Oct 2016 09:34:29 +0300 Subject: [PATCH 311/884] libnvdimm, namespace: potential NULL deref on allocation error If the kcalloc() fails then "devs" can be NULL and we dereference it checking "devs[i]". Fixes: 1b40e09a1232 ('libnvdimm: blk labels and namespace instantiation') Signed-off-by: Dan Carpenter Signed-off-by: Dan Williams --- drivers/nvdimm/namespace_devs.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 3509cff68ef9..abe5c6bc756c 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -2176,12 +2176,14 @@ static struct device **scan_labels(struct nd_region *nd_region) return devs; err: - for (i = 0; devs[i]; i++) - if (is_nd_blk(&nd_region->dev)) - namespace_blk_release(devs[i]); - else - namespace_pmem_release(devs[i]); - kfree(devs); + if (devs) { + for (i = 0; devs[i]; i++) + if (is_nd_blk(&nd_region->dev)) + namespace_blk_release(devs[i]); + else + namespace_pmem_release(devs[i]); + kfree(devs); + } return NULL; } From 3115bb02b5c23d960df5f1bf551ec394a9bb10ec Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Thu, 13 Oct 2016 09:54:21 -0600 Subject: [PATCH 312/884] pmem: report error on clear poison failure ACPI Clear Uncorrectable Error DSM function may fail or may be unsupported on a platform. pmem_clear_poison() returns without clearing badblocks in such cases. This failure is detected at the next read (-EIO). This behavior can lead to an issue when user keeps writing but does not read immediately. For instance, flight recorder file may be only read when it is necessary for troubleshooting. Change pmem_do_bvec() and pmem_clear_poison() to return -EIO so that filesystem can log an error message on a write error. Cc: Vishal Verma Signed-off-by: Toshi Kani Signed-off-by: Dan Williams --- drivers/nvdimm/pmem.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 42b3a8217073..24618431a14b 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -47,7 +47,7 @@ static struct nd_region *to_region(struct pmem_device *pmem) return to_nd_region(to_dev(pmem)->parent); } -static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset, +static int pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset, unsigned int len) { struct device *dev = to_dev(pmem); @@ -62,8 +62,12 @@ static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset, __func__, (unsigned long long) sector, cleared / 512, cleared / 512 > 1 ? "s" : ""); badblocks_clear(&pmem->bb, sector, cleared / 512); + } else { + return -EIO; } + invalidate_pmem(pmem->virt_addr + offset, len); + return 0; } static void write_pmem(void *pmem_addr, struct page *page, @@ -123,7 +127,7 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page, flush_dcache_page(page); write_pmem(pmem_addr, page, off, len); if (unlikely(bad_pmem)) { - pmem_clear_poison(pmem, pmem_off, len); + rc = pmem_clear_poison(pmem, pmem_off, len); write_pmem(pmem_addr, page, off, len); } } From 8ef2074d28373014d05e92b5f13364ef51075b6e Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Wed, 19 Oct 2016 09:51:05 -0600 Subject: [PATCH 313/884] nvme: Add tertiary number to NVME_VS NVMe 1.2.1 specification adds a tertiary element to the version number. This updates the macro and its callers to include the final number and fixup a single place in nvmet where the version was generated manually. Signed-off-by: Gabriel Krisman Bertazi Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 8 ++++---- drivers/nvme/host/pci.c | 4 ++-- drivers/nvme/host/scsi.c | 4 ++-- drivers/nvme/target/core.c | 2 +- include/linux/nvme.h | 3 ++- 5 files changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 2a57f5ede386..bb168b71048d 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -900,9 +900,9 @@ static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id) return -ENODEV; } - if (ns->ctrl->vs >= NVME_VS(1, 1)) + if (ns->ctrl->vs >= NVME_VS(1, 1, 0)) memcpy(ns->eui, (*id)->eui64, sizeof(ns->eui)); - if (ns->ctrl->vs >= NVME_VS(1, 2)) + if (ns->ctrl->vs >= NVME_VS(1, 2, 0)) memcpy(ns->uuid, (*id)->nguid, sizeof(ns->uuid)); return 0; @@ -1242,7 +1242,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) } page_shift = NVME_CAP_MPSMIN(cap) + 12; - if (ctrl->vs >= NVME_VS(1, 1)) + if (ctrl->vs >= NVME_VS(1, 1, 0)) ctrl->subsystem = NVME_CAP_NSSRC(cap); ret = nvme_identify_ctrl(ctrl, &id); @@ -1842,7 +1842,7 @@ static void nvme_scan_work(struct work_struct *work) return; nn = le32_to_cpu(id->nn); - if (ctrl->vs >= NVME_VS(1, 1) && + if (ctrl->vs >= NVME_VS(1, 1, 0) && !(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) { if (!nvme_scan_ns_list(ctrl, nn)) goto done; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index a7c6e9d74943..26a8d31b291d 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1214,7 +1214,7 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) u64 cap = lo_hi_readq(dev->bar + NVME_REG_CAP); struct nvme_queue *nvmeq; - dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1) ? + dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1, 0) ? NVME_CAP_NSSRC(cap) : 0; if (dev->subsystem && @@ -1633,7 +1633,7 @@ static int nvme_pci_enable(struct nvme_dev *dev) * NULL as final argument to sysfs_add_file_to_group. */ - if (readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 2)) { + if (readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 2, 0)) { dev->cmb = nvme_map_cmb(dev); if (dev->cmbsz) { diff --git a/drivers/nvme/host/scsi.c b/drivers/nvme/host/scsi.c index c2a0a1c7d05d..3eaa4d27801e 100644 --- a/drivers/nvme/host/scsi.c +++ b/drivers/nvme/host/scsi.c @@ -606,7 +606,7 @@ static int nvme_fill_device_id_eui64(struct nvme_ns *ns, struct sg_io_hdr *hdr, eui = id_ns->eui64; len = sizeof(id_ns->eui64); - if (ns->ctrl->vs >= NVME_VS(1, 2)) { + if (ns->ctrl->vs >= NVME_VS(1, 2, 0)) { if (bitmap_empty(eui, len * 8)) { eui = id_ns->nguid; len = sizeof(id_ns->nguid); @@ -679,7 +679,7 @@ static int nvme_trans_device_id_page(struct nvme_ns *ns, struct sg_io_hdr *hdr, { int res; - if (ns->ctrl->vs >= NVME_VS(1, 1)) { + if (ns->ctrl->vs >= NVME_VS(1, 1, 0)) { res = nvme_fill_device_id_eui64(ns, hdr, resp, alloc_len); if (res != -EOPNOTSUPP) return res; diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 6559d5afa7bf..b4cacb6f0258 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -882,7 +882,7 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn, if (!subsys) return NULL; - subsys->ver = (1 << 16) | (2 << 8) | 1; /* NVMe 1.2.1 */ + subsys->ver = NVME_VS(1, 2, 1); /* NVMe 1.2.1 */ switch (type) { case NVME_NQN_NVME: diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 7676557ce357..086d196e68f7 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -960,6 +960,7 @@ struct nvme_completion { __le16 status; /* did the command fail, and if so, why? */ }; -#define NVME_VS(major, minor) (((major) << 16) | ((minor) << 8)) +#define NVME_VS(major, minor, tertiary) \ + (((major) << 16) | ((minor) << 8) | (tertiary)) #endif /* _LINUX_NVME_H */ From a446c0840e244f34c22cc13b3a62d50aa51fb4c6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 30 Sep 2016 13:51:06 +0200 Subject: [PATCH 314/884] nvme.h: resync with nvme-cli Import a few updates to nvme.h from nvme-cli. This mostly includes a few new fields and error codes, but also a few renames that so far are only used in user space. Also one field is moved from an array of two le64 values to one of 16 u8 values so that we can more easily access it. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Gabriel Krisman Bertazi Signed-off-by: Jens Axboe --- drivers/nvme/target/admin-cmd.c | 2 +- drivers/nvme/target/discovery.c | 2 +- include/linux/nvme.h | 33 +++++++++++++++++++++++++++------ 3 files changed, 29 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 7ab9c9381b98..6944f246e562 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -199,7 +199,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) */ /* we support multiple ports and multiples hosts: */ - id->mic = (1 << 0) | (1 << 1); + id->cmic = (1 << 0) | (1 << 1); /* no limit on data transfer sizes for now */ id->mdts = 0; diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index 6f65646e89cf..a6c25fbcff3f 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -54,7 +54,7 @@ static void nvmet_format_discovery_entry(struct nvmf_disc_rsp_page_hdr *hdr, /* we support only dynamic controllers */ e->cntlid = cpu_to_le16(NVME_CNTLID_DYNAMIC); e->asqsz = cpu_to_le16(NVMF_AQ_DEPTH); - e->nqntype = type; + e->subtype = type; memcpy(e->trsvcid, port->disc_addr.trsvcid, NVMF_TRSVCID_SIZE); memcpy(e->traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE); memcpy(e->tsas.common, port->disc_addr.tsas.common, NVMF_TSAS_SIZE); diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 086d196e68f7..989699641e10 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -182,7 +182,7 @@ struct nvme_id_ctrl { char fr[8]; __u8 rab; __u8 ieee[3]; - __u8 mic; + __u8 cmic; __u8 mdts; __le16 cntlid; __le32 ver; @@ -202,7 +202,13 @@ struct nvme_id_ctrl { __u8 apsta; __le16 wctemp; __le16 cctemp; - __u8 rsvd270[50]; + __le16 mtfa; + __le32 hmpre; + __le32 hmmin; + __u8 tnvmcap[16]; + __u8 unvmcap[16]; + __le32 rpmbs; + __u8 rsvd316[4]; __le16 kas; __u8 rsvd322[190]; __u8 sqes; @@ -267,7 +273,7 @@ struct nvme_id_ns { __le16 nabo; __le16 nabspf; __u16 rsvd46; - __le64 nvmcap[2]; + __u8 nvmcap[16]; __u8 rsvd64[40]; __u8 nguid[16]; __u8 eui64[8]; @@ -556,8 +562,10 @@ enum nvme_admin_opcode { nvme_admin_set_features = 0x09, nvme_admin_get_features = 0x0a, nvme_admin_async_event = 0x0c, + nvme_admin_ns_mgmt = 0x0d, nvme_admin_activate_fw = 0x10, nvme_admin_download_fw = 0x11, + nvme_admin_ns_attach = 0x15, nvme_admin_keep_alive = 0x18, nvme_admin_format_nvm = 0x80, nvme_admin_security_send = 0x81, @@ -583,6 +591,7 @@ enum { NVME_FEAT_WRITE_ATOMIC = 0x0a, NVME_FEAT_ASYNC_EVENT = 0x0b, NVME_FEAT_AUTO_PST = 0x0c, + NVME_FEAT_HOST_MEM_BUF = 0x0d, NVME_FEAT_KATO = 0x0f, NVME_FEAT_SW_PROGRESS = 0x80, NVME_FEAT_HOST_ID = 0x81, @@ -745,7 +754,7 @@ struct nvmf_common_command { struct nvmf_disc_rsp_page_entry { __u8 trtype; __u8 adrfam; - __u8 nqntype; + __u8 subtype; __u8 treq; __le16 portid; __le16 cntlid; @@ -905,12 +914,23 @@ enum { NVME_SC_INVALID_VECTOR = 0x108, NVME_SC_INVALID_LOG_PAGE = 0x109, NVME_SC_INVALID_FORMAT = 0x10a, - NVME_SC_FIRMWARE_NEEDS_RESET = 0x10b, + NVME_SC_FW_NEEDS_CONV_RESET = 0x10b, NVME_SC_INVALID_QUEUE = 0x10c, NVME_SC_FEATURE_NOT_SAVEABLE = 0x10d, NVME_SC_FEATURE_NOT_CHANGEABLE = 0x10e, NVME_SC_FEATURE_NOT_PER_NS = 0x10f, - NVME_SC_FW_NEEDS_RESET_SUBSYS = 0x110, + NVME_SC_FW_NEEDS_SUBSYS_RESET = 0x110, + NVME_SC_FW_NEEDS_RESET = 0x111, + NVME_SC_FW_NEEDS_MAX_TIME = 0x112, + NVME_SC_FW_ACIVATE_PROHIBITED = 0x113, + NVME_SC_OVERLAPPING_RANGE = 0x114, + NVME_SC_NS_INSUFFICENT_CAP = 0x115, + NVME_SC_NS_ID_UNAVAILABLE = 0x116, + NVME_SC_NS_ALREADY_ATTACHED = 0x118, + NVME_SC_NS_IS_PRIVATE = 0x119, + NVME_SC_NS_NOT_ATTACHED = 0x11a, + NVME_SC_THIN_PROV_NOT_SUPP = 0x11b, + NVME_SC_CTRL_LIST_INVALID = 0x11c, /* * I/O Command Set Specific - NVM commands: @@ -941,6 +961,7 @@ enum { NVME_SC_REFTAG_CHECK = 0x284, NVME_SC_COMPARE_FAILED = 0x285, NVME_SC_ACCESS_DENIED = 0x286, + NVME_SC_UNWRITTEN_BLOCK = 0x287, NVME_SC_DNR = 0x4000, }; From 8d63687afda019a6e037bf9c4ceb3e514c26a35d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 30 Sep 2016 13:51:07 +0200 Subject: [PATCH 315/884] nvme.h: don't use uuid_be This makes life easier for nvme-cli and we don't really need the uuid type anyway to start with. Signed-off-by: Christoph Hellwig Reviewed-by: Gabriel Krisman Bertazi Reviewed-by: Jay Freyensee Signed-off-by: Jens Axboe --- include/linux/nvme.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 989699641e10..d31ff2dd1d51 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -16,7 +16,6 @@ #define _LINUX_NVME_H #include -#include /* NQN names in commands fields specified one size */ #define NVMF_NQN_FIELD_LEN 256 @@ -803,7 +802,7 @@ struct nvmf_connect_command { }; struct nvmf_connect_data { - uuid_be hostid; + __u8 hostid[16]; __le16 cntlid; char resv4[238]; char subsysnqn[NVMF_NQN_FIELD_LEN]; From 329dd7681c5af84e8ea9f4494c1a304389cdfc6e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 30 Sep 2016 13:51:08 +0200 Subject: [PATCH 316/884] nvme.h: add an enum for cns values Ported over from nvme-cli. Signed-off-by: Christoph Hellwig Reviewed-by: Gabriel Krisman Bertazi Reviewed-by: Keith Busch Signed-off-by: Jens Axboe --- include/linux/nvme.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/include/linux/nvme.h b/include/linux/nvme.h index d31ff2dd1d51..fc3c24206593 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -281,6 +281,16 @@ struct nvme_id_ns { __u8 vs[3712]; }; +enum { + NVME_ID_CNS_NS = 0x00, + NVME_ID_CNS_CTRL = 0x01, + NVME_ID_CNS_NS_ACTIVE_LIST = 0x02, + NVME_ID_CNS_NS_PRESENT_LIST = 0x10, + NVME_ID_CNS_NS_PRESENT = 0x11, + NVME_ID_CNS_CTRL_NS_LIST = 0x12, + NVME_ID_CNS_CTRL_LIST = 0x13, +}; + enum { NVME_NS_FEAT_THIN = 1 << 0, NVME_NS_FLBAS_LBA_MASK = 0xf, From fa60682677c594d81e9b68b8a4046cde75a7374b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 30 Sep 2016 13:51:09 +0200 Subject: [PATCH 317/884] nvme: use symbolic constants for CNS values Signed-off-by: Christoph Hellwig Reviewed-by: Gabriel Krisman Bertazi Reviewed-by: Keith Busch Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index bb168b71048d..79e679d12f3b 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -554,7 +554,7 @@ int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id) /* gcc-4.4.4 (at least) has issues with initializers and anon unions */ c.identify.opcode = nvme_admin_identify; - c.identify.cns = cpu_to_le32(1); + c.identify.cns = cpu_to_le32(NVME_ID_CNS_CTRL); *id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL); if (!*id) @@ -572,7 +572,7 @@ static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *n struct nvme_command c = { }; c.identify.opcode = nvme_admin_identify; - c.identify.cns = cpu_to_le32(2); + c.identify.cns = cpu_to_le32(NVME_ID_CNS_NS_ACTIVE_LIST); c.identify.nsid = cpu_to_le32(nsid); return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000); } From e9c9346e20c1b18a3ec30defd5ff7134bcc0da6d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 30 Sep 2016 13:51:10 +0200 Subject: [PATCH 318/884] nvmet: use symbolic constants for CNS values Signed-off-by: Christoph Hellwig Reviewed-by: Gabriel Krisman Bertazi Reviewed-by: Jay Freyensee Signed-off-by: Jens Axboe --- drivers/nvme/target/admin-cmd.c | 6 +++--- drivers/nvme/target/discovery.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 6944f246e562..6fe4c48a21e4 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -511,13 +511,13 @@ int nvmet_parse_admin_cmd(struct nvmet_req *req) case nvme_admin_identify: req->data_len = 4096; switch (le32_to_cpu(cmd->identify.cns)) { - case 0x00: + case NVME_ID_CNS_NS: req->execute = nvmet_execute_identify_ns; return 0; - case 0x01: + case NVME_ID_CNS_CTRL: req->execute = nvmet_execute_identify_ctrl; return 0; - case 0x02: + case NVME_ID_CNS_NS_ACTIVE_LIST: req->execute = nvmet_execute_identify_nslist; return 0; } diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index a6c25fbcff3f..12f39eea569f 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -187,7 +187,7 @@ int nvmet_parse_discovery_cmd(struct nvmet_req *req) case nvme_admin_identify: req->data_len = 4096; switch (le32_to_cpu(cmd->identify.cns)) { - case 0x01: + case NVME_ID_CNS_CTRL: req->execute = nvmet_execute_identify_disc_ctrl; return 0; From 83aa3e0f791d458a28f91d7a50f92926f971ef7c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 18 Oct 2016 17:21:30 +0200 Subject: [PATCH 319/884] nfs4: fix missing-braces warning A bugfix introduced a harmless warning for update_open_stateid: fs/nfs/nfs4proc.c:1548:2: error: missing braces around initializer [-Werror=missing-braces] Removing the zero in the initializer will do the right thing here and initialize the entire structure to zero. Fixes: 1393d9612ba0 ("NFSv4: Fix a race when updating an open_stateid") Signed-off-by: Arnd Bergmann Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ed7da820d4bf..45b38ee4813c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1545,7 +1545,7 @@ static int update_open_stateid(struct nfs4_state *state, struct nfs_client *clp = server->nfs_client; struct nfs_inode *nfsi = NFS_I(state->inode); struct nfs_delegation *deleg_cur; - nfs4_stateid freeme = {0}; + nfs4_stateid freeme = { }; int ret = 0; fmode &= (FMODE_READ|FMODE_WRITE); From da25311c7ca8b0254a686fc0d597075b9aa3b683 Mon Sep 17 00:00:00 2001 From: Patrick Scheuring Date: Wed, 19 Oct 2016 12:04:02 -0700 Subject: [PATCH 320/884] Input: i8042 - add XMG C504 to keyboard reset table The Schenker XMG C504 is a rebranded Gigabyte P35 v2 laptop. Therefore it also needs a keyboard reset to detect the Elantech touchpad. Otherwise the touchpad appears to be dead. With this patch the touchpad is detected: $ dmesg | grep -E "(i8042|Elantech|elantech)" [ 2.675399] i8042: PNP: PS/2 Controller [PNP0303:PS2K,PNP0f13:PS2M] at 0x60,0x64 irq 1,12 [ 2.680372] i8042: Attempting to reset device connected to KBD port [ 2.789037] serio: i8042 KBD port at 0x60,0x64 irq 1 [ 2.791586] serio: i8042 AUX port at 0x60,0x64 irq 12 [ 2.813840] input: AT Translated Set 2 keyboard as /devices/platform/i8042/serio0/input/input4 [ 3.811431] psmouse serio1: elantech: assuming hardware version 4 (with firmware version 0x361f0e) [ 3.825424] psmouse serio1: elantech: Synaptics capabilities query result 0x00, 0x15, 0x0f. [ 3.839424] psmouse serio1: elantech: Elan sample query result 03, 58, 74 [ 3.911349] input: ETPS/2 Elantech Touchpad as /devices/platform/i8042/serio1/input/input6 Signed-off-by: Patrick Scheuring Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042-x86ia64io.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index f4bfb4b2d50a..073246c7d163 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -877,6 +877,13 @@ static const struct dmi_system_id __initconst i8042_dmi_kbdreset_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "P34"), }, }, + { + /* Schenker XMG C504 - Elantech touchpad */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "XMG"), + DMI_MATCH(DMI_PRODUCT_NAME, "C504"), + }, + }, { } }; From c30a70d3ac60f3216e8d60d7746caad084a7eb46 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Wed, 19 Oct 2016 09:06:41 +0200 Subject: [PATCH 321/884] stmmac: fix and review the ptp registration. The commit commit 7086605a6ab5 ("stmmac: fix error check when init ptp") breaks the procedure added by the commit efee95f42b5d ("ptp_clock: future-proofing drivers against PTP subsystem becoming optional") So this patch tries to re-import the logic added by the latest commit above: it makes sense to have the stmmac_ptp_register as void function and, inside the main, the stmmac_init_ptp can fails in case of the capability cannot be supported by the HW. Signed-off-by: Giuseppe Cavallaro Cc: Alexandre TORGUE Cc: Rayagond Kokatanur Cc: Dan Carpenter Cc: Nicolas Pitre Acked-by: Nicolas Pitre Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac.h | 2 +- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 6 ++++-- drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c | 15 ++++----------- 3 files changed, 9 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 8dc9056c1001..b15fc55f1b96 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -145,7 +145,7 @@ int stmmac_mdio_register(struct net_device *ndev); int stmmac_mdio_reset(struct mii_bus *mii); void stmmac_set_ethtool_ops(struct net_device *netdev); -int stmmac_ptp_register(struct stmmac_priv *priv); +void stmmac_ptp_register(struct stmmac_priv *priv); void stmmac_ptp_unregister(struct stmmac_priv *priv); int stmmac_resume(struct device *dev); int stmmac_suspend(struct device *dev); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 6c85b61aaa0b..48e71fad4210 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -676,7 +676,9 @@ static int stmmac_init_ptp(struct stmmac_priv *priv) priv->hwts_tx_en = 0; priv->hwts_rx_en = 0; - return stmmac_ptp_register(priv); + stmmac_ptp_register(priv); + + return 0; } static void stmmac_release_ptp(struct stmmac_priv *priv) @@ -1710,7 +1712,7 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp) if (init_ptp) { ret = stmmac_init_ptp(priv); if (ret) - netdev_warn(priv->dev, "PTP support cannot init.\n"); + netdev_warn(priv->dev, "fail to init PTP.\n"); } #ifdef CONFIG_DEBUG_FS diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index 5d61fb27219a..1477471f8d44 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -177,7 +177,7 @@ static struct ptp_clock_info stmmac_ptp_clock_ops = { * Description: this function will register the ptp clock driver * to kernel. It also does some house keeping work. */ -int stmmac_ptp_register(struct stmmac_priv *priv) +void stmmac_ptp_register(struct stmmac_priv *priv) { spin_lock_init(&priv->ptp_lock); priv->ptp_clock_ops = stmmac_ptp_clock_ops; @@ -185,17 +185,10 @@ int stmmac_ptp_register(struct stmmac_priv *priv) priv->ptp_clock = ptp_clock_register(&priv->ptp_clock_ops, priv->device); if (IS_ERR(priv->ptp_clock)) { - int ret = PTR_ERR(priv->ptp_clock); - + netdev_err(priv->dev, "ptp_clock_register failed\n"); priv->ptp_clock = NULL; - return ret; - } - - spin_lock_init(&priv->ptp_lock); - - netdev_dbg(priv->dev, "Added PTP HW clock successfully\n"); - - return 0; + } else if (priv->ptp_clock) + netdev_info(priv->dev, "registered PTP clock\n"); } /** From 7d36b9c102318aa86aceb074359305da88ce9ef9 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 19 Oct 2016 20:49:39 +0900 Subject: [PATCH 322/884] clk: uniphier: fix memory overrun bug The first loop of this "for" statement writes memory beyond the allocated clk_hw_onecell_data. It should be: for (clk_num--; clk_num >= 0; clk_num--) ... Or more simply: while (--clk_num >= 0) ... Fixes: 734d82f4a678 ("clk: uniphier: add core support code for UniPhier clock driver") Signed-off-by: Masahiro Yamada Signed-off-by: Stephen Boyd --- drivers/clk/uniphier/clk-uniphier-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/uniphier/clk-uniphier-core.c b/drivers/clk/uniphier/clk-uniphier-core.c index f4e0f6be5f33..84bc465d31aa 100644 --- a/drivers/clk/uniphier/clk-uniphier-core.c +++ b/drivers/clk/uniphier/clk-uniphier-core.c @@ -79,7 +79,7 @@ static int uniphier_clk_probe(struct platform_device *pdev) hw_data->num = clk_num; /* avoid returning NULL for unused idx */ - for (; clk_num >= 0; clk_num--) + while (--clk_num >= 0) hw_data->hws[clk_num] = ERR_PTR(-EINVAL); for (p = data; p->name; p++) { From 5c6201e60a57c6b240d446c8a2d83063283b2743 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 19 Oct 2016 17:22:07 +0900 Subject: [PATCH 323/884] clk: uniphier: rename MIO clock to SD clock for Pro5, PXs2, LD20 SoCs I made a mistake as for naming for this block. The MIO block is not implemented for these 3 SoCs in the first place. The current naming will be a trouble if an SoC with both MIO and SD-ctrl blocks appear in the future. This driver has just been merged in the previous merge window. Rename it before the release. Signed-off-by: Masahiro Yamada Signed-off-by: Stephen Boyd --- .../devicetree/bindings/clock/uniphier-clock.txt | 16 ++++++++-------- drivers/clk/uniphier/clk-uniphier-core.c | 14 +++++++------- drivers/clk/uniphier/clk-uniphier-mio.c | 2 +- drivers/clk/uniphier/clk-uniphier.h | 2 +- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/Documentation/devicetree/bindings/clock/uniphier-clock.txt b/Documentation/devicetree/bindings/clock/uniphier-clock.txt index c7179d3b5c33..812163060fa3 100644 --- a/Documentation/devicetree/bindings/clock/uniphier-clock.txt +++ b/Documentation/devicetree/bindings/clock/uniphier-clock.txt @@ -24,7 +24,7 @@ Example: reg = <0x61840000 0x4000>; clock { - compatible = "socionext,uniphier-ld20-clock"; + compatible = "socionext,uniphier-ld11-clock"; #clock-cells = <1>; }; @@ -43,8 +43,8 @@ Provided clocks: 21: USB3 ch1 PHY1 -Media I/O (MIO) clock ---------------------- +Media I/O (MIO) clock, SD clock +------------------------------- Required properties: - compatible: should be one of the following: @@ -52,10 +52,10 @@ Required properties: "socionext,uniphier-ld4-mio-clock" - for LD4 SoC. "socionext,uniphier-pro4-mio-clock" - for Pro4 SoC. "socionext,uniphier-sld8-mio-clock" - for sLD8 SoC. - "socionext,uniphier-pro5-mio-clock" - for Pro5 SoC. - "socionext,uniphier-pxs2-mio-clock" - for PXs2/LD6b SoC. + "socionext,uniphier-pro5-sd-clock" - for Pro5 SoC. + "socionext,uniphier-pxs2-sd-clock" - for PXs2/LD6b SoC. "socionext,uniphier-ld11-mio-clock" - for LD11 SoC. - "socionext,uniphier-ld20-mio-clock" - for LD20 SoC. + "socionext,uniphier-ld20-sd-clock" - for LD20 SoC. - #clock-cells: should be 1. Example: @@ -66,7 +66,7 @@ Example: reg = <0x59810000 0x800>; clock { - compatible = "socionext,uniphier-ld20-mio-clock"; + compatible = "socionext,uniphier-ld11-mio-clock"; #clock-cells = <1>; }; @@ -112,7 +112,7 @@ Example: reg = <0x59820000 0x200>; clock { - compatible = "socionext,uniphier-ld20-peri-clock"; + compatible = "socionext,uniphier-ld11-peri-clock"; #clock-cells = <1>; }; diff --git a/drivers/clk/uniphier/clk-uniphier-core.c b/drivers/clk/uniphier/clk-uniphier-core.c index 84bc465d31aa..26c53f7963a4 100644 --- a/drivers/clk/uniphier/clk-uniphier-core.c +++ b/drivers/clk/uniphier/clk-uniphier-core.c @@ -142,7 +142,7 @@ static const struct of_device_id uniphier_clk_match[] = { .compatible = "socionext,uniphier-ld20-clock", .data = uniphier_ld20_sys_clk_data, }, - /* Media I/O clock */ + /* Media I/O clock, SD clock */ { .compatible = "socionext,uniphier-sld3-mio-clock", .data = uniphier_sld3_mio_clk_data, @@ -160,20 +160,20 @@ static const struct of_device_id uniphier_clk_match[] = { .data = uniphier_sld3_mio_clk_data, }, { - .compatible = "socionext,uniphier-pro5-mio-clock", - .data = uniphier_pro5_mio_clk_data, + .compatible = "socionext,uniphier-pro5-sd-clock", + .data = uniphier_pro5_sd_clk_data, }, { - .compatible = "socionext,uniphier-pxs2-mio-clock", - .data = uniphier_pro5_mio_clk_data, + .compatible = "socionext,uniphier-pxs2-sd-clock", + .data = uniphier_pro5_sd_clk_data, }, { .compatible = "socionext,uniphier-ld11-mio-clock", .data = uniphier_sld3_mio_clk_data, }, { - .compatible = "socionext,uniphier-ld20-mio-clock", - .data = uniphier_pro5_mio_clk_data, + .compatible = "socionext,uniphier-ld20-sd-clock", + .data = uniphier_pro5_sd_clk_data, }, /* Peripheral clock */ { diff --git a/drivers/clk/uniphier/clk-uniphier-mio.c b/drivers/clk/uniphier/clk-uniphier-mio.c index 6aa7ec768d0b..218d20f099ce 100644 --- a/drivers/clk/uniphier/clk-uniphier-mio.c +++ b/drivers/clk/uniphier/clk-uniphier-mio.c @@ -93,7 +93,7 @@ const struct uniphier_clk_data uniphier_sld3_mio_clk_data[] = { { /* sentinel */ } }; -const struct uniphier_clk_data uniphier_pro5_mio_clk_data[] = { +const struct uniphier_clk_data uniphier_pro5_sd_clk_data[] = { UNIPHIER_MIO_CLK_SD_FIXED, UNIPHIER_MIO_CLK_SD(0, 0), UNIPHIER_MIO_CLK_SD(1, 1), diff --git a/drivers/clk/uniphier/clk-uniphier.h b/drivers/clk/uniphier/clk-uniphier.h index 3ae184062388..0244dba1f4cf 100644 --- a/drivers/clk/uniphier/clk-uniphier.h +++ b/drivers/clk/uniphier/clk-uniphier.h @@ -115,7 +115,7 @@ extern const struct uniphier_clk_data uniphier_pxs2_sys_clk_data[]; extern const struct uniphier_clk_data uniphier_ld11_sys_clk_data[]; extern const struct uniphier_clk_data uniphier_ld20_sys_clk_data[]; extern const struct uniphier_clk_data uniphier_sld3_mio_clk_data[]; -extern const struct uniphier_clk_data uniphier_pro5_mio_clk_data[]; +extern const struct uniphier_clk_data uniphier_pro5_sd_clk_data[]; extern const struct uniphier_clk_data uniphier_ld4_peri_clk_data[]; extern const struct uniphier_clk_data uniphier_pro4_peri_clk_data[]; From 1dec78585328db00e33fb18dc1a6deed0e2095a5 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 19 Oct 2016 14:38:50 -0700 Subject: [PATCH 324/884] ARC: fix build warning in elf.h The cast valid since TASK_SIZE * 2 will never actually cause overflow. | CC fs/binfmt_elf.o | In file included from ../include/linux/elf.h:4:0, | from ../include/linux/module.h:15, | from ../fs/binfmt_elf.c:12: | ../fs/binfmt_elf.c: In function load_elf_binar: | ../arch/arc/include/asm/elf.h:57:29: warning: integer overflow in expression [-Woverflow] | #define ELF_ET_DYN_BASE (2 * TASK_SIZE / 3) | ^ | ../fs/binfmt_elf.c:921:16: note: in expansion of macro ELF_ET_DYN_BASE | load_bias = ELF_ET_DYN_BASE - vaddr; Signed-off-by: Vineet Gupta --- arch/arc/include/asm/elf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/include/asm/elf.h b/arch/arc/include/asm/elf.h index 7096f97a1434..aa2d6da9d187 100644 --- a/arch/arc/include/asm/elf.h +++ b/arch/arc/include/asm/elf.h @@ -54,7 +54,7 @@ extern int elf_check_arch(const struct elf32_hdr *); * the loader. We need to make sure that it is out of the way of the program * that it will "exec", and that there is sufficient room for the brk. */ -#define ELF_ET_DYN_BASE (2 * TASK_SIZE / 3) +#define ELF_ET_DYN_BASE (2UL * TASK_SIZE / 3) /* * When the program starts, a1 contains a pointer to a function to be From 390975ac3978162ec9c6beca66f0fe83b0be33bb Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Tue, 18 Oct 2016 21:21:43 +0200 Subject: [PATCH 325/884] ubifs: Rename ubifs_rename2 Since ->rename2 is gone, rename ubifs_rename2() to ubifs_rename(). Suggested-by: Linus Torvalds Signed-off-by: Richard Weinberger --- fs/ubifs/dir.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index c8f60df2733e..668ec3b90ea1 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -1060,9 +1060,9 @@ static void unlock_4_inodes(struct inode *inode1, struct inode *inode2, mutex_unlock(&ubifs_inode(inode1)->ui_mutex); } -static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry, - unsigned int flags) +static int do_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) { struct ubifs_info *c = old_dir->i_sb->s_fs_info; struct inode *old_inode = d_inode(old_dentry); @@ -1323,7 +1323,7 @@ static int ubifs_xrename(struct inode *old_dir, struct dentry *old_dentry, return err; } -static int ubifs_rename2(struct inode *old_dir, struct dentry *old_dentry, +static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { @@ -1336,7 +1336,7 @@ static int ubifs_rename2(struct inode *old_dir, struct dentry *old_dentry, if (flags & RENAME_EXCHANGE) return ubifs_xrename(old_dir, old_dentry, new_dir, new_dentry); - return ubifs_rename(old_dir, old_dentry, new_dir, new_dentry, flags); + return do_rename(old_dir, old_dentry, new_dir, new_dentry, flags); } int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry, @@ -1387,7 +1387,7 @@ const struct inode_operations ubifs_dir_inode_operations = { .mkdir = ubifs_mkdir, .rmdir = ubifs_rmdir, .mknod = ubifs_mknod, - .rename = ubifs_rename2, + .rename = ubifs_rename, .setattr = ubifs_setattr, .getattr = ubifs_getattr, .listxattr = ubifs_listxattr, From 843741c5778398ea67055067f4cc65ae6c80ca0e Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Tue, 20 Sep 2016 10:08:30 +0200 Subject: [PATCH 326/884] ubifs: Fix xattr_names length in exit paths When the operation fails we also have to undo the changes we made to ->xattr_names. Otherwise listxattr() will report wrong lengths. Cc: stable@vger.kernel.org Signed-off-by: Richard Weinberger --- fs/ubifs/xattr.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index 6c2f4d41ed73..d9f9615bfd71 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -172,6 +172,7 @@ out_cancel: host_ui->xattr_cnt -= 1; host_ui->xattr_size -= CALC_DENT_SIZE(nm->len); host_ui->xattr_size -= CALC_XATTR_BYTES(size); + host_ui->xattr_names -= nm->len; mutex_unlock(&host_ui->ui_mutex); out_free: make_bad_inode(inode); @@ -478,6 +479,7 @@ out_cancel: host_ui->xattr_cnt += 1; host_ui->xattr_size += CALC_DENT_SIZE(nm->len); host_ui->xattr_size += CALC_XATTR_BYTES(ui->data_len); + host_ui->xattr_names += nm->len; mutex_unlock(&host_ui->ui_mutex); ubifs_release_budget(c, &req); make_bad_inode(inode); From a15cf34fed779d3d49d76e3a7e63bd0132b6a458 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 15 Oct 2016 17:00:11 +0100 Subject: [PATCH 327/884] ubi: fix swapped arguments to call to ubi_alloc_aeb Static analysis by CoverityScan detected the ec and pnum arguments are in the wrong order on a call to ubi_alloc_aeb. Swap the order to fix this. Fixes: 91f4285fe389a27 ("UBI: provide helpers to allocate and free aeb elements") Signed-off-by: Colin Ian King Reviewed-by: Boris Brezillon Signed-off-by: Richard Weinberger --- drivers/mtd/ubi/fastmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c index d6384d965788..2ff62157d3bb 100644 --- a/drivers/mtd/ubi/fastmap.c +++ b/drivers/mtd/ubi/fastmap.c @@ -287,7 +287,7 @@ static int update_vol(struct ubi_device *ubi, struct ubi_attach_info *ai, /* new_aeb is newer */ if (cmp_res & 1) { - victim = ubi_alloc_aeb(ai, aeb->ec, aeb->pnum); + victim = ubi_alloc_aeb(ai, aeb->pnum, aeb->ec); if (!victim) return -ENOMEM; From 884a3b647809cb31cf6bd948f814e93753b38502 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 13 Oct 2016 16:05:36 +0200 Subject: [PATCH 328/884] UBI: Fix crash in try_recover_peb() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/mtd/ubi/eba.c: In function ‘try_recover_peb’: drivers/mtd/ubi/eba.c:744: warning: ‘vid_hdr’ is used uninitialized in this function The pointer vid_hdr is indeed not initialized, leading to a crash when it is dereferenced. Fix this by obtaining the pointer from the VID buffer, like is done everywhere else. Fixes: 3291b52f9ff0acc8 ("UBI: introduce the VID buffer concept") Signed-off-by: Geert Uytterhoeven Reviewed-by: Boris Brezillon Signed-off-by: Richard Weinberger --- drivers/mtd/ubi/eba.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c index 95c4048a371e..388e46be6ad9 100644 --- a/drivers/mtd/ubi/eba.c +++ b/drivers/mtd/ubi/eba.c @@ -741,6 +741,7 @@ static int try_recover_peb(struct ubi_volume *vol, int pnum, int lnum, goto out_put; } + vid_hdr = ubi_get_vid_hdr(vidb); ubi_assert(vid_hdr->vol_type == UBI_VID_DYNAMIC); mutex_lock(&ubi->buf_mutex); From c83ed4c9dbb358b9e7707486e167e940d48bfeed Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Wed, 19 Oct 2016 12:43:07 +0200 Subject: [PATCH 329/884] ubifs: Abort readdir upon error If UBIFS is facing an error while walking a directory, it reports this error and ubifs_readdir() returns the error code. But the VFS readdir logic does not make the getdents system call fail in all cases. When the readdir cursor indicates that more entries are present, the system call will just return and the libc wrapper will try again since it also knows that more entries are present. This causes the libc wrapper to busy loop for ever when a directory is corrupted on UBIFS. A common approach do deal with corrupted directory entries is skipping them by setting the cursor to the next entry. On UBIFS this approach is not possible since we cannot compute the next directory entry cursor position without reading the current entry. So all we can do is setting the cursor to the "no more entries" position and make getdents exit. Cc: stable@vger.kernel.org Signed-off-by: Richard Weinberger --- fs/ubifs/dir.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 668ec3b90ea1..bd4a5e8ce441 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -439,7 +439,7 @@ static unsigned int vfs_dent_type(uint8_t type) */ static int ubifs_readdir(struct file *file, struct dir_context *ctx) { - int err; + int err = 0; struct qstr nm; union ubifs_key key; struct ubifs_dent_node *dent; @@ -541,14 +541,12 @@ out: kfree(file->private_data); file->private_data = NULL; - if (err != -ENOENT) { + if (err != -ENOENT) ubifs_err(c, "cannot find next direntry, error %d", err); - return err; - } /* 2 is a special value indicating that there are no more direntries */ ctx->pos = 2; - return 0; + return err; } /* Free saved readdir() state when the directory is closed */ From 8dedefbc38172f3fcb43a26b6d0e394dcb1ee562 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Tue, 4 Oct 2016 16:40:14 -0700 Subject: [PATCH 330/884] drm/fsl-dcu: enable TCON bypass mode by default Do not use encoder disable/enable callbacks to control bypass mode as this seems to mess with the signals not liked by displays. This also makes more sense since the encoder is already defined to be parallel RGB/LVDS at creation time. Signed-off-by: Stefan Agner Tested-By: Meng Yi --- drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c | 2 ++ drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c | 39 +++-------------------- 2 files changed, 7 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c index 0884c45aefe8..3897f5671776 100644 --- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c +++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c @@ -273,6 +273,8 @@ static int fsl_dcu_drm_pm_resume(struct device *dev) goto disable_dcu_clk; } + if (fsl_dev->tcon) + fsl_tcon_bypass_enable(fsl_dev->tcon); fsl_dcu_drm_init_planes(fsl_dev->drm); drm_atomic_helper_resume(fsl_dev->drm, fsl_dev->state); diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c index 26edcc899712..e1dd75b18118 100644 --- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c +++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c @@ -20,38 +20,6 @@ #include "fsl_dcu_drm_drv.h" #include "fsl_tcon.h" -static int -fsl_dcu_drm_encoder_atomic_check(struct drm_encoder *encoder, - struct drm_crtc_state *crtc_state, - struct drm_connector_state *conn_state) -{ - return 0; -} - -static void fsl_dcu_drm_encoder_disable(struct drm_encoder *encoder) -{ - struct drm_device *dev = encoder->dev; - struct fsl_dcu_drm_device *fsl_dev = dev->dev_private; - - if (fsl_dev->tcon) - fsl_tcon_bypass_disable(fsl_dev->tcon); -} - -static void fsl_dcu_drm_encoder_enable(struct drm_encoder *encoder) -{ - struct drm_device *dev = encoder->dev; - struct fsl_dcu_drm_device *fsl_dev = dev->dev_private; - - if (fsl_dev->tcon) - fsl_tcon_bypass_enable(fsl_dev->tcon); -} - -static const struct drm_encoder_helper_funcs encoder_helper_funcs = { - .atomic_check = fsl_dcu_drm_encoder_atomic_check, - .disable = fsl_dcu_drm_encoder_disable, - .enable = fsl_dcu_drm_encoder_enable, -}; - static void fsl_dcu_drm_encoder_destroy(struct drm_encoder *encoder) { drm_encoder_cleanup(encoder); @@ -68,13 +36,16 @@ int fsl_dcu_drm_encoder_create(struct fsl_dcu_drm_device *fsl_dev, int ret; encoder->possible_crtcs = 1; + + /* Use bypass mode for parallel RGB/LVDS encoder */ + if (fsl_dev->tcon) + fsl_tcon_bypass_enable(fsl_dev->tcon); + ret = drm_encoder_init(fsl_dev->drm, encoder, &encoder_funcs, DRM_MODE_ENCODER_LVDS, NULL); if (ret < 0) return ret; - drm_encoder_helper_add(encoder, &encoder_helper_funcs); - return 0; } From b6ead864ea893ef55828be0ec0d6c10f7c1c4e30 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Tue, 4 Oct 2016 16:56:38 -0700 Subject: [PATCH 331/884] drm/fsl-dcu: do not transfer registers on plane init There is no need to explicitly initiate a register transfer and turn off the DCU after initializing the plane registers. In fact, this is harmful and leads to unnecessary flickers if the DCU has been left on by the bootloader. Signed-off-by: Stefan Agner Tested-By: Meng Yi --- drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c index a7e5486bd1e9..9e6f7d8112b3 100644 --- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c +++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c @@ -211,11 +211,6 @@ void fsl_dcu_drm_init_planes(struct drm_device *dev) for (j = 1; j <= fsl_dev->soc->layer_regs; j++) regmap_write(fsl_dev->regmap, DCU_CTRLDESCLN(i, j), 0); } - regmap_update_bits(fsl_dev->regmap, DCU_DCU_MODE, - DCU_MODE_DCU_MODE_MASK, - DCU_MODE_DCU_MODE(DCU_MODE_OFF)); - regmap_write(fsl_dev->regmap, DCU_UPDATE_MODE, - DCU_UPDATE_MODE_READREG); } struct drm_plane *fsl_dcu_drm_primary_create_plane(struct drm_device *dev) From 9789037695018e25902469ea0e540c07580b940f Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Wed, 5 Oct 2016 14:37:57 -0700 Subject: [PATCH 332/884] drm/fsl-dcu: do not transfer registers in mode_set_nofb Do not schedule a transfer of mode settings early. Modes should get applied on on CRTC enable where we also enable the pixel clock. Signed-off-by: Stefan Agner Tested-By: Meng Yi --- drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c index 3371635cd4d7..5ad1d68c8194 100644 --- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c +++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c @@ -116,8 +116,6 @@ static void fsl_dcu_drm_crtc_mode_set_nofb(struct drm_crtc *crtc) DCU_THRESHOLD_LS_BF_VS(BF_VS_VAL) | DCU_THRESHOLD_OUT_BUF_HIGH(BUF_MAX_VAL) | DCU_THRESHOLD_OUT_BUF_LOW(BUF_MIN_VAL)); - regmap_write(fsl_dev->regmap, DCU_UPDATE_MODE, - DCU_UPDATE_MODE_READREG); return; } From 0a70c998d0c571c66d0ba8ffd9f803392a53193d Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Tue, 4 Oct 2016 17:40:29 -0700 Subject: [PATCH 333/884] drm/fsl-dcu: enable pixel clock when enabling CRTC The pixel clock should not be on if the CRTC is not in use, hence move clock enable/disable calls into CRTC callbacks. Signed-off-by: Stefan Agner Tested-By: Meng Yi --- drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c | 2 ++ drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c | 21 +-------------------- 2 files changed, 3 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c index 5ad1d68c8194..b2d5e188b1b8 100644 --- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c +++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c @@ -51,6 +51,7 @@ static void fsl_dcu_drm_disable_crtc(struct drm_crtc *crtc) DCU_MODE_DCU_MODE(DCU_MODE_OFF)); regmap_write(fsl_dev->regmap, DCU_UPDATE_MODE, DCU_UPDATE_MODE_READREG); + clk_disable_unprepare(fsl_dev->pix_clk); } static void fsl_dcu_drm_crtc_enable(struct drm_crtc *crtc) @@ -58,6 +59,7 @@ static void fsl_dcu_drm_crtc_enable(struct drm_crtc *crtc) struct drm_device *dev = crtc->dev; struct fsl_dcu_drm_device *fsl_dev = dev->dev_private; + clk_prepare_enable(fsl_dev->pix_clk); regmap_update_bits(fsl_dev->regmap, DCU_DCU_MODE, DCU_MODE_DCU_MODE_MASK, DCU_MODE_DCU_MODE(DCU_MODE_NORMAL)); diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c index 3897f5671776..e04efbed1a54 100644 --- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c +++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c @@ -267,12 +267,6 @@ static int fsl_dcu_drm_pm_resume(struct device *dev) return ret; } - ret = clk_prepare_enable(fsl_dev->pix_clk); - if (ret < 0) { - dev_err(dev, "failed to enable pix clk\n"); - goto disable_dcu_clk; - } - if (fsl_dev->tcon) fsl_tcon_bypass_enable(fsl_dev->tcon); fsl_dcu_drm_init_planes(fsl_dev->drm); @@ -286,10 +280,6 @@ static int fsl_dcu_drm_pm_resume(struct device *dev) enable_irq(fsl_dev->irq); return 0; - -disable_dcu_clk: - clk_disable_unprepare(fsl_dev->clk); - return ret; } #endif @@ -403,18 +393,12 @@ static int fsl_dcu_drm_probe(struct platform_device *pdev) goto disable_clk; } - ret = clk_prepare_enable(fsl_dev->pix_clk); - if (ret < 0) { - dev_err(dev, "failed to enable pix clk\n"); - goto unregister_pix_clk; - } - fsl_dev->tcon = fsl_tcon_init(dev); drm = drm_dev_alloc(driver, dev); if (IS_ERR(drm)) { ret = PTR_ERR(drm); - goto disable_pix_clk; + goto unregister_pix_clk; } fsl_dev->dev = dev; @@ -435,8 +419,6 @@ static int fsl_dcu_drm_probe(struct platform_device *pdev) unref: drm_dev_unref(drm); -disable_pix_clk: - clk_disable_unprepare(fsl_dev->pix_clk); unregister_pix_clk: clk_unregister(fsl_dev->pix_clk); disable_clk: @@ -449,7 +431,6 @@ static int fsl_dcu_drm_remove(struct platform_device *pdev) struct fsl_dcu_drm_device *fsl_dev = platform_get_drvdata(pdev); clk_disable_unprepare(fsl_dev->clk); - clk_disable_unprepare(fsl_dev->pix_clk); clk_unregister(fsl_dev->pix_clk); drm_put_dev(fsl_dev->drm); From 02eb924fabc5b699c0d9d354491e6f0767e3c139 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Thu, 6 Oct 2016 10:07:07 -0500 Subject: [PATCH 334/884] target/user: Use sense_reason_t in tcmu_queue_cmd_ring Instead of using -ERROR-style returns, use sense_reason_t. This lets us remove tcmu_pass_op(), and return more correct sense values. Signed-off-by: Andy Grover Signed-off-by: Bryant G. Ly Reviewed-by: Christoph Hellwig Reviewed-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 62bf4fe5704a..0cd1c61ba2ed 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -389,7 +389,8 @@ static bool is_ring_space_avail(struct tcmu_dev *udev, size_t cmd_size, size_t d return true; } -static int tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) +static sense_reason_t +tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) { struct tcmu_dev *udev = tcmu_cmd->tcmu_dev; struct se_cmd *se_cmd = tcmu_cmd->se_cmd; @@ -405,7 +406,7 @@ static int tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) DECLARE_BITMAP(old_bitmap, DATA_BLOCK_BITS); if (test_bit(TCMU_DEV_BIT_BROKEN, &udev->flags)) - return -EINVAL; + return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; /* * Must be a certain minimum size for response sense info, but @@ -450,7 +451,7 @@ static int tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) finish_wait(&udev->wait_cmdr, &__wait); if (!ret) { pr_warn("tcmu: command timed out\n"); - return -ETIMEDOUT; + return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; } spin_lock_irq(&udev->cmdr_lock); @@ -526,10 +527,11 @@ static int tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) mod_timer(&udev->timeout, round_jiffies_up(jiffies + msecs_to_jiffies(TCMU_TIME_OUT))); - return 0; + return TCM_NO_SENSE; } -static int tcmu_queue_cmd(struct se_cmd *se_cmd) +static sense_reason_t +tcmu_queue_cmd(struct se_cmd *se_cmd) { struct se_device *se_dev = se_cmd->se_dev; struct tcmu_dev *udev = TCMU_DEV(se_dev); @@ -538,10 +540,10 @@ static int tcmu_queue_cmd(struct se_cmd *se_cmd) tcmu_cmd = tcmu_alloc_cmd(se_cmd); if (!tcmu_cmd) - return -ENOMEM; + return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; ret = tcmu_queue_cmd_ring(tcmu_cmd); - if (ret < 0) { + if (ret != TCM_NO_SENSE) { pr_err("TCMU: Could not queue command\n"); spin_lock_irq(&udev->commands_lock); idr_remove(&udev->commands, tcmu_cmd->cmd_id); @@ -1128,21 +1130,10 @@ static sector_t tcmu_get_blocks(struct se_device *dev) dev->dev_attrib.block_size); } -static sense_reason_t -tcmu_pass_op(struct se_cmd *se_cmd) -{ - int ret = tcmu_queue_cmd(se_cmd); - - if (ret != 0) - return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; - else - return TCM_NO_SENSE; -} - static sense_reason_t tcmu_parse_cdb(struct se_cmd *cmd) { - return passthrough_parse_cdb(cmd, tcmu_pass_op); + return passthrough_parse_cdb(cmd, tcmu_queue_cmd); } static const struct target_backend_ops tcmu_ops = { From 554617b2bbe25c3fb3c80c28fe7a465884bb40b1 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Thu, 25 Aug 2016 08:55:53 -0700 Subject: [PATCH 335/884] target/user: Return an error if cmd data size is too large Userspace should be implementing VPD B0 (Block Limits) to inform the initiator of max data size, but just in case we do get a too-large request, do what the spec says and return INVALID_CDB_FIELD. Make sure to unlock udev->cmdr_lock before returning. Signed-off-by: Andy Grover Reviewed-by: Christoph Hellwig Reviewed-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 0cd1c61ba2ed..5de1eac17fed 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -433,11 +433,14 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) BUG_ON(!(se_cmd->t_bidi_data_sg && se_cmd->t_bidi_data_nents)); data_length += se_cmd->t_bidi_data_sg->length; } - if ((command_size > (udev->cmdr_size / 2)) - || data_length > udev->data_size) - pr_warn("TCMU: Request of size %zu/%zu may be too big for %u/%zu " + if ((command_size > (udev->cmdr_size / 2)) || + data_length > udev->data_size) { + pr_warn("TCMU: Request of size %zu/%zu is too big for %u/%zu " "cmd/data ring buffers\n", command_size, data_length, udev->cmdr_size, udev->data_size); + spin_unlock_irq(&udev->cmdr_lock); + return TCM_INVALID_CDB_FIELD; + } while (!is_ring_space_avail(udev, command_size, data_length)) { int ret; From 3d9b95558f5874ac5d63a057813dc66b480de7e1 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Thu, 25 Aug 2016 08:55:54 -0700 Subject: [PATCH 336/884] target/user: Fix comments to not refer to data ring We no longer use a ringbuffer for the data area, so this might cause confusion. Just call it the data area. Signed-off-by: Andy Grover Reviewed-by: Christoph Hellwig Reviewed-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 5de1eac17fed..47562509b489 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -96,7 +96,7 @@ struct tcmu_dev { size_t dev_size; u32 cmdr_size; u32 cmdr_last_cleaned; - /* Offset of data ring from start of mb */ + /* Offset of data area from start of mb */ /* Must add data_off and mb_addr to get the address */ size_t data_off; size_t data_size; @@ -349,7 +349,7 @@ static inline size_t spc_bitmap_free(unsigned long *bitmap) /* * We can't queue a command until we have space available on the cmd ring *and* - * space available on the data ring. + * space available on the data area. * * Called with ring lock held. */ @@ -436,7 +436,7 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) if ((command_size > (udev->cmdr_size / 2)) || data_length > udev->data_size) { pr_warn("TCMU: Request of size %zu/%zu is too big for %u/%zu " - "cmd/data ring buffers\n", command_size, data_length, + "cmd ring/data area\n", command_size, data_length, udev->cmdr_size, udev->data_size); spin_unlock_irq(&udev->cmdr_lock); return TCM_INVALID_CDB_FIELD; @@ -491,9 +491,7 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) bitmap_copy(old_bitmap, udev->data_bitmap, DATA_BLOCK_BITS); - /* - * Fix up iovecs, and handle if allocation in data ring wrapped. - */ + /* Handle allocating space from the data area */ iov = &entry->req.iov[0]; iov_cnt = 0; copy_to_data_area = (se_cmd->data_direction == DMA_TO_DEVICE @@ -566,7 +564,7 @@ static void tcmu_handle_completion(struct tcmu_cmd *cmd, struct tcmu_cmd_entry * if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags)) { /* * cmd has been completed already from timeout, just reclaim - * data ring space and free cmd + * data area space and free cmd */ free_data_area(udev, cmd); From 3fc6a642e4355abef986b2dd11672216fb18212e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 2 Sep 2016 15:30:34 +0100 Subject: [PATCH 337/884] iscsi-target: fix spelling mistake "Unsolicitied" -> "Unsolicited" Trivial fix to spelling mistakes in pr_debug message and comments Signed-off-by: Colin Ian King Reviewed-by: Bart Van Assche Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target.c | 2 +- drivers/target/iscsi/iscsi_target_login.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 39b928c2849d..1aaf1f3148b2 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -2982,7 +2982,7 @@ iscsit_build_nopin_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn, pr_debug("Built NOPIN %s Response ITT: 0x%08x, TTT: 0x%08x," " StatSN: 0x%08x, Length %u\n", (nopout_response) ? - "Solicitied" : "Unsolicitied", cmd->init_task_tag, + "Solicited" : "Unsolicited", cmd->init_task_tag, cmd->targ_xfer_tag, cmd->stat_sn, cmd->buf_ptr_size); } EXPORT_SYMBOL(iscsit_build_nopin_rsp); diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index adf419fa4291..15f79a2ca34a 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -434,7 +434,7 @@ static int iscsi_login_zero_tsih_s2( /* * Make MaxRecvDataSegmentLength PAGE_SIZE aligned for - * Immediate Data + Unsolicitied Data-OUT if necessary.. + * Immediate Data + Unsolicited Data-OUT if necessary.. */ param = iscsi_find_param_from_key("MaxRecvDataSegmentLength", conn->param_list); @@ -646,7 +646,7 @@ static void iscsi_post_login_start_timers(struct iscsi_conn *conn) { struct iscsi_session *sess = conn->sess; /* - * FIXME: Unsolicitied NopIN support for ISER + * FIXME: Unsolicited NopIN support for ISER */ if (conn->conn_transport->transport_type == ISCSI_INFINIBAND) return; From 1a40f0a36fb669226f0fd29aaece5ff7b6399e80 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Thu, 15 Sep 2016 21:20:11 +0530 Subject: [PATCH 338/884] iscsi-target: fix iscsi cmd leak If iscsi-target receives NOP OUT with ITT and TTT set to 0xffffffff it allocates iscsi_cmd but does not free the cmd, so free iscsi_cmd in this case. Signed-off-by: Varun Prakash Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 1aaf1f3148b2..b7d747e92c7a 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -1804,6 +1804,10 @@ int iscsit_process_nop_out(struct iscsi_conn *conn, struct iscsi_cmd *cmd, * Otherwise, initiator is not expecting a NOPIN is response. * Just ignore for now. */ + + if (cmd) + iscsit_free_cmd(cmd, false); + return 0; } EXPORT_SYMBOL(iscsit_process_nop_out); From 527268df31e57cf2b6d417198717c6d6afdb1e3e Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Tue, 4 Oct 2016 16:37:05 -0700 Subject: [PATCH 339/884] target: Re-add missing SCF_ACK_KREF assignment in v4.1.y This patch fixes a regression in >= v4.1.y code where the original SCF_ACK_KREF assignment in target_get_sess_cmd() was dropped upstream in commit 054922bb, but the series for addressing TMR ABORT_TASK + LUN_RESET with fabric session reinstatement in commit febe562c20 still depends on this code in transport_cmd_finish_abort(). The regression manifests itself as a se_cmd->cmd_kref +1 leak, where ABORT_TASK + LUN_RESET can hang indefinately for a specific I_T session for drivers using SCF_ACK_KREF, resulting in hung kthreads. This patch has been verified with v4.1.y code. Reported-by: Vaibhav Tandon Tested-by: Vaibhav Tandon Cc: Vaibhav Tandon Cc: stable@vger.kernel.org # 4.1+ Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 6094a6beddde..00ec46413146 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -2547,8 +2547,10 @@ int target_get_sess_cmd(struct se_cmd *se_cmd, bool ack_kref) * fabric acknowledgement that requires two target_put_sess_cmd() * invocations before se_cmd descriptor release. */ - if (ack_kref) + if (ack_kref) { kref_get(&se_cmd->cmd_kref); + se_cmd->se_cmd_flags |= SCF_ACK_KREF; + } spin_lock_irqsave(&se_sess->sess_cmd_lock, flags); if (se_sess->sess_tearing_down) { From 449a137846c84829a328757cd21fd9ca65c08519 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Sat, 8 Oct 2016 17:26:44 -0700 Subject: [PATCH 340/884] target: Make EXTENDED_COPY 0xe4 failure return COPY TARGET DEVICE NOT REACHABLE This patch addresses a bug where EXTENDED_COPY across multiple LUNs results in a CHECK_CONDITION when the source + destination are not located on the same physical node. ESX Host environments expect sense COPY_ABORTED w/ COPY TARGET DEVICE NOT REACHABLE to be returned when this occurs, in order to signal fallback to local copy method. As described in section 6.3.3 of spc4r22: "If it is not possible to complete processing of a segment because the copy manager is unable to establish communications with a copy target device, because the copy target device does not respond to INQUIRY, or because the data returned in response to INQUIRY indicates an unsupported logical unit, then the EXTENDED COPY command shall be terminated with CHECK CONDITION status, with the sense key set to COPY ABORTED, and the additional sense code set to COPY TARGET DEVICE NOT REACHABLE." Tested on v4.1.y with ESX v5.5u2+ with BlockCopy across multiple nodes. Reported-by: Nixon Vincent Tested-by: Nixon Vincent Cc: Nixon Vincent Tested-by: Dinesh Israni Signed-off-by: Dinesh Israni Cc: Dinesh Israni Cc: stable@vger.kernel.org # 3.14+ Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 7 +++++++ drivers/target/target_core_xcopy.c | 22 ++++++++++++++++------ include/target/target_core_base.h | 1 + 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 00ec46413146..000bc6d077ea 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1706,6 +1706,7 @@ void transport_generic_request_failure(struct se_cmd *cmd, case TCM_LOGICAL_BLOCK_GUARD_CHECK_FAILED: case TCM_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED: case TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED: + case TCM_COPY_TARGET_DEVICE_NOT_REACHABLE: break; case TCM_OUT_OF_RESOURCES: sense_reason = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; @@ -2873,6 +2874,12 @@ static const struct sense_info sense_info_table[] = { .ascq = 0x03, /* LOGICAL BLOCK REFERENCE TAG CHECK FAILED */ .add_sector_info = true, }, + [TCM_COPY_TARGET_DEVICE_NOT_REACHABLE] = { + .key = COPY_ABORTED, + .asc = 0x0d, + .ascq = 0x02, /* COPY TARGET DEVICE NOT REACHABLE */ + + }, [TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE] = { /* * Returning ILLEGAL REQUEST would cause immediate IO errors on diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index 75cd85426ae3..f2a1443ecb33 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -104,7 +104,7 @@ static int target_xcopy_locate_se_dev_e4(struct se_cmd *se_cmd, struct xcopy_op } mutex_unlock(&g_device_mutex); - pr_err("Unable to locate 0xe4 descriptor for EXTENDED_COPY\n"); + pr_debug_ratelimited("Unable to locate 0xe4 descriptor for EXTENDED_COPY\n"); return -EINVAL; } @@ -185,7 +185,7 @@ static int target_xcopy_parse_tiddesc_e4(struct se_cmd *se_cmd, struct xcopy_op static int target_xcopy_parse_target_descriptors(struct se_cmd *se_cmd, struct xcopy_op *xop, unsigned char *p, - unsigned short tdll) + unsigned short tdll, sense_reason_t *sense_ret) { struct se_device *local_dev = se_cmd->se_dev; unsigned char *desc = p; @@ -193,6 +193,8 @@ static int target_xcopy_parse_target_descriptors(struct se_cmd *se_cmd, unsigned short start = 0; bool src = true; + *sense_ret = TCM_INVALID_PARAMETER_LIST; + if (offset != 0) { pr_err("XCOPY target descriptor list length is not" " multiple of %d\n", XCOPY_TARGET_DESC_LEN); @@ -243,9 +245,16 @@ static int target_xcopy_parse_target_descriptors(struct se_cmd *se_cmd, rc = target_xcopy_locate_se_dev_e4(se_cmd, xop, true); else rc = target_xcopy_locate_se_dev_e4(se_cmd, xop, false); - - if (rc < 0) + /* + * If a matching IEEE NAA 0x83 descriptor for the requested device + * is not located on this node, return COPY_ABORTED with ASQ/ASQC + * 0x0d/0x02 - COPY_TARGET_DEVICE_NOT_REACHABLE to request the + * initiator to fall back to normal copy method. + */ + if (rc < 0) { + *sense_ret = TCM_COPY_TARGET_DEVICE_NOT_REACHABLE; goto out; + } pr_debug("XCOPY TGT desc: Source dev: %p NAA IEEE WWN: 0x%16phN\n", xop->src_dev, &xop->src_tid_wwn[0]); @@ -816,7 +825,8 @@ out: xcopy_pt_undepend_remotedev(xop); kfree(xop); - pr_warn("target_xcopy_do_work: Setting X-COPY CHECK_CONDITION -> sending response\n"); + pr_warn_ratelimited("target_xcopy_do_work: rc: %d, Setting X-COPY CHECK_CONDITION" + " -> sending response\n", rc); ec_cmd->scsi_status = SAM_STAT_CHECK_CONDITION; target_complete_cmd(ec_cmd, SAM_STAT_CHECK_CONDITION); } @@ -875,7 +885,7 @@ sense_reason_t target_do_xcopy(struct se_cmd *se_cmd) " tdll: %hu sdll: %u inline_dl: %u\n", list_id, list_id_usage, tdll, sdll, inline_dl); - rc = target_xcopy_parse_target_descriptors(se_cmd, xop, &p[16], tdll); + rc = target_xcopy_parse_target_descriptors(se_cmd, xop, &p[16], tdll, &ret); if (rc <= 0) goto out; diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index fb8e3b6febdf..c2119008990a 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -177,6 +177,7 @@ enum tcm_sense_reason_table { TCM_LOGICAL_BLOCK_GUARD_CHECK_FAILED = R(0x15), TCM_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED = R(0x16), TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED = R(0x17), + TCM_COPY_TARGET_DEVICE_NOT_REACHABLE = R(0x18), #undef R }; From 926317de33998c112c5510301868ea9aa34097e2 Mon Sep 17 00:00:00 2001 From: Dinesh Israni Date: Mon, 10 Oct 2016 20:22:03 -0700 Subject: [PATCH 341/884] target: Don't override EXTENDED_COPY xcopy_pt_cmd SCSI status code This patch addresses a bug where a local EXTENDED_COPY WRITE or READ backend I/O request would always return SAM_STAT_CHECK_CONDITION, even if underlying xcopy_pt_cmd->se_cmd generated a different SCSI status code. ESX host environments expect to hit SAM_STAT_RESERVATION_CONFLICT for certain scenarios, and SAM_STAT_CHECK_CONDITION results in non-retriable status for these cases. Tested on v4.1.y with ESX v5.5u2+ with local IBLOCK backend copy. Reported-by: Nixon Vincent Tested-by: Nixon Vincent Cc: Nixon Vincent Tested-by: Dinesh Israni Signed-off-by: Dinesh Israni Cc: Dinesh Israni Cc: stable@vger.kernel.org # 3.14+ Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_xcopy.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c index f2a1443ecb33..094a1440eacb 100644 --- a/drivers/target/target_core_xcopy.c +++ b/drivers/target/target_core_xcopy.c @@ -662,6 +662,7 @@ static int target_xcopy_read_source( rc = target_xcopy_setup_pt_cmd(xpt_cmd, xop, src_dev, &cdb[0], remote_port, true); if (rc < 0) { + ec_cmd->scsi_status = xpt_cmd->se_cmd.scsi_status; transport_generic_free_cmd(se_cmd, 0); return rc; } @@ -673,6 +674,7 @@ static int target_xcopy_read_source( rc = target_xcopy_issue_pt_cmd(xpt_cmd); if (rc < 0) { + ec_cmd->scsi_status = xpt_cmd->se_cmd.scsi_status; transport_generic_free_cmd(se_cmd, 0); return rc; } @@ -723,6 +725,7 @@ static int target_xcopy_write_destination( remote_port, false); if (rc < 0) { struct se_cmd *src_cmd = &xop->src_pt_cmd->se_cmd; + ec_cmd->scsi_status = xpt_cmd->se_cmd.scsi_status; /* * If the failure happened before the t_mem_list hand-off in * target_xcopy_setup_pt_cmd(), Reset memory + clear flag so that @@ -738,6 +741,7 @@ static int target_xcopy_write_destination( rc = target_xcopy_issue_pt_cmd(xpt_cmd); if (rc < 0) { + ec_cmd->scsi_status = xpt_cmd->se_cmd.scsi_status; se_cmd->se_cmd_flags &= ~SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC; transport_generic_free_cmd(se_cmd, 0); return rc; @@ -824,10 +828,14 @@ static void target_xcopy_do_work(struct work_struct *work) out: xcopy_pt_undepend_remotedev(xop); kfree(xop); - - pr_warn_ratelimited("target_xcopy_do_work: rc: %d, Setting X-COPY CHECK_CONDITION" - " -> sending response\n", rc); - ec_cmd->scsi_status = SAM_STAT_CHECK_CONDITION; + /* + * Don't override an error scsi status if it has already been set + */ + if (ec_cmd->scsi_status == SAM_STAT_GOOD) { + pr_warn_ratelimited("target_xcopy_do_work: rc: %d, Setting X-COPY" + " CHECK_CONDITION -> sending response\n", rc); + ec_cmd->scsi_status = SAM_STAT_CHECK_CONDITION; + } target_complete_cmd(ec_cmd, SAM_STAT_CHECK_CONDITION); } From 61f36166c245e563c7a2b624f4c78c5ce0f680d6 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Sun, 16 Oct 2016 00:27:42 -0700 Subject: [PATCH 342/884] Revert "target: Fix residual overflow handling in target_complete_cmd_with_length" This reverts commit c1ccbfe0311e2380a6d2dcb0714b36904f5d586f. Reverting this patch, as it incorrectly assumes the additional length for INQUIRY in target_complete_cmd_with_length() is SCSI allocation length, which breaks existing user-space code when SCSI allocation length is smaller than additional length. root@scsi-mq:~# sg_inq --len=4 -vvvv /dev/sdb found bsg_major=253 open /dev/sdb with flags=0x800 inquiry cdb: 12 00 00 00 04 00 duration=0 ms inquiry: pass-through requested 4 bytes (data-in) but got -28 bytes inquiry: pass-through can't get negative bytes, say it got none inquiry: got too few bytes (0) INQUIRY resid (32) should never exceed requested len=4 inquiry: failed requesting 4 byte response: Malformed response to SCSI command [resid=32] AFAICT the original change was not to address a specific host issue, so go ahead and revert to original logic for now. Cc: Douglas Gilbert Cc: Martin K. Petersen Cc: Sumit Rai Cc: stable@vger.kernel.org # 4.8+ Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 000bc6d077ea..e825d580ccee 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -754,15 +754,7 @@ EXPORT_SYMBOL(target_complete_cmd); void target_complete_cmd_with_length(struct se_cmd *cmd, u8 scsi_status, int length) { - if (scsi_status != SAM_STAT_GOOD) { - return; - } - - /* - * Calculate new residual count based upon length of SCSI data - * transferred. - */ - if (length < cmd->data_length) { + if (scsi_status == SAM_STAT_GOOD && length < cmd->data_length) { if (cmd->se_cmd_flags & SCF_UNDERFLOW_BIT) { cmd->residual_count += cmd->data_length - length; } else { @@ -771,12 +763,6 @@ void target_complete_cmd_with_length(struct se_cmd *cmd, u8 scsi_status, int len } cmd->data_length = length; - } else if (length > cmd->data_length) { - cmd->se_cmd_flags |= SCF_OVERFLOW_BIT; - cmd->residual_count = length - cmd->data_length; - } else { - cmd->se_cmd_flags &= ~(SCF_OVERFLOW_BIT | SCF_UNDERFLOW_BIT); - cmd->residual_count = 0; } target_complete_cmd(cmd, scsi_status); From 1d55a4bfd080ff4c6c96acfccfb7cdd2615ed6c2 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 20 Oct 2016 15:40:55 +1100 Subject: [PATCH 343/884] xfs: remove redundant assignment of ifp Remove redundant ifp = ifp statement, it does nothing. Found with static analysis by CoverityScan. Signed-off-by: Colin Ian King Reviewed-by: Darrick J. Wong Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_bmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index c27344cf38e1..0283b7eaf973 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -5204,7 +5204,7 @@ xfs_bunmapi_cow( ep = xfs_bmap_search_extents(ip, del->br_startoff, XFS_COW_FORK, &eof, &eidx, &got, &new); - ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); ifp = ifp; + ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); ASSERT((eidx >= 0) && (eidx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))); ASSERT(del->br_blockcount > 0); From 1be7f9be0efa4e90547f50b8188f4e70710a1173 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 20 Oct 2016 15:41:48 +1100 Subject: [PATCH 344/884] xfs: Fix uninitialized variable in xfs_reflink_reserve_cow_range() with gcc 4.1.2: fs/xfs/xfs_reflink.c: In function xfs_reflink_reserve_cow_range: fs/xfs/xfs_reflink.c:327: warning: error may be used uninitialized in this function Indeed, if "count" is zero, the function will return an uninitialized error value. While "count" is unlikely to be zero, this function is called through the public iomap API. Hence fix this by preinitializing error to zero. Fixes: 2a06705cd5954030 ("xfs: create delalloc extents in CoW fork") Signed-off-by: Geert Uytterhoeven Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner --- fs/xfs/xfs_reflink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 5965e9455d91..d48a7cc2fe00 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -324,7 +324,7 @@ xfs_reflink_reserve_cow_range( struct xfs_mount *mp = ip->i_mount; xfs_fileoff_t offset_fsb, end_fsb; bool skipped = false; - int error; + int error = 0; trace_xfs_reflink_reserve_cow_range(ip, offset, count); From f1b8243c55ca6fd2a3898e2f586b8cfcfff684bb Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 20 Oct 2016 15:42:30 +1100 Subject: [PATCH 345/884] xfs: add some 'static' annotations sparse reported that several variables and a function were not forward-declared anywhere and therefore should be 'static'. Found with sparse by running 'make C=2 CF=-D__CHECK_ENDIAN__ fs/xfs/' Signed-off-by: Eric Biggers Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_btree.c | 2 +- fs/xfs/xfs_sysfs.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 5c8e6f2ce44f..0e80993c8a59 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -4826,7 +4826,7 @@ xfs_btree_calc_size( return rval; } -int +static int xfs_btree_count_blocks_helper( struct xfs_btree_cur *cur, int level, diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c index 5f8d55d29a11..276d3023d60f 100644 --- a/fs/xfs/xfs_sysfs.c +++ b/fs/xfs/xfs_sysfs.c @@ -512,13 +512,13 @@ static struct attribute *xfs_error_attrs[] = { }; -struct kobj_type xfs_error_cfg_ktype = { +static struct kobj_type xfs_error_cfg_ktype = { .release = xfs_sysfs_release, .sysfs_ops = &xfs_sysfs_ops, .default_attrs = xfs_error_attrs, }; -struct kobj_type xfs_error_ktype = { +static struct kobj_type xfs_error_ktype = { .release = xfs_sysfs_release, .sysfs_ops = &xfs_sysfs_ops, }; From 0ee7a3f6b5b2f22bb69bfc6c60d0ea0777003098 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Oct 2016 15:44:14 +1100 Subject: [PATCH 346/884] xfs: don't take the IOLOCK exclusive for direct I/O page invalidation XFS historically took the iolock exclusive when invalidating pages before direct I/O operations to protect against writeback starvations. But this writeback starvation issues has been fixed a long time ago in the core writeback code, and all other file systems manage to do without the exclusive lock. Convert XFS over to avoid the exclusive lock in this case, and also move to range invalidations like done by the other file systems. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Carlos Maiolino Signed-off-by: Dave Chinner --- fs/xfs/xfs_file.c | 98 +++++++++++++++-------------------------------- 1 file changed, 30 insertions(+), 68 deletions(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index a314fc7b56fa..0dc9971d3c84 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -249,6 +249,7 @@ xfs_file_dio_aio_read( struct xfs_inode *ip = XFS_I(inode); loff_t isize = i_size_read(inode); size_t count = iov_iter_count(to); + loff_t end = iocb->ki_pos + count - 1; struct iov_iter data; struct xfs_buftarg *target; ssize_t ret = 0; @@ -272,49 +273,21 @@ xfs_file_dio_aio_read( file_accessed(iocb->ki_filp); - /* - * Locking is a bit tricky here. If we take an exclusive lock for direct - * IO, we effectively serialise all new concurrent read IO to this file - * and block it behind IO that is currently in progress because IO in - * progress holds the IO lock shared. We only need to hold the lock - * exclusive to blow away the page cache, so only take lock exclusively - * if the page cache needs invalidation. This allows the normal direct - * IO case of no page cache pages to proceeed concurrently without - * serialisation. - */ xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); if (mapping->nrpages) { - xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); - xfs_rw_ilock(ip, XFS_IOLOCK_EXCL); + ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end); + if (ret) + goto out_unlock; /* - * The generic dio code only flushes the range of the particular - * I/O. Because we take an exclusive lock here, this whole - * sequence is considerably more expensive for us. This has a - * noticeable performance impact for any file with cached pages, - * even when outside of the range of the particular I/O. - * - * Hence, amortize the cost of the lock against a full file - * flush and reduce the chances of repeated iolock cycles going - * forward. + * Invalidate whole pages. This can return an error if we fail + * to invalidate a page, but this should never happen on XFS. + * Warn if it does fail. */ - if (mapping->nrpages) { - ret = filemap_write_and_wait(mapping); - if (ret) { - xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL); - return ret; - } - - /* - * Invalidate whole pages. This can return an error if - * we fail to invalidate a page, but this should never - * happen on XFS. Warn if it does fail. - */ - ret = invalidate_inode_pages2(mapping); - WARN_ON_ONCE(ret); - ret = 0; - } - xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); + ret = invalidate_inode_pages2_range(mapping, + iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT); + WARN_ON_ONCE(ret); + ret = 0; } data = *to; @@ -324,8 +297,9 @@ xfs_file_dio_aio_read( iocb->ki_pos += ret; iov_iter_advance(to, ret); } - xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); +out_unlock: + xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); return ret; } @@ -570,61 +544,49 @@ xfs_file_dio_aio_write( if ((iocb->ki_pos | count) & target->bt_logical_sectormask) return -EINVAL; - /* "unaligned" here means not aligned to a filesystem block */ + /* + * Don't take the exclusive iolock here unless the I/O is unaligned to + * the file system block size. We don't need to consider the EOF + * extension case here because xfs_file_aio_write_checks() will relock + * the inode as necessary for EOF zeroing cases and fill out the new + * inode size as appropriate. + */ if ((iocb->ki_pos & mp->m_blockmask) || - ((iocb->ki_pos + count) & mp->m_blockmask)) + ((iocb->ki_pos + count) & mp->m_blockmask)) { unaligned_io = 1; - - /* - * We don't need to take an exclusive lock unless there page cache needs - * to be invalidated or unaligned IO is being executed. We don't need to - * consider the EOF extension case here because - * xfs_file_aio_write_checks() will relock the inode as necessary for - * EOF zeroing cases and fill out the new inode size as appropriate. - */ - if (unaligned_io || mapping->nrpages) iolock = XFS_IOLOCK_EXCL; - else + } else { iolock = XFS_IOLOCK_SHARED; - xfs_rw_ilock(ip, iolock); - - /* - * Recheck if there are cached pages that need invalidate after we got - * the iolock to protect against other threads adding new pages while - * we were waiting for the iolock. - */ - if (mapping->nrpages && iolock == XFS_IOLOCK_SHARED) { - xfs_rw_iunlock(ip, iolock); - iolock = XFS_IOLOCK_EXCL; - xfs_rw_ilock(ip, iolock); } + xfs_rw_ilock(ip, iolock); + ret = xfs_file_aio_write_checks(iocb, from, &iolock); if (ret) goto out; count = iov_iter_count(from); end = iocb->ki_pos + count - 1; - /* - * See xfs_file_dio_aio_read() for why we do a full-file flush here. - */ if (mapping->nrpages) { - ret = filemap_write_and_wait(VFS_I(ip)->i_mapping); + ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end); if (ret) goto out; + /* * Invalidate whole pages. This can return an error if we fail * to invalidate a page, but this should never happen on XFS. * Warn if it does fail. */ - ret = invalidate_inode_pages2(VFS_I(ip)->i_mapping); + ret = invalidate_inode_pages2_range(mapping, + iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT); WARN_ON_ONCE(ret); ret = 0; } /* * If we are doing unaligned IO, wait for all other IO to drain, - * otherwise demote the lock if we had to flush cached pages + * otherwise demote the lock if we had to take the exclusive lock + * for other reasons in xfs_file_aio_write_checks. */ if (unaligned_io) inode_dio_wait(inode); From fe23759eaf2f6540de20c1623f066aad967ff9c9 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Thu, 20 Oct 2016 15:44:53 +1100 Subject: [PATCH 347/884] xfs: remove pointless error goto in xfs_bmap_remap_alloc The commit: f65306ea xfs: map an inode's offset to an exact physical block added a pointless error0: target; remove it. Addresses-Coverity-Id: 1373865 Signed-off-by: Eric Sandeen Reviewed-by: Bill O'Donnell Reviewed-by: Darrick J. Wong Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_bmap.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 0283b7eaf973..80bdb11ca6bf 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -3974,9 +3974,6 @@ xfs_bmap_remap_alloc( * allocating, so skip that check by pretending to be freeing. */ error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING); - if (error) - goto error0; -error0: xfs_perag_put(args.pag); if (error) trace_xfs_bmap_remap_alloc_error(ap->ip, error, _RET_IP_); From d099245297e28fc9f8493edd9d5a1f0967a72511 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 20 Oct 2016 15:45:40 +1100 Subject: [PATCH 348/884] xfs: unset MS_ACTIVE if mount fails As part of the inode block map intent log item recovery process, we had to set the IRECOVERY flag to prevent an unlinked inode from being truncated during the first iput call. This required us to set MS_ACTIVE so that iput puts the inode on the lru instead of immediately evicting the inode. Unfortunately, if the mount fails later on, the inodes that have been loaded (root dir and realtime) actually need to be evicted since we're aborting the mount. If we don't clear MS_ACTIVE in the failure step, those inodes are not evicted and therefore leak. The leak was found by running xfs/130 and rmmoding xfs immediately after the test. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner --- fs/xfs/xfs_mount.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index fc7873942bea..b341f10cf481 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1009,6 +1009,7 @@ xfs_mountfs( out_quota: xfs_qm_unmount_quotas(mp); out_rtunmount: + mp->m_super->s_flags &= ~MS_ACTIVE; xfs_rtunmount_inodes(mp); out_rele_rip: IRELE(rip); From 58d789678546d46d7bbd809dd7dab417c0f23655 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 20 Oct 2016 15:46:18 +1100 Subject: [PATCH 349/884] libxfs: clean up _calc_dquots_per_chunk The function xfs_calc_dquots_per_chunk takes a parameter in units of basic blocks. The kernel seems to get the units wrong, but userspace got 'fixed' by commenting out the unnecessary conversion. Fix both. cc: Signed-off-by: Darrick J. Wong Reviewed-by: Eric Sandeen Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_dquot_buf.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c index 3cc3cf767474..ac9a003dd29a 100644 --- a/fs/xfs/libxfs/xfs_dquot_buf.c +++ b/fs/xfs/libxfs/xfs_dquot_buf.c @@ -191,8 +191,7 @@ xfs_dquot_buf_verify_crc( if (mp->m_quotainfo) ndquots = mp->m_quotainfo->qi_dqperchunk; else - ndquots = xfs_calc_dquots_per_chunk( - XFS_BB_TO_FSB(mp, bp->b_length)); + ndquots = xfs_calc_dquots_per_chunk(bp->b_length); for (i = 0; i < ndquots; i++, d++) { if (!xfs_verify_cksum((char *)d, sizeof(struct xfs_dqblk), From 8cdcc8102c0cfad20513ed1bfb96e0e9963928a8 Mon Sep 17 00:00:00 2001 From: Roger Willcocks Date: Thu, 20 Oct 2016 15:48:38 +1100 Subject: [PATCH 350/884] libxfs: v3 inodes are only valid on crc-enabled filesystems xfs_repair was not detecting that version 3 inodes are invalid for for non-CRC filesystems. The result is specific inode corruptions go undetected and hence aren't repaired if only the version number is out of range. The core of the problem is that the XFS_DINODE_GOOD_VERSION() macro doesn't know that valid inode versions are dependent on a superblock version number. Fix this in libxfs, and propagate the new function out into the rest of xfsprogs to fix the issue. [Darrick: port to kernel from xfsprogs] Reported-by: Leslie Rhorer Signed-off-by: Roger Willcocks Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_format.h | 1 - fs/xfs/libxfs/xfs_inode_buf.c | 13 ++++++++++++- fs/xfs/libxfs/xfs_inode_buf.h | 2 ++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index f6547fc5e016..6b7579e7b60a 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -865,7 +865,6 @@ typedef struct xfs_timestamp { * padding field for v3 inodes. */ #define XFS_DINODE_MAGIC 0x494e /* 'IN' */ -#define XFS_DINODE_GOOD_VERSION(v) ((v) >= 1 && (v) <= 3) typedef struct xfs_dinode { __be16 di_magic; /* inode magic # = XFS_DINODE_MAGIC */ __be16 di_mode; /* mode and type of file */ diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 8de9a3a29589..134424fac434 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -57,6 +57,17 @@ xfs_inobp_check( } #endif +bool +xfs_dinode_good_version( + struct xfs_mount *mp, + __u8 version) +{ + if (xfs_sb_version_hascrc(&mp->m_sb)) + return version == 3; + + return version == 1 || version == 2; +} + /* * If we are doing readahead on an inode buffer, we might be in log recovery * reading an inode allocation buffer that hasn't yet been replayed, and hence @@ -91,7 +102,7 @@ xfs_inode_buf_verify( dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog)); di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) && - XFS_DINODE_GOOD_VERSION(dip->di_version); + xfs_dinode_good_version(mp, dip->di_version); if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP, XFS_RANDOM_ITOBP_INOTOBP))) { diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h index 62d9d4681c8c..3cfe12a4f58a 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.h +++ b/fs/xfs/libxfs/xfs_inode_buf.h @@ -74,6 +74,8 @@ void xfs_inode_from_disk(struct xfs_inode *ip, struct xfs_dinode *from); void xfs_log_dinode_to_disk(struct xfs_log_dinode *from, struct xfs_dinode *to); +bool xfs_dinode_good_version(struct xfs_mount *mp, __u8 version); + #if defined(DEBUG) void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *); #else From 4fbc2c65255f77b315a4ee3ccac397d677a35737 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Oct 2016 15:48:54 +1100 Subject: [PATCH 351/884] xfs: remove the same fs check from xfs_file_share_range The VFS already does the check, and the placement of this duplicate is in the way of the following locking rework. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Dave Chinner --- fs/xfs/xfs_file.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 0dc9971d3c84..194f8f396e4d 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -965,9 +965,6 @@ xfs_file_share_range( IS_SWAPFILE(inode_out)) return -ETXTBSY; - /* Reflink only works within this filesystem. */ - if (inode_in->i_sb != inode_out->i_sb) - return -EXDEV; same_inode = (inode_in->i_ino == inode_out->i_ino); /* Don't reflink dirs, pipes, sockets... */ From a62e82b35b97e60e9e22a4e303900f342139822f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Oct 2016 15:49:03 +1100 Subject: [PATCH 352/884] xfs: fix the same_inode check in xfs_file_share_range The VFS i_ino is an unsigned long, while XFS inode numbers are 64-bit wide, so checking i_ino for equality could lead to rate false positives on 32-bit architectures. Just compare the inode pointers themselves to be safe. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Dave Chinner --- fs/xfs/xfs_file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 194f8f396e4d..d5b835e82b2d 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -965,7 +965,7 @@ xfs_file_share_range( IS_SWAPFILE(inode_out)) return -ETXTBSY; - same_inode = (inode_in->i_ino == inode_out->i_ino); + same_inode = (inode_in == inode_out); /* Don't reflink dirs, pipes, sockets... */ if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) From 576177818e6f1e65f6109ed4a8fae8b60131c861 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Oct 2016 15:49:19 +1100 Subject: [PATCH 353/884] xfs: move inode locking from xfs_reflink_remap_range to xfs_file_share_range We need the iolock protection to stabilizie the IS_SWAPFILE and IS_IMMUTABLE values, as well as preventing new buffered writers re-dirtying the file data that we just wrote out. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Dave Chinner --- fs/xfs/xfs_file.c | 64 +++++++++++++++++++++++++++++--------------- fs/xfs/xfs_reflink.c | 15 ----------- 2 files changed, 42 insertions(+), 37 deletions(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index d5b835e82b2d..663761edd778 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -958,38 +958,54 @@ xfs_file_share_range( inode_out = file_inode(file_out); bs = inode_out->i_sb->s_blocksize; - /* Don't touch certain kinds of inodes */ - if (IS_IMMUTABLE(inode_out)) - return -EPERM; - if (IS_SWAPFILE(inode_in) || - IS_SWAPFILE(inode_out)) - return -ETXTBSY; - + /* Lock both files against IO */ same_inode = (inode_in == inode_out); + if (same_inode) { + xfs_ilock(XFS_I(inode_in), XFS_IOLOCK_EXCL); + xfs_ilock(XFS_I(inode_in), XFS_MMAPLOCK_EXCL); + } else { + xfs_lock_two_inodes(XFS_I(inode_in), XFS_I(inode_out), + XFS_IOLOCK_EXCL); + xfs_lock_two_inodes(XFS_I(inode_in), XFS_I(inode_out), + XFS_MMAPLOCK_EXCL); + } + + /* Don't touch certain kinds of inodes */ + ret = -EPERM; + if (IS_IMMUTABLE(inode_out)) + goto out_unlock; + ret = -ETXTBSY; + if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out)) + goto out_unlock; /* Don't reflink dirs, pipes, sockets... */ + ret = -EISDIR; if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) - return -EISDIR; + goto out_unlock; + ret = -EINVAL; if (S_ISFIFO(inode_in->i_mode) || S_ISFIFO(inode_out->i_mode)) - return -EINVAL; + goto out_unlock; if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) - return -EINVAL; + goto out_unlock; /* Don't share DAX file data for now. */ if (IS_DAX(inode_in) || IS_DAX(inode_out)) - return -EINVAL; + goto out_unlock; /* Are we going all the way to the end? */ isize = i_size_read(inode_in); - if (isize == 0) - return 0; + if (isize == 0) { + ret = 0; + goto out_unlock; + } + if (len == 0) len = isize - pos_in; /* Ensure offsets don't wrap and the input is inside i_size */ if (pos_in + len < pos_in || pos_out + len < pos_out || pos_in + len > isize) - return -EINVAL; + goto out_unlock; /* Don't allow dedupe past EOF in the dest file */ if (is_dedupe) { @@ -997,7 +1013,7 @@ xfs_file_share_range( disize = i_size_read(inode_out); if (pos_out >= disize || pos_out + len > disize) - return -EINVAL; + goto out_unlock; } /* If we're linking to EOF, continue to the block boundary. */ @@ -1009,28 +1025,32 @@ xfs_file_share_range( /* Only reflink if we're aligned to block boundaries */ if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) || !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs)) - return -EINVAL; + goto out_unlock; /* Don't allow overlapped reflink within the same file */ if (same_inode && pos_out + blen > pos_in && pos_out < pos_in + blen) - return -EINVAL; + goto out_unlock; /* Wait for the completion of any pending IOs on srcfile */ ret = xfs_file_wait_for_io(inode_in, pos_in, len); if (ret) - goto out; + goto out_unlock; ret = xfs_file_wait_for_io(inode_out, pos_out, len); if (ret) - goto out; + goto out_unlock; if (is_dedupe) flags |= XFS_REFLINK_DEDUPE; ret = xfs_reflink_remap_range(XFS_I(inode_in), pos_in, XFS_I(inode_out), pos_out, len, flags); - if (ret < 0) - goto out; -out: +out_unlock: + xfs_iunlock(XFS_I(inode_in), XFS_MMAPLOCK_EXCL); + xfs_iunlock(XFS_I(inode_in), XFS_IOLOCK_EXCL); + if (!same_inode) { + xfs_iunlock(XFS_I(inode_out), XFS_MMAPLOCK_EXCL); + xfs_iunlock(XFS_I(inode_out), XFS_IOLOCK_EXCL); + } return ret; } diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index d48a7cc2fe00..3b1c1a6bb5da 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1341,15 +1341,6 @@ xfs_reflink_remap_range( trace_xfs_reflink_remap_range(src, srcoff, len, dest, destoff); - /* Lock both files against IO */ - if (src->i_ino == dest->i_ino) { - xfs_ilock(src, XFS_IOLOCK_EXCL); - xfs_ilock(src, XFS_MMAPLOCK_EXCL); - } else { - xfs_lock_two_inodes(src, dest, XFS_IOLOCK_EXCL); - xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL); - } - /* * Check that the extents are the same. */ @@ -1401,12 +1392,6 @@ xfs_reflink_remap_range( goto out_error; out_error: - xfs_iunlock(src, XFS_MMAPLOCK_EXCL); - xfs_iunlock(src, XFS_IOLOCK_EXCL); - if (src->i_ino != dest->i_ino) { - xfs_iunlock(dest, XFS_MMAPLOCK_EXCL); - xfs_iunlock(dest, XFS_IOLOCK_EXCL); - } if (error) trace_xfs_reflink_remap_range_error(dest, error, _RET_IP_); return error; From ec40759902556f21f37641ad9f19d02c4dd4b555 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Oct 2016 15:49:55 +1100 Subject: [PATCH 354/884] xfs: remove xfs_file_wait_for_io filemap_write_and_wait_range operates on full pages, so there is no need for the rounding operations. Additionally this allows us to micro-optimize by skipping the second inode_dio_wait for a intra-file clone. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Dave Chinner --- fs/xfs/xfs_file.c | 39 ++++++++++----------------------------- 1 file changed, 10 insertions(+), 29 deletions(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 663761edd778..93729752bccb 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -909,32 +909,6 @@ out_unlock: return error; } -/* - * Flush all file writes out to disk. - */ -static int -xfs_file_wait_for_io( - struct inode *inode, - loff_t offset, - size_t len) -{ - loff_t rounding; - loff_t ioffset; - loff_t iendoffset; - loff_t bs; - int ret; - - bs = inode->i_sb->s_blocksize; - inode_dio_wait(inode); - - rounding = max_t(xfs_off_t, bs, PAGE_SIZE); - ioffset = round_down(offset, rounding); - iendoffset = round_up(offset + len, rounding) - 1; - ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, - iendoffset); - return ret; -} - /* Hook up to the VFS reflink function */ STATIC int xfs_file_share_range( @@ -1031,11 +1005,18 @@ xfs_file_share_range( if (same_inode && pos_out + blen > pos_in && pos_out < pos_in + blen) goto out_unlock; - /* Wait for the completion of any pending IOs on srcfile */ - ret = xfs_file_wait_for_io(inode_in, pos_in, len); + /* Wait for the completion of any pending IOs on both files */ + inode_dio_wait(inode_in); + if (!same_inode) + inode_dio_wait(inode_out); + + ret = filemap_write_and_wait_range(inode_in->i_mapping, + pos_in, pos_in + len - 1); if (ret) goto out_unlock; - ret = xfs_file_wait_for_io(inode_out, pos_out, len); + + ret = filemap_write_and_wait_range(inode_out->i_mapping, + pos_out, pos_out + len - 1); if (ret) goto out_unlock; From 5faaf4fa0a20d38edc4df57baf24ea35b7e91178 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Oct 2016 15:50:07 +1100 Subject: [PATCH 355/884] xfs: merge xfs_reflink_remap_range and xfs_file_share_range There is no clear division of responsibility between those functions, so just merge them into one to keep the code simple. Also move xfs_file_wait_for_io to xfs_reflink.c together with its only caller. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Dave Chinner --- fs/xfs/xfs_file.c | 132 +------------------------------- fs/xfs/xfs_reflink.c | 178 +++++++++++++++++++++++++++++++++---------- fs/xfs/xfs_reflink.h | 7 +- 3 files changed, 143 insertions(+), 174 deletions(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 93729752bccb..6e4f7f900fea 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -909,132 +909,6 @@ out_unlock: return error; } -/* Hook up to the VFS reflink function */ -STATIC int -xfs_file_share_range( - struct file *file_in, - loff_t pos_in, - struct file *file_out, - loff_t pos_out, - u64 len, - bool is_dedupe) -{ - struct inode *inode_in; - struct inode *inode_out; - ssize_t ret; - loff_t bs; - loff_t isize; - int same_inode; - loff_t blen; - unsigned int flags = 0; - - inode_in = file_inode(file_in); - inode_out = file_inode(file_out); - bs = inode_out->i_sb->s_blocksize; - - /* Lock both files against IO */ - same_inode = (inode_in == inode_out); - if (same_inode) { - xfs_ilock(XFS_I(inode_in), XFS_IOLOCK_EXCL); - xfs_ilock(XFS_I(inode_in), XFS_MMAPLOCK_EXCL); - } else { - xfs_lock_two_inodes(XFS_I(inode_in), XFS_I(inode_out), - XFS_IOLOCK_EXCL); - xfs_lock_two_inodes(XFS_I(inode_in), XFS_I(inode_out), - XFS_MMAPLOCK_EXCL); - } - - /* Don't touch certain kinds of inodes */ - ret = -EPERM; - if (IS_IMMUTABLE(inode_out)) - goto out_unlock; - ret = -ETXTBSY; - if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out)) - goto out_unlock; - - /* Don't reflink dirs, pipes, sockets... */ - ret = -EISDIR; - if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) - goto out_unlock; - ret = -EINVAL; - if (S_ISFIFO(inode_in->i_mode) || S_ISFIFO(inode_out->i_mode)) - goto out_unlock; - if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) - goto out_unlock; - - /* Don't share DAX file data for now. */ - if (IS_DAX(inode_in) || IS_DAX(inode_out)) - goto out_unlock; - - /* Are we going all the way to the end? */ - isize = i_size_read(inode_in); - if (isize == 0) { - ret = 0; - goto out_unlock; - } - - if (len == 0) - len = isize - pos_in; - - /* Ensure offsets don't wrap and the input is inside i_size */ - if (pos_in + len < pos_in || pos_out + len < pos_out || - pos_in + len > isize) - goto out_unlock; - - /* Don't allow dedupe past EOF in the dest file */ - if (is_dedupe) { - loff_t disize; - - disize = i_size_read(inode_out); - if (pos_out >= disize || pos_out + len > disize) - goto out_unlock; - } - - /* If we're linking to EOF, continue to the block boundary. */ - if (pos_in + len == isize) - blen = ALIGN(isize, bs) - pos_in; - else - blen = len; - - /* Only reflink if we're aligned to block boundaries */ - if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) || - !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs)) - goto out_unlock; - - /* Don't allow overlapped reflink within the same file */ - if (same_inode && pos_out + blen > pos_in && pos_out < pos_in + blen) - goto out_unlock; - - /* Wait for the completion of any pending IOs on both files */ - inode_dio_wait(inode_in); - if (!same_inode) - inode_dio_wait(inode_out); - - ret = filemap_write_and_wait_range(inode_in->i_mapping, - pos_in, pos_in + len - 1); - if (ret) - goto out_unlock; - - ret = filemap_write_and_wait_range(inode_out->i_mapping, - pos_out, pos_out + len - 1); - if (ret) - goto out_unlock; - - if (is_dedupe) - flags |= XFS_REFLINK_DEDUPE; - ret = xfs_reflink_remap_range(XFS_I(inode_in), pos_in, XFS_I(inode_out), - pos_out, len, flags); - -out_unlock: - xfs_iunlock(XFS_I(inode_in), XFS_MMAPLOCK_EXCL); - xfs_iunlock(XFS_I(inode_in), XFS_IOLOCK_EXCL); - if (!same_inode) { - xfs_iunlock(XFS_I(inode_out), XFS_MMAPLOCK_EXCL); - xfs_iunlock(XFS_I(inode_out), XFS_IOLOCK_EXCL); - } - return ret; -} - STATIC ssize_t xfs_file_copy_range( struct file *file_in, @@ -1046,7 +920,7 @@ xfs_file_copy_range( { int error; - error = xfs_file_share_range(file_in, pos_in, file_out, pos_out, + error = xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out, len, false); if (error) return error; @@ -1061,7 +935,7 @@ xfs_file_clone_range( loff_t pos_out, u64 len) { - return xfs_file_share_range(file_in, pos_in, file_out, pos_out, + return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out, len, false); } @@ -1084,7 +958,7 @@ xfs_file_dedupe_range( if (len > XFS_MAX_DEDUPE_LEN) len = XFS_MAX_DEDUPE_LEN; - error = xfs_file_share_range(src_file, loff, dst_file, dst_loff, + error = xfs_reflink_remap_range(src_file, loff, dst_file, dst_loff, len, true); if (error) return error; diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 3b1c1a6bb5da..6592daa833a4 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -1312,19 +1312,26 @@ out_error: */ int xfs_reflink_remap_range( - struct xfs_inode *src, - xfs_off_t srcoff, - struct xfs_inode *dest, - xfs_off_t destoff, - xfs_off_t len, - unsigned int flags) + struct file *file_in, + loff_t pos_in, + struct file *file_out, + loff_t pos_out, + u64 len, + bool is_dedupe) { + struct inode *inode_in = file_inode(file_in); + struct xfs_inode *src = XFS_I(inode_in); + struct inode *inode_out = file_inode(file_out); + struct xfs_inode *dest = XFS_I(inode_out); struct xfs_mount *mp = src->i_mount; + loff_t bs = inode_out->i_sb->s_blocksize; + bool same_inode = (inode_in == inode_out); xfs_fileoff_t sfsbno, dfsbno; xfs_filblks_t fsblen; - int error; xfs_extlen_t cowextsize; - bool is_same; + loff_t isize; + ssize_t ret; + loff_t blen; if (!xfs_sb_version_hasreflink(&mp->m_sb)) return -EOPNOTSUPP; @@ -1332,48 +1339,135 @@ xfs_reflink_remap_range( if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; + /* Lock both files against IO */ + if (same_inode) { + xfs_ilock(src, XFS_IOLOCK_EXCL); + xfs_ilock(src, XFS_MMAPLOCK_EXCL); + } else { + xfs_lock_two_inodes(src, dest, XFS_IOLOCK_EXCL); + xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL); + } + + /* Don't touch certain kinds of inodes */ + ret = -EPERM; + if (IS_IMMUTABLE(inode_out)) + goto out_unlock; + + ret = -ETXTBSY; + if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out)) + goto out_unlock; + + + /* Don't reflink dirs, pipes, sockets... */ + ret = -EISDIR; + if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) + goto out_unlock; + ret = -EINVAL; + if (S_ISFIFO(inode_in->i_mode) || S_ISFIFO(inode_out->i_mode)) + goto out_unlock; + if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) + goto out_unlock; + /* Don't reflink realtime inodes */ if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest)) - return -EINVAL; + goto out_unlock; - if (flags & ~XFS_REFLINK_ALL) - return -EINVAL; + /* Don't share DAX file data for now. */ + if (IS_DAX(inode_in) || IS_DAX(inode_out)) + goto out_unlock; - trace_xfs_reflink_remap_range(src, srcoff, len, dest, destoff); + /* Are we going all the way to the end? */ + isize = i_size_read(inode_in); + if (isize == 0) { + ret = 0; + goto out_unlock; + } + + if (len == 0) + len = isize - pos_in; + + /* Ensure offsets don't wrap and the input is inside i_size */ + if (pos_in + len < pos_in || pos_out + len < pos_out || + pos_in + len > isize) + goto out_unlock; + + /* Don't allow dedupe past EOF in the dest file */ + if (is_dedupe) { + loff_t disize; + + disize = i_size_read(inode_out); + if (pos_out >= disize || pos_out + len > disize) + goto out_unlock; + } + + /* If we're linking to EOF, continue to the block boundary. */ + if (pos_in + len == isize) + blen = ALIGN(isize, bs) - pos_in; + else + blen = len; + + /* Only reflink if we're aligned to block boundaries */ + if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) || + !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs)) + goto out_unlock; + + /* Don't allow overlapped reflink within the same file */ + if (same_inode) { + if (pos_out + blen > pos_in && pos_out < pos_in + blen) + goto out_unlock; + } + + /* Wait for the completion of any pending IOs on both files */ + inode_dio_wait(inode_in); + if (!same_inode) + inode_dio_wait(inode_out); + + ret = filemap_write_and_wait_range(inode_in->i_mapping, + pos_in, pos_in + len - 1); + if (ret) + goto out_unlock; + + ret = filemap_write_and_wait_range(inode_out->i_mapping, + pos_out, pos_out + len - 1); + if (ret) + goto out_unlock; + + trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out); /* * Check that the extents are the same. */ - if (flags & XFS_REFLINK_DEDUPE) { - is_same = false; - error = xfs_compare_extents(VFS_I(src), srcoff, VFS_I(dest), - destoff, len, &is_same); - if (error) - goto out_error; + if (is_dedupe) { + bool is_same = false; + + ret = xfs_compare_extents(inode_in, pos_in, inode_out, pos_out, + len, &is_same); + if (ret) + goto out_unlock; if (!is_same) { - error = -EBADE; - goto out_error; + ret = -EBADE; + goto out_unlock; } } - error = xfs_reflink_set_inode_flag(src, dest); - if (error) - goto out_error; + ret = xfs_reflink_set_inode_flag(src, dest); + if (ret) + goto out_unlock; /* * Invalidate the page cache so that we can clear any CoW mappings * in the destination file. */ - truncate_inode_pages_range(&VFS_I(dest)->i_data, destoff, - PAGE_ALIGN(destoff + len) - 1); + truncate_inode_pages_range(&inode_out->i_data, pos_out, + PAGE_ALIGN(pos_out + len) - 1); - dfsbno = XFS_B_TO_FSBT(mp, destoff); - sfsbno = XFS_B_TO_FSBT(mp, srcoff); + dfsbno = XFS_B_TO_FSBT(mp, pos_out); + sfsbno = XFS_B_TO_FSBT(mp, pos_in); fsblen = XFS_B_TO_FSB(mp, len); - error = xfs_reflink_remap_blocks(src, sfsbno, dest, dfsbno, fsblen, - destoff + len); - if (error) - goto out_error; + ret = xfs_reflink_remap_blocks(src, sfsbno, dest, dfsbno, fsblen, + pos_out + len); + if (ret) + goto out_unlock; /* * Carry the cowextsize hint from src to dest if we're sharing the @@ -1381,20 +1475,24 @@ xfs_reflink_remap_range( * has a cowextsize hint, and the destination file does not. */ cowextsize = 0; - if (srcoff == 0 && len == i_size_read(VFS_I(src)) && + if (pos_in == 0 && len == i_size_read(inode_in) && (src->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) && - destoff == 0 && len >= i_size_read(VFS_I(dest)) && + pos_out == 0 && len >= i_size_read(inode_out) && !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE)) cowextsize = src->i_d.di_cowextsize; - error = xfs_reflink_update_dest(dest, destoff + len, cowextsize); - if (error) - goto out_error; + ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize); -out_error: - if (error) - trace_xfs_reflink_remap_range_error(dest, error, _RET_IP_); - return error; +out_unlock: + xfs_iunlock(src, XFS_MMAPLOCK_EXCL); + xfs_iunlock(src, XFS_IOLOCK_EXCL); + if (src->i_ino != dest->i_ino) { + xfs_iunlock(dest, XFS_MMAPLOCK_EXCL); + xfs_iunlock(dest, XFS_IOLOCK_EXCL); + } + if (ret) + trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_); + return ret; } /* diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h index 5dc3c8ac12aa..7ddd9f69560d 100644 --- a/fs/xfs/xfs_reflink.h +++ b/fs/xfs/xfs_reflink.h @@ -43,11 +43,8 @@ extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset, extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset, xfs_off_t count); extern int xfs_reflink_recover_cow(struct xfs_mount *mp); -#define XFS_REFLINK_DEDUPE 1 /* only reflink if contents match */ -#define XFS_REFLINK_ALL (XFS_REFLINK_DEDUPE) -extern int xfs_reflink_remap_range(struct xfs_inode *src, xfs_off_t srcoff, - struct xfs_inode *dest, xfs_off_t destoff, xfs_off_t len, - unsigned int flags); +extern int xfs_reflink_remap_range(struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, u64 len, bool is_dedupe); extern int xfs_reflink_clear_inode_flag(struct xfs_inode *ip, struct xfs_trans **tpp); extern int xfs_reflink_unshare(struct xfs_inode *ip, xfs_off_t offset, From d33fd776f992332be110f6ceca6dad60cb5d513f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Oct 2016 15:51:28 +1100 Subject: [PATCH 356/884] iomap: add IOMAP_REPORT This allows the file system to tell a FIEMAP from a read operation, and thus avoids the need to report flags that aren't actually used in the read path. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Reviewed-by: Brian Foster Signed-off-by: Dave Chinner --- fs/iomap.c | 2 +- include/linux/iomap.h | 17 +++++++++++------ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/fs/iomap.c b/fs/iomap.c index 013d1d36fbbf..a92204012e2d 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -561,7 +561,7 @@ int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fi, } while (len > 0) { - ret = iomap_apply(inode, start, len, 0, ops, &ctx, + ret = iomap_apply(inode, start, len, IOMAP_REPORT, ops, &ctx, iomap_fiemap_actor); /* inode with no (attribute) mapping will give ENOENT */ if (ret == -ENOENT) diff --git a/include/linux/iomap.h b/include/linux/iomap.h index e63e288dee83..7892f55a1866 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -19,11 +19,15 @@ struct vm_fault; #define IOMAP_UNWRITTEN 0x04 /* blocks allocated @blkno in unwritten state */ /* - * Flags for iomap mappings: + * Flags for all iomap mappings: */ -#define IOMAP_F_MERGED 0x01 /* contains multiple blocks/extents */ -#define IOMAP_F_SHARED 0x02 /* block shared with another file */ -#define IOMAP_F_NEW 0x04 /* blocks have been newly allocated */ +#define IOMAP_F_NEW 0x01 /* blocks have been newly allocated */ + +/* + * Flags that only need to be reported for IOMAP_REPORT requests: + */ +#define IOMAP_F_MERGED 0x10 /* contains multiple blocks/extents */ +#define IOMAP_F_SHARED 0x20 /* block shared with another file */ /* * Magic value for blkno: @@ -42,8 +46,9 @@ struct iomap { /* * Flags for iomap_begin / iomap_end. No flag implies a read. */ -#define IOMAP_WRITE (1 << 0) -#define IOMAP_ZERO (1 << 1) +#define IOMAP_WRITE (1 << 0) /* writing, must allocate blocks */ +#define IOMAP_ZERO (1 << 1) /* zeroing operation, may skip holes */ +#define IOMAP_REPORT (1 << 2) /* report extent status, e.g. FIEMAP */ struct iomap_ops { /* From 0a0af28cad9a43d90f13c2047bd8ee3d4cffb7f3 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 20 Oct 2016 15:51:50 +1100 Subject: [PATCH 357/884] xfs: add xfs_trim_extent This helpers allows to trim an extent to a subset of it's original range while making sure the block numbers in it remain valid, In the future xfs_trim_extent and xfs_bmapi_trim_map should probably be merged in some form. Signed-off-by: Darrick J. Wong [hch: split from a previous patch from Darrick, moved around and added support for "raw" delayed extents"] Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Reviewed-by: Brian Foster Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_bmap.c | 33 +++++++++++++++++++++++++++++++++ fs/xfs/libxfs/xfs_bmap.h | 2 ++ 2 files changed, 35 insertions(+) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 80bdb11ca6bf..381e7659598c 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -3996,6 +3996,39 @@ xfs_bmap_alloc( return xfs_bmap_btalloc(ap); } +/* Trim extent to fit a logical block range. */ +void +xfs_trim_extent( + struct xfs_bmbt_irec *irec, + xfs_fileoff_t bno, + xfs_filblks_t len) +{ + xfs_fileoff_t distance; + xfs_fileoff_t end = bno + len; + + if (irec->br_startoff + irec->br_blockcount <= bno || + irec->br_startoff >= end) { + irec->br_blockcount = 0; + return; + } + + if (irec->br_startoff < bno) { + distance = bno - irec->br_startoff; + if (isnullstartblock(irec->br_startblock)) + irec->br_startblock = DELAYSTARTBLOCK; + if (irec->br_startblock != DELAYSTARTBLOCK && + irec->br_startblock != HOLESTARTBLOCK) + irec->br_startblock += distance; + irec->br_startoff += distance; + irec->br_blockcount -= distance; + } + + if (end < irec->br_startoff + irec->br_blockcount) { + distance = irec->br_startoff + irec->br_blockcount - end; + irec->br_blockcount -= distance; + } +} + /* * Trim the returned map to the required bounds */ diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index f97db7132564..eb86af0c988b 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -190,6 +190,8 @@ void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt, #define XFS_BMAP_TRACE_EXLIST(ip,c,w) #endif +void xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno, + xfs_filblks_t len); int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd); void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork); void xfs_bmap_add_free(struct xfs_mount *mp, struct xfs_defer_ops *dfops, From 62c5ac89de7d5eecdbaa94ca3d554b0bd41578b1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Oct 2016 15:52:00 +1100 Subject: [PATCH 358/884] xfs: handle "raw" delayed extents xfs_reflink_trim_around_shared Delalloc extents in the extent list contain the number of reserved indirect blocks in their startblock value and don't use the magic DELAYSTARTBLOCK constant. Ensure that xfs_reflink_trim_around_shared handles them properly by checking for isnullstartblock(). Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Reviewed-by: Brian Foster Signed-off-by: Dave Chinner --- fs/xfs/xfs_reflink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 6592daa833a4..6c4c215634ec 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -182,7 +182,8 @@ xfs_reflink_trim_around_shared( if (!xfs_is_reflink_inode(ip) || ISUNWRITTEN(irec) || irec->br_startblock == HOLESTARTBLOCK || - irec->br_startblock == DELAYSTARTBLOCK) { + irec->br_startblock == DELAYSTARTBLOCK || + isnullstartblock(irec->br_startblock)) { *shared = false; return 0; } From 5f9268ca53aca992106d74edde3e7cf6c1be60a0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Oct 2016 15:53:32 +1100 Subject: [PATCH 359/884] xfs: don't bother looking at the refcount tree for reads There is no need to trim an extent into a shared or non-shared one, or report any flags for plain old reads. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Reviewed-by: Brian Foster Signed-off-by: Dave Chinner --- fs/xfs/xfs_iomap.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index d907eb9f8ef3..1dabf2eb136a 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -996,11 +996,14 @@ xfs_file_iomap_begin( return error; } - /* Trim the mapping to the nearest shared extent boundary. */ - error = xfs_reflink_trim_around_shared(ip, &imap, &shared, &trimmed); - if (error) { - xfs_iunlock(ip, lockmode); - return error; + if (flags & (IOMAP_WRITE | IOMAP_ZERO | IOMAP_REPORT)) { + /* Trim the mapping to the nearest shared extent boundary. */ + error = xfs_reflink_trim_around_shared(ip, &imap, &shared, + &trimmed); + if (error) { + xfs_iunlock(ip, lockmode); + return error; + } } if ((flags & IOMAP_WRITE) && imap_needs_alloc(inode, &imap, nimaps)) { From 3ba020befef030aaabbd5eb82a09f6ddf02a9542 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Oct 2016 15:53:50 +1100 Subject: [PATCH 360/884] xfs: optimize writes to reflink files Instead of reserving space as the first thing in write_begin move it past reading the extent in the data fork. That way we only have to read from the data fork once and can reuse that information for trimming the extent to the shared/unshared boundary. Additionally this allows to easily limit the actual write size to said boundary, and avoid a roundtrip on the ilock. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Reviewed-by: Brian Foster Signed-off-by: Dave Chinner --- fs/xfs/xfs_iomap.c | 56 +++++++++++------ fs/xfs/xfs_reflink.c | 142 ++++++++++++++++++------------------------- fs/xfs/xfs_reflink.h | 4 +- fs/xfs/xfs_trace.h | 3 +- 4 files changed, 100 insertions(+), 105 deletions(-) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 1dabf2eb136a..436e109bb01e 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -566,6 +566,17 @@ xfs_file_iomap_begin_delay( xfs_bmap_search_extents(ip, offset_fsb, XFS_DATA_FORK, &eof, &idx, &got, &prev); if (!eof && got.br_startoff <= offset_fsb) { + if (xfs_is_reflink_inode(ip)) { + bool shared; + + end_fsb = min(XFS_B_TO_FSB(mp, offset + count), + maxbytes_fsb); + xfs_trim_extent(&got, offset_fsb, end_fsb - offset_fsb); + error = xfs_reflink_reserve_cow(ip, &got, &shared); + if (error) + goto out_unlock; + } + trace_xfs_iomap_found(ip, offset, count, 0, &got); goto done; } @@ -961,19 +972,13 @@ xfs_file_iomap_begin( struct xfs_mount *mp = ip->i_mount; struct xfs_bmbt_irec imap; xfs_fileoff_t offset_fsb, end_fsb; - bool shared, trimmed; int nimaps = 1, error = 0; + bool shared = false, trimmed = false; unsigned lockmode; if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; - if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) { - error = xfs_reflink_reserve_cow_range(ip, offset, length); - if (error < 0) - return error; - } - if ((flags & IOMAP_WRITE) && !IS_DAX(inode) && !xfs_get_extsz_hint(ip)) { /* Reserve delalloc blocks for regular writeback. */ @@ -981,7 +986,16 @@ xfs_file_iomap_begin( iomap); } - lockmode = xfs_ilock_data_map_shared(ip); + /* + * COW writes will allocate delalloc space, so we need to make sure + * to take the lock exclusively here. + */ + if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) { + lockmode = XFS_ILOCK_EXCL; + xfs_ilock(ip, XFS_ILOCK_EXCL); + } else { + lockmode = xfs_ilock_data_map_shared(ip); + } ASSERT(offset <= mp->m_super->s_maxbytes); if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes) @@ -991,19 +1005,24 @@ xfs_file_iomap_begin( error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, &nimaps, 0); - if (error) { - xfs_iunlock(ip, lockmode); - return error; - } + if (error) + goto out_unlock; - if (flags & (IOMAP_WRITE | IOMAP_ZERO | IOMAP_REPORT)) { + if (flags & IOMAP_REPORT) { /* Trim the mapping to the nearest shared extent boundary. */ error = xfs_reflink_trim_around_shared(ip, &imap, &shared, &trimmed); - if (error) { - xfs_iunlock(ip, lockmode); - return error; - } + if (error) + goto out_unlock; + } + + if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) { + error = xfs_reflink_reserve_cow(ip, &imap, &shared); + if (error) + goto out_unlock; + + end_fsb = imap.br_startoff + imap.br_blockcount; + length = XFS_FSB_TO_B(mp, end_fsb) - offset; } if ((flags & IOMAP_WRITE) && imap_needs_alloc(inode, &imap, nimaps)) { @@ -1042,6 +1061,9 @@ xfs_file_iomap_begin( if (shared) iomap->flags |= IOMAP_F_SHARED; return 0; +out_unlock: + xfs_iunlock(ip, lockmode); + return error; } static int diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 6c4c215634ec..9c477de3c1ac 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -228,50 +228,54 @@ xfs_reflink_trim_around_shared( } } -/* Create a CoW reservation for a range of blocks within a file. */ -static int -__xfs_reflink_reserve_cow( +/* + * Trim the passed in imap to the next shared/unshared extent boundary, and + * if imap->br_startoff points to a shared extent reserve space for it in the + * COW fork. In this case *shared is set to true, else to false. + * + * Note that imap will always contain the block numbers for the existing blocks + * in the data fork, as the upper layers need them for read-modify-write + * operations. + */ +int +xfs_reflink_reserve_cow( struct xfs_inode *ip, - xfs_fileoff_t *offset_fsb, - xfs_fileoff_t end_fsb, - bool *skipped) + struct xfs_bmbt_irec *imap, + bool *shared) { - struct xfs_bmbt_irec got, prev, imap; - xfs_fileoff_t orig_end_fsb; - int nimaps, eof = 0, error = 0; - bool shared = false, trimmed = false; + struct xfs_bmbt_irec got, prev; + xfs_fileoff_t end_fsb, orig_end_fsb; + int eof = 0, error = 0; + bool trimmed; xfs_extnum_t idx; xfs_extlen_t align; - /* Already reserved? Skip the refcount btree access. */ - xfs_bmap_search_extents(ip, *offset_fsb, XFS_COW_FORK, &eof, &idx, + /* + * Search the COW fork extent list first. This serves two purposes: + * first this implement the speculative preallocation using cowextisze, + * so that we also unshared block adjacent to shared blocks instead + * of just the shared blocks themselves. Second the lookup in the + * extent list is generally faster than going out to the shared extent + * tree. + */ + xfs_bmap_search_extents(ip, imap->br_startoff, XFS_COW_FORK, &eof, &idx, &got, &prev); - if (!eof && got.br_startoff <= *offset_fsb) { - end_fsb = orig_end_fsb = got.br_startoff + got.br_blockcount; - trace_xfs_reflink_cow_found(ip, &got); - goto done; - } + if (!eof && got.br_startoff <= imap->br_startoff) { + trace_xfs_reflink_cow_found(ip, imap); + xfs_trim_extent(imap, got.br_startoff, got.br_blockcount); - /* Read extent from the source file. */ - nimaps = 1; - error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb, - &imap, &nimaps, 0); - if (error) - goto out_unlock; - ASSERT(nimaps == 1); + *shared = true; + return 0; + } /* Trim the mapping to the nearest shared extent boundary. */ - error = xfs_reflink_trim_around_shared(ip, &imap, &shared, &trimmed); + error = xfs_reflink_trim_around_shared(ip, imap, shared, &trimmed); if (error) - goto out_unlock; - - end_fsb = orig_end_fsb = imap.br_startoff + imap.br_blockcount; + return error; /* Not shared? Just report the (potentially capped) extent. */ - if (!shared) { - *skipped = true; - goto done; - } + if (!*shared) + return 0; /* * Fork all the shared blocks from our write offset until the end of @@ -279,72 +283,38 @@ __xfs_reflink_reserve_cow( */ error = xfs_qm_dqattach_locked(ip, 0); if (error) - goto out_unlock; + return error; + + end_fsb = orig_end_fsb = imap->br_startoff + imap->br_blockcount; align = xfs_eof_alignment(ip, xfs_get_cowextsz_hint(ip)); if (align) end_fsb = roundup_64(end_fsb, align); retry: - error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, *offset_fsb, - end_fsb - *offset_fsb, &got, - &prev, &idx, eof); + error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, imap->br_startoff, + end_fsb - imap->br_startoff, &got, &prev, &idx, eof); switch (error) { case 0: break; case -ENOSPC: case -EDQUOT: /* retry without any preallocation */ - trace_xfs_reflink_cow_enospc(ip, &imap); + trace_xfs_reflink_cow_enospc(ip, imap); if (end_fsb != orig_end_fsb) { end_fsb = orig_end_fsb; goto retry; } /*FALLTHRU*/ default: - goto out_unlock; + return error; } if (end_fsb != orig_end_fsb) xfs_inode_set_cowblocks_tag(ip); trace_xfs_reflink_cow_alloc(ip, &got); -done: - *offset_fsb = end_fsb; -out_unlock: - return error; -} - -/* Create a CoW reservation for part of a file. */ -int -xfs_reflink_reserve_cow_range( - struct xfs_inode *ip, - xfs_off_t offset, - xfs_off_t count) -{ - struct xfs_mount *mp = ip->i_mount; - xfs_fileoff_t offset_fsb, end_fsb; - bool skipped = false; - int error = 0; - - trace_xfs_reflink_reserve_cow_range(ip, offset, count); - - offset_fsb = XFS_B_TO_FSBT(mp, offset); - end_fsb = XFS_B_TO_FSB(mp, offset + count); - - xfs_ilock(ip, XFS_ILOCK_EXCL); - while (offset_fsb < end_fsb) { - error = __xfs_reflink_reserve_cow(ip, &offset_fsb, end_fsb, - &skipped); - if (error) { - trace_xfs_reflink_reserve_cow_range_error(ip, error, - _RET_IP_); - break; - } - } - xfs_iunlock(ip, XFS_ILOCK_EXCL); - - return error; + return 0; } /* Allocate all CoW reservations covering a range of blocks in a file. */ @@ -359,9 +329,8 @@ __xfs_reflink_allocate_cow( struct xfs_defer_ops dfops; struct xfs_trans *tp; xfs_fsblock_t first_block; - xfs_fileoff_t next_fsb; int nimaps = 1, error; - bool skipped = false; + bool shared; xfs_defer_init(&dfops, &first_block); @@ -372,33 +341,38 @@ __xfs_reflink_allocate_cow( xfs_ilock(ip, XFS_ILOCK_EXCL); - next_fsb = *offset_fsb; - error = __xfs_reflink_reserve_cow(ip, &next_fsb, end_fsb, &skipped); + /* Read extent from the source file. */ + nimaps = 1; + error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb, + &imap, &nimaps, 0); + if (error) + goto out_unlock; + ASSERT(nimaps == 1); + + error = xfs_reflink_reserve_cow(ip, &imap, &shared); if (error) goto out_trans_cancel; - if (skipped) { - *offset_fsb = next_fsb; + if (!shared) { + *offset_fsb = imap.br_startoff + imap.br_blockcount; goto out_trans_cancel; } xfs_trans_ijoin(tp, ip, 0); - error = xfs_bmapi_write(tp, ip, *offset_fsb, next_fsb - *offset_fsb, + error = xfs_bmapi_write(tp, ip, imap.br_startoff, imap.br_blockcount, XFS_BMAPI_COWFORK, &first_block, XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), &imap, &nimaps, &dfops); if (error) goto out_trans_cancel; - /* We might not have been able to map the whole delalloc extent */ - *offset_fsb = min(*offset_fsb + imap.br_blockcount, next_fsb); - error = xfs_defer_finish(&tp, &dfops, NULL); if (error) goto out_trans_cancel; error = xfs_trans_commit(tp); + *offset_fsb = imap.br_startoff + imap.br_blockcount; out_unlock: xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h index 7ddd9f69560d..fad11607c9ad 100644 --- a/fs/xfs/xfs_reflink.h +++ b/fs/xfs/xfs_reflink.h @@ -26,8 +26,8 @@ extern int xfs_reflink_find_shared(struct xfs_mount *mp, xfs_agnumber_t agno, extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip, struct xfs_bmbt_irec *irec, bool *shared, bool *trimmed); -extern int xfs_reflink_reserve_cow_range(struct xfs_inode *ip, - xfs_off_t offset, xfs_off_t count); +extern int xfs_reflink_reserve_cow(struct xfs_inode *ip, + struct xfs_bmbt_irec *imap, bool *shared); extern int xfs_reflink_allocate_cow_range(struct xfs_inode *ip, xfs_off_t offset, xfs_off_t count); extern bool xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset, diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index ad188d3a83f3..72f9f6b7a76a 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -3346,7 +3346,7 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_alloc); DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_found); DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc); -DEFINE_RW_EVENT(xfs_reflink_reserve_cow_range); +DEFINE_RW_EVENT(xfs_reflink_reserve_cow); DEFINE_RW_EVENT(xfs_reflink_allocate_cow_range); DEFINE_INODE_IREC_EVENT(xfs_reflink_bounce_dio_write); @@ -3358,7 +3358,6 @@ DEFINE_SIMPLE_IO_EVENT(xfs_reflink_end_cow); DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap); DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap_piece); -DEFINE_INODE_ERROR_EVENT(xfs_reflink_reserve_cow_range_error); DEFINE_INODE_ERROR_EVENT(xfs_reflink_allocate_cow_range_error); DEFINE_INODE_ERROR_EVENT(xfs_reflink_cancel_cow_range_error); DEFINE_INODE_ERROR_EVENT(xfs_reflink_end_cow_error); From fa5c836ca8eb5bad6316ddfc066acbc4e2485356 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Oct 2016 15:54:14 +1100 Subject: [PATCH 361/884] xfs: refactor xfs_bunmapi_cow Split out two helpers for deleting delayed or real extents from the COW fork. This allows to call them directly from xfs_reflink_cow_end_io once that function is refactored to iterate the extent tree. It will also allow to reuse the delalloc deletion from xfs_bunmapi in the future. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Reviewed-by: Brian Foster Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_bmap.c | 374 +++++++++++++++++++++++---------------- fs/xfs/libxfs/xfs_bmap.h | 5 + fs/xfs/xfs_reflink.c | 5 - 3 files changed, 225 insertions(+), 159 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 381e7659598c..d7ad51132f4f 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -4859,6 +4859,219 @@ xfs_bmap_split_indlen( return stolen; } +int +xfs_bmap_del_extent_delay( + struct xfs_inode *ip, + int whichfork, + xfs_extnum_t *idx, + struct xfs_bmbt_irec *got, + struct xfs_bmbt_irec *del) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); + struct xfs_bmbt_irec new; + int64_t da_old, da_new, da_diff = 0; + xfs_fileoff_t del_endoff, got_endoff; + xfs_filblks_t got_indlen, new_indlen, stolen; + int error = 0, state = 0; + bool isrt; + + XFS_STATS_INC(mp, xs_del_exlist); + + isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip); + del_endoff = del->br_startoff + del->br_blockcount; + got_endoff = got->br_startoff + got->br_blockcount; + da_old = startblockval(got->br_startblock); + da_new = 0; + + ASSERT(*idx >= 0); + ASSERT(*idx < ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); + ASSERT(del->br_blockcount > 0); + ASSERT(got->br_startoff <= del->br_startoff); + ASSERT(got_endoff >= del_endoff); + + if (isrt) { + int64_t rtexts = XFS_FSB_TO_B(mp, del->br_blockcount); + + do_div(rtexts, mp->m_sb.sb_rextsize); + xfs_mod_frextents(mp, rtexts); + } + + /* + * Update the inode delalloc counter now and wait to update the + * sb counters as we might have to borrow some blocks for the + * indirect block accounting. + */ + xfs_trans_reserve_quota_nblks(NULL, ip, -((long)del->br_blockcount), 0, + isrt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS); + ip->i_delayed_blks -= del->br_blockcount; + + if (whichfork == XFS_COW_FORK) + state |= BMAP_COWFORK; + + if (got->br_startoff == del->br_startoff) + state |= BMAP_LEFT_CONTIG; + if (got_endoff == del_endoff) + state |= BMAP_RIGHT_CONTIG; + + switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) { + case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: + /* + * Matches the whole extent. Delete the entry. + */ + xfs_iext_remove(ip, *idx, 1, state); + --*idx; + break; + case BMAP_LEFT_CONTIG: + /* + * Deleting the first part of the extent. + */ + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + got->br_startoff = del_endoff; + got->br_blockcount -= del->br_blockcount; + da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, + got->br_blockcount), da_old); + got->br_startblock = nullstartblock((int)da_new); + xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + break; + case BMAP_RIGHT_CONTIG: + /* + * Deleting the last part of the extent. + */ + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + got->br_blockcount = got->br_blockcount - del->br_blockcount; + da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, + got->br_blockcount), da_old); + got->br_startblock = nullstartblock((int)da_new); + xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + break; + case 0: + /* + * Deleting the middle of the extent. + * + * Distribute the original indlen reservation across the two new + * extents. Steal blocks from the deleted extent if necessary. + * Stealing blocks simply fudges the fdblocks accounting below. + * Warn if either of the new indlen reservations is zero as this + * can lead to delalloc problems. + */ + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + + got->br_blockcount = del->br_startoff - got->br_startoff; + got_indlen = xfs_bmap_worst_indlen(ip, got->br_blockcount); + + new.br_blockcount = got_endoff - del_endoff; + new_indlen = xfs_bmap_worst_indlen(ip, new.br_blockcount); + + WARN_ON_ONCE(!got_indlen || !new_indlen); + stolen = xfs_bmap_split_indlen(da_old, &got_indlen, &new_indlen, + del->br_blockcount); + + got->br_startblock = nullstartblock((int)got_indlen); + xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got); + trace_xfs_bmap_post_update(ip, *idx, 0, _THIS_IP_); + + new.br_startoff = del_endoff; + new.br_state = got->br_state; + new.br_startblock = nullstartblock((int)new_indlen); + + ++*idx; + xfs_iext_insert(ip, *idx, 1, &new, state); + + da_new = got_indlen + new_indlen - stolen; + del->br_blockcount -= stolen; + break; + } + + ASSERT(da_old >= da_new); + da_diff = da_old - da_new; + if (!isrt) + da_diff += del->br_blockcount; + if (da_diff) + xfs_mod_fdblocks(mp, da_diff, false); + return error; +} + +void +xfs_bmap_del_extent_cow( + struct xfs_inode *ip, + xfs_extnum_t *idx, + struct xfs_bmbt_irec *got, + struct xfs_bmbt_irec *del) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); + struct xfs_bmbt_irec new; + xfs_fileoff_t del_endoff, got_endoff; + int state = BMAP_COWFORK; + + XFS_STATS_INC(mp, xs_del_exlist); + + del_endoff = del->br_startoff + del->br_blockcount; + got_endoff = got->br_startoff + got->br_blockcount; + + ASSERT(*idx >= 0); + ASSERT(*idx < ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); + ASSERT(del->br_blockcount > 0); + ASSERT(got->br_startoff <= del->br_startoff); + ASSERT(got_endoff >= del_endoff); + ASSERT(!isnullstartblock(got->br_startblock)); + + if (got->br_startoff == del->br_startoff) + state |= BMAP_LEFT_CONTIG; + if (got_endoff == del_endoff) + state |= BMAP_RIGHT_CONTIG; + + switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) { + case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG: + /* + * Matches the whole extent. Delete the entry. + */ + xfs_iext_remove(ip, *idx, 1, state); + --*idx; + break; + case BMAP_LEFT_CONTIG: + /* + * Deleting the first part of the extent. + */ + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + got->br_startoff = del_endoff; + got->br_blockcount -= del->br_blockcount; + got->br_startblock = del->br_startblock + del->br_blockcount; + xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + break; + case BMAP_RIGHT_CONTIG: + /* + * Deleting the last part of the extent. + */ + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + got->br_blockcount -= del->br_blockcount; + xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + break; + case 0: + /* + * Deleting the middle of the extent. + */ + trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_); + got->br_blockcount = del->br_startoff - got->br_startoff; + xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got); + trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); + + new.br_startoff = del_endoff; + new.br_blockcount = got_endoff - del_endoff; + new.br_state = got->br_state; + new.br_startblock = del->br_startblock + del->br_blockcount; + + ++*idx; + xfs_iext_insert(ip, *idx, 1, &new, state); + break; + } +} + /* * Called by xfs_bmapi to update file extent records and the btree * after removing space (or undoing a delayed allocation). @@ -5207,167 +5420,20 @@ xfs_bunmapi_cow( struct xfs_inode *ip, struct xfs_bmbt_irec *del) { - xfs_filblks_t da_new; - xfs_filblks_t da_old; - xfs_fsblock_t del_endblock = 0; - xfs_fileoff_t del_endoff; - int delay; struct xfs_bmbt_rec_host *ep; - int error; struct xfs_bmbt_irec got; - xfs_fileoff_t got_endoff; - struct xfs_ifork *ifp; - struct xfs_mount *mp; - xfs_filblks_t nblks; struct xfs_bmbt_irec new; - /* REFERENCED */ - uint qfield; - xfs_filblks_t temp; - xfs_filblks_t temp2; - int state = BMAP_COWFORK; int eof; xfs_extnum_t eidx; - mp = ip->i_mount; - XFS_STATS_INC(mp, xs_del_exlist); - ep = xfs_bmap_search_extents(ip, del->br_startoff, XFS_COW_FORK, &eof, - &eidx, &got, &new); - - ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); - ASSERT((eidx >= 0) && (eidx < ifp->if_bytes / - (uint)sizeof(xfs_bmbt_rec_t))); - ASSERT(del->br_blockcount > 0); - ASSERT(got.br_startoff <= del->br_startoff); - del_endoff = del->br_startoff + del->br_blockcount; - got_endoff = got.br_startoff + got.br_blockcount; - ASSERT(got_endoff >= del_endoff); - delay = isnullstartblock(got.br_startblock); - ASSERT(isnullstartblock(del->br_startblock) == delay); - qfield = 0; - error = 0; - /* - * If deleting a real allocation, must free up the disk space. - */ - if (!delay) { - nblks = del->br_blockcount; - qfield = XFS_TRANS_DQ_BCOUNT; - /* - * Set up del_endblock and cur for later. - */ - del_endblock = del->br_startblock + del->br_blockcount; - da_old = da_new = 0; - } else { - da_old = startblockval(got.br_startblock); - da_new = 0; - nblks = 0; - } - qfield = qfield; - nblks = nblks; - - /* - * Set flag value to use in switch statement. - * Left-contig is 2, right-contig is 1. - */ - switch (((got.br_startoff == del->br_startoff) << 1) | - (got_endoff == del_endoff)) { - case 3: - /* - * Matches the whole extent. Delete the entry. - */ - xfs_iext_remove(ip, eidx, 1, BMAP_COWFORK); - --eidx; - break; - - case 2: - /* - * Deleting the first part of the extent. - */ - trace_xfs_bmap_pre_update(ip, eidx, state, _THIS_IP_); - xfs_bmbt_set_startoff(ep, del_endoff); - temp = got.br_blockcount - del->br_blockcount; - xfs_bmbt_set_blockcount(ep, temp); - if (delay) { - temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), - da_old); - xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_); - da_new = temp; - break; - } - xfs_bmbt_set_startblock(ep, del_endblock); - trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_); - break; - - case 1: - /* - * Deleting the last part of the extent. - */ - temp = got.br_blockcount - del->br_blockcount; - trace_xfs_bmap_pre_update(ip, eidx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(ep, temp); - if (delay) { - temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp), - da_old); - xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_); - da_new = temp; - break; - } - trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_); - break; - - case 0: - /* - * Deleting the middle of the extent. - */ - temp = del->br_startoff - got.br_startoff; - trace_xfs_bmap_pre_update(ip, eidx, state, _THIS_IP_); - xfs_bmbt_set_blockcount(ep, temp); - new.br_startoff = del_endoff; - temp2 = got_endoff - del_endoff; - new.br_blockcount = temp2; - new.br_state = got.br_state; - if (!delay) { - new.br_startblock = del_endblock; - } else { - temp = xfs_bmap_worst_indlen(ip, temp); - xfs_bmbt_set_startblock(ep, nullstartblock((int)temp)); - temp2 = xfs_bmap_worst_indlen(ip, temp2); - new.br_startblock = nullstartblock((int)temp2); - da_new = temp + temp2; - while (da_new > da_old) { - if (temp) { - temp--; - da_new--; - xfs_bmbt_set_startblock(ep, - nullstartblock((int)temp)); - } - if (da_new == da_old) - break; - if (temp2) { - temp2--; - da_new--; - new.br_startblock = - nullstartblock((int)temp2); - } - } - } - trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_); - xfs_iext_insert(ip, eidx + 1, 1, &new, state); - ++eidx; - break; - } - - /* - * Account for change in delayed indirect blocks. - * Nothing to do for disk quota accounting here. - */ - ASSERT(da_old >= da_new); - if (da_old > da_new) - xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new), false); - - return error; + &eidx, &got, &new); + ASSERT(ep); + if (isnullstartblock(got.br_startblock)) + xfs_bmap_del_extent_delay(ip, XFS_COW_FORK, &eidx, &got, del); + else + xfs_bmap_del_extent_cow(ip, &eidx, &got, del); + return 0; } /* diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index eb86af0c988b..5f18248cbac2 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -224,6 +224,11 @@ int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip, xfs_extnum_t nexts, xfs_fsblock_t *firstblock, struct xfs_defer_ops *dfops, int *done); int xfs_bunmapi_cow(struct xfs_inode *ip, struct xfs_bmbt_irec *del); +int xfs_bmap_del_extent_delay(struct xfs_inode *ip, int whichfork, + xfs_extnum_t *idx, struct xfs_bmbt_irec *got, + struct xfs_bmbt_irec *del); +void xfs_bmap_del_extent_cow(struct xfs_inode *ip, xfs_extnum_t *idx, + struct xfs_bmbt_irec *got, struct xfs_bmbt_irec *del); int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx, xfs_extnum_t num); uint xfs_default_attroffset(struct xfs_inode *ip); diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 9c477de3c1ac..09bd5dcd90cd 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -534,11 +534,6 @@ xfs_reflink_cancel_cow_blocks( trace_xfs_reflink_cancel_cow(ip, &irec); if (irec.br_startblock == DELAYSTARTBLOCK) { - /* Free a delayed allocation. */ - xfs_mod_fdblocks(ip->i_mount, irec.br_blockcount, - false); - ip->i_delayed_blks -= irec.br_blockcount; - /* Remove the mapping from the CoW fork. */ error = xfs_bunmapi_cow(ip, &irec); if (error) From 3e0ee78f7a5a8ed43b129e9e4c5c0ff10c71df74 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Oct 2016 15:54:31 +1100 Subject: [PATCH 362/884] xfs: optimize xfs_reflink_cancel_cow_blocks Rewrite xfs_reflink_cancel_cow_blocks so that we only do a search for the first extent in the extent list and then iterate over the remaining extents using the extent index, passing the extent we operate on directly to xfs_bmap_del_extent_delay or xfs_bmap_del_extent_cow instead of going through xfs_bunmapi and doing yet another extent list lookup. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Reviewed-by: Brian Foster Signed-off-by: Dave Chinner --- fs/xfs/xfs_reflink.c | 51 ++++++++++++++++++++------------------------ 1 file changed, 23 insertions(+), 28 deletions(-) diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 09bd5dcd90cd..1e9c589d22cc 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -511,53 +511,49 @@ xfs_reflink_cancel_cow_blocks( xfs_fileoff_t offset_fsb, xfs_fileoff_t end_fsb) { - struct xfs_bmbt_irec irec; - xfs_filblks_t count_fsb; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); + struct xfs_bmbt_irec got, prev, del; + xfs_extnum_t idx; xfs_fsblock_t firstfsb; struct xfs_defer_ops dfops; - int error = 0; - int nimaps; + int error = 0, eof = 0; if (!xfs_is_reflink_inode(ip)) return 0; - /* Go find the old extent in the CoW fork. */ - while (offset_fsb < end_fsb) { - nimaps = 1; - count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb); - error = xfs_bmapi_read(ip, offset_fsb, count_fsb, &irec, - &nimaps, XFS_BMAPI_COWFORK); - if (error) - break; - ASSERT(nimaps == 1); + xfs_bmap_search_extents(ip, offset_fsb, XFS_COW_FORK, &eof, &idx, + &got, &prev); + if (eof) + return 0; - trace_xfs_reflink_cancel_cow(ip, &irec); + while (got.br_startoff < end_fsb) { + del = got; + xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb); + trace_xfs_reflink_cancel_cow(ip, &del); - if (irec.br_startblock == DELAYSTARTBLOCK) { - /* Remove the mapping from the CoW fork. */ - error = xfs_bunmapi_cow(ip, &irec); + if (isnullstartblock(del.br_startblock)) { + error = xfs_bmap_del_extent_delay(ip, XFS_COW_FORK, + &idx, &got, &del); if (error) break; - } else if (irec.br_startblock == HOLESTARTBLOCK) { - /* empty */ } else { xfs_trans_ijoin(*tpp, ip, 0); xfs_defer_init(&dfops, &firstfsb); /* Free the CoW orphan record. */ error = xfs_refcount_free_cow_extent(ip->i_mount, - &dfops, irec.br_startblock, - irec.br_blockcount); + &dfops, del.br_startblock, + del.br_blockcount); if (error) break; xfs_bmap_add_free(ip->i_mount, &dfops, - irec.br_startblock, irec.br_blockcount, + del.br_startblock, del.br_blockcount, NULL); /* Update quota accounting */ xfs_trans_mod_dquot_byino(*tpp, ip, XFS_TRANS_DQ_BCOUNT, - -(long)irec.br_blockcount); + -(long)del.br_blockcount); /* Roll the transaction */ error = xfs_defer_finish(tpp, &dfops, ip); @@ -567,13 +563,12 @@ xfs_reflink_cancel_cow_blocks( } /* Remove the mapping from the CoW fork. */ - error = xfs_bunmapi_cow(ip, &irec); - if (error) - break; + xfs_bmap_del_extent_cow(ip, &idx, &got, &del); } - /* Roll on... */ - offset_fsb = irec.br_startoff + irec.br_blockcount; + if (++idx >= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)) + return 0; + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &got); } return error; From c1112b6e626637ec09319883b63e705a931c398b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Oct 2016 15:54:45 +1100 Subject: [PATCH 363/884] xfs: optimize xfs_reflink_end_cow Instead of doing a full extent list search for each extent that is to be deleted using xfs_bmapi_read and then doing another one inside of xfs_bunmapi_cow use the same scheme that xfs_bumapi uses: look up the last extent to be deleted and then use the extent index to walk downward until we are outside the range to be deleted. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Reviewed-by: Brian Foster Signed-off-by: Dave Chinner --- fs/xfs/xfs_reflink.c | 129 ++++++++++++++++++++----------------------- fs/xfs/xfs_trace.h | 1 - 2 files changed, 61 insertions(+), 69 deletions(-) diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 1e9c589d22cc..cd308f119e20 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -633,25 +633,26 @@ xfs_reflink_end_cow( xfs_off_t offset, xfs_off_t count) { - struct xfs_bmbt_irec irec; - struct xfs_bmbt_irec uirec; + struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); + struct xfs_bmbt_irec got, prev, del; struct xfs_trans *tp; xfs_fileoff_t offset_fsb; xfs_fileoff_t end_fsb; - xfs_filblks_t count_fsb; xfs_fsblock_t firstfsb; struct xfs_defer_ops dfops; - int error; + int error, eof = 0; unsigned int resblks; - xfs_filblks_t ilen; xfs_filblks_t rlen; - int nimaps; + xfs_extnum_t idx; trace_xfs_reflink_end_cow(ip, offset, count); + /* No COW extents? That's easy! */ + if (ifp->if_bytes == 0) + return 0; + offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count); - count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb); /* Start a rolling transaction to switch the mappings */ resblks = XFS_EXTENTADD_SPACE_RES(ip->i_mount, XFS_DATA_FORK); @@ -663,72 +664,65 @@ xfs_reflink_end_cow( xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, 0); - /* Go find the old extent in the CoW fork. */ - while (offset_fsb < end_fsb) { - /* Read extent from the source file */ - nimaps = 1; - count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb); - error = xfs_bmapi_read(ip, offset_fsb, count_fsb, &irec, - &nimaps, XFS_BMAPI_COWFORK); - if (error) - goto out_cancel; - ASSERT(nimaps == 1); + xfs_bmap_search_extents(ip, end_fsb - 1, XFS_COW_FORK, &eof, &idx, + &got, &prev); - ASSERT(irec.br_startblock != DELAYSTARTBLOCK); - trace_xfs_reflink_cow_remap(ip, &irec); + /* If there is a hole at end_fsb - 1 go to the previous extent */ + if (eof || got.br_startoff > end_fsb) { + ASSERT(idx > 0); + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, --idx), &got); + } - /* - * We can have a hole in the CoW fork if part of a directio - * write is CoW but part of it isn't. - */ - rlen = ilen = irec.br_blockcount; - if (irec.br_startblock == HOLESTARTBLOCK) + /* Walk backwards until we're out of the I/O range... */ + while (got.br_startoff + got.br_blockcount > offset_fsb) { + del = got; + xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb); + + /* Extent delete may have bumped idx forward */ + if (!del.br_blockcount) { + idx--; goto next_extent; - - /* Unmap the old blocks in the data fork. */ - while (rlen) { - xfs_defer_init(&dfops, &firstfsb); - error = __xfs_bunmapi(tp, ip, irec.br_startoff, - &rlen, 0, 1, &firstfsb, &dfops); - if (error) - goto out_defer; - - /* - * Trim the extent to whatever got unmapped. - * Remember, bunmapi works backwards. - */ - uirec.br_startblock = irec.br_startblock + rlen; - uirec.br_startoff = irec.br_startoff + rlen; - uirec.br_blockcount = irec.br_blockcount - rlen; - irec.br_blockcount = rlen; - trace_xfs_reflink_cow_remap_piece(ip, &uirec); - - /* Free the CoW orphan record. */ - error = xfs_refcount_free_cow_extent(tp->t_mountp, - &dfops, uirec.br_startblock, - uirec.br_blockcount); - if (error) - goto out_defer; - - /* Map the new blocks into the data fork. */ - error = xfs_bmap_map_extent(tp->t_mountp, &dfops, - ip, &uirec); - if (error) - goto out_defer; - - /* Remove the mapping from the CoW fork. */ - error = xfs_bunmapi_cow(ip, &uirec); - if (error) - goto out_defer; - - error = xfs_defer_finish(&tp, &dfops, ip); - if (error) - goto out_defer; } + ASSERT(!isnullstartblock(got.br_startblock)); + + /* Unmap the old blocks in the data fork. */ + xfs_defer_init(&dfops, &firstfsb); + rlen = del.br_blockcount; + error = __xfs_bunmapi(tp, ip, del.br_startoff, &rlen, 0, 1, + &firstfsb, &dfops); + if (error) + goto out_defer; + + /* Trim the extent to whatever got unmapped. */ + if (rlen) { + xfs_trim_extent(&del, del.br_startoff + rlen, + del.br_blockcount - rlen); + } + trace_xfs_reflink_cow_remap(ip, &del); + + /* Free the CoW orphan record. */ + error = xfs_refcount_free_cow_extent(tp->t_mountp, &dfops, + del.br_startblock, del.br_blockcount); + if (error) + goto out_defer; + + /* Map the new blocks into the data fork. */ + error = xfs_bmap_map_extent(tp->t_mountp, &dfops, ip, &del); + if (error) + goto out_defer; + + /* Remove the mapping from the CoW fork. */ + xfs_bmap_del_extent_cow(ip, &idx, &got, &del); + + error = xfs_defer_finish(&tp, &dfops, ip); + if (error) + goto out_defer; + next_extent: - /* Roll on... */ - offset_fsb = irec.br_startoff + ilen; + if (idx < 0) + break; + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &got); } error = xfs_trans_commit(tp); @@ -739,7 +733,6 @@ next_extent: out_defer: xfs_defer_cancel(&dfops); -out_cancel: xfs_trans_cancel(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); out: diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 72f9f6b7a76a..0907752be62d 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -3356,7 +3356,6 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_irec); DEFINE_SIMPLE_IO_EVENT(xfs_reflink_cancel_cow_range); DEFINE_SIMPLE_IO_EVENT(xfs_reflink_end_cow); DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap); -DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap_piece); DEFINE_INODE_ERROR_EVENT(xfs_reflink_allocate_cow_range_error); DEFINE_INODE_ERROR_EVENT(xfs_reflink_cancel_cow_range_error); From 64e6428ddd00f864e3ca105f914a2b6920c2bc41 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Oct 2016 15:54:59 +1100 Subject: [PATCH 364/884] xfs: remove xfs_bunmapi_cow Since no one uses it anymore. Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Reviewed-by: Brian Foster Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_bmap.c | 22 ---------------------- fs/xfs/libxfs/xfs_bmap.h | 1 - 2 files changed, 23 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index d7ad51132f4f..c6eb21940783 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -5414,28 +5414,6 @@ done: return error; } -/* Remove an extent from the CoW fork. Similar to xfs_bmap_del_extent. */ -int -xfs_bunmapi_cow( - struct xfs_inode *ip, - struct xfs_bmbt_irec *del) -{ - struct xfs_bmbt_rec_host *ep; - struct xfs_bmbt_irec got; - struct xfs_bmbt_irec new; - int eof; - xfs_extnum_t eidx; - - ep = xfs_bmap_search_extents(ip, del->br_startoff, XFS_COW_FORK, &eof, - &eidx, &got, &new); - ASSERT(ep); - if (isnullstartblock(got.br_startblock)) - xfs_bmap_del_extent_delay(ip, XFS_COW_FORK, &eidx, &got, del); - else - xfs_bmap_del_extent_cow(ip, &eidx, &got, del); - return 0; -} - /* * Unmap (remove) blocks from a file. * If nexts is nonzero then the number of extents to remove is limited to diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index 5f18248cbac2..7cae6ec27fa6 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -223,7 +223,6 @@ int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip, xfs_fileoff_t bno, xfs_filblks_t len, int flags, xfs_extnum_t nexts, xfs_fsblock_t *firstblock, struct xfs_defer_ops *dfops, int *done); -int xfs_bunmapi_cow(struct xfs_inode *ip, struct xfs_bmbt_irec *del); int xfs_bmap_del_extent_delay(struct xfs_inode *ip, int whichfork, xfs_extnum_t *idx, struct xfs_bmbt_irec *got, struct xfs_bmbt_irec *del); From aed3f249f93fe15f16242bbd4e6c3931674b8d91 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Mon, 3 Oct 2016 12:36:24 -0700 Subject: [PATCH 365/884] thermal: intel_pch_thermal: Add an ACPI passive trip On the platforms which has an ACPI companion device associated with PCH thermal device, read passive trip temperature via ACPI _PSV control method. Signed-off-by: Srinivas Pandruvada Signed-off-by: Zhang Rui --- drivers/thermal/intel_pch_thermal.c | 51 +++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/drivers/thermal/intel_pch_thermal.c b/drivers/thermal/intel_pch_thermal.c index 9b4815e81b0d..5cf644a2c012 100644 --- a/drivers/thermal/intel_pch_thermal.c +++ b/drivers/thermal/intel_pch_thermal.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -66,9 +67,53 @@ struct pch_thermal_device { unsigned long crt_temp; int hot_trip_id; unsigned long hot_temp; + int psv_trip_id; + unsigned long psv_temp; bool bios_enabled; }; +#ifdef CONFIG_ACPI + +/* + * On some platforms, there is a companion ACPI device, which adds + * passive trip temperature using _PSV method. There is no specific + * passive temperature setting in MMIO interface of this PCI device. + */ +static void pch_wpt_add_acpi_psv_trip(struct pch_thermal_device *ptd, + int *nr_trips) +{ + struct acpi_device *adev; + + ptd->psv_trip_id = -1; + + adev = ACPI_COMPANION(&ptd->pdev->dev); + if (adev) { + unsigned long long r; + acpi_status status; + + status = acpi_evaluate_integer(adev->handle, "_PSV", NULL, + &r); + if (ACPI_SUCCESS(status)) { + unsigned long trip_temp; + + trip_temp = DECI_KELVIN_TO_MILLICELSIUS(r); + if (trip_temp) { + ptd->psv_temp = trip_temp; + ptd->psv_trip_id = *nr_trips; + ++(*nr_trips); + } + } + } +} +#else +static void pch_wpt_add_acpi_psv_trip(struct pch_thermal_device *ptd, + int *nr_trips) +{ + ptd->psv_trip_id = -1; + +} +#endif + static int pch_wpt_init(struct pch_thermal_device *ptd, int *nr_trips) { u8 tsel; @@ -119,6 +164,8 @@ read_trips: ++(*nr_trips); } + pch_wpt_add_acpi_psv_trip(ptd, nr_trips); + return 0; } @@ -194,6 +241,8 @@ static int pch_get_trip_type(struct thermal_zone_device *tzd, int trip, *type = THERMAL_TRIP_CRITICAL; else if (ptd->hot_trip_id == trip) *type = THERMAL_TRIP_HOT; + else if (ptd->psv_trip_id == trip) + *type = THERMAL_TRIP_PASSIVE; else return -EINVAL; @@ -208,6 +257,8 @@ static int pch_get_trip_temp(struct thermal_zone_device *tzd, int trip, int *tem *temp = ptd->crt_temp; else if (ptd->hot_trip_id == trip) *temp = ptd->hot_temp; + else if (ptd->psv_trip_id == trip) + *temp = ptd->psv_temp; else return -EINVAL; From 33086a9a3071812a829d4b63bed826736a5d8b17 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Mon, 3 Oct 2016 12:36:02 -0700 Subject: [PATCH 366/884] thermal: intel_pch_thermal: Enable Haswell PCH Added missing support for Haswell PCH thermal sensor. Signed-off-by: Srinivas Pandruvada Signed-off-by: Zhang Rui --- drivers/thermal/intel_pch_thermal.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/thermal/intel_pch_thermal.c b/drivers/thermal/intel_pch_thermal.c index 5cf644a2c012..19bf2028e508 100644 --- a/drivers/thermal/intel_pch_thermal.c +++ b/drivers/thermal/intel_pch_thermal.c @@ -25,6 +25,8 @@ #include /* Intel PCH thermal Device IDs */ +#define PCH_THERMAL_DID_HSW_1 0x9C24 /* Haswell PCH */ +#define PCH_THERMAL_DID_HSW_2 0x8C24 /* Haswell PCH */ #define PCH_THERMAL_DID_WPT 0x9CA4 /* Wildcat Point */ #define PCH_THERMAL_DID_SKL 0x9D31 /* Skylake PCH */ @@ -293,6 +295,11 @@ static int intel_pch_thermal_probe(struct pci_dev *pdev, ptd->ops = &pch_dev_ops_wpt; dev_name = "pch_skylake"; break; + case PCH_THERMAL_DID_HSW_1: + case PCH_THERMAL_DID_HSW_2: + ptd->ops = &pch_dev_ops_wpt; + dev_name = "pch_haswell"; + break; default: dev_err(&pdev->dev, "unknown pch thermal device\n"); return -ENODEV; @@ -375,6 +382,8 @@ static int intel_pch_thermal_resume(struct device *device) static struct pci_device_id intel_pch_thermal_id[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_WPT) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_SKL) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_HSW_1) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_HSW_2) }, { 0, }, }; MODULE_DEVICE_TABLE(pci, intel_pch_thermal_id); From 3105f234e0aba43e44e277c20f9b32ee8add43d4 Mon Sep 17 00:00:00 2001 From: Eric Ernst Date: Thu, 6 Oct 2016 08:56:49 -0700 Subject: [PATCH 367/884] thermal/powerclamp: correct cpu support check Initial logic for checking CPU match resulted in OR of CPU features rather than the intended AND. Updated to use boot_cpu_has macro rather than x86_match_cpu. In addition, MWAIT is the only required CPU feature for idle injection to work. Drop other feature requirements since they are only needed for optimal efficiency. CC: stable@vger.kernel.org #v4.7 Signed-off-by: Eric Ernst Acked-by: Jacob Pan Signed-off-by: Zhang Rui --- drivers/thermal/intel_powerclamp.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/thermal/intel_powerclamp.c b/drivers/thermal/intel_powerclamp.c index 0e4dc0afcfd2..7a223074df3d 100644 --- a/drivers/thermal/intel_powerclamp.c +++ b/drivers/thermal/intel_powerclamp.c @@ -669,20 +669,10 @@ static struct thermal_cooling_device_ops powerclamp_cooling_ops = { .set_cur_state = powerclamp_set_cur_state, }; -static const struct x86_cpu_id intel_powerclamp_ids[] __initconst = { - { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_MWAIT }, - { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_ARAT }, - { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_NONSTOP_TSC }, - { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_CONSTANT_TSC}, - {} -}; -MODULE_DEVICE_TABLE(x86cpu, intel_powerclamp_ids); - static int __init powerclamp_probe(void) { - if (!x86_match_cpu(intel_powerclamp_ids)) { - pr_err("Intel powerclamp does not run on family %d model %d\n", - boot_cpu_data.x86, boot_cpu_data.x86_model); + if (!boot_cpu_has(X86_FEATURE_MWAIT)) { + pr_err("CPU does not support MWAIT"); return -ENODEV; } From caef78b6cdeddf4ad364f95910bba6b43b8eb9bf Mon Sep 17 00:00:00 2001 From: Alex Thorlton Date: Wed, 19 Oct 2016 20:48:51 -0500 Subject: [PATCH 368/884] x86/platform/UV: Fix support for EFI_OLD_MEMMAP after BIOS callback updates Some time ago, we brought our UV BIOS callback code up to speed with the new EFI memory mapping scheme, in commit: d1be84a232e3 ("x86/uv: Update uv_bios_call() to use efi_call_virt_pointer()") By leveraging some changes that I made to a few of the EFI runtime callback mechanisms, in commit: 80e75596079f ("efi: Convert efi_call_virt() to efi_call_virt_pointer()") This got everything running smoothly on UV, with the new EFI mapping code. However, this left one, small loose end, in that EFI_OLD_MEMMAP (a.k.a. efi=old_map) will no longer work on UV, on kernels that include the aforementioned changes. At the time this was not a major issue (in fact, it still really isn't), but there's no reason that EFI_OLD_MEMMAP *shouldn't* work on our systems. This commit adds a check into uv_bios_call(), to see if we have the EFI_OLD_MEMMAP bit set in efi.flags. If it is set, we fall back to using our old callback method, which uses efi_call() directly on the __va() of our function pointer. Signed-off-by: Alex Thorlton Acked-by: Matt Fleming Cc: # v4.7 and later Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Dimitri Sivanich Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Masahiro Yamada Cc: Mike Travis Cc: Peter Zijlstra Cc: Russ Anderson Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1476928131-170101-1-git-send-email-athorlton@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/platform/uv/bios_uv.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c index b4d5e95fe4df..4a6a5a26c582 100644 --- a/arch/x86/platform/uv/bios_uv.c +++ b/arch/x86/platform/uv/bios_uv.c @@ -40,7 +40,15 @@ s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) */ return BIOS_STATUS_UNIMPLEMENTED; - ret = efi_call_virt_pointer(tab, function, (u64)which, a1, a2, a3, a4, a5); + /* + * If EFI_OLD_MEMMAP is set, we need to fall back to using our old EFI + * callback method, which uses efi_call() directly, with the kernel page tables: + */ + if (unlikely(test_bit(EFI_OLD_MEMMAP, &efi.flags))) + ret = efi_call((void *)__va(tab->function), (u64)which, a1, a2, a3, a4, a5); + else + ret = efi_call_virt_pointer(tab, function, (u64)which, a1, a2, a3, a4, a5); + return ret; } EXPORT_SYMBOL_GPL(uv_bios_call); From 0a1eb2d474edfe75466be6b4677ad84e5e8ca3f5 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 30 Sep 2016 10:58:56 -0700 Subject: [PATCH 369/884] fs/proc: Stop reporting eip and esp in /proc/PID/stat Reporting these fields on a non-current task is dangerous. If the task is in any state other than normal kernel code, they may contain garbage or even kernel addresses on some architectures. (x86_64 used to do this. I bet lots of architectures still do.) With CONFIG_THREAD_INFO_IN_TASK=y, it can OOPS, too. As far as I know, there are no use programs that make any material use of these fields, so just get rid of them. Reported-by: Jann Horn Signed-off-by: Andy Lutomirski Acked-by: Thomas Gleixner Cc: Al Viro Cc: Andrew Morton Cc: Borislav Petkov Cc: Brian Gerst Cc: Kees Cook Cc: Linus Torvalds Cc: Linux API Cc: Peter Zijlstra Cc: Tetsuo Handa Cc: Tycho Andersen Link: http://lkml.kernel.org/r/a5fed4c3f4e33ed25d4bb03567e329bc5a712bcc.1475257877.git.luto@kernel.org Signed-off-by: Ingo Molnar --- fs/proc/array.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/proc/array.c b/fs/proc/array.c index 89600fd5963d..81818adb8e9e 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -412,10 +412,11 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, mm = get_task_mm(task); if (mm) { vsize = task_vsize(mm); - if (permitted) { - eip = KSTK_EIP(task); - esp = KSTK_ESP(task); - } + /* + * esp and eip are intentionally zeroed out. There is no + * non-racy way to read them without freezing the task. + * Programs that need reliable values can use ptrace(2). + */ } get_task_comm(tcomm, task); From b18cb64ead400c01bf1580eeba330ace51f8087d Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 30 Sep 2016 10:58:57 -0700 Subject: [PATCH 370/884] fs/proc: Stop trying to report thread stacks This reverts more of: b76437579d13 ("procfs: mark thread stack correctly in proc//maps") ... which was partially reverted by: 65376df58217 ("proc: revert /proc//maps [stack:TID] annotation") Originally, /proc/PID/task/TID/maps was the same as /proc/TID/maps. In current kernels, /proc/PID/maps (or /proc/TID/maps even for threads) shows "[stack]" for VMAs in the mm's stack address range. In contrast, /proc/PID/task/TID/maps uses KSTK_ESP to guess the target thread's stack's VMA. This is racy, probably returns garbage and, on arches with CONFIG_TASK_INFO_IN_THREAD=y, is also crash-prone: KSTK_ESP is not safe to use on tasks that aren't known to be running ordinary process-context kernel code. This patch removes the difference and just shows "[stack]" for VMAs in the mm's stack range. This is IMO much more sensible -- the actual "stack" address really is treated specially by the VM code, and the current thread stack isn't even well-defined for programs that frequently switch stacks on their own. Reported-by: Jann Horn Signed-off-by: Andy Lutomirski Acked-by: Thomas Gleixner Cc: Al Viro Cc: Andrew Morton Cc: Borislav Petkov Cc: Brian Gerst Cc: Johannes Weiner Cc: Kees Cook Cc: Linus Torvalds Cc: Linux API Cc: Peter Zijlstra Cc: Tycho Andersen Link: http://lkml.kernel.org/r/3e678474ec14e0a0ec34c611016753eea2e1b8ba.1475257877.git.luto@kernel.org Signed-off-by: Ingo Molnar --- Documentation/filesystems/proc.txt | 26 -------------------------- fs/proc/task_mmu.c | 29 ++++++++++------------------- fs/proc/task_nommu.c | 26 +++++++++----------------- 3 files changed, 19 insertions(+), 62 deletions(-) diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 219ffd41a911..74329fd0add2 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -395,32 +395,6 @@ is not associated with a file: or if empty, the mapping is anonymous. -The /proc/PID/task/TID/maps is a view of the virtual memory from the viewpoint -of the individual tasks of a process. In this file you will see a mapping marked -as [stack] if that task sees it as a stack. Hence, for the example above, the -task-level map, i.e. /proc/PID/task/TID/maps for thread 1001 will look like this: - -08048000-08049000 r-xp 00000000 03:00 8312 /opt/test -08049000-0804a000 rw-p 00001000 03:00 8312 /opt/test -0804a000-0806b000 rw-p 00000000 00:00 0 [heap] -a7cb1000-a7cb2000 ---p 00000000 00:00 0 -a7cb2000-a7eb2000 rw-p 00000000 00:00 0 -a7eb2000-a7eb3000 ---p 00000000 00:00 0 -a7eb3000-a7ed5000 rw-p 00000000 00:00 0 [stack] -a7ed5000-a8008000 r-xp 00000000 03:00 4222 /lib/libc.so.6 -a8008000-a800a000 r--p 00133000 03:00 4222 /lib/libc.so.6 -a800a000-a800b000 rw-p 00135000 03:00 4222 /lib/libc.so.6 -a800b000-a800e000 rw-p 00000000 00:00 0 -a800e000-a8022000 r-xp 00000000 03:00 14462 /lib/libpthread.so.0 -a8022000-a8023000 r--p 00013000 03:00 14462 /lib/libpthread.so.0 -a8023000-a8024000 rw-p 00014000 03:00 14462 /lib/libpthread.so.0 -a8024000-a8027000 rw-p 00000000 00:00 0 -a8027000-a8043000 r-xp 00000000 03:00 8317 /lib/ld-linux.so.2 -a8043000-a8044000 r--p 0001b000 03:00 8317 /lib/ld-linux.so.2 -a8044000-a8045000 rw-p 0001c000 03:00 8317 /lib/ld-linux.so.2 -aff35000-aff4a000 rw-p 00000000 00:00 0 -ffffe000-fffff000 r-xp 00000000 00:00 0 [vdso] - The /proc/PID/smaps is an extension based on maps, showing the memory consumption for each of the process's mappings. For each of mappings there is a series of lines such as the following: diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 6909582ce5e5..35b92d81692f 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -266,24 +266,15 @@ static int do_maps_open(struct inode *inode, struct file *file, * /proc/PID/maps that is the stack of the main task. */ static int is_stack(struct proc_maps_private *priv, - struct vm_area_struct *vma, int is_pid) + struct vm_area_struct *vma) { - int stack = 0; - - if (is_pid) { - stack = vma->vm_start <= vma->vm_mm->start_stack && - vma->vm_end >= vma->vm_mm->start_stack; - } else { - struct inode *inode = priv->inode; - struct task_struct *task; - - rcu_read_lock(); - task = pid_task(proc_pid(inode), PIDTYPE_PID); - if (task) - stack = vma_is_stack_for_task(vma, task); - rcu_read_unlock(); - } - return stack; + /* + * We make no effort to guess what a given thread considers to be + * its "stack". It's not even well-defined for programs written + * languages like Go. + */ + return vma->vm_start <= vma->vm_mm->start_stack && + vma->vm_end >= vma->vm_mm->start_stack; } static void @@ -354,7 +345,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid) goto done; } - if (is_stack(priv, vma, is_pid)) + if (is_stack(priv, vma)) name = "[stack]"; } @@ -1669,7 +1660,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid) seq_file_path(m, file, "\n\t= "); } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { seq_puts(m, " heap"); - } else if (is_stack(proc_priv, vma, is_pid)) { + } else if (is_stack(proc_priv, vma)) { seq_puts(m, " stack"); } diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index faacb0c0d857..37175621e890 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -124,25 +124,17 @@ unsigned long task_statm(struct mm_struct *mm, } static int is_stack(struct proc_maps_private *priv, - struct vm_area_struct *vma, int is_pid) + struct vm_area_struct *vma) { struct mm_struct *mm = vma->vm_mm; - int stack = 0; - if (is_pid) { - stack = vma->vm_start <= mm->start_stack && - vma->vm_end >= mm->start_stack; - } else { - struct inode *inode = priv->inode; - struct task_struct *task; - - rcu_read_lock(); - task = pid_task(proc_pid(inode), PIDTYPE_PID); - if (task) - stack = vma_is_stack_for_task(vma, task); - rcu_read_unlock(); - } - return stack; + /* + * We make no effort to guess what a given thread considers to be + * its "stack". It's not even well-defined for programs written + * languages like Go. + */ + return vma->vm_start <= mm->start_stack && + vma->vm_end >= mm->start_stack; } /* @@ -184,7 +176,7 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma, if (file) { seq_pad(m, ' '); seq_file_path(m, file, ""); - } else if (mm && is_stack(priv, vma, is_pid)) { + } else if (mm && is_stack(priv, vma)) { seq_pad(m, ' '); seq_printf(m, "[stack]"); } From d17af5056cf9e9fc05e68832f7c15687fcc12281 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 30 Sep 2016 10:58:58 -0700 Subject: [PATCH 371/884] mm: Change vm_is_stack_for_task() to vm_is_stack_for_current() Asking for a non-current task's stack can't be done without races unless the task is frozen in kernel mode. As far as I know, vm_is_stack_for_task() never had a safe non-current use case. The __unused annotation is because some KSTK_ESP implementations ignore their parameter, which IMO is further justification for this patch. Signed-off-by: Andy Lutomirski Acked-by: Thomas Gleixner Cc: Al Viro Cc: Andrew Morton Cc: Borislav Petkov Cc: Brian Gerst Cc: Jann Horn Cc: Kees Cook Cc: Linus Torvalds Cc: Linux API Cc: Peter Zijlstra Cc: Tycho Andersen Link: http://lkml.kernel.org/r/4c3f68f426e6c061ca98b4fc7ef85ffbb0a25b0c.1475257877.git.luto@kernel.org Signed-off-by: Ingo Molnar --- include/linux/mm.h | 2 +- mm/util.c | 4 +++- security/selinux/hooks.c | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index e9caec6a51e9..a658a5167bce 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1391,7 +1391,7 @@ static inline int stack_guard_page_end(struct vm_area_struct *vma, !vma_growsup(vma->vm_next, addr); } -int vma_is_stack_for_task(struct vm_area_struct *vma, struct task_struct *t); +int vma_is_stack_for_current(struct vm_area_struct *vma); extern unsigned long move_page_tables(struct vm_area_struct *vma, unsigned long old_addr, struct vm_area_struct *new_vma, diff --git a/mm/util.c b/mm/util.c index 662cddf914af..c174e8921995 100644 --- a/mm/util.c +++ b/mm/util.c @@ -230,8 +230,10 @@ void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma, } /* Check if the vma is being used as a stack by this task */ -int vma_is_stack_for_task(struct vm_area_struct *vma, struct task_struct *t) +int vma_is_stack_for_current(struct vm_area_struct *vma) { + struct task_struct * __maybe_unused t = current; + return (vma->vm_start <= KSTK_ESP(t) && vma->vm_end >= KSTK_ESP(t)); } diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 085057936287..09fd6108e421 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3557,7 +3557,7 @@ static int selinux_file_mprotect(struct vm_area_struct *vma, } else if (!vma->vm_file && ((vma->vm_start <= vma->vm_mm->start_stack && vma->vm_end >= vma->vm_mm->start_stack) || - vma_is_stack_for_task(vma, current))) { + vma_is_stack_for_current(vma))) { rc = current_has_perm(current, PROCESS__EXECSTACK); } else if (vma->vm_file && vma->anon_vma) { /* From 6add49fff9233a5c43011eb42b521257cbaa9bda Mon Sep 17 00:00:00 2001 From: Jacob Siverskog Date: Thu, 20 Oct 2016 09:05:09 +0200 Subject: [PATCH 372/884] Bluetooth: btwilink: Fix probe return value Probe functions should return 0 on success. This driver's probe returns the value returned by hci_register_dev(), which is the hci index. This works for systems with only one hci device (id = 0) but for systems where the btwilink device ends up with an id larger than 0, things will start to fall apart. Make the probe function return 0 on success. Signed-off-by: Jacob Siverskog Signed-off-by: Marcel Holtmann --- drivers/bluetooth/btwilink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/bluetooth/btwilink.c b/drivers/bluetooth/btwilink.c index ef51c9c864c5..b6bb58c41df5 100644 --- a/drivers/bluetooth/btwilink.c +++ b/drivers/bluetooth/btwilink.c @@ -310,7 +310,7 @@ static int bt_ti_probe(struct platform_device *pdev) BT_DBG("HCI device registered (hdev %p)", hdev); dev_set_drvdata(&pdev->dev, hst); - return err; + return 0; } static int bt_ti_remove(struct platform_device *pdev) From 87261d19046aeaeed8eb3d2793fde850ae1b5c9e Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Wed, 19 Oct 2016 14:40:54 +0100 Subject: [PATCH 373/884] arm64: Cortex-A53 errata workaround: check for kernel addresses Commit 7dd01aef0557 ("arm64: trap userspace "dc cvau" cache operation on errata-affected core") adds code to execute cache maintenance instructions in the kernel on behalf of userland on CPUs with certain ARM CPU errata. It turns out that the address hasn't been checked to be a valid user space address, allowing userland to clean cache lines in kernel space. Fix this by introducing an address check before executing the instructions on behalf of userland. Since the address doesn't come via a syscall parameter, we can't just reject tagged pointers and instead have to remove the tag when checking against the user address limit. Cc: Fixes: 7dd01aef0557 ("arm64: trap userspace "dc cvau" cache operation on errata-affected core") Reported-by: Kristina Martsenko Signed-off-by: Andre Przywara [will: rework commit message + replace access_ok with max_user_addr()] Signed-off-by: Will Deacon --- arch/arm64/include/asm/uaccess.h | 8 ++++++++ arch/arm64/kernel/traps.c | 27 +++++++++++++++------------ 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index bcaf6fba1b65..55d0adbf6509 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -21,6 +21,7 @@ /* * User space memory access functions */ +#include #include #include #include @@ -102,6 +103,13 @@ static inline void set_fs(mm_segment_t fs) flag; \ }) +/* + * When dealing with data aborts or instruction traps we may end up with + * a tagged userland pointer. Clear the tag to get a sane pointer to pass + * on to access_ok(), for instance. + */ +#define untagged_addr(addr) sign_extend64(addr, 55) + #define access_ok(type, addr, size) __range_ok(addr, size) #define user_addr_max get_fs diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 5ff020f8fb7f..7255c9d6cfb7 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -434,18 +434,21 @@ void cpu_enable_cache_maint_trap(void *__unused) } #define __user_cache_maint(insn, address, res) \ - asm volatile ( \ - "1: " insn ", %1\n" \ - " mov %w0, #0\n" \ - "2:\n" \ - " .pushsection .fixup,\"ax\"\n" \ - " .align 2\n" \ - "3: mov %w0, %w2\n" \ - " b 2b\n" \ - " .popsection\n" \ - _ASM_EXTABLE(1b, 3b) \ - : "=r" (res) \ - : "r" (address), "i" (-EFAULT) ) + if (untagged_addr(address) >= user_addr_max()) \ + res = -EFAULT; \ + else \ + asm volatile ( \ + "1: " insn ", %1\n" \ + " mov %w0, #0\n" \ + "2:\n" \ + " .pushsection .fixup,\"ax\"\n" \ + " .align 2\n" \ + "3: mov %w0, %w2\n" \ + " b 2b\n" \ + " .popsection\n" \ + _ASM_EXTABLE(1b, 3b) \ + : "=r" (res) \ + : "r" (address), "i" (-EFAULT) ) static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs) { From 2a6dcb2b5f3e21592ca8dfa198dcce7bec09b020 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 18 Oct 2016 11:27:46 +0100 Subject: [PATCH 374/884] arm64: cpufeature: Schedule enable() calls instead of calling them via IPI The enable() call for a cpufeature/errata is called using on_each_cpu(). This issues a cross-call IPI to get the work done. Implicitly, this stashes the running PSTATE in SPSR when the CPU receives the IPI, and restores it when we return. This means an enable() call can never modify PSTATE. To allow PAN to do this, change the on_each_cpu() call to use stop_machine(). This schedules the work on each CPU which allows us to modify PSTATE. This involves changing the protype of all the enable() functions. enable_cpu_capabilities() is called during boot and enables the feature on all online CPUs. This path now uses stop_machine(). CPU features for hotplug'd CPUs are enabled by verify_local_cpu_features() which only acts on the local CPU, and can already modify the running PSTATE as it is called from secondary_start_kernel(). Reported-by: Tony Thompson Reported-by: Vladimir Murzin Signed-off-by: James Morse Cc: Suzuki K Poulose Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 2 +- arch/arm64/include/asm/processor.h | 6 +++--- arch/arm64/kernel/cpu_errata.c | 3 ++- arch/arm64/kernel/cpufeature.c | 10 +++++++++- arch/arm64/kernel/traps.c | 3 ++- arch/arm64/mm/fault.c | 6 ++++-- 6 files changed, 21 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 758d74fedfad..a27c3245ba21 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -94,7 +94,7 @@ struct arm64_cpu_capabilities { u16 capability; int def_scope; /* default scope */ bool (*matches)(const struct arm64_cpu_capabilities *caps, int scope); - void (*enable)(void *); /* Called on all active CPUs */ + int (*enable)(void *); /* Called on all active CPUs */ union { struct { /* To be used for erratum handling only */ u32 midr_model; diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index df2e53d3a969..60e34824e18c 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -188,8 +188,8 @@ static inline void spin_lock_prefetch(const void *ptr) #endif -void cpu_enable_pan(void *__unused); -void cpu_enable_uao(void *__unused); -void cpu_enable_cache_maint_trap(void *__unused); +int cpu_enable_pan(void *__unused); +int cpu_enable_uao(void *__unused); +int cpu_enable_cache_maint_trap(void *__unused); #endif /* __ASM_PROCESSOR_H */ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 0150394f4cab..b75e917aac46 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -39,10 +39,11 @@ has_mismatched_cache_line_size(const struct arm64_cpu_capabilities *entry, (arm64_ftr_reg_ctrel0.sys_val & arm64_ftr_reg_ctrel0.strict_mask); } -static void cpu_enable_trap_ctr_access(void *__unused) +static int cpu_enable_trap_ctr_access(void *__unused) { /* Clear SCTLR_EL1.UCT */ config_sctlr_el1(SCTLR_EL1_UCT, 0); + return 0; } #define MIDR_RANGE(model, min, max) \ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index d577f263cc4a..c02504ea304b 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -19,7 +19,9 @@ #define pr_fmt(fmt) "CPU features: " fmt #include +#include #include +#include #include #include #include @@ -941,7 +943,13 @@ void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) { for (; caps->matches; caps++) if (caps->enable && cpus_have_cap(caps->capability)) - on_each_cpu(caps->enable, NULL, true); + /* + * Use stop_machine() as it schedules the work allowing + * us to modify PSTATE, instead of on_each_cpu() which + * uses an IPI, giving us a PSTATE that disappears when + * we return. + */ + stop_machine(caps->enable, NULL, cpu_online_mask); } /* diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 7255c9d6cfb7..c9986b3e0a96 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -428,9 +428,10 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs) force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0); } -void cpu_enable_cache_maint_trap(void *__unused) +int cpu_enable_cache_maint_trap(void *__unused) { config_sctlr_el1(SCTLR_EL1_UCI, 0); + return 0; } #define __user_cache_maint(insn, address, res) \ diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 53d9159662fe..3e9ff9b0c78d 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -670,9 +670,10 @@ asmlinkage int __exception do_debug_exception(unsigned long addr, NOKPROBE_SYMBOL(do_debug_exception); #ifdef CONFIG_ARM64_PAN -void cpu_enable_pan(void *__unused) +int cpu_enable_pan(void *__unused) { config_sctlr_el1(SCTLR_EL1_SPAN, 0); + return 0; } #endif /* CONFIG_ARM64_PAN */ @@ -683,8 +684,9 @@ void cpu_enable_pan(void *__unused) * We need to enable the feature at runtime (instead of adding it to * PSR_MODE_EL1h) as the feature may not be implemented by the cpu. */ -void cpu_enable_uao(void *__unused) +int cpu_enable_uao(void *__unused) { asm(SET_PSTATE_UAO(1)); + return 0; } #endif /* CONFIG_ARM64_UAO */ From 7209c868600bd8926e37c10b9aae83124ccc1dd8 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 18 Oct 2016 11:27:47 +0100 Subject: [PATCH 375/884] arm64: mm: Set PSTATE.PAN from the cpu_enable_pan() call Commit 338d4f49d6f7 ("arm64: kernel: Add support for Privileged Access Never") enabled PAN by enabling the 'SPAN' feature-bit in SCTLR_EL1. This means the PSTATE.PAN bit won't be set until the next return to the kernel from userspace. On a preemptible kernel we may schedule work that accesses userspace on a CPU before it has done this. Now that cpufeature enable() calls are scheduled via stop_machine(), we can set PSTATE.PAN from the cpu_enable_pan() call. Add WARN_ON_ONCE(in_interrupt()) to check the PSTATE value we updated is not immediately discarded. Reported-by: Tony Thompson Reported-by: Vladimir Murzin Signed-off-by: James Morse [will: fixed typo in comment] Signed-off-by: Will Deacon --- arch/arm64/mm/fault.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 3e9ff9b0c78d..0f8788374815 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -29,7 +29,9 @@ #include #include #include +#include +#include #include #include #include @@ -672,7 +674,14 @@ NOKPROBE_SYMBOL(do_debug_exception); #ifdef CONFIG_ARM64_PAN int cpu_enable_pan(void *__unused) { + /* + * We modify PSTATE. This won't work from irq context as the PSTATE + * is discarded once we return from the exception. + */ + WARN_ON_ONCE(in_interrupt()); + config_sctlr_el1(SCTLR_EL1_SPAN, 0); + asm(SET_PSTATE_PAN(1)); return 0; } #endif /* CONFIG_ARM64_PAN */ From d08544127d9fb4505635e3cb6871fd50a42947bd Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 18 Oct 2016 11:27:48 +0100 Subject: [PATCH 376/884] arm64: suspend: Reconfigure PSTATE after resume from idle The suspend/resume path in kernel/sleep.S, as used by cpu-idle, does not save/restore PSTATE. As a result of this cpufeatures that were detected and have bits in PSTATE get lost when we resume from idle. UAO gets set appropriately on the next context switch. PAN will be re-enabled next time we return from user-space, but on a preemptible kernel we may run work accessing user space before this point. Add code to re-enable theses two features in __cpu_suspend_exit(). We re-use uao_thread_switch() passing current. Signed-off-by: James Morse Cc: Lorenzo Pieralisi Signed-off-by: Will Deacon --- arch/arm64/include/asm/exec.h | 3 +++ arch/arm64/kernel/process.c | 3 ++- arch/arm64/kernel/suspend.c | 11 +++++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/exec.h b/arch/arm64/include/asm/exec.h index db0563c23482..f7865dd9d868 100644 --- a/arch/arm64/include/asm/exec.h +++ b/arch/arm64/include/asm/exec.h @@ -18,6 +18,9 @@ #ifndef __ASM_EXEC_H #define __ASM_EXEC_H +#include + extern unsigned long arch_align_stack(unsigned long sp); +void uao_thread_switch(struct task_struct *next); #endif /* __ASM_EXEC_H */ diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 27b2f1387df4..4f186c56c5eb 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include #include @@ -301,7 +302,7 @@ static void tls_thread_switch(struct task_struct *next) } /* Restore the UAO state depending on next's addr_limit */ -static void uao_thread_switch(struct task_struct *next) +void uao_thread_switch(struct task_struct *next) { if (IS_ENABLED(CONFIG_ARM64_UAO)) { if (task_thread_info(next)->addr_limit == KERNEL_DS) diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index ad734142070d..bb0cd787a9d3 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -1,8 +1,11 @@ #include #include #include +#include #include +#include #include +#include #include #include #include @@ -49,6 +52,14 @@ void notrace __cpu_suspend_exit(void) */ set_my_cpu_offset(per_cpu_offset(cpu)); + /* + * PSTATE was not saved over suspend/resume, re-enable any detected + * features that might not have been set correctly. + */ + asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, + CONFIG_ARM64_PAN)); + uao_thread_switch(current); + /* * Restore HW breakpoint registers to sane values * before debug exceptions are possibly reenabled From de24e0a108bc48062e1c7acaa97014bce32a919f Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 19 Oct 2016 15:45:07 +0200 Subject: [PATCH 377/884] USB: serial: cp210x: fix tiocmget error handling The current tiocmget implementation would fail to report errors up the stack and instead leaked a few bits from the stack as a mask of modem-status flags. Fixes: 39a66b8d22a3 ("[PATCH] USB: CP2101 Add support for flow control") Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/cp210x.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 54a4de0efdba..f61477bed3a8 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -1077,7 +1077,9 @@ static int cp210x_tiocmget(struct tty_struct *tty) u8 control; int result; - cp210x_read_u8_reg(port, CP210X_GET_MDMSTS, &control); + result = cp210x_read_u8_reg(port, CP210X_GET_MDMSTS, &control); + if (result) + return result; result = ((control & CONTROL_DTR) ? TIOCM_DTR : 0) |((control & CONTROL_RTS) ? TIOCM_RTS : 0) From 3602ffdee9afa727320d33bda57a6957d72b1df2 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Wed, 19 Oct 2016 17:53:52 +0000 Subject: [PATCH 378/884] irqchip/jcore: Don't show Kconfig menu item for driver Core drivers for J-Core SoCs will be selected implicitly via CONFIG_SH_JCORE_SOC instead. Based on a corresponding change to the clocksource/timer driver requested by Daniel Lezcano. Signed-off-by: Rich Felker Cc: Marc Zyngier Cc: Jason Cooper Cc: linux-sh@vger.kernel.org Link: http://lkml.kernel.org/r/883a3d17084003e3cf21bab73ec12828fe4ff6c6.1476899495.git.dalias@libc.org Signed-off-by: Thomas Gleixner --- drivers/irqchip/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 82b0b5daf3f5..bc0af3307bbf 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -158,8 +158,8 @@ config PIC32_EVIC select IRQ_DOMAIN config JCORE_AIC - bool "J-Core integrated AIC" - depends on OF && (SUPERH || COMPILE_TEST) + bool "J-Core integrated AIC" if COMPILE_TEST + depends on OF select IRQ_DOMAIN help Support for the J-Core integrated AIC. From 6aecd8715802d23dc6a0859b50c62d2b0a99de3a Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Thu, 20 Oct 2016 14:03:33 +0800 Subject: [PATCH 379/884] ALSA: hda - Fix headset mic detection problem for two Dell laptops They uses the codec ALC255, and have the different pin cfg definition from the ones in the existing pin quirk table. Now adding them into the table to fix the problem. Cc: stable@vger.kernel.org Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 8483fc20f635..b582d57fe184 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5856,10 +5856,18 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x14, 0x90170110}, {0x1b, 0x02011020}, {0x21, 0x0221101f}), + SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, + {0x14, 0x90170110}, + {0x1b, 0x01011020}, + {0x21, 0x0221101f}), SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, {0x14, 0x90170130}, {0x1b, 0x01014020}, {0x21, 0x0221103f}), + SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, + {0x14, 0x90170130}, + {0x1b, 0x01011020}, + {0x21, 0x0221103f}), SND_HDA_PIN_QUIRK(0x10ec0255, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE, {0x14, 0x90170130}, {0x1b, 0x02011020}, From fd5bed48b446d5edfb319b5ecbef7154f29bd73e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 20 Oct 2016 11:21:01 +0100 Subject: [PATCH 380/884] irqchip/gic: Add missing \n to CPU IF adjustment message It really looks bad without a newline. Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 58e5b4e87056..d6c404b3584d 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -1279,7 +1279,7 @@ static bool gic_check_eoimode(struct device_node *node, void __iomem **base) */ *base += 0xf000; cpuif_res.start += 0xf000; - pr_warn("GIC: Adjusting CPU interface base to %pa", + pr_warn("GIC: Adjusting CPU interface base to %pa\n", &cpuif_res.start); } From b9dce7f1ba01be340975c17bd37a46ec6054bd2b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 20 Oct 2016 11:12:57 +0100 Subject: [PATCH 381/884] arm64: kernel: force ET_DYN ELF type for CONFIG_RELOCATABLE=y GNU ld used to set the ELF file type to ET_DYN for PIE executables, which is the same file type used for shared libraries. However, this was changed recently, and now PIE executables are emitted as ET_EXEC instead. The distinction is only relevant for ELF loaders, and so there is little reason to care about the difference when building the kernel, which is why the change has gone unnoticed until now. However, debuggers do use the ELF binary, and expect ET_EXEC type files to appear in memory at the exact offset described in the ELF metadata. This means source level debugging is no longer possible when KASLR is in effect or when executing the stub. So add the -shared LD option when building with CONFIG_RELOCATABLE=y. This forces the ELF file type to be set to ET_DYN (which is what you get when building with binutils 2.24 and earlier anyway), and has no other ill effects. Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index ab51aed6b6c1..3635b8662724 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -15,7 +15,7 @@ CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET) GZFLAGS :=-9 ifneq ($(CONFIG_RELOCATABLE),) -LDFLAGS_vmlinux += -pie -Bsymbolic +LDFLAGS_vmlinux += -pie -shared -Bsymbolic endif ifeq ($(CONFIG_ARM64_ERRATUM_843419),y) From ed1e7db33c3354e4f8b594738c5e793690213b43 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov <0x7f454c46@gmail.com> Date: Thu, 20 Oct 2016 00:53:08 +0300 Subject: [PATCH 382/884] x86/signal: Remove bogus user_64bit_mode() check from sigaction_compat_abi() The recent introduction of SA_X32/IA32 sa_flags added a check for user_64bit_mode() into sigaction_compat_abi(). user_64bit_mode() is true for native 64-bit processes and x32 processes. Due to that the function returns w/o setting the SA_X32_ABI flag for X32 processes. In consequence the kernel attempts to deliver the signal to the X32 process in native 64-bit mode causing the process to segfault. Remove the check, so the actual check for X32 mode which sets the ABI flag can be reached. There is no side effect for native 64-bit mode. [ tglx: Rewrote changelog ] Fixes: 6846351052e6 ("x86/signal: Add SA_{X32,IA32}_ABI sa_flags") Reported-by: Mikulas Patocka Tested-by: Adam Borowski Signed-off-by: Dmitry Safonov <0x7f454c46@gmail.com> Cc: Dmitry Safonov Cc: Oleg Nesterov Cc: linux-mm@kvack.org Cc: Cyrill Gorcunov Cc: Pavel Emelyanov Link: http://lkml.kernel.org/r/CAJwJo6Z8ZWPqNfT6t-i8GW1MKxQrKDUagQqnZ%2B0%2B697%3DMyVeGg@mail.gmail.com Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/signal_compat.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c index 40df33753bae..ec1f756f9dc9 100644 --- a/arch/x86/kernel/signal_compat.c +++ b/arch/x86/kernel/signal_compat.c @@ -105,9 +105,6 @@ void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact) /* Don't let flags to be set from userspace */ act->sa.sa_flags &= ~(SA_IA32_ABI | SA_X32_ABI); - if (user_64bit_mode(current_pt_regs())) - return; - if (in_ia32_syscall()) act->sa.sa_flags |= SA_IA32_ABI; if (in_x32_syscall()) From c8061485a0d7569a865a3cc3c63347b0f42b3765 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 19 Oct 2016 19:28:11 +0100 Subject: [PATCH 383/884] sched/core, x86: Make struct thread_info arch specific again The following commit: c65eacbe290b ("sched/core: Allow putting thread_info into task_struct") ... made 'struct thread_info' a generic struct with only a single ::flags member, if CONFIG_THREAD_INFO_IN_TASK_STRUCT=y is selected. This change however seems to be quite x86 centric, since at least the generic preemption code (asm-generic/preempt.h) assumes that struct thread_info also has a preempt_count member, which apparently was not true for x86. We could add a bit more #ifdefs to solve this problem too, but it seems to be much simpler to make struct thread_info arch specific again. This also makes the conversion to THREAD_INFO_IN_TASK_STRUCT a bit easier for architectures that have a couple of arch specific stuff in their thread_info definition. The arch specific stuff _could_ be moved to thread_struct. However keeping them in thread_info makes it easier: accessing thread_info members is simple, since it is at the beginning of the task_struct, while the thread_struct is at the end. At least on s390 the offsets needed to access members of the thread_struct (with task_struct as base) are too large for various asm instructions. This is not a problem when keeping these members within thread_info. Signed-off-by: Heiko Carstens Signed-off-by: Mark Rutland Acked-by: Thomas Gleixner Cc: Andrew Morton Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Peter Zijlstra Cc: keescook@chromium.org Cc: linux-arch@vger.kernel.org Link: http://lkml.kernel.org/r/1476901693-8492-2-git-send-email-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/thread_info.h | 9 +++++++++ include/linux/thread_info.h | 11 ----------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 2aaca53c0974..ad6f5eb07a95 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -52,6 +52,15 @@ struct task_struct; #include #include +struct thread_info { + unsigned long flags; /* low level flags */ +}; + +#define INIT_THREAD_INFO(tsk) \ +{ \ + .flags = 0, \ +} + #define init_stack (init_thread_union.stack) #else /* !__ASSEMBLY__ */ diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 45f004e9cc59..2873baf5372a 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -13,17 +13,6 @@ struct timespec; struct compat_timespec; -#ifdef CONFIG_THREAD_INFO_IN_TASK -struct thread_info { - unsigned long flags; /* low level flags */ -}; - -#define INIT_THREAD_INFO(tsk) \ -{ \ - .flags = 0, \ -} -#endif - #ifdef CONFIG_THREAD_INFO_IN_TASK #define current_thread_info() ((struct thread_info *)current) #endif From 15e2a357876910623953a7c9b071096e6d976ca9 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Mon, 3 Oct 2016 10:43:46 +0200 Subject: [PATCH 384/884] gpio/board.txt: point to gpiod_set_value gpiod_set_value() is preffered interface these days, so add a pointer. Also fix a missing ). Signed-off-by: Pavel Machek [Fixed some grammar and reworded] Signed-off-by: Linus Walleij --- Documentation/gpio/board.txt | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/Documentation/gpio/board.txt b/Documentation/gpio/board.txt index 40884c4fe40c..a0f61898d493 100644 --- a/Documentation/gpio/board.txt +++ b/Documentation/gpio/board.txt @@ -6,7 +6,7 @@ Note that it only applies to the new descriptor-based interface. For a description of the deprecated integer-based GPIO interface please refer to gpio-legacy.txt (actually, there is no real mapping possible with the old interface; you just fetch an integer from somewhere and request the -corresponding GPIO. +corresponding GPIO). All platforms can enable the GPIO library, but if the platform strictly requires GPIO functionality to be present, it needs to select GPIOLIB from its @@ -162,6 +162,9 @@ The driver controlling "foo.0" will then be able to obtain its GPIOs as follows: Since the "led" GPIOs are mapped as active-high, this example will switch their signals to 1, i.e. enabling the LEDs. And for the "power" GPIO, which is mapped -as active-low, its actual signal will be 0 after this code. Contrary to the legacy -integer GPIO interface, the active-low property is handled during mapping and is -thus transparent to GPIO consumers. +as active-low, its actual signal will be 0 after this code. Contrary to the +legacy integer GPIO interface, the active-low property is handled during +mapping and is thus transparent to GPIO consumers. + +A set of functions such as gpiod_set_value() is available to work with +the new descriptor-oriented interface. From 44df08198bc98d75085bb0ff4b54bf43e0bc40c0 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Wed, 5 Oct 2016 15:08:36 +0530 Subject: [PATCH 385/884] gpio: mxs: Unmap region obtained by of_iomap Free memory mapping, if mxs_gpio_probe is not successful. Signed-off-by: Arvind Yadav Signed-off-by: Linus Walleij --- drivers/gpio/gpio-mxs.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-mxs.c b/drivers/gpio/gpio-mxs.c index b9daa0bf32a4..ee1724806f46 100644 --- a/drivers/gpio/gpio-mxs.c +++ b/drivers/gpio/gpio-mxs.c @@ -308,8 +308,10 @@ static int mxs_gpio_probe(struct platform_device *pdev) writel(~0U, port->base + PINCTRL_IRQSTAT(port) + MXS_CLR); irq_base = irq_alloc_descs(-1, 0, 32, numa_node_id()); - if (irq_base < 0) - return irq_base; + if (irq_base < 0) { + err = irq_base; + goto out_iounmap; + } port->domain = irq_domain_add_legacy(np, 32, irq_base, 0, &irq_domain_simple_ops, NULL); @@ -349,6 +351,8 @@ out_irqdomain_remove: irq_domain_remove(port->domain); out_irqdesc_free: irq_free_descs(irq_base, 32); +out_iounmap: + iounmap(port->base); return err; } From d1ca19cb3bc88318a54643f7a250ec6abab51108 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 12 Oct 2016 09:25:20 +0300 Subject: [PATCH 386/884] gpio: stmpe: || vs && typo && was obviously intended here. Fixes: 6936e1f88d23 ('gpio: stmpe: Write int status register only when needed') Signed-off-by: Dan Carpenter Acked-by: Patrice Chotard Signed-off-by: Linus Walleij --- drivers/gpio/gpio-stmpe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-stmpe.c b/drivers/gpio/gpio-stmpe.c index e7d422a6b90b..5b0042776ec7 100644 --- a/drivers/gpio/gpio-stmpe.c +++ b/drivers/gpio/gpio-stmpe.c @@ -409,7 +409,7 @@ static irqreturn_t stmpe_gpio_irq(int irq, void *dev) * 801/1801/1600, bits are cleared when read. * Edge detect register is not present on 801/1600/1801 */ - if (stmpe->partnum != STMPE801 || stmpe->partnum != STMPE1600 || + if (stmpe->partnum != STMPE801 && stmpe->partnum != STMPE1600 && stmpe->partnum != STMPE1801) { stmpe_reg_write(stmpe, statmsbreg + i, status[i]); stmpe_reg_write(stmpe, From 0cb940927df72a514588d8286916eda4aaa4d011 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 10 Oct 2016 14:42:46 +0200 Subject: [PATCH 387/884] gpio: mockup: add sysfs dependency Building the gpio-mockup driver without SYSFS results in a harmless Kconfig warning: warning: (GPIO_MOCKUP) selects GPIO_SYSFS which has unmet direct dependencies (GPIOLIB && SYSFS) We can easily avoid that warning by adding a dependency on SYSFS. Fixes: 0f98dd1b27d2 ("gpio/mockup: add virtual gpio device") Signed-off-by: Arnd Bergmann Signed-off-by: Linus Walleij --- drivers/gpio/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 26ee00f6bd58..d011cb89d25e 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -284,7 +284,7 @@ config GPIO_MM_LANTIQ config GPIO_MOCKUP tristate "GPIO Testing Driver" - depends on GPIOLIB + depends on GPIOLIB && SYSFS select GPIO_SYSFS help This enables GPIO Testing driver, which provides a way to test GPIO From 67bf5156edc4f58241fd7c119ae145c552adddd6 Mon Sep 17 00:00:00 2001 From: David Arcari Date: Wed, 12 Oct 2016 18:40:30 +0200 Subject: [PATCH 388/884] gpio / ACPI: fix returned error from acpi_dev_gpio_irq_get() acpi_dev_gpio_irq_get() currently ignores the error returned by acpi_get_gpiod_by_index() and overwrites it with -ENOENT. Problem is this error can be -EPROBE_DEFER, which just blows up some drivers when the module ordering is not correct. Cc: stable@vger.kernel.org Signed-off-by: David Arcari Signed-off-by: Benjamin Tissoires Acked-by: Mika Westerberg Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib-acpi.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index 58ece201b8e6..72a4b326fd0d 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -653,14 +653,17 @@ int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index) { int idx, i; unsigned int irq_flags; + int ret = -ENOENT; for (i = 0, idx = 0; idx <= index; i++) { struct acpi_gpio_info info; struct gpio_desc *desc; desc = acpi_get_gpiod_by_index(adev, NULL, i, &info); - if (IS_ERR(desc)) + if (IS_ERR(desc)) { + ret = PTR_ERR(desc); break; + } if (info.gpioint && idx++ == index) { int irq = gpiod_to_irq(desc); @@ -679,7 +682,7 @@ int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index) } } - return -ENOENT; + return ret; } EXPORT_SYMBOL_GPL(acpi_dev_gpio_irq_get); From 5fb57ab328aaf313c5ad9c4c48900f632e7c6f1c Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Tue, 18 Oct 2016 12:30:36 +0200 Subject: [PATCH 389/884] drm/imx: ipuv3-plane: make sure x/y offsets are even in case of chroma subsampling Odd x/y offsets are not allowed for horizontally/vertically chroma subsampled planar YUV formats. Signed-off-by: Philipp Zabel Acked-by: Liu Ying --- drivers/gpu/drm/imx/ipuv3-plane.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c index 5c342997863e..e1ad844abafb 100644 --- a/drivers/gpu/drm/imx/ipuv3-plane.c +++ b/drivers/gpu/drm/imx/ipuv3-plane.c @@ -259,6 +259,7 @@ static int ipu_plane_atomic_check(struct drm_plane *plane, struct drm_framebuffer *fb = state->fb; struct drm_framebuffer *old_fb = old_state->fb; unsigned long eba, ubo, vbo, old_ubo, old_vbo; + int hsub, vsub; /* Ok to disable */ if (!fb) @@ -372,6 +373,16 @@ static int ipu_plane_atomic_check(struct drm_plane *plane, if (old_fb && old_fb->pitches[1] != fb->pitches[1]) crtc_state->mode_changed = true; + + /* + * The x/y offsets must be even in case of horizontal/vertical + * chroma subsampling. + */ + hsub = drm_format_horz_chroma_subsampling(fb->pixel_format); + vsub = drm_format_vert_chroma_subsampling(fb->pixel_format); + if (((state->src_x >> 16) & (hsub - 1)) || + ((state->src_y >> 16) & (vsub - 1))) + return -EINVAL; } return 0; From 86126748cd5063aa888ce252f16b89b35e7d4707 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Tue, 18 Oct 2016 17:09:30 +0200 Subject: [PATCH 390/884] drm/imx: ipuv3-plane: disable local alpha for planes without alpha channel Without this patch, after enabling the overlay plane with an RGBA framebuffer, switching to a framebuffer without alpha channel would cause the plane to vanish, since the pixel local alpha is constant zero in that case. Disable local alpha again when setting an opaque framebuffer. Signed-off-by: Philipp Zabel --- drivers/gpu/drm/imx/ipuv3-plane.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c index e1ad844abafb..d5864ed4d772 100644 --- a/drivers/gpu/drm/imx/ipuv3-plane.c +++ b/drivers/gpu/drm/imx/ipuv3-plane.c @@ -437,6 +437,7 @@ static void ipu_plane_atomic_update(struct drm_plane *plane, ipu_dp_set_global_alpha(ipu_plane->dp, false, 0, false); break; default: + ipu_dp_set_global_alpha(ipu_plane->dp, true, 0, true); break; } } From 758f588d6fa2b1e64b6ae4bc4e7e86331bdee479 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 4 Sep 2016 19:13:57 +0200 Subject: [PATCH 391/884] kvm/x86: Fix unused variable warning in kvm_timer_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When CONFIG_CPU_FREQ is not set, int cpu is unused and gcc rightfully warns about it: arch/x86/kvm/x86.c: In function ‘kvm_timer_init’: arch/x86/kvm/x86.c:5697:6: warning: unused variable ‘cpu’ [-Wunused-variable] int cpu; ^~~ But since it is used only in the CONFIG_CPU_FREQ block, simply move it there, thus squashing the warning too. Signed-off-by: Borislav Petkov Signed-off-by: Radim Krčmář --- arch/x86/kvm/x86.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6c633de84dd7..e375235d81c9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5733,13 +5733,13 @@ static int kvmclock_cpu_online(unsigned int cpu) static void kvm_timer_init(void) { - int cpu; - max_tsc_khz = tsc_khz; if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { #ifdef CONFIG_CPU_FREQ struct cpufreq_policy policy; + int cpu; + memset(&policy, 0, sizeof(policy)); cpu = get_cpu(); cpufreq_get_policy(&policy, cpu); From 8678654e3c7ad7b0f4beb03fa89691279cba71f9 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 13 Oct 2016 17:45:20 +0200 Subject: [PATCH 392/884] kvm: x86: memset whole irq_eoi MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gcc 7 warns: arch/x86/kvm/ioapic.c: In function 'kvm_ioapic_reset': arch/x86/kvm/ioapic.c:597:2: warning: 'memset' used with length equal to number of elements without multiplication by element size [-Wmemset-elt-size] And it is right. Memset whole array using sizeof operator. Signed-off-by: Jiri Slaby Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Thomas Gleixner Cc: Ingo Molnar Cc: H. Peter Anvin Cc: x86@kernel.org Cc: kvm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org Reviewed-by: Paolo Bonzini [Added x86 subject tag] Signed-off-by: Radim Krčmář --- arch/x86/kvm/ioapic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index c7220ba94aa7..1a22de70f7f7 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -594,7 +594,7 @@ static void kvm_ioapic_reset(struct kvm_ioapic *ioapic) ioapic->irr = 0; ioapic->irr_delivered = 0; ioapic->id = 0; - memset(ioapic->irq_eoi, 0x00, IOAPIC_NUM_PINS); + memset(ioapic->irq_eoi, 0x00, sizeof(ioapic->irq_eoi)); rtc_irq_eoi_tracking_reset(ioapic); } From db4b0710fae90a4407bfa77b23db396e580b9e23 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 20 Oct 2016 12:23:16 +0100 Subject: [PATCH 393/884] arm64: fix show_regs fallout from KERN_CONT changes Recently in commit 4bcc595ccd80decb ("printk: reinstate KERN_CONT for printing continuation lines"), the behaviour of printk changed w.r.t. KERN_CONT. Now, KERN_CONT is mandatory to continue existing lines. Without this, prefixes are inserted, making output illegible, e.g. [ 1007.069010] pc : [] lr : [] pstate: 40000145 [ 1007.076329] sp : ffff000008d53ec0 [ 1007.079606] x29: ffff000008d53ec0 [ 1007.082797] x28: 0000000080c50018 [ 1007.086160] [ 1007.087630] x27: ffff000008e0c7f8 [ 1007.090820] x26: ffff80097631ca00 [ 1007.094183] [ 1007.095653] x25: 0000000000000001 [ 1007.098843] x24: 000000ea68b61cac [ 1007.102206] ... or when dumped with the userpace dmesg tool, which has slightly different implicit newline behaviour. e.g. [ 1007.069010] pc : [] lr : [] pstate: 40000145 [ 1007.076329] sp : ffff000008d53ec0 [ 1007.079606] x29: ffff000008d53ec0 [ 1007.082797] x28: 0000000080c50018 [ 1007.086160] [ 1007.087630] x27: ffff000008e0c7f8 [ 1007.090820] x26: ffff80097631ca00 [ 1007.094183] [ 1007.095653] x25: 0000000000000001 [ 1007.098843] x24: 000000ea68b61cac [ 1007.102206] We can't simply always use KERN_CONT for lines which may or may not be continuations. That causes line prefixes (e.g. timestamps) to be supressed, and the alignment of all but the first line will be broken. For even more fun, we can't simply insert some dummy empty-string printk calls, as GCC warns for an empty printk string, and even if we pass KERN_DEFAULT explcitly to silence the warning, the prefix gets swallowed unless there is an additional part to the string. Instead, we must manually iterate over pairs of registers, which gives us the legible output we want in either case, e.g. [ 169.771790] pc : [] lr : [] pstate: 40000145 [ 169.779109] sp : ffff000008d53ec0 [ 169.782386] x29: ffff000008d53ec0 x28: 0000000080c50018 [ 169.787650] x27: ffff000008e0c7f8 x26: ffff80097631de00 [ 169.792913] x25: 0000000000000001 x24: 00000027827b2cf4 Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/process.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 4f186c56c5eb..01753cd7d3f0 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -187,10 +187,19 @@ void __show_regs(struct pt_regs *regs) printk("pc : [<%016llx>] lr : [<%016llx>] pstate: %08llx\n", regs->pc, lr, regs->pstate); printk("sp : %016llx\n", sp); - for (i = top_reg; i >= 0; i--) { + + i = top_reg; + + while (i >= 0) { printk("x%-2d: %016llx ", i, regs->regs[i]); - if (i % 2 == 0) - printk("\n"); + i--; + + if (i % 2 == 0) { + pr_cont("x%-2d: %016llx ", i, regs->regs[i]); + i--; + } + + pr_cont("\n"); } printk("\n"); } From f7881bd644474a4a62d7bd1ec801176f635f59ae Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 20 Oct 2016 12:24:53 +0100 Subject: [PATCH 394/884] arm64: remove pr_cont abuse from mem_init All the lines printed by mem_init are independent, with each ending with a newline. While they logically form a large block, none are actually continuations of previous lines. The kernel-side printk code and the userspace demsg tool differ in their handling of KERN_CONT following a newline, and while this isn't always a problem kernel-side, it does cause difficulty for userspace. Using pr_cont causes the userspace tool to not print line prefix (e.g. timestamps) even when following a newline, mis-aligning the output and making it harder to read, e.g. [ 0.000000] Virtual kernel memory layout: [ 0.000000] modules : 0xffff000000000000 - 0xffff000008000000 ( 128 MB) vmalloc : 0xffff000008000000 - 0xffff7dffbfff0000 (129022 GB) .text : 0xffff000008080000 - 0xffff0000088b0000 ( 8384 KB) .rodata : 0xffff0000088b0000 - 0xffff000008c50000 ( 3712 KB) .init : 0xffff000008c50000 - 0xffff000008d50000 ( 1024 KB) .data : 0xffff000008d50000 - 0xffff000008e25200 ( 853 KB) .bss : 0xffff000008e25200 - 0xffff000008e6bec0 ( 284 KB) fixed : 0xffff7dfffe7fd000 - 0xffff7dfffec00000 ( 4108 KB) PCI I/O : 0xffff7dfffee00000 - 0xffff7dffffe00000 ( 16 MB) vmemmap : 0xffff7e0000000000 - 0xffff800000000000 ( 2048 GB maximum) 0xffff7e0000000000 - 0xffff7e0026000000 ( 608 MB actual) memory : 0xffff800000000000 - 0xffff800980000000 ( 38912 MB) [ 0.000000] SLUB: HWalign=64, Order=0-3, MinObjects=0, CPUs=6, Nodes=1 Fix this by using pr_notice consistently for all lines, which both the kernel and userspace are happy with. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: James Morse Cc: Kefeng Wang Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/mm/init.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 21c489bdeb4e..212c4d1e2f26 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -421,35 +421,35 @@ void __init mem_init(void) pr_notice("Virtual kernel memory layout:\n"); #ifdef CONFIG_KASAN - pr_cont(" kasan : 0x%16lx - 0x%16lx (%6ld GB)\n", + pr_notice(" kasan : 0x%16lx - 0x%16lx (%6ld GB)\n", MLG(KASAN_SHADOW_START, KASAN_SHADOW_END)); #endif - pr_cont(" modules : 0x%16lx - 0x%16lx (%6ld MB)\n", + pr_notice(" modules : 0x%16lx - 0x%16lx (%6ld MB)\n", MLM(MODULES_VADDR, MODULES_END)); - pr_cont(" vmalloc : 0x%16lx - 0x%16lx (%6ld GB)\n", + pr_notice(" vmalloc : 0x%16lx - 0x%16lx (%6ld GB)\n", MLG(VMALLOC_START, VMALLOC_END)); - pr_cont(" .text : 0x%p" " - 0x%p" " (%6ld KB)\n", + pr_notice(" .text : 0x%p" " - 0x%p" " (%6ld KB)\n", MLK_ROUNDUP(_text, _etext)); - pr_cont(" .rodata : 0x%p" " - 0x%p" " (%6ld KB)\n", + pr_notice(" .rodata : 0x%p" " - 0x%p" " (%6ld KB)\n", MLK_ROUNDUP(__start_rodata, __init_begin)); - pr_cont(" .init : 0x%p" " - 0x%p" " (%6ld KB)\n", + pr_notice(" .init : 0x%p" " - 0x%p" " (%6ld KB)\n", MLK_ROUNDUP(__init_begin, __init_end)); - pr_cont(" .data : 0x%p" " - 0x%p" " (%6ld KB)\n", + pr_notice(" .data : 0x%p" " - 0x%p" " (%6ld KB)\n", MLK_ROUNDUP(_sdata, _edata)); - pr_cont(" .bss : 0x%p" " - 0x%p" " (%6ld KB)\n", + pr_notice(" .bss : 0x%p" " - 0x%p" " (%6ld KB)\n", MLK_ROUNDUP(__bss_start, __bss_stop)); - pr_cont(" fixed : 0x%16lx - 0x%16lx (%6ld KB)\n", + pr_notice(" fixed : 0x%16lx - 0x%16lx (%6ld KB)\n", MLK(FIXADDR_START, FIXADDR_TOP)); - pr_cont(" PCI I/O : 0x%16lx - 0x%16lx (%6ld MB)\n", + pr_notice(" PCI I/O : 0x%16lx - 0x%16lx (%6ld MB)\n", MLM(PCI_IO_START, PCI_IO_END)); #ifdef CONFIG_SPARSEMEM_VMEMMAP - pr_cont(" vmemmap : 0x%16lx - 0x%16lx (%6ld GB maximum)\n", + pr_notice(" vmemmap : 0x%16lx - 0x%16lx (%6ld GB maximum)\n", MLG(VMEMMAP_START, VMEMMAP_START + VMEMMAP_SIZE)); - pr_cont(" 0x%16lx - 0x%16lx (%6ld MB actual)\n", + pr_notice(" 0x%16lx - 0x%16lx (%6ld MB actual)\n", MLM((unsigned long)phys_to_page(memblock_start_of_DRAM()), (unsigned long)virt_to_page(high_memory))); #endif - pr_cont(" memory : 0x%16lx - 0x%16lx (%6ld MB)\n", + pr_notice(" memory : 0x%16lx - 0x%16lx (%6ld MB)\n", MLM(__phys_to_virt(memblock_start_of_DRAM()), (unsigned long)high_memory)); From c6fe46a79ecd79606bb96fada4515f6b23f87b62 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Tue, 18 Oct 2016 00:41:12 +0900 Subject: [PATCH 395/884] cpufreq: fix overflow in cpufreq_table_find_index_dl() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 'best' is always less or equals to 'pos', so `best - pos' returns a negative value which is then getting casted to `unsigned int' and passed to __cpufreq_driver_target()->acpi_cpufreq_target() for policy->freq_table selection. This results in BUG: unable to handle kernel paging request at ffff881019b469f8 IP: [] acpi_cpufreq_target+0x4f/0x190 [acpi_cpufreq] PGD 267f067 PUD 0 Oops: 0000 [#1] PREEMPT SMP CPU: 6 PID: 70 Comm: kworker/6:1 Not tainted 4.9.0-rc1-next-20161017-dbg-dirty Workqueue: events dbs_work_handler task: ffff88041b808000 task.stack: ffff88041b810000 RIP: 0010:[] [] acpi_cpufreq_target+0x4f/0x190 [acpi_cpufreq] RSP: 0018:ffff88041b813c60 EFLAGS: 00010282 RAX: ffff880419b46a00 RBX: ffff88041b848400 RCX: ffff880419b20f80 RDX: 00000000001dff38 RSI: 00000000ffffffff RDI: ffff88041b848400 RBP: ffff88041b813cb0 R08: 0000000000000006 R09: 0000000000000040 R10: ffffffff8207f9e0 R11: ffffffff8173595b R12: 0000000000000000 R13: ffff88041f1dff38 R14: 0000000000262900 R15: 0000000bfffffff4 FS: 0000000000000000(0000) GS:ffff88041f000000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffff881019b469f8 CR3: 000000041a2d3000 CR4: 00000000001406e0 Stack: ffff88041b813cb0 ffffffff813347f9 ffff88041b813ca0 ffffffff81334663 ffff88041f1d4bc0 ffff88041b848400 0000000000000000 0000000000000000 0000000000262900 0000000000000000 ffff88041b813d00 ffffffff813355dc Call Trace: [] ? cpufreq_freq_transition_begin+0xf1/0xfc [] ? get_cpu_idle_time+0x97/0xa6 [] __cpufreq_driver_target+0x3b6/0x44e [] cs_dbs_timer+0x11a/0x135 [] dbs_work_handler+0x39/0x62 [] process_one_work+0x280/0x4a5 [] worker_thread+0x24f/0x397 [] ? rescuer_thread+0x30b/0x30b [] ? nl80211_get_key+0x29/0x36a [] kthread+0xfc/0x104 [] ? put_lock_stats.isra.9+0xe/0x20 [] ? kthread_create_on_node+0x3f/0x3f [] ret_from_fork+0x22/0x30 Code: 56 4d 6b ff 0c 41 55 41 54 53 48 83 ec 28 48 8b 15 ad 1e 00 00 44 8b 41 08 48 8b 87 c8 00 00 00 49 89 d5 4e 03 2c c5 80 b2 78 81 <46> 8b 74 38 04 45 3b 75 00 75 11 31 c0 83 39 00 0f 84 1c 01 00 RIP [] acpi_cpufreq_target+0x4f/0x190 [acpi_cpufreq] RSP CR2: ffff881019b469f8 ---[ end trace 16d9fc7a17897d37 ]--- [ rjw: In some cases this bug may also cause incorrect frequencies to be selected by cpufreq governors. ] Fixes: 899bb6642f2a (cpufreq: skip invalid entries when searching the frequency) Link: http://marc.info/?l=linux-kernel&m=147672030714331&w=2 Reported-and-tested-by: Sedat Dilek Reported-and-tested-by: Jörg Otte Signed-off-by: Sergey Senozhatsky Acked-by: Viresh Kumar Cc: 4.8+ # 4.8+ Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 5fa55fc56e18..32dc0cbd51ca 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -677,10 +677,10 @@ static inline int cpufreq_table_find_index_dl(struct cpufreq_policy *policy, if (best == table - 1) return pos - table; - return best - pos; + return best - table; } - return best - pos; + return best - table; } /* Works only on sorted freq-tables */ From 19271c1a083282fbd568f3f214a6182e973626eb Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 20 Oct 2016 16:05:42 +0200 Subject: [PATCH 396/884] mlxsw: spectrum_router: Use correct tree index for binding By a mistake, there is tree index 0 passed to RALTB. Should be MLXSW_SP_LPM_TREE_MIN. Fixes: b45f64d16d45 ("mlxsw: spectrum_router: Use FIB notifications instead of switchdev calls") Reported-by: Yotam Gigi Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 78fc557d6dd7..1b3a2cb65837 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1862,7 +1862,8 @@ static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) if (err) return err; - mlxsw_reg_raltb_pack(raltb_pl, 0, MLXSW_REG_RALXX_PROTOCOL_IPV4, 0); + mlxsw_reg_raltb_pack(raltb_pl, 0, MLXSW_REG_RALXX_PROTOCOL_IPV4, + MLXSW_SP_LPM_TREE_MIN); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl); if (err) return err; From 37956d78b8f0c5560421f83d1a5e337ee81c685c Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 20 Oct 2016 16:05:43 +0200 Subject: [PATCH 397/884] mlxsw: spectrum_router: Make mlxsw_sp_router_fib4_del return void and remove warn The function return value is not checked anywhere. Also, the warning causes huge slowdown when removing large number of FIB entries which were not offloaded, because of ordering issue. Ido's preparing a patchset to fix the ordering issue, but that is definitelly not net tree material. Fixes: b45f64d16d45 ("mlxsw: spectrum_router: Use FIB notifications instead of switchdev calls") Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlxsw/spectrum_router.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 1b3a2cb65837..f3d50d369bbe 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1820,19 +1820,17 @@ err_fib_entry_insert: return err; } -static int mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp, - struct fib_entry_notifier_info *fen_info) +static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp, + struct fib_entry_notifier_info *fen_info) { struct mlxsw_sp_fib_entry *fib_entry; if (mlxsw_sp->router.aborted) - return 0; + return; fib_entry = mlxsw_sp_fib_entry_find(mlxsw_sp, fen_info); - if (!fib_entry) { - dev_warn(mlxsw_sp->bus_info->dev, "Failed to find FIB4 entry being removed.\n"); - return -ENOENT; - } + if (!fib_entry) + return; if (fib_entry->ref_count == 1) { mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry); @@ -1840,7 +1838,6 @@ static int mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp, } mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry); - return 0; } static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) From 7fb6a36bab6b0b158f93eb13faa1b440f8b26009 Mon Sep 17 00:00:00 2001 From: Elad Raz Date: Thu, 20 Oct 2016 16:05:44 +0200 Subject: [PATCH 398/884] mlxsw: switchx2: Fix ethernet port initialization When creating an ethernet port fails, we must move the port to disable, otherwise putting the port in switch partition 0 (ETH) or 1 (IB) will always fails. Fixes: 31557f0f9755 ("mlxsw: Introduce Mellanox SwitchX-2 ASIC support") Signed-off-by: Elad Raz Reviewed-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/switchx2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index c0c23e2f3275..92bda8703f87 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -1088,6 +1088,7 @@ err_port_stp_state_set: err_port_admin_status_set: err_port_mtu_set: err_port_speed_set: + mlxsw_sx_port_swid_set(mlxsw_sx_port, MLXSW_PORT_SWID_DISABLED_PORT); err_port_swid_set: err_port_system_port_mapping_set: port_not_usable: From 164c971d1cc365dc85e5cf9a9302a91c0cf7f126 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 20 Oct 2016 16:05:45 +0200 Subject: [PATCH 399/884] mlxsw: pci: Fix reset wait for SwitchX2 SwitchX2 firmware does not implement reset done yet. Moreover, when busy-polled for ready magic, that slows down firmware and reset takes longer than the defined timeout, causing initialization to fail. So restore the previous behaviour and just sleep in this case. Fixes: 233fa44bd67a ("mlxsw: pci: Implement reset done check") Signed-off-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/pci.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index e742bd4e8894..912f71f84209 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -1838,11 +1838,17 @@ static const struct mlxsw_bus mlxsw_pci_bus = { .cmd_exec = mlxsw_pci_cmd_exec, }; -static int mlxsw_pci_sw_reset(struct mlxsw_pci *mlxsw_pci) +static int mlxsw_pci_sw_reset(struct mlxsw_pci *mlxsw_pci, + const struct pci_device_id *id) { unsigned long end; mlxsw_pci_write32(mlxsw_pci, SW_RESET, MLXSW_PCI_SW_RESET_RST_BIT); + if (id->device == PCI_DEVICE_ID_MELLANOX_SWITCHX2) { + msleep(MLXSW_PCI_SW_RESET_TIMEOUT_MSECS); + return 0; + } + wmb(); /* reset needs to be written before we read control register */ end = jiffies + msecs_to_jiffies(MLXSW_PCI_SW_RESET_TIMEOUT_MSECS); do { @@ -1909,7 +1915,7 @@ static int mlxsw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) mlxsw_pci->pdev = pdev; pci_set_drvdata(pdev, mlxsw_pci); - err = mlxsw_pci_sw_reset(mlxsw_pci); + err = mlxsw_pci_sw_reset(mlxsw_pci, id); if (err) { dev_err(&pdev->dev, "Software reset failed\n"); goto err_sw_reset; From 5712bf9c5c30ade3204016147d7b04bece6952d9 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Wed, 19 Oct 2016 17:42:39 +0300 Subject: [PATCH 400/884] net/sched: act_mirred: Use passed lastuse argument stats_update callback is called by NIC drivers doing hardware offloading of the mirred action. Lastuse is passed as argument to specify when the stats was actually last updated and is not always the current time. Fixes: 9798e6fe4f9b ('net: act_mirred: allow statistic updates from offloaded actions') Signed-off-by: Paul Blakey Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- net/sched/act_mirred.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 667dc382df82..6b07fba5770b 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -207,8 +207,11 @@ out: static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets, u64 lastuse) { - tcf_lastuse_update(&a->tcfa_tm); + struct tcf_mirred *m = to_mirred(a); + struct tcf_t *tm = &m->tcf_tm; + _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets); + tm->lastuse = lastuse; } static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, From 7ba5c003db59a6f738d87d92b68d4a91cdf22f60 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 20 Oct 2016 11:45:49 +1100 Subject: [PATCH 401/884] net/ncsi: Avoid if statements in ncsi_suspend_channel() There are several if/else statements in the state machine implemented by switch/case in ncsi_suspend_channel() to avoid duplicated code. It makes the code a bit hard to be understood. This drops if/else statements in ncsi_suspend_channel() to improve the code readability as Joel Stanley suggested. Also, it becomes easy to add more states in the state machine without affecting current code. No logical changes introduced by this. Suggested-by: Joel Stanley Signed-off-by: Gavin Shan Signed-off-by: David S. Miller --- net/ncsi/ncsi-manage.c | 78 +++++++++++++++++++++++++++--------------- 1 file changed, 50 insertions(+), 28 deletions(-) diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 5e509e547c2d..655d0d653926 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -540,42 +540,60 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp) nd->state = ncsi_dev_state_suspend_select; /* Fall through */ case ncsi_dev_state_suspend_select: + ndp->pending_req_num = 1; + + nca.type = NCSI_PKT_CMD_SP; + nca.package = np->id; + nca.channel = NCSI_RESERVED_CHANNEL; + if (ndp->flags & NCSI_DEV_HWA) + nca.bytes[0] = 0; + else + nca.bytes[0] = 1; + + nd->state = ncsi_dev_state_suspend_dcnt; + ret = ncsi_xmit_cmd(&nca); + if (ret) + goto error; + + break; case ncsi_dev_state_suspend_dcnt: + ndp->pending_req_num = 1; + + nca.type = NCSI_PKT_CMD_DCNT; + nca.package = np->id; + nca.channel = nc->id; + + nd->state = ncsi_dev_state_suspend_dc; + ret = ncsi_xmit_cmd(&nca); + if (ret) + goto error; + + break; case ncsi_dev_state_suspend_dc: + ndp->pending_req_num = 1; + + nca.type = NCSI_PKT_CMD_DC; + nca.package = np->id; + nca.channel = nc->id; + nca.bytes[0] = 1; + + nd->state = ncsi_dev_state_suspend_deselect; + ret = ncsi_xmit_cmd(&nca); + if (ret) + goto error; + + break; case ncsi_dev_state_suspend_deselect: ndp->pending_req_num = 1; - np = ndp->active_package; - nc = ndp->active_channel; + nca.type = NCSI_PKT_CMD_DP; nca.package = np->id; - if (nd->state == ncsi_dev_state_suspend_select) { - nca.type = NCSI_PKT_CMD_SP; - nca.channel = NCSI_RESERVED_CHANNEL; - if (ndp->flags & NCSI_DEV_HWA) - nca.bytes[0] = 0; - else - nca.bytes[0] = 1; - nd->state = ncsi_dev_state_suspend_dcnt; - } else if (nd->state == ncsi_dev_state_suspend_dcnt) { - nca.type = NCSI_PKT_CMD_DCNT; - nca.channel = nc->id; - nd->state = ncsi_dev_state_suspend_dc; - } else if (nd->state == ncsi_dev_state_suspend_dc) { - nca.type = NCSI_PKT_CMD_DC; - nca.channel = nc->id; - nca.bytes[0] = 1; - nd->state = ncsi_dev_state_suspend_deselect; - } else if (nd->state == ncsi_dev_state_suspend_deselect) { - nca.type = NCSI_PKT_CMD_DP; - nca.channel = NCSI_RESERVED_CHANNEL; - nd->state = ncsi_dev_state_suspend_done; - } + nca.channel = NCSI_RESERVED_CHANNEL; + nd->state = ncsi_dev_state_suspend_done; ret = ncsi_xmit_cmd(&nca); - if (ret) { - nd->state = ncsi_dev_state_functional; - return; - } + if (ret) + goto error; break; case ncsi_dev_state_suspend_done: @@ -589,6 +607,10 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp) netdev_warn(nd->dev, "Wrong NCSI state 0x%x in suspend\n", nd->state); } + + return; +error: + nd->state = ncsi_dev_state_functional; } static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) From 008a424a24a904ed12c03b203f6f257bcaf12358 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 20 Oct 2016 11:45:50 +1100 Subject: [PATCH 402/884] net/ncsi: Fix stale link state of inactive channels on failover The issue was found on BCM5718 which has two NCSI channels in one package: C0 and C1. Both of them are connected to different LANs, means they are in link-up state and C0 is chosen as the active one until resetting BCM5718 happens as below. Resetting BCM5718 results in LSC (Link State Change) AEN packet received on C0, meaning LSC AEN is missed on C1. When LSC AEN packet received on C0 to report link-down, it fails over to C1 because C1 is in link-up state as software can see. However, C1 is in link-down state in hardware. It means the link state is out of synchronization between hardware and software, resulting in inappropriate channel (C1) selected as active one. This resolves the issue by sending separate GLS (Get Link Status) commands to all channels in the package before trying to do failover. The last link states of all channels in the package are retrieved. With it, C0 (not C1) is selected as active one as expected. Signed-off-by: Gavin Shan Signed-off-by: David S. Miller --- net/ncsi/internal.h | 1 + net/ncsi/ncsi-manage.c | 28 +++++++++++++++++++++++++++- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h index 13290a70fa71..eac48584cdd1 100644 --- a/net/ncsi/internal.h +++ b/net/ncsi/internal.h @@ -246,6 +246,7 @@ enum { ncsi_dev_state_config_gls, ncsi_dev_state_config_done, ncsi_dev_state_suspend_select = 0x0401, + ncsi_dev_state_suspend_gls, ncsi_dev_state_suspend_dcnt, ncsi_dev_state_suspend_dc, ncsi_dev_state_suspend_deselect, diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 655d0d653926..4789f8681695 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -550,11 +550,37 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp) else nca.bytes[0] = 1; - nd->state = ncsi_dev_state_suspend_dcnt; + /* To retrieve the last link states of channels in current + * package when current active channel needs fail over to + * another one. It means we will possibly select another + * channel as next active one. The link states of channels + * are most important factor of the selection. So we need + * accurate link states. Unfortunately, the link states on + * inactive channels can't be updated with LSC AEN in time. + */ + if (ndp->flags & NCSI_DEV_RESHUFFLE) + nd->state = ncsi_dev_state_suspend_gls; + else + nd->state = ncsi_dev_state_suspend_dcnt; ret = ncsi_xmit_cmd(&nca); if (ret) goto error; + break; + case ncsi_dev_state_suspend_gls: + ndp->pending_req_num = np->channel_num; + + nca.type = NCSI_PKT_CMD_GLS; + nca.package = np->id; + + nd->state = ncsi_dev_state_suspend_dcnt; + NCSI_FOR_EACH_CHANNEL(np, nc) { + nca.channel = nc->id; + ret = ncsi_xmit_cmd(&nca); + if (ret) + goto error; + } + break; case ncsi_dev_state_suspend_dcnt: ndp->pending_req_num = 1; From bbc7c01e95ceef4e23e343f8cbb6edca887121a5 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 20 Oct 2016 11:45:51 +1100 Subject: [PATCH 403/884] net/ncsi: Choose hot channel as active one if necessary The issue was found on BCM5718 which has two NCSI channels in one package: C0 and C1. C0 is in link-up state while C1 is in link-down state. C0 is chosen as active channel until unplugging and plugging C0's cable: On unplugging C0's cable, LSC (Link State Change) AEN packet received on C0 to report link-down event. After that, C1 is chosen as active channel. LSC AEN for link-up event is lost on C0 when plugging C0's cable back. We lose the network even C0 is usable. This resolves the issue by recording the (hot) channel that was ever chosen as active one. The hot channel is chosen to be active one if none of available channels in link-up state. With this, C0 is still the active one after unplugging C0's cable. LSC AEN packet received on C0 when plugging its cable back. Signed-off-by: Gavin Shan Signed-off-by: David S. Miller --- net/ncsi/internal.h | 1 + net/ncsi/ncsi-manage.c | 22 +++++++++++++++++++--- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h index eac48584cdd1..1308a56f2591 100644 --- a/net/ncsi/internal.h +++ b/net/ncsi/internal.h @@ -265,6 +265,7 @@ struct ncsi_dev_priv { #endif unsigned int package_num; /* Number of packages */ struct list_head packages; /* List of packages */ + struct ncsi_channel *hot_channel; /* Channel was ever active */ struct ncsi_request requests[256]; /* Request table */ unsigned int request_id; /* Last used request ID */ #define NCSI_REQ_START_IDX 1 diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 4789f8681695..a3bd5fa8ad09 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -645,6 +645,7 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) struct net_device *dev = nd->dev; struct ncsi_package *np = ndp->active_package; struct ncsi_channel *nc = ndp->active_channel; + struct ncsi_channel *hot_nc = NULL; struct ncsi_cmd_arg nca; unsigned char index; unsigned long flags; @@ -750,12 +751,20 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) break; case ncsi_dev_state_config_done: spin_lock_irqsave(&nc->lock, flags); - if (nc->modes[NCSI_MODE_LINK].data[2] & 0x1) + if (nc->modes[NCSI_MODE_LINK].data[2] & 0x1) { + hot_nc = nc; nc->state = NCSI_CHANNEL_ACTIVE; - else + } else { + hot_nc = NULL; nc->state = NCSI_CHANNEL_INACTIVE; + } spin_unlock_irqrestore(&nc->lock, flags); + /* Update the hot channel */ + spin_lock_irqsave(&ndp->lock, flags); + ndp->hot_channel = hot_nc; + spin_unlock_irqrestore(&ndp->lock, flags); + ncsi_start_channel_monitor(nc); ncsi_process_next_channel(ndp); break; @@ -773,10 +782,14 @@ error: static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp) { struct ncsi_package *np; - struct ncsi_channel *nc, *found; + struct ncsi_channel *nc, *found, *hot_nc; struct ncsi_channel_mode *ncm; unsigned long flags; + spin_lock_irqsave(&ndp->lock, flags); + hot_nc = ndp->hot_channel; + spin_unlock_irqrestore(&ndp->lock, flags); + /* The search is done once an inactive channel with up * link is found. */ @@ -794,6 +807,9 @@ static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp) if (!found) found = nc; + if (nc == hot_nc) + found = nc; + ncm = &nc->modes[NCSI_MODE_LINK]; if (ncm->data[2] & 0x1) { spin_unlock_irqrestore(&nc->lock, flags); From 22d8aa93d7ea169a349fb2f9dee5babc68f6a683 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 20 Oct 2016 11:45:52 +1100 Subject: [PATCH 404/884] net/ncsi: Improve HNCDSC AEN handler This improves AEN handler for Host Network Controller Driver Status Change (HNCDSC): * The channel's lock should be hold when accessing its state. * Do failover when host driver isn't ready. * Configure channel when host driver becomes ready. Signed-off-by: Gavin Shan Signed-off-by: David S. Miller --- net/ncsi/ncsi-aen.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c index b41a6617d498..6898e7229285 100644 --- a/net/ncsi/ncsi-aen.c +++ b/net/ncsi/ncsi-aen.c @@ -141,23 +141,35 @@ static int ncsi_aen_handler_hncdsc(struct ncsi_dev_priv *ndp, return -ENODEV; /* If the channel is active one, we need reconfigure it */ + spin_lock_irqsave(&nc->lock, flags); ncm = &nc->modes[NCSI_MODE_LINK]; hncdsc = (struct ncsi_aen_hncdsc_pkt *)h; ncm->data[3] = ntohl(hncdsc->status); if (!list_empty(&nc->link) || - nc->state != NCSI_CHANNEL_ACTIVE || - (ncm->data[3] & 0x1)) + nc->state != NCSI_CHANNEL_ACTIVE) { + spin_unlock_irqrestore(&nc->lock, flags); return 0; + } - if (ndp->flags & NCSI_DEV_HWA) + spin_unlock_irqrestore(&nc->lock, flags); + if (!(ndp->flags & NCSI_DEV_HWA) && !(ncm->data[3] & 0x1)) ndp->flags |= NCSI_DEV_RESHUFFLE; /* If this channel is the active one and the link doesn't * work, we have to choose another channel to be active one. * The logic here is exactly similar to what we do when link * is down on the active channel. + * + * On the other hand, we need configure it when host driver + * state on the active channel becomes ready. */ ncsi_stop_channel_monitor(nc); + + spin_lock_irqsave(&nc->lock, flags); + nc->state = (ncm->data[3] & 0x1) ? NCSI_CHANNEL_INACTIVE : + NCSI_CHANNEL_ACTIVE; + spin_unlock_irqrestore(&nc->lock, flags); + spin_lock_irqsave(&ndp->lock, flags); list_add_tail_rcu(&nc->link, &ndp->channel_queue); spin_unlock_irqrestore(&ndp->lock, flags); From 8be0328e52dc2c8c6f61563c9ccc95ae4d63e9ce Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Thu, 20 Oct 2016 10:01:28 +0200 Subject: [PATCH 405/884] stmmac: display the descriptors if DES0 = 0 It makes sense to display the descriptors even if DES0 is zero. This helps for example in case of it is needed to dump rx write-back descriptors to get timestamp status. Signed-off-by: Giuseppe Cavallaro Cc: Alexandre TORGUE Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c index 4ec7397e7fb3..a1b17cd7886b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@ -347,10 +347,9 @@ static void dwmac4_display_ring(void *head, unsigned int size, bool rx) pr_info("%s descriptor ring:\n", rx ? "RX" : "TX"); for (i = 0; i < size; i++) { - if (p->des0) - pr_info("%d [0x%x]: 0x%x 0x%x 0x%x 0x%x\n", - i, (unsigned int)virt_to_phys(p), - p->des0, p->des1, p->des2, p->des3); + pr_info("%d [0x%x]: 0x%x 0x%x 0x%x 0x%x\n", + i, (unsigned int)virt_to_phys(p), + p->des0, p->des1, p->des2, p->des3); p++; } } From 4c39135aa412d2f1381e43802523da110ca7855c Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 20 Oct 2016 18:09:18 +0300 Subject: [PATCH 406/884] xhci: add restart quirk for Intel Wildcatpoint PCH xHC in Wildcatpoint-LP PCH is similar to LynxPoint-LP and need the same quirks to prevent machines from spurious restart while shutting them down. Reported-by: Hasan Mahmood CC: Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index d7b0f97abbad..314179b4824e 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -45,6 +45,7 @@ #define PCI_DEVICE_ID_INTEL_LYNXPOINT_XHCI 0x8c31 #define PCI_DEVICE_ID_INTEL_LYNXPOINT_LP_XHCI 0x9c31 +#define PCI_DEVICE_ID_INTEL_WILDCATPOINT_LP_XHCI 0x9cb1 #define PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI 0x22b5 #define PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_XHCI 0xa12f #define PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_XHCI 0x9d2f @@ -153,7 +154,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) xhci->quirks |= XHCI_SPURIOUS_REBOOT; } if (pdev->vendor == PCI_VENDOR_ID_INTEL && - pdev->device == PCI_DEVICE_ID_INTEL_LYNXPOINT_LP_XHCI) { + (pdev->device == PCI_DEVICE_ID_INTEL_LYNXPOINT_LP_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_WILDCATPOINT_LP_XHCI)) { xhci->quirks |= XHCI_SPURIOUS_REBOOT; xhci->quirks |= XHCI_SPURIOUS_WAKEUP; } From 346e99736c3ce328fd42d678343b70243aca5f36 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 20 Oct 2016 18:09:19 +0300 Subject: [PATCH 407/884] xhci: workaround for hosts missing CAS bit If a device is unplugged and replugged during Sx system suspend some Intel xHC hosts will overwrite the CAS (Cold attach status) flag and no device connection is noticed in resume. A device in this state can be identified in resume if its link state is in polling or compliance mode, and the current connect status is 0. A device in this state needs to be warm reset. Intel 100/c230 series PCH specification update Doc #332692-006 Errata #8 Observed on Cherryview and Apollolake as they go into compliance mode if LFPS times out during polling, and re-plugged devices are not discovered at resume. Signed-off-by: Mathias Nyman CC: Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 37 +++++++++++++++++++++++++++++++++++++ drivers/usb/host/xhci-pci.c | 6 ++++++ drivers/usb/host/xhci.h | 3 +++ 3 files changed, 46 insertions(+) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 730b9fd26685..c047362b3768 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -1355,6 +1355,35 @@ int xhci_bus_suspend(struct usb_hcd *hcd) return 0; } +/* + * Workaround for missing Cold Attach Status (CAS) if device re-plugged in S3. + * warm reset a USB3 device stuck in polling or compliance mode after resume. + * See Intel 100/c230 series PCH specification update Doc #332692-006 Errata #8 + */ +static bool xhci_port_missing_cas_quirk(int port_index, + __le32 __iomem **port_array) +{ + u32 portsc; + + portsc = readl(port_array[port_index]); + + /* if any of these are set we are not stuck */ + if (portsc & (PORT_CONNECT | PORT_CAS)) + return false; + + if (((portsc & PORT_PLS_MASK) != XDEV_POLLING) && + ((portsc & PORT_PLS_MASK) != XDEV_COMP_MODE)) + return false; + + /* clear wakeup/change bits, and do a warm port reset */ + portsc &= ~(PORT_RWC_BITS | PORT_CEC | PORT_WAKE_BITS); + portsc |= PORT_WR; + writel(portsc, port_array[port_index]); + /* flush write */ + readl(port_array[port_index]); + return true; +} + int xhci_bus_resume(struct usb_hcd *hcd) { struct xhci_hcd *xhci = hcd_to_xhci(hcd); @@ -1392,6 +1421,14 @@ int xhci_bus_resume(struct usb_hcd *hcd) u32 temp; temp = readl(port_array[port_index]); + + /* warm reset CAS limited ports stuck in polling/compliance */ + if ((xhci->quirks & XHCI_MISSING_CAS) && + (hcd->speed >= HCD_USB3) && + xhci_port_missing_cas_quirk(port_index, port_array)) { + xhci_dbg(xhci, "reset stuck port %d\n", port_index); + continue; + } if (DEV_SUPERSPEED_ANY(temp)) temp &= ~(PORT_RWC_BITS | PORT_CEC | PORT_WAKE_BITS); else diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 314179b4824e..e96ae80d107e 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -51,6 +51,7 @@ #define PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_XHCI 0x9d2f #define PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI 0x0aa8 #define PCI_DEVICE_ID_INTEL_BROXTON_B_XHCI 0x1aa8 +#define PCI_DEVICE_ID_INTEL_APL_XHCI 0x5aa8 static const char hcd_name[] = "xhci_hcd"; @@ -171,6 +172,11 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI) { xhci->quirks |= XHCI_SSIC_PORT_UNUSED; } + if (pdev->vendor == PCI_VENDOR_ID_INTEL && + (pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_APL_XHCI)) + xhci->quirks |= XHCI_MISSING_CAS; + if (pdev->vendor == PCI_VENDOR_ID_ETRON && pdev->device == PCI_DEVICE_ID_EJ168) { xhci->quirks |= XHCI_RESET_ON_RESUME; diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index b2c1dc5dc0f3..f945380035d0 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -314,6 +314,8 @@ struct xhci_op_regs { #define XDEV_U2 (0x2 << 5) #define XDEV_U3 (0x3 << 5) #define XDEV_INACTIVE (0x6 << 5) +#define XDEV_POLLING (0x7 << 5) +#define XDEV_COMP_MODE (0xa << 5) #define XDEV_RESUME (0xf << 5) /* true: port has power (see HCC_PPC) */ #define PORT_POWER (1 << 9) @@ -1653,6 +1655,7 @@ struct xhci_hcd { #define XHCI_MTK_HOST (1 << 21) #define XHCI_SSIC_PORT_UNUSED (1 << 22) #define XHCI_NO_64BIT_SUPPORT (1 << 23) +#define XHCI_MISSING_CAS (1 << 24) unsigned int num_active_eps; unsigned int limit_active_eps; /* There are two roothubs to keep track of bus suspend info for */ From 7d3b016a6f5a0fa610dfd02b05654c08fa4ae514 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 20 Oct 2016 18:09:20 +0300 Subject: [PATCH 408/884] xhci: use default USB_RESUME_TIMEOUT when resuming ports. USB2 host inititated resume, and system suspend bus resume need to use the same USB_RESUME_TIMEOUT as elsewhere. This resolves a device disconnect issue at system resume seen on Intel Braswell and Apollolake, but is in no way limited to those platforms. Signed-off-by: Mathias Nyman CC: Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index c047362b3768..0ef16900efed 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -1166,7 +1166,7 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, xhci_set_link_state(xhci, port_array, wIndex, XDEV_RESUME); spin_unlock_irqrestore(&xhci->lock, flags); - msleep(20); + msleep(USB_RESUME_TIMEOUT); spin_lock_irqsave(&xhci->lock, flags); xhci_set_link_state(xhci, port_array, wIndex, XDEV_U0); @@ -1447,7 +1447,7 @@ int xhci_bus_resume(struct usb_hcd *hcd) if (need_usb2_u3_exit) { spin_unlock_irqrestore(&xhci->lock, flags); - msleep(20); + msleep(USB_RESUME_TIMEOUT); spin_lock_irqsave(&xhci->lock, flags); } From a478b097474cd9f2268ab1beaca74ff09e652b9b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Oct 2016 17:15:41 +0200 Subject: [PATCH 409/884] ahci: fix nvec check commit 17a51f12 ("ahci: only try to use multi-MSI mode if there is more than 1 port") lead to a case where nvec isn't initialized before it's used. Fix this by moving the check into the n_ports conditional. Reported-and-reviewed-by Colin Ian King Signed-off-by: Christoph Hellwig Signed-off-by: Tejun Heo --- drivers/ata/ahci.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index ed311a040fed..60e42e2ed68f 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -1436,14 +1436,14 @@ static int ahci_init_msi(struct pci_dev *pdev, unsigned int n_ports, "ahci: MRSM is on, fallback to single MSI\n"); pci_free_irq_vectors(pdev); } - } - /* - * -ENOSPC indicated we don't have enough vectors. Don't bother trying - * a single vectors for any other error: - */ - if (nvec < 0 && nvec != -ENOSPC) - return nvec; + /* + * -ENOSPC indicated we don't have enough vectors. Don't bother + * trying a single vectors for any other error: + */ + if (nvec < 0 && nvec != -ENOSPC) + return nvec; + } /* * If the host is not capable of supporting per-port vectors, fall From 7bb6615d395a7c130053728f3a64f6df6b2e1f18 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 18 Oct 2016 14:37:32 +0200 Subject: [PATCH 410/884] netfilter: conntrack: restart gc immediately if GC_MAX_EVICTS is reached When the maximum evictions number is reached, do not wait 5 seconds before the next run. CC: Florian Westphal Signed-off-by: Nicolas Dichtel Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index ba6a1d421222..df2f5a3901df 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -983,7 +983,7 @@ static void gc_worker(struct work_struct *work) return; ratio = scanned ? expired_count * 100 / scanned : 0; - if (ratio >= 90) + if (ratio >= 90 || expired_count == GC_MAX_EVICTS) next_run = 0; gc_work->last_bucket = i; From 7034b566a4e7d550621c2dfafd380b77b3787cd9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 17 Oct 2016 18:05:32 +0100 Subject: [PATCH 411/884] netfilter: fix nf_queue handling nf_queue handling is broken since e3b37f11e6e4 ("netfilter: replace list_head with single linked list") for two reasons: 1) If the bypass flag is set on, there are no userspace listeners and we still have more hook entries to iterate over, then jump to the next hook. Otherwise accept the packet. On nf_reinject() path, the okfn() needs to be invoked. 2) We should not re-enter the same hook on packet reinjection. If the packet is accepted, we have to skip the current hook from where the packet was enqueued, otherwise the packets gets enqueued over and over again. This restores the previous list_for_each_entry_continue() behaviour happening from nf_iterate() that was dealing with these two cases. This patch introduces a new nf_queue() wrapper function so this fix becomes simpler. Fixes: e3b37f11e6e4 ("netfilter: replace list_head with single linked list") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/core.c | 13 +++------- net/netfilter/nf_internals.h | 2 +- net/netfilter/nf_queue.c | 48 ++++++++++++++++++++++++------------ 3 files changed, 36 insertions(+), 27 deletions(-) diff --git a/net/netfilter/core.c b/net/netfilter/core.c index fcb5d1df11e9..004af030ef1a 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -361,16 +361,9 @@ next_hook: if (ret == 0) ret = -EPERM; } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { - int err; - - RCU_INIT_POINTER(state->hook_entries, entry); - err = nf_queue(skb, state, verdict >> NF_VERDICT_QBITS); - if (err < 0) { - if (err == -ESRCH && - (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) - goto next_hook; - kfree_skb(skb); - } + ret = nf_queue(skb, state, &entry, verdict); + if (ret == 1 && entry) + goto next_hook; } return ret; } diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index e0adb5959342..9fdb655f85bc 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -18,7 +18,7 @@ unsigned int nf_iterate(struct sk_buff *skb, struct nf_hook_state *state, /* nf_queue.c */ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, - unsigned int queuenum); + struct nf_hook_entry **entryp, unsigned int verdict); void nf_queue_nf_hook_drop(struct net *net, const struct nf_hook_entry *entry); int __init netfilter_queue_init(void); diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 96964a0070e1..8f08d759844a 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -107,13 +107,8 @@ void nf_queue_nf_hook_drop(struct net *net, const struct nf_hook_entry *entry) rcu_read_unlock(); } -/* - * Any packet that leaves via this function must come back - * through nf_reinject(). - */ -int nf_queue(struct sk_buff *skb, - struct nf_hook_state *state, - unsigned int queuenum) +static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, + unsigned int queuenum) { int status = -ENOENT; struct nf_queue_entry *entry = NULL; @@ -161,6 +156,27 @@ err: return status; } +/* Packets leaving via this function must come back through nf_reinject(). */ +int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, + struct nf_hook_entry **entryp, unsigned int verdict) +{ + struct nf_hook_entry *entry = *entryp; + int ret; + + RCU_INIT_POINTER(state->hook_entries, entry); + ret = __nf_queue(skb, state, verdict >> NF_VERDICT_QBITS); + if (ret < 0) { + if (ret == -ESRCH && + (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) { + *entryp = rcu_dereference(entry->next); + return 1; + } + kfree_skb(skb); + } + + return 0; +} + void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) { struct nf_hook_entry *hook_entry; @@ -187,26 +203,26 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) entry->state.thresh = INT_MIN; if (verdict == NF_ACCEPT) { - next_hook: - verdict = nf_iterate(skb, &entry->state, &hook_entry); + hook_entry = rcu_dereference(hook_entry->next); + if (hook_entry) +next_hook: + verdict = nf_iterate(skb, &entry->state, &hook_entry); } switch (verdict & NF_VERDICT_MASK) { case NF_ACCEPT: case NF_STOP: +okfn: local_bh_disable(); entry->state.okfn(entry->state.net, entry->state.sk, skb); local_bh_enable(); break; case NF_QUEUE: - RCU_INIT_POINTER(entry->state.hook_entries, hook_entry); - err = nf_queue(skb, &entry->state, - verdict >> NF_VERDICT_QBITS); - if (err < 0) { - if (err == -ESRCH && - (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) + err = nf_queue(skb, &entry->state, &hook_entry, verdict); + if (err == 1) { + if (hook_entry) goto next_hook; - kfree_skb(skb); + goto okfn; } break; case NF_STOLEN: From a5efb6b6c99a3a6dc4330f51d8066f638bdea0ac Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 28 Sep 2016 16:18:47 +0200 Subject: [PATCH 412/884] KVM: s390: reject invalid modes for runtime instrumentation Usually a validity intercept is a programming error of the host because of invalid entries in the state description. We can get a validity intercept if the mode of the runtime instrumentation control block is wrong. As the host does not know which modes are valid, this can be used by userspace to trigger a WARN. Instead of printing a WARN let's return an error to userspace as this can only happen if userspace provides a malformed initial value (e.g. on migration). The kernel should never warn on bogus input. Instead let's log it into the s390 debug feature. While at it, let's return -EINVAL for all validity intercepts as this will trigger an error in QEMU like error: kvm run failed Invalid argument PSW=mask 0404c00180000000 addr 000000000063c226 cc 00 R00=000000000000004f R01=0000000000000004 R02=0000000000760005 R03=000000007fe0a000 R04=000000000064ba2a R05=000000049db73dd0 R06=000000000082c4b0 R07=0000000000000041 R08=0000000000000002 R09=000003e0804042a8 R10=0000000496152c42 R11=000000007fe0afb0 [...] This will avoid an endless loop of validity intercepts. Cc: stable@vger.kernel.org # v4.5+ Fixes: c6e5f166373a ("KVM: s390: implement the RI support of guest") Acked-by: Fan Zhang Reviewed-by: Pierre Morel Signed-off-by: Christian Borntraeger --- arch/s390/kvm/intercept.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 1cab8a177d0e..7a27eebab28a 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -119,8 +119,13 @@ static int handle_validity(struct kvm_vcpu *vcpu) vcpu->stat.exit_validity++; trace_kvm_s390_intercept_validity(vcpu, viwhy); - WARN_ONCE(true, "kvm: unhandled validity intercept 0x%x\n", viwhy); - return -EOPNOTSUPP; + KVM_EVENT(3, "validity intercept 0x%x for pid %u (kvm 0x%pK)", viwhy, + current->pid, vcpu->kvm); + + /* do not warn on invalid runtime instrumentation mode */ + WARN_ONCE(viwhy != 0x44, "kvm: unhandled validity intercept 0x%x\n", + viwhy); + return -EINVAL; } static int handle_instruction(struct kvm_vcpu *vcpu) From a2ce092be34c4951e23104a0bfdec08f9577fada Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Thu, 13 Oct 2016 21:51:06 +0000 Subject: [PATCH 413/884] of: Add J-Core timer bindings Signed-off-by: Rich Felker Acked-by: Rob Herring Cc: Mark Rutland Cc: devicetree@vger.kernel.org Cc: linux-sh@vger.kernel.org Cc: Daniel Lezcano Cc: Rob Herring Link: http://lkml.kernel.org/r/8b107c292ed8cf8eed0fa283071fc8a930098628.1476393790.git.dalias@libc.org Signed-off-by: Thomas Gleixner --- .../devicetree/bindings/timer/jcore,pit.txt | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 Documentation/devicetree/bindings/timer/jcore,pit.txt diff --git a/Documentation/devicetree/bindings/timer/jcore,pit.txt b/Documentation/devicetree/bindings/timer/jcore,pit.txt new file mode 100644 index 000000000000..af5dd35469d7 --- /dev/null +++ b/Documentation/devicetree/bindings/timer/jcore,pit.txt @@ -0,0 +1,24 @@ +J-Core Programmable Interval Timer and Clocksource + +Required properties: + +- compatible: Must be "jcore,pit". + +- reg: Memory region(s) for timer/clocksource registers. For SMP, + there should be one region per cpu, indexed by the sequential, + zero-based hardware cpu number. + +- interrupts: An interrupt to assign for the timer. The actual pit + core is integrated with the aic and allows the timer interrupt + assignment to be programmed by software, but this property is + required in order to reserve an interrupt number that doesn't + conflict with other devices. + + +Example: + +timer@200 { + compatible = "jcore,pit"; + reg = < 0x200 0x30 0x500 0x30 >; + interrupts = < 0x48 >; +}; From 9995f4f184613fb02ee73092b03545520a72b104 Mon Sep 17 00:00:00 2001 From: Rich Felker Date: Thu, 13 Oct 2016 21:51:06 +0000 Subject: [PATCH 414/884] clocksource: Add J-Core timer/clocksource driver At the hardware level, the J-Core PIT is integrated with the interrupt controller, but it is represented as its own device and has an independent programming interface. It provides a 12-bit countdown timer, which is not presently used, and a periodic timer. The interval length for the latter is programmable via a 32-bit throttle register whose units are determined by a bus-period register. The periodic timer is used to implement both periodic and oneshot clock event modes; in oneshot mode the interrupt handler simply disables the timer as soon as it fires. Despite its device tree node representing an interrupt for the PIT, the actual irq generated is programmable, not hard-wired. The driver is responsible for programming the PIT to generate the hardware irq number that the DT assigns to it. On SMP configurations, J-Core provides cpu-local instances of the PIT; no broadcast timer is needed. This driver supports the creation of the necessary per-cpu clock_event_device instances. A nanosecond-resolution clocksource is provided using the J-Core "RTC" registers, which give a 64-bit seconds count and 32-bit nanoseconds that wrap every second. The driver converts these to a full-range 32-bit nanoseconds count. Signed-off-by: Rich Felker Cc: Mark Rutland Cc: devicetree@vger.kernel.org Cc: linux-sh@vger.kernel.org Cc: Daniel Lezcano Cc: Rob Herring Link: http://lkml.kernel.org/r/b591ff12cc5ebf63d1edc98da26046f95a233814.1476393790.git.dalias@libc.org Signed-off-by: Thomas Gleixner --- drivers/clocksource/Kconfig | 10 ++ drivers/clocksource/Makefile | 1 + drivers/clocksource/jcore-pit.c | 249 ++++++++++++++++++++++++++++++++ include/linux/cpuhotplug.h | 1 + 4 files changed, 261 insertions(+) create mode 100644 drivers/clocksource/jcore-pit.c diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 245190839359..e2c6e43cf8ca 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -417,6 +417,16 @@ config SYS_SUPPORTS_SH_TMU config SYS_SUPPORTS_EM_STI bool +config CLKSRC_JCORE_PIT + bool "J-Core PIT timer driver" if COMPILE_TEST + depends on OF + depends on GENERIC_CLOCKEVENTS + depends on HAS_IOMEM + select CLKSRC_MMIO + help + This enables build of clocksource and clockevent driver for + the integrated PIT in the J-Core synthesizable, open source SoC. + config SH_TIMER_CMT bool "Renesas CMT timer driver" if COMPILE_TEST depends on GENERIC_CLOCKEVENTS diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index fd9d6df0bbc0..cf87f407f1ad 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -5,6 +5,7 @@ obj-$(CONFIG_ATMEL_TCB_CLKSRC) += tcb_clksrc.o obj-$(CONFIG_X86_PM_TIMER) += acpi_pm.o obj-$(CONFIG_SCx200HR_TIMER) += scx200_hrt.o obj-$(CONFIG_CS5535_CLOCK_EVENT_SRC) += cs5535-clockevt.o +obj-$(CONFIG_CLKSRC_JCORE_PIT) += jcore-pit.o obj-$(CONFIG_SH_TIMER_CMT) += sh_cmt.o obj-$(CONFIG_SH_TIMER_MTU2) += sh_mtu2.o obj-$(CONFIG_SH_TIMER_TMU) += sh_tmu.o diff --git a/drivers/clocksource/jcore-pit.c b/drivers/clocksource/jcore-pit.c new file mode 100644 index 000000000000..54e1665aa03c --- /dev/null +++ b/drivers/clocksource/jcore-pit.c @@ -0,0 +1,249 @@ +/* + * J-Core SoC PIT/clocksource driver + * + * Copyright (C) 2015-2016 Smart Energy Instruments, Inc. + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define PIT_IRQ_SHIFT 12 +#define PIT_PRIO_SHIFT 20 +#define PIT_ENABLE_SHIFT 26 +#define PIT_PRIO_MASK 0xf + +#define REG_PITEN 0x00 +#define REG_THROT 0x10 +#define REG_COUNT 0x14 +#define REG_BUSPD 0x18 +#define REG_SECHI 0x20 +#define REG_SECLO 0x24 +#define REG_NSEC 0x28 + +struct jcore_pit { + struct clock_event_device ced; + void __iomem *base; + unsigned long periodic_delta; + u32 enable_val; +}; + +static void __iomem *jcore_pit_base; +static struct jcore_pit __percpu *jcore_pit_percpu; + +static notrace u64 jcore_sched_clock_read(void) +{ + u32 seclo, nsec, seclo0; + __iomem void *base = jcore_pit_base; + + seclo = readl(base + REG_SECLO); + do { + seclo0 = seclo; + nsec = readl(base + REG_NSEC); + seclo = readl(base + REG_SECLO); + } while (seclo0 != seclo); + + return seclo * NSEC_PER_SEC + nsec; +} + +static cycle_t jcore_clocksource_read(struct clocksource *cs) +{ + return jcore_sched_clock_read(); +} + +static int jcore_pit_disable(struct jcore_pit *pit) +{ + writel(0, pit->base + REG_PITEN); + return 0; +} + +static int jcore_pit_set(unsigned long delta, struct jcore_pit *pit) +{ + jcore_pit_disable(pit); + writel(delta, pit->base + REG_THROT); + writel(pit->enable_val, pit->base + REG_PITEN); + return 0; +} + +static int jcore_pit_set_state_shutdown(struct clock_event_device *ced) +{ + struct jcore_pit *pit = container_of(ced, struct jcore_pit, ced); + + return jcore_pit_disable(pit); +} + +static int jcore_pit_set_state_oneshot(struct clock_event_device *ced) +{ + struct jcore_pit *pit = container_of(ced, struct jcore_pit, ced); + + return jcore_pit_disable(pit); +} + +static int jcore_pit_set_state_periodic(struct clock_event_device *ced) +{ + struct jcore_pit *pit = container_of(ced, struct jcore_pit, ced); + + return jcore_pit_set(pit->periodic_delta, pit); +} + +static int jcore_pit_set_next_event(unsigned long delta, + struct clock_event_device *ced) +{ + struct jcore_pit *pit = container_of(ced, struct jcore_pit, ced); + + return jcore_pit_set(delta, pit); +} + +static int jcore_pit_local_init(unsigned cpu) +{ + struct jcore_pit *pit = this_cpu_ptr(jcore_pit_percpu); + unsigned buspd, freq; + + pr_info("Local J-Core PIT init on cpu %u\n", cpu); + + buspd = readl(pit->base + REG_BUSPD); + freq = DIV_ROUND_CLOSEST(NSEC_PER_SEC, buspd); + pit->periodic_delta = DIV_ROUND_CLOSEST(NSEC_PER_SEC, HZ * buspd); + + clockevents_config_and_register(&pit->ced, freq, 1, ULONG_MAX); + + return 0; +} + +static irqreturn_t jcore_timer_interrupt(int irq, void *dev_id) +{ + struct jcore_pit *pit = this_cpu_ptr(dev_id); + + if (clockevent_state_oneshot(&pit->ced)) + jcore_pit_disable(pit); + + pit->ced.event_handler(&pit->ced); + + return IRQ_HANDLED; +} + +static int __init jcore_pit_init(struct device_node *node) +{ + int err; + unsigned pit_irq, cpu; + unsigned long hwirq; + u32 irqprio, enable_val; + + jcore_pit_base = of_iomap(node, 0); + if (!jcore_pit_base) { + pr_err("Error: Cannot map base address for J-Core PIT\n"); + return -ENXIO; + } + + pit_irq = irq_of_parse_and_map(node, 0); + if (!pit_irq) { + pr_err("Error: J-Core PIT has no IRQ\n"); + return -ENXIO; + } + + pr_info("Initializing J-Core PIT at %p IRQ %d\n", + jcore_pit_base, pit_irq); + + err = clocksource_mmio_init(jcore_pit_base, "jcore_pit_cs", + NSEC_PER_SEC, 400, 32, + jcore_clocksource_read); + if (err) { + pr_err("Error registering clocksource device: %d\n", err); + return err; + } + + sched_clock_register(jcore_sched_clock_read, 32, NSEC_PER_SEC); + + jcore_pit_percpu = alloc_percpu(struct jcore_pit); + if (!jcore_pit_percpu) { + pr_err("Failed to allocate memory for clock event device\n"); + return -ENOMEM; + } + + err = request_irq(pit_irq, jcore_timer_interrupt, + IRQF_TIMER | IRQF_PERCPU, + "jcore_pit", jcore_pit_percpu); + if (err) { + pr_err("pit irq request failed: %d\n", err); + free_percpu(jcore_pit_percpu); + return err; + } + + /* + * The J-Core PIT is not hard-wired to a particular IRQ, but + * integrated with the interrupt controller such that the IRQ it + * generates is programmable, as follows: + * + * The bit layout of the PIT enable register is: + * + * .....e..ppppiiiiiiii............ + * + * where the .'s indicate unrelated/unused bits, e is enable, + * p is priority, and i is hard irq number. + * + * For the PIT included in AIC1 (obsolete but still in use), + * any hard irq (trap number) can be programmed via the 8 + * iiiiiiii bits, and a priority (0-15) is programmable + * separately in the pppp bits. + * + * For the PIT included in AIC2 (current), the programming + * interface is equivalent modulo interrupt mapping. This is + * why a different compatible tag was not used. However only + * traps 64-127 (the ones actually intended to be used for + * interrupts, rather than syscalls/exceptions/etc.) can be + * programmed (the high 2 bits of i are ignored) and the + * priority pppp is <<2'd and or'd onto the irq number. This + * choice seems to have been made on the hardware engineering + * side under an assumption that preserving old AIC1 priority + * mappings was important. Future models will likely ignore + * the pppp field. + */ + hwirq = irq_get_irq_data(pit_irq)->hwirq; + irqprio = (hwirq >> 2) & PIT_PRIO_MASK; + enable_val = (1U << PIT_ENABLE_SHIFT) + | (hwirq << PIT_IRQ_SHIFT) + | (irqprio << PIT_PRIO_SHIFT); + + for_each_present_cpu(cpu) { + struct jcore_pit *pit = per_cpu_ptr(jcore_pit_percpu, cpu); + + pit->base = of_iomap(node, cpu); + if (!pit->base) { + pr_err("Unable to map PIT for cpu %u\n", cpu); + continue; + } + + pit->ced.name = "jcore_pit"; + pit->ced.features = CLOCK_EVT_FEAT_PERIODIC + | CLOCK_EVT_FEAT_ONESHOT + | CLOCK_EVT_FEAT_PERCPU; + pit->ced.cpumask = cpumask_of(cpu); + pit->ced.rating = 400; + pit->ced.irq = pit_irq; + pit->ced.set_state_shutdown = jcore_pit_set_state_shutdown; + pit->ced.set_state_periodic = jcore_pit_set_state_periodic; + pit->ced.set_state_oneshot = jcore_pit_set_state_oneshot; + pit->ced.set_next_event = jcore_pit_set_next_event; + + pit->enable_val = enable_val; + } + + cpuhp_setup_state(CPUHP_AP_JCORE_TIMER_STARTING, + "AP_JCORE_TIMER_STARTING", + jcore_pit_local_init, NULL); + + return 0; +} + +CLOCKSOURCE_OF_DECLARE(jcore_pit, "jcore,pit", jcore_pit_init); diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 9b207a8c5af3..afe641c02dca 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -81,6 +81,7 @@ enum cpuhp_state { CPUHP_AP_ARM_ARCH_TIMER_STARTING, CPUHP_AP_ARM_GLOBAL_TIMER_STARTING, CPUHP_AP_DUMMY_TIMER_STARTING, + CPUHP_AP_JCORE_TIMER_STARTING, CPUHP_AP_EXYNOS4_MCT_TIMER_STARTING, CPUHP_AP_ARM_TWD_STARTING, CPUHP_AP_METAG_TIMER_STARTING, From 7aa8e63f0d0f2e0ae353632bca1ce75a258696c6 Mon Sep 17 00:00:00 2001 From: Jiri Bohac Date: Thu, 20 Oct 2016 12:29:26 +0200 Subject: [PATCH 415/884] ipv6: properly prevent temp_prefered_lft sysctl race The check for an underflow of tmp_prefered_lft is always false because tmp_prefered_lft is unsigned. The intention of the check was to guard against racing with an update of the temp_prefered_lft sysctl, potentially resulting in an underflow. As suggested by David Miller, the best way to prevent the race is by reading the sysctl variable using READ_ONCE. Signed-off-by: Jiri Bohac Reported-by: Julia Lawall Fixes: 76506a986dc3 ("IPv6: fix DESYNC_FACTOR") Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index cc7c26d05f45..060dd9922018 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1185,6 +1185,7 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *i u32 addr_flags; unsigned long now = jiffies; long max_desync_factor; + s32 cnf_temp_preferred_lft; write_lock_bh(&idev->lock); if (ift) { @@ -1228,9 +1229,10 @@ retry: /* recalculate max_desync_factor each time and update * idev->desync_factor if it's larger */ + cnf_temp_preferred_lft = READ_ONCE(idev->cnf.temp_prefered_lft); max_desync_factor = min_t(__u32, idev->cnf.max_desync_factor, - idev->cnf.temp_prefered_lft - regen_advance); + cnf_temp_preferred_lft - regen_advance); if (unlikely(idev->desync_factor > max_desync_factor)) { if (max_desync_factor > 0) { @@ -1245,11 +1247,8 @@ retry: tmp_valid_lft = min_t(__u32, ifp->valid_lft, idev->cnf.temp_valid_lft + age); - tmp_prefered_lft = idev->cnf.temp_prefered_lft + age - + tmp_prefered_lft = cnf_temp_preferred_lft + age - idev->desync_factor; - /* guard against underflow in case of concurrent updates to cnf */ - if (unlikely(tmp_prefered_lft < 0)) - tmp_prefered_lft = 0; tmp_prefered_lft = min_t(__u32, ifp->prefered_lft, tmp_prefered_lft); tmp_plen = ifp->prefix_len; tmp_tstamp = ifp->tstamp; From fcd91dd449867c6bfe56a81cabba76b829fd05cd Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 20 Oct 2016 15:58:02 +0200 Subject: [PATCH 416/884] net: add recursion limit to GRO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, GRO can do unlimited recursion through the gro_receive handlers. This was fixed for tunneling protocols by limiting tunnel GRO to one level with encap_mark, but both VLAN and TEB still have this problem. Thus, the kernel is vulnerable to a stack overflow, if we receive a packet composed entirely of VLAN headers. This patch adds a recursion counter to the GRO layer to prevent stack overflow. When a gro_receive function hits the recursion limit, GRO is aborted for this skb and it is processed normally. This recursion counter is put in the GRO CB, but could be turned into a percpu counter if we run out of space in the CB. Thanks to Vladimír Beneš for the initial bug report. Fixes: CVE-2016-7039 Fixes: 9b174d88c257 ("net: Add Transparent Ethernet Bridging GRO support.") Fixes: 66e5133f19e9 ("vlan: Add GRO support for non hardware accelerated vlan") Signed-off-by: Sabrina Dubroca Reviewed-by: Jiri Benc Acked-by: Hannes Frederic Sowa Acked-by: Tom Herbert Signed-off-by: David S. Miller --- drivers/net/geneve.c | 2 +- drivers/net/vxlan.c | 2 +- include/linux/netdevice.h | 39 ++++++++++++++++++++++++++++++++++++++- net/8021q/vlan.c | 2 +- net/core/dev.c | 1 + net/ethernet/eth.c | 2 +- net/ipv4/af_inet.c | 2 +- net/ipv4/fou.c | 4 ++-- net/ipv4/gre_offload.c | 2 +- net/ipv4/udp_offload.c | 2 +- net/ipv6/ip6_offload.c | 2 +- 11 files changed, 49 insertions(+), 11 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 3c20e87bb761..16af1ce99233 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -453,7 +453,7 @@ static struct sk_buff **geneve_gro_receive(struct sock *sk, skb_gro_pull(skb, gh_len); skb_gro_postpull_rcsum(skb, gh, gh_len); - pp = ptype->callbacks.gro_receive(head, skb); + pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb); flush = 0; out_unlock: diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index e7d16687538b..c1639a3e95a4 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -583,7 +583,7 @@ static struct sk_buff **vxlan_gro_receive(struct sock *sk, } } - pp = eth_gro_receive(head, skb); + pp = call_gro_receive(eth_gro_receive, head, skb); flush = 0; out: diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 465e128699a1..91ee3643ccc8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2169,7 +2169,10 @@ struct napi_gro_cb { /* Used to determine if flush_id can be ignored */ u8 is_atomic:1; - /* 5 bit hole */ + /* Number of gro_receive callbacks this packet already went through */ + u8 recursion_counter:4; + + /* 1 bit hole */ /* used to support CHECKSUM_COMPLETE for tunneling protocols */ __wsum csum; @@ -2180,6 +2183,40 @@ struct napi_gro_cb { #define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb) +#define GRO_RECURSION_LIMIT 15 +static inline int gro_recursion_inc_test(struct sk_buff *skb) +{ + return ++NAPI_GRO_CB(skb)->recursion_counter == GRO_RECURSION_LIMIT; +} + +typedef struct sk_buff **(*gro_receive_t)(struct sk_buff **, struct sk_buff *); +static inline struct sk_buff **call_gro_receive(gro_receive_t cb, + struct sk_buff **head, + struct sk_buff *skb) +{ + if (unlikely(gro_recursion_inc_test(skb))) { + NAPI_GRO_CB(skb)->flush |= 1; + return NULL; + } + + return cb(head, skb); +} + +typedef struct sk_buff **(*gro_receive_sk_t)(struct sock *, struct sk_buff **, + struct sk_buff *); +static inline struct sk_buff **call_gro_receive_sk(gro_receive_sk_t cb, + struct sock *sk, + struct sk_buff **head, + struct sk_buff *skb) +{ + if (unlikely(gro_recursion_inc_test(skb))) { + NAPI_GRO_CB(skb)->flush |= 1; + return NULL; + } + + return cb(sk, head, skb); +} + struct packet_type { __be16 type; /* This is really htons(ether_type). */ struct net_device *dev; /* NULL is wildcarded here */ diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 8de138d3306b..f2531ad66b68 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -664,7 +664,7 @@ static struct sk_buff **vlan_gro_receive(struct sk_buff **head, skb_gro_pull(skb, sizeof(*vhdr)); skb_gro_postpull_rcsum(skb, vhdr, sizeof(*vhdr)); - pp = ptype->callbacks.gro_receive(head, skb); + pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb); out_unlock: rcu_read_unlock(); diff --git a/net/core/dev.c b/net/core/dev.c index b09ac57f4348..dbc871306910 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4511,6 +4511,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff NAPI_GRO_CB(skb)->flush = 0; NAPI_GRO_CB(skb)->free = 0; NAPI_GRO_CB(skb)->encap_mark = 0; + NAPI_GRO_CB(skb)->recursion_counter = 0; NAPI_GRO_CB(skb)->is_fou = 0; NAPI_GRO_CB(skb)->is_atomic = 1; NAPI_GRO_CB(skb)->gro_remcsum_start = 0; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 66dff5e3d772..02acfff36028 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -439,7 +439,7 @@ struct sk_buff **eth_gro_receive(struct sk_buff **head, skb_gro_pull(skb, sizeof(*eh)); skb_gro_postpull_rcsum(skb, eh, sizeof(*eh)); - pp = ptype->callbacks.gro_receive(head, skb); + pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb); out_unlock: rcu_read_unlock(); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 1effc986739e..9648c97e541f 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1391,7 +1391,7 @@ struct sk_buff **inet_gro_receive(struct sk_buff **head, struct sk_buff *skb) skb_gro_pull(skb, sizeof(*iph)); skb_set_transport_header(skb, skb_gro_offset(skb)); - pp = ops->callbacks.gro_receive(head, skb); + pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); out_unlock: rcu_read_unlock(); diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index cf50f7e2b012..030d1531e897 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -249,7 +249,7 @@ static struct sk_buff **fou_gro_receive(struct sock *sk, if (!ops || !ops->callbacks.gro_receive) goto out_unlock; - pp = ops->callbacks.gro_receive(head, skb); + pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); out_unlock: rcu_read_unlock(); @@ -441,7 +441,7 @@ next_proto: if (WARN_ON_ONCE(!ops || !ops->callbacks.gro_receive)) goto out_unlock; - pp = ops->callbacks.gro_receive(head, skb); + pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); flush = 0; out_unlock: diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index 96e0efecefa6..d5cac99170b1 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -229,7 +229,7 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head, /* Adjusted NAPI_GRO_CB(skb)->csum after skb_gro_pull()*/ skb_gro_postpull_rcsum(skb, greh, grehlen); - pp = ptype->callbacks.gro_receive(head, skb); + pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb); flush = 0; out_unlock: diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index f9333c963607..b2be1d9757ef 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -295,7 +295,7 @@ unflush: skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */ skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr)); - pp = udp_sk(sk)->gro_receive(sk, head, skb); + pp = call_gro_receive_sk(udp_sk(sk)->gro_receive, sk, head, skb); out_unlock: rcu_read_unlock(); diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index e7bfd55899a3..1fcf61f1cbc3 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -246,7 +246,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, skb_gro_postpull_rcsum(skb, iph, nlen); - pp = ops->callbacks.gro_receive(head, skb); + pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); out_unlock: rcu_read_unlock(); From 0d906b1e8d4002cdd59590fec630f4e75023e288 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 20 Oct 2016 17:13:53 +0200 Subject: [PATCH 417/884] bpf, test: fix ld_abs + vlan push/pop stress test After commit 636c2628086e ("net: skbuff: Remove errornous length validation in skb_vlan_pop()") mentioned test case stopped working, throwing a -12 (ENOMEM) return code. The issue however is not due to 636c2628086e, but rather due to a buggy test case that got uncovered from the change in behaviour in 636c2628086e. The data_size of that test case for the skb was set to 1. In the bpf_fill_ld_abs_vlan_push_pop() handler bpf insns are generated that loop with: reading skb data, pushing 68 tags, reading skb data, popping 68 tags, reading skb data, etc, in order to force a skb expansion and thus trigger that JITs recache skb->data. Problem is that initial data_size is too small. While before 636c2628086e, the test silently bailed out due to the skb->len < VLAN_ETH_HLEN check with returning 0, and now throwing an error from failing skb_ensure_writable(). Set at least minimum of ETH_HLEN as an initial length so that on first push of data, equivalent pop will succeed. Fixes: 4d9c5c53ac99 ("test_bpf: add bpf_skb_vlan_push/pop() tests") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- lib/test_bpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 94346b4d8984..0362da0b66c3 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -4831,7 +4831,7 @@ static struct bpf_test tests[] = { { }, INTERNAL, { 0x34 }, - { { 1, 0xbef } }, + { { ETH_HLEN, 0xbef } }, .fill_helper = bpf_fill_ld_abs_vlan_push_pop, }, /* From 2399d6143f85b155ae84ccd94237befd36b8f6c7 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 20 Oct 2016 09:32:19 -0700 Subject: [PATCH 418/884] net: dsa: bcm_sf2: Prevent GPHY shutdown for kexec'd kernels For a kernel that is being kexec'd we re-enable the integrated GPHY in order for the subsequent MDIO bus scan to succeed and properly bind to the bcm7xxx PHY driver. If we did not do that, the GPHY would be shut down by the time the MDIO driver is probing the bus, and it would fail to read the correct PHY OUI and therefore bind to an appropriate PHY driver. Later on, this would cause DSA not to be able to successfully attach to the PHY, and the interface would not be created at all. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 0427009bc924..077a24541584 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "bcm_sf2.h" #include "bcm_sf2_regs.h" @@ -1133,6 +1134,18 @@ static int bcm_sf2_sw_remove(struct platform_device *pdev) return 0; } +static void bcm_sf2_sw_shutdown(struct platform_device *pdev) +{ + struct bcm_sf2_priv *priv = platform_get_drvdata(pdev); + + /* For a kernel about to be kexec'd we want to keep the GPHY on for a + * successful MDIO bus scan to occur. If we did turn off the GPHY + * before (e.g: port_disable), this will also power it back on. + */ + if (priv->hw_params.num_gphy == 1) + bcm_sf2_gphy_enable_set(priv->dev->ds, kexec_in_progress); +} + #ifdef CONFIG_PM_SLEEP static int bcm_sf2_suspend(struct device *dev) { @@ -1163,6 +1176,7 @@ MODULE_DEVICE_TABLE(of, bcm_sf2_of_match); static struct platform_driver bcm_sf2_driver = { .probe = bcm_sf2_sw_probe, .remove = bcm_sf2_sw_remove, + .shutdown = bcm_sf2_sw_shutdown, .driver = { .name = "brcm-sf2", .of_match_table = bcm_sf2_of_match, From 286c72deabaa240b7eebbd99496ed3324d69f3c0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 20 Oct 2016 09:39:40 -0700 Subject: [PATCH 419/884] udp: must lock the socket in udp_disconnect() Baozeng Ding reported KASAN traces showing uses after free in udp_lib_get_port() and other related UDP functions. A CONFIG_DEBUG_PAGEALLOC=y kernel would eventually crash. I could write a reproducer with two threads doing : static int sock_fd; static void *thr1(void *arg) { for (;;) { connect(sock_fd, (const struct sockaddr *)arg, sizeof(struct sockaddr_in)); } } static void *thr2(void *arg) { struct sockaddr_in unspec; for (;;) { memset(&unspec, 0, sizeof(unspec)); connect(sock_fd, (const struct sockaddr *)&unspec, sizeof(unspec)); } } Problem is that udp_disconnect() could run without holding socket lock, and this was causing list corruptions. Signed-off-by: Eric Dumazet Reported-by: Baozeng Ding Signed-off-by: David S. Miller --- include/net/udp.h | 1 + net/ipv4/ping.c | 2 +- net/ipv4/raw.c | 2 +- net/ipv4/udp.c | 13 +++++++++++-- net/ipv6/ping.c | 2 +- net/ipv6/raw.c | 2 +- net/l2tp/l2tp_ip.c | 2 +- net/l2tp/l2tp_ip6.c | 2 +- 8 files changed, 18 insertions(+), 8 deletions(-) diff --git a/include/net/udp.h b/include/net/udp.h index ea53a87d880f..4948790d393d 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -258,6 +258,7 @@ void udp_flush_pending_frames(struct sock *sk); void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst); int udp_rcv(struct sk_buff *skb); int udp_ioctl(struct sock *sk, int cmd, unsigned long arg); +int __udp_disconnect(struct sock *sk, int flags); int udp_disconnect(struct sock *sk, int flags); unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait); struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 7cf7d6e380c2..205e2000d395 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -994,7 +994,7 @@ struct proto ping_prot = { .init = ping_init_sock, .close = ping_close, .connect = ip4_datagram_connect, - .disconnect = udp_disconnect, + .disconnect = __udp_disconnect, .setsockopt = ip_setsockopt, .getsockopt = ip_getsockopt, .sendmsg = ping_v4_sendmsg, diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 90a85c955872..ecbe5a7c2d6d 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -918,7 +918,7 @@ struct proto raw_prot = { .close = raw_close, .destroy = raw_destroy, .connect = ip4_datagram_connect, - .disconnect = udp_disconnect, + .disconnect = __udp_disconnect, .ioctl = raw_ioctl, .init = raw_init, .setsockopt = raw_setsockopt, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 7d96dc2d3d08..311613e413cb 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1345,7 +1345,7 @@ csum_copy_err: goto try_again; } -int udp_disconnect(struct sock *sk, int flags) +int __udp_disconnect(struct sock *sk, int flags) { struct inet_sock *inet = inet_sk(sk); /* @@ -1367,6 +1367,15 @@ int udp_disconnect(struct sock *sk, int flags) sk_dst_reset(sk); return 0; } +EXPORT_SYMBOL(__udp_disconnect); + +int udp_disconnect(struct sock *sk, int flags) +{ + lock_sock(sk); + __udp_disconnect(sk, flags); + release_sock(sk); + return 0; +} EXPORT_SYMBOL(udp_disconnect); void udp_lib_unhash(struct sock *sk) @@ -2193,7 +2202,7 @@ int udp_abort(struct sock *sk, int err) sk->sk_err = err; sk->sk_error_report(sk); - udp_disconnect(sk, 0); + __udp_disconnect(sk, 0); release_sock(sk); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 0e983b694ee8..66e2d9dfc43a 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -180,7 +180,7 @@ struct proto pingv6_prot = { .init = ping_init_sock, .close = ping_close, .connect = ip6_datagram_connect_v6_only, - .disconnect = udp_disconnect, + .disconnect = __udp_disconnect, .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, .sendmsg = ping_v6_sendmsg, diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 54404f08efcc..054a1d84fc5e 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1241,7 +1241,7 @@ struct proto rawv6_prot = { .close = rawv6_close, .destroy = raw6_destroy, .connect = ip6_datagram_connect_v6_only, - .disconnect = udp_disconnect, + .disconnect = __udp_disconnect, .ioctl = rawv6_ioctl, .init = rawv6_init_sk, .setsockopt = rawv6_setsockopt, diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 42de4ccd159f..fce25afb652a 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -338,7 +338,7 @@ static int l2tp_ip_disconnect(struct sock *sk, int flags) if (sock_flag(sk, SOCK_ZAPPED)) return 0; - return udp_disconnect(sk, flags); + return __udp_disconnect(sk, flags); } static int l2tp_ip_getname(struct socket *sock, struct sockaddr *uaddr, diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index ea2ae6664cc8..ad3468c32b53 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -410,7 +410,7 @@ static int l2tp_ip6_disconnect(struct sock *sk, int flags) if (sock_flag(sk, SOCK_ZAPPED)) return 0; - return udp_disconnect(sk, flags); + return __udp_disconnect(sk, flags); } static int l2tp_ip6_getname(struct socket *sock, struct sockaddr *uaddr, From a681574c99be23e4d20b769bf0e543239c364af5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 20 Oct 2016 10:26:48 -0700 Subject: [PATCH 420/884] ipv4: disable BH in set_ping_group_range() In commit 4ee3bd4a8c746 ("ipv4: disable BH when changing ip local port range") Cong added BH protection in set_local_port_range() but missed that same fix was needed in set_ping_group_range() Fixes: b8f1a55639e6 ("udp: Add function to make source port for UDP tunnels") Signed-off-by: Eric Dumazet Reported-by: Eric Salo Signed-off-by: David S. Miller --- net/ipv4/sysctl_net_ipv4.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 1cb67de106fe..500ae4010bed 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -109,10 +109,10 @@ static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t hig kgid_t *data = table->data; struct net *net = container_of(table->data, struct net, ipv4.ping_group_range.range); - write_seqlock(&net->ipv4.ip_local_ports.lock); + write_seqlock_bh(&net->ipv4.ip_local_ports.lock); data[0] = low; data[1] = high; - write_sequnlock(&net->ipv4.ip_local_ports.lock); + write_sequnlock_bh(&net->ipv4.ip_local_ports.lock); } /* Validate changes from /proc interface. */ From 593876838826914a7e4e05fbbcb728be6fbc4d89 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Tue, 18 Oct 2016 13:49:18 +0800 Subject: [PATCH 421/884] Revert "clocksource/drivers/timer_sun5i: Replace code by clocksource_mmio_init" struct clocksource is also used by the clk notifier callback, to unregister and re-register the clocksource with a different clock rate. clocksource_mmio_init does not pass back a pointer to the struct used, and the clk notifier callback assumes that the struct clocksource in struct sun5i_timer_clksrc is valid. This results in a kernel NULL pointer dereference when the hstimer clock is changed: Unable to handle kernel NULL pointer dereference at virtual address 00000004 [] (clocksource_unbind) from [] (clocksource_unregister+0x2c/0x44) [] (clocksource_unregister) from [] (sun5i_rate_cb_clksrc+0x34/0x3c) [] (sun5i_rate_cb_clksrc) from [] (notifier_call_chain+0x44/0x84) [] (notifier_call_chain) from [] (__srcu_notifier_call_chain+0x44/0x60) [] (__srcu_notifier_call_chain) from [] (srcu_notifier_call_chain+0x18/0x20) [] (srcu_notifier_call_chain) from [] (__clk_notify+0x70/0x7c) [] (__clk_notify) from [] (clk_propagate_rate_change+0xa4/0xc4) [] (clk_propagate_rate_change) from [] (clk_propagate_rate_change+0x6c/0xc4) Revert the commit for now. clocksource_mmio_init can be made to pass back a pointer, but the code churn and usage of an inner struct might not be worth it. Fixes: 157dfadef832 ("clocksource/drivers/timer_sun5i: Replace code by clocksource_mmio_init") Reported-by: Maxime Ripard Signed-off-by: Chen-Yu Tsai Cc: linux-sunxi@googlegroups.com Cc: Daniel Lezcano Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20161018054918.26855-1-wens@csie.org Signed-off-by: Thomas Gleixner --- drivers/clocksource/timer-sun5i.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/clocksource/timer-sun5i.c b/drivers/clocksource/timer-sun5i.c index c184eb84101e..4f87f3e76d83 100644 --- a/drivers/clocksource/timer-sun5i.c +++ b/drivers/clocksource/timer-sun5i.c @@ -152,6 +152,13 @@ static irqreturn_t sun5i_timer_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } +static cycle_t sun5i_clksrc_read(struct clocksource *clksrc) +{ + struct sun5i_timer_clksrc *cs = to_sun5i_timer_clksrc(clksrc); + + return ~readl(cs->timer.base + TIMER_CNTVAL_LO_REG(1)); +} + static int sun5i_rate_cb_clksrc(struct notifier_block *nb, unsigned long event, void *data) { @@ -210,8 +217,13 @@ static int __init sun5i_setup_clocksource(struct device_node *node, writel(TIMER_CTL_ENABLE | TIMER_CTL_RELOAD, base + TIMER_CTL_REG(1)); - ret = clocksource_mmio_init(base + TIMER_CNTVAL_LO_REG(1), node->name, - rate, 340, 32, clocksource_mmio_readl_down); + cs->clksrc.name = node->name; + cs->clksrc.rating = 340; + cs->clksrc.read = sun5i_clksrc_read; + cs->clksrc.mask = CLOCKSOURCE_MASK(32); + cs->clksrc.flags = CLOCK_SOURCE_IS_CONTINUOUS; + + ret = clocksource_register_hz(&cs->clksrc, rate); if (ret) { pr_err("Couldn't register clock source.\n"); goto err_remove_notifier; From 28e3d7002ba9f773662b2cf75d28cadfa29dc442 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 20 Oct 2016 18:03:36 +0300 Subject: [PATCH 422/884] watchdog: wdat_wdt: Ping the watchdog on resume It turns out we need to ping the watchdog hardware on resume when we re-program it. Otherwise this causes inadvertent reset to trigger right after the resume is complete. Fixes: 058dfc767008 (ACPI / watchdog: Add support for WDAT hardware watchdog) Signed-off-by: Mika Westerberg Acked-by: Guenter Roeck Signed-off-by: Rafael J. Wysocki --- drivers/watchdog/wdat_wdt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/watchdog/wdat_wdt.c b/drivers/watchdog/wdat_wdt.c index e473e3b23720..6d1fbda0f461 100644 --- a/drivers/watchdog/wdat_wdt.c +++ b/drivers/watchdog/wdat_wdt.c @@ -499,6 +499,10 @@ static int wdat_wdt_resume_noirq(struct device *dev) ret = wdat_wdt_enable_reboot(wdat); if (ret) return ret; + + ret = wdat_wdt_ping(&wdat->wdd); + if (ret) + return ret; } return wdat_wdt_start(&wdat->wdd); From 91bbc174d45c347aa7aedb2215cc7d2013c06c1f Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 25 Sep 2016 13:53:58 +0200 Subject: [PATCH 423/884] clk: at91: Fix a return value in case of error If 'clk_hw_register()' fails, it is likely that we expect to return an error instead of a valid pointer (which would mean success). Fix commit f5644f10dcfb ("clk: at91: Migrate to clk_hw based registration and OF APIs") Signed-off-by: Christophe JAILLET Signed-off-by: Stephen Boyd --- drivers/clk/at91/clk-programmable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/at91/clk-programmable.c b/drivers/clk/at91/clk-programmable.c index 190122e64a3a..85a449cf61e3 100644 --- a/drivers/clk/at91/clk-programmable.c +++ b/drivers/clk/at91/clk-programmable.c @@ -203,7 +203,7 @@ at91_clk_register_programmable(struct regmap *regmap, ret = clk_hw_register(NULL, &prog->hw); if (ret) { kfree(prog); - hw = &prog->hw; + hw = ERR_PTR(ret); } return hw; From 97dcaa0fcfd24daa9a36c212c1ad1d5a97759212 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 20 Oct 2016 18:15:16 -0700 Subject: [PATCH 424/884] kexec: Export kexec_in_progress to modules The bcm_sf2 driver uses kexec_in_progress to know whether it can power down an integrated PHY during shutdown, and can be built as a module. Other modules may be using this in the future, so export it. Fixes: 2399d6143f85 ("net: dsa: bcm_sf2: Prevent GPHY shutdown for kexec'd kernels") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- kernel/kexec_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 561675589511..786ab85a5452 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -59,6 +59,7 @@ size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data); /* Flag to indicate we are going to kexec a new kernel */ bool kexec_in_progress = false; +EXPORT_SYMBOL_GPL(kexec_in_progress); /* Location of the reserved area for the crash kernel */ From 1b4c59b7a1d0b9d8019254a5f2e35b2663f49a9e Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 22 Aug 2016 10:54:07 +0200 Subject: [PATCH 425/884] target: fix potential race window in target_sess_cmd_list_waiting() target_sess_cmd_list_waiting() might hit on a condition where the kref for the command is already 0, but the destructor has not been called yet (or is stuck in waiting for a spin lock). Rather than leaving the command on the list we should explicitly remove it to avoid race issues later on. Signed-off-by: Hannes Reinecke Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index e825d580ccee..7dfefd66df93 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -2535,7 +2535,9 @@ int target_get_sess_cmd(struct se_cmd *se_cmd, bool ack_kref) * invocations before se_cmd descriptor release. */ if (ack_kref) { - kref_get(&se_cmd->cmd_kref); + if (!kref_get_unless_zero(&se_cmd->cmd_kref)) + return -EINVAL; + se_cmd->se_cmd_flags |= SCF_ACK_KREF; } @@ -2616,7 +2618,7 @@ EXPORT_SYMBOL(target_put_sess_cmd); */ void target_sess_cmd_list_set_waiting(struct se_session *se_sess) { - struct se_cmd *se_cmd; + struct se_cmd *se_cmd, *tmp_cmd; unsigned long flags; int rc; @@ -2628,14 +2630,16 @@ void target_sess_cmd_list_set_waiting(struct se_session *se_sess) se_sess->sess_tearing_down = 1; list_splice_init(&se_sess->sess_cmd_list, &se_sess->sess_wait_list); - list_for_each_entry(se_cmd, &se_sess->sess_wait_list, se_cmd_list) { + list_for_each_entry_safe(se_cmd, tmp_cmd, + &se_sess->sess_wait_list, se_cmd_list) { rc = kref_get_unless_zero(&se_cmd->cmd_kref); if (rc) { se_cmd->cmd_wait_set = 1; spin_lock(&se_cmd->t_state_lock); se_cmd->transport_state |= CMD_T_FABRIC_STOP; spin_unlock(&se_cmd->t_state_lock); - } + } else + list_del_init(&se_cmd->se_cmd_list); } spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); From b04bf5833e8a0d6ae4e3f7b35b30c4ab6ff7ea1b Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 22 Aug 2016 10:54:08 +0200 Subject: [PATCH 426/884] target/tcm_fc: print command pointer in debug message When allocating a new command we should add the pointer to the debug statements; that allows us to match this with other debug statements for handling data. Signed-off-by: Hannes Reinecke Signed-off-by: Nicholas Bellinger --- drivers/target/tcm_fc/tfc_cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/target/tcm_fc/tfc_cmd.c b/drivers/target/tcm_fc/tfc_cmd.c index 216e18cc9133..36f08644250c 100644 --- a/drivers/target/tcm_fc/tfc_cmd.c +++ b/drivers/target/tcm_fc/tfc_cmd.c @@ -575,7 +575,7 @@ static void ft_send_work(struct work_struct *work) TARGET_SCF_ACK_KREF)) goto err; - pr_debug("r_ctl %x alloc target_submit_cmd\n", fh->fh_r_ctl); + pr_debug("r_ctl %x target_submit_cmd %p\n", fh->fh_r_ctl, cmd); return; err: From 91b385b4909e040450389b11961a8ba8f8e9a35e Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 22 Aug 2016 10:54:09 +0200 Subject: [PATCH 427/884] target/tcm_fc: return detailed error in ft_sess_create() Not every failure is due to out-of-memory; the ACLs might not be set, too. So return a detailed error code in ft_sess_create() instead of just a NULL pointer. Signed-off-by: Hannes Reinecke Signed-off-by: Nicholas Bellinger --- drivers/target/tcm_fc/tfc_sess.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/target/tcm_fc/tfc_sess.c b/drivers/target/tcm_fc/tfc_sess.c index 6ffbb603d912..cc8c2614397b 100644 --- a/drivers/target/tcm_fc/tfc_sess.c +++ b/drivers/target/tcm_fc/tfc_sess.c @@ -223,7 +223,7 @@ static struct ft_sess *ft_sess_create(struct ft_tport *tport, u32 port_id, sess = kzalloc(sizeof(*sess), GFP_KERNEL); if (!sess) - return NULL; + return ERR_PTR(-ENOMEM); kref_init(&sess->kref); /* ref for table entry */ sess->tport = tport; @@ -234,8 +234,9 @@ static struct ft_sess *ft_sess_create(struct ft_tport *tport, u32 port_id, TARGET_PROT_NORMAL, &initiatorname[0], sess, ft_sess_alloc_cb); if (IS_ERR(sess->se_sess)) { + int rc = PTR_ERR(sess->se_sess); kfree(sess); - return NULL; + sess = ERR_PTR(rc); } return sess; } From 8962a4d29bcb3d12164c02d207c8ff1ab8b04558 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 22 Aug 2016 10:54:10 +0200 Subject: [PATCH 428/884] target/tcm_fc: Update debugging statements to match libfc usage Update the debug statements to match those from libfc. Signed-off-by: Hannes Reinecke Signed-off-by: Nicholas Bellinger --- drivers/target/tcm_fc/tfc_sess.c | 37 ++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/drivers/target/tcm_fc/tfc_sess.c b/drivers/target/tcm_fc/tfc_sess.c index cc8c2614397b..fd5c3de79470 100644 --- a/drivers/target/tcm_fc/tfc_sess.c +++ b/drivers/target/tcm_fc/tfc_sess.c @@ -39,6 +39,11 @@ #include "tcm_fc.h" +#define TFC_SESS_DBG(lport, fmt, args...) \ + pr_debug("host%u: rport %6.6x: " fmt, \ + (lport)->host->host_no, \ + (lport)->port_id, ##args ) + static void ft_sess_delete_all(struct ft_tport *); /* @@ -167,24 +172,29 @@ static struct ft_sess *ft_sess_get(struct fc_lport *lport, u32 port_id) struct ft_tport *tport; struct hlist_head *head; struct ft_sess *sess; + char *reason = "no session created"; rcu_read_lock(); tport = rcu_dereference(lport->prov[FC_TYPE_FCP]); - if (!tport) + if (!tport) { + reason = "not an FCP port"; goto out; + } head = &tport->hash[ft_sess_hash(port_id)]; hlist_for_each_entry_rcu(sess, head, hash) { if (sess->port_id == port_id) { kref_get(&sess->kref); rcu_read_unlock(); - pr_debug("port_id %x found %p\n", port_id, sess); + TFC_SESS_DBG(lport, "port_id %x found %p\n", + port_id, sess); return sess; } } out: rcu_read_unlock(); - pr_debug("port_id %x not found\n", port_id); + TFC_SESS_DBG(lport, "port_id %x not found, %s\n", + port_id, reason); return NULL; } @@ -195,7 +205,7 @@ static int ft_sess_alloc_cb(struct se_portal_group *se_tpg, struct ft_tport *tport = sess->tport; struct hlist_head *head = &tport->hash[ft_sess_hash(sess->port_id)]; - pr_debug("port_id %x sess %p\n", sess->port_id, sess); + TFC_SESS_DBG(tport->lport, "port_id %x sess %p\n", sess->port_id, sess); hlist_add_head_rcu(&sess->hash, head); tport->sess_count++; @@ -320,7 +330,7 @@ void ft_sess_close(struct se_session *se_sess) mutex_unlock(&ft_lport_lock); return; } - pr_debug("port_id %x\n", port_id); + TFC_SESS_DBG(sess->tport->lport, "port_id %x close session\n", port_id); ft_sess_unhash(sess); mutex_unlock(&ft_lport_lock); ft_close_sess(sess); @@ -380,8 +390,13 @@ static int ft_prli_locked(struct fc_rport_priv *rdata, u32 spp_len, if (!(fcp_parm & FCP_SPPF_INIT_FCN)) return FC_SPP_RESP_CONF; sess = ft_sess_create(tport, rdata->ids.port_id, rdata); - if (!sess) - return FC_SPP_RESP_RES; + if (IS_ERR(sess)) { + if (PTR_ERR(sess) == -EACCES) { + spp->spp_flags &= ~FC_SPP_EST_IMG_PAIR; + return FC_SPP_RESP_CONF; + } else + return FC_SPP_RESP_RES; + } if (!sess->params) rdata->prli_count++; sess->params = fcp_parm; @@ -424,8 +439,8 @@ static int ft_prli(struct fc_rport_priv *rdata, u32 spp_len, mutex_lock(&ft_lport_lock); ret = ft_prli_locked(rdata, spp_len, rspp, spp); mutex_unlock(&ft_lport_lock); - pr_debug("port_id %x flags %x ret %x\n", - rdata->ids.port_id, rspp ? rspp->spp_flags : 0, ret); + TFC_SESS_DBG(rdata->local_port, "port_id %x flags %x ret %x\n", + rdata->ids.port_id, rspp ? rspp->spp_flags : 0, ret); return ret; } @@ -478,11 +493,11 @@ static void ft_recv(struct fc_lport *lport, struct fc_frame *fp) struct ft_sess *sess; u32 sid = fc_frame_sid(fp); - pr_debug("sid %x\n", sid); + TFC_SESS_DBG(lport, "recv sid %x\n", sid); sess = ft_sess_get(lport, sid); if (!sess) { - pr_debug("sid %x sess lookup failed\n", sid); + TFC_SESS_DBG(lport, "sid %x sess lookup failed\n", sid); /* TBD XXX - if FCP_CMND, send PRLO */ fc_frame_free(fp); return; From 1ba0158fa66b5b2c597a748f87be1650c9960ccc Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 22 Aug 2016 10:54:11 +0200 Subject: [PATCH 429/884] target/tcm_fc: use CPU affinity for responses The libfc stack assigns exchange IDs based on the CPU the request was received on, so we need to send the responses via the same CPU. Otherwise the send logic gets confuses and responses will be delayed, causing exchange timeouts on the initiator side. Signed-off-by: Hannes Reinecke Cc: stable@vger.kernel.org # 4.5+ Signed-off-by: Nicholas Bellinger --- drivers/target/tcm_fc/tfc_cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/target/tcm_fc/tfc_cmd.c b/drivers/target/tcm_fc/tfc_cmd.c index 36f08644250c..ff5de9a96643 100644 --- a/drivers/target/tcm_fc/tfc_cmd.c +++ b/drivers/target/tcm_fc/tfc_cmd.c @@ -572,7 +572,7 @@ static void ft_send_work(struct work_struct *work) if (target_submit_cmd(&cmd->se_cmd, cmd->sess->se_sess, fcp->fc_cdb, &cmd->ft_sense_buffer[0], scsilun_to_int(&fcp->fc_lun), ntohl(fcp->fc_dl), task_attr, data_dir, - TARGET_SCF_ACK_KREF)) + TARGET_SCF_ACK_KREF | TARGET_SCF_USE_CPUID)) goto err; pr_debug("r_ctl %x target_submit_cmd %p\n", fh->fh_r_ctl, cmd); From 3118dac501bc0317de099db81618d589503351e1 Mon Sep 17 00:00:00 2001 From: Sudip Mukherjee Date: Thu, 6 Oct 2016 23:06:43 +0530 Subject: [PATCH 430/884] kernel/irq: Export irq_set_parent() The TPS65217 driver grew interrupt support which uses irq_set_parent(). While it's not yet clear why this is used in the first place, building the driver as a module fails with: ERROR: ".irq_set_parent" [drivers/mfd/tps65217.ko] undefined! The correctness of the driver change is still investigated, but for now it's less trouble to export irq_set_parent() than dealing with the build wreckage. [ tglx: Rewrote changelog and made the export GPL ] Fixes: 6556bdacf646 ("mfd: tps65217: Add support for IRQs") Signed-off-by: Sudip Mukherjee Cc: Sudip Mukherjee Cc: Marcin Niestroj Cc: Grygorii Strashko Cc: Tony Lindgren Cc: Lee Jones Link: http://lkml.kernel.org/r/1475775403-27207-1-git-send-email-sudipm.mukherjee@gmail.com Signed-off-by: Thomas Gleixner --- kernel/irq/manage.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 0c5f1a5db654..9c4d30483264 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -721,6 +721,7 @@ int irq_set_parent(int irq, int parent_irq) irq_put_desc_unlock(desc, flags); return 0; } +EXPORT_SYMBOL_GPL(irq_set_parent); #endif /* From 1f1cc4566bd9dd8d3cf19965a4b6392143618536 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Tue, 18 Oct 2016 16:53:59 +0200 Subject: [PATCH 431/884] gpio: GPIO_GET_CHIPINFO_IOCTL: Fix line offset validation The current line offset validation is off by one. Depending on the data stored behind the descs array this can either cause undefined behavior or disclose arbitrary, potentially sensitive, memory to the issuing userspace application. Make sure that offset is within the bounds of the desc array. Cc: stable@vger.kernel.org Fixes: 521a2ad6f862 ("gpio: add userspace ABI for GPIO line information") Signed-off-by: Lars-Peter Clausen Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index f0fc3a0d37c8..dac975397253 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -839,7 +839,7 @@ static long gpio_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (copy_from_user(&lineinfo, ip, sizeof(lineinfo))) return -EFAULT; - if (lineinfo.line_offset > gdev->ngpio) + if (lineinfo.line_offset >= gdev->ngpio) return -EINVAL; desc = &gdev->descs[lineinfo.line_offset]; From 0f4bbb233743bdfd51d47688b0bc2561f310488b Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Tue, 18 Oct 2016 16:54:00 +0200 Subject: [PATCH 432/884] gpio: GPIO_GET_CHIPINFO_IOCTL: Fix information leak The GPIO_GET_CHIPINFO_IOCTL handler allocates a gpiochip_info struct on the stack and then passes it to copy_to_user(). But depending on the length of the GPIO chip name and label the struct is only partially initialized. This exposes the previous, potentially sensitive, stack content to the issuing userspace application. To avoid this make sure that the struct is fully initialized. Cc: stable@vger.kernel.org Fixes: 521a2ad6f862 ("gpio: add userspace ABI for GPIO line information") Signed-off-by: Lars-Peter Clausen Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index dac975397253..7a01969169e8 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -823,6 +823,8 @@ static long gpio_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (cmd == GPIO_GET_CHIPINFO_IOCTL) { struct gpiochip_info chipinfo; + memset(&chipinfo, 0, sizeof(chipinfo)); + strncpy(chipinfo.name, dev_name(&gdev->dev), sizeof(chipinfo.name)); chipinfo.name[sizeof(chipinfo.name)-1] = '\0'; From e405f9fcb63602d35f7a419ededa3f952a395a72 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Tue, 18 Oct 2016 16:54:01 +0200 Subject: [PATCH 433/884] gpio: GPIO_GET_LINEHANDLE_IOCTL: Validate line offset The line offset that is used as an index into the descs array is provided by userspace and might go beyond the bounds of the array. If that happens undefined behavior will occur. Make sure that the offset is within the bounds of the desc array and reject any requests that specify a value outside of it. Cc: stable@vger.kernel.org Fixes: d7c51b47ac11 ("gpio: userspace ABI for reading/writing GPIO lines") Signed-off-by: Lars-Peter Clausen Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 7a01969169e8..d287cb4e97c4 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -444,6 +444,11 @@ static int linehandle_create(struct gpio_device *gdev, void __user *ip) u32 lflags = handlereq.flags; struct gpio_desc *desc; + if (offset >= gdev->ngpio) { + ret = -EINVAL; + goto out_free_descs; + } + desc = &gdev->descs[offset]; ret = gpiod_request(desc, lh->label); if (ret) From d82aa4a8f2e8df9673ddb099262355da4c9b99b1 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Tue, 18 Oct 2016 16:54:04 +0200 Subject: [PATCH 434/884] gpio: GPIOHANDLE_GET_LINE_VALUES_IOCTL: Fix information leak The GPIOHANDLE_GET_LINE_VALUES_IOCTL handler allocates a gpiohandle_data struct on the stack and then passes it to copy_to_user(). But only the first element of the values array in the struct is set, which leaves the struct partially initialized. This exposes the previous, potentially sensitive, stack content to the issuing userspace application. To avoid this make sure that the struct is fully initialized. Cc: stable@vger.kernel.org Fixes: 61f922db7221 ("gpio: userspace ABI for reading GPIO line events") Signed-off-by: Lars-Peter Clausen Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index d287cb4e97c4..4ed26bcf054c 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -628,6 +628,8 @@ static long lineevent_ioctl(struct file *filep, unsigned int cmd, if (cmd == GPIOHANDLE_GET_LINE_VALUES_IOCTL) { int val; + memset(&ghd, 0, sizeof(ghd)); + val = gpiod_get_value_cansleep(le->desc); if (val < 0) return val; From b8b0e3d303654b3bb7b31b0266c513fd6f4132ce Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Tue, 18 Oct 2016 16:54:03 +0200 Subject: [PATCH 435/884] gpio: GPIO_GET_LINEEVENT_IOCTL: Validate line offset The line offset that is used as an index into the descs array is provided by userspace and might go beyond the bounds of the array. If that happens undefined behavior will occur. Make sure that the offset is within the bounds of the desc array and reject any requests that specify a value outside of it. Cc: stable@vger.kernel.org Fixes: 61f922db7221 ("gpio: userspace ABI for reading GPIO line events") Signed-off-by: Lars-Peter Clausen Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 4ed26bcf054c..db44a7da5eb4 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -733,6 +733,11 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip) lflags = eventreq.handleflags; eflags = eventreq.eventflags; + if (offset >= gdev->ngpio) { + ret = -EINVAL; + goto out_free_label; + } + /* This is just wrong: we don't look for events on output lines */ if (lflags & GPIOHANDLE_REQUEST_OUTPUT) { ret = -EINVAL; From 3eded5d83bf4e36ad78775c7ceb44a45480b0abd Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Tue, 18 Oct 2016 16:54:02 +0200 Subject: [PATCH 436/884] gpio: GPIOHANDLE_GET_LINE_VALUES_IOCTL: Fix information leak The GPIOHANDLE_GET_LINE_VALUES_IOCTL handler allocates a gpiohandle_data struct on the stack and then passes it to copy_to_user(). But depending on the number of requested line handles the struct is only partially initialized. This exposes the previous, potentially sensitive, stack content to the issuing userspace application. To avoid this make sure that the struct is fully initialized. Cc: stable@vger.kernel.org Fixes: d7c51b47ac11 ("gpio: userspace ABI for reading/writing GPIO lines") Signed-off-by: Lars-Peter Clausen Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index db44a7da5eb4..7f92f8964efd 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -344,6 +344,8 @@ static long linehandle_ioctl(struct file *filep, unsigned int cmd, if (cmd == GPIOHANDLE_GET_LINE_VALUES_IOCTL) { int val; + memset(&ghd, 0, sizeof(ghd)); + /* TODO: check if descriptors are really input */ for (i = 0; i < lh->numdescs; i++) { val = gpiod_get_value_cansleep(lh->descs[i]); From 9799c50372b23ed774791bdb87d700f1286ee8a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Thu, 6 Oct 2016 01:43:08 +0200 Subject: [PATCH 437/884] batman-adv: fix splat on disabling an interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As long as there is still a reference for a hard interface held, there might still be a forwarding packet relying on its attributes. Therefore avoid setting hard_iface->soft_iface to NULL when disabling a hard interface. This fixes the following, potential splat: batman_adv: bat0: Interface deactivated: eth1 batman_adv: bat0: Removing interface: eth1 cgroup: new mount options do not match the existing superblock, will be ignored batman_adv: bat0: Interface deactivated: eth3 batman_adv: bat0: Removing interface: eth3 ------------[ cut here ]------------ WARNING: CPU: 3 PID: 1986 at ./net/batman-adv/bat_iv_ogm.c:549 batadv_iv_send_outstanding_bat_ogm_packet+0x145/0x643 [batman_adv] Modules linked in: batman_adv(O-) <...> CPU: 3 PID: 1986 Comm: kworker/u8:2 Tainted: G W O 4.6.0-rc6+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.7.5-20140531_083030-gandalf 04/01/2014 Workqueue: bat_events batadv_iv_send_outstanding_bat_ogm_packet [batman_adv] 0000000000000000 ffff88001d93bca0 ffffffff8126c26b 0000000000000000 0000000000000000 ffff88001d93bcf0 ffffffff81051615 ffff88001f19f818 000002251d93bd68 0000000000000046 ffff88001dc04a00 ffff88001becbe48 Call Trace: [] dump_stack+0x67/0x90 [] __warn+0xc7/0xe5 [] warn_slowpath_null+0x18/0x1a [] batadv_iv_send_outstanding_bat_ogm_packet+0x145/0x643 [batman_adv] [] ? __lock_is_held+0x32/0x54 [] process_one_work+0x2a8/0x4f5 [] ? process_one_work+0x15c/0x4f5 [] worker_thread+0x1d5/0x2c0 [] ? process_scheduled_works+0x2e/0x2e [] ? process_scheduled_works+0x2e/0x2e [] kthread+0xc0/0xc8 [] ret_from_fork+0x22/0x40 [] ? __init_kthread_worker+0x55/0x55 ---[ end trace 647f9f325123dc05 ]--- What happened here is, that there was still a forw_packet (here: a BATMAN IV OGM) in the queue of eth3 with the forw_packet->if_incoming set to eth1 and the forw_packet->if_outgoing set to eth3. When eth3 is to be deactivated and removed, then this thread waits for the forw_packet queued on eth3 to finish. Because eth1 was deactivated and removed earlier and by that had forw_packet->if_incoming->soft_iface, set to NULL, the splat when trying to send/flush the OGM on eth3 occures. Fixes: c6c8fea29769 ("net: Add batman-adv meshing protocol") Signed-off-by: Linus Lüssing [sven@narfation.org: Reduced size of Oops message] Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/hard-interface.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 08ce36147c4c..e034afbd1bb0 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -652,7 +652,6 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface, batadv_softif_destroy_sysfs(hard_iface->soft_iface); } - hard_iface->soft_iface = NULL; batadv_hardif_put(hard_iface); out: From e3e847c7f15a27c80f526b2a7a8d4dd7ce0960a0 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Tue, 18 Oct 2016 16:54:05 +0200 Subject: [PATCH 438/884] gpio: GPIO_GET_LINEHANDLE_IOCTL: Reject invalid line flags The GPIO_GET_LINEHANDLE_IOCTL currently ignores unknown or undefined linehandle flags. From a backwards and forwards compatibility viewpoint it is highly desirable to reject unknown flags though. On one hand an application that is using newer flags and is running on an older kernel has no way to detect if the new flags were handled correctly if they are silently discarded. On the other hand an application that (accidentally) passes undefined flags will run fine on an older kernel, but may break on a newer kernel when these flags get defined. Ensure that requests that have undefined flags set are rejected with an error, rather than silently discarding the undefined flags. Cc: stable@vger.kernel.org Fixes: d7c51b47ac11 ("gpio: userspace ABI for reading/writing GPIO lines") Signed-off-by: Lars-Peter Clausen Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 7f92f8964efd..b5b1a8425907 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -333,6 +333,13 @@ struct linehandle_state { u32 numdescs; }; +#define GPIOHANDLE_REQUEST_VALID_FLAGS \ + (GPIOHANDLE_REQUEST_INPUT | \ + GPIOHANDLE_REQUEST_OUTPUT | \ + GPIOHANDLE_REQUEST_ACTIVE_LOW | \ + GPIOHANDLE_REQUEST_OPEN_DRAIN | \ + GPIOHANDLE_REQUEST_OPEN_SOURCE) + static long linehandle_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) { @@ -451,6 +458,12 @@ static int linehandle_create(struct gpio_device *gdev, void __user *ip) goto out_free_descs; } + /* Return an error if a unknown flag is set */ + if (lflags & ~GPIOHANDLE_REQUEST_VALID_FLAGS) { + ret = -EINVAL; + goto out_free_descs; + } + desc = &gdev->descs[offset]; ret = gpiod_request(desc, lh->label); if (ret) From ac7dbb991ee5afc0beacce3a252dcaaa249a7786 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Tue, 18 Oct 2016 16:54:06 +0200 Subject: [PATCH 439/884] gpio: GPIO_GET_LINEEVENT_IOCTL: Reject invalid line and event flags The GPIO_GET_LINEEVENT_IOCTL currently ignores unknown or undefined linehandle and lineevent flags. From a backwards and forwards compatibility viewpoint it is highly desirable to reject unknown flags though. On one hand an application that is using newer flags and is running on an older kernel has no way to detect if the new flags were handled correctly if they are silently discarded. On the other hand an application that (accidentally) passes undefined flags will run fine on an older kernel, but may break on a newer kernel when these flags get defined. Ensure that requests that have undefined flags set are rejected with an error, rather than silently discarding the undefined flags. Cc: stable@vger.kernel.org Fixes: 61f922db7221 ("gpio: userspace ABI for reading GPIO line events") Signed-off-by: Lars-Peter Clausen Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index b5b1a8425907..20e09b7c2de3 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -556,6 +556,10 @@ struct lineevent_state { struct mutex read_lock; }; +#define GPIOEVENT_REQUEST_VALID_FLAGS \ + (GPIOEVENT_REQUEST_RISING_EDGE | \ + GPIOEVENT_REQUEST_FALLING_EDGE) + static unsigned int lineevent_poll(struct file *filep, struct poll_table_struct *wait) { @@ -753,6 +757,13 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip) goto out_free_label; } + /* Return an error if a unknown flag is set */ + if ((lflags & ~GPIOHANDLE_REQUEST_VALID_FLAGS) || + (eflags & ~GPIOEVENT_REQUEST_VALID_FLAGS)) { + ret = -EINVAL; + goto out_free_label; + } + /* This is just wrong: we don't look for events on output lines */ if (lflags & GPIOHANDLE_REQUEST_OUTPUT) { ret = -EINVAL; From 91f1551a746f62937957fa1bff4165e1b7a45337 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Tue, 18 Oct 2016 17:44:02 -0300 Subject: [PATCH 440/884] gpio: ts4800: Fix module autoload If the driver is built as a module, autoload won't work because the module alias information is not filled. So user-space can't match the registered device with the corresponding module. Export the module alias information using the MODULE_DEVICE_TABLE() macro. Before this patch: $ modinfo drivers/gpio/gpio-ts4800.ko | grep alias $ After this patch: $ modinfo drivers/gpio/gpio-ts4800.ko | grep alias alias: of:N*T*Ctechnologic,ts4800-gpioC* alias: of:N*T*Ctechnologic,ts4800-gpio Signed-off-by: Javier Martinez Canillas Signed-off-by: Linus Walleij --- drivers/gpio/gpio-ts4800.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-ts4800.c b/drivers/gpio/gpio-ts4800.c index 99256115bea5..c2a80b4cbf32 100644 --- a/drivers/gpio/gpio-ts4800.c +++ b/drivers/gpio/gpio-ts4800.c @@ -66,6 +66,7 @@ static const struct of_device_id ts4800_gpio_of_match[] = { { .compatible = "technologic,ts4800-gpio", }, {}, }; +MODULE_DEVICE_TABLE(of, ts4800_gpio_of_match); static struct platform_driver ts4800_gpio_driver = { .driver = { From 6a34e0e6b49f50d2626c15ed75b76031f12bd637 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Thu, 22 Sep 2016 03:35:16 -0500 Subject: [PATCH 441/884] arm64: dts: Add timer erratum property for LS2080A and LS1043A Both the LS1043A and LS2080A platforms are affected by the Freescale A008585 erratum. Advertise it in their respective device trees. Signed-off-by: Scott Wood Acked-by: Marc Zyngier Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi | 1 + arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi index 220ac7057d12..97d331ec2500 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi @@ -123,6 +123,7 @@ <1 14 0xf08>, /* Physical Non-Secure PPI */ <1 11 0xf08>, /* Virtual PPI */ <1 10 0xf08>; /* Hypervisor PPI */ + fsl,erratum-a008585; }; pmu { diff --git a/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi index 337da90bd7da..7f0dc13b4087 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi @@ -195,6 +195,7 @@ <1 14 4>, /* Physical Non-Secure PPI, active-low */ <1 11 4>, /* Virtual PPI, active-low */ <1 10 4>; /* Hypervisor PPI, active-low */ + fsl,erratum-a008585; }; pmu { From 126d26f66d9890a69158812a6caa248c05359daa Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 21 Oct 2016 12:56:27 +0200 Subject: [PATCH 442/884] USB: serial: fix potential NULL-dereference at probe Make sure we have at least one port before attempting to register a console. Currently, at least one driver binds to a "dummy" interface and requests zero ports for it. Should such an interface also lack endpoints, we get a NULL-deref during probe. Fixes: e5b1e2062e05 ("USB: serial: make minor allocation dynamic") Cc: stable # 3.11 Signed-off-by: Johan Hovold --- drivers/usb/serial/usb-serial.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c index d213cf44a7e4..4a037b4a79cf 100644 --- a/drivers/usb/serial/usb-serial.c +++ b/drivers/usb/serial/usb-serial.c @@ -1078,7 +1078,8 @@ static int usb_serial_probe(struct usb_interface *interface, serial->disconnected = 0; - usb_serial_console_init(serial->port[0]->minor); + if (num_ports > 0) + usb_serial_console_init(serial->port[0]->minor); exit: module_put(type->driver.owner); return 0; From 02a1b8f4167eac709d269341f7c3c340984c28a6 Mon Sep 17 00:00:00 2001 From: Joao Pinto Date: Fri, 21 Oct 2016 10:31:48 +0100 Subject: [PATCH 443/884] PCI: designware-plat: Update author email address Although I am leaving Synopsys, I would like to keep working with the linux kernel community and help in what you might find useful. For that I am sending this patch to change my contact e-mail. Signed-off-by: Joao Pinto Signed-off-by: Bjorn Helgaas --- drivers/pci/host/pcie-designware-plat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/host/pcie-designware-plat.c b/drivers/pci/host/pcie-designware-plat.c index 537f58a664fa..8df6312ed300 100644 --- a/drivers/pci/host/pcie-designware-plat.c +++ b/drivers/pci/host/pcie-designware-plat.c @@ -3,7 +3,7 @@ * * Copyright (C) 2015-2016 Synopsys, Inc. (www.synopsys.com) * - * Authors: Joao Pinto + * Authors: Joao Pinto * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as From 77f7f71f5be1087823296f2dd8114ff7246ae877 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 14 Oct 2016 11:11:06 -0400 Subject: [PATCH 444/884] drm/amdgpu/powerplay/smu7: fix static checker warning Casting of voltage values to a larger size results in overwriting adjacent memory in the structure. Reviewed-by: Rex Zhu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index 609996c84ad5..13333d01d2ef 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -2127,15 +2127,18 @@ static int smu7_patch_acp_vddc(struct pp_hwmgr *hwmgr, } static int smu7_patch_limits_vddc(struct pp_hwmgr *hwmgr, - struct phm_clock_and_voltage_limits *tab) + struct phm_clock_and_voltage_limits *tab) { + uint32_t vddc, vddci; struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); if (tab) { - smu7_patch_ppt_v0_with_vdd_leakage(hwmgr, (uint32_t *)&tab->vddc, - &data->vddc_leakage); - smu7_patch_ppt_v0_with_vdd_leakage(hwmgr, (uint32_t *)&tab->vddci, - &data->vddci_leakage); + smu7_patch_ppt_v0_with_vdd_leakage(hwmgr, &vddc, + &data->vddc_leakage); + tab->vddc = vddc; + smu7_patch_ppt_v0_with_vdd_leakage(hwmgr, &vddci, + &data->vddci_leakage); + tab->vddci = vddci; } return 0; From fb9a5b0c1c9893db2e0d18544fd49e19d784a87d Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Thu, 13 Oct 2016 12:38:07 -0400 Subject: [PATCH 445/884] drm/radeon/si_dpm: Limit clocks on HD86xx part Limit clocks on a specific HD86xx part to avoid crashes (while awaiting an appropriate PP fix). Signed-off-by: Tom St Denis Reviewed-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/si_dpm.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index 89bdf20344ae..e22b11189628 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -3021,6 +3021,12 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, max_sclk = 75000; max_mclk = 80000; } + /* limit clocks on HD8600 series */ + if (rdev->pdev->device == 0x6660 && + rdev->pdev->revision == 0x83) { + max_sclk = 75000; + max_mclk = 80000; + } if (rps->vce_active) { rps->evclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].evclk; From 08e23a02e3e2dd6dde0f2f5bb90d03f5bc2a5804 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 6 Oct 2016 11:25:25 -0400 Subject: [PATCH 446/884] drm/amdgpu: drop atom scratch save/restore in gpu reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is already handled by the dce IP modules in their suspend and resume code. No need to do it again. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index b4f4a9239069..2b0267494bca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2268,8 +2268,6 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev) } if (need_full_reset) { - /* save scratch */ - amdgpu_atombios_scratch_regs_save(adev); r = amdgpu_suspend(adev); retry: @@ -2288,8 +2286,6 @@ retry: dev_info(adev->dev, "GPU reset succeeded, trying to resume\n"); r = amdgpu_resume(adev); } - /* restore scratch */ - amdgpu_atombios_scratch_regs_restore(adev); } if (!r) { amdgpu_irq_gpu_reset_resume_helper(adev); From e695e77c6b49c49e5eafdc6239d6b22bfa30c7cc Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 19 Oct 2016 14:40:58 -0400 Subject: [PATCH 447/884] drm/amdgpu: move atom scratch register save/restore to common code We need this for more than just DCE. Move it out of the DCE modules and into the device code. This way we can be sure the scratch registers are initialized properly before we run asic_init which happens before DCE IPs are restored. Fixes atombios hangs in asic_init. Reviewed-by: JimQu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 ++++- drivers/gpu/drm/amd/amdgpu/dce_v10_0.c | 6 ------ drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | 6 ------ drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 6 ------ drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 6 ------ 5 files changed, 4 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 2b0267494bca..7ca07e7b25c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1959,6 +1959,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) /* evict remaining vram memory */ amdgpu_bo_evict_vram(adev); + amdgpu_atombios_scratch_regs_save(adev); pci_save_state(dev->pdev); if (suspend) { /* Shut down the device */ @@ -2010,6 +2011,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) return r; } } + amdgpu_atombios_scratch_regs_restore(adev); /* post card */ if (!amdgpu_card_posted(adev) || !resume) { @@ -2277,8 +2279,9 @@ retry: amdgpu_display_stop_mc_access(adev, &save); amdgpu_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GMC); } - + amdgpu_atombios_scratch_regs_save(adev); r = amdgpu_asic_reset(adev); + amdgpu_atombios_scratch_regs_restore(adev); /* post card */ amdgpu_atom_asic_init(adev->mode_info.atom_context); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 4108c686aa7c..9260caef74fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -3151,10 +3151,6 @@ static int dce_v10_0_hw_fini(void *handle) static int dce_v10_0_suspend(void *handle) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - amdgpu_atombios_scratch_regs_save(adev); - return dce_v10_0_hw_fini(handle); } @@ -3165,8 +3161,6 @@ static int dce_v10_0_resume(void *handle) ret = dce_v10_0_hw_init(handle); - amdgpu_atombios_scratch_regs_restore(adev); - /* turn on the BL */ if (adev->mode_info.bl_encoder) { u8 bl_level = amdgpu_display_backlight_get_level(adev, diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index f264b8f17ad1..367739bd1927 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -3215,10 +3215,6 @@ static int dce_v11_0_hw_fini(void *handle) static int dce_v11_0_suspend(void *handle) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - amdgpu_atombios_scratch_regs_save(adev); - return dce_v11_0_hw_fini(handle); } @@ -3229,8 +3225,6 @@ static int dce_v11_0_resume(void *handle) ret = dce_v11_0_hw_init(handle); - amdgpu_atombios_scratch_regs_restore(adev); - /* turn on the BL */ if (adev->mode_info.bl_encoder) { u8 bl_level = amdgpu_display_backlight_get_level(adev, diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index b948d6cb1399..15f9fc0514b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -2482,10 +2482,6 @@ static int dce_v6_0_hw_fini(void *handle) static int dce_v6_0_suspend(void *handle) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - amdgpu_atombios_scratch_regs_save(adev); - return dce_v6_0_hw_fini(handle); } @@ -2496,8 +2492,6 @@ static int dce_v6_0_resume(void *handle) ret = dce_v6_0_hw_init(handle); - amdgpu_atombios_scratch_regs_restore(adev); - /* turn on the BL */ if (adev->mode_info.bl_encoder) { u8 bl_level = amdgpu_display_backlight_get_level(adev, diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 5966166ec94c..8c4d808db0f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -3033,10 +3033,6 @@ static int dce_v8_0_hw_fini(void *handle) static int dce_v8_0_suspend(void *handle) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - amdgpu_atombios_scratch_regs_save(adev); - return dce_v8_0_hw_fini(handle); } @@ -3047,8 +3043,6 @@ static int dce_v8_0_resume(void *handle) ret = dce_v8_0_hw_init(handle); - amdgpu_atombios_scratch_regs_restore(adev); - /* turn on the BL */ if (adev->mode_info.bl_encoder) { u8 bl_level = amdgpu_display_backlight_get_level(adev, From 1304f0c7ddd80b359282bb2077c8a7bc51f06c3a Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Mon, 17 Oct 2016 09:49:29 +0800 Subject: [PATCH 448/884] drm/amd/amdgpu: expose max engine and memory clock for powerplay enabled case Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index c2c7fb140338..203d98b00555 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -459,10 +459,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file /* return all clocks in KHz */ dev_info.gpu_counter_freq = amdgpu_asic_get_xclk(adev) * 10; if (adev->pm.dpm_enabled) { - dev_info.max_engine_clock = - adev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk * 10; - dev_info.max_memory_clock = - adev->pm.dpm.dyn_state.max_clock_voltage_on_ac.mclk * 10; + dev_info.max_engine_clock = amdgpu_dpm_get_sclk(adev, false) * 10; + dev_info.max_memory_clock = amdgpu_dpm_get_mclk(adev, false) * 10; } else { dev_info.max_engine_clock = adev->pm.default_sclk * 10; dev_info.max_memory_clock = adev->pm.default_mclk * 10; From 0711257ee6d1e8bec08fdf26021f968d4d041e91 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 18 Oct 2016 00:32:00 -0400 Subject: [PATCH 449/884] drm/amdgpu/st: move ATC CG golden init from gfx to mc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's technically an MC register so make sure we initialize it in the MC module rather than the gfx module. Since other bits in the same register are used to enable ATC CG features make sure we apply the golden setting first. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 1 - drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index ee6a48a09214..bb97182dc749 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -640,7 +640,6 @@ static const u32 stoney_mgcg_cgcg_init[] = mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201, mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201, mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200, - mmATC_MISC_CG, 0xffffffff, 0x000c0200, }; static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index c22ef140a542..a16b2201d52c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -100,6 +100,7 @@ static const u32 cz_mgcg_cgcg_init[] = static const u32 stoney_mgcg_cgcg_init[] = { + mmATC_MISC_CG, 0xffffffff, 0x000c0200, mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104 }; From e6b2a7d294360ecf128891d8288603526ce4d6ad Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 19 Oct 2016 13:06:14 -0400 Subject: [PATCH 450/884] drm/amdgpu: explicitly set pg_flags for ST No need to retain previous settings as this is the first time we set pg_flags. Probably a copy/paste typo from the CZ code. Avoids confusion. No change in behavior as adev is kzallocated. Reviewed-by: Tom St Denis Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index c0d9aad7126f..7c13090df7c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -1651,7 +1651,7 @@ static int vi_common_early_init(void *handle) AMD_CG_SUPPORT_SDMA_MGCG | AMD_CG_SUPPORT_SDMA_LS | AMD_CG_SUPPORT_VCE_MGCG; - adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | + adev->pg_flags = AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG | AMD_PG_SUPPORT_GFX_PIPELINE | AMD_PG_SUPPORT_UVD | From 57d7f9b64ad9d4d9c87ed1d6b58e5bfabf2dfc6c Mon Sep 17 00:00:00 2001 From: jimqu Date: Thu, 20 Oct 2016 14:58:04 +0800 Subject: [PATCH 451/884] drm/amdgpu: avoid drm error log during S3 on RHEL7.3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: JimQu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index b0f6e6957536..82dc8d20e28a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -519,7 +519,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, &duplicates); if (unlikely(r != 0)) { - DRM_ERROR("ttm_eu_reserve_buffers failed.\n"); + if (r != -ERESTARTSYS) + DRM_ERROR("ttm_eu_reserve_buffers failed.\n"); goto error_free_pages; } From 51585e03f5b26521d7840c06f4807ee62b23af3a Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Mon, 17 Oct 2016 18:15:26 +0800 Subject: [PATCH 452/884] drm/amd/powerplay: fix static checker warning in process_pptables_v1_0.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rex Zhu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- .../gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c b/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c index 7de701d8a450..4477c55a58e3 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/process_pptables_v1_0.c @@ -1201,12 +1201,15 @@ static uint32_t make_classification_flags(struct pp_hwmgr *hwmgr, static int ppt_get_num_of_vce_state_table_entries_v1_0(struct pp_hwmgr *hwmgr) { const ATOM_Tonga_POWERPLAYTABLE *pp_table = get_powerplay_table(hwmgr); - const ATOM_Tonga_VCE_State_Table *vce_state_table = - (ATOM_Tonga_VCE_State_Table *)(((unsigned long)pp_table) + le16_to_cpu(pp_table->usVCEStateTableOffset)); + const ATOM_Tonga_VCE_State_Table *vce_state_table; - if (vce_state_table == NULL) + + if (pp_table == NULL) return 0; + vce_state_table = (void *)pp_table + + le16_to_cpu(pp_table->usVCEStateTableOffset); + return vce_state_table->ucNumEntries; } From 8861a8209782faffedb6d64572fa968ee9c1c375 Mon Sep 17 00:00:00 2001 From: Grazvydas Ignotas Date: Thu, 20 Oct 2016 15:51:25 +0800 Subject: [PATCH 453/884] drm/amd/powerplay: don't give up if DPM is already running Currently the driver crashes if smu7_enable_dpm_tasks() returns early, which it does if DPM is already active. It seems to be better just to continue anyway, at least I haven't noticed any ill effects. It's also unclear at what state the hardware was left by the previous driver, so IMO it's better to always fully initialize. Way to reproduce: with GPU passthrough. forced power off the VM or forced reset the VM without shutting down the Os. Signed-off-by: Grazvydas Ignotas Reviewed-by: Edward O'Callaghan Reviewed-by: Rex Zhu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index 13333d01d2ef..75854021f403 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -1168,8 +1168,8 @@ int smu7_enable_dpm_tasks(struct pp_hwmgr *hwmgr) tmp_result = (!smum_is_dpm_running(hwmgr)) ? 0 : -1; PP_ASSERT_WITH_CODE(tmp_result == 0, - "DPM is already running right now, no need to enable DPM!", - return 0); + "DPM is already running", + ); if (smu7_voltage_control(hwmgr)) { tmp_result = smu7_enable_voltage_control(hwmgr); From 8651be8f14a12d24f203f283601d9b0418c389ff Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 19 Oct 2016 23:35:12 -0700 Subject: [PATCH 454/884] ipv6: fix a potential deadlock in do_ipv6_setsockopt() Baozeng reported this deadlock case: CPU0 CPU1 ---- ---- lock([ 165.136033] sk_lock-AF_INET6); lock([ 165.136033] rtnl_mutex); lock([ 165.136033] sk_lock-AF_INET6); lock([ 165.136033] rtnl_mutex); Similar to commit 87e9f0315952 ("ipv4: fix a potential deadlock in mcast getsockopt() path") this is due to we still have a case, ipv6_sock_mc_close(), where we acquire sk_lock before rtnl_lock. Close this deadlock with the similar solution, that is always acquire rtnl lock first. Fixes: baf606d9c9b1 ("ipv4,ipv6: grab rtnl before locking the socket") Reported-by: Baozeng Ding Tested-by: Baozeng Ding Cc: Marcelo Ricardo Leitner Signed-off-by: Cong Wang Reviewed-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/addrconf.h | 1 + net/ipv6/ipv6_sockglue.c | 3 ++- net/ipv6/mcast.c | 17 ++++++++++++----- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/include/net/addrconf.h b/include/net/addrconf.h index f2d072787947..8f998afc1384 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -174,6 +174,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr); int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr); +void __ipv6_sock_mc_close(struct sock *sk); void ipv6_sock_mc_close(struct sock *sk); bool inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr, const struct in6_addr *src_addr); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 5330262ab673..636ec56f5f50 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -120,6 +120,7 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk, static bool setsockopt_needs_rtnl(int optname) { switch (optname) { + case IPV6_ADDRFORM: case IPV6_ADD_MEMBERSHIP: case IPV6_DROP_MEMBERSHIP: case IPV6_JOIN_ANYCAST: @@ -198,7 +199,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, } fl6_free_socklist(sk); - ipv6_sock_mc_close(sk); + __ipv6_sock_mc_close(sk); /* * Sock is moving from IPv6 to IPv4 (sk_prot), so diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 75c1fc54f188..14a3903f1c82 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -276,16 +276,14 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net, return idev; } -void ipv6_sock_mc_close(struct sock *sk) +void __ipv6_sock_mc_close(struct sock *sk) { struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_mc_socklist *mc_lst; struct net *net = sock_net(sk); - if (!rcu_access_pointer(np->ipv6_mc_list)) - return; + ASSERT_RTNL(); - rtnl_lock(); while ((mc_lst = rtnl_dereference(np->ipv6_mc_list)) != NULL) { struct net_device *dev; @@ -303,8 +301,17 @@ void ipv6_sock_mc_close(struct sock *sk) atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc); kfree_rcu(mc_lst, rcu); - } +} + +void ipv6_sock_mc_close(struct sock *sk) +{ + struct ipv6_pinfo *np = inet6_sk(sk); + + if (!rcu_access_pointer(np->ipv6_mc_list)) + return; + rtnl_lock(); + __ipv6_sock_mc_close(sk); rtnl_unlock(); } From 60e21a0ef54cd836b9eb22c7cb396989b5b11648 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 29 Sep 2016 12:37:01 +0100 Subject: [PATCH 455/884] arm64: KVM: Take S1 walks into account when determining S2 write faults The WnR bit in the HSR/ESR_EL2 indicates whether a data abort was generated by a read or a write instruction. For stage 2 data aborts generated by a stage 1 translation table walk (i.e. the actual page table access faults at EL2), the WnR bit therefore reports whether the instruction generating the walk was a load or a store, *not* whether the page table walker was reading or writing the entry. For page tables marked as read-only at stage 2 (e.g. due to KSM merging them with the tables from another guest), this could result in livelock, where a page table walk generated by a load instruction attempts to set the access flag in the stage 1 descriptor, but fails to trigger CoW in the host since only a read fault is reported. This patch modifies the arm64 kvm_vcpu_dabt_iswrite function to take into account stage 2 faults in stage 1 walks. Since DBM cannot be disabled at EL2 for CPUs that implement it, we assume that these faults are always causes by writes, avoiding the livelock situation at the expense of occasional, spurious CoWs. We could, in theory, do a bit better by checking the guest TCR configuration and inspecting the page table to see why the PTE faulted. However, I doubt this is measurable in practice, and the threat of livelock is real. Cc: Cc: Julien Grall Reviewed-by: Marc Zyngier Reviewed-by: Christoffer Dall Signed-off-by: Will Deacon --- arch/arm64/include/asm/kvm_emulate.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index fd9d5fd788f5..f5ea0ba70f07 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -178,11 +178,6 @@ static inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu *vcpu) return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_ISV); } -static inline bool kvm_vcpu_dabt_iswrite(const struct kvm_vcpu *vcpu) -{ - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WNR); -} - static inline bool kvm_vcpu_dabt_issext(const struct kvm_vcpu *vcpu) { return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SSE); @@ -203,6 +198,12 @@ static inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu) return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_S1PTW); } +static inline bool kvm_vcpu_dabt_iswrite(const struct kvm_vcpu *vcpu) +{ + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WNR) || + kvm_vcpu_dabt_iss1tw(vcpu); /* AF/DBM update */ +} + static inline bool kvm_vcpu_dabt_is_cm(const struct kvm_vcpu *vcpu) { return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_CM); From c8ea0395ff3bd5f0fd3c3aa69b383b2d1231e9fd Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 20 Oct 2016 10:17:21 +0100 Subject: [PATCH 456/884] arm/arm64: KVM: Map the BSS at HYP When used with a compiler that doesn't implement "asm goto" (such as the AArch64 port of GCC 4.8), jump labels generate a memory access to find out about the value of the key (instead of just patching the code). The key itself is likely to be stored in the BSS. This is perfectly fine, except that we don't map the BSS at HYP, leading to an exploding kernel at the first access. The obvious fix is simply to map the BSS there (which should have been done a long while ago, but hey...). Reported-by: Eric Auger Tested-by: Eric Auger Signed-off-by: Marc Zyngier --- arch/arm/kvm/arm.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 03e9273f1876..08bb84f2ad58 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -1312,6 +1312,13 @@ static int init_hyp_mode(void) goto out_err; } + err = create_hyp_mappings(kvm_ksym_ref(__bss_start), + kvm_ksym_ref(__bss_stop), PAGE_HYP_RO); + if (err) { + kvm_err("Cannot map bss section\n"); + goto out_err; + } + /* * Map the Hyp stack pages */ From a6c6ead14183ea4ec8ce7551e1f3451024b9c4db Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 19 Oct 2016 02:57:22 +0200 Subject: [PATCH 457/884] cpufreq: intel_pstate: Set P-state upfront in performance mode After commit a4675fbc4a7a (cpufreq: intel_pstate: Replace timers with utilization update callbacks) the cpufreq governor callbacks may not be invoked on NOHZ_FULL CPUs and, in particular, switching to the "performance" policy via sysfs may not have any effect on them. That is a problem, because it usually is desirable to squeeze the last bit of performance out of those CPUs, so work around it by setting the maximum P-state (within the limits) in intel_pstate_set_policy() upfront when the policy is CPUFREQ_POLICY_PERFORMANCE. Fixes: a4675fbc4a7a (cpufreq: intel_pstate: Replace timers with utilization update callbacks) Signed-off-by: Rafael J. Wysocki Acked-by: Srinivas Pandruvada --- drivers/cpufreq/intel_pstate.c | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index f535f8123258..ac7c58d20b58 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1142,10 +1142,8 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max) *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf); } -static void intel_pstate_set_min_pstate(struct cpudata *cpu) +static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate) { - int pstate = cpu->pstate.min_pstate; - trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu); cpu->pstate.current_pstate = pstate; /* @@ -1157,6 +1155,20 @@ static void intel_pstate_set_min_pstate(struct cpudata *cpu) pstate_funcs.get_val(cpu, pstate)); } +static void intel_pstate_set_min_pstate(struct cpudata *cpu) +{ + intel_pstate_set_pstate(cpu, cpu->pstate.min_pstate); +} + +static void intel_pstate_max_within_limits(struct cpudata *cpu) +{ + int min_pstate, max_pstate; + + update_turbo_state(); + intel_pstate_get_min_max(cpu, &min_pstate, &max_pstate); + intel_pstate_set_pstate(cpu, max_pstate); +} + static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) { cpu->pstate.min_pstate = pstate_funcs.get_min(); @@ -1491,7 +1503,7 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) pr_debug("set_policy cpuinfo.max %u policy->max %u\n", policy->cpuinfo.max_freq, policy->max); - cpu = all_cpu_data[0]; + cpu = all_cpu_data[policy->cpu]; if (cpu->pstate.max_pstate_physical > cpu->pstate.max_pstate && policy->max < policy->cpuinfo.max_freq && policy->max > cpu->pstate.max_pstate * cpu->pstate.scaling) { @@ -1535,6 +1547,15 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) limits->max_perf = round_up(limits->max_perf, FRAC_BITS); out: + if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) { + /* + * NOHZ_FULL CPUs need this as the governor callback may not + * be invoked on them. + */ + intel_pstate_clear_update_util_hook(policy->cpu); + intel_pstate_max_within_limits(cpu); + } + intel_pstate_set_update_util_hook(policy->cpu); intel_pstate_hwp_set_policy(policy); From 62623d5f918fb1c8ed86b03b9a86cc81f1cb1878 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 20 Oct 2016 13:32:55 +1100 Subject: [PATCH 458/884] KVM: PPC: Book3S HV: Fix build error when SMP=n Commit 5d375199ea96 ("KVM: PPC: Book3S HV: Set server for passed-through interrupts") broke the SMP=n build: arch/powerpc/kvm/book3s_hv_rm_xics.c:758:2: error: implicit declaration of function 'get_hard_smp_processor_id' That is because we lost the implicit include of asm/smp.h, so include it explicitly to get the definition for get_hard_smp_processor_id(). Fixes: 5d375199ea96 ("KVM: PPC: Book3S HV: Set server for passed-through interrupts") Signed-off-by: Michael Ellerman --- arch/powerpc/kvm/book3s_hv_rm_xics.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index 82ff5de8b1e7..a0ea63ac2b52 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "book3s_xics.h" From 80f23935cadb1c654e81951f5a8b7ceae0acc1b4 Mon Sep 17 00:00:00 2001 From: Segher Boessenkool Date: Thu, 6 Oct 2016 13:42:19 +0000 Subject: [PATCH 459/884] powerpc: Convert cmp to cmpd in idle enter sequence PowerPC's "cmp" instruction has four operands. Normally people write "cmpw" or "cmpd" for the second cmp operand 0 or 1. But, frequently people forget, and write "cmp" with just three operands. With older binutils this is silently accepted as if this was "cmpw", while often "cmpd" is wanted. With newer binutils GAS will complain about this for 64-bit code. For 32-bit code it still silently assumes "cmpw" is what is meant. In this instance the code comes directly from ISA v2.07, including the cmp, but cmpd is correct. Backport to stable so that new toolchains can build old kernels. Fixes: 948cf67c4726 ("powerpc: Add NAP mode support on Power7 in HV mode") Cc: stable@vger.kernel.org # v3.0 Reviewed-by: Vaidyanathan Srinivasan Signed-off-by: Segher Boessenkool Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/cpuidle.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h index 01b8a13f0224..3919332965af 100644 --- a/arch/powerpc/include/asm/cpuidle.h +++ b/arch/powerpc/include/asm/cpuidle.h @@ -26,7 +26,7 @@ extern u64 pnv_first_deep_stop_state; std r0,0(r1); \ ptesync; \ ld r0,0(r1); \ -1: cmp cr0,r0,r0; \ +1: cmpd cr0,r0,r0; \ bne 1b; \ IDLE_INST; \ b . From f660f6066716b700148f60dba3461e65efff3123 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 10 Oct 2016 15:10:51 +0300 Subject: [PATCH 460/884] softirq: Display IRQ_POLL for irq-poll statistics This library was moved to the generic area and was renamed to irq-poll. Hence, update proc/softirqs output accordingly. Signed-off-by: Sagi Grimberg Reviewed-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- kernel/softirq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/softirq.c b/kernel/softirq.c index 1bf81ef91375..744fa611cae0 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -58,7 +58,7 @@ static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp DEFINE_PER_CPU(struct task_struct *, ksoftirqd); const char * const softirq_to_name[NR_SOFTIRQS] = { - "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL", + "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "IRQ_POLL", "TASKLET", "SCHED", "HRTIMER", "RCU" }; From b4a1278c78bc939b3e29c3ad21ceaa636b0ca8c8 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 20 Oct 2016 14:40:06 -0700 Subject: [PATCH 461/884] badblocks: badblocks_set/clear update unacked_exist When bandblocks_set acknowledges a range or badblocks_clear a range, it's possible all badblocks are acknowledged. We should update unacked_exist if this occurs. Signed-off-by: Shaohua Li Reviewed-by: Tomasz Majchrzak Tested-by: Tomasz Majchrzak Signed-off-by: Jens Axboe --- block/badblocks.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/block/badblocks.c b/block/badblocks.c index 6610e282a03e..6ebcef282314 100644 --- a/block/badblocks.c +++ b/block/badblocks.c @@ -133,6 +133,26 @@ retry: } EXPORT_SYMBOL_GPL(badblocks_check); +static void badblocks_update_acked(struct badblocks *bb) +{ + u64 *p = bb->page; + int i; + bool unacked = false; + + if (!bb->unacked_exist) + return; + + for (i = 0; i < bb->count ; i++) { + if (!BB_ACK(p[i])) { + unacked = true; + break; + } + } + + if (!unacked) + bb->unacked_exist = 0; +} + /** * badblocks_set() - Add a range of bad blocks to the table. * @bb: the badblocks structure that holds all badblock information @@ -294,6 +314,8 @@ int badblocks_set(struct badblocks *bb, sector_t s, int sectors, bb->changed = 1; if (!acknowledged) bb->unacked_exist = 1; + else + badblocks_update_acked(bb); write_sequnlock_irqrestore(&bb->lock, flags); return rv; @@ -401,6 +423,7 @@ int badblocks_clear(struct badblocks *bb, sector_t s, int sectors) } } + badblocks_update_acked(bb); bb->changed = 1; out: write_sequnlock_irq(&bb->lock); From ff8560512b8d4b7ca3ef4fd69166634ac30b2525 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Sat, 22 Oct 2016 05:18:04 +0300 Subject: [PATCH 462/884] x86/boot/smp: Don't try to poke disabled/non-existent APIC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Apparently trying to poke a disabled or non-existent APIC leads to a box that doesn't even boot. Let's not do that. No real clue if this is the right fix, but at least my P3 machine boots again. Signed-off-by: Ville Syrjälä Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Eric Biederman Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Josh Poimboeuf Cc: Juergen Gross Cc: Len Brown Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Prarit Bhargava Cc: Thomas Gleixner Cc: Yinghai Lu Cc: dyoung@redhat.com Cc: kexec@lists.infradead.org Cc: stable@vger.kernel.org Fixes: 2a51fe083eba ("arch/x86: Handle non enumerated CPU after physical hotplug") Link: http://lkml.kernel.org/r/1477102684-5092-1-git-send-email-ville.syrjala@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 951f093a96fe..42f5eb7b4f6c 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1409,15 +1409,17 @@ __init void prefill_possible_map(void) /* No boot processor was found in mptable or ACPI MADT */ if (!num_processors) { - int apicid = boot_cpu_physical_apicid; - int cpu = hard_smp_processor_id(); + if (boot_cpu_has(X86_FEATURE_APIC)) { + int apicid = boot_cpu_physical_apicid; + int cpu = hard_smp_processor_id(); - pr_warn("Boot CPU (id %d) not listed by BIOS\n", cpu); + pr_warn("Boot CPU (id %d) not listed by BIOS\n", cpu); - /* Make sure boot cpu is enumerated */ - if (apic->cpu_present_to_apicid(0) == BAD_APICID && - apic->apic_id_valid(apicid)) - generic_processor_info(apicid, boot_cpu_apic_version); + /* Make sure boot cpu is enumerated */ + if (apic->cpu_present_to_apicid(0) == BAD_APICID && + apic->apic_id_valid(apicid)) + generic_processor_info(apicid, boot_cpu_apic_version); + } if (!num_processors) num_processors = 1; From bcf4f311e034dc661b5e641595ef1f50af27b5bf Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 20 Sep 2016 13:43:19 +0900 Subject: [PATCH 463/884] ARM: uniphier: select ARCH_HAS_RESET_CONTROLLER The UniPhier reset driver (drivers/reset/reset-uniphier.c) has been merged. Select ARCH_HAS_RESET_CONTROLLER from the SoC Kconfig. Signed-off-by: Masahiro Yamada Acked-by: Philipp Zabel --- arch/arm/mach-uniphier/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-uniphier/Kconfig b/arch/arm/mach-uniphier/Kconfig index 82dddee3a469..3930fbba30b4 100644 --- a/arch/arm/mach-uniphier/Kconfig +++ b/arch/arm/mach-uniphier/Kconfig @@ -1,6 +1,7 @@ config ARCH_UNIPHIER bool "Socionext UniPhier SoCs" depends on ARCH_MULTI_V7 + select ARCH_HAS_RESET_CONTROLLER select ARM_AMBA select ARM_GLOBAL_TIMER select ARM_GIC From 75924903c51d0697b989035f6baebccb2a7367cd Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 8 Oct 2016 11:25:34 +0900 Subject: [PATCH 464/884] arm64: uniphier: select ARCH_HAS_RESET_CONTROLLER The UniPhier reset driver (drivers/reset/reset-uniphier.c) has been merged. Select ARCH_HAS_RESET_CONTROLLER from the SoC Kconfig. Signed-off-by: Masahiro Yamada --- arch/arm64/Kconfig.platforms | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms index cfbdf02ef566..101794f5ce10 100644 --- a/arch/arm64/Kconfig.platforms +++ b/arch/arm64/Kconfig.platforms @@ -190,6 +190,7 @@ config ARCH_THUNDER config ARCH_UNIPHIER bool "Socionext UniPhier SoC Family" + select ARCH_HAS_RESET_CONTROLLER select PINCTRL help This enables support for Socionext UniPhier SoC family. From 19eb4a47224da4bc118f1e304e16b6c08ed172c9 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 19 Oct 2016 17:23:49 +0900 Subject: [PATCH 465/884] reset: uniphier: rename MIO reset to SD reset for Pro5, PXs2, LD20 SoCs I made a mistake as for naming for this block. The MIO block is not implemented for these 3 SoCs in the first place. The current naming will be a trouble if an SoC with both MIO and SD-ctrl blocks appear in the future. This driver has just been merged in the previous merge window. Rename it before the release. Signed-off-by: Masahiro Yamada Acked-by: Philipp Zabel --- .../bindings/reset/uniphier-reset.txt | 62 +++++++++---------- drivers/reset/reset-uniphier.c | 16 ++--- 2 files changed, 39 insertions(+), 39 deletions(-) diff --git a/Documentation/devicetree/bindings/reset/uniphier-reset.txt b/Documentation/devicetree/bindings/reset/uniphier-reset.txt index e6bbfccd56c3..5020524cddeb 100644 --- a/Documentation/devicetree/bindings/reset/uniphier-reset.txt +++ b/Documentation/devicetree/bindings/reset/uniphier-reset.txt @@ -6,25 +6,25 @@ System reset Required properties: - compatible: should be one of the following: - "socionext,uniphier-sld3-reset" - for PH1-sLD3 SoC. - "socionext,uniphier-ld4-reset" - for PH1-LD4 SoC. - "socionext,uniphier-pro4-reset" - for PH1-Pro4 SoC. - "socionext,uniphier-sld8-reset" - for PH1-sLD8 SoC. - "socionext,uniphier-pro5-reset" - for PH1-Pro5 SoC. - "socionext,uniphier-pxs2-reset" - for ProXstream2/PH1-LD6b SoC. - "socionext,uniphier-ld11-reset" - for PH1-LD11 SoC. - "socionext,uniphier-ld20-reset" - for PH1-LD20 SoC. + "socionext,uniphier-sld3-reset" - for sLD3 SoC. + "socionext,uniphier-ld4-reset" - for LD4 SoC. + "socionext,uniphier-pro4-reset" - for Pro4 SoC. + "socionext,uniphier-sld8-reset" - for sLD8 SoC. + "socionext,uniphier-pro5-reset" - for Pro5 SoC. + "socionext,uniphier-pxs2-reset" - for PXs2/LD6b SoC. + "socionext,uniphier-ld11-reset" - for LD11 SoC. + "socionext,uniphier-ld20-reset" - for LD20 SoC. - #reset-cells: should be 1. Example: sysctrl@61840000 { - compatible = "socionext,uniphier-ld20-sysctrl", + compatible = "socionext,uniphier-ld11-sysctrl", "simple-mfd", "syscon"; reg = <0x61840000 0x4000>; reset { - compatible = "socionext,uniphier-ld20-reset"; + compatible = "socionext,uniphier-ld11-reset"; #reset-cells = <1>; }; @@ -32,30 +32,30 @@ Example: }; -Media I/O (MIO) reset ---------------------- +Media I/O (MIO) reset, SD reset +------------------------------- Required properties: - compatible: should be one of the following: - "socionext,uniphier-sld3-mio-reset" - for PH1-sLD3 SoC. - "socionext,uniphier-ld4-mio-reset" - for PH1-LD4 SoC. - "socionext,uniphier-pro4-mio-reset" - for PH1-Pro4 SoC. - "socionext,uniphier-sld8-mio-reset" - for PH1-sLD8 SoC. - "socionext,uniphier-pro5-mio-reset" - for PH1-Pro5 SoC. - "socionext,uniphier-pxs2-mio-reset" - for ProXstream2/PH1-LD6b SoC. - "socionext,uniphier-ld11-mio-reset" - for PH1-LD11 SoC. - "socionext,uniphier-ld20-mio-reset" - for PH1-LD20 SoC. + "socionext,uniphier-sld3-mio-reset" - for sLD3 SoC. + "socionext,uniphier-ld4-mio-reset" - for LD4 SoC. + "socionext,uniphier-pro4-mio-reset" - for Pro4 SoC. + "socionext,uniphier-sld8-mio-reset" - for sLD8 SoC. + "socionext,uniphier-pro5-sd-reset" - for Pro5 SoC. + "socionext,uniphier-pxs2-sd-reset" - for PXs2/LD6b SoC. + "socionext,uniphier-ld11-mio-reset" - for LD11 SoC. + "socionext,uniphier-ld20-sd-reset" - for LD20 SoC. - #reset-cells: should be 1. Example: mioctrl@59810000 { - compatible = "socionext,uniphier-ld20-mioctrl", + compatible = "socionext,uniphier-ld11-mioctrl", "simple-mfd", "syscon"; reg = <0x59810000 0x800>; reset { - compatible = "socionext,uniphier-ld20-mio-reset"; + compatible = "socionext,uniphier-ld11-mio-reset"; #reset-cells = <1>; }; @@ -68,24 +68,24 @@ Peripheral reset Required properties: - compatible: should be one of the following: - "socionext,uniphier-ld4-peri-reset" - for PH1-LD4 SoC. - "socionext,uniphier-pro4-peri-reset" - for PH1-Pro4 SoC. - "socionext,uniphier-sld8-peri-reset" - for PH1-sLD8 SoC. - "socionext,uniphier-pro5-peri-reset" - for PH1-Pro5 SoC. - "socionext,uniphier-pxs2-peri-reset" - for ProXstream2/PH1-LD6b SoC. - "socionext,uniphier-ld11-peri-reset" - for PH1-LD11 SoC. - "socionext,uniphier-ld20-peri-reset" - for PH1-LD20 SoC. + "socionext,uniphier-ld4-peri-reset" - for LD4 SoC. + "socionext,uniphier-pro4-peri-reset" - for Pro4 SoC. + "socionext,uniphier-sld8-peri-reset" - for sLD8 SoC. + "socionext,uniphier-pro5-peri-reset" - for Pro5 SoC. + "socionext,uniphier-pxs2-peri-reset" - for PXs2/LD6b SoC. + "socionext,uniphier-ld11-peri-reset" - for LD11 SoC. + "socionext,uniphier-ld20-peri-reset" - for LD20 SoC. - #reset-cells: should be 1. Example: perictrl@59820000 { - compatible = "socionext,uniphier-ld20-perictrl", + compatible = "socionext,uniphier-ld11-perictrl", "simple-mfd", "syscon"; reg = <0x59820000 0x200>; reset { - compatible = "socionext,uniphier-ld20-peri-reset"; + compatible = "socionext,uniphier-ld11-peri-reset"; #reset-cells = <1>; }; diff --git a/drivers/reset/reset-uniphier.c b/drivers/reset/reset-uniphier.c index 8b2558e7363e..968c3ae4535c 100644 --- a/drivers/reset/reset-uniphier.c +++ b/drivers/reset/reset-uniphier.c @@ -154,7 +154,7 @@ const struct uniphier_reset_data uniphier_sld3_mio_reset_data[] = { UNIPHIER_RESET_END, }; -const struct uniphier_reset_data uniphier_pro5_mio_reset_data[] = { +const struct uniphier_reset_data uniphier_pro5_sd_reset_data[] = { UNIPHIER_MIO_RESET_SD(0, 0), UNIPHIER_MIO_RESET_SD(1, 1), UNIPHIER_MIO_RESET_EMMC_HW_RESET(6, 1), @@ -360,7 +360,7 @@ static const struct of_device_id uniphier_reset_match[] = { .compatible = "socionext,uniphier-ld20-reset", .data = uniphier_ld20_sys_reset_data, }, - /* Media I/O reset */ + /* Media I/O reset, SD reset */ { .compatible = "socionext,uniphier-sld3-mio-reset", .data = uniphier_sld3_mio_reset_data, @@ -378,20 +378,20 @@ static const struct of_device_id uniphier_reset_match[] = { .data = uniphier_sld3_mio_reset_data, }, { - .compatible = "socionext,uniphier-pro5-mio-reset", - .data = uniphier_pro5_mio_reset_data, + .compatible = "socionext,uniphier-pro5-sd-reset", + .data = uniphier_pro5_sd_reset_data, }, { - .compatible = "socionext,uniphier-pxs2-mio-reset", - .data = uniphier_pro5_mio_reset_data, + .compatible = "socionext,uniphier-pxs2-sd-reset", + .data = uniphier_pro5_sd_reset_data, }, { .compatible = "socionext,uniphier-ld11-mio-reset", .data = uniphier_sld3_mio_reset_data, }, { - .compatible = "socionext,uniphier-ld20-mio-reset", - .data = uniphier_pro5_mio_reset_data, + .compatible = "socionext,uniphier-ld20-sd-reset", + .data = uniphier_pro5_sd_reset_data, }, /* Peripheral reset */ { From 1bdb60ef18655596d56b9b7268ad0bf5214e00e4 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 21 Oct 2016 17:27:57 +0900 Subject: [PATCH 466/884] ARM: dts: uniphier: change MIO node to SD control node I made a mistake bacuse the Media I/O block is not implemented in these SoCs. Signed-off-by: Masahiro Yamada --- arch/arm/boot/dts/uniphier-pro5.dtsi | 4 ++-- arch/arm/boot/dts/uniphier-pxs2.dtsi | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/uniphier-pro5.dtsi b/arch/arm/boot/dts/uniphier-pro5.dtsi index 2c49c3614bda..5357ea9c14b1 100644 --- a/arch/arm/boot/dts/uniphier-pro5.dtsi +++ b/arch/arm/boot/dts/uniphier-pro5.dtsi @@ -184,11 +184,11 @@ }; &mio_clk { - compatible = "socionext,uniphier-pro5-mio-clock"; + compatible = "socionext,uniphier-pro5-sd-clock"; }; &mio_rst { - compatible = "socionext,uniphier-pro5-mio-reset"; + compatible = "socionext,uniphier-pro5-sd-reset"; }; &peri_clk { diff --git a/arch/arm/boot/dts/uniphier-pxs2.dtsi b/arch/arm/boot/dts/uniphier-pxs2.dtsi index 8789cd518933..950f07ba0337 100644 --- a/arch/arm/boot/dts/uniphier-pxs2.dtsi +++ b/arch/arm/boot/dts/uniphier-pxs2.dtsi @@ -197,11 +197,11 @@ }; &mio_clk { - compatible = "socionext,uniphier-pxs2-mio-clock"; + compatible = "socionext,uniphier-pxs2-sd-clock"; }; &mio_rst { - compatible = "socionext,uniphier-pxs2-mio-reset"; + compatible = "socionext,uniphier-pxs2-sd-reset"; }; &peri_clk { From 8e68c65d111a57a4cbe41dc886bb2a1e671e0b6e Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 21 Oct 2016 16:45:21 +0900 Subject: [PATCH 467/884] arm64: dts: uniphier: change MIO node to SD control node I made a mistake bacuse the Media I/O block is not implemented in this SoC. Signed-off-by: Masahiro Yamada --- arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi b/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi index 08fd7cf7769c..56a1b2e92cf3 100644 --- a/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi +++ b/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi @@ -257,18 +257,18 @@ reg = <0x59801000 0x400>; }; - mioctrl@59810000 { - compatible = "socionext,uniphier-mioctrl", + sdctrl@59810000 { + compatible = "socionext,uniphier-ld20-sdctrl", "simple-mfd", "syscon"; reg = <0x59810000 0x800>; - mio_clk: clock { - compatible = "socionext,uniphier-ld20-mio-clock"; + sd_clk: clock { + compatible = "socionext,uniphier-ld20-sd-clock"; #clock-cells = <1>; }; - mio_rst: reset { - compatible = "socionext,uniphier-ld20-mio-reset"; + sd_rst: reset { + compatible = "socionext,uniphier-ld20-sd-reset"; #reset-cells = <1>; }; }; From 2a73306b6096fafd5c2ae06ded1f92bbacb39df2 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 21 Oct 2016 21:49:14 -0400 Subject: [PATCH 468/884] netns: revert "netns: avoid disabling irq for netns id" This reverts commit bc51dddf98c9 ("netns: avoid disabling irq for netns id") as it was found to cause problems with systems running SELinux/audit, see the mailing list thread below: * http://marc.info/?t=147694653900002&r=1&w=2 Eventually we should be able to reintroduce this code once we have rewritten the audit multicast code to queue messages much the same way we do for unicast messages. A tracking issue for this can be found below: * https://github.com/linux-audit/audit-kernel/issues/23 Reported-by: Stephen Smalley Reported-by: Elad Raz Cc: Cong Wang Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- net/core/net_namespace.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 989434f36f96..f61c0e02a413 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -215,13 +215,14 @@ static void rtnl_net_notifyid(struct net *net, int cmd, int id); */ int peernet2id_alloc(struct net *net, struct net *peer) { + unsigned long flags; bool alloc; int id; - spin_lock_bh(&net->nsid_lock); + spin_lock_irqsave(&net->nsid_lock, flags); alloc = atomic_read(&peer->count) == 0 ? false : true; id = __peernet2id_alloc(net, peer, &alloc); - spin_unlock_bh(&net->nsid_lock); + spin_unlock_irqrestore(&net->nsid_lock, flags); if (alloc && id >= 0) rtnl_net_notifyid(net, RTM_NEWNSID, id); return id; @@ -230,11 +231,12 @@ int peernet2id_alloc(struct net *net, struct net *peer) /* This function returns, if assigned, the id of a peer netns. */ int peernet2id(struct net *net, struct net *peer) { + unsigned long flags; int id; - spin_lock_bh(&net->nsid_lock); + spin_lock_irqsave(&net->nsid_lock, flags); id = __peernet2id(net, peer); - spin_unlock_bh(&net->nsid_lock); + spin_unlock_irqrestore(&net->nsid_lock, flags); return id; } EXPORT_SYMBOL(peernet2id); @@ -249,17 +251,18 @@ bool peernet_has_id(struct net *net, struct net *peer) struct net *get_net_ns_by_id(struct net *net, int id) { + unsigned long flags; struct net *peer; if (id < 0) return NULL; rcu_read_lock(); - spin_lock_bh(&net->nsid_lock); + spin_lock_irqsave(&net->nsid_lock, flags); peer = idr_find(&net->netns_ids, id); if (peer) get_net(peer); - spin_unlock_bh(&net->nsid_lock); + spin_unlock_irqrestore(&net->nsid_lock, flags); rcu_read_unlock(); return peer; @@ -422,17 +425,17 @@ static void cleanup_net(struct work_struct *work) for_each_net(tmp) { int id; - spin_lock_bh(&tmp->nsid_lock); + spin_lock_irq(&tmp->nsid_lock); id = __peernet2id(tmp, net); if (id >= 0) idr_remove(&tmp->netns_ids, id); - spin_unlock_bh(&tmp->nsid_lock); + spin_unlock_irq(&tmp->nsid_lock); if (id >= 0) rtnl_net_notifyid(tmp, RTM_DELNSID, id); } - spin_lock_bh(&net->nsid_lock); + spin_lock_irq(&net->nsid_lock); idr_destroy(&net->netns_ids); - spin_unlock_bh(&net->nsid_lock); + spin_unlock_irq(&net->nsid_lock); } rtnl_unlock(); @@ -561,6 +564,7 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct nlattr *tb[NETNSA_MAX + 1]; + unsigned long flags; struct net *peer; int nsid, err; @@ -581,15 +585,15 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh) if (IS_ERR(peer)) return PTR_ERR(peer); - spin_lock_bh(&net->nsid_lock); + spin_lock_irqsave(&net->nsid_lock, flags); if (__peernet2id(net, peer) >= 0) { - spin_unlock_bh(&net->nsid_lock); + spin_unlock_irqrestore(&net->nsid_lock, flags); err = -EEXIST; goto out; } err = alloc_netid(net, peer, nsid); - spin_unlock_bh(&net->nsid_lock); + spin_unlock_irqrestore(&net->nsid_lock, flags); if (err >= 0) { rtnl_net_notifyid(net, RTM_NEWNSID, err); err = 0; @@ -711,10 +715,11 @@ static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb) .idx = 0, .s_idx = cb->args[0], }; + unsigned long flags; - spin_lock_bh(&net->nsid_lock); + spin_lock_irqsave(&net->nsid_lock, flags); idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb); - spin_unlock_bh(&net->nsid_lock); + spin_unlock_irqrestore(&net->nsid_lock, flags); cb->args[0] = net_cb.idx; return skb->len; From 0ce66d4f70d3bcfb67c89f473e76e7cf0a01288f Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 21 Oct 2016 14:21:55 -0700 Subject: [PATCH 469/884] Revert "kexec: Export kexec_in_progress to modules" This reverts commit 97dcaa0fcfd24daa9a36c212c1ad1d5a97759212. Based on the review discussion with Eric, we will come up with a different fix for the bcm_sf2 driver which does not make it rely on the kexec_in_progress value. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- kernel/kexec_core.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 786ab85a5452..561675589511 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -59,7 +59,6 @@ size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data); /* Flag to indicate we are going to kexec a new kernel */ bool kexec_in_progress = false; -EXPORT_SYMBOL_GPL(kexec_in_progress); /* Location of the reserved area for the crash kernel */ From 4a2947e32ea9ca2bc36d68022b7d7b1014dca18f Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 21 Oct 2016 14:21:56 -0700 Subject: [PATCH 470/884] net: dsa: bcm_sf2: Do not rely on kexec_in_progress After discussing with Eric, it turns out that, while using kexec_in_progress is a nice optimization, which prevents us from always powering on the integrated PHY, let's just turn it on in the shutdown path. This removes a dependency on kexec_in_progress which, according to Eric should not be used by modules Fixes: 2399d6143f85 ("net: dsa: bcm_sf2: Prevent GPHY shutdown for kexec'd kernels") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 077a24541584..e3ee27ce13dd 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -30,7 +30,6 @@ #include #include #include -#include #include "bcm_sf2.h" #include "bcm_sf2_regs.h" @@ -1141,9 +1140,11 @@ static void bcm_sf2_sw_shutdown(struct platform_device *pdev) /* For a kernel about to be kexec'd we want to keep the GPHY on for a * successful MDIO bus scan to occur. If we did turn off the GPHY * before (e.g: port_disable), this will also power it back on. + * + * Do not rely on kexec_in_progress, just power the PHY on. */ if (priv->hw_params.num_gphy == 1) - bcm_sf2_gphy_enable_set(priv->dev->ds, kexec_in_progress); + bcm_sf2_gphy_enable_set(priv->dev->ds, true); } #ifdef CONFIG_PM_SLEEP From 396a30cce15d084b2b1a395aa6d515c3d559c674 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 20 Oct 2016 14:19:46 -0700 Subject: [PATCH 471/884] ipv4: use the right lock for ping_group_range This reverts commit a681574c99be23e4d20b769bf0e543239c364af5 ("ipv4: disable BH in set_ping_group_range()") because we never read ping_group_range in BH context (unlike local_port_range). Then, since we already have a lock for ping_group_range, those using ip_local_ports.lock for ping_group_range are clearly typos. We might consider to share a same lock for both ping_group_range and local_port_range w.r.t. space saving, but that should be for net-next. Fixes: a681574c99be ("ipv4: disable BH in set_ping_group_range()") Fixes: ba6b918ab234 ("ping: move ping_group_range out of CONFIG_SYSCTL") Cc: Eric Dumazet Cc: Eric Salo Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv4/sysctl_net_ipv4.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 500ae4010bed..80bc36b25de2 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -96,11 +96,11 @@ static void inet_get_ping_group_range_table(struct ctl_table *table, kgid_t *low container_of(table->data, struct net, ipv4.ping_group_range.range); unsigned int seq; do { - seq = read_seqbegin(&net->ipv4.ip_local_ports.lock); + seq = read_seqbegin(&net->ipv4.ping_group_range.lock); *low = data[0]; *high = data[1]; - } while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq)); + } while (read_seqretry(&net->ipv4.ping_group_range.lock, seq)); } /* Update system visible IP port range */ @@ -109,10 +109,10 @@ static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t hig kgid_t *data = table->data; struct net *net = container_of(table->data, struct net, ipv4.ping_group_range.range); - write_seqlock_bh(&net->ipv4.ip_local_ports.lock); + write_seqlock(&net->ipv4.ping_group_range.lock); data[0] = low; data[1] = high; - write_sequnlock_bh(&net->ipv4.ip_local_ports.lock); + write_sequnlock(&net->ipv4.ping_group_range.lock); } /* Validate changes from /proc interface. */ From bdc8cbd34d5d9d82c3db4df6a182b0530040062d Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Fri, 21 Oct 2016 04:43:39 -0400 Subject: [PATCH 472/884] qede: get_channels() need to populate max tx/rx coalesce values Recent changes in kernel ethtool implementation requires the driver callback for get_channels() has to populate the values for max tx/rx coalesce fields. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qede/qede_ethtool.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index 25a9b293ee8f..16a1a478a2bd 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -756,6 +756,8 @@ static void qede_get_channels(struct net_device *dev, struct qede_dev *edev = netdev_priv(dev); channels->max_combined = QEDE_MAX_RSS_CNT(edev); + channels->max_rx = QEDE_MAX_RSS_CNT(edev); + channels->max_tx = QEDE_MAX_RSS_CNT(edev); channels->combined_count = QEDE_QUEUE_CNT(edev) - edev->fp_num_tx - edev->fp_num_rx; channels->tx_count = edev->fp_num_tx; From ba300ce35123a9ba8ea55318b0f579cc1ab3f51a Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Fri, 21 Oct 2016 04:43:40 -0400 Subject: [PATCH 473/884] qede: Do not allow RSS config for 100G devices RSS configuration is not supported for 100G adapters. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qede/qede_ethtool.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index 16a1a478a2bd..bf04fb9e1ae6 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -1055,6 +1055,12 @@ static int qede_set_rxfh(struct net_device *dev, const u32 *indir, struct qede_dev *edev = netdev_priv(dev); int i; + if (edev->dev_info.common.num_hwfns > 1) { + DP_INFO(edev, + "RSS configuration is not supported for 100G devices\n"); + return -EOPNOTSUPP; + } + if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) return -EOPNOTSUPP; From 837d4eb6ed7cb0341079fac97e3037df6bef7482 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Fri, 21 Oct 2016 04:43:41 -0400 Subject: [PATCH 474/884] qede: Loopback implementation should ignore the normal traffic During the execution of loopback test, driver may receive the packets which are not originated by this test, loopback implementation need to skip those packets. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qede/qede.h | 1 + .../net/ethernet/qlogic/qede/qede_ethtool.c | 80 +++++++++++-------- drivers/net/ethernet/qlogic/qede/qede_main.c | 3 +- 3 files changed, 49 insertions(+), 35 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index 28c0e9f42c9e..d6a4a9a5eb4d 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -348,6 +348,7 @@ bool qede_has_rx_work(struct qede_rx_queue *rxq); int qede_txq_has_work(struct qede_tx_queue *txq); void qede_recycle_rx_bd_ring(struct qede_rx_queue *rxq, struct qede_dev *edev, u8 count); +void qede_update_rx_prod(struct qede_dev *edev, struct qede_rx_queue *rxq); #define RX_RING_SIZE_POW 13 #define RX_RING_SIZE ((u16)BIT(RX_RING_SIZE_POW)) diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index bf04fb9e1ae6..7da184cfe72e 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -1207,8 +1207,8 @@ static int qede_selftest_receive_traffic(struct qede_dev *edev) struct qede_rx_queue *rxq = NULL; struct sw_rx_data *sw_rx_data; union eth_rx_cqe *cqe; + int i, rc = 0; u8 *data_ptr; - int i; for_each_queue(i) { if (edev->fp_array[i].type & QEDE_FASTPATH_RX) { @@ -1227,46 +1227,60 @@ static int qede_selftest_receive_traffic(struct qede_dev *edev) * queue and that the loopback traffic is not IP. */ for (i = 0; i < QEDE_SELFTEST_POLL_COUNT; i++) { - if (qede_has_rx_work(rxq)) + if (!qede_has_rx_work(rxq)) { + usleep_range(100, 200); + continue; + } + + hw_comp_cons = le16_to_cpu(*rxq->hw_cons_ptr); + sw_comp_cons = qed_chain_get_cons_idx(&rxq->rx_comp_ring); + + /* Memory barrier to prevent the CPU from doing speculative + * reads of CQE/BD before reading hw_comp_cons. If the CQE is + * read before it is written by FW, then FW writes CQE and SB, + * and then the CPU reads the hw_comp_cons, it will use an old + * CQE. + */ + rmb(); + + /* Get the CQE from the completion ring */ + cqe = (union eth_rx_cqe *)qed_chain_consume(&rxq->rx_comp_ring); + + /* Get the data from the SW ring */ + sw_rx_index = rxq->sw_rx_cons & NUM_RX_BDS_MAX; + sw_rx_data = &rxq->sw_rx_ring[sw_rx_index]; + fp_cqe = &cqe->fast_path_regular; + len = le16_to_cpu(fp_cqe->len_on_first_bd); + data_ptr = (u8 *)(page_address(sw_rx_data->data) + + fp_cqe->placement_offset + + sw_rx_data->page_offset); + if (ether_addr_equal(data_ptr, edev->ndev->dev_addr) && + ether_addr_equal(data_ptr + ETH_ALEN, + edev->ndev->dev_addr)) { + for (i = ETH_HLEN; i < len; i++) + if (data_ptr[i] != (unsigned char)(i & 0xff)) { + rc = -1; + break; + } + + qede_recycle_rx_bd_ring(rxq, edev, 1); + qed_chain_recycle_consumed(&rxq->rx_comp_ring); break; - usleep_range(100, 200); + } + + DP_INFO(edev, "Not the transmitted packet\n"); + qede_recycle_rx_bd_ring(rxq, edev, 1); + qed_chain_recycle_consumed(&rxq->rx_comp_ring); } - if (!qede_has_rx_work(rxq)) { + if (i == QEDE_SELFTEST_POLL_COUNT) { DP_NOTICE(edev, "Failed to receive the traffic\n"); return -1; } - hw_comp_cons = le16_to_cpu(*rxq->hw_cons_ptr); - sw_comp_cons = qed_chain_get_cons_idx(&rxq->rx_comp_ring); + qede_update_rx_prod(edev, rxq); - /* Memory barrier to prevent the CPU from doing speculative reads of CQE - * / BD before reading hw_comp_cons. If the CQE is read before it is - * written by FW, then FW writes CQE and SB, and then the CPU reads the - * hw_comp_cons, it will use an old CQE. - */ - rmb(); - - /* Get the CQE from the completion ring */ - cqe = (union eth_rx_cqe *)qed_chain_consume(&rxq->rx_comp_ring); - - /* Get the data from the SW ring */ - sw_rx_index = rxq->sw_rx_cons & NUM_RX_BDS_MAX; - sw_rx_data = &rxq->sw_rx_ring[sw_rx_index]; - fp_cqe = &cqe->fast_path_regular; - len = le16_to_cpu(fp_cqe->len_on_first_bd); - data_ptr = (u8 *)(page_address(sw_rx_data->data) + - fp_cqe->placement_offset + sw_rx_data->page_offset); - for (i = ETH_HLEN; i < len; i++) - if (data_ptr[i] != (unsigned char)(i & 0xff)) { - DP_NOTICE(edev, "Loopback test failed\n"); - qede_recycle_rx_bd_ring(rxq, edev, 1); - return -1; - } - - qede_recycle_rx_bd_ring(rxq, edev, 1); - - return 0; + return rc; } static int qede_selftest_run_loopback(struct qede_dev *edev, u32 loopback_mode) diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 343038ca047d..da8ef69cedd6 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -943,8 +943,7 @@ static inline int qede_realloc_rx_buffer(struct qede_dev *edev, return 0; } -static inline void qede_update_rx_prod(struct qede_dev *edev, - struct qede_rx_queue *rxq) +void qede_update_rx_prod(struct qede_dev *edev, struct qede_rx_queue *rxq) { u16 bd_prod = qed_chain_get_prod_idx(&rxq->rx_bd_ring); u16 cqe_prod = qed_chain_get_prod_idx(&rxq->rx_comp_ring); From 0e191827383a6503a3bc547e63c74ff093f450f5 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Fri, 21 Oct 2016 04:43:42 -0400 Subject: [PATCH 475/884] qed*: Reduce the memory footprint for Rx path With the current default values for Rx path i.e., 8 queues of 8Kb entries each with 4Kb size, interface will consume 256Mb for Rx. The default values causing the driver probe to fail when the system memory is low. Based on the perforamnce results, rx-ring count value of 1Kb gives the comparable performance with Rx coalesce timeout of 12 seconds. Updating the default values. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_main.c | 1 + drivers/net/ethernet/qlogic/qede/qede.h | 2 +- include/linux/qed/qed_if.h | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 8dc3f4670f64..c418360ba02a 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -878,6 +878,7 @@ static int qed_slowpath_start(struct qed_dev *cdev, } } + cdev->rx_coalesce_usecs = QED_DEFAULT_RX_USECS; rc = qed_nic_setup(cdev); if (rc) goto err; diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index d6a4a9a5eb4d..974689a13337 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -354,7 +354,7 @@ void qede_update_rx_prod(struct qede_dev *edev, struct qede_rx_queue *rxq); #define RX_RING_SIZE ((u16)BIT(RX_RING_SIZE_POW)) #define NUM_RX_BDS_MAX (RX_RING_SIZE - 1) #define NUM_RX_BDS_MIN 128 -#define NUM_RX_BDS_DEF NUM_RX_BDS_MAX +#define NUM_RX_BDS_DEF ((u16)BIT(10) - 1) #define TX_RING_SIZE_POW 13 #define TX_RING_SIZE ((u16)BIT(TX_RING_SIZE_POW)) diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index f9ae903bbb84..8978a60371f4 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -146,6 +146,7 @@ enum qed_led_mode { #define DIRECT_REG_RD(reg_addr) readl((void __iomem *)(reg_addr)) #define QED_COALESCE_MAX 0xFF +#define QED_DEFAULT_RX_USECS 12 /* forward */ struct qed_dev; From ed0dd91515bbe19a19cfccca366cf163ed581cac Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Fri, 21 Oct 2016 04:43:43 -0400 Subject: [PATCH 476/884] qede: Reconfigure rss indirection direction table when rss count is updated Rx indirection table entries are in the range [0, (rss_count - 1)]. If user reduces the rss count, the table entries may not be in the ccorrect range. Need to reconfigure the table with new rss_count as a basis. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qede/qede_ethtool.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index 7da184cfe72e..d47bdaa1f9a8 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -822,6 +822,13 @@ static int qede_set_channels(struct net_device *dev, edev->req_queues = count; edev->req_num_tx = channels->tx_count; edev->req_num_rx = channels->rx_count; + /* Reset the indirection table if rx queue count is updated */ + if ((edev->req_queues - edev->req_num_tx) != QEDE_RSS_COUNT(edev)) { + edev->rss_params_inited &= ~QEDE_RSS_INDIR_INITED; + memset(&edev->rss_params.rss_ind_table, 0, + sizeof(edev->rss_params.rss_ind_table)); + } + if (netif_running(dev)) qede_reload(edev, NULL, NULL); From 15c6de2c65048d45484e396194ba2598618d4cb4 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Fri, 21 Oct 2016 04:43:44 -0400 Subject: [PATCH 477/884] qed: Zero-out the buffer paased to dcbx_query() API qed_dcbx_query_params() implementation populate the values to input buffer based on the dcbx mode and, the current negotiated state/params, the caller of this API need to memset the buffer to zero. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_dcbx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c index 130da1c0490b..a4789a93b692 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c @@ -1190,6 +1190,7 @@ int qed_dcbx_get_config_params(struct qed_hwfn *p_hwfn, if (!dcbx_info) return -ENOMEM; + memset(dcbx_info, 0, sizeof(*dcbx_info)); rc = qed_dcbx_query_params(p_hwfn, dcbx_info, QED_DCBX_OPERATIONAL_MIB); if (rc) { kfree(dcbx_info); @@ -1225,6 +1226,7 @@ static struct qed_dcbx_get *qed_dcbnl_get_dcbx(struct qed_hwfn *hwfn, if (!dcbx_info) return NULL; + memset(dcbx_info, 0, sizeof(*dcbx_info)); if (qed_dcbx_query_params(hwfn, dcbx_info, type)) { kfree(dcbx_info); return NULL; From fabd545c6d27ac1977fe567c43cd4c72fad04172 Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Fri, 21 Oct 2016 04:43:45 -0400 Subject: [PATCH 478/884] qede: Fix incorrrect usage of APIs for un-mapping DMA memory Driver uses incorrect APIs to unmap DMA memory which were mapped using dma_map_single(). This patch fixes it to use appropriate APIs for un-mapping DMA memory. Signed-off-by: Manish Chopra Signed-off-by: Yuval Mintz Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qede/qede_ethtool.c | 4 ++-- drivers/net/ethernet/qlogic/qede/qede_main.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index d47bdaa1f9a8..12251a1032d1 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -1199,8 +1199,8 @@ static int qede_selftest_transmit_traffic(struct qede_dev *edev, } first_bd = (struct eth_tx_1st_bd *)qed_chain_consume(&txq->tx_pbl); - dma_unmap_page(&edev->pdev->dev, BD_UNMAP_ADDR(first_bd), - BD_UNMAP_LEN(first_bd), DMA_TO_DEVICE); + dma_unmap_single(&edev->pdev->dev, BD_UNMAP_ADDR(first_bd), + BD_UNMAP_LEN(first_bd), DMA_TO_DEVICE); txq->sw_tx_cons++; txq->sw_tx_ring[idx].skb = NULL; diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index da8ef69cedd6..444b271059b2 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -313,8 +313,8 @@ static int qede_free_tx_pkt(struct qede_dev *edev, split_bd_len = BD_UNMAP_LEN(split); bds_consumed++; } - dma_unmap_page(&edev->pdev->dev, BD_UNMAP_ADDR(first_bd), - BD_UNMAP_LEN(first_bd) + split_bd_len, DMA_TO_DEVICE); + dma_unmap_single(&edev->pdev->dev, BD_UNMAP_ADDR(first_bd), + BD_UNMAP_LEN(first_bd) + split_bd_len, DMA_TO_DEVICE); /* Unmap the data of the skb frags */ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++, bds_consumed++) { @@ -359,8 +359,8 @@ static void qede_free_failed_tx_pkt(struct qede_dev *edev, nbd--; } - dma_unmap_page(&edev->pdev->dev, BD_UNMAP_ADDR(first_bd), - BD_UNMAP_LEN(first_bd) + split_bd_len, DMA_TO_DEVICE); + dma_unmap_single(&edev->pdev->dev, BD_UNMAP_ADDR(first_bd), + BD_UNMAP_LEN(first_bd) + split_bd_len, DMA_TO_DEVICE); /* Unmap the data of the skb frags */ for (i = 0; i < nbd; i++) { From d1fe85ec7702917f2f1515b4c421d5d4792201a0 Mon Sep 17 00:00:00 2001 From: Sandhya Bankar Date: Sun, 25 Sep 2016 00:46:21 +0530 Subject: [PATCH 479/884] iio:chemical:atlas-ph-sensor: Fix use of 32 bit int to hold 16 bit big endian value This will result in a random value being reported on big endian architectures. (thanks to Lars-Peter Clausen for pointing out the effects of this bug) Only effects a value printed to the log, but as this reports the settings of the probe in question it may be of direct interest to users. Also, fixes the following sparse endianness warnings: drivers/iio/chemical/atlas-ph-sensor.c:215:9: warning: cast to restricted __be16 drivers/iio/chemical/atlas-ph-sensor.c:215:9: warning: cast to restricted __be16 drivers/iio/chemical/atlas-ph-sensor.c:215:9: warning: cast to restricted __be16 drivers/iio/chemical/atlas-ph-sensor.c:215:9: warning: cast to restricted __be16 drivers/iio/chemical/atlas-ph-sensor.c:215:9: warning: cast to restricted __be16 drivers/iio/chemical/atlas-ph-sensor.c:215:9: warning: cast to restricted __be16 drivers/iio/chemical/atlas-ph-sensor.c:215:9: warning: cast to restricted __be16 drivers/iio/chemical/atlas-ph-sensor.c:215:9: warning: cast to restricted __be16 Signed-off-by: Sandhya Bankar Fixes: e8dd92bfbff25 ("iio: chemical: atlas-ph-sensor: add EC feature") Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/chemical/atlas-ph-sensor.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/iio/chemical/atlas-ph-sensor.c b/drivers/iio/chemical/atlas-ph-sensor.c index bd321b305a0a..ef761a508630 100644 --- a/drivers/iio/chemical/atlas-ph-sensor.c +++ b/drivers/iio/chemical/atlas-ph-sensor.c @@ -213,13 +213,14 @@ static int atlas_check_ec_calibration(struct atlas_data *data) struct device *dev = &data->client->dev; int ret; unsigned int val; + __be16 rval; - ret = regmap_bulk_read(data->regmap, ATLAS_REG_EC_PROBE, &val, 2); + ret = regmap_bulk_read(data->regmap, ATLAS_REG_EC_PROBE, &rval, 2); if (ret) return ret; - dev_info(dev, "probe set to K = %d.%.2d", be16_to_cpu(val) / 100, - be16_to_cpu(val) % 100); + val = be16_to_cpu(rval); + dev_info(dev, "probe set to K = %d.%.2d", val / 100, val % 100); ret = regmap_read(data->regmap, ATLAS_REG_EC_CALIB_STATUS, &val); if (ret) From 64bc2d02d754f4143d65cc21c644176db12ab5c8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 18 Oct 2016 00:13:35 +0200 Subject: [PATCH 480/884] iio: accel: sca3000_core: avoid potentially uninitialized variable The newly added __sca3000_get_base_freq function handles all valid modes of the SCA3000_REG_ADDR_MODE register, but gcc notices that any other value (i.e. 0x00) causes the base_freq variable to not get initialized: drivers/staging/iio/accel/sca3000_core.c: In function 'sca3000_write_raw': drivers/staging/iio/accel/sca3000_core.c:527:23: error: 'base_freq' may be used uninitialized in this function [-Werror=maybe-uninitialized] This adds explicit error handling for unexpected register values, to ensure this cannot happen. Fixes: e0f3fc9b47e6 ("iio: accel: sca3000_core: implemented IIO_CHAN_INFO_SAMP_FREQ") Signed-off-by: Arnd Bergmann Cc: Ico Doornekamp Cc: Jonathan Cameron Signed-off-by: Jonathan Cameron --- drivers/staging/iio/accel/sca3000_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/staging/iio/accel/sca3000_core.c b/drivers/staging/iio/accel/sca3000_core.c index d626125d7af9..564b36d4f648 100644 --- a/drivers/staging/iio/accel/sca3000_core.c +++ b/drivers/staging/iio/accel/sca3000_core.c @@ -468,6 +468,8 @@ static inline int __sca3000_get_base_freq(struct sca3000_state *st, case SCA3000_MEAS_MODE_OP_2: *base_freq = info->option_mode_2_freq; break; + default: + ret = -EINVAL; } error_ret: return ret; From a6e2846cacf97d4c70c5e923325b015cfa1e9053 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Fri, 21 Oct 2016 02:09:17 -0400 Subject: [PATCH 481/884] bnx2x: Use the correct divisor value for PHC clock readings. Time Sync (PTP) implementation uses the divisor/shift value for converting the clock ticks to nanoseconds. Driver currently defines shift value as 1, this results in the nanoseconds value to be calculated as half the actual value. Hence the user application fails to synchronize the device clock value with the PTP master device clock. Need to use the 'shift' value of 0. Signed-off-by: Sony.Chacko Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 20fe6a8c35c1..0cee4c0283f9 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -15241,7 +15241,7 @@ static void bnx2x_init_cyclecounter(struct bnx2x *bp) memset(&bp->cyclecounter, 0, sizeof(bp->cyclecounter)); bp->cyclecounter.read = bnx2x_cyclecounter_read; bp->cyclecounter.mask = CYCLECOUNTER_MASK(64); - bp->cyclecounter.shift = 1; + bp->cyclecounter.shift = 0; bp->cyclecounter.mult = 1; } From b678aa578c9e400429e027269e8de2783e5e73ce Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Fri, 21 Oct 2016 18:28:25 +0900 Subject: [PATCH 482/884] ipv6: do not increment mac header when it's unset Otherwise we'll overflow the integer. This occurs when layer 3 tunneled packets are handed off to the IPv6 layer. Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. Miller --- net/ipv6/reassembly.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 2160d5d009cb..3815e8505ed2 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -456,7 +456,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, skb_network_header(head)[nhoff] = skb_transport_header(head)[0]; memmove(head->head + sizeof(struct frag_hdr), head->head, (head->data - head->head) - sizeof(struct frag_hdr)); - head->mac_header += sizeof(struct frag_hdr); + if (skb_mac_header_was_set(head)) + head->mac_header += sizeof(struct frag_hdr); head->network_header += sizeof(struct frag_hdr); skb_reset_transport_header(head); From 235bde1ed3f0fff0f68f367ec8807b89ea151258 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Fri, 21 Oct 2016 09:34:29 -0200 Subject: [PATCH 483/884] net: fec: Call swap_buffer() prior to IP header alignment Commit 3ac72b7b63d5 ("net: fec: align IP header in hardware") breaks networking on mx28. There is an erratum on mx28 (ENGR121613 - ENET big endian mode not compatible with ARM little endian) that requires an additional byte-swap operation to workaround this problem. So call swap_buffer() prior to performing the IP header alignment to restore network functionality on mx28. Fixes: 3ac72b7b63d5 ("net: fec: align IP header in hardware") Reported-and-tested-by: Henri Roosen Signed-off-by: Fabio Estevam Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fec_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 48a033e64423..5aa9d4ded214 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1430,14 +1430,14 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id) skb_put(skb, pkt_len - 4); data = skb->data; + if (!is_copybreak && need_swap) + swap_buffer(data, pkt_len); + #if !defined(CONFIG_M5272) if (fep->quirks & FEC_QUIRK_HAS_RACC) data = skb_pull_inline(skb, 2); #endif - if (!is_copybreak && need_swap) - swap_buffer(data, pkt_len); - /* Extract the enhanced buffer descriptor */ ebdp = NULL; if (fep->bufdesc_ex) From a4b8e71b05c27bae6bad3bdecddbc6b68a3ad8cf Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Fri, 21 Oct 2016 14:13:24 +0200 Subject: [PATCH 484/884] net: sctp, forbid negative length Most of getsockopt handlers in net/sctp/socket.c check len against sizeof some structure like: if (len < sizeof(int)) return -EINVAL; On the first look, the check seems to be correct. But since len is int and sizeof returns size_t, int gets promoted to unsigned size_t too. So the test returns false for negative lengths. Yes, (-1 < sizeof(long)) is false. Fix this in sctp by explicitly checking len < 0 before any getsockopt handler is called. Note that sctp_getsockopt_events already handled the negative case. Since we added the < 0 check elsewhere, this one can be removed. If not checked, this is the result: UBSAN: Undefined behaviour in ../mm/page_alloc.c:2722:19 shift exponent 52 is too large for 32-bit type 'int' CPU: 1 PID: 24535 Comm: syz-executor Not tainted 4.8.1-0-syzkaller #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.1-0-gb3ef39f-prebuilt.qemu-project.org 04/01/2014 0000000000000000 ffff88006d99f2a8 ffffffffb2f7bdea 0000000041b58ab3 ffffffffb4363c14 ffffffffb2f7bcde ffff88006d99f2d0 ffff88006d99f270 0000000000000000 0000000000000000 0000000000000034 ffffffffb5096422 Call Trace: [] ? __ubsan_handle_shift_out_of_bounds+0x29c/0x300 ... [] ? kmalloc_order+0x24/0x90 [] ? kmalloc_order_trace+0x24/0x220 [] ? __kmalloc+0x330/0x540 [] ? sctp_getsockopt_local_addrs+0x174/0xca0 [sctp] [] ? sctp_getsockopt+0x10d/0x1b0 [sctp] [] ? sock_common_getsockopt+0xb9/0x150 [] ? SyS_getsockopt+0x1a5/0x270 Signed-off-by: Jiri Slaby Cc: Vlad Yasevich Cc: Neil Horman Cc: "David S. Miller" Cc: linux-sctp@vger.kernel.org Cc: netdev@vger.kernel.org Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/socket.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index fb02c7033307..9fbb6feb8c27 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4687,7 +4687,7 @@ static int sctp_getsockopt_disable_fragments(struct sock *sk, int len, static int sctp_getsockopt_events(struct sock *sk, int len, char __user *optval, int __user *optlen) { - if (len <= 0) + if (len == 0) return -EINVAL; if (len > sizeof(struct sctp_event_subscribe)) len = sizeof(struct sctp_event_subscribe); @@ -6430,6 +6430,9 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname, if (get_user(len, optlen)) return -EFAULT; + if (len < 0) + return -EINVAL; + lock_sock(sk); switch (optname) { From 963d790468a2f581abf039b45edac79af5e16e55 Mon Sep 17 00:00:00 2001 From: Ray Jui Date: Wed, 20 Jul 2016 14:53:51 -0700 Subject: [PATCH 485/884] arm64: dts: Updated NAND DT properties for NS2 SVK This patch adds NAND DT properties for NS2 SVK to configure the bus width width and OOB sector size Signed-off-by: Prafulla Kota Signed-off-by: Ray Jui Signed-off-by: Florian Fainelli --- arch/arm64/boot/dts/broadcom/ns2-svk.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/boot/dts/broadcom/ns2-svk.dts b/arch/arm64/boot/dts/broadcom/ns2-svk.dts index 2d7872a36b91..b09f3bc5c6c1 100644 --- a/arch/arm64/boot/dts/broadcom/ns2-svk.dts +++ b/arch/arm64/boot/dts/broadcom/ns2-svk.dts @@ -164,6 +164,8 @@ nand-ecc-mode = "hw"; nand-ecc-strength = <8>; nand-ecc-step-size = <512>; + nand-bus-width = <16>; + brcm,nand-oob-sector-size = <16>; #address-cells = <1>; #size-cells = <1>; }; From 6d8d271eee0eaa3c3ffd6db29a825e02316359d4 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Tue, 18 Oct 2016 17:44:01 -0300 Subject: [PATCH 486/884] gpio: ath79: Fix module autoload If the driver is built as a module, autoload won't work because the module alias information is not filled. So user-space can't match the registered device with the corresponding module. Export the module alias information using the MODULE_DEVICE_TABLE() macro. Before this patch: $ modinfo drivers/gpio/gpio-ath79.ko | grep alias $ After this patch: $ modinfo drivers/gpio/gpio-ath79.ko | grep alias alias: of:N*T*Cqca,ar9340-gpioC* alias: of:N*T*Cqca,ar9340-gpio alias: of:N*T*Cqca,ar7100-gpioC* alias: of:N*T*Cqca,ar7100-gpio Signed-off-by: Javier Martinez Canillas Acked-by: Aban Bedel Signed-off-by: Linus Walleij --- drivers/gpio/gpio-ath79.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-ath79.c b/drivers/gpio/gpio-ath79.c index 9457e2022bf6..dc37dbe4b46d 100644 --- a/drivers/gpio/gpio-ath79.c +++ b/drivers/gpio/gpio-ath79.c @@ -219,6 +219,7 @@ static const struct of_device_id ath79_gpio_of_match[] = { { .compatible = "qca,ar9340-gpio" }, {}, }; +MODULE_DEVICE_TABLE(of, ath79_gpio_of_match); static int ath79_gpio_probe(struct platform_device *pdev) { From 07d9a380680d1c0eb51ef87ff2eab5c994949e69 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 23 Oct 2016 17:10:14 -0700 Subject: [PATCH 487/884] Linux 4.9-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 512e47a53e9a..93beca4312c4 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 9 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Psychotic Stoned Sheep # *DOCUMENTATION* From d71cf15b865bdd45925f7b094d169aaabd705145 Mon Sep 17 00:00:00 2001 From: Liu Gang Date: Fri, 21 Oct 2016 15:31:28 +0800 Subject: [PATCH 488/884] gpio: mpc8xxx: Correct irq handler function From the beginning of the gpio-mpc8xxx.c, the "handle_level_irq" has being used to handle GPIO interrupts in the PowerPC/Layerscape platforms. But actually, almost all PowerPC/Layerscape platforms assert an interrupt request upon either a high-to-low change or any change on the state of the signal. So the "handle_level_irq" is not reasonable for PowerPC/Layerscape GPIO interrupt, it should be "handle_edge_irq". Otherwise the system may lost some interrupts from the PIN's state changes. Signed-off-by: Liu Gang Signed-off-by: Linus Walleij --- drivers/gpio/gpio-mpc8xxx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-mpc8xxx.c b/drivers/gpio/gpio-mpc8xxx.c index 425501c39527..793518a30afe 100644 --- a/drivers/gpio/gpio-mpc8xxx.c +++ b/drivers/gpio/gpio-mpc8xxx.c @@ -239,7 +239,7 @@ static int mpc8xxx_gpio_irq_map(struct irq_domain *h, unsigned int irq, irq_hw_number_t hwirq) { irq_set_chip_data(irq, h->host_data); - irq_set_chip_and_handler(irq, &mpc8xxx_irq_chip, handle_level_irq); + irq_set_chip_and_handler(irq, &mpc8xxx_irq_chip, handle_edge_irq); return 0; } From a05b82d5149dfeef05254a11c3636a89a854520a Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Fri, 21 Oct 2016 14:53:53 +0530 Subject: [PATCH 489/884] cxl: Fix leaking pid refs in some error paths In some error paths in functions cxl_start_context and afu_ioctl_start_work pid references to the current & group-leader tasks can leak after they are taken. This patch fixes these error paths to release these pid references before exiting the error path. Fixes: 7b8ad495d592 ("cxl: Fix DSI misses when the context owning task exits") Cc: stable@vger.kernel.org # v4.5+ Reviewed-by: Andrew Donnellan Reported-by: Frederic Barrat Signed-off-by: Vaibhav Jain Acked-by: Frederic Barrat Signed-off-by: Michael Ellerman --- drivers/misc/cxl/api.c | 2 ++ drivers/misc/cxl/file.c | 22 +++++++++++++--------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c index af23d7dfe752..2e5233b60971 100644 --- a/drivers/misc/cxl/api.c +++ b/drivers/misc/cxl/api.c @@ -247,7 +247,9 @@ int cxl_start_context(struct cxl_context *ctx, u64 wed, cxl_ctx_get(); if ((rc = cxl_ops->attach_process(ctx, kernel, wed, 0))) { + put_pid(ctx->glpid); put_pid(ctx->pid); + ctx->glpid = ctx->pid = NULL; cxl_adapter_context_put(ctx->afu->adapter); cxl_ctx_put(); goto out; diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c index d0b421f49b39..77080cc5fa0a 100644 --- a/drivers/misc/cxl/file.c +++ b/drivers/misc/cxl/file.c @@ -193,6 +193,16 @@ static long afu_ioctl_start_work(struct cxl_context *ctx, ctx->mmio_err_ff = !!(work.flags & CXL_START_WORK_ERR_FF); + /* + * Increment the mapped context count for adapter. This also checks + * if adapter_context_lock is taken. + */ + rc = cxl_adapter_context_get(ctx->afu->adapter); + if (rc) { + afu_release_irqs(ctx, ctx); + goto out; + } + /* * We grab the PID here and not in the file open to allow for the case * where a process (master, some daemon, etc) has opened the chardev on @@ -205,15 +215,6 @@ static long afu_ioctl_start_work(struct cxl_context *ctx, ctx->pid = get_task_pid(current, PIDTYPE_PID); ctx->glpid = get_task_pid(current->group_leader, PIDTYPE_PID); - /* - * Increment the mapped context count for adapter. This also checks - * if adapter_context_lock is taken. - */ - rc = cxl_adapter_context_get(ctx->afu->adapter); - if (rc) { - afu_release_irqs(ctx, ctx); - goto out; - } trace_cxl_attach(ctx, work.work_element_descriptor, work.num_interrupts, amr); @@ -221,6 +222,9 @@ static long afu_ioctl_start_work(struct cxl_context *ctx, amr))) { afu_release_irqs(ctx, ctx); cxl_adapter_context_put(ctx->afu->adapter); + put_pid(ctx->glpid); + put_pid(ctx->pid); + ctx->glpid = ctx->pid = NULL; goto out; } From c663e29f8885269c1608b5aa6057729fa9267b35 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 24 Oct 2016 14:20:25 +1100 Subject: [PATCH 490/884] fs: Do to trim high file position bits in iomap_page_mkwrite_actor iomap_page_mkwrite_actor() calls __block_write_begin_int() with position masked as pos & ~PAGE_MASK which is equivalent to pos & (PAGE_SIZE-1). Thus it masks off high bits of file position. However __block_write_begin_int() expects full file position on input. This does not cause any visible issues because all __block_write_begin_int() really cares about are low file position bits but still it is a bug waiting to happen. Signed-off-by: Jan Kara Reviewed-by: Christoph Hellwig Signed-off-by: Dave Chinner --- fs/iomap.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/iomap.c b/fs/iomap.c index a92204012e2d..a8ee8c33ca78 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -433,8 +433,7 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length, struct page *page = data; int ret; - ret = __block_write_begin_int(page, pos & ~PAGE_MASK, length, - NULL, iomap); + ret = __block_write_begin_int(page, pos, length, NULL, iomap); if (ret) return ret; From 7b7381f043568224af798b1decb607dca97b4114 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Mon, 24 Oct 2016 14:21:00 +1100 Subject: [PATCH 491/884] xfs: fix up inode cowblocks tracking tracepoints These calls are still using the eofblocks tracepoints. The cowblocks equivalents are already defined, we just aren't actually calling them. Signed-off-by: Brian Foster Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Dave Chinner --- fs/xfs/xfs_icache.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 14796b744e0a..f295049db681 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -1656,9 +1656,9 @@ void xfs_inode_set_cowblocks_tag( xfs_inode_t *ip) { - trace_xfs_inode_set_eofblocks_tag(ip); + trace_xfs_inode_set_cowblocks_tag(ip); return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_cowblocks, - trace_xfs_perag_set_eofblocks, + trace_xfs_perag_set_cowblocks, XFS_ICI_COWBLOCKS_TAG); } @@ -1666,7 +1666,7 @@ void xfs_inode_clear_cowblocks_tag( xfs_inode_t *ip) { - trace_xfs_inode_clear_eofblocks_tag(ip); + trace_xfs_inode_clear_cowblocks_tag(ip); return __xfs_inode_clear_eofblocks_tag(ip, - trace_xfs_perag_clear_eofblocks, XFS_ICI_COWBLOCKS_TAG); + trace_xfs_perag_clear_cowblocks, XFS_ICI_COWBLOCKS_TAG); } From c17a8ef43d6b80ed3519b828c37d18645445949f Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Mon, 24 Oct 2016 14:21:08 +1100 Subject: [PATCH 492/884] xfs: clear cowblocks tag when cow fork is emptied The background cowblocks scan job takes care of scanning for inodes with potentially lingering blocks in the cow fork and clearing them out. If the background scanner reclaims the cow fork blocks, however, it doesn't immediately clear the cowblocks tag from the inode. Instead, the inode remains tagged until the background scanner comes around again, discovers the inode cow fork has no blocks, clears the tag and fires the trace_xfs_inode_free_cowblocks_invalid() tracepoint to indicate that the inode may have been incorrectly tagged. This is not a major functional problem as the tag is ultimately cleared. Nonetheless, clear the tag when an inode cow fork is explicitly emptied to avoid the extra round trip through the background scanner and spurious "invalid" tracepoint. Signed-off-by: Brian Foster Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Dave Chinner --- fs/xfs/xfs_reflink.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index cd308f119e20..a279b4e7f5fe 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -567,10 +567,14 @@ xfs_reflink_cancel_cow_blocks( } if (++idx >= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)) - return 0; + break; xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &got); } + /* clear tag if cow fork is emptied */ + if (!ifp->if_bytes) + xfs_inode_clear_cowblocks_tag(ip); + return error; } From eef0b282bb586259d35548851cf6a4ce847bb804 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Sat, 22 Oct 2016 10:20:55 -0200 Subject: [PATCH 493/884] ARM: imx: gpc: Initialize all power domains Since commit 0159ec670763dd ("PM / Domains: Verify the PM domain is present when adding a provider") the following regression is observed on imx6: imx-gpc: probe of 20dc000.gpc failed with error -22 The gpc probe fails because of_genpd_add_provider_onecell() now checks if all the domains are initialized via pm_genpd_present() function and it fails because not all the power domains are initialized. In order to fix this error, initialize all the power domains from imx_gpc_domains[], not only the imx6q_pu_domain.base one. Reported-by: Olof's autobooter Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/mach-imx/gpc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-imx/gpc.c b/arch/arm/mach-imx/gpc.c index 0df062d8b2c9..d0463e9d8a08 100644 --- a/arch/arm/mach-imx/gpc.c +++ b/arch/arm/mach-imx/gpc.c @@ -430,7 +430,8 @@ static int imx_gpc_genpd_init(struct device *dev, struct regulator *pu_reg) if (!IS_ENABLED(CONFIG_PM_GENERIC_DOMAINS)) return 0; - pm_genpd_init(&imx6q_pu_domain.base, NULL, false); + for (i = 0; i < ARRAY_SIZE(imx_gpc_domains); i++) + pm_genpd_init(imx_gpc_domains[i], NULL, false); return of_genpd_add_provider_onecell(dev->of_node, &imx_gpc_onecell_data); From f9d1f7a7ad919c93dfb708aae6e19d33c5437443 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Sat, 22 Oct 2016 10:20:56 -0200 Subject: [PATCH 494/884] ARM: imx: gpc: Fix the imx_gpc_genpd_init() error path If of_genpd_add_provider_onecell() fails the following kernel crash is observed on a kernel built with multi_v7_defconfig: [ 1.739301] [00000040] *pgd=00000000 [ 1.739310] Internal error: Oops: 5 [#1] SMP ARM [ 1.739319] Modules linked in: [ 1.739328] CPU: 1 PID: 95 Comm: kworker/1:4 Not tainted 4.8.0-11897-g6b5e09a #1 [ 1.739331] Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree) [ 1.739352] Workqueue: pm genpd_power_off_work_fn [ 1.739356] task: ee63d400 task.stack: ee70a000 [ 1.739365] PC is at mutex_lock+0xc/0x4c [ 1.739374] LR is at regulator_disable+0x2c/0x60 [ 1.739379] pc : [] lr : [] psr: 60000013 [ 1.739379] sp : ee70beb0 ip : 10624dd3 fp : ee6e6280 [ 1.739382] r10: eefb0900 r9 : 00000000 r8 : c1309918 [ 1.739385] r7 : 00000000 r6 : 00000040 r5 : 00000000 r4 : 00000040 [ 1.739390] r3 : 0000004c r2 : 7fffd540 r1 : 000001e4 r0 : 00000040 Instead of returning of_genpd_add_provider_onecell() directly, we should check its return value and in the case of error we should unwind the previously taken actions, which in these case are: - Call imx6q_pm_pu_power_off() - Set imx6q_pu_domain.reg back to NULL Setting imx6q_pu_domain.reg to NULL in the error case is important as it will prevent further operations in the pu_reg regulator. This kernel crash is not observed with imx_v6_v7_defconfig because it selects GPU and VPU drivers, which are consumers of the GPC block and thus change the refcount of the pu_reg regulator. Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/mach-imx/gpc.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-imx/gpc.c b/arch/arm/mach-imx/gpc.c index d0463e9d8a08..b54db47f6f32 100644 --- a/arch/arm/mach-imx/gpc.c +++ b/arch/arm/mach-imx/gpc.c @@ -408,7 +408,7 @@ static struct genpd_onecell_data imx_gpc_onecell_data = { static int imx_gpc_genpd_init(struct device *dev, struct regulator *pu_reg) { struct clk *clk; - int i; + int i, ret; imx6q_pu_domain.reg = pu_reg; @@ -432,12 +432,20 @@ static int imx_gpc_genpd_init(struct device *dev, struct regulator *pu_reg) for (i = 0; i < ARRAY_SIZE(imx_gpc_domains); i++) pm_genpd_init(imx_gpc_domains[i], NULL, false); - return of_genpd_add_provider_onecell(dev->of_node, - &imx_gpc_onecell_data); + ret = of_genpd_add_provider_onecell(dev->of_node, + &imx_gpc_onecell_data); + if (ret) + goto power_off; + + return 0; + +power_off: + imx6q_pm_pu_power_off(&imx6q_pu_domain.base); clk_err: while (i--) clk_put(imx6q_pu_domain.clk[i]); + imx6q_pu_domain.reg = NULL; return -EINVAL; } From 47ece7fef4e4206cdcee7c28ac3bca3ede0a1908 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 19 Oct 2016 13:42:55 +0200 Subject: [PATCH 495/884] s390/dumpstack: use pr_cont within show_stack and die Use pr_cont instead of printk calls also within show_stack and die in order to avoid extra line breaks. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/dumpstack.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 34345c0a3c46..55d4fe174fd9 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -119,14 +119,14 @@ void show_stack(struct task_struct *task, unsigned long *sp) else stack = (unsigned long *)task->thread.ksp; } + printk(KERN_DEFAULT "Stack:\n"); for (i = 0; i < 20; i++) { if (((addr_t) stack & (THREAD_SIZE-1)) == 0) break; - if ((i * sizeof(long) % 32) == 0) - printk("%s ", i == 0 ? "" : "\n"); - printk("%016lx ", *stack++); + if (i % 4 == 0) + printk(KERN_DEFAULT " "); + pr_cont("%016lx%c", *stack++, i % 4 == 3 ? '\n' : ' '); } - printk("\n"); show_trace(task, (unsigned long)sp); } @@ -186,14 +186,14 @@ void die(struct pt_regs *regs, const char *str) printk("%s: %04x ilc:%d [#%d] ", str, regs->int_code & 0xffff, regs->int_code >> 17, ++die_counter); #ifdef CONFIG_PREEMPT - printk("PREEMPT "); + pr_cont("PREEMPT "); #endif #ifdef CONFIG_SMP - printk("SMP "); + pr_cont("SMP "); #endif if (debug_pagealloc_enabled()) - printk("DEBUG_PAGEALLOC"); - printk("\n"); + pr_cont("DEBUG_PAGEALLOC"); + pr_cont("\n"); notify_die(DIE_OOPS, str, regs, 0, regs->int_code & 0xffff, SIGSEGV); print_modules(); show_regs(regs); From 4a65429457a5d271dd3b00598b3ec75fe8b5103c Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Tue, 18 Oct 2016 17:32:18 +0200 Subject: [PATCH 496/884] s390/mm: fix zone calculation in arch_add_memory() Standby (hotplug) memory should be added to ZONE_MOVABLE on s390. After commit 199071f1 "s390/mm: make arch_add_memory() NUMA aware", arch_add_memory() used memblock_end_of_DRAM() to find out the end of ZONE_NORMAL and the beginning of ZONE_MOVABLE. However, commit 7f36e3e5 "memory-hotplug: add hot-added memory ranges to memblock before allocate node_data for a node." moved the call of memblock_add_node() before the call of arch_add_memory() in add_memory_resource(), and thus changed the return value of memblock_end_of_DRAM() when called in arch_add_memory(). As a result, arch_add_memory() will think that all memory blocks should be added to ZONE_NORMAL. Fix this by changing the logic in arch_add_memory() so that it will manually iterate over all zones of a given node to find out which zone a memory block should be added to. Reviewed-by: Heiko Carstens Signed-off-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- arch/s390/mm/init.c | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index f56a39bd8ba6..b3e9d18f2ec6 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -151,36 +151,40 @@ void __init free_initrd_mem(unsigned long start, unsigned long end) #ifdef CONFIG_MEMORY_HOTPLUG int arch_add_memory(int nid, u64 start, u64 size, bool for_device) { - unsigned long normal_end_pfn = PFN_DOWN(memblock_end_of_DRAM()); - unsigned long dma_end_pfn = PFN_DOWN(MAX_DMA_ADDRESS); + unsigned long zone_start_pfn, zone_end_pfn, nr_pages; unsigned long start_pfn = PFN_DOWN(start); unsigned long size_pages = PFN_DOWN(size); - unsigned long nr_pages; - int rc, zone_enum; + pg_data_t *pgdat = NODE_DATA(nid); + struct zone *zone; + int rc, i; rc = vmem_add_mapping(start, size); if (rc) return rc; - while (size_pages > 0) { - if (start_pfn < dma_end_pfn) { - nr_pages = (start_pfn + size_pages > dma_end_pfn) ? - dma_end_pfn - start_pfn : size_pages; - zone_enum = ZONE_DMA; - } else if (start_pfn < normal_end_pfn) { - nr_pages = (start_pfn + size_pages > normal_end_pfn) ? - normal_end_pfn - start_pfn : size_pages; - zone_enum = ZONE_NORMAL; + for (i = 0; i < MAX_NR_ZONES; i++) { + zone = pgdat->node_zones + i; + if (zone_idx(zone) != ZONE_MOVABLE) { + /* Add range within existing zone limits, if possible */ + zone_start_pfn = zone->zone_start_pfn; + zone_end_pfn = zone->zone_start_pfn + + zone->spanned_pages; } else { - nr_pages = size_pages; - zone_enum = ZONE_MOVABLE; + /* Add remaining range to ZONE_MOVABLE */ + zone_start_pfn = start_pfn; + zone_end_pfn = start_pfn + size_pages; } - rc = __add_pages(nid, NODE_DATA(nid)->node_zones + zone_enum, - start_pfn, size_pages); + if (start_pfn < zone_start_pfn || start_pfn >= zone_end_pfn) + continue; + nr_pages = (start_pfn + size_pages > zone_end_pfn) ? + zone_end_pfn - start_pfn : size_pages; + rc = __add_pages(nid, zone, start_pfn, nr_pages); if (rc) break; start_pfn += nr_pages; size_pages -= nr_pages; + if (!size_pages) + break; } if (rc) vmem_remove_mapping(start, size); From 56c46222af0d09149fadec2a3ce9d4889de01cc6 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 21 Oct 2016 20:03:05 +1100 Subject: [PATCH 497/884] powerpc/64: Re-fix race condition between going idle and entering guest Commit 8117ac6a6c2f ("powerpc/powernv: Switch off MMU before entering nap/sleep/rvwinkle mode", 2014-12-10) fixed a race condition where one thread entering a KVM guest could switch the MMU context to the guest while another thread was still in host kernel context with the MMU on. That commit moved the point where a thread entering a power-saving mode set its kvm_hstate.hwthread_state field in its PACA to KVM_HWTHREAD_IN_IDLE from a point where the MMU was on to after the MMU had been switched off. That commit also added a comment explaining that we have to switch to real mode before setting hwthread_state to avoid this race. Nevertheless, commit 4eae2c9ae54a ("powerpc/powernv: Make pnv_powersave_common more generic", 2016-07-08) subsequently moved the setting of hwthread_state back to a point where the MMU is on, thus reintroducing the race, despite the comment saying that this should not be done being included in full in the context lines of the patch that did it. This fixes the race again and adds a bigger and shoutier comment explaining the potential race condition. Fixes: 4eae2c9ae54a ("powerpc/powernv: Make pnv_powersave_common more generic") Cc: stable@vger.kernel.org # v4.8+ Signed-off-by: Paul Mackerras Reviewed-by: Shreyas B. Prabhu Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/idle_book3s.S | 32 +++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index bd739fed26e3..0d8712a835d2 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -163,12 +163,6 @@ _GLOBAL(pnv_powersave_common) std r9,_MSR(r1) std r1,PACAR1(r13) -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - /* Tell KVM we're entering idle */ - li r4,KVM_HWTHREAD_IN_IDLE - stb r4,HSTATE_HWTHREAD_STATE(r13) -#endif - /* * Go to real mode to do the nap, as required by the architecture. * Also, we need to be in real mode before setting hwthread_state, @@ -185,6 +179,26 @@ _GLOBAL(pnv_powersave_common) .globl pnv_enter_arch207_idle_mode pnv_enter_arch207_idle_mode: +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + /* Tell KVM we're entering idle */ + li r4,KVM_HWTHREAD_IN_IDLE + /******************************************************/ + /* N O T E W E L L ! ! ! N O T E W E L L */ + /* The following store to HSTATE_HWTHREAD_STATE(r13) */ + /* MUST occur in real mode, i.e. with the MMU off, */ + /* and the MMU must stay off until we clear this flag */ + /* and test HSTATE_HWTHREAD_REQ(r13) in the system */ + /* reset interrupt vector in exceptions-64s.S. */ + /* The reason is that another thread can switch the */ + /* MMU to a guest context whenever this flag is set */ + /* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on, */ + /* that would potentially cause this thread to start */ + /* executing instructions from guest memory in */ + /* hypervisor mode, leading to a host crash or data */ + /* corruption, or worse. */ + /******************************************************/ + stb r4,HSTATE_HWTHREAD_STATE(r13) +#endif stb r3,PACA_THREAD_IDLE_STATE(r13) cmpwi cr3,r3,PNV_THREAD_SLEEP bge cr3,2f @@ -250,6 +264,12 @@ enter_winkle: * r3 - requested stop state */ power_enter_stop: +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + /* Tell KVM we're entering idle */ + li r4,KVM_HWTHREAD_IN_IDLE + /* DO THIS IN REAL MODE! See comment above. */ + stb r4,HSTATE_HWTHREAD_STATE(r13) +#endif /* * Check if the requested state is a deep idle state. */ From 09b7e37b18eecc1e347f4b1a3bc863f32801f634 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 21 Oct 2016 20:04:17 +1100 Subject: [PATCH 498/884] powerpc/64: Fix race condition in setting lock bit in idle/wakeup code This fixes a race condition where one thread that is entering or leaving a power-saving state can inadvertently ignore the lock bit that was set by another thread, and potentially also clear it. The core_idle_lock_held function is called when the lock bit is seen to be set. It polls the lock bit until it is clear, then does a lwarx to load the word containing the lock bit and thread idle bits so it can be updated. However, it is possible that the value loaded with the lwarx has the lock bit set, even though an immediately preceding lwz loaded a value with the lock bit clear. If this happens then we go ahead and update the word despite the lock bit being set, and when called from pnv_enter_arch207_idle_mode, we will subsequently clear the lock bit. No identifiable misbehaviour has been attributed to this race. This fixes it by checking the lock bit in the value loaded by the lwarx. If it is set then we just go back and keep on polling. Fixes: b32aadc1a8ed ("powerpc/powernv: Fix race in updating core_idle_state") Cc: stable@vger.kernel.org # v4.2+ Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/idle_book3s.S | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 0d8712a835d2..72dac0b58061 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -90,6 +90,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300) * Threads will spin in HMT_LOW until the lock bit is cleared. * r14 - pointer to core_idle_state * r15 - used to load contents of core_idle_state + * r9 - used as a temporary variable */ core_idle_lock_held: @@ -99,6 +100,8 @@ core_idle_lock_held: bne 3b HMT_MEDIUM lwarx r15,0,r14 + andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT + bne core_idle_lock_held blr /* From 44d524218c65e1f2e6d945b09165562852298015 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Mon, 17 Oct 2016 18:51:27 -0700 Subject: [PATCH 499/884] ARM: dts: vf610: fix IRQ flag of global timer The global timer IRQ (PPI[0], PPI 11 in device tree terms) is a rising edge interrupt. The ARM Cortex-A5 MPCore TRM in Chapter 10.1.2. Interrupt types and sources says: "Interrupt is rising-edge sensitive." The bits seem to be read-only, hence this missconfiguration had no negative effect. However, with commit 992345a58e0c ("irqchip/gic: WARN if setting the interrupt type for a PPI fails") warnings such as this get printed: GIC: PPI11 is secure or misconfigured With this change the new configuration matches the default configuration and no warning is printed anymore. Signed-off-by: Stefan Agner Signed-off-by: Shawn Guo --- arch/arm/boot/dts/vf500.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/vf500.dtsi b/arch/arm/boot/dts/vf500.dtsi index a3824e61bd72..d7fdb2a7d97b 100644 --- a/arch/arm/boot/dts/vf500.dtsi +++ b/arch/arm/boot/dts/vf500.dtsi @@ -70,7 +70,7 @@ global_timer: timer@40002200 { compatible = "arm,cortex-a9-global-timer"; reg = <0x40002200 0x20>; - interrupts = ; + interrupts = ; interrupt-parent = <&intc>; clocks = <&clks VF610_CLK_PLATFORM_BUS>; }; From eeaed4bb5a35591470b545590bb2f26dbe7653a2 Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Mon, 24 Oct 2016 00:31:30 -0400 Subject: [PATCH 500/884] ACPI/PCI/IRQ: assign ISA IRQ directly during early boot stages We do not want to store the SCI penalty in the acpi_isa_irq_penalty[] table because acpi_isa_irq_penalty[] only holds ISA IRQ penalties and there's no guarantee that the SCI is an ISA IRQ. We add in the SCI penalty as a special case in acpi_irq_get_penalty(). But if we called acpi_penalize_isa_irq() or acpi_irq_penalty_update() for an SCI that happened to be an ISA IRQ, they stored the SCI penalty (part of the acpi_irq_get_penalty() return value) in acpi_isa_irq_penalty[]. Subsequent calls to acpi_irq_get_penalty() returned a penalty that included *two* SCI penalties. Fixes: 103544d86976 (ACPI,PCI,IRQ: reduce resource requirements) Signed-off-by: Sinan Kaya Acked-by: Bjorn Helgaas Tested-by: Jonathan Liu Signed-off-by: Rafael J. Wysocki --- drivers/acpi/pci_link.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c index c983bf733ad3..6229b022a5d4 100644 --- a/drivers/acpi/pci_link.c +++ b/drivers/acpi/pci_link.c @@ -849,7 +849,7 @@ static int __init acpi_irq_penalty_update(char *str, int used) continue; if (used) - new_penalty = acpi_irq_get_penalty(irq) + + new_penalty = acpi_isa_irq_penalty[irq] + PIRQ_PENALTY_ISA_USED; else new_penalty = 0; @@ -871,7 +871,7 @@ static int __init acpi_irq_penalty_update(char *str, int used) void acpi_penalize_isa_irq(int irq, int active) { if ((irq >= 0) && (irq < ARRAY_SIZE(acpi_isa_irq_penalty))) - acpi_isa_irq_penalty[irq] = acpi_irq_get_penalty(irq) + + acpi_isa_irq_penalty[irq] += (active ? PIRQ_PENALTY_ISA_USED : PIRQ_PENALTY_PCI_USING); } From f1caa61df2a3dc4c58316295c5dc5edba4c68d85 Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Mon, 24 Oct 2016 00:31:31 -0400 Subject: [PATCH 501/884] ACPI/PCI: pci_link: penalize SCI correctly Ondrej reported that IRQs stopped working in v4.7 on several platforms. A typical scenario, from Ondrej's VT82C694X/694X, is: ACPI: Using PIC for interrupt routing ACPI: PCI Interrupt Link [LNKA] (IRQs 1 3 4 5 6 7 10 *11 12 14 15) ACPI: No IRQ available for PCI Interrupt Link [LNKA] 8139too 0000:00:0f.0: PCI INT A: no GSI We're using PIC routing, so acpi_irq_balance == 0, and LNKA is already active at IRQ 11. In that case, acpi_pci_link_allocate() only tries to use the active IRQ (IRQ 11) which also happens to be the SCI. We should penalize the SCI by PIRQ_PENALTY_PCI_USING, but irq_get_trigger_type(11) returns something other than IRQ_TYPE_LEVEL_LOW, so we penalize it by PIRQ_PENALTY_ISA_ALWAYS instead, which makes acpi_pci_link_allocate() assume the IRQ isn't available and give up. Add acpi_penalize_sci_irq() so platforms can tell us the SCI IRQ, trigger, and polarity directly and we don't have to depend on irq_get_trigger_type(). Fixes: 103544d86976 (ACPI,PCI,IRQ: reduce resource requirements) Link: http://lkml.kernel.org/r/201609251512.05657.linux@rainbow-software.org Reported-by: Ondrej Zary Acked-by: Bjorn Helgaas Signed-off-by: Sinan Kaya Tested-by: Jonathan Liu Signed-off-by: Rafael J. Wysocki --- arch/x86/kernel/acpi/boot.c | 1 + drivers/acpi/pci_link.c | 30 +++++++++++++++--------------- include/linux/acpi.h | 1 + 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 8a5abaa7d453..931ced8ca345 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -454,6 +454,7 @@ static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger, polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK; mp_override_legacy_irq(bus_irq, polarity, trigger, gsi); + acpi_penalize_sci_irq(bus_irq, trigger, polarity); /* * stash over-ride to indicate we've been here diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c index 6229b022a5d4..74bf96efae95 100644 --- a/drivers/acpi/pci_link.c +++ b/drivers/acpi/pci_link.c @@ -87,6 +87,7 @@ struct acpi_pci_link { static LIST_HEAD(acpi_link_list); static DEFINE_MUTEX(acpi_link_lock); +static int sci_irq = -1, sci_penalty; /* -------------------------------------------------------------------------- PCI Link Device Management @@ -496,25 +497,13 @@ static int acpi_irq_get_penalty(int irq) { int penalty = 0; - /* - * Penalize IRQ used by ACPI SCI. If ACPI SCI pin attributes conflict - * with PCI IRQ attributes, mark ACPI SCI as ISA_ALWAYS so it won't be - * use for PCI IRQs. - */ - if (irq == acpi_gbl_FADT.sci_interrupt) { - u32 type = irq_get_trigger_type(irq) & IRQ_TYPE_SENSE_MASK; - - if (type != IRQ_TYPE_LEVEL_LOW) - penalty += PIRQ_PENALTY_ISA_ALWAYS; - else - penalty += PIRQ_PENALTY_PCI_USING; - } + if (irq == sci_irq) + penalty += sci_penalty; if (irq < ACPI_MAX_ISA_IRQS) return penalty + acpi_isa_irq_penalty[irq]; - penalty += acpi_irq_pci_sharing_penalty(irq); - return penalty; + return penalty + acpi_irq_pci_sharing_penalty(irq); } int __init acpi_irq_penalty_init(void) @@ -881,6 +870,17 @@ bool acpi_isa_irq_available(int irq) acpi_irq_get_penalty(irq) < PIRQ_PENALTY_ISA_ALWAYS); } +void acpi_penalize_sci_irq(int irq, int trigger, int polarity) +{ + sci_irq = irq; + + if (trigger == ACPI_MADT_TRIGGER_LEVEL && + polarity == ACPI_MADT_POLARITY_ACTIVE_LOW) + sci_penalty = PIRQ_PENALTY_PCI_USING; + else + sci_penalty = PIRQ_PENALTY_ISA_ALWAYS; +} + /* * Over-ride default table to reserve additional IRQs for use by ISA * e.g. acpi_irq_isa=5 diff --git a/include/linux/acpi.h b/include/linux/acpi.h index ddbeda6dbdc8..689a8b9b9c8f 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -326,6 +326,7 @@ struct pci_dev; int acpi_pci_irq_enable (struct pci_dev *dev); void acpi_penalize_isa_irq(int irq, int active); bool acpi_isa_irq_available(int irq); +void acpi_penalize_sci_irq(int irq, int trigger, int polarity); void acpi_pci_irq_disable (struct pci_dev *dev); extern int ec_read(u8 addr, u8 *val); From 98756f5319c64c883caa910dce702d9edefe7810 Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Mon, 24 Oct 2016 00:31:32 -0400 Subject: [PATCH 502/884] ACPI/PCI: pci_link: Include PIRQ_PENALTY_PCI_USING for ISA IRQs Commit 103544d86976 ("ACPI,PCI,IRQ: reduce resource requirements") replaced the addition of PIRQ_PENALTY_PCI_USING in acpi_pci_link_allocate() with an addition in acpi_irq_pci_sharing_penalty(), but f7eca374f000 ("ACPI,PCI,IRQ: separate ISA penalty calculation") removed the use of acpi_irq_pci_sharing_penalty() for ISA IRQs. Therefore, PIRQ_PENALTY_PCI_USING is missing from ISA IRQs used by interrupt links. Include that penalty by adding it in the acpi_pci_link_allocate() path. Fixes: f7eca374f000 (ACPI,PCI,IRQ: separate ISA penalty calculation) Signed-off-by: Sinan Kaya Acked-by: Bjorn Helgaas Tested-by: Jonathan Liu Signed-off-by: Rafael J. Wysocki --- drivers/acpi/pci_link.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c index 74bf96efae95..bc3d914dfc3e 100644 --- a/drivers/acpi/pci_link.c +++ b/drivers/acpi/pci_link.c @@ -608,6 +608,10 @@ static int acpi_pci_link_allocate(struct acpi_pci_link *link) acpi_device_bid(link->device)); return -ENODEV; } else { + if (link->irq.active < ACPI_MAX_ISA_IRQS) + acpi_isa_irq_penalty[link->irq.active] += + PIRQ_PENALTY_PCI_USING; + printk(KERN_WARNING PREFIX "%s [%s] enabled at IRQ %d\n", acpi_device_name(link->device), acpi_device_bid(link->device), link->irq.active); From ed19ece135a6908244e22e4ab395165999a4d3ab Mon Sep 17 00:00:00 2001 From: Wenyou Yang Date: Thu, 29 Sep 2016 18:07:07 +0800 Subject: [PATCH 503/884] usb: ohci-at91: Set RemoteWakeupConnected bit explicitly. The reset value of RWC is 0, set RemoteWakeupConnected bit explicitly before calling ohci_run, it also fixes the issue that the mass storage stick connected wasn't suspended when the system suspend. Signed-off-by: Wenyou Yang Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ohci-at91.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/ohci-at91.c b/drivers/usb/host/ohci-at91.c index 5b5880c0ae19..b38a228134df 100644 --- a/drivers/usb/host/ohci-at91.c +++ b/drivers/usb/host/ohci-at91.c @@ -221,6 +221,12 @@ static int usb_hcd_at91_probe(const struct hc_driver *driver, ohci->num_ports = board->ports; at91_start_hc(pdev); + /* + * The RemoteWakeupConnected bit has to be set explicitly + * before calling ohci_run. The reset value of this bit is 0. + */ + ohci->hc_control = OHCI_CTRL_RWC; + retval = usb_add_hcd(hcd, irq, IRQF_SHARED); if (retval == 0) { device_wakeup_enable(hcd->self.controller); @@ -677,9 +683,6 @@ ohci_hcd_at91_drv_suspend(struct device *dev) * REVISIT: some boards will be able to turn VBUS off... */ if (!ohci_at91->wakeup) { - ohci->hc_control = ohci_readl(ohci, &ohci->regs->control); - ohci->hc_control &= OHCI_CTRL_RWC; - ohci_writel(ohci, ohci->hc_control, &ohci->regs->control); ohci->rh_state = OHCI_RH_HALTED; /* flush the writes */ From 1e4b4348753ba555ea93470ea8af821425d9c826 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 17 Oct 2016 20:11:59 +0900 Subject: [PATCH 504/884] usb: ehci-platform: increase EHCI_MAX_RSTS to 4 Socionext LD11 SoC (arch/arm64/boot/dts/socionext/uniphier-ld11.dtsi) needs to handle 4 reset lines for EHCI. Signed-off-by: Masahiro Yamada Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-platform.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/ehci-platform.c b/drivers/usb/host/ehci-platform.c index 876dca4fc216..a268d9e8d6cf 100644 --- a/drivers/usb/host/ehci-platform.c +++ b/drivers/usb/host/ehci-platform.c @@ -39,7 +39,7 @@ #define DRIVER_DESC "EHCI generic platform driver" #define EHCI_MAX_CLKS 4 -#define EHCI_MAX_RSTS 3 +#define EHCI_MAX_RSTS 4 #define hcd_to_ehci_priv(h) ((struct ehci_platform_priv *)hcd_to_ehci(h)->priv) struct ehci_platform_priv { From d8e5f0eca1e88215e45aca27115ea747e6164da1 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 19 Oct 2016 12:03:39 -0500 Subject: [PATCH 505/884] usb: musb: Fix hardirq-safe hardirq-unsafe lock order error If we configure musb with 2430 glue as a peripheral, and then rmmod omap2430 module, we'll get the following error: [ INFO: HARDIRQ-safe -> HARDIRQ-unsafe lock order detected ] ... rmmod/413 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire: (&phy->mutex){+.+.+.}, at: [] phy_power_off+0x1c/0xb8 [ 204.678710] and this task is already holding: (&(&musb->lock)->rlock){-.-...}, at: [] musb_gadget_stop+0x24/0xec [musb_hdrc] which would create a new lock dependency: (&(&musb->lock)->rlock){-.-...} -> (&phy->mutex){+.+.+.} ... This is because some glue layers expect musb_platform_enable/disable to be called with spinlock held, and 2430 glue layer has USB PHY on the I2C bus using a mutex. We could fix the glue layers to take the spinlock, but we still have a problem of musb_plaform_enable/disable being called in an unbalanced manner. So that would still lead into USB PHY enable/disable related problems for omap2430 glue layer. While it makes sense to only enable USB PHY when needed from PM point of view, in this case we just can't do it yet without breaking things. So let's just revert phy_enable/disable related changes instead and reconsider this after we have fixed musb_platform_enable/disable to be balanced. Fixes: a83e17d0f73b ("usb: musb: Improve PM runtime and phy handling for 2430 glue layer") Reviewed-by: Laurent Pinchart Signed-off-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/omap2430.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c index 1ab6973d4f61..cc1225485509 100644 --- a/drivers/usb/musb/omap2430.c +++ b/drivers/usb/musb/omap2430.c @@ -287,6 +287,7 @@ static int omap2430_musb_init(struct musb *musb) } musb->isr = omap2430_musb_interrupt; phy_init(musb->phy); + phy_power_on(musb->phy); l = musb_readl(musb->mregs, OTG_INTERFSEL); @@ -323,8 +324,6 @@ static void omap2430_musb_enable(struct musb *musb) struct musb_hdrc_platform_data *pdata = dev_get_platdata(dev); struct omap_musb_board_data *data = pdata->board_data; - if (!WARN_ON(!musb->phy)) - phy_power_on(musb->phy); switch (glue->status) { @@ -361,9 +360,6 @@ static void omap2430_musb_disable(struct musb *musb) struct device *dev = musb->controller; struct omap2430_glue *glue = dev_get_drvdata(dev->parent); - if (!WARN_ON(!musb->phy)) - phy_power_off(musb->phy); - if (glue->status != MUSB_UNKNOWN) omap_control_usb_set_mode(glue->control_otghs, USB_MODE_DISCONNECT); @@ -375,6 +371,7 @@ static int omap2430_musb_exit(struct musb *musb) struct omap2430_glue *glue = dev_get_drvdata(dev->parent); omap2430_low_level_exit(musb); + phy_power_off(musb->phy); phy_exit(musb->phy); musb->phy = NULL; cancel_work_sync(&glue->omap_musb_mailbox_work); From cacaaf80c3a6f6036d290f353c4c1db237b42647 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 19 Oct 2016 12:03:40 -0500 Subject: [PATCH 506/884] usb: musb: Call pm_runtime from musb_gadget_queue If we're booting pandaboard using NFSroot over built-in g_ether, we can get the following after booting once and doing a warm reset: g_ether gadget: ecm_open g_ether gadget: notify connect true ... WARNING: CPU: 0 PID: 1 at drivers/bus/omap_l3_noc.c:147 l3_interrupt_handler+0x220/0x34c 44000000.ocp:L3 Custom Error: MASTER MPU TARGET L4CFG (Read): Data Access in User mode du ring Functional access ... Fix the issue by calling pm_runtime functions from musb_gadget_queue. Note that in the long run we should be able to queue the pending transfers if pm_runtime is not active, and flush the queue from pm_runtime_resume. Reported-by: Laurent Pinchart Tested-by: Laurent Pinchart Signed-off-by: Tony Lindgren Signed-off-by: Bin Liu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_gadget.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c index bff4869a57cd..4042ea017985 100644 --- a/drivers/usb/musb/musb_gadget.c +++ b/drivers/usb/musb/musb_gadget.c @@ -1255,6 +1255,7 @@ static int musb_gadget_queue(struct usb_ep *ep, struct usb_request *req, map_dma_buffer(request, musb, musb_ep); + pm_runtime_get_sync(musb->controller); spin_lock_irqsave(&musb->lock, lockflags); /* don't queue if the ep is down */ @@ -1275,6 +1276,9 @@ static int musb_gadget_queue(struct usb_ep *ep, struct usb_request *req, unlock: spin_unlock_irqrestore(&musb->lock, lockflags); + pm_runtime_mark_last_busy(musb->controller); + pm_runtime_put_autosuspend(musb->controller); + return status; } From ed6d6f8f42d7302f6f9b6245f34927ec20d26c12 Mon Sep 17 00:00:00 2001 From: Bryan Paluch Date: Mon, 17 Oct 2016 08:54:46 -0400 Subject: [PATCH 507/884] usb: increase ohci watchdog delay to 275 msec Increase ohci watchout delay to 275 ms. Previous delay was 250 ms with 20 ms of slack, after removing slack time some ohci controllers don't respond in time. Logs from systems with controllers that have the issue would show "HcDoneHead not written back; disabled" Signed-off-by: Bryan Paluch Acked-by: Alan Stern Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ohci-hcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c index 1700908b84ef..86612ac3fda2 100644 --- a/drivers/usb/host/ohci-hcd.c +++ b/drivers/usb/host/ohci-hcd.c @@ -72,7 +72,7 @@ static const char hcd_name [] = "ohci_hcd"; #define STATECHANGE_DELAY msecs_to_jiffies(300) -#define IO_WATCHDOG_DELAY msecs_to_jiffies(250) +#define IO_WATCHDOG_DELAY msecs_to_jiffies(275) #include "ohci.h" #include "pci-quirks.h" From 806487a8fc8f385af75ed261e9ab658fc845e633 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Tue, 18 Oct 2016 17:07:19 +0100 Subject: [PATCH 508/884] ACPI / APEI: Fix incorrect return value of ghes_proc() Although ghes_proc() tests for errors while reading the error status, it always return success (0). Fix this by propagating the return value. Fixes: d334a49113a4a33 (ACPI, APEI, Generic Hardware Error Source memory error support) Signed-of-by: Punit Agrawal Tested-by: Tyler Baicar Reviewed-by: Borislav Petkov [ rjw: Subject ] Signed-off-by: Rafael J. Wysocki --- drivers/acpi/apei/ghes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index f0a029e68d3e..0d099a24f776 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -662,7 +662,7 @@ static int ghes_proc(struct ghes *ghes) ghes_do_proc(ghes, ghes->estatus); out: ghes_clear_estatus(ghes); - return 0; + return rc; } static void ghes_add_timer(struct ghes *ghes) From b76032396d7958f006bccf5fb2535beb5526837c Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Thu, 20 Oct 2016 13:19:19 +0900 Subject: [PATCH 509/884] usb: renesas_usbhs: add wait after initialization for R-Car Gen3 Since the controller on R-Car Gen3 doesn't have any status registers to detect initialization (LPSTS.SUSPM = 1) and the initialization needs up to 45 usec, this patch adds wait after the initialization. Otherwise, writing other registers (e.g. INTENB0) will fail. Fixes: de18757e272d ("usb: renesas_usbhs: add R-Car Gen3 power control") Cc: # v4.6+ Cc: Signed-off-by: Yoshihiro Shimoda Signed-off-by: Greg Kroah-Hartman --- drivers/usb/renesas_usbhs/rcar3.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/usb/renesas_usbhs/rcar3.c b/drivers/usb/renesas_usbhs/rcar3.c index 1d70add926f0..d544b331c9f2 100644 --- a/drivers/usb/renesas_usbhs/rcar3.c +++ b/drivers/usb/renesas_usbhs/rcar3.c @@ -9,6 +9,7 @@ * */ +#include #include #include "common.h" #include "rcar3.h" @@ -35,10 +36,13 @@ static int usbhs_rcar3_power_ctrl(struct platform_device *pdev, usbhs_write32(priv, UGCTRL2, UGCTRL2_RESERVED_3 | UGCTRL2_USB0SEL_OTG); - if (enable) + if (enable) { usbhs_bset(priv, LPSTS, LPSTS_SUSPM, LPSTS_SUSPM); - else + /* The controller on R-Car Gen3 needs to wait up to 45 usec */ + udelay(45); + } else { usbhs_bset(priv, LPSTS, LPSTS_SUSPM, 0); + } return 0; } From 1adb469b9b76276d7e5ea36a20a24c47d6618a0b Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Fri, 21 Oct 2016 16:24:09 +0100 Subject: [PATCH 510/884] PM / suspend: Fix missing KERN_CONT for suspend message Commit 4bcc595ccd80 (printk: reinstate KERN_CONT for printing continuation lines) exposed a missing KERN_CONT from one of the messages shown on entering suspend. With v4.9-rc1, the 'done.' shown after syncing the filesystems no longer appears as a continuation but a new message with its own timestamp. [ 9.259566] PM: Syncing filesystems ... [ 9.264119] done. Fix this by adding the KERN_CONT log level for the 'done.' part of the message seen after syncing filesystems. While we are at it, convert these suspend printks to pr_info and pr_cont, respectively. Signed-off-by: Jon Hunter Signed-off-by: Rafael J. Wysocki --- kernel/power/suspend.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 1e7f5da648d9..6ccb08f57fcb 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -498,9 +498,9 @@ static int enter_state(suspend_state_t state) #ifndef CONFIG_SUSPEND_SKIP_SYNC trace_suspend_resume(TPS("sync_filesystems"), 0, true); - printk(KERN_INFO "PM: Syncing filesystems ... "); + pr_info("PM: Syncing filesystems ... "); sys_sync(); - printk("done.\n"); + pr_cont("done.\n"); trace_suspend_resume(TPS("sync_filesystems"), 0, false); #endif From 4edd601c5a9c5094daa714e65063e623826f3bcc Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 24 Oct 2016 10:32:12 -0200 Subject: [PATCH 511/884] ARM: imx: mach-imx6q: Fix the PHY ID mask for AR8031 AR8031 and AR8035 have the same PHY ID mask of 0xffffffef. So fix it and make it match with the PHY ID mask definition at drivers/net/phy/at803x.c. Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/mach-imx/mach-imx6q.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-imx/mach-imx6q.c b/arch/arm/mach-imx/mach-imx6q.c index 97fd25105e2c..45801b27ee5c 100644 --- a/arch/arm/mach-imx/mach-imx6q.c +++ b/arch/arm/mach-imx/mach-imx6q.c @@ -173,7 +173,7 @@ static void __init imx6q_enet_phy_init(void) ksz9021rn_phy_fixup); phy_register_fixup_for_uid(PHY_ID_KSZ9031, MICREL_PHY_ID_MASK, ksz9031rn_phy_fixup); - phy_register_fixup_for_uid(PHY_ID_AR8031, 0xffffffff, + phy_register_fixup_for_uid(PHY_ID_AR8031, 0xffffffef, ar8031_phy_fixup); phy_register_fixup_for_uid(PHY_ID_AR8035, 0xffffffef, ar8035_phy_fixup); From cf55902b9c306ed259eb57ff111a0c152620f4a6 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 13 Oct 2016 15:55:08 +0300 Subject: [PATCH 512/884] staging: android: ion: Fix error handling in ion_query_heaps() If the copy_to_user() fails we should unlock and return directly without updating "cnt". Also the return value should be -EFAULT instead of the number of bytes remaining. Fixes: 02b23803c6af ("staging: android: ion: Add ioctl to query available heaps") Signed-off-by: Dan Carpenter Signed-off-by: Greg Kroah-Hartman --- drivers/staging/android/ion/ion.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c index 396ded52ab70..209a8f7ef02b 100644 --- a/drivers/staging/android/ion/ion.c +++ b/drivers/staging/android/ion/ion.c @@ -1187,8 +1187,10 @@ int ion_query_heaps(struct ion_client *client, struct ion_heap_query *query) hdata.type = heap->type; hdata.heap_id = heap->id; - ret = copy_to_user(&buffer[cnt], - &hdata, sizeof(hdata)); + if (copy_to_user(&buffer[cnt], &hdata, sizeof(hdata))) { + ret = -EFAULT; + goto out; + } cnt++; if (cnt >= max_cnt) From 25633d1f5dd7ea35c77aae12c039a80e46abec01 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 17 Oct 2016 16:37:20 +0000 Subject: [PATCH 513/884] greybus: arche-platform: Add missing of_node_put() in arche_platform_change_state() This node pointer is returned by of_find_compatible_node() with refcount incremented in this function. of_node_put() on it before exitting this function. This is detected by Coccinelle semantic patch. Signed-off-by: Wei Yongjun Acked-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/staging/greybus/arche-platform.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/greybus/arche-platform.c b/drivers/staging/greybus/arche-platform.c index e36ee984485b..34307ac3f255 100644 --- a/drivers/staging/greybus/arche-platform.c +++ b/drivers/staging/greybus/arche-platform.c @@ -128,6 +128,7 @@ int arche_platform_change_state(enum arche_platform_state state, pdev = of_find_device_by_node(np); if (!pdev) { pr_err("arche-platform device not found\n"); + of_node_put(np); return -ENODEV; } From 1305f2b2f52af5986f44dfbb1a6fe58ae875aa61 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 19 Oct 2016 13:17:53 +0000 Subject: [PATCH 514/884] greybus: es2: fix error return code in ap_probe() Fix to return a negative error code from the es2_arpc_in_enable() error handling case instead of 0, as done elsewhere in this function. Fixes: 9d9d3777a9db ("greybus: es2: Add a new bulk in endpoint for APBridgeA RPC") Signed-off-by: Wei Yongjun Acked-by: Viresh Kumar Reviewed-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/staging/greybus/es2.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/staging/greybus/es2.c b/drivers/staging/greybus/es2.c index 071bb1cfd3ae..baab460eeaa3 100644 --- a/drivers/staging/greybus/es2.c +++ b/drivers/staging/greybus/es2.c @@ -1548,7 +1548,8 @@ static int ap_probe(struct usb_interface *interface, INIT_LIST_HEAD(&es2->arpcs); spin_lock_init(&es2->arpc_lock); - if (es2_arpc_in_enable(es2)) + retval = es2_arpc_in_enable(es2); + if (retval) goto error; retval = gb_hd_add(hd); From e866dd8aab76b6a0ee8428491e65fa5c83a6ae5a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 14 Oct 2016 22:18:21 +0300 Subject: [PATCH 515/884] greybus: fix a leak on error in gb_module_create() We should release ->interfaces[0] as well. Fixes: b15d97d77017 ("greybus: core: add module abstraction") Signed-off-by: Dan Carpenter Acked-by: Viresh Kumar Acked-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/staging/greybus/module.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/greybus/module.c b/drivers/staging/greybus/module.c index 69f67ddbd4a3..660b4674a76f 100644 --- a/drivers/staging/greybus/module.c +++ b/drivers/staging/greybus/module.c @@ -127,7 +127,7 @@ struct gb_module *gb_module_create(struct gb_host_device *hd, u8 module_id, return module; err_put_interfaces: - for (--i; i > 0; --i) + for (--i; i >= 0; --i) gb_interface_put(module->interfaces[i]); put_device(&module->dev); From 44b3c7af02ca2701b6b90ee30c9d1d9c3ae07653 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 11 Oct 2016 13:34:16 +0200 Subject: [PATCH 516/884] xenbus: advertise control feature flags The Xen docs specify several flags which a guest can set to advertise which values of the xenstore control/shutdown key it will recognize. This patch adds code to write all the relevant feature-flag keys. Based-on-patch-by: Paul Durrant Signed-off-by: Juergen Gross Reviewed-by: David Vrabel Reviewed-by: Paul Durrant Signed-off-by: David Vrabel --- drivers/xen/manage.c | 45 ++++++++++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index e12bd3635f83..26e5e8507f03 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -168,7 +168,9 @@ out: #endif /* CONFIG_HIBERNATE_CALLBACKS */ struct shutdown_handler { - const char *command; +#define SHUTDOWN_CMD_SIZE 11 + const char command[SHUTDOWN_CMD_SIZE]; + bool flag; void (*cb)(void); }; @@ -206,22 +208,22 @@ static void do_reboot(void) ctrl_alt_del(); } +static struct shutdown_handler shutdown_handlers[] = { + { "poweroff", true, do_poweroff }, + { "halt", false, do_poweroff }, + { "reboot", true, do_reboot }, +#ifdef CONFIG_HIBERNATE_CALLBACKS + { "suspend", true, do_suspend }, +#endif +}; + static void shutdown_handler(struct xenbus_watch *watch, const char **vec, unsigned int len) { char *str; struct xenbus_transaction xbt; int err; - static struct shutdown_handler handlers[] = { - { "poweroff", do_poweroff }, - { "halt", do_poweroff }, - { "reboot", do_reboot }, -#ifdef CONFIG_HIBERNATE_CALLBACKS - { "suspend", do_suspend }, -#endif - {NULL, NULL}, - }; - static struct shutdown_handler *handler; + int idx; if (shutting_down != SHUTDOWN_INVALID) return; @@ -238,13 +240,13 @@ static void shutdown_handler(struct xenbus_watch *watch, return; } - for (handler = &handlers[0]; handler->command; handler++) { - if (strcmp(str, handler->command) == 0) + for (idx = 0; idx < ARRAY_SIZE(shutdown_handlers); idx++) { + if (strcmp(str, shutdown_handlers[idx].command) == 0) break; } /* Only acknowledge commands which we are prepared to handle. */ - if (handler->cb) + if (idx < ARRAY_SIZE(shutdown_handlers)) xenbus_write(xbt, "control", "shutdown", ""); err = xenbus_transaction_end(xbt, 0); @@ -253,8 +255,8 @@ static void shutdown_handler(struct xenbus_watch *watch, goto again; } - if (handler->cb) { - handler->cb(); + if (idx < ARRAY_SIZE(shutdown_handlers)) { + shutdown_handlers[idx].cb(); } else { pr_info("Ignoring shutdown request: %s\n", str); shutting_down = SHUTDOWN_INVALID; @@ -310,6 +312,9 @@ static struct notifier_block xen_reboot_nb = { static int setup_shutdown_watcher(void) { int err; + int idx; +#define FEATURE_PATH_SIZE (SHUTDOWN_CMD_SIZE + sizeof("feature-")) + char node[FEATURE_PATH_SIZE]; err = register_xenbus_watch(&shutdown_watch); if (err) { @@ -326,6 +331,14 @@ static int setup_shutdown_watcher(void) } #endif + for (idx = 0; idx < ARRAY_SIZE(shutdown_handlers); idx++) { + if (!shutdown_handlers[idx].flag) + continue; + snprintf(node, FEATURE_PATH_SIZE, "feature-%s", + shutdown_handlers[idx].command); + xenbus_printf(XBT_NIL, "control", node, "%u", 1); + } + return 0; } From cb5f7e7c1ded5ff91b18116669c0f43c82bea3db Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 12 Oct 2016 17:20:38 +0200 Subject: [PATCH 517/884] x86: xen: move cpu_up functions out of ifdef MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three newly introduced functions are not defined when CONFIG_XEN_PVHVM is disabled, but are still being used: arch/x86/xen/enlighten.c:141:12: warning: ‘xen_cpu_up_prepare’ used but never defined arch/x86/xen/enlighten.c:142:12: warning: ‘xen_cpu_up_online’ used but never defined arch/x86/xen/enlighten.c:143:12: warning: ‘xen_cpu_dead’ used but never defined Fixes: 4d737042d6c4 ("xen/x86: Convert to hotplug state machine") Signed-off-by: Arnd Bergmann Signed-off-by: David Vrabel --- arch/x86/xen/enlighten.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 96c2dea798a1..a637f902f59e 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1838,6 +1838,7 @@ static void __init init_hvm_pv_info(void) xen_domain_type = XEN_HVM_DOMAIN; } +#endif static int xen_cpu_up_prepare(unsigned int cpu) { @@ -1888,6 +1889,7 @@ static int xen_cpu_up_online(unsigned int cpu) return 0; } +#ifdef CONFIG_XEN_PVHVM #ifdef CONFIG_KEXEC_CORE static void xen_hvm_shutdown(void) { From e1e5b3ff41983f506c3cbcf123fe7d682f61a8f1 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 24 Oct 2016 09:03:49 -0600 Subject: [PATCH 518/884] xenbus: prefer list_for_each() This is more efficient than list_for_each_safe() when list modification is accompanied by breaking out of the loop. Signed-off-by: Jan Beulich Reviewed-by: Juergen Gross Signed-off-by: David Vrabel --- drivers/xen/xenbus/xenbus_dev_frontend.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c index 7487971f9f78..fe60b12de920 100644 --- a/drivers/xen/xenbus/xenbus_dev_frontend.c +++ b/drivers/xen/xenbus/xenbus_dev_frontend.c @@ -364,7 +364,7 @@ out: static int xenbus_write_watch(unsigned msg_type, struct xenbus_file_priv *u) { - struct watch_adapter *watch, *tmp_watch; + struct watch_adapter *watch; char *path, *token; int err, rc; LIST_HEAD(staging_q); @@ -399,7 +399,7 @@ static int xenbus_write_watch(unsigned msg_type, struct xenbus_file_priv *u) } list_add(&watch->list, &u->watches); } else { - list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) { + list_for_each_entry(watch, &u->watches, list) { if (!strcmp(watch->token, token) && !strcmp(watch->watch.node, path)) { unregister_xenbus_watch(&watch->watch); From c251f15c7dbf2cb72e7b2b282020b41f4e4d3665 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 24 Oct 2016 09:05:18 -0600 Subject: [PATCH 519/884] xenbus: check return value of xenbus_scanf() Don't ignore errors here: Set backend state to unknown when unsuccessful. Signed-off-by: Jan Beulich Signed-off-by: David Vrabel --- drivers/xen/xenbus/xenbus_probe_frontend.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c index 611a23119675..6d40a972ffb2 100644 --- a/drivers/xen/xenbus/xenbus_probe_frontend.c +++ b/drivers/xen/xenbus/xenbus_probe_frontend.c @@ -335,7 +335,9 @@ static int backend_state; static void xenbus_reset_backend_state_changed(struct xenbus_watch *w, const char **v, unsigned int l) { - xenbus_scanf(XBT_NIL, v[XS_WATCH_PATH], "", "%i", &backend_state); + if (xenbus_scanf(XBT_NIL, v[XS_WATCH_PATH], "", "%i", + &backend_state) != 1) + backend_state = XenbusStateUnknown; printk(KERN_DEBUG "XENBUS: backend %s %s\n", v[XS_WATCH_PATH], xenbus_strstate(backend_state)); wake_up(&backend_state_wq); From dafa724bf582181d9a7d54f5cb4ca0bf8ef29269 Mon Sep 17 00:00:00 2001 From: "tang.junhui" Date: Fri, 21 Oct 2016 09:35:32 +0800 Subject: [PATCH 520/884] dm table: fix missing dm_put_target_type() in dm_table_add_target() dm_get_target_type() was previously called so any error returned from dm_table_add_target() must first call dm_put_target_type(). Otherwise the DM target module's reference count will leak and the associated kernel module will be unable to be removed. Also, leverage the fact that r is already -EINVAL and remove an extra newline. Fixes: 36a0456 ("dm table: add immutable feature") Fixes: cc6cbe1 ("dm table: add always writeable feature") Fixes: 3791e2f ("dm table: add singleton feature") Cc: stable@vger.kernel.org # 3.2+ Signed-off-by: tang.junhui Signed-off-by: Mike Snitzer --- drivers/md/dm-table.c | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 3e407a9cde1f..c4b53b332607 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -695,37 +695,32 @@ int dm_table_add_target(struct dm_table *t, const char *type, tgt->type = dm_get_target_type(type); if (!tgt->type) { - DMERR("%s: %s: unknown target type", dm_device_name(t->md), - type); + DMERR("%s: %s: unknown target type", dm_device_name(t->md), type); return -EINVAL; } if (dm_target_needs_singleton(tgt->type)) { if (t->num_targets) { - DMERR("%s: target type %s must appear alone in table", - dm_device_name(t->md), type); - return -EINVAL; + tgt->error = "singleton target type must appear alone in table"; + goto bad; } t->singleton = true; } if (dm_target_always_writeable(tgt->type) && !(t->mode & FMODE_WRITE)) { - DMERR("%s: target type %s may not be included in read-only tables", - dm_device_name(t->md), type); - return -EINVAL; + tgt->error = "target type may not be included in a read-only table"; + goto bad; } if (t->immutable_target_type) { if (t->immutable_target_type != tgt->type) { - DMERR("%s: immutable target type %s cannot be mixed with other target types", - dm_device_name(t->md), t->immutable_target_type->name); - return -EINVAL; + tgt->error = "immutable target type cannot be mixed with other target types"; + goto bad; } } else if (dm_target_is_immutable(tgt->type)) { if (t->num_targets) { - DMERR("%s: immutable target type %s cannot be mixed with other target types", - dm_device_name(t->md), tgt->type->name); - return -EINVAL; + tgt->error = "immutable target type cannot be mixed with other target types"; + goto bad; } t->immutable_target_type = tgt->type; } @@ -740,7 +735,6 @@ int dm_table_add_target(struct dm_table *t, const char *type, */ if (!adjoin(t, tgt)) { tgt->error = "Gap in table"; - r = -EINVAL; goto bad; } From 0b34c261e235a5c74dcf78bd305845bd15fe2b42 Mon Sep 17 00:00:00 2001 From: Goldwyn Rodrigues Date: Fri, 30 Sep 2016 10:40:52 -0500 Subject: [PATCH 521/884] btrfs: qgroup: Prevent qgroup->reserved from going subzero While free'ing qgroup->reserved resources, we much check if the page has not been invalidated by a truncate operation by checking if the page is still dirty before reducing the qgroup resources. Resources in such a case are free'd when the entire extent is released by delayed_ref. This fixes a double accounting while releasing resources in case of truncating a file, reproduced by the following testcase. SCRATCH_DEV=/dev/vdb SCRATCH_MNT=/mnt mkfs.btrfs -f $SCRATCH_DEV mount -t btrfs $SCRATCH_DEV $SCRATCH_MNT cd $SCRATCH_MNT btrfs quota enable $SCRATCH_MNT btrfs subvolume create a btrfs qgroup limit 500m a $SCRATCH_MNT sync for c in {1..15}; do dd if=/dev/zero bs=1M count=40 of=$SCRATCH_MNT/a/file; done sleep 10 sync sleep 5 touch $SCRATCH_MNT/a/newfile echo "Removing file" rm $SCRATCH_MNT/a/file Fixes: b9d0b38928 ("btrfs: Add handler for invalidate page") Signed-off-by: Goldwyn Rodrigues Reviewed-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/inode.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 50ba4ca167e7..6677674d0505 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8930,9 +8930,14 @@ again: * So even we call qgroup_free_data(), it won't decrease reserved * space. * 2) Not written to disk - * This means the reserved space should be freed here. + * This means the reserved space should be freed here. However, + * if a truncate invalidates the page (by clearing PageDirty) + * and the page is accounted for while allocating extent + * in btrfs_check_data_free_space() we let delayed_ref to + * free the entire extent. */ - btrfs_qgroup_free_data(inode, page_start, PAGE_SIZE); + if (PageDirty(page)) + btrfs_qgroup_free_data(inode, page_start, PAGE_SIZE); if (!inode_evicting) { clear_extent_bit(tree, page_start, page_end, EXTENT_LOCKED | EXTENT_DIRTY | From 69ae5e4459e43e56f03d0987e865fbac2b05af2a Mon Sep 17 00:00:00 2001 From: Wang Xiaoguang Date: Thu, 13 Oct 2016 09:23:39 +0800 Subject: [PATCH 522/884] btrfs: make file clone aware of fatal signals Indeed this just make the behavior similar to xfs when process has fatal signals pending, and it'll make fstests/generic/298 happy. Signed-off-by: Wang Xiaoguang Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index af69129d7e0e..71634570943c 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3814,6 +3814,11 @@ process_slot: } btrfs_release_path(path); key.offset = next_key_min_offset; + + if (fatal_signal_pending(current)) { + ret = -EINTR; + goto out; + } } ret = 0; From dd4b857aabbce57518f2d32d9805365d3ff34fc6 Mon Sep 17 00:00:00 2001 From: Wang Xiaoguang Date: Tue, 18 Oct 2016 15:56:13 +0800 Subject: [PATCH 523/884] btrfs: pass correct args to btrfs_async_run_delayed_refs() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In btrfs_truncate_inode_items()->btrfs_async_run_delayed_refs(), we swap the arg2 and arg3 wrongly, fix this. This bug just impacts asynchronous delayed refs handle when we truncate inodes. In delayed_ref_async_start(), there is such codes: trans = btrfs_join_transaction(async->root); if (trans->transid > async->transid) goto end; ret = btrfs_run_delayed_refs(trans, async->root, async->count); From this codes, we can see that this just influence whether can we handle delayed refs or the number of delayed refs to handle, this may impact performance, but will not result in missing delayed refs, all delayed refs will be handled in btrfs_commit_transaction(). Signed-off-by: Wang Xiaoguang Reviewed-by: Holger Hoffstätte Signed-off-by: David Sterba --- fs/btrfs/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 6677674d0505..1f980efbb2e8 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4605,8 +4605,8 @@ delete: BUG_ON(ret); if (btrfs_should_throttle_delayed_refs(trans, root)) btrfs_async_run_delayed_refs(root, - trans->transid, - trans->delayed_ref_updates * 2, 0); + trans->delayed_ref_updates * 2, + trans->transid, 0); if (be_nice) { if (truncate_space_check(trans, root, extent_num_bytes)) { From 9c894696f56f5d84fb5766af81bcda4a7cb9a842 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 12 Oct 2016 11:33:21 +0300 Subject: [PATCH 524/884] Btrfs: remove some no-op casts We cast 0 to a u8 but then because of type promotion, it's immediately cast to int back to int before we do a bitwise negate. The cast doesn't matter in this case, the code works as intended. It causes a static checker warning though so let's remove it. Signed-off-by: Dan Carpenter Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent_io.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 66a755150056..8ed05d95584a 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -5569,7 +5569,7 @@ void le_bitmap_set(u8 *map, unsigned int start, int len) *p |= mask_to_set; len -= bits_to_set; bits_to_set = BITS_PER_BYTE; - mask_to_set = ~(u8)0; + mask_to_set = ~0; p++; } if (len) { @@ -5589,7 +5589,7 @@ void le_bitmap_clear(u8 *map, unsigned int start, int len) *p &= ~mask_to_clear; len -= bits_to_clear; bits_to_clear = BITS_PER_BYTE; - mask_to_clear = ~(u8)0; + mask_to_clear = ~0; p++; } if (len) { @@ -5679,7 +5679,7 @@ void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start, kaddr[offset] |= mask_to_set; len -= bits_to_set; bits_to_set = BITS_PER_BYTE; - mask_to_set = ~(u8)0; + mask_to_set = ~0; if (++offset >= PAGE_SIZE && len > 0) { offset = 0; page = eb->pages[++i]; @@ -5721,7 +5721,7 @@ void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start, kaddr[offset] &= ~mask_to_clear; len -= bits_to_clear; bits_to_clear = BITS_PER_BYTE; - mask_to_clear = ~(u8)0; + mask_to_clear = ~0; if (++offset >= PAGE_SIZE && len > 0) { offset = 0; page = eb->pages[++i]; From 9d1032cc49a8a1065e79ee323de66bcb4fdbd535 Mon Sep 17 00:00:00 2001 From: Wang Xiaoguang Date: Mon, 20 Jun 2016 09:18:52 +0800 Subject: [PATCH 525/884] btrfs: fix WARNING in btrfs_select_ref_head() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This issue was found when testing in-band dedupe enospc behaviour, sometimes run_one_delayed_ref() may fail for enospc reason, then __btrfs_run_delayed_refs()will return, but forget to add num_heads_read back, which will trigger "WARN_ON(delayed_refs->num_heads_ready == 0)" in btrfs_select_ref_head(). Signed-off-by: Wang Xiaoguang Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 210c94ac8818..4607af38c72e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2647,7 +2647,10 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, btrfs_free_delayed_extent_op(extent_op); if (ret) { + spin_lock(&delayed_refs->lock); locked_ref->processing = 0; + delayed_refs->num_heads_ready++; + spin_unlock(&delayed_refs->lock); btrfs_delayed_ref_unlock(locked_ref); btrfs_put_delayed_ref(ref); btrfs_debug(fs_info, "run_one_delayed_ref returned %d", From 91e040a79df73d371f70792f30380d4e44805250 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 20 Oct 2016 07:39:45 -0700 Subject: [PATCH 526/884] ARC: syscall for userspace cmpxchg assist Older ARC700 cores (ARC750 specifically) lack instructions to implement atomic r-w-w. This is problematic for userspace libraries such as NPTL which need atomic primitives. So enable them by providing kernel assist. This is costly but really the only sane soluton (othern than tight spinning using the otherwise availiable atomic exchange EX instruciton). Good thing is there are only a few of these cores running Linux out in the wild. This only works on UP systems. Reviewed-by: Colin Ian King Signed-off-by: Vineet Gupta --- arch/arc/include/asm/syscalls.h | 1 + arch/arc/include/uapi/asm/unistd.h | 9 ++++---- arch/arc/kernel/process.c | 33 ++++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 4 deletions(-) diff --git a/arch/arc/include/asm/syscalls.h b/arch/arc/include/asm/syscalls.h index e56f9fcc5581..772b67ca56e7 100644 --- a/arch/arc/include/asm/syscalls.h +++ b/arch/arc/include/asm/syscalls.h @@ -17,6 +17,7 @@ int sys_clone_wrapper(int, int, int, int, int); int sys_cacheflush(uint32_t, uint32_t uint32_t); int sys_arc_settls(void *); int sys_arc_gettls(void); +int sys_arc_usr_cmpxchg(int *, int, int); #include diff --git a/arch/arc/include/uapi/asm/unistd.h b/arch/arc/include/uapi/asm/unistd.h index 41fa2ec9e02c..9a34136d84b2 100644 --- a/arch/arc/include/uapi/asm/unistd.h +++ b/arch/arc/include/uapi/asm/unistd.h @@ -27,18 +27,19 @@ #define NR_syscalls __NR_syscalls +/* Generic syscall (fs/filesystems.c - lost in asm-generic/unistd.h */ +#define __NR_sysfs (__NR_arch_specific_syscall + 3) + /* ARC specific syscall */ #define __NR_cacheflush (__NR_arch_specific_syscall + 0) #define __NR_arc_settls (__NR_arch_specific_syscall + 1) #define __NR_arc_gettls (__NR_arch_specific_syscall + 2) +#define __NR_arc_usr_cmpxchg (__NR_arch_specific_syscall + 4) __SYSCALL(__NR_cacheflush, sys_cacheflush) __SYSCALL(__NR_arc_settls, sys_arc_settls) __SYSCALL(__NR_arc_gettls, sys_arc_gettls) - - -/* Generic syscall (fs/filesystems.c - lost in asm-generic/unistd.h */ -#define __NR_sysfs (__NR_arch_specific_syscall + 3) +__SYSCALL(__NR_arc_usr_cmpxchg, sys_arc_usr_cmpxchg) __SYSCALL(__NR_sysfs, sys_sysfs) #undef __SYSCALL diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index be1972bd2729..59aa43cb146e 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -41,6 +41,39 @@ SYSCALL_DEFINE0(arc_gettls) return task_thread_info(current)->thr_ptr; } +SYSCALL_DEFINE3(arc_usr_cmpxchg, int *, uaddr, int, expected, int, new) +{ + int uval; + int ret; + + /* + * This is only for old cores lacking LLOCK/SCOND, which by defintion + * can't possibly be SMP. Thus doesn't need to be SMP safe. + * And this also helps reduce the overhead for serializing in + * the UP case + */ + WARN_ON_ONCE(IS_ENABLED(CONFIG_SMP)); + + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) + return -EFAULT; + + preempt_disable(); + + ret = __get_user(uval, uaddr); + if (ret) + goto done; + + if (uval != expected) + ret = -EAGAIN; + else + ret = __put_user(new, uaddr); + +done: + preempt_enable(); + + return ret; +} + void arch_cpu_idle(void) { /* sleep, but enable all interrupts before committing */ From cf986d470208fbdd68b6934a86ccd81c04408484 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 13 Oct 2016 15:58:59 -0700 Subject: [PATCH 527/884] ARCv2: IOC: use @ioc_enable not @ioc_exist where intended if user disables IOC from debugger at startup (by clearing @ioc_enable), @ioc_exists is cleared too. This means boot prints don't capture the fact that IOC was present but disabled which could be misleading. So invert how we use @ioc_enable and @ioc_exists and make it more canonical. @ioc_exists represent whether hardware is present or not and stays same whether enabled or not. @ioc_enable is still user driven, but will be auto-disabled if IOC hardware is not present, i.e. if @ioc_exist=0. This is opposite to what we were doing before, but much clearer. This means @ioc_enable is now the "exported" toggle in rest of code such as dma mapping API. Signed-off-by: Vineet Gupta --- arch/arc/include/asm/cache.h | 2 +- arch/arc/mm/cache.c | 10 ++++++---- arch/arc/mm/dma.c | 4 ++-- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h index fb781e34f322..b3410ff6a62d 100644 --- a/arch/arc/include/asm/cache.h +++ b/arch/arc/include/asm/cache.h @@ -53,7 +53,7 @@ extern void arc_cache_init(void); extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len); extern void read_decode_cache_bcr(void); -extern int ioc_exists; +extern int ioc_enable; extern unsigned long perip_base, perip_end; #endif /* !__ASSEMBLY__ */ diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 97dddbefb86a..518ff76771f3 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -22,8 +22,8 @@ #include static int l2_line_sz; -int ioc_exists; -volatile int slc_enable = 1, ioc_enable = 1; +static int ioc_exists; +int slc_enable = 1, ioc_enable = 1; unsigned long perip_base = ARC_UNCACHED_ADDR_SPACE; /* legacy value for boot */ unsigned long perip_end = 0xFFFFFFFF; /* legacy value */ @@ -113,8 +113,10 @@ static void read_decode_cache_bcr_arcv2(int cpu) } READ_BCR(ARC_REG_CLUSTER_BCR, cbcr); - if (cbcr.c && ioc_enable) + if (cbcr.c) ioc_exists = 1; + else + ioc_enable = 0; /* HS 2.0 didn't have AUX_VOL */ if (cpuinfo_arc700[cpu].core.family > 0x51) { @@ -1002,7 +1004,7 @@ void arc_cache_init(void) read_aux_reg(ARC_REG_SLC_CTRL) | SLC_CTRL_DISABLE); } - if (is_isa_arcv2() && ioc_exists) { + if (is_isa_arcv2() && ioc_enable) { /* IO coherency base - 0x8z */ write_aux_reg(ARC_REG_IO_COH_AP0_BASE, 0x80000); /* IO coherency aperture size - 512Mb: 0x8z-0xAz */ diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index 20afc65e22dc..60aab5a7522b 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -45,7 +45,7 @@ static void *arc_dma_alloc(struct device *dev, size_t size, * -For coherent data, Read/Write to buffers terminate early in cache * (vs. always going to memory - thus are faster) */ - if ((is_isa_arcv2() && ioc_exists) || + if ((is_isa_arcv2() && ioc_enable) || (attrs & DMA_ATTR_NON_CONSISTENT)) need_coh = 0; @@ -97,7 +97,7 @@ static void arc_dma_free(struct device *dev, size_t size, void *vaddr, int is_non_coh = 1; is_non_coh = (attrs & DMA_ATTR_NON_CONSISTENT) || - (is_isa_arcv2() && ioc_exists); + (is_isa_arcv2() && ioc_enable); if (PageHighMem(page) || !is_non_coh) iounmap((void __force __iomem *)vaddr); From e0af98a7e025a7263ae7e50264f6f79ed29642a7 Mon Sep 17 00:00:00 2001 From: Ralf Ramsauer Date: Mon, 17 Oct 2016 15:59:56 +0200 Subject: [PATCH 528/884] spi: mark device nodes only in case of successful instantiation Instantiated SPI device nodes are marked with OF_POPULATE. This was introduced in bd6c164. On unloading, loaded device nodes will of course be unmarked. The problem are nodes that fail during initialisation: If a node fails, it won't be unloaded and hence not be unmarked. If a SPI driver module is unloaded and reloaded, it will skip nodes that failed before. Skip device nodes that are already populated and mark them only in case of success. Note that the same issue exists for I2C. Fixes: bd6c164 ("spi: Mark instantiated device nodes with OF_POPULATE") Signed-off-by: Ralf Ramsauer Reviewed-by: Geert Uytterhoeven Acked-by: Pantelis Antoniou Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- drivers/spi/spi.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 5787b723b593..838783c3fed0 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -1618,9 +1618,11 @@ static void of_register_spi_devices(struct spi_master *master) if (of_node_test_and_set_flag(nc, OF_POPULATED)) continue; spi = of_register_spi_device(master, nc); - if (IS_ERR(spi)) + if (IS_ERR(spi)) { dev_warn(&master->dev, "Failed to create SPI device for %s\n", nc->full_name); + of_node_clear_flag(nc, OF_POPULATED); + } } } #else @@ -3131,6 +3133,7 @@ static int of_spi_notify(struct notifier_block *nb, unsigned long action, if (IS_ERR(spi)) { pr_err("%s: failed to create for '%s'\n", __func__, rd->dn->full_name); + of_node_clear_flag(rd->dn, OF_POPULATED); return notifier_from_errno(PTR_ERR(spi)); } break; From 0a3ffab93fe52530602fe47cd74802cffdb19c05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arve=20Hj=C3=B8nnev=C3=A5g?= Date: Mon, 24 Oct 2016 15:20:29 +0200 Subject: [PATCH 529/884] ANDROID: binder: Add strong ref checks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prevent using a binder_ref with only weak references where a strong reference is required. Signed-off-by: Arve Hjønnevåg Signed-off-by: Martijn Coenen Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 562af94bec35..3681759c22d7 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -1002,7 +1002,7 @@ static int binder_dec_node(struct binder_node *node, int strong, int internal) static struct binder_ref *binder_get_ref(struct binder_proc *proc, - uint32_t desc) + u32 desc, bool need_strong_ref) { struct rb_node *n = proc->refs_by_desc.rb_node; struct binder_ref *ref; @@ -1010,12 +1010,16 @@ static struct binder_ref *binder_get_ref(struct binder_proc *proc, while (n) { ref = rb_entry(n, struct binder_ref, rb_node_desc); - if (desc < ref->desc) + if (desc < ref->desc) { n = n->rb_left; - else if (desc > ref->desc) + } else if (desc > ref->desc) { n = n->rb_right; - else + } else if (need_strong_ref && !ref->strong) { + binder_user_error("tried to use weak ref as strong ref\n"); + return NULL; + } else { return ref; + } } return NULL; } @@ -1285,7 +1289,10 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, } break; case BINDER_TYPE_HANDLE: case BINDER_TYPE_WEAK_HANDLE: { - struct binder_ref *ref = binder_get_ref(proc, fp->handle); + struct binder_ref *ref; + + ref = binder_get_ref(proc, fp->handle, + fp->type == BINDER_TYPE_HANDLE); if (ref == NULL) { pr_err("transaction release %d bad handle %d\n", @@ -1380,7 +1387,7 @@ static void binder_transaction(struct binder_proc *proc, if (tr->target.handle) { struct binder_ref *ref; - ref = binder_get_ref(proc, tr->target.handle); + ref = binder_get_ref(proc, tr->target.handle, true); if (ref == NULL) { binder_user_error("%d:%d got transaction to invalid handle\n", proc->pid, thread->pid); @@ -1589,7 +1596,10 @@ static void binder_transaction(struct binder_proc *proc, } break; case BINDER_TYPE_HANDLE: case BINDER_TYPE_WEAK_HANDLE: { - struct binder_ref *ref = binder_get_ref(proc, fp->handle); + struct binder_ref *ref; + + ref = binder_get_ref(proc, fp->handle, + fp->type == BINDER_TYPE_HANDLE); if (ref == NULL) { binder_user_error("%d:%d got transaction with invalid handle, %d\n", @@ -1800,7 +1810,9 @@ static int binder_thread_write(struct binder_proc *proc, ref->desc); } } else - ref = binder_get_ref(proc, target); + ref = binder_get_ref(proc, target, + cmd == BC_ACQUIRE || + cmd == BC_RELEASE); if (ref == NULL) { binder_user_error("%d:%d refcount change on invalid ref %d\n", proc->pid, thread->pid, target); @@ -1996,7 +2008,7 @@ static int binder_thread_write(struct binder_proc *proc, if (get_user(cookie, (binder_uintptr_t __user *)ptr)) return -EFAULT; ptr += sizeof(binder_uintptr_t); - ref = binder_get_ref(proc, target); + ref = binder_get_ref(proc, target, false); if (ref == NULL) { binder_user_error("%d:%d %s invalid ref %d\n", proc->pid, thread->pid, From 4afb604e2d14d429ac9e1fd84b952602853b2df5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arve=20Hj=C3=B8nnev=C3=A5g?= Date: Mon, 24 Oct 2016 15:20:30 +0200 Subject: [PATCH 530/884] ANDROID: binder: Clear binder and cookie when setting handle in flat binder struct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prevents leaking pointers between processes Signed-off-by: Arve Hjønnevåg Signed-off-by: Martijn Coenen Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 3681759c22d7..3c71b982bf2a 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -1584,7 +1584,9 @@ static void binder_transaction(struct binder_proc *proc, fp->type = BINDER_TYPE_HANDLE; else fp->type = BINDER_TYPE_WEAK_HANDLE; + fp->binder = 0; fp->handle = ref->desc; + fp->cookie = 0; binder_inc_ref(ref, fp->type == BINDER_TYPE_HANDLE, &thread->todo); @@ -1634,7 +1636,9 @@ static void binder_transaction(struct binder_proc *proc, return_error = BR_FAILED_REPLY; goto err_binder_get_ref_for_node_failed; } + fp->binder = 0; fp->handle = new_ref->desc; + fp->cookie = 0; binder_inc_ref(new_ref, fp->type == BINDER_TYPE_HANDLE, NULL); trace_binder_transaction_ref_to_ref(t, ref, new_ref); @@ -1688,6 +1692,7 @@ static void binder_transaction(struct binder_proc *proc, binder_debug(BINDER_DEBUG_TRANSACTION, " fd %d -> %d\n", fp->handle, target_fd); /* TODO: fput? */ + fp->binder = 0; fp->handle = target_fd; } break; From 43605e293eb13c07acb546c14f407a271837af17 Mon Sep 17 00:00:00 2001 From: Alexander Usyskin Date: Wed, 19 Oct 2016 01:34:48 +0300 Subject: [PATCH 531/884] mei: txe: don't clean an unprocessed interrupt cause. SEC registers are not accessible when the TXE device is in low power state, hence the SEC interrupt cannot be processed if device is not awake. In some rare cases entrance to low power state (aliveness off) and input ready bits can be signaled at the same time, resulting in communication stall as input ready won't be signaled again after waking up. To resolve this IPC_HHIER_SEC bit in HHISR_REG should not be cleaned if the interrupt is not processed. Cc: stable@vger.kernel.org Signed-off-by: Alexander Usyskin Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- drivers/misc/mei/hw-txe.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/misc/mei/hw-txe.c b/drivers/misc/mei/hw-txe.c index e6e5e55a12ed..60415a2bfcbd 100644 --- a/drivers/misc/mei/hw-txe.c +++ b/drivers/misc/mei/hw-txe.c @@ -981,11 +981,13 @@ static bool mei_txe_check_and_ack_intrs(struct mei_device *dev, bool do_ack) hisr = mei_txe_br_reg_read(hw, HISR_REG); aliveness = mei_txe_aliveness_get(dev); - if (hhisr & IPC_HHIER_SEC && aliveness) + if (hhisr & IPC_HHIER_SEC && aliveness) { ipc_isr = mei_txe_sec_reg_read_silent(hw, SEC_IPC_HOST_INT_STATUS_REG); - else + } else { ipc_isr = 0; + hhisr &= ~IPC_HHIER_SEC; + } generated = generated || (hisr & HISR_INT_STS_MSK) || From d624716b6c67e60681180786564b92ddb521148a Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Wed, 19 Oct 2016 18:33:29 -0600 Subject: [PATCH 532/884] sparc64: Setup a scheduling domain for highest level cache. Individual scheduler domain should consist different hierarchy consisting of cores sharing similar property. Currently, no scheduler domain is defined separately for the cores that shares the last level cache. As a result, the scheduler fails to take advantage of cache locality while migrating tasks during load balancing. Here are the cpu masks currently present for sparc that are/can be used in scheduler domain construction. cpu_core_map : set based on the cores that shares l1 cache. core_core_sib_map : is set based on the socket id. The prior SPARC notion of socket was defined as highest level of shared cache. However, the MD record on T7 platforms now describes the CPUs that share the physical socket and this is no longer tied to shared cache. That's why a separate cpu mask needs to be created that truly represent highest level of shared cache for all platforms. Signed-off-by: Atish Patra Reviewed-by: Chris Hyser Signed-off-by: David S. Miller --- arch/sparc/include/asm/cpudata_64.h | 5 +-- arch/sparc/include/asm/topology_64.h | 8 ++++- arch/sparc/kernel/mdesc.c | 46 +++++++++++++++++----------- arch/sparc/kernel/smp_64.c | 8 +++++ 4 files changed, 46 insertions(+), 21 deletions(-) diff --git a/arch/sparc/include/asm/cpudata_64.h b/arch/sparc/include/asm/cpudata_64.h index a6cfdabb6054..5b0ed48e5b0c 100644 --- a/arch/sparc/include/asm/cpudata_64.h +++ b/arch/sparc/include/asm/cpudata_64.h @@ -24,9 +24,10 @@ typedef struct { unsigned int icache_line_size; unsigned int ecache_size; unsigned int ecache_line_size; - unsigned short sock_id; + unsigned short sock_id; /* physical package */ unsigned short core_id; - int proc_id; + unsigned short max_cache_id; /* groupings of highest shared cache */ + unsigned short proc_id; /* strand (aka HW thread) id */ } cpuinfo_sparc; DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data); diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h index bec481aaca16..7b4898a36eee 100644 --- a/arch/sparc/include/asm/topology_64.h +++ b/arch/sparc/include/asm/topology_64.h @@ -44,14 +44,20 @@ int __node_distance(int, int); #define topology_physical_package_id(cpu) (cpu_data(cpu).proc_id) #define topology_core_id(cpu) (cpu_data(cpu).core_id) #define topology_core_cpumask(cpu) (&cpu_core_sib_map[cpu]) +#define topology_core_cache_cpumask(cpu) (&cpu_core_sib_cache_map[cpu]) #define topology_sibling_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu)) #endif /* CONFIG_SMP */ extern cpumask_t cpu_core_map[NR_CPUS]; extern cpumask_t cpu_core_sib_map[NR_CPUS]; +extern cpumask_t cpu_core_sib_cache_map[NR_CPUS]; + +/** + * Return cores that shares the last level cache. + */ static inline const struct cpumask *cpu_coregroup_mask(int cpu) { - return &cpu_core_map[cpu]; + return &cpu_core_sib_cache_map[cpu]; } #endif /* _ASM_SPARC64_TOPOLOGY_H */ diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c index 11228861d9b4..8a6982dfd733 100644 --- a/arch/sparc/kernel/mdesc.c +++ b/arch/sparc/kernel/mdesc.c @@ -645,13 +645,20 @@ static void __mark_core_id(struct mdesc_handle *hp, u64 node, cpu_data(*id).core_id = core_id; } -static void __mark_sock_id(struct mdesc_handle *hp, u64 node, - int sock_id) +static void __mark_max_cache_id(struct mdesc_handle *hp, u64 node, + int max_cache_id) { const u64 *id = mdesc_get_property(hp, node, "id", NULL); - if (*id < num_possible_cpus()) - cpu_data(*id).sock_id = sock_id; + if (*id < num_possible_cpus()) { + cpu_data(*id).max_cache_id = max_cache_id; + + /** + * On systems without explicit socket descriptions socket + * is max_cache_id + */ + cpu_data(*id).sock_id = max_cache_id; + } } static void mark_core_ids(struct mdesc_handle *hp, u64 mp, @@ -660,10 +667,11 @@ static void mark_core_ids(struct mdesc_handle *hp, u64 mp, find_back_node_value(hp, mp, "cpu", __mark_core_id, core_id, 10); } -static void mark_sock_ids(struct mdesc_handle *hp, u64 mp, - int sock_id) +static void mark_max_cache_ids(struct mdesc_handle *hp, u64 mp, + int max_cache_id) { - find_back_node_value(hp, mp, "cpu", __mark_sock_id, sock_id, 10); + find_back_node_value(hp, mp, "cpu", __mark_max_cache_id, + max_cache_id, 10); } static void set_core_ids(struct mdesc_handle *hp) @@ -694,14 +702,15 @@ static void set_core_ids(struct mdesc_handle *hp) } } -static int set_sock_ids_by_cache(struct mdesc_handle *hp, int level) +static int set_max_cache_ids_by_cache(struct mdesc_handle *hp, int level) { u64 mp; int idx = 1; int fnd = 0; - /* Identify unique sockets by looking for cpus backpointed to by - * shared level n caches. + /** + * Identify unique highest level of shared cache by looking for cpus + * backpointed to by shared level N caches. */ mdesc_for_each_node_by_name(hp, mp, "cache") { const u64 *cur_lvl; @@ -709,8 +718,7 @@ static int set_sock_ids_by_cache(struct mdesc_handle *hp, int level) cur_lvl = mdesc_get_property(hp, mp, "level", NULL); if (*cur_lvl != level) continue; - - mark_sock_ids(hp, mp, idx); + mark_max_cache_ids(hp, mp, idx); idx++; fnd = 1; } @@ -745,15 +753,17 @@ static void set_sock_ids(struct mdesc_handle *hp) { u64 mp; - /* If machine description exposes sockets data use it. - * Otherwise fallback to use shared L3 or L2 caches. + /** + * Find the highest level of shared cache which pre-T7 is also + * the socket. */ + if (!set_max_cache_ids_by_cache(hp, 3)) + set_max_cache_ids_by_cache(hp, 2); + + /* If machine description exposes sockets data use it.*/ mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "sockets"); if (mp != MDESC_NODE_NULL) - return set_sock_ids_by_socket(hp, mp); - - if (!set_sock_ids_by_cache(hp, 3)) - set_sock_ids_by_cache(hp, 2); + set_sock_ids_by_socket(hp, mp); } static void mark_proc_ids(struct mdesc_handle *hp, u64 mp, int proc_id) diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index d3035ba6cd31..8182f7caf5b1 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -63,9 +63,13 @@ cpumask_t cpu_core_map[NR_CPUS] __read_mostly = cpumask_t cpu_core_sib_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = CPU_MASK_NONE }; +cpumask_t cpu_core_sib_cache_map[NR_CPUS] __read_mostly = { + [0 ... NR_CPUS - 1] = CPU_MASK_NONE }; + EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); EXPORT_SYMBOL(cpu_core_map); EXPORT_SYMBOL(cpu_core_sib_map); +EXPORT_SYMBOL(cpu_core_sib_cache_map); static cpumask_t smp_commenced_mask; @@ -1265,6 +1269,10 @@ void smp_fill_in_sib_core_maps(void) unsigned int j; for_each_present_cpu(j) { + if (cpu_data(i).max_cache_id == + cpu_data(j).max_cache_id) + cpumask_set_cpu(j, &cpu_core_sib_cache_map[i]); + if (cpu_data(i).sock_id == cpu_data(j).sock_id) cpumask_set_cpu(j, &cpu_core_sib_map[i]); } From 844bdf1b2a2f1790eba9e679bda1c632ee744d4e Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Fri, 21 Oct 2016 15:25:54 +0200 Subject: [PATCH 533/884] sparc64: Fix old style declaration GCC warnings Fix [-Wold-style-declaration] GCC warnings by moving the inline keyword before the return type. Signed-off-by: Tobias Klnuser Signed-off-by: David S. Miller --- arch/sparc/include/asm/spinlock_64.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/sparc/include/asm/spinlock_64.h b/arch/sparc/include/asm/spinlock_64.h index 87990b7c6b0d..07c9f2e9bf57 100644 --- a/arch/sparc/include/asm/spinlock_64.h +++ b/arch/sparc/include/asm/spinlock_64.h @@ -96,7 +96,7 @@ static inline void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long fla /* Multi-reader locks, these are much saner than the 32-bit Sparc ones... */ -static void inline arch_read_lock(arch_rwlock_t *lock) +static inline void arch_read_lock(arch_rwlock_t *lock) { unsigned long tmp1, tmp2; @@ -119,7 +119,7 @@ static void inline arch_read_lock(arch_rwlock_t *lock) : "memory"); } -static int inline arch_read_trylock(arch_rwlock_t *lock) +static inline int arch_read_trylock(arch_rwlock_t *lock) { int tmp1, tmp2; @@ -140,7 +140,7 @@ static int inline arch_read_trylock(arch_rwlock_t *lock) return tmp1; } -static void inline arch_read_unlock(arch_rwlock_t *lock) +static inline void arch_read_unlock(arch_rwlock_t *lock) { unsigned long tmp1, tmp2; @@ -156,7 +156,7 @@ static void inline arch_read_unlock(arch_rwlock_t *lock) : "memory"); } -static void inline arch_write_lock(arch_rwlock_t *lock) +static inline void arch_write_lock(arch_rwlock_t *lock) { unsigned long mask, tmp1, tmp2; @@ -181,7 +181,7 @@ static void inline arch_write_lock(arch_rwlock_t *lock) : "memory"); } -static void inline arch_write_unlock(arch_rwlock_t *lock) +static inline void arch_write_unlock(arch_rwlock_t *lock) { __asm__ __volatile__( " stw %%g0, [%0]" @@ -190,7 +190,7 @@ static void inline arch_write_unlock(arch_rwlock_t *lock) : "memory"); } -static int inline arch_write_trylock(arch_rwlock_t *lock) +static inline int arch_write_trylock(arch_rwlock_t *lock) { unsigned long mask, tmp1, tmp2, result; From ee9e83973d54d94ff776892219b723de54429548 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Fri, 21 Oct 2016 15:39:03 +0200 Subject: [PATCH 534/884] sparc32: Fix old style declaration GCC warnings Fix [-Wold-style-declaration] GCC warnings by moving the inline keyword before the return type. Signed-off-by: Tobias Klauser Signed-off-by: David S. Miller --- arch/sparc/include/asm/spinlock_32.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h index d9c5876c6121..8011e79f59c9 100644 --- a/arch/sparc/include/asm/spinlock_32.h +++ b/arch/sparc/include/asm/spinlock_32.h @@ -134,7 +134,7 @@ static inline void arch_write_lock(arch_rwlock_t *rw) *(volatile __u32 *)&lp->lock = ~0U; } -static void inline arch_write_unlock(arch_rwlock_t *lock) +static inline void arch_write_unlock(arch_rwlock_t *lock) { __asm__ __volatile__( " st %%g0, [%0]" From aa95ce361ed95c72ac42dcb315166bce5cf1a014 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 10 Aug 2016 14:41:33 -0700 Subject: [PATCH 535/884] sparc64: Delete __ret_efault. It is completely unused. Signed-off-by: David S. Miller --- arch/sparc/include/asm/uaccess_64.h | 1 - arch/sparc/kernel/head_64.S | 8 +------- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h index b68acc563235..f8518df34b63 100644 --- a/arch/sparc/include/asm/uaccess_64.h +++ b/arch/sparc/include/asm/uaccess_64.h @@ -82,7 +82,6 @@ static inline int access_ok(int type, const void __user * addr, unsigned long si return 1; } -void __ret_efault(void); void __retl_efault(void); /* Uh, these should become the main single-value transfer routines.. diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index beba6c11554c..603d73654295 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -926,13 +926,7 @@ tlb_type: .word 0 /* Must NOT end up in BSS */ EXPORT_SYMBOL(tlb_type) .section ".fixup",#alloc,#execinstr - .globl __ret_efault, __retl_efault, __ret_one, __retl_one -ENTRY(__ret_efault) - ret - restore %g0, -EFAULT, %o0 -ENDPROC(__ret_efault) -EXPORT_SYMBOL(__ret_efault) - + .globl __retl_efault, __ret_one, __retl_one ENTRY(__retl_efault) retl mov -EFAULT, %o0 From d62a9025aee446994a706c711e45c6a655d9d348 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 21 Oct 2016 07:33:57 +0300 Subject: [PATCH 536/884] orangefs: user file_inode() where it is due Replace wrong use of file->f_path.dentry->d_inode with file_inode(file). In case orangefs ever finds itself as an overelayfs layer, it would want to get its own inode and not overlayfs's inode. DISCLAIMER: I did not test this patch because I do not know how to setup an orangefs mount Signed-off-by: Amir Goldstein Signed-off-by: Mike Marshall --- fs/orangefs/file.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index 66ea0cc37b18..02cc6139ec90 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -621,9 +621,9 @@ static int orangefs_file_release(struct inode *inode, struct file *file) * readahead cache (if any); this forces an expensive refresh of * data for the next caller of mmap (or 'get_block' accesses) */ - if (file->f_path.dentry->d_inode && - file->f_path.dentry->d_inode->i_mapping && - mapping_nrpages(&file->f_path.dentry->d_inode->i_data)) { + if (file_inode(file) && + file_inode(file)->i_mapping && + mapping_nrpages(&file_inode(file)->i_data)) { if (orangefs_features & ORANGEFS_FEATURE_READAHEAD) { gossip_debug(GOSSIP_INODE_DEBUG, "calling flush_racache on %pU\n", @@ -632,7 +632,7 @@ static int orangefs_file_release(struct inode *inode, struct file *file) gossip_debug(GOSSIP_INODE_DEBUG, "flush_racache finished\n"); } - truncate_inode_pages(file->f_path.dentry->d_inode->i_mapping, + truncate_inode_pages(file_inode(file)->i_mapping, 0); } return 0; @@ -648,7 +648,7 @@ static int orangefs_fsync(struct file *file, { int ret = -EINVAL; struct orangefs_inode_s *orangefs_inode = - ORANGEFS_I(file->f_path.dentry->d_inode); + ORANGEFS_I(file_inode(file)); struct orangefs_kernel_op_s *new_op = NULL; /* required call */ @@ -661,7 +661,7 @@ static int orangefs_fsync(struct file *file, ret = service_operation(new_op, "orangefs_fsync", - get_interruptible_flag(file->f_path.dentry->d_inode)); + get_interruptible_flag(file_inode(file))); gossip_debug(GOSSIP_FILE_DEBUG, "orangefs_fsync got return value of %d\n", @@ -669,7 +669,7 @@ static int orangefs_fsync(struct file *file, op_release(new_op); - orangefs_flush_inode(file->f_path.dentry->d_inode); + orangefs_flush_inode(file_inode(file)); return ret; } From 83a17d2661674d8c198adc0e183418f72aabab79 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 15 Aug 2016 14:47:54 -0700 Subject: [PATCH 537/884] sparc64: Prepare to move to more saner user copy exception handling. The fixup helper function mechanism for handling user copy fault handling is not %100 accurrate, and can never be made so. We are going to transition the code to return the running return return length, which is always kept track in one or more registers of each of these routines. In order to convert them one by one, we have to allow the existing behavior to continue functioning. Therefore make all the copy code that wants the fixup helper to be used return negative one. After all of the user copy routines have been converted, this logic and the fixup helpers themselves can be removed completely. Signed-off-by: David S. Miller --- arch/sparc/include/asm/uaccess_64.h | 21 +++++++++++++++------ arch/sparc/kernel/head_64.S | 23 +++++++++++------------ arch/sparc/lib/GENcopy_from_user.S | 2 +- arch/sparc/lib/GENcopy_to_user.S | 2 +- arch/sparc/lib/NG2copy_from_user.S | 4 ++-- arch/sparc/lib/NG2copy_to_user.S | 4 ++-- arch/sparc/lib/NG4copy_from_user.S | 4 ++-- arch/sparc/lib/NG4copy_to_user.S | 4 ++-- arch/sparc/lib/NGcopy_from_user.S | 2 +- arch/sparc/lib/NGcopy_to_user.S | 2 +- arch/sparc/lib/U1copy_from_user.S | 4 ++-- arch/sparc/lib/U1copy_to_user.S | 4 ++-- arch/sparc/lib/U3copy_from_user.S | 4 ++-- arch/sparc/lib/U3copy_to_user.S | 4 ++-- arch/sparc/lib/copy_in_user.S | 2 +- 15 files changed, 47 insertions(+), 39 deletions(-) diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h index f8518df34b63..0244012435c8 100644 --- a/arch/sparc/include/asm/uaccess_64.h +++ b/arch/sparc/include/asm/uaccess_64.h @@ -198,8 +198,11 @@ copy_from_user(void *to, const void __user *from, unsigned long size) check_object_size(to, size, false); ret = ___copy_from_user(to, from, size); - if (unlikely(ret)) - ret = copy_from_user_fixup(to, from, size); + if (unlikely(ret)) { + if ((long)ret < 0) + ret = copy_from_user_fixup(to, from, size); + return ret; + } return ret; } @@ -218,8 +221,11 @@ copy_to_user(void __user *to, const void *from, unsigned long size) check_object_size(from, size, true); ret = ___copy_to_user(to, from, size); - if (unlikely(ret)) - ret = copy_to_user_fixup(to, from, size); + if (unlikely(ret)) { + if ((long)ret < 0) + ret = copy_to_user_fixup(to, from, size); + return ret; + } return ret; } #define __copy_to_user copy_to_user @@ -234,8 +240,11 @@ copy_in_user(void __user *to, void __user *from, unsigned long size) { unsigned long ret = ___copy_in_user(to, from, size); - if (unlikely(ret)) - ret = copy_in_user_fixup(to, from, size); + if (unlikely(ret)) { + if ((long)ret < 0) + ret = copy_in_user_fixup(to, from, size); + return ret; + } return ret; } #define __copy_in_user copy_in_user diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index 603d73654295..5f17de6b5fc3 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -926,41 +926,40 @@ tlb_type: .word 0 /* Must NOT end up in BSS */ EXPORT_SYMBOL(tlb_type) .section ".fixup",#alloc,#execinstr - .globl __retl_efault, __ret_one, __retl_one ENTRY(__retl_efault) retl mov -EFAULT, %o0 ENDPROC(__retl_efault) -ENTRY(__retl_one) +ENTRY(__retl_mone) retl - mov 1, %o0 -ENDPROC(__retl_one) + mov -1, %o0 +ENDPROC(__retl_mone) -ENTRY(__retl_one_fp) +ENTRY(__retl_mone_fp) VISExitHalf retl mov 1, %o0 -ENDPROC(__retl_one_fp) +ENDPROC(__retl_mone_fp) -ENTRY(__ret_one_asi) +ENTRY(__ret_mone_asi) wr %g0, ASI_AIUS, %asi ret restore %g0, 1, %o0 -ENDPROC(__ret_one_asi) +ENDPROC(__ret_mone_asi) -ENTRY(__retl_one_asi) +ENTRY(__retl_mone_asi) wr %g0, ASI_AIUS, %asi retl mov 1, %o0 -ENDPROC(__retl_one_asi) +ENDPROC(__retl_mone_asi) -ENTRY(__retl_one_asi_fp) +ENTRY(__retl_mone_asi_fp) wr %g0, ASI_AIUS, %asi VISExitHalf retl mov 1, %o0 -ENDPROC(__retl_one_asi_fp) +ENDPROC(__retl_mone_asi_fp) ENTRY(__retl_o1) retl diff --git a/arch/sparc/lib/GENcopy_from_user.S b/arch/sparc/lib/GENcopy_from_user.S index b7d0bd6b1406..5bce68202246 100644 --- a/arch/sparc/lib/GENcopy_from_user.S +++ b/arch/sparc/lib/GENcopy_from_user.S @@ -7,7 +7,7 @@ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one; \ + .word 98b, __retl_mone; \ .text; \ .align 4; diff --git a/arch/sparc/lib/GENcopy_to_user.S b/arch/sparc/lib/GENcopy_to_user.S index 780550e1afc7..f663ce3ee8d9 100644 --- a/arch/sparc/lib/GENcopy_to_user.S +++ b/arch/sparc/lib/GENcopy_to_user.S @@ -7,7 +7,7 @@ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one; \ + .word 98b, __retl_mone; \ .text; \ .align 4; diff --git a/arch/sparc/lib/NG2copy_from_user.S b/arch/sparc/lib/NG2copy_from_user.S index d5242b8c4f94..4d47fa5519d4 100644 --- a/arch/sparc/lib/NG2copy_from_user.S +++ b/arch/sparc/lib/NG2copy_from_user.S @@ -7,7 +7,7 @@ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_asi;\ + .word 98b, __retl_mone_asi;\ .text; \ .align 4; @@ -15,7 +15,7 @@ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_asi_fp;\ + .word 98b, __retl_mone_asi_fp;\ .text; \ .align 4; diff --git a/arch/sparc/lib/NG2copy_to_user.S b/arch/sparc/lib/NG2copy_to_user.S index 4e962d993b10..2078d752709a 100644 --- a/arch/sparc/lib/NG2copy_to_user.S +++ b/arch/sparc/lib/NG2copy_to_user.S @@ -7,7 +7,7 @@ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_asi;\ + .word 98b, __retl_mone_asi;\ .text; \ .align 4; @@ -15,7 +15,7 @@ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_asi_fp;\ + .word 98b, __retl_mone_asi_fp;\ .text; \ .align 4; diff --git a/arch/sparc/lib/NG4copy_from_user.S b/arch/sparc/lib/NG4copy_from_user.S index 2e8ee7ad07a9..f9746e7cf25e 100644 --- a/arch/sparc/lib/NG4copy_from_user.S +++ b/arch/sparc/lib/NG4copy_from_user.S @@ -7,7 +7,7 @@ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_asi;\ + .word 98b, __retl_mone_asi;\ .text; \ .align 4; @@ -15,7 +15,7 @@ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_asi_fp;\ + .word 98b, __retl_mone_asi_fp;\ .text; \ .align 4; diff --git a/arch/sparc/lib/NG4copy_to_user.S b/arch/sparc/lib/NG4copy_to_user.S index be0bf4590df8..5fa44349adde 100644 --- a/arch/sparc/lib/NG4copy_to_user.S +++ b/arch/sparc/lib/NG4copy_to_user.S @@ -7,7 +7,7 @@ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_asi;\ + .word 98b, __retl_mone_asi;\ .text; \ .align 4; @@ -15,7 +15,7 @@ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_asi_fp;\ + .word 98b, __retl_mone_asi_fp;\ .text; \ .align 4; diff --git a/arch/sparc/lib/NGcopy_from_user.S b/arch/sparc/lib/NGcopy_from_user.S index 5d1e4d1ac21e..e61694c444af 100644 --- a/arch/sparc/lib/NGcopy_from_user.S +++ b/arch/sparc/lib/NGcopy_from_user.S @@ -7,7 +7,7 @@ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __ret_one_asi;\ + .word 98b, __ret_mone_asi;\ .text; \ .align 4; diff --git a/arch/sparc/lib/NGcopy_to_user.S b/arch/sparc/lib/NGcopy_to_user.S index ff630dcb273c..2d6b33d3998f 100644 --- a/arch/sparc/lib/NGcopy_to_user.S +++ b/arch/sparc/lib/NGcopy_to_user.S @@ -7,7 +7,7 @@ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __ret_one_asi;\ + .word 98b, __ret_mone_asi;\ .text; \ .align 4; diff --git a/arch/sparc/lib/U1copy_from_user.S b/arch/sparc/lib/U1copy_from_user.S index ecc5692fa2b4..1ad59fbac7a7 100644 --- a/arch/sparc/lib/U1copy_from_user.S +++ b/arch/sparc/lib/U1copy_from_user.S @@ -7,7 +7,7 @@ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one; \ + .word 98b, __retl_mone; \ .text; \ .align 4; @@ -15,7 +15,7 @@ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_fp;\ + .word 98b, __retl_mone_fp;\ .text; \ .align 4; diff --git a/arch/sparc/lib/U1copy_to_user.S b/arch/sparc/lib/U1copy_to_user.S index 9eea392e44d4..adcc3a510185 100644 --- a/arch/sparc/lib/U1copy_to_user.S +++ b/arch/sparc/lib/U1copy_to_user.S @@ -7,7 +7,7 @@ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one; \ + .word 98b, __retl_mone; \ .text; \ .align 4; @@ -15,7 +15,7 @@ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_fp;\ + .word 98b, __retl_mone_fp;\ .text; \ .align 4; diff --git a/arch/sparc/lib/U3copy_from_user.S b/arch/sparc/lib/U3copy_from_user.S index 88ad73d86fe4..1046e2b083fe 100644 --- a/arch/sparc/lib/U3copy_from_user.S +++ b/arch/sparc/lib/U3copy_from_user.S @@ -7,7 +7,7 @@ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one; \ + .word 98b, __retl_mone; \ .text; \ .align 4; @@ -15,7 +15,7 @@ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_fp;\ + .word 98b, __retl_mone_fp;\ .text; \ .align 4; diff --git a/arch/sparc/lib/U3copy_to_user.S b/arch/sparc/lib/U3copy_to_user.S index 845139d75537..032b0c5419b0 100644 --- a/arch/sparc/lib/U3copy_to_user.S +++ b/arch/sparc/lib/U3copy_to_user.S @@ -7,7 +7,7 @@ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one; \ + .word 98b, __retl_mone; \ .text; \ .align 4; @@ -15,7 +15,7 @@ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_fp;\ + .word 98b, __retl_mone_fp;\ .text; \ .align 4; diff --git a/arch/sparc/lib/copy_in_user.S b/arch/sparc/lib/copy_in_user.S index 482de093bdae..86e6663dd4e0 100644 --- a/arch/sparc/lib/copy_in_user.S +++ b/arch/sparc/lib/copy_in_user.S @@ -13,7 +13,7 @@ 98: x,y; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one; \ + .word 98b, __retl_mone; \ .text; \ .align 4; From 0096ac9f47b1a2e851b3165d44065d18e5f13d58 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 15 Aug 2016 15:08:18 -0700 Subject: [PATCH 538/884] sparc64: Convert copy_in_user to accurate exception reporting. Report the exact number of bytes which have not been successfully copied when an exception occurs, using the running remaining length. Signed-off-by: David S. Miller --- arch/sparc/lib/copy_in_user.S | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/arch/sparc/lib/copy_in_user.S b/arch/sparc/lib/copy_in_user.S index 86e6663dd4e0..0252b218de45 100644 --- a/arch/sparc/lib/copy_in_user.S +++ b/arch/sparc/lib/copy_in_user.S @@ -9,18 +9,33 @@ #define XCC xcc -#define EX(x,y) \ +#define EX(x,y,z) \ 98: x,y; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_mone; \ + .word 98b, z; \ .text; \ .align 4; +#define EX_O4(x,y) EX(x,y,__retl_o4_plus_8) +#define EX_O2_4(x,y) EX(x,y,__retl_o2_plus_4) +#define EX_O2_1(x,y) EX(x,y,__retl_o2_plus_1) + .register %g2,#scratch .register %g3,#scratch .text +__retl_o4_plus_8: + add %o4, %o2, %o4 + retl + add %o4, 8, %o0 +__retl_o2_plus_4: + retl + add %o2, 4, %o0 +__retl_o2_plus_1: + retl + add %o2, 1, %o0 + .align 32 /* Don't try to get too fancy here, just nice and @@ -45,8 +60,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */ andn %o2, 0x7, %o4 and %o2, 0x7, %o2 1: subcc %o4, 0x8, %o4 - EX(ldxa [%o1] %asi, %o5) - EX(stxa %o5, [%o0] %asi) + EX_O4(ldxa [%o1] %asi, %o5) + EX_O4(stxa %o5, [%o0] %asi) add %o1, 0x8, %o1 bgu,pt %XCC, 1b add %o0, 0x8, %o0 @@ -54,8 +69,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */ be,pt %XCC, 1f nop sub %o2, 0x4, %o2 - EX(lduwa [%o1] %asi, %o5) - EX(stwa %o5, [%o0] %asi) + EX_O2_4(lduwa [%o1] %asi, %o5) + EX_O2_4(stwa %o5, [%o0] %asi) add %o1, 0x4, %o1 add %o0, 0x4, %o0 1: cmp %o2, 0 @@ -71,8 +86,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */ 82: subcc %o2, 4, %o2 - EX(lduwa [%o1] %asi, %g1) - EX(stwa %g1, [%o0] %asi) + EX_O2_4(lduwa [%o1] %asi, %g1) + EX_O2_4(stwa %g1, [%o0] %asi) add %o1, 4, %o1 bgu,pt %XCC, 82b add %o0, 4, %o0 @@ -83,8 +98,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */ .align 32 90: subcc %o2, 1, %o2 - EX(lduba [%o1] %asi, %g1) - EX(stba %g1, [%o0] %asi) + EX_O2_1(lduba [%o1] %asi, %g1) + EX_O2_1(stba %g1, [%o0] %asi) add %o1, 1, %o1 bgu,pt %XCC, 90b add %o0, 1, %o0 From d0796b555ba60c22eb41ae39a8362156cb08eee9 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 15 Aug 2016 15:26:38 -0700 Subject: [PATCH 539/884] sparc64: Convert GENcopy_{from,to}_user to accurate exception reporting. Report the exact number of bytes which have not been successfully copied when an exception occurs, using the running remaining length. Signed-off-by: David S. Miller --- arch/sparc/lib/GENcopy_from_user.S | 4 +-- arch/sparc/lib/GENcopy_to_user.S | 4 +-- arch/sparc/lib/GENmemcpy.S | 48 +++++++++++++++++++++--------- 3 files changed, 38 insertions(+), 18 deletions(-) diff --git a/arch/sparc/lib/GENcopy_from_user.S b/arch/sparc/lib/GENcopy_from_user.S index 5bce68202246..69a439fa2fc1 100644 --- a/arch/sparc/lib/GENcopy_from_user.S +++ b/arch/sparc/lib/GENcopy_from_user.S @@ -3,11 +3,11 @@ * Copyright (C) 2007 David S. Miller (davem@davemloft.net) */ -#define EX_LD(x) \ +#define EX_LD(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_mone; \ + .word 98b, y; \ .text; \ .align 4; diff --git a/arch/sparc/lib/GENcopy_to_user.S b/arch/sparc/lib/GENcopy_to_user.S index f663ce3ee8d9..9947427ce354 100644 --- a/arch/sparc/lib/GENcopy_to_user.S +++ b/arch/sparc/lib/GENcopy_to_user.S @@ -3,11 +3,11 @@ * Copyright (C) 2007 David S. Miller (davem@davemloft.net) */ -#define EX_ST(x) \ +#define EX_ST(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_mone; \ + .word 98b, y; \ .text; \ .align 4; diff --git a/arch/sparc/lib/GENmemcpy.S b/arch/sparc/lib/GENmemcpy.S index 89358ee94851..059ea24ad73d 100644 --- a/arch/sparc/lib/GENmemcpy.S +++ b/arch/sparc/lib/GENmemcpy.S @@ -4,21 +4,18 @@ */ #ifdef __KERNEL__ +#include #define GLOBAL_SPARE %g7 #else #define GLOBAL_SPARE %g5 #endif #ifndef EX_LD -#define EX_LD(x) x +#define EX_LD(x,y) x #endif #ifndef EX_ST -#define EX_ST(x) x -#endif - -#ifndef EX_RETVAL -#define EX_RETVAL(x) x +#define EX_ST(x,y) x #endif #ifndef LOAD @@ -45,6 +42,29 @@ .register %g3,#scratch .text + +#ifndef EX_RETVAL +#define EX_RETVAL(x) x +ENTRY(GEN_retl_o4_1) + add %o4, %o2, %o4 + retl + add %o4, 1, %o0 +ENDPROC(GEN_retl_o4_1) +ENTRY(GEN_retl_g1_8) + add %g1, %o2, %g1 + retl + add %g1, 8, %o0 +ENDPROC(GEN_retl_g1_8) +ENTRY(GEN_retl_o2_4) + retl + add %o2, 4, %o0 +ENDPROC(GEN_retl_o2_4) +ENTRY(GEN_retl_o2_1) + retl + add %o2, 1, %o0 +ENDPROC(GEN_retl_o2_1) +#endif + .align 64 .globl FUNC_NAME @@ -73,8 +93,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ sub %g0, %o4, %o4 sub %o2, %o4, %o2 1: subcc %o4, 1, %o4 - EX_LD(LOAD(ldub, %o1, %g1)) - EX_ST(STORE(stb, %g1, %o0)) + EX_LD(LOAD(ldub, %o1, %g1),GEN_retl_o4_1) + EX_ST(STORE(stb, %g1, %o0),GEN_retl_o4_1) add %o1, 1, %o1 bne,pt %XCC, 1b add %o0, 1, %o0 @@ -82,8 +102,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ andn %o2, 0x7, %g1 sub %o2, %g1, %o2 1: subcc %g1, 0x8, %g1 - EX_LD(LOAD(ldx, %o1, %g2)) - EX_ST(STORE(stx, %g2, %o0)) + EX_LD(LOAD(ldx, %o1, %g2),GEN_retl_g1_8) + EX_ST(STORE(stx, %g2, %o0),GEN_retl_g1_8) add %o1, 0x8, %o1 bne,pt %XCC, 1b add %o0, 0x8, %o0 @@ -100,8 +120,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 1: subcc %o2, 4, %o2 - EX_LD(LOAD(lduw, %o1, %g1)) - EX_ST(STORE(stw, %g1, %o1 + %o3)) + EX_LD(LOAD(lduw, %o1, %g1),GEN_retl_o2_4) + EX_ST(STORE(stw, %g1, %o1 + %o3),GEN_retl_o2_4) bgu,pt %XCC, 1b add %o1, 4, %o1 @@ -111,8 +131,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ .align 32 90: subcc %o2, 1, %o2 - EX_LD(LOAD(ldub, %o1, %g1)) - EX_ST(STORE(stb, %g1, %o1 + %o3)) + EX_LD(LOAD(ldub, %o1, %g1),GEN_retl_o2_1) + EX_ST(STORE(stb, %g1, %o1 + %o3),GEN_retl_o2_1) bgu,pt %XCC, 90b add %o1, 1, %o1 retl From cb736fdbb208eb3420f1a2eb2bfc024a6e9dcada Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 15 Aug 2016 16:07:50 -0700 Subject: [PATCH 540/884] sparc64: Convert U1copy_{from,to}_user to accurate exception reporting. Report the exact number of bytes which have not been successfully copied when an exception occurs, using the running remaining length. Signed-off-by: David S. Miller --- arch/sparc/lib/U1copy_from_user.S | 8 +- arch/sparc/lib/U1copy_to_user.S | 8 +- arch/sparc/lib/U1memcpy.S | 341 ++++++++++++++++++++---------- 3 files changed, 235 insertions(+), 122 deletions(-) diff --git a/arch/sparc/lib/U1copy_from_user.S b/arch/sparc/lib/U1copy_from_user.S index 1ad59fbac7a7..bb6ff73229e3 100644 --- a/arch/sparc/lib/U1copy_from_user.S +++ b/arch/sparc/lib/U1copy_from_user.S @@ -3,19 +3,19 @@ * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) */ -#define EX_LD(x) \ +#define EX_LD(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_mone; \ + .word 98b, y; \ .text; \ .align 4; -#define EX_LD_FP(x) \ +#define EX_LD_FP(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_mone_fp;\ + .word 98b, y; \ .text; \ .align 4; diff --git a/arch/sparc/lib/U1copy_to_user.S b/arch/sparc/lib/U1copy_to_user.S index adcc3a510185..ed92ce739558 100644 --- a/arch/sparc/lib/U1copy_to_user.S +++ b/arch/sparc/lib/U1copy_to_user.S @@ -3,19 +3,19 @@ * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) */ -#define EX_ST(x) \ +#define EX_ST(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_mone; \ + .word 98b, y; \ .text; \ .align 4; -#define EX_ST_FP(x) \ +#define EX_ST_FP(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_mone_fp;\ + .word 98b, y; \ .text; \ .align 4; diff --git a/arch/sparc/lib/U1memcpy.S b/arch/sparc/lib/U1memcpy.S index 97e1b211090c..4f0d50b33a72 100644 --- a/arch/sparc/lib/U1memcpy.S +++ b/arch/sparc/lib/U1memcpy.S @@ -5,6 +5,7 @@ */ #ifdef __KERNEL__ +#include #include #include #include @@ -24,21 +25,17 @@ #endif #ifndef EX_LD -#define EX_LD(x) x +#define EX_LD(x,y) x #endif #ifndef EX_LD_FP -#define EX_LD_FP(x) x +#define EX_LD_FP(x,y) x #endif #ifndef EX_ST -#define EX_ST(x) x +#define EX_ST(x,y) x #endif #ifndef EX_ST_FP -#define EX_ST_FP(x) x -#endif - -#ifndef EX_RETVAL -#define EX_RETVAL(x) x +#define EX_ST_FP(x,y) x #endif #ifndef LOAD @@ -79,53 +76,169 @@ faligndata %f7, %f8, %f60; \ faligndata %f8, %f9, %f62; -#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt) \ - EX_LD_FP(LOAD_BLK(%src, %fdest)); \ - EX_ST_FP(STORE_BLK(%fsrc, %dest)); \ - add %src, 0x40, %src; \ - subcc %len, 0x40, %len; \ - be,pn %xcc, jmptgt; \ - add %dest, 0x40, %dest; \ +#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, jmptgt) \ + EX_LD_FP(LOAD_BLK(%src, %fdest), U1_gs_80_fp); \ + EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp); \ + add %src, 0x40, %src; \ + subcc %GLOBAL_SPARE, 0x40, %GLOBAL_SPARE; \ + be,pn %xcc, jmptgt; \ + add %dest, 0x40, %dest; \ -#define LOOP_CHUNK1(src, dest, len, branch_dest) \ - MAIN_LOOP_CHUNK(src, dest, f0, f48, len, branch_dest) -#define LOOP_CHUNK2(src, dest, len, branch_dest) \ - MAIN_LOOP_CHUNK(src, dest, f16, f48, len, branch_dest) -#define LOOP_CHUNK3(src, dest, len, branch_dest) \ - MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest) +#define LOOP_CHUNK1(src, dest, branch_dest) \ + MAIN_LOOP_CHUNK(src, dest, f0, f48, branch_dest) +#define LOOP_CHUNK2(src, dest, branch_dest) \ + MAIN_LOOP_CHUNK(src, dest, f16, f48, branch_dest) +#define LOOP_CHUNK3(src, dest, branch_dest) \ + MAIN_LOOP_CHUNK(src, dest, f32, f48, branch_dest) #define DO_SYNC membar #Sync; #define STORE_SYNC(dest, fsrc) \ - EX_ST_FP(STORE_BLK(%fsrc, %dest)); \ + EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp); \ add %dest, 0x40, %dest; \ DO_SYNC #define STORE_JUMP(dest, fsrc, target) \ - EX_ST_FP(STORE_BLK(%fsrc, %dest)); \ + EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_40_fp); \ add %dest, 0x40, %dest; \ ba,pt %xcc, target; \ nop; -#define FINISH_VISCHUNK(dest, f0, f1, left) \ - subcc %left, 8, %left;\ - bl,pn %xcc, 95f; \ - faligndata %f0, %f1, %f48; \ - EX_ST_FP(STORE(std, %f48, %dest)); \ +#define FINISH_VISCHUNK(dest, f0, f1) \ + subcc %g3, 8, %g3; \ + bl,pn %xcc, 95f; \ + faligndata %f0, %f1, %f48; \ + EX_ST_FP(STORE(std, %f48, %dest), U1_g3_8_fp); \ add %dest, 8, %dest; -#define UNEVEN_VISCHUNK_LAST(dest, f0, f1, left) \ - subcc %left, 8, %left; \ - bl,pn %xcc, 95f; \ +#define UNEVEN_VISCHUNK_LAST(dest, f0, f1) \ + subcc %g3, 8, %g3; \ + bl,pn %xcc, 95f; \ fsrc2 %f0, %f1; -#define UNEVEN_VISCHUNK(dest, f0, f1, left) \ - UNEVEN_VISCHUNK_LAST(dest, f0, f1, left) \ +#define UNEVEN_VISCHUNK(dest, f0, f1) \ + UNEVEN_VISCHUNK_LAST(dest, f0, f1) \ ba,a,pt %xcc, 93f; .register %g2,#scratch .register %g3,#scratch .text +#ifndef EX_RETVAL +#define EX_RETVAL(x) x +ENTRY(U1_g1_1_fp) + VISExitHalf + add %g1, 1, %g1 + add %g1, %g2, %g1 + retl + add %g1, %o2, %o0 +ENDPROC(U1_g1_1_fp) +ENTRY(U1_g2_0_fp) + VISExitHalf + retl + add %g2, %o2, %o0 +ENDPROC(U1_g2_0_fp) +ENTRY(U1_g2_8_fp) + VISExitHalf + add %g2, 8, %g2 + retl + add %g2, %o2, %o0 +ENDPROC(U1_g2_8_fp) +ENTRY(U1_gs_0_fp) + VISExitHalf + add %GLOBAL_SPARE, %g3, %o0 + retl + add %o0, %o2, %o0 +ENDPROC(U1_gs_0_fp) +ENTRY(U1_gs_80_fp) + VISExitHalf + add %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE + add %GLOBAL_SPARE, %g3, %o0 + retl + add %o0, %o2, %o0 +ENDPROC(U1_gs_80_fp) +ENTRY(U1_gs_40_fp) + VISExitHalf + add %GLOBAL_SPARE, 0x40, %GLOBAL_SPARE + add %GLOBAL_SPARE, %g3, %o0 + retl + add %o0, %o2, %o0 +ENDPROC(U1_gs_40_fp) +ENTRY(U1_g3_0_fp) + VISExitHalf + retl + add %g3, %o2, %o0 +ENDPROC(U1_g3_0_fp) +ENTRY(U1_g3_8_fp) + VISExitHalf + add %g3, 8, %g3 + retl + add %g3, %o2, %o0 +ENDPROC(U1_g3_8_fp) +ENTRY(U1_o2_0_fp) + VISExitHalf + retl + mov %o2, %o0 +ENDPROC(U1_o2_0_fp) +ENTRY(U1_o2_1_fp) + VISExitHalf + retl + add %o2, 1, %o0 +ENDPROC(U1_o2_1_fp) +ENTRY(U1_gs_0) + VISExitHalf + retl + add %GLOBAL_SPARE, %o2, %o0 +ENDPROC(U1_gs_0) +ENTRY(U1_gs_8) + VISExitHalf + add %GLOBAL_SPARE, %o2, %GLOBAL_SPARE + retl + add %GLOBAL_SPARE, 0x8, %o0 +ENDPROC(U1_gs_8) +ENTRY(U1_gs_10) + VISExitHalf + add %GLOBAL_SPARE, %o2, %GLOBAL_SPARE + retl + add %GLOBAL_SPARE, 0x10, %o0 +ENDPROC(U1_gs_10) +ENTRY(U1_o2_0) + retl + mov %o2, %o0 +ENDPROC(U1_o2_0) +ENTRY(U1_o2_8) + retl + add %o2, 8, %o0 +ENDPROC(U1_o2_8) +ENTRY(U1_o2_4) + retl + add %o2, 4, %o0 +ENDPROC(U1_o2_4) +ENTRY(U1_o2_1) + retl + add %o2, 1, %o0 +ENDPROC(U1_o2_1) +ENTRY(U1_g1_0) + retl + add %g1, %o2, %o0 +ENDPROC(U1_g1_0) +ENTRY(U1_g1_1) + add %g1, 1, %g1 + retl + add %g1, %o2, %o0 +ENDPROC(U1_g1_1) +ENTRY(U1_gs_0_o2_adj) + and %o2, 7, %o2 + retl + add %GLOBAL_SPARE, %o2, %o0 +ENDPROC(U1_gs_0_o2_adj) +ENTRY(U1_gs_8_o2_adj) + and %o2, 7, %o2 + add %GLOBAL_SPARE, 8, %GLOBAL_SPARE + retl + add %GLOBAL_SPARE, %o2, %o0 +ENDPROC(U1_gs_8_o2_adj) +#endif + .align 64 .globl FUNC_NAME @@ -167,8 +280,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ and %g2, 0x38, %g2 1: subcc %g1, 0x1, %g1 - EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3)) - EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE)) + EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U1_g1_1_fp) + EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE), U1_g1_1_fp) bgu,pt %XCC, 1b add %o1, 0x1, %o1 @@ -179,20 +292,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ be,pt %icc, 3f alignaddr %o1, %g0, %o1 - EX_LD_FP(LOAD(ldd, %o1, %f4)) -1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6)) + EX_LD_FP(LOAD(ldd, %o1, %f4), U1_g2_0_fp) +1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U1_g2_0_fp) add %o1, 0x8, %o1 subcc %g2, 0x8, %g2 faligndata %f4, %f6, %f0 - EX_ST_FP(STORE(std, %f0, %o0)) + EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp) be,pn %icc, 3f add %o0, 0x8, %o0 - EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4)) + EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U1_g2_0_fp) add %o1, 0x8, %o1 subcc %g2, 0x8, %g2 faligndata %f6, %f4, %f0 - EX_ST_FP(STORE(std, %f0, %o0)) + EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp) bne,pt %icc, 1b add %o0, 0x8, %o0 @@ -215,13 +328,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ add %g1, %GLOBAL_SPARE, %g1 subcc %o2, %g3, %o2 - EX_LD_FP(LOAD_BLK(%o1, %f0)) + EX_LD_FP(LOAD_BLK(%o1, %f0), U1_gs_0_fp) add %o1, 0x40, %o1 add %g1, %g3, %g1 - EX_LD_FP(LOAD_BLK(%o1, %f16)) + EX_LD_FP(LOAD_BLK(%o1, %f16), U1_gs_0_fp) add %o1, 0x40, %o1 sub %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE - EX_LD_FP(LOAD_BLK(%o1, %f32)) + EX_LD_FP(LOAD_BLK(%o1, %f32), U1_gs_80_fp) add %o1, 0x40, %o1 /* There are 8 instances of the unrolled loop, @@ -241,11 +354,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ .align 64 1: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 3f) ba,pt %xcc, 1b+4 faligndata %f0, %f2, %f48 1: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) @@ -262,11 +375,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ STORE_JUMP(o0, f48, 56f) 1: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 3f) ba,pt %xcc, 1b+4 faligndata %f2, %f4, %f48 1: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) @@ -283,11 +396,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ STORE_JUMP(o0, f48, 57f) 1: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 3f) ba,pt %xcc, 1b+4 faligndata %f4, %f6, %f48 1: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) @@ -304,11 +417,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ STORE_JUMP(o0, f48, 58f) 1: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 3f) ba,pt %xcc, 1b+4 faligndata %f6, %f8, %f48 1: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) @@ -325,11 +438,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ STORE_JUMP(o0, f48, 59f) 1: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 3f) ba,pt %xcc, 1b+4 faligndata %f8, %f10, %f48 1: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) @@ -346,11 +459,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ STORE_JUMP(o0, f48, 60f) 1: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 3f) ba,pt %xcc, 1b+4 faligndata %f10, %f12, %f48 1: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) @@ -367,11 +480,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ STORE_JUMP(o0, f48, 61f) 1: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 3f) ba,pt %xcc, 1b+4 faligndata %f12, %f14, %f48 1: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) @@ -388,11 +501,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ STORE_JUMP(o0, f48, 62f) 1: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 3f) ba,pt %xcc, 1b+4 faligndata %f14, %f16, %f48 1: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) @@ -408,53 +521,53 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) STORE_JUMP(o0, f48, 63f) -40: FINISH_VISCHUNK(o0, f0, f2, g3) -41: FINISH_VISCHUNK(o0, f2, f4, g3) -42: FINISH_VISCHUNK(o0, f4, f6, g3) -43: FINISH_VISCHUNK(o0, f6, f8, g3) -44: FINISH_VISCHUNK(o0, f8, f10, g3) -45: FINISH_VISCHUNK(o0, f10, f12, g3) -46: FINISH_VISCHUNK(o0, f12, f14, g3) -47: UNEVEN_VISCHUNK(o0, f14, f0, g3) -48: FINISH_VISCHUNK(o0, f16, f18, g3) -49: FINISH_VISCHUNK(o0, f18, f20, g3) -50: FINISH_VISCHUNK(o0, f20, f22, g3) -51: FINISH_VISCHUNK(o0, f22, f24, g3) -52: FINISH_VISCHUNK(o0, f24, f26, g3) -53: FINISH_VISCHUNK(o0, f26, f28, g3) -54: FINISH_VISCHUNK(o0, f28, f30, g3) -55: UNEVEN_VISCHUNK(o0, f30, f0, g3) -56: FINISH_VISCHUNK(o0, f32, f34, g3) -57: FINISH_VISCHUNK(o0, f34, f36, g3) -58: FINISH_VISCHUNK(o0, f36, f38, g3) -59: FINISH_VISCHUNK(o0, f38, f40, g3) -60: FINISH_VISCHUNK(o0, f40, f42, g3) -61: FINISH_VISCHUNK(o0, f42, f44, g3) -62: FINISH_VISCHUNK(o0, f44, f46, g3) -63: UNEVEN_VISCHUNK_LAST(o0, f46, f0, g3) +40: FINISH_VISCHUNK(o0, f0, f2) +41: FINISH_VISCHUNK(o0, f2, f4) +42: FINISH_VISCHUNK(o0, f4, f6) +43: FINISH_VISCHUNK(o0, f6, f8) +44: FINISH_VISCHUNK(o0, f8, f10) +45: FINISH_VISCHUNK(o0, f10, f12) +46: FINISH_VISCHUNK(o0, f12, f14) +47: UNEVEN_VISCHUNK(o0, f14, f0) +48: FINISH_VISCHUNK(o0, f16, f18) +49: FINISH_VISCHUNK(o0, f18, f20) +50: FINISH_VISCHUNK(o0, f20, f22) +51: FINISH_VISCHUNK(o0, f22, f24) +52: FINISH_VISCHUNK(o0, f24, f26) +53: FINISH_VISCHUNK(o0, f26, f28) +54: FINISH_VISCHUNK(o0, f28, f30) +55: UNEVEN_VISCHUNK(o0, f30, f0) +56: FINISH_VISCHUNK(o0, f32, f34) +57: FINISH_VISCHUNK(o0, f34, f36) +58: FINISH_VISCHUNK(o0, f36, f38) +59: FINISH_VISCHUNK(o0, f38, f40) +60: FINISH_VISCHUNK(o0, f40, f42) +61: FINISH_VISCHUNK(o0, f42, f44) +62: FINISH_VISCHUNK(o0, f44, f46) +63: UNEVEN_VISCHUNK_LAST(o0, f46, f0) -93: EX_LD_FP(LOAD(ldd, %o1, %f2)) +93: EX_LD_FP(LOAD(ldd, %o1, %f2), U1_g3_0_fp) add %o1, 8, %o1 subcc %g3, 8, %g3 faligndata %f0, %f2, %f8 - EX_ST_FP(STORE(std, %f8, %o0)) + EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp) bl,pn %xcc, 95f add %o0, 8, %o0 - EX_LD_FP(LOAD(ldd, %o1, %f0)) + EX_LD_FP(LOAD(ldd, %o1, %f0), U1_g3_0_fp) add %o1, 8, %o1 subcc %g3, 8, %g3 faligndata %f2, %f0, %f8 - EX_ST_FP(STORE(std, %f8, %o0)) + EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp) bge,pt %xcc, 93b add %o0, 8, %o0 95: brz,pt %o2, 2f mov %g1, %o1 -1: EX_LD_FP(LOAD(ldub, %o1, %o3)) +1: EX_LD_FP(LOAD(ldub, %o1, %o3), U1_o2_0_fp) add %o1, 1, %o1 subcc %o2, 1, %o2 - EX_ST_FP(STORE(stb, %o3, %o0)) + EX_ST_FP(STORE(stb, %o3, %o0), U1_o2_1_fp) bne,pt %xcc, 1b add %o0, 1, %o0 @@ -470,27 +583,27 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 72: andn %o2, 0xf, %GLOBAL_SPARE and %o2, 0xf, %o2 -1: EX_LD(LOAD(ldx, %o1 + 0x00, %o5)) - EX_LD(LOAD(ldx, %o1 + 0x08, %g1)) +1: EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U1_gs_0) + EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U1_gs_0) subcc %GLOBAL_SPARE, 0x10, %GLOBAL_SPARE - EX_ST(STORE(stx, %o5, %o1 + %o3)) + EX_ST(STORE(stx, %o5, %o1 + %o3), U1_gs_10) add %o1, 0x8, %o1 - EX_ST(STORE(stx, %g1, %o1 + %o3)) + EX_ST(STORE(stx, %g1, %o1 + %o3), U1_gs_8) bgu,pt %XCC, 1b add %o1, 0x8, %o1 73: andcc %o2, 0x8, %g0 be,pt %XCC, 1f nop - EX_LD(LOAD(ldx, %o1, %o5)) + EX_LD(LOAD(ldx, %o1, %o5), U1_o2_0) sub %o2, 0x8, %o2 - EX_ST(STORE(stx, %o5, %o1 + %o3)) + EX_ST(STORE(stx, %o5, %o1 + %o3), U1_o2_8) add %o1, 0x8, %o1 1: andcc %o2, 0x4, %g0 be,pt %XCC, 1f nop - EX_LD(LOAD(lduw, %o1, %o5)) + EX_LD(LOAD(lduw, %o1, %o5), U1_o2_0) sub %o2, 0x4, %o2 - EX_ST(STORE(stw, %o5, %o1 + %o3)) + EX_ST(STORE(stw, %o5, %o1 + %o3), U1_o2_4) add %o1, 0x4, %o1 1: cmp %o2, 0 be,pt %XCC, 85f @@ -504,9 +617,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ sub %g0, %g1, %g1 sub %o2, %g1, %o2 -1: EX_LD(LOAD(ldub, %o1, %o5)) +1: EX_LD(LOAD(ldub, %o1, %o5), U1_g1_0) subcc %g1, 1, %g1 - EX_ST(STORE(stb, %o5, %o1 + %o3)) + EX_ST(STORE(stb, %o5, %o1 + %o3), U1_g1_1) bgu,pt %icc, 1b add %o1, 1, %o1 @@ -522,16 +635,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 8: mov 64, %o3 andn %o1, 0x7, %o1 - EX_LD(LOAD(ldx, %o1, %g2)) + EX_LD(LOAD(ldx, %o1, %g2), U1_o2_0) sub %o3, %g1, %o3 andn %o2, 0x7, %GLOBAL_SPARE sllx %g2, %g1, %g2 -1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3)) +1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U1_gs_0_o2_adj) subcc %GLOBAL_SPARE, 0x8, %GLOBAL_SPARE add %o1, 0x8, %o1 srlx %g3, %o3, %o5 or %o5, %g2, %o5 - EX_ST(STORE(stx, %o5, %o0)) + EX_ST(STORE(stx, %o5, %o0), U1_gs_8_o2_adj) add %o0, 0x8, %o0 bgu,pt %icc, 1b sllx %g3, %g1, %g2 @@ -549,9 +662,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ bne,pn %XCC, 90f sub %o0, %o1, %o3 -1: EX_LD(LOAD(lduw, %o1, %g1)) +1: EX_LD(LOAD(lduw, %o1, %g1), U1_o2_0) subcc %o2, 4, %o2 - EX_ST(STORE(stw, %g1, %o1 + %o3)) + EX_ST(STORE(stw, %g1, %o1 + %o3), U1_o2_4) bgu,pt %XCC, 1b add %o1, 4, %o1 @@ -559,9 +672,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ mov EX_RETVAL(%o4), %o0 .align 32 -90: EX_LD(LOAD(ldub, %o1, %g1)) +90: EX_LD(LOAD(ldub, %o1, %g1), U1_o2_0) subcc %o2, 1, %o2 - EX_ST(STORE(stb, %g1, %o1 + %o3)) + EX_ST(STORE(stb, %g1, %o1 + %o3), U1_o2_1) bgu,pt %XCC, 90b add %o1, 1, %o1 retl From 804b1737d71253f01621d2a37a0dce6279a2d440 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 17 Oct 2016 10:14:23 +0200 Subject: [PATCH 541/884] orangefs: don't use d_time Instead use d_fsdata which is the same size. Hoping to get rid of d_time, which is used by very few filesystems by this time. Signed-off-by: Miklos Szeredi Reviewed-by: Martin Brandenburg Signed-off-by: Mike Marshall --- fs/orangefs/dcache.c | 5 +++-- fs/orangefs/namei.c | 8 ++++---- fs/orangefs/orangefs-kernel.h | 7 +++++++ 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/fs/orangefs/dcache.c b/fs/orangefs/dcache.c index 1e8fe844e69f..5355efba4bc8 100644 --- a/fs/orangefs/dcache.c +++ b/fs/orangefs/dcache.c @@ -73,7 +73,7 @@ static int orangefs_revalidate_lookup(struct dentry *dentry) } } - dentry->d_time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000; + orangefs_set_timeout(dentry); ret = 1; out_release_op: op_release(new_op); @@ -94,8 +94,9 @@ out_drop: static int orangefs_d_revalidate(struct dentry *dentry, unsigned int flags) { int ret; + unsigned long time = (unsigned long) dentry->d_fsdata; - if (time_before(jiffies, dentry->d_time)) + if (time_before(jiffies, time)) return 1; if (flags & LOOKUP_RCU) diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c index d15d3d2dba62..a290ff6ec756 100644 --- a/fs/orangefs/namei.c +++ b/fs/orangefs/namei.c @@ -72,7 +72,7 @@ static int orangefs_create(struct inode *dir, d_instantiate(dentry, inode); unlock_new_inode(inode); - dentry->d_time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000; + orangefs_set_timeout(dentry); ORANGEFS_I(inode)->getattr_time = jiffies - 1; gossip_debug(GOSSIP_NAME_DEBUG, @@ -183,7 +183,7 @@ static struct dentry *orangefs_lookup(struct inode *dir, struct dentry *dentry, goto out; } - dentry->d_time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000; + orangefs_set_timeout(dentry); inode = orangefs_iget(dir->i_sb, &new_op->downcall.resp.lookup.refn); if (IS_ERR(inode)) { @@ -322,7 +322,7 @@ static int orangefs_symlink(struct inode *dir, d_instantiate(dentry, inode); unlock_new_inode(inode); - dentry->d_time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000; + orangefs_set_timeout(dentry); ORANGEFS_I(inode)->getattr_time = jiffies - 1; gossip_debug(GOSSIP_NAME_DEBUG, @@ -386,7 +386,7 @@ static int orangefs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode d_instantiate(dentry, inode); unlock_new_inode(inode); - dentry->d_time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000; + orangefs_set_timeout(dentry); ORANGEFS_I(inode)->getattr_time = jiffies - 1; gossip_debug(GOSSIP_NAME_DEBUG, diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 0a82048f3aaf..3bf803d732c5 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -580,4 +580,11 @@ static inline void orangefs_i_size_write(struct inode *inode, loff_t i_size) #endif } +static inline void orangefs_set_timeout(struct dentry *dentry) +{ + unsigned long time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000; + + dentry->d_fsdata = (void *) time; +} + #endif /* __ORANGEFSKERNEL_H */ From 423221d1745b53656db896bd34646d09d620c759 Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Mon, 24 Oct 2016 15:13:25 -0400 Subject: [PATCH 542/884] nbd: fix incorrect unlock of nbd->sock_lock in sock_shutdown Commit 0eadf37afc250 ("nbd: allow block mq to deal with timeouts") changed normal usage of nbd->sock_lock to use spin_lock/spin_unlock rather than the *_irq variants, but it missed this unlock in an error path. Found by Coverity, CID 1373871. Signed-off-by: John W. Linville Cc: Josef Bacik Cc: Jens Axboe Cc: Markus Pargmann Fixes: 0eadf37afc250 ("nbd: allow block mq to deal with timeouts") Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index ba405b55329f..19a16b2dbb91 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -164,7 +164,7 @@ static void sock_shutdown(struct nbd_device *nbd) spin_lock(&nbd->sock_lock); if (!nbd->sock) { - spin_unlock_irq(&nbd->sock_lock); + spin_unlock(&nbd->sock_lock); return; } From 0cc11a61b80a1ab1d12f1597b27b8b45ef8bac4a Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 20 Oct 2016 09:34:31 -0400 Subject: [PATCH 543/884] nfsd: move blocked lock handling under a dedicated spinlock Bruce was hitting some lockdep warnings in testing, showing that we could hit a deadlock with the new CB_NOTIFY_LOCK handling, involving a rather complex situation involving four different spinlocks. The crux of the matter is that we end up taking the nn->client_lock in the lm_notify handler. The simplest fix is to just declare a new per-nfsd_net spinlock to protect the new CB_NOTIFY_LOCK structures. Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- fs/nfsd/netns.h | 5 +++++ fs/nfsd/nfs4state.c | 28 +++++++++++++++------------- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index b10d557f9c9e..ee36efd5aece 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -84,6 +84,8 @@ struct nfsd_net { struct list_head client_lru; struct list_head close_lru; struct list_head del_recall_lru; + + /* protected by blocked_locks_lock */ struct list_head blocked_locks_lru; struct delayed_work laundromat_work; @@ -91,6 +93,9 @@ struct nfsd_net { /* client_lock protects the client lru list and session hash table */ spinlock_t client_lock; + /* protects blocked_locks_lru */ + spinlock_t blocked_locks_lock; + struct file *rec_file; bool in_grace; const struct nfsd4_client_tracking_ops *client_tracking_ops; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9752beb78659..95474196a17e 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -217,7 +217,7 @@ find_blocked_lock(struct nfs4_lockowner *lo, struct knfsd_fh *fh, { struct nfsd4_blocked_lock *cur, *found = NULL; - spin_lock(&nn->client_lock); + spin_lock(&nn->blocked_locks_lock); list_for_each_entry(cur, &lo->lo_blocked, nbl_list) { if (fh_match(fh, &cur->nbl_fh)) { list_del_init(&cur->nbl_list); @@ -226,7 +226,7 @@ find_blocked_lock(struct nfs4_lockowner *lo, struct knfsd_fh *fh, break; } } - spin_unlock(&nn->client_lock); + spin_unlock(&nn->blocked_locks_lock); if (found) posix_unblock_lock(&found->nbl_lock); return found; @@ -4665,7 +4665,7 @@ nfs4_laundromat(struct nfsd_net *nn) * indefinitely once the lock does become free. */ BUG_ON(!list_empty(&reaplist)); - spin_lock(&nn->client_lock); + spin_lock(&nn->blocked_locks_lock); while (!list_empty(&nn->blocked_locks_lru)) { nbl = list_first_entry(&nn->blocked_locks_lru, struct nfsd4_blocked_lock, nbl_lru); @@ -4678,7 +4678,7 @@ nfs4_laundromat(struct nfsd_net *nn) list_move(&nbl->nbl_lru, &reaplist); list_del_init(&nbl->nbl_list); } - spin_unlock(&nn->client_lock); + spin_unlock(&nn->blocked_locks_lock); while (!list_empty(&reaplist)) { nbl = list_first_entry(&nn->blocked_locks_lru, @@ -5439,13 +5439,13 @@ nfsd4_lm_notify(struct file_lock *fl) bool queue = false; /* An empty list means that something else is going to be using it */ - spin_lock(&nn->client_lock); + spin_lock(&nn->blocked_locks_lock); if (!list_empty(&nbl->nbl_list)) { list_del_init(&nbl->nbl_list); list_del_init(&nbl->nbl_lru); queue = true; } - spin_unlock(&nn->client_lock); + spin_unlock(&nn->blocked_locks_lock); if (queue) nfsd4_run_cb(&nbl->nbl_cb); @@ -5868,10 +5868,10 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (fl_flags & FL_SLEEP) { nbl->nbl_time = jiffies; - spin_lock(&nn->client_lock); + spin_lock(&nn->blocked_locks_lock); list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked); list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru); - spin_unlock(&nn->client_lock); + spin_unlock(&nn->blocked_locks_lock); } err = vfs_lock_file(filp, F_SETLK, file_lock, conflock); @@ -5900,10 +5900,10 @@ out: if (nbl) { /* dequeue it if we queued it before */ if (fl_flags & FL_SLEEP) { - spin_lock(&nn->client_lock); + spin_lock(&nn->blocked_locks_lock); list_del_init(&nbl->nbl_list); list_del_init(&nbl->nbl_lru); - spin_unlock(&nn->client_lock); + spin_unlock(&nn->blocked_locks_lock); } free_blocked_lock(nbl); } @@ -6943,9 +6943,11 @@ static int nfs4_state_create_net(struct net *net) INIT_LIST_HEAD(&nn->client_lru); INIT_LIST_HEAD(&nn->close_lru); INIT_LIST_HEAD(&nn->del_recall_lru); - INIT_LIST_HEAD(&nn->blocked_locks_lru); spin_lock_init(&nn->client_lock); + spin_lock_init(&nn->blocked_locks_lock); + INIT_LIST_HEAD(&nn->blocked_locks_lru); + INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main); get_net(net); @@ -7063,14 +7065,14 @@ nfs4_state_shutdown_net(struct net *net) } BUG_ON(!list_empty(&reaplist)); - spin_lock(&nn->client_lock); + spin_lock(&nn->blocked_locks_lock); while (!list_empty(&nn->blocked_locks_lru)) { nbl = list_first_entry(&nn->blocked_locks_lru, struct nfsd4_blocked_lock, nbl_lru); list_move(&nbl->nbl_lru, &reaplist); list_del_init(&nbl->nbl_list); } - spin_unlock(&nn->client_lock); + spin_unlock(&nn->blocked_locks_lock); while (!list_empty(&reaplist)) { nbl = list_first_entry(&nn->blocked_locks_lru, From e8d7515a983b5ccf6681db0ade3714e837581a97 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 21 Oct 2016 16:16:07 -0400 Subject: [PATCH 544/884] drm/amdgpu: cancel reset work on fini MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cancel any pending reset work when we tear down the driver. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 278708f5a744..9fa809876339 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -239,6 +239,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev) if (r) { adev->irq.installed = false; flush_work(&adev->hotplug_work); + cancel_work_sync(&adev->reset_work); return r; } @@ -264,6 +265,7 @@ void amdgpu_irq_fini(struct amdgpu_device *adev) if (adev->irq.msi_enabled) pci_disable_msi(adev->pdev); flush_work(&adev->hotplug_work); + cancel_work_sync(&adev->reset_work); } for (i = 0; i < AMDGPU_MAX_IRQ_SRC_ID; ++i) { From 4560738a8e9d5c0a053b8b896487b83e6948f4a1 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 21 Oct 2016 16:30:10 -0400 Subject: [PATCH 545/884] drm/amdgpu/dpm: flush any thermal work on fini MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Flush any outstanding thermal work before tearing down the dpm driver. Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/ci_dpm.c | 2 ++ drivers/gpu/drm/amd/amdgpu/kv_dpm.c | 2 ++ drivers/gpu/drm/amd/amdgpu/si_dpm.c | 2 ++ 3 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c index 1d8c375a3561..29e6061b9087 100644 --- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c @@ -6236,6 +6236,8 @@ static int ci_dpm_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + flush_work(&adev->pm.dpm.thermal.work); + mutex_lock(&adev->pm.mutex); amdgpu_pm_sysfs_fini(adev); ci_dpm_fini(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c index f8618a3881a8..71d2856222fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c @@ -3063,6 +3063,8 @@ static int kv_dpm_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + flush_work(&adev->pm.dpm.thermal.work); + mutex_lock(&adev->pm.mutex); amdgpu_pm_sysfs_fini(adev); kv_dpm_fini(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c index 3de7bca5854b..ee77e9cb55b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c @@ -7777,6 +7777,8 @@ static int si_dpm_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + flush_work(&adev->pm.dpm.thermal.work); + mutex_lock(&adev->pm.mutex); amdgpu_pm_sysfs_fini(adev); si_dpm_fini(adev); From 9dc79965b21967caebde575f5f5d8bf1aa2c23ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Mon, 24 Oct 2016 16:52:20 +0900 Subject: [PATCH 546/884] Revert "drm/radeon: fix DP link training issue with second 4K monitor" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 1a738347df2ee4977459a8776fe2c62196bdcb1b. It caused at least some Kaveri laptops to incorrectly report DisplayPort connectors as connected. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97857 Cc: stable@vger.kernel.org Reviewed-by: Alex Deucher Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_dp_auxch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_dp_auxch.c b/drivers/gpu/drm/radeon/radeon_dp_auxch.c index 2d465648856a..474a8a1886f7 100644 --- a/drivers/gpu/drm/radeon/radeon_dp_auxch.c +++ b/drivers/gpu/drm/radeon/radeon_dp_auxch.c @@ -105,7 +105,7 @@ radeon_dp_aux_transfer_native(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg tmp &= AUX_HPD_SEL(0x7); tmp |= AUX_HPD_SEL(chan->rec.hpd); - tmp |= AUX_EN | AUX_LS_READ_EN | AUX_HPD_DISCON(0x1); + tmp |= AUX_EN | AUX_LS_READ_EN; WREG32(AUX_CONTROL + aux_offset[instance], tmp); From a053fb7e512c77f0742bceb578b10025256e1911 Mon Sep 17 00:00:00 2001 From: Grazvydas Ignotas Date: Sun, 23 Oct 2016 21:31:44 +0300 Subject: [PATCH 547/884] drm/amdgpu: fix sched fence slab teardown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To free fences, call_rcu() is used, which calls amd_sched_fence_free() after a grace period. During teardown, there is no guarantee all callbacks have finished, so sched_fence_slab may be destroyed before all fences have been freed. If we are lucky, this results in some slab warnings, if not, we get a crash in one of rcu threads because callback is called after amdgpu has already been unloaded. Fix it with a rcu_barrier(). Fixes: 189e0fb76304 ("drm/amdgpu: RCU protected amd_sched_fence_release") Acked-by: Chunming Zhou Reviewed-by: Christian König Signed-off-by: Grazvydas Ignotas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index 963a24d46a93..910b8d5b21c5 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -645,6 +645,7 @@ void amd_sched_fini(struct amd_gpu_scheduler *sched) { if (sched->thread) kthread_stop(sched->thread); + rcu_barrier(); if (atomic_dec_and_test(&sched_fence_slab_ref)) kmem_cache_destroy(sched_fence_slab); } From 2d7c17be00e0dce3bc1a092a2c277a9f86c69ca9 Mon Sep 17 00:00:00 2001 From: Grazvydas Ignotas Date: Sun, 23 Oct 2016 21:31:45 +0300 Subject: [PATCH 548/884] drm/amdgpu: fix a vm_flush fence leak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Looks like .last_flush reference is left at teardown. Leak reported by CONFIG_SLUB_DEBUG. Fixes: 41d9eb2c5a2a ("drm/amdgpu: add a fence after the VM flush") Reviewed-by: Chunming Zhou Reviewed-by: Christian König Signed-off-by: Grazvydas Ignotas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 06f24322e7c3..968c4260d7a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1758,5 +1758,6 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev) fence_put(adev->vm_manager.ids[i].first); amdgpu_sync_free(&adev->vm_manager.ids[i].active); fence_put(id->flushed_updates); + fence_put(id->last_flush); } } From 9566213359aa75851a9ca7fea613652fb5fcb081 Mon Sep 17 00:00:00 2001 From: Grazvydas Ignotas Date: Sun, 23 Oct 2016 21:31:47 +0300 Subject: [PATCH 549/884] drm/amdgpu: update kernel-doc for some functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The names were wrong. Reviewed-by: Edward O'Callaghan Reviewed-by: Christian König Signed-off-by: Grazvydas Ignotas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/scheduler/sched_fence.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/scheduler/sched_fence.c b/drivers/gpu/drm/amd/scheduler/sched_fence.c index 6b63beaf7574..3653b5a40494 100644 --- a/drivers/gpu/drm/amd/scheduler/sched_fence.c +++ b/drivers/gpu/drm/amd/scheduler/sched_fence.c @@ -103,7 +103,7 @@ static void amd_sched_fence_free(struct rcu_head *rcu) } /** - * amd_sched_fence_release - callback that fence can be freed + * amd_sched_fence_release_scheduled - callback that fence can be freed * * @fence: fence * @@ -118,7 +118,7 @@ static void amd_sched_fence_release_scheduled(struct fence *f) } /** - * amd_sched_fence_release_scheduled - drop extra reference + * amd_sched_fence_release_finished - drop extra reference * * @f: fence * From 0f10425e811355986907c54f7d1d06703e406092 Mon Sep 17 00:00:00 2001 From: Grazvydas Ignotas Date: Sun, 23 Oct 2016 21:31:43 +0300 Subject: [PATCH 550/884] drm/amdgpu: fix fence slab teardown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To free fences, call_rcu() is used, which calls amdgpu_fence_free() after a grace period. During teardown, there is no guarantee all callbacks have finished, so amdgpu_fence_slab may be destroyed before all fences have been freed. If we are lucky, this results in some slab warnings, if not, we get a crash in one of rcu threads because callback is called after amdgpu has already been unloaded. Fix it with a rcu_barrier(). Fixes: b44135351a3a ("drm/amdgpu: RCU protected amdgpu_fence_release") Acked-by: Chunming Zhou Reviewed-by: Christian König Signed-off-by: Grazvydas Ignotas Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 3a2e42f4b897..77b34ec92632 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -68,6 +68,7 @@ int amdgpu_fence_slab_init(void) void amdgpu_fence_slab_fini(void) { + rcu_barrier(); kmem_cache_destroy(amdgpu_fence_slab); } /* From 2f1d407adab026b34a105ed27b1d4d7e910c4448 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 24 Oct 2016 23:20:25 +0200 Subject: [PATCH 551/884] cpufreq: intel_pstate: Always set max P-state in performance mode The only times at which intel_pstate checks the policy set for a given CPU is the initialization of that CPU and updates of its policy settings from cpufreq when intel_pstate_set_policy() is invoked. That is insufficient, however, because intel_pstate uses the same P-state selection function for all CPUs regardless of the policy setting for each of them and the P-state limits are shared between them. Thus if the policy is set to "performance" for a particular CPU, it may not behave as expected if the cpufreq settings are changed subsequently for another CPU. That can be easily demonstrated by writing "performance" to scaling_governor for all CPUs and then switching it to "powersave" for one of them in which case all of the CPUs will behave as though their scaling_governor were all "powersave" (even though the policy still appears to be "performance" for the remaining CPUs). Fix this problem by modifying intel_pstate_adjust_busy_pstate() to always set the P-state to the maximum allowed by the current limits for all CPUs whose policy is set to "performance". Note that it still is recommended to always change the policy setting in the same way for all CPUs even with this fix applied to avoid confusion. Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index ac7c58d20b58..4737520ec823 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -179,6 +179,7 @@ struct _pid { /** * struct cpudata - Per CPU instance data storage * @cpu: CPU number for this instance data + * @policy: CPUFreq policy value * @update_util: CPUFreq utility callback information * @update_util_set: CPUFreq utility callback is set * @iowait_boost: iowait-related boost fraction @@ -201,6 +202,7 @@ struct _pid { struct cpudata { int cpu; + unsigned int policy; struct update_util_data update_util; bool update_util_set; @@ -1337,7 +1339,8 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu) from = cpu->pstate.current_pstate; - target_pstate = pstate_funcs.get_target_pstate(cpu); + target_pstate = cpu->policy == CPUFREQ_POLICY_PERFORMANCE ? + cpu->pstate.turbo_pstate : pstate_funcs.get_target_pstate(cpu); intel_pstate_update_pstate(cpu, target_pstate); @@ -1504,6 +1507,8 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) policy->cpuinfo.max_freq, policy->max); cpu = all_cpu_data[policy->cpu]; + cpu->policy = policy->policy; + if (cpu->pstate.max_pstate_physical > cpu->pstate.max_pstate && policy->max < policy->cpuinfo.max_freq && policy->max > cpu->pstate.max_pstate * cpu->pstate.scaling) { @@ -1511,7 +1516,7 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) policy->max = policy->cpuinfo.max_freq; } - if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) { + if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) { limits = &performance_limits; if (policy->max >= policy->cpuinfo.max_freq) { pr_debug("set performance\n"); @@ -1547,7 +1552,7 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) limits->max_perf = round_up(limits->max_perf, FRAC_BITS); out: - if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) { + if (cpu->policy == CPUFREQ_POLICY_PERFORMANCE) { /* * NOHZ_FULL CPUs need this as the governor callback may not * be invoked on them. From e3f948cd3283e4fbe5907f1f3967c839912f480e Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 6 Oct 2016 14:09:16 -0700 Subject: [PATCH 552/884] RAID1: ignore discard error If a write error occurs, raid1 will try to rewrite the bio in small chunk size. If the rewrite fails, raid1 will record the error in bad block. narrow_write_error will always use WRITE for the bio, but actually it could be a discard. Since discard bio hasn't payload, write the bio will cause different issues. But discard error isn't fatal, we can safely ignore it. This is what this patch does. This issue should exist since discard is added, but only exposed with recent arbitrary bio size feature. Reported-and-tested-by: Sitsofe Wheeler Cc: stable@vger.kernel.org (v3.6) Signed-off-by: Shaohua Li --- drivers/md/raid1.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 1961d827dbd1..db536a68b2ee 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -403,11 +403,14 @@ static void raid1_end_write_request(struct bio *bio) struct bio *to_put = NULL; int mirror = find_bio_disk(r1_bio, bio); struct md_rdev *rdev = conf->mirrors[mirror].rdev; + bool discard_error; + + discard_error = bio->bi_error && bio_op(bio) == REQ_OP_DISCARD; /* * 'one mirror IO has finished' event handler: */ - if (bio->bi_error) { + if (bio->bi_error && !discard_error) { set_bit(WriteErrorSeen, &rdev->flags); if (!test_and_set_bit(WantReplacement, &rdev->flags)) set_bit(MD_RECOVERY_NEEDED, & @@ -444,7 +447,7 @@ static void raid1_end_write_request(struct bio *bio) /* Maybe we can clear some bad blocks. */ if (is_badblock(rdev, r1_bio->sector, r1_bio->sectors, - &first_bad, &bad_sectors)) { + &first_bad, &bad_sectors) && !discard_error) { r1_bio->bios[mirror] = IO_MADE_GOOD; set_bit(R1BIO_MadeGood, &r1_bio->state); } From 579ed34f7b751b8add233cba4cf755258dbdd60a Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 6 Oct 2016 14:13:52 -0700 Subject: [PATCH 553/884] RAID10: ignore discard error This is the counterpart of raid10 fix. If a write error occurs, raid10 will try to rewrite the bio in small chunk size. If the rewrite fails, raid10 will record the error in bad block. narrow_write_error will always use WRITE for the bio, but actually it could be a discard. Since discard bio hasn't payload, write the bio will cause different issues. But discard error isn't fatal, we can safely ignore it. This is what this patch does. This issue should exist since discard is added, but only exposed with recent arbitrary bio size feature. Cc: Sitsofe Wheeler Cc: stable@vger.kernel.org (v3.6) Signed-off-by: Shaohua Li --- drivers/md/raid10.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index be1a9fca3b2d..39fddda2fef2 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -447,6 +447,9 @@ static void raid10_end_write_request(struct bio *bio) struct r10conf *conf = r10_bio->mddev->private; int slot, repl; struct md_rdev *rdev = NULL; + bool discard_error; + + discard_error = bio->bi_error && bio_op(bio) == REQ_OP_DISCARD; dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl); @@ -460,7 +463,7 @@ static void raid10_end_write_request(struct bio *bio) /* * this branch is our 'one mirror IO has finished' event handler: */ - if (bio->bi_error) { + if (bio->bi_error && !discard_error) { if (repl) /* Never record new bad blocks to replacement, * just fail it. @@ -503,7 +506,7 @@ static void raid10_end_write_request(struct bio *bio) if (is_badblock(rdev, r10_bio->devs[slot].addr, r10_bio->sectors, - &first_bad, &bad_sectors)) { + &first_bad, &bad_sectors) && !discard_error) { bio_put(bio); if (repl) r10_bio->devs[slot].repl_bio = IO_MADE_GOOD; From 28cd88e2b4c54a466dcae7eea1efac766d42386b Mon Sep 17 00:00:00 2001 From: Zhengyuan Liu Date: Mon, 24 Oct 2016 09:55:20 +0800 Subject: [PATCH 554/884] md/raid5: initialize next_checkpoint field before use No initial operation was done to this field when we load/recovery the log, it got assignment only when IO to raid disk was finished. So r5l_quiesce may use wrong next_checkpoint to reclaim log space, that would make reclaimable space calculation confused. Signed-off-by: Zhengyuan Liu Signed-off-by: Shaohua Li --- drivers/md/raid5-cache.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 1b1ab4a1d132..998ea0025dd0 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -1096,6 +1096,8 @@ static int r5l_recovery_log(struct r5l_log *log) log->seq = ctx.seq + 11; log->log_start = r5l_ring_add(log, ctx.pos, BLOCK_SECTORS); r5l_write_super(log, ctx.pos); + log->last_checkpoint = ctx.pos; + log->next_checkpoint = ctx.pos; } else { log->log_start = ctx.pos; log->seq = ctx.seq; @@ -1168,6 +1170,7 @@ create: if (log->max_free_space > RECLAIM_MAX_FREE_SPACE) log->max_free_space = RECLAIM_MAX_FREE_SPACE; log->last_checkpoint = cp; + log->next_checkpoint = cp; __free_page(page); From 56056c2e7d58ee705755efbe780aefff987a1dc8 Mon Sep 17 00:00:00 2001 From: Zhengyuan Liu Date: Mon, 24 Oct 2016 16:15:59 +0800 Subject: [PATCH 555/884] md/raid5: write an empty meta-block when creating log super-block If superblock points to an invalid meta block, r5l_load_log will set create_super with true and create an new superblock, this runtime path would always happen if we do no writing I/O to this array since it was created. Writing an empty meta block could avoid this unnecessary action at the first time we created log superblock. Another reason is for the corretness of log recovery. Currently we have bellow code to guarantee log revocery to be correct. if (ctx.seq > log->last_cp_seq + 1) { int ret; ret = r5l_log_write_empty_meta_block(log, ctx.pos, ctx.seq + 10); if (ret) return ret; log->seq = ctx.seq + 11; log->log_start = r5l_ring_add(log, ctx.pos, BLOCK_SECTORS); r5l_write_super(log, ctx.pos); } else { log->log_start = ctx.pos; log->seq = ctx.seq; } If we just created a array with a journal device, log->log_start and log->last_checkpoint should all be 0, then we write three meta block which are valid except mid one and supposed crash happened. The ctx.seq would equal to log->last_cp_seq + 1 and log->log_start would be set to position of mid invalid meta block after we did a recovery, this will lead to problems which could be avoided with this patch. Signed-off-by: Zhengyuan Liu Signed-off-by: Shaohua Li --- drivers/md/raid5-cache.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 998ea0025dd0..981f85515191 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -1156,6 +1156,7 @@ create: if (create_super) { log->last_cp_seq = prandom_u32(); cp = 0; + r5l_log_write_empty_meta_block(log, cp, log->last_cp_seq); /* * Make sure super points to correct address. Log might have * data very soon. If super hasn't correct log tail address, From 16f889499a5214ebe038f8bd00f4c0094ed0ed75 Mon Sep 17 00:00:00 2001 From: Tomasz Majchrzak Date: Mon, 24 Oct 2016 12:47:28 +0200 Subject: [PATCH 556/884] md: report 'write_pending' state when array in sync If there is a bad block on a disk and there is a recovery performed from this disk, the same bad block is reported for a new disk. It involves setting MD_CHANGE_PENDING flag in rdev_set_badblocks. For external metadata this flag is not being cleared as array state is reported as 'clean'. The read request to bad block in RAID5 array gets stuck as it is waiting for a flag to be cleared - as per commit c3cce6cda162 ("md/raid5: ensure device failure recorded before write request returns."). The meaning of MD_CHANGE_PENDING and MD_CHANGE_CLEAN flags has been clarified in commit 070dc6dd7103 ("md: resolve confusion of MD_CHANGE_CLEAN"), however MD_CHANGE_PENDING flag has been used in personality error handlers since and it doesn't fully comply with initial purpose. It was supposed to notify that write request is about to start, however now it is also used to request metadata update. Initially (in md_allow_write, md_write_start) MD_CHANGE_PENDING flag has been set and in_sync has been set to 0 at the same time. Error handlers just set the flag without modifying in_sync value. Sysfs array state is a single value so now it reports 'clean' when MD_CHANGE_PENDING flag is set and in_sync is set to 1. Userspace has no idea it is expected to take some action. Swap the order that array state is checked so 'write_pending' is reported ahead of 'clean' ('write_pending' is a misleading name but it is too late to rename it now). Signed-off-by: Tomasz Majchrzak Signed-off-by: Shaohua Li --- drivers/md/md.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index eac84d8ff724..25b57a55db1a 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -3887,10 +3887,10 @@ array_state_show(struct mddev *mddev, char *page) st = read_auto; break; case 0: - if (mddev->in_sync) - st = clean; - else if (test_bit(MD_CHANGE_PENDING, &mddev->flags)) + if (test_bit(MD_CHANGE_PENDING, &mddev->flags)) st = write_pending; + else if (mddev->in_sync) + st = clean; else if (mddev->safemode) st = active_idle; else From 324ae0958cab5ccdf2851ef0348ba1cb8ad4e34a Mon Sep 17 00:00:00 2001 From: Dmitry Tunin Date: Mon, 24 Oct 2016 16:35:09 -0700 Subject: [PATCH 557/884] Input: psmouse - cleanup Focaltech code psmouse->name "Focaltech Touchpad" is an overkill. In xinput it is too long as "FocaltechPS/2 Focaltech Focaltech Touchpad" In focaltech_report_state() pointer to psmouse->dev is already stored as *dev Signed-off-by: Dmitry Tunin Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/focaltech.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/input/mouse/focaltech.c b/drivers/input/mouse/focaltech.c index 54eceb30ede5..a7d39689bbfb 100644 --- a/drivers/input/mouse/focaltech.c +++ b/drivers/input/mouse/focaltech.c @@ -43,7 +43,7 @@ int focaltech_detect(struct psmouse *psmouse, bool set_properties) if (set_properties) { psmouse->vendor = "FocalTech"; - psmouse->name = "FocalTech Touchpad"; + psmouse->name = "Touchpad"; } return 0; @@ -146,8 +146,8 @@ static void focaltech_report_state(struct psmouse *psmouse) } input_mt_report_pointer_emulation(dev, true); - input_report_key(psmouse->dev, BTN_LEFT, state->pressed); - input_sync(psmouse->dev); + input_report_key(dev, BTN_LEFT, state->pressed); + input_sync(dev); } static void focaltech_process_touch_packet(struct psmouse *psmouse, From 1e793f6fc0db920400574211c48f9157a37e3945 Mon Sep 17 00:00:00 2001 From: Kashyap Desai Date: Fri, 21 Oct 2016 06:33:32 -0700 Subject: [PATCH 558/884] scsi: megaraid_sas: Fix data integrity failure for JBOD (passthrough) devices Commit 02b01e010afe ("megaraid_sas: return sync cache call with success") modified the driver to successfully complete SYNCHRONIZE_CACHE commands without passing them to the controller. Disk drive caches are only explicitly managed by controller firmware when operating in RAID mode. So this commit effectively disabled writeback cache flushing for any drives used in JBOD mode, leading to data integrity failures. [mkp: clarified patch description] Fixes: 02b01e010afeeb49328d35650d70721d2ca3fd59 CC: stable@vger.kernel.org Signed-off-by: Kashyap Desai Signed-off-by: Sumit Saxena Reviewed-by: Tomas Henzl Reviewed-by: Hannes Reinecke Reviewed-by: Ewan D. Milne Signed-off-by: Martin K. Petersen --- drivers/scsi/megaraid/megaraid_sas_base.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 9ff57dee72d7..d8b1fbd4c8aa 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -1700,16 +1700,13 @@ megasas_queue_command(struct Scsi_Host *shost, struct scsi_cmnd *scmd) goto out_done; } - switch (scmd->cmnd[0]) { - case SYNCHRONIZE_CACHE: - /* - * FW takes care of flush cache on its own - * No need to send it down - */ + /* + * FW takes care of flush cache on its own for Virtual Disk. + * No need to send it down for VD. For JBOD send SYNCHRONIZE_CACHE to FW. + */ + if ((scmd->cmnd[0] == SYNCHRONIZE_CACHE) && MEGASAS_IS_LOGICAL(scmd)) { scmd->result = DID_OK << 16; goto out_done; - default: - break; } return instance->instancet->build_and_issue_cmd(instance, scmd); From 95707704800988093a9b9a27e0f2f67f5b4bf2fa Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 24 Oct 2016 18:58:05 -0700 Subject: [PATCH 559/884] sparc64: Convert NG4copy_{from,to}_user to accurate exception reporting. Report the exact number of bytes which have not been successfully copied when an exception occurs, using the running remaining length. Signed-off-by: David S. Miller --- arch/sparc/lib/NG4copy_from_user.S | 8 +- arch/sparc/lib/NG4copy_to_user.S | 8 +- arch/sparc/lib/NG4memcpy.S | 294 ++++++++++++++++++++++------- 3 files changed, 231 insertions(+), 79 deletions(-) diff --git a/arch/sparc/lib/NG4copy_from_user.S b/arch/sparc/lib/NG4copy_from_user.S index f9746e7cf25e..16a286c1a528 100644 --- a/arch/sparc/lib/NG4copy_from_user.S +++ b/arch/sparc/lib/NG4copy_from_user.S @@ -3,19 +3,19 @@ * Copyright (C) 2012 David S. Miller (davem@davemloft.net) */ -#define EX_LD(x) \ +#define EX_LD(x, y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_mone_asi;\ + .word 98b, y; \ .text; \ .align 4; -#define EX_LD_FP(x) \ +#define EX_LD_FP(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_mone_asi_fp;\ + .word 98b, y##_fp; \ .text; \ .align 4; diff --git a/arch/sparc/lib/NG4copy_to_user.S b/arch/sparc/lib/NG4copy_to_user.S index 5fa44349adde..6b0276ffc858 100644 --- a/arch/sparc/lib/NG4copy_to_user.S +++ b/arch/sparc/lib/NG4copy_to_user.S @@ -3,19 +3,19 @@ * Copyright (C) 2012 David S. Miller (davem@davemloft.net) */ -#define EX_ST(x) \ +#define EX_ST(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_mone_asi;\ + .word 98b, y; \ .text; \ .align 4; -#define EX_ST_FP(x) \ +#define EX_ST_FP(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_mone_asi_fp;\ + .word 98b, y##_fp; \ .text; \ .align 4; diff --git a/arch/sparc/lib/NG4memcpy.S b/arch/sparc/lib/NG4memcpy.S index 8e13ee1f4454..75bb93b1437f 100644 --- a/arch/sparc/lib/NG4memcpy.S +++ b/arch/sparc/lib/NG4memcpy.S @@ -4,6 +4,7 @@ */ #ifdef __KERNEL__ +#include #include #include #define GLOBAL_SPARE %g7 @@ -46,22 +47,19 @@ #endif #ifndef EX_LD -#define EX_LD(x) x +#define EX_LD(x,y) x #endif #ifndef EX_LD_FP -#define EX_LD_FP(x) x +#define EX_LD_FP(x,y) x #endif #ifndef EX_ST -#define EX_ST(x) x +#define EX_ST(x,y) x #endif #ifndef EX_ST_FP -#define EX_ST_FP(x) x +#define EX_ST_FP(x,y) x #endif -#ifndef EX_RETVAL -#define EX_RETVAL(x) x -#endif #ifndef LOAD #define LOAD(type,addr,dest) type [addr], dest @@ -94,6 +92,158 @@ .register %g3,#scratch .text +#ifndef EX_RETVAL +#define EX_RETVAL(x) x +__restore_asi_fp: + VISExitHalf +__restore_asi: + retl + wr %g0, ASI_AIUS, %asi + +ENTRY(NG4_retl_o2) + ba,pt %xcc, __restore_asi + mov %o2, %o0 +ENDPROC(NG4_retl_o2) +ENTRY(NG4_retl_o2_plus_1) + ba,pt %xcc, __restore_asi + add %o2, 1, %o0 +ENDPROC(NG4_retl_o2_plus_1) +ENTRY(NG4_retl_o2_plus_4) + ba,pt %xcc, __restore_asi + add %o2, 4, %o0 +ENDPROC(NG4_retl_o2_plus_4) +ENTRY(NG4_retl_o2_plus_o5) + ba,pt %xcc, __restore_asi + add %o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5) +ENTRY(NG4_retl_o2_plus_o5_plus_4) + add %o5, 4, %o5 + ba,pt %xcc, __restore_asi + add %o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5_plus_4) +ENTRY(NG4_retl_o2_plus_o5_plus_8) + add %o5, 8, %o5 + ba,pt %xcc, __restore_asi + add %o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5_plus_8) +ENTRY(NG4_retl_o2_plus_o5_plus_16) + add %o5, 16, %o5 + ba,pt %xcc, __restore_asi + add %o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5_plus_16) +ENTRY(NG4_retl_o2_plus_o5_plus_24) + add %o5, 24, %o5 + ba,pt %xcc, __restore_asi + add %o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5_plus_24) +ENTRY(NG4_retl_o2_plus_o5_plus_32) + add %o5, 32, %o5 + ba,pt %xcc, __restore_asi + add %o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5_plus_32) +ENTRY(NG4_retl_o2_plus_g1) + ba,pt %xcc, __restore_asi + add %o2, %g1, %o0 +ENDPROC(NG4_retl_o2_plus_g1) +ENTRY(NG4_retl_o2_plus_g1_plus_1) + add %g1, 1, %g1 + ba,pt %xcc, __restore_asi + add %o2, %g1, %o0 +ENDPROC(NG4_retl_o2_plus_g1_plus_1) +ENTRY(NG4_retl_o2_plus_g1_plus_8) + add %g1, 8, %g1 + ba,pt %xcc, __restore_asi + add %o2, %g1, %o0 +ENDPROC(NG4_retl_o2_plus_g1_plus_8) +ENTRY(NG4_retl_o2_plus_o4) + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4) +ENTRY(NG4_retl_o2_plus_o4_plus_8) + add %o4, 8, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_8) +ENTRY(NG4_retl_o2_plus_o4_plus_16) + add %o4, 16, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_16) +ENTRY(NG4_retl_o2_plus_o4_plus_24) + add %o4, 24, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_24) +ENTRY(NG4_retl_o2_plus_o4_plus_32) + add %o4, 32, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_32) +ENTRY(NG4_retl_o2_plus_o4_plus_40) + add %o4, 40, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_40) +ENTRY(NG4_retl_o2_plus_o4_plus_48) + add %o4, 48, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_48) +ENTRY(NG4_retl_o2_plus_o4_plus_56) + add %o4, 56, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_56) +ENTRY(NG4_retl_o2_plus_o4_plus_64) + add %o4, 64, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_64) +ENTRY(NG4_retl_o2_plus_o4_fp) + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_fp) +ENTRY(NG4_retl_o2_plus_o4_plus_8_fp) + add %o4, 8, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_8_fp) +ENTRY(NG4_retl_o2_plus_o4_plus_16_fp) + add %o4, 16, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_16_fp) +ENTRY(NG4_retl_o2_plus_o4_plus_24_fp) + add %o4, 24, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_24_fp) +ENTRY(NG4_retl_o2_plus_o4_plus_32_fp) + add %o4, 32, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_32_fp) +ENTRY(NG4_retl_o2_plus_o4_plus_40_fp) + add %o4, 40, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_40_fp) +ENTRY(NG4_retl_o2_plus_o4_plus_48_fp) + add %o4, 48, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_48_fp) +ENTRY(NG4_retl_o2_plus_o4_plus_56_fp) + add %o4, 56, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_56_fp) +ENTRY(NG4_retl_o2_plus_o4_plus_64_fp) + add %o4, 64, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_64_fp) +#endif .align 64 .globl FUNC_NAME @@ -124,12 +274,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ brz,pt %g1, 51f sub %o2, %g1, %o2 -1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2)) + +1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1) add %o1, 1, %o1 subcc %g1, 1, %g1 add %o0, 1, %o0 bne,pt %icc, 1b - EX_ST(STORE(stb, %g2, %o0 - 0x01)) + EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1) 51: LOAD(prefetch, %o1 + 0x040, #n_reads_strong) LOAD(prefetch, %o1 + 0x080, #n_reads_strong) @@ -154,43 +305,43 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ brz,pt %g1, .Llarge_aligned sub %o2, %g1, %o2 -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2)) +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1) add %o1, 8, %o1 subcc %g1, 8, %g1 add %o0, 8, %o0 bne,pt %icc, 1b - EX_ST(STORE(stx, %g2, %o0 - 0x08)) + EX_ST(STORE(stx, %g2, %o0 - 0x08), NG4_retl_o2_plus_g1_plus_8) .Llarge_aligned: /* len >= 0x80 && src 8-byte aligned && dest 8-byte aligned */ andn %o2, 0x3f, %o4 sub %o2, %o4, %o2 -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o4) add %o1, 0x40, %o1 - EX_LD(LOAD(ldx, %o1 - 0x38, %g2)) + EX_LD(LOAD(ldx, %o1 - 0x38, %g2), NG4_retl_o2_plus_o4) subcc %o4, 0x40, %o4 - EX_LD(LOAD(ldx, %o1 - 0x30, %g3)) - EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE)) - EX_LD(LOAD(ldx, %o1 - 0x20, %o5)) - EX_ST(STORE_INIT(%g1, %o0)) + EX_LD(LOAD(ldx, %o1 - 0x30, %g3), NG4_retl_o2_plus_o4_plus_64) + EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_64) + EX_LD(LOAD(ldx, %o1 - 0x20, %o5), NG4_retl_o2_plus_o4_plus_64) + EX_ST(STORE_INIT(%g1, %o0), NG4_retl_o2_plus_o4_plus_64) add %o0, 0x08, %o0 - EX_ST(STORE_INIT(%g2, %o0)) + EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_56) add %o0, 0x08, %o0 - EX_LD(LOAD(ldx, %o1 - 0x18, %g2)) - EX_ST(STORE_INIT(%g3, %o0)) + EX_LD(LOAD(ldx, %o1 - 0x18, %g2), NG4_retl_o2_plus_o4_plus_48) + EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_48) add %o0, 0x08, %o0 - EX_LD(LOAD(ldx, %o1 - 0x10, %g3)) - EX_ST(STORE_INIT(GLOBAL_SPARE, %o0)) + EX_LD(LOAD(ldx, %o1 - 0x10, %g3), NG4_retl_o2_plus_o4_plus_40) + EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_40) add %o0, 0x08, %o0 - EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE)) - EX_ST(STORE_INIT(%o5, %o0)) + EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_32) + EX_ST(STORE_INIT(%o5, %o0), NG4_retl_o2_plus_o4_plus_32) add %o0, 0x08, %o0 - EX_ST(STORE_INIT(%g2, %o0)) + EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_24) add %o0, 0x08, %o0 - EX_ST(STORE_INIT(%g3, %o0)) + EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_16) add %o0, 0x08, %o0 - EX_ST(STORE_INIT(GLOBAL_SPARE, %o0)) + EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_8) add %o0, 0x08, %o0 bne,pt %icc, 1b LOAD(prefetch, %o1 + 0x200, #n_reads_strong) @@ -216,17 +367,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ sub %o2, %o4, %o2 alignaddr %o1, %g0, %g1 add %o1, %o4, %o1 - EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0)) -1: EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2)) + EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0), NG4_retl_o2_plus_o4) +1: EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2), NG4_retl_o2_plus_o4) subcc %o4, 0x40, %o4 - EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4)) - EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6)) - EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8)) - EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10)) - EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12)) - EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14)) + EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4), NG4_retl_o2_plus_o4_plus_64) + EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6), NG4_retl_o2_plus_o4_plus_64) + EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8), NG4_retl_o2_plus_o4_plus_64) + EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10), NG4_retl_o2_plus_o4_plus_64) + EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12), NG4_retl_o2_plus_o4_plus_64) + EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14), NG4_retl_o2_plus_o4_plus_64) faligndata %f0, %f2, %f16 - EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0)) + EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0), NG4_retl_o2_plus_o4_plus_64) faligndata %f2, %f4, %f18 add %g1, 0x40, %g1 faligndata %f4, %f6, %f20 @@ -235,14 +386,14 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ faligndata %f10, %f12, %f26 faligndata %f12, %f14, %f28 faligndata %f14, %f0, %f30 - EX_ST_FP(STORE(std, %f16, %o0 + 0x00)) - EX_ST_FP(STORE(std, %f18, %o0 + 0x08)) - EX_ST_FP(STORE(std, %f20, %o0 + 0x10)) - EX_ST_FP(STORE(std, %f22, %o0 + 0x18)) - EX_ST_FP(STORE(std, %f24, %o0 + 0x20)) - EX_ST_FP(STORE(std, %f26, %o0 + 0x28)) - EX_ST_FP(STORE(std, %f28, %o0 + 0x30)) - EX_ST_FP(STORE(std, %f30, %o0 + 0x38)) + EX_ST_FP(STORE(std, %f16, %o0 + 0x00), NG4_retl_o2_plus_o4_plus_64) + EX_ST_FP(STORE(std, %f18, %o0 + 0x08), NG4_retl_o2_plus_o4_plus_56) + EX_ST_FP(STORE(std, %f20, %o0 + 0x10), NG4_retl_o2_plus_o4_plus_48) + EX_ST_FP(STORE(std, %f22, %o0 + 0x18), NG4_retl_o2_plus_o4_plus_40) + EX_ST_FP(STORE(std, %f24, %o0 + 0x20), NG4_retl_o2_plus_o4_plus_32) + EX_ST_FP(STORE(std, %f26, %o0 + 0x28), NG4_retl_o2_plus_o4_plus_24) + EX_ST_FP(STORE(std, %f28, %o0 + 0x30), NG4_retl_o2_plus_o4_plus_16) + EX_ST_FP(STORE(std, %f30, %o0 + 0x38), NG4_retl_o2_plus_o4_plus_8) add %o0, 0x40, %o0 bne,pt %icc, 1b LOAD(prefetch, %g1 + 0x200, #n_reads_strong) @@ -270,37 +421,38 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ andncc %o2, 0x20 - 1, %o5 be,pn %icc, 2f sub %o2, %o5, %o2 -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) - EX_LD(LOAD(ldx, %o1 + 0x08, %g2)) - EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE)) - EX_LD(LOAD(ldx, %o1 + 0x18, %o4)) +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5) + EX_LD(LOAD(ldx, %o1 + 0x08, %g2), NG4_retl_o2_plus_o5) + EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE), NG4_retl_o2_plus_o5) + EX_LD(LOAD(ldx, %o1 + 0x18, %o4), NG4_retl_o2_plus_o5) add %o1, 0x20, %o1 subcc %o5, 0x20, %o5 - EX_ST(STORE(stx, %g1, %o0 + 0x00)) - EX_ST(STORE(stx, %g2, %o0 + 0x08)) - EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10)) - EX_ST(STORE(stx, %o4, %o0 + 0x18)) + EX_ST(STORE(stx, %g1, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_32) + EX_ST(STORE(stx, %g2, %o0 + 0x08), NG4_retl_o2_plus_o5_plus_24) + EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10), NG4_retl_o2_plus_o5_plus_24) + EX_ST(STORE(stx, %o4, %o0 + 0x18), NG4_retl_o2_plus_o5_plus_8) bne,pt %icc, 1b add %o0, 0x20, %o0 2: andcc %o2, 0x18, %o5 be,pt %icc, 3f sub %o2, %o5, %o2 -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) + +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5) add %o1, 0x08, %o1 add %o0, 0x08, %o0 subcc %o5, 0x08, %o5 bne,pt %icc, 1b - EX_ST(STORE(stx, %g1, %o0 - 0x08)) + EX_ST(STORE(stx, %g1, %o0 - 0x08), NG4_retl_o2_plus_o5_plus_8) 3: brz,pt %o2, .Lexit cmp %o2, 0x04 bl,pn %icc, .Ltiny nop - EX_LD(LOAD(lduw, %o1 + 0x00, %g1)) + EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2) add %o1, 0x04, %o1 add %o0, 0x04, %o0 subcc %o2, 0x04, %o2 bne,pn %icc, .Ltiny - EX_ST(STORE(stw, %g1, %o0 - 0x04)) + EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_4) ba,a,pt %icc, .Lexit .Lmedium_unaligned: /* First get dest 8 byte aligned. */ @@ -309,12 +461,12 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ brz,pt %g1, 2f sub %o2, %g1, %o2 -1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2)) +1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1) add %o1, 1, %o1 subcc %g1, 1, %g1 add %o0, 1, %o0 bne,pt %icc, 1b - EX_ST(STORE(stb, %g2, %o0 - 0x01)) + EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1) 2: and %o1, 0x7, %g1 brz,pn %g1, .Lmedium_noprefetch @@ -322,16 +474,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ mov 64, %g2 sub %g2, %g1, %g2 andn %o1, 0x7, %o1 - EX_LD(LOAD(ldx, %o1 + 0x00, %o4)) + EX_LD(LOAD(ldx, %o1 + 0x00, %o4), NG4_retl_o2) sllx %o4, %g1, %o4 andn %o2, 0x08 - 1, %o5 sub %o2, %o5, %o2 -1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3)) +1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3), NG4_retl_o2_plus_o5) add %o1, 0x08, %o1 subcc %o5, 0x08, %o5 srlx %g3, %g2, GLOBAL_SPARE or GLOBAL_SPARE, %o4, GLOBAL_SPARE - EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00)) + EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_8) add %o0, 0x08, %o0 bne,pt %icc, 1b sllx %g3, %g1, %o4 @@ -342,17 +494,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ba,pt %icc, .Lsmall_unaligned .Ltiny: - EX_LD(LOAD(ldub, %o1 + 0x00, %g1)) + EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2) subcc %o2, 1, %o2 be,pn %icc, .Lexit - EX_ST(STORE(stb, %g1, %o0 + 0x00)) - EX_LD(LOAD(ldub, %o1 + 0x01, %g1)) + EX_ST(STORE(stb, %g1, %o0 + 0x00), NG4_retl_o2_plus_1) + EX_LD(LOAD(ldub, %o1 + 0x01, %g1), NG4_retl_o2) subcc %o2, 1, %o2 be,pn %icc, .Lexit - EX_ST(STORE(stb, %g1, %o0 + 0x01)) - EX_LD(LOAD(ldub, %o1 + 0x02, %g1)) + EX_ST(STORE(stb, %g1, %o0 + 0x01), NG4_retl_o2_plus_1) + EX_LD(LOAD(ldub, %o1 + 0x02, %g1), NG4_retl_o2) ba,pt %icc, .Lexit - EX_ST(STORE(stb, %g1, %o0 + 0x02)) + EX_ST(STORE(stb, %g1, %o0 + 0x02), NG4_retl_o2) .Lsmall: andcc %g2, 0x3, %g0 @@ -360,22 +512,22 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ andn %o2, 0x4 - 1, %o5 sub %o2, %o5, %o2 1: - EX_LD(LOAD(lduw, %o1 + 0x00, %g1)) + EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5) add %o1, 0x04, %o1 subcc %o5, 0x04, %o5 add %o0, 0x04, %o0 bne,pt %icc, 1b - EX_ST(STORE(stw, %g1, %o0 - 0x04)) + EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_o5_plus_4) brz,pt %o2, .Lexit nop ba,a,pt %icc, .Ltiny .Lsmall_unaligned: -1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1)) +1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2) add %o1, 1, %o1 add %o0, 1, %o0 subcc %o2, 1, %o2 bne,pt %icc, 1b - EX_ST(STORE(stb, %g1, %o0 - 0x01)) + EX_ST(STORE(stb, %g1, %o0 - 0x01), NG4_retl_o2_plus_1) ba,a,pt %icc, .Lexit .size FUNC_NAME, .-FUNC_NAME From 272ddc8b37354c3fe111ab26d25e792629148eee Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 24 Oct 2016 19:00:44 -0700 Subject: [PATCH 560/884] proc: don't use FOLL_FORCE for reading cmdline and environment Now that Lorenzo cleaned things up and made the FOLL_FORCE users explicit, it becomes obvious how some of them don't really need FOLL_FORCE at all. So remove FOLL_FORCE from the proc code that reads the command line and arguments from user space. The mem_rw() function actually does want FOLL_FORCE, because gdd (and possibly many other debuggers) use it as a much more convenient version of PTRACE_PEEKDATA, but we should consider making the FOLL_FORCE part conditional on actually being a ptracer. This does not actually do that, just moves adds a comment to that effect and moves the gup_flags settings next to each other. Signed-off-by: Linus Torvalds --- fs/proc/base.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 8e654468ab67..adfc5b4986f5 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -252,7 +252,7 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, * Inherently racy -- command line shares address space * with code and data. */ - rv = access_remote_vm(mm, arg_end - 1, &c, 1, FOLL_FORCE); + rv = access_remote_vm(mm, arg_end - 1, &c, 1, 0); if (rv <= 0) goto out_free_page; @@ -270,8 +270,7 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, int nr_read; _count = min3(count, len, PAGE_SIZE); - nr_read = access_remote_vm(mm, p, page, _count, - FOLL_FORCE); + nr_read = access_remote_vm(mm, p, page, _count, 0); if (nr_read < 0) rv = nr_read; if (nr_read <= 0) @@ -306,8 +305,7 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, bool final; _count = min3(count, len, PAGE_SIZE); - nr_read = access_remote_vm(mm, p, page, _count, - FOLL_FORCE); + nr_read = access_remote_vm(mm, p, page, _count, 0); if (nr_read < 0) rv = nr_read; if (nr_read <= 0) @@ -356,8 +354,7 @@ skip_argv: bool final; _count = min3(count, len, PAGE_SIZE); - nr_read = access_remote_vm(mm, p, page, _count, - FOLL_FORCE); + nr_read = access_remote_vm(mm, p, page, _count, 0); if (nr_read < 0) rv = nr_read; if (nr_read <= 0) @@ -835,7 +832,7 @@ static ssize_t mem_rw(struct file *file, char __user *buf, unsigned long addr = *ppos; ssize_t copied; char *page; - unsigned int flags = FOLL_FORCE; + unsigned int flags; if (!mm) return 0; @@ -848,6 +845,8 @@ static ssize_t mem_rw(struct file *file, char __user *buf, if (!atomic_inc_not_zero(&mm->mm_users)) goto free; + /* Maybe we should limit FOLL_FORCE to actual ptrace users? */ + flags = FOLL_FORCE; if (write) flags |= FOLL_WRITE; @@ -971,8 +970,7 @@ static ssize_t environ_read(struct file *file, char __user *buf, max_len = min_t(size_t, PAGE_SIZE, count); this_len = min(max_len, this_len); - retval = access_remote_vm(mm, (env_start + src), - page, this_len, FOLL_FORCE); + retval = access_remote_vm(mm, (env_start + src), page, this_len, 0); if (retval <= 0) { ret = retval; From 0d7317598214134d73da59990b846481a9527a00 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 24 Oct 2016 10:57:25 +0100 Subject: [PATCH 561/884] mm: unexport __get_user_pages() This patch unexports the low-level __get_user_pages() function. Recent refactoring of the get_user_pages* functions allow flags to be passed through get_user_pages() which eliminates the need for access to this function from its one user, kvm. We can see that the two calls to get_user_pages() which replace __get_user_pages() in kvm_main.c are equivalent by examining their call stacks: get_user_page_nowait(): get_user_pages(start, 1, flags, page, NULL) __get_user_pages_locked(current, current->mm, start, 1, page, NULL, NULL, false, flags | FOLL_TOUCH) __get_user_pages(current, current->mm, start, 1, flags | FOLL_TOUCH | FOLL_GET, page, NULL, NULL) check_user_page_hwpoison(): get_user_pages(addr, 1, flags, NULL, NULL) __get_user_pages_locked(current, current->mm, addr, 1, NULL, NULL, NULL, false, flags | FOLL_TOUCH) __get_user_pages(current, current->mm, addr, 1, flags | FOLL_TOUCH, NULL, NULL, NULL) Signed-off-by: Lorenzo Stoakes Acked-by: Paolo Bonzini Acked-by: Michal Hocko Signed-off-by: Linus Torvalds --- include/linux/mm.h | 4 ---- mm/gup.c | 3 +-- mm/nommu.c | 2 +- virt/kvm/kvm_main.c | 10 ++++------ 4 files changed, 6 insertions(+), 13 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 3a191853faaa..a92c8d73aeaf 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1271,10 +1271,6 @@ extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void * extern int access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf, int len, unsigned int gup_flags); -long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, - unsigned int foll_flags, struct page **pages, - struct vm_area_struct **vmas, int *nonblocking); long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, diff --git a/mm/gup.c b/mm/gup.c index 7aa113c2d373..ec4f82704b6f 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -526,7 +526,7 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) * instead of __get_user_pages. __get_user_pages should be used only if * you need some special @gup_flags. */ -long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, +static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas, int *nonblocking) @@ -631,7 +631,6 @@ next_page: } while (nr_pages); return i; } -EXPORT_SYMBOL(__get_user_pages); bool vma_permits_fault(struct vm_area_struct *vma, unsigned int fault_flags) { diff --git a/mm/nommu.c b/mm/nommu.c index db5fd1795298..8b8faaf2a9e9 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -109,7 +109,7 @@ unsigned int kobjsize(const void *objp) return PAGE_SIZE << compound_order(page); } -long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, +static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, unsigned int foll_flags, struct page **pages, struct vm_area_struct **vmas, int *nonblocking) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 28510e72618a..2907b7b78654 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1346,21 +1346,19 @@ unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *w static int get_user_page_nowait(unsigned long start, int write, struct page **page) { - int flags = FOLL_TOUCH | FOLL_NOWAIT | FOLL_HWPOISON | FOLL_GET; + int flags = FOLL_NOWAIT | FOLL_HWPOISON; if (write) flags |= FOLL_WRITE; - return __get_user_pages(current, current->mm, start, 1, flags, page, - NULL, NULL); + return get_user_pages(start, 1, flags, page, NULL); } static inline int check_user_page_hwpoison(unsigned long addr) { - int rc, flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_WRITE; + int rc, flags = FOLL_HWPOISON | FOLL_WRITE; - rc = __get_user_pages(current, current->mm, addr, 1, - flags, NULL, NULL, NULL); + rc = get_user_pages(addr, 1, flags, NULL, NULL); return rc == -EHWPOISON; } From 7ae3aaf53f1695877ccd5ebbc49ea65991e41f1e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 24 Oct 2016 19:32:12 -0700 Subject: [PATCH 562/884] sparc64: Convert NGcopy_{from,to}_user to accurate exception reporting. Report the exact number of bytes which have not been successfully copied when an exception occurs, using the running remaining length. Signed-off-by: David S. Miller --- arch/sparc/lib/NGcopy_from_user.S | 4 +- arch/sparc/lib/NGcopy_to_user.S | 4 +- arch/sparc/lib/NGmemcpy.S | 233 ++++++++++++++++++++---------- 3 files changed, 162 insertions(+), 79 deletions(-) diff --git a/arch/sparc/lib/NGcopy_from_user.S b/arch/sparc/lib/NGcopy_from_user.S index e61694c444af..9cd42fcbc781 100644 --- a/arch/sparc/lib/NGcopy_from_user.S +++ b/arch/sparc/lib/NGcopy_from_user.S @@ -3,11 +3,11 @@ * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net) */ -#define EX_LD(x) \ +#define EX_LD(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __ret_mone_asi;\ + .word 98b, y; \ .text; \ .align 4; diff --git a/arch/sparc/lib/NGcopy_to_user.S b/arch/sparc/lib/NGcopy_to_user.S index 2d6b33d3998f..5c358afd464e 100644 --- a/arch/sparc/lib/NGcopy_to_user.S +++ b/arch/sparc/lib/NGcopy_to_user.S @@ -3,11 +3,11 @@ * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net) */ -#define EX_ST(x) \ +#define EX_ST(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __ret_mone_asi;\ + .word 98b, y; \ .text; \ .align 4; diff --git a/arch/sparc/lib/NGmemcpy.S b/arch/sparc/lib/NGmemcpy.S index 96a14caf6966..d88c4ed50a00 100644 --- a/arch/sparc/lib/NGmemcpy.S +++ b/arch/sparc/lib/NGmemcpy.S @@ -4,6 +4,7 @@ */ #ifdef __KERNEL__ +#include #include #include #define GLOBAL_SPARE %g7 @@ -27,15 +28,11 @@ #endif #ifndef EX_LD -#define EX_LD(x) x +#define EX_LD(x,y) x #endif #ifndef EX_ST -#define EX_ST(x) x -#endif - -#ifndef EX_RETVAL -#define EX_RETVAL(x) x +#define EX_ST(x,y) x #endif #ifndef LOAD @@ -79,6 +76,92 @@ .register %g3,#scratch .text +#ifndef EX_RETVAL +#define EX_RETVAL(x) x +__restore_asi: + ret + wr %g0, ASI_AIUS, %asi + restore +ENTRY(NG_ret_i2_plus_i4_plus_1) + ba,pt %xcc, __restore_asi + add %i2, %i5, %i0 +ENDPROC(NG_ret_i2_plus_i4_plus_1) +ENTRY(NG_ret_i2_plus_g1) + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1) +ENTRY(NG_ret_i2_plus_g1_minus_8) + sub %g1, 8, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1_minus_8) +ENTRY(NG_ret_i2_plus_g1_minus_16) + sub %g1, 16, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1_minus_16) +ENTRY(NG_ret_i2_plus_g1_minus_24) + sub %g1, 24, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1_minus_24) +ENTRY(NG_ret_i2_plus_g1_minus_32) + sub %g1, 32, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1_minus_32) +ENTRY(NG_ret_i2_plus_g1_minus_40) + sub %g1, 40, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1_minus_40) +ENTRY(NG_ret_i2_plus_g1_minus_48) + sub %g1, 48, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1_minus_48) +ENTRY(NG_ret_i2_plus_g1_minus_56) + sub %g1, 56, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1_minus_56) +ENTRY(NG_ret_i2_plus_i4) + ba,pt %xcc, __restore_asi + add %i2, %i4, %i0 +ENDPROC(NG_ret_i2_plus_i4) +ENTRY(NG_ret_i2_plus_i4_minus_8) + sub %i4, 8, %i4 + ba,pt %xcc, __restore_asi + add %i2, %i4, %i0 +ENDPROC(NG_ret_i2_plus_i4_minus_8) +ENTRY(NG_ret_i2_plus_8) + ba,pt %xcc, __restore_asi + add %i2, 8, %i0 +ENDPROC(NG_ret_i2_plus_8) +ENTRY(NG_ret_i2_plus_4) + ba,pt %xcc, __restore_asi + add %i2, 4, %i0 +ENDPROC(NG_ret_i2_plus_4) +ENTRY(NG_ret_i2_plus_1) + ba,pt %xcc, __restore_asi + add %i2, 1, %i0 +ENDPROC(NG_ret_i2_plus_1) +ENTRY(NG_ret_i2_plus_g1_plus_1) + add %g1, 1, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1_plus_1) +ENTRY(NG_ret_i2) + ba,pt %xcc, __restore_asi + mov %i2, %i0 +ENDPROC(NG_ret_i2) +ENTRY(NG_ret_i2_and_7_plus_i4) + and %i2, 7, %i2 + ba,pt %xcc, __restore_asi + add %i2, %i4, %i0 +ENDPROC(NG_ret_i2_and_7_plus_i4) +#endif + .align 64 .globl FUNC_NAME @@ -126,8 +209,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ sub %g0, %i4, %i4 ! bytes to align dst sub %i2, %i4, %i2 1: subcc %i4, 1, %i4 - EX_LD(LOAD(ldub, %i1, %g1)) - EX_ST(STORE(stb, %g1, %o0)) + EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_i4_plus_1) + EX_ST(STORE(stb, %g1, %o0), NG_ret_i2_plus_i4_plus_1) add %i1, 1, %i1 bne,pt %XCC, 1b add %o0, 1, %o0 @@ -160,7 +243,7 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ and %i4, 0x7, GLOBAL_SPARE sll GLOBAL_SPARE, 3, GLOBAL_SPARE mov 64, %i5 - EX_LD(LOAD_TWIN(%i1, %g2, %g3)) + EX_LD(LOAD_TWIN(%i1, %g2, %g3), NG_ret_i2_plus_g1) sub %i5, GLOBAL_SPARE, %i5 mov 16, %o4 mov 32, %o5 @@ -178,31 +261,31 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ srlx WORD3, PRE_SHIFT, TMP; \ or WORD2, TMP, WORD2; -8: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3)) +8: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1) MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1) LOAD(prefetch, %i1 + %i3, #one_read) - EX_ST(STORE_INIT(%g2, %o0 + 0x00)) - EX_ST(STORE_INIT(%g3, %o0 + 0x08)) + EX_ST(STORE_INIT(%g2, %o0 + 0x00), NG_ret_i2_plus_g1) + EX_ST(STORE_INIT(%g3, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8) - EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3)) + EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16) MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1) - EX_ST(STORE_INIT(%o2, %o0 + 0x10)) - EX_ST(STORE_INIT(%o3, %o0 + 0x18)) + EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16) + EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24) - EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) + EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32) MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1) - EX_ST(STORE_INIT(%g2, %o0 + 0x20)) - EX_ST(STORE_INIT(%g3, %o0 + 0x28)) + EX_ST(STORE_INIT(%g2, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32) + EX_ST(STORE_INIT(%g3, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40) - EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3)) + EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48) add %i1, 64, %i1 MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1) - EX_ST(STORE_INIT(%o2, %o0 + 0x30)) - EX_ST(STORE_INIT(%o3, %o0 + 0x38)) + EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48) + EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56) subcc %g1, 64, %g1 bne,pt %XCC, 8b @@ -211,31 +294,31 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ ba,pt %XCC, 60f add %i1, %i4, %i1 -9: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3)) +9: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1) MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1) LOAD(prefetch, %i1 + %i3, #one_read) - EX_ST(STORE_INIT(%g3, %o0 + 0x00)) - EX_ST(STORE_INIT(%o2, %o0 + 0x08)) + EX_ST(STORE_INIT(%g3, %o0 + 0x00), NG_ret_i2_plus_g1) + EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8) - EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3)) + EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16) MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1) - EX_ST(STORE_INIT(%o3, %o0 + 0x10)) - EX_ST(STORE_INIT(%g2, %o0 + 0x18)) + EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16) + EX_ST(STORE_INIT(%g2, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24) - EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) + EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32) MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1) - EX_ST(STORE_INIT(%g3, %o0 + 0x20)) - EX_ST(STORE_INIT(%o2, %o0 + 0x28)) + EX_ST(STORE_INIT(%g3, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32) + EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40) - EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3)) + EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48) add %i1, 64, %i1 MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1) - EX_ST(STORE_INIT(%o3, %o0 + 0x30)) - EX_ST(STORE_INIT(%g2, %o0 + 0x38)) + EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48) + EX_ST(STORE_INIT(%g2, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56) subcc %g1, 64, %g1 bne,pt %XCC, 9b @@ -249,25 +332,25 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ * one twin load ahead, then add 8 back into source when * we finish the loop. */ - EX_LD(LOAD_TWIN(%i1, %o4, %o5)) + EX_LD(LOAD_TWIN(%i1, %o4, %o5), NG_ret_i2_plus_g1) mov 16, %o7 mov 32, %g2 mov 48, %g3 mov 64, %o1 -1: EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) +1: EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1) LOAD(prefetch, %i1 + %o1, #one_read) - EX_ST(STORE_INIT(%o5, %o0 + 0x00)) ! initializes cache line - EX_ST(STORE_INIT(%o2, %o0 + 0x08)) - EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5)) - EX_ST(STORE_INIT(%o3, %o0 + 0x10)) - EX_ST(STORE_INIT(%o4, %o0 + 0x18)) - EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3)) - EX_ST(STORE_INIT(%o5, %o0 + 0x20)) - EX_ST(STORE_INIT(%o2, %o0 + 0x28)) - EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5)) + EX_ST(STORE_INIT(%o5, %o0 + 0x00), NG_ret_i2_plus_g1) ! initializes cache line + EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8) + EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16) + EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16) + EX_ST(STORE_INIT(%o4, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24) + EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32) + EX_ST(STORE_INIT(%o5, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32) + EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40) + EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5), NG_ret_i2_plus_g1_minus_48) add %i1, 64, %i1 - EX_ST(STORE_INIT(%o3, %o0 + 0x30)) - EX_ST(STORE_INIT(%o4, %o0 + 0x38)) + EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48) + EX_ST(STORE_INIT(%o4, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56) subcc %g1, 64, %g1 bne,pt %XCC, 1b add %o0, 64, %o0 @@ -282,20 +365,20 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ mov 32, %g2 mov 48, %g3 mov 64, %o1 -1: EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5)) - EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) +1: EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5), NG_ret_i2_plus_g1) + EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1) LOAD(prefetch, %i1 + %o1, #one_read) - EX_ST(STORE_INIT(%o4, %o0 + 0x00)) ! initializes cache line - EX_ST(STORE_INIT(%o5, %o0 + 0x08)) - EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5)) - EX_ST(STORE_INIT(%o2, %o0 + 0x10)) - EX_ST(STORE_INIT(%o3, %o0 + 0x18)) - EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3)) + EX_ST(STORE_INIT(%o4, %o0 + 0x00), NG_ret_i2_plus_g1) ! initializes cache line + EX_ST(STORE_INIT(%o5, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8) + EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16) + EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16) + EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24) + EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32) add %i1, 64, %i1 - EX_ST(STORE_INIT(%o4, %o0 + 0x20)) - EX_ST(STORE_INIT(%o5, %o0 + 0x28)) - EX_ST(STORE_INIT(%o2, %o0 + 0x30)) - EX_ST(STORE_INIT(%o3, %o0 + 0x38)) + EX_ST(STORE_INIT(%o4, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32) + EX_ST(STORE_INIT(%o5, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40) + EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48) + EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56) subcc %g1, 64, %g1 bne,pt %XCC, 1b add %o0, 64, %o0 @@ -321,28 +404,28 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ andn %i2, 0xf, %i4 and %i2, 0xf, %i2 1: subcc %i4, 0x10, %i4 - EX_LD(LOAD(ldx, %i1, %o4)) + EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_i4) add %i1, 0x08, %i1 - EX_LD(LOAD(ldx, %i1, %g1)) + EX_LD(LOAD(ldx, %i1, %g1), NG_ret_i2_plus_i4) sub %i1, 0x08, %i1 - EX_ST(STORE(stx, %o4, %i1 + %i3)) + EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_i4) add %i1, 0x8, %i1 - EX_ST(STORE(stx, %g1, %i1 + %i3)) + EX_ST(STORE(stx, %g1, %i1 + %i3), NG_ret_i2_plus_i4_minus_8) bgu,pt %XCC, 1b add %i1, 0x8, %i1 73: andcc %i2, 0x8, %g0 be,pt %XCC, 1f nop sub %i2, 0x8, %i2 - EX_LD(LOAD(ldx, %i1, %o4)) - EX_ST(STORE(stx, %o4, %i1 + %i3)) + EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_8) + EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_8) add %i1, 0x8, %i1 1: andcc %i2, 0x4, %g0 be,pt %XCC, 1f nop sub %i2, 0x4, %i2 - EX_LD(LOAD(lduw, %i1, %i5)) - EX_ST(STORE(stw, %i5, %i1 + %i3)) + EX_LD(LOAD(lduw, %i1, %i5), NG_ret_i2_plus_4) + EX_ST(STORE(stw, %i5, %i1 + %i3), NG_ret_i2_plus_4) add %i1, 0x4, %i1 1: cmp %i2, 0 be,pt %XCC, 85f @@ -358,8 +441,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ sub %i2, %g1, %i2 1: subcc %g1, 1, %g1 - EX_LD(LOAD(ldub, %i1, %i5)) - EX_ST(STORE(stb, %i5, %i1 + %i3)) + EX_LD(LOAD(ldub, %i1, %i5), NG_ret_i2_plus_g1_plus_1) + EX_ST(STORE(stb, %i5, %i1 + %i3), NG_ret_i2_plus_g1_plus_1) bgu,pt %icc, 1b add %i1, 1, %i1 @@ -375,16 +458,16 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ 8: mov 64, %i3 andn %i1, 0x7, %i1 - EX_LD(LOAD(ldx, %i1, %g2)) + EX_LD(LOAD(ldx, %i1, %g2), NG_ret_i2) sub %i3, %g1, %i3 andn %i2, 0x7, %i4 sllx %g2, %g1, %g2 1: add %i1, 0x8, %i1 - EX_LD(LOAD(ldx, %i1, %g3)) + EX_LD(LOAD(ldx, %i1, %g3), NG_ret_i2_and_7_plus_i4) subcc %i4, 0x8, %i4 srlx %g3, %i3, %i5 or %i5, %g2, %i5 - EX_ST(STORE(stx, %i5, %o0)) + EX_ST(STORE(stx, %i5, %o0), NG_ret_i2_and_7_plus_i4) add %o0, 0x8, %o0 bgu,pt %icc, 1b sllx %g3, %g1, %g2 @@ -404,8 +487,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ 1: subcc %i2, 4, %i2 - EX_LD(LOAD(lduw, %i1, %g1)) - EX_ST(STORE(stw, %g1, %i1 + %i3)) + EX_LD(LOAD(lduw, %i1, %g1), NG_ret_i2_plus_4) + EX_ST(STORE(stw, %g1, %i1 + %i3), NG_ret_i2_plus_4) bgu,pt %XCC, 1b add %i1, 4, %i1 @@ -415,8 +498,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ .align 32 90: subcc %i2, 1, %i2 - EX_LD(LOAD(ldub, %i1, %g1)) - EX_ST(STORE(stb, %g1, %i1 + %i3)) + EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_1) + EX_ST(STORE(stb, %g1, %i1 + %i3), NG_ret_i2_plus_1) bgu,pt %XCC, 90b add %i1, 1, %i1 ret From e93704e4464fdc191f73fce35129c18de2ebf95d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 24 Oct 2016 20:46:44 -0700 Subject: [PATCH 563/884] sparc64: Convert NG2copy_{from,to}_user to accurate exception reporting. Report the exact number of bytes which have not been successfully copied when an exception occurs, using the running remaining length. Signed-off-by: David S. Miller --- arch/sparc/lib/NG2copy_from_user.S | 8 +- arch/sparc/lib/NG2copy_to_user.S | 8 +- arch/sparc/lib/NG2memcpy.S | 228 ++++++++++++++++++----------- 3 files changed, 153 insertions(+), 91 deletions(-) diff --git a/arch/sparc/lib/NG2copy_from_user.S b/arch/sparc/lib/NG2copy_from_user.S index 4d47fa5519d4..b79a6998d87c 100644 --- a/arch/sparc/lib/NG2copy_from_user.S +++ b/arch/sparc/lib/NG2copy_from_user.S @@ -3,19 +3,19 @@ * Copyright (C) 2007 David S. Miller (davem@davemloft.net) */ -#define EX_LD(x) \ +#define EX_LD(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_mone_asi;\ + .word 98b, y; \ .text; \ .align 4; -#define EX_LD_FP(x) \ +#define EX_LD_FP(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_mone_asi_fp;\ + .word 98b, y##_fp; \ .text; \ .align 4; diff --git a/arch/sparc/lib/NG2copy_to_user.S b/arch/sparc/lib/NG2copy_to_user.S index 2078d752709a..dcec55f254ab 100644 --- a/arch/sparc/lib/NG2copy_to_user.S +++ b/arch/sparc/lib/NG2copy_to_user.S @@ -3,19 +3,19 @@ * Copyright (C) 2007 David S. Miller (davem@davemloft.net) */ -#define EX_ST(x) \ +#define EX_ST(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_mone_asi;\ + .word 98b, y; \ .text; \ .align 4; -#define EX_ST_FP(x) \ +#define EX_ST_FP(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_mone_asi_fp;\ + .word 98b, y##_fp; \ .text; \ .align 4; diff --git a/arch/sparc/lib/NG2memcpy.S b/arch/sparc/lib/NG2memcpy.S index d5f585df2f3f..c629dbd121b6 100644 --- a/arch/sparc/lib/NG2memcpy.S +++ b/arch/sparc/lib/NG2memcpy.S @@ -4,6 +4,7 @@ */ #ifdef __KERNEL__ +#include #include #include #define GLOBAL_SPARE %g7 @@ -32,21 +33,17 @@ #endif #ifndef EX_LD -#define EX_LD(x) x +#define EX_LD(x,y) x #endif #ifndef EX_LD_FP -#define EX_LD_FP(x) x +#define EX_LD_FP(x,y) x #endif #ifndef EX_ST -#define EX_ST(x) x +#define EX_ST(x,y) x #endif #ifndef EX_ST_FP -#define EX_ST_FP(x) x -#endif - -#ifndef EX_RETVAL -#define EX_RETVAL(x) x +#define EX_ST_FP(x,y) x #endif #ifndef LOAD @@ -140,45 +137,110 @@ fsrc2 %x6, %f12; \ fsrc2 %x7, %f14; #define FREG_LOAD_1(base, x0) \ - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)) + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1) #define FREG_LOAD_2(base, x0, x1) \ - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); #define FREG_LOAD_3(base, x0, x1, x2) \ - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ - EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); #define FREG_LOAD_4(base, x0, x1, x2, x3) \ - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ - EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \ - EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); #define FREG_LOAD_5(base, x0, x1, x2, x3, x4) \ - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ - EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \ - EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \ - EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); #define FREG_LOAD_6(base, x0, x1, x2, x3, x4, x5) \ - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ - EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \ - EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \ - EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \ - EX_LD_FP(LOAD(ldd, base + 0x28, %x5)); + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1); #define FREG_LOAD_7(base, x0, x1, x2, x3, x4, x5, x6) \ - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ - EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \ - EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \ - EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \ - EX_LD_FP(LOAD(ldd, base + 0x28, %x5)); \ - EX_LD_FP(LOAD(ldd, base + 0x30, %x6)); + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x30, %x6), NG2_retl_o2_plus_g1); .register %g2,#scratch .register %g3,#scratch .text +#ifndef EX_RETVAL +#define EX_RETVAL(x) x +__restore_fp: + VISExitHalf +__restore_asi: + retl + wr %g0, ASI_AIUS, %asi +ENTRY(NG2_retl_o2) + ba,pt %xcc, __restore_asi + mov %o2, %o0 +ENDPROC(NG2_retl_o2) +ENTRY(NG2_retl_o2_plus_1) + ba,pt %xcc, __restore_asi + add %o2, 1, %o0 +ENDPROC(NG2_retl_o2_plus_1) +ENTRY(NG2_retl_o2_plus_4) + ba,pt %xcc, __restore_asi + add %o2, 4, %o0 +ENDPROC(NG2_retl_o2_plus_4) +ENTRY(NG2_retl_o2_plus_8) + ba,pt %xcc, __restore_asi + add %o2, 8, %o0 +ENDPROC(NG2_retl_o2_plus_8) +ENTRY(NG2_retl_o2_plus_o4_plus_1) + add %o4, 1, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG2_retl_o2_plus_o4_plus_1) +ENTRY(NG2_retl_o2_plus_o4_plus_8) + add %o4, 8, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG2_retl_o2_plus_o4_plus_8) +ENTRY(NG2_retl_o2_plus_o4_plus_16) + add %o4, 16, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG2_retl_o2_plus_o4_plus_16) +ENTRY(NG2_retl_o2_plus_g1_fp) + ba,pt %xcc, __restore_fp + add %o2, %g1, %o0 +ENDPROC(NG2_retl_o2_plus_g1_fp) +ENTRY(NG2_retl_o2_plus_g1_plus_64_fp) + add %g1, 64, %g1 + ba,pt %xcc, __restore_fp + add %o2, %g1, %o0 +ENDPROC(NG2_retl_o2_plus_g1_plus_64_fp) +ENTRY(NG2_retl_o2_plus_g1_plus_1) + add %g1, 1, %g1 + ba,pt %xcc, __restore_asi + add %o2, %g1, %o0 +ENDPROC(NG2_retl_o2_plus_g1_plus_1) +ENTRY(NG2_retl_o2_and_7_plus_o4) + and %o2, 7, %o2 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG2_retl_o2_and_7_plus_o4) +ENTRY(NG2_retl_o2_and_7_plus_o4_plus_8) + and %o2, 7, %o2 + add %o4, 8, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG2_retl_o2_and_7_plus_o4_plus_8) +#endif + .align 64 .globl FUNC_NAME @@ -230,8 +292,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ sub %g0, %o4, %o4 ! bytes to align dst sub %o2, %o4, %o2 1: subcc %o4, 1, %o4 - EX_LD(LOAD(ldub, %o1, %g1)) - EX_ST(STORE(stb, %g1, %o0)) + EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_o4_plus_1) + EX_ST(STORE(stb, %g1, %o0), NG2_retl_o2_plus_o4_plus_1) add %o1, 1, %o1 bne,pt %XCC, 1b add %o0, 1, %o0 @@ -281,11 +343,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ nop /* fall through for 0 < low bits < 8 */ 110: sub %o4, 64, %g2 - EX_LD_FP(LOAD_BLK(%g2, %f0)) -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) - EX_LD_FP(LOAD_BLK(%o4, %f16)) + EX_LD_FP(LOAD_BLK(%g2, %f0), NG2_retl_o2_plus_g1) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f14, f16) - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_8(f16, f18, f20, f22, f24, f26, f28, f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -296,10 +358,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 120: sub %o4, 56, %g2 FREG_LOAD_7(%g2, f0, f2, f4, f6, f8, f10, f12) -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) - EX_LD_FP(LOAD_BLK(%o4, %f16)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f16, f18) - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_7(f18, f20, f22, f24, f26, f28, f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -310,10 +372,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 130: sub %o4, 48, %g2 FREG_LOAD_6(%g2, f0, f2, f4, f6, f8, f10) -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) - EX_LD_FP(LOAD_BLK(%o4, %f16)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f2, f4, f6, f8, f10, f16, f18, f20) - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_6(f20, f22, f24, f26, f28, f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -324,10 +386,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 140: sub %o4, 40, %g2 FREG_LOAD_5(%g2, f0, f2, f4, f6, f8) -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) - EX_LD_FP(LOAD_BLK(%o4, %f16)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f2, f4, f6, f8, f16, f18, f20, f22) - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_5(f22, f24, f26, f28, f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -338,10 +400,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 150: sub %o4, 32, %g2 FREG_LOAD_4(%g2, f0, f2, f4, f6) -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) - EX_LD_FP(LOAD_BLK(%o4, %f16)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f2, f4, f6, f16, f18, f20, f22, f24) - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_4(f24, f26, f28, f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -352,10 +414,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 160: sub %o4, 24, %g2 FREG_LOAD_3(%g2, f0, f2, f4) -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) - EX_LD_FP(LOAD_BLK(%o4, %f16)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f2, f4, f16, f18, f20, f22, f24, f26) - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_3(f26, f28, f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -366,10 +428,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 170: sub %o4, 16, %g2 FREG_LOAD_2(%g2, f0, f2) -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) - EX_LD_FP(LOAD_BLK(%o4, %f16)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f2, f16, f18, f20, f22, f24, f26, f28) - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_2(f28, f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -380,10 +442,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 180: sub %o4, 8, %g2 FREG_LOAD_1(%g2, f0) -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) - EX_LD_FP(LOAD_BLK(%o4, %f16)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f16, f18, f20, f22, f24, f26, f28, f30) - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_1(f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -393,10 +455,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ nop 190: -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) subcc %g1, 64, %g1 - EX_LD_FP(LOAD_BLK(%o4, %f0)) - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) + EX_LD_FP(LOAD_BLK(%o4, %f0), NG2_retl_o2_plus_g1_plus_64) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1_plus_64) add %o4, 64, %o4 bne,pt %xcc, 1b LOAD(prefetch, %o4 + 64, #one_read) @@ -423,28 +485,28 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ andn %o2, 0xf, %o4 and %o2, 0xf, %o2 1: subcc %o4, 0x10, %o4 - EX_LD(LOAD(ldx, %o1, %o5)) + EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_o4_plus_16) add %o1, 0x08, %o1 - EX_LD(LOAD(ldx, %o1, %g1)) + EX_LD(LOAD(ldx, %o1, %g1), NG2_retl_o2_plus_o4_plus_16) sub %o1, 0x08, %o1 - EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE)) + EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_16) add %o1, 0x8, %o1 - EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE)) + EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_8) bgu,pt %XCC, 1b add %o1, 0x8, %o1 73: andcc %o2, 0x8, %g0 be,pt %XCC, 1f nop sub %o2, 0x8, %o2 - EX_LD(LOAD(ldx, %o1, %o5)) - EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE)) + EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_8) + EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_8) add %o1, 0x8, %o1 1: andcc %o2, 0x4, %g0 be,pt %XCC, 1f nop sub %o2, 0x4, %o2 - EX_LD(LOAD(lduw, %o1, %o5)) - EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE)) + EX_LD(LOAD(lduw, %o1, %o5), NG2_retl_o2_plus_4) + EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4) add %o1, 0x4, %o1 1: cmp %o2, 0 be,pt %XCC, 85f @@ -460,8 +522,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ sub %o2, %g1, %o2 1: subcc %g1, 1, %g1 - EX_LD(LOAD(ldub, %o1, %o5)) - EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE)) + EX_LD(LOAD(ldub, %o1, %o5), NG2_retl_o2_plus_g1_plus_1) + EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_g1_plus_1) bgu,pt %icc, 1b add %o1, 1, %o1 @@ -477,16 +539,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 8: mov 64, GLOBAL_SPARE andn %o1, 0x7, %o1 - EX_LD(LOAD(ldx, %o1, %g2)) + EX_LD(LOAD(ldx, %o1, %g2), NG2_retl_o2) sub GLOBAL_SPARE, %g1, GLOBAL_SPARE andn %o2, 0x7, %o4 sllx %g2, %g1, %g2 1: add %o1, 0x8, %o1 - EX_LD(LOAD(ldx, %o1, %g3)) + EX_LD(LOAD(ldx, %o1, %g3), NG2_retl_o2_and_7_plus_o4) subcc %o4, 0x8, %o4 srlx %g3, GLOBAL_SPARE, %o5 or %o5, %g2, %o5 - EX_ST(STORE(stx, %o5, %o0)) + EX_ST(STORE(stx, %o5, %o0), NG2_retl_o2_and_7_plus_o4_plus_8) add %o0, 0x8, %o0 bgu,pt %icc, 1b sllx %g3, %g1, %g2 @@ -506,8 +568,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 1: subcc %o2, 4, %o2 - EX_LD(LOAD(lduw, %o1, %g1)) - EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE)) + EX_LD(LOAD(lduw, %o1, %g1), NG2_retl_o2_plus_4) + EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4) bgu,pt %XCC, 1b add %o1, 4, %o1 @@ -517,8 +579,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ .align 32 90: subcc %o2, 1, %o2 - EX_LD(LOAD(ldub, %o1, %g1)) - EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE)) + EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_1) + EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_1) bgu,pt %XCC, 90b add %o1, 1, %o1 retl From ee841d0aff649164080e445e84885015958d8ff4 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 24 Oct 2016 21:20:35 -0700 Subject: [PATCH 564/884] sparc64: Convert U3copy_{from,to}_user to accurate exception reporting. Report the exact number of bytes which have not been successfully copied when an exception occurs, using the running remaining length. Signed-off-by: David S. Miller --- arch/sparc/lib/U3copy_from_user.S | 8 +- arch/sparc/lib/U3copy_to_user.S | 8 +- arch/sparc/lib/U3memcpy.S | 227 ++++++++++++++++++++---------- 3 files changed, 162 insertions(+), 81 deletions(-) diff --git a/arch/sparc/lib/U3copy_from_user.S b/arch/sparc/lib/U3copy_from_user.S index 1046e2b083fe..db73010a1af8 100644 --- a/arch/sparc/lib/U3copy_from_user.S +++ b/arch/sparc/lib/U3copy_from_user.S @@ -3,19 +3,19 @@ * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) */ -#define EX_LD(x) \ +#define EX_LD(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_mone; \ + .word 98b, y; \ .text; \ .align 4; -#define EX_LD_FP(x) \ +#define EX_LD_FP(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_mone_fp;\ + .word 98b, y##_fp; \ .text; \ .align 4; diff --git a/arch/sparc/lib/U3copy_to_user.S b/arch/sparc/lib/U3copy_to_user.S index 032b0c5419b0..c4ee858e352a 100644 --- a/arch/sparc/lib/U3copy_to_user.S +++ b/arch/sparc/lib/U3copy_to_user.S @@ -3,19 +3,19 @@ * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) */ -#define EX_ST(x) \ +#define EX_ST(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_mone; \ + .word 98b, y; \ .text; \ .align 4; -#define EX_ST_FP(x) \ +#define EX_ST_FP(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_mone_fp;\ + .word 98b, y##_fp; \ .text; \ .align 4; diff --git a/arch/sparc/lib/U3memcpy.S b/arch/sparc/lib/U3memcpy.S index 491ee69e4995..54f98706b03b 100644 --- a/arch/sparc/lib/U3memcpy.S +++ b/arch/sparc/lib/U3memcpy.S @@ -4,6 +4,7 @@ */ #ifdef __KERNEL__ +#include #include #include #define GLOBAL_SPARE %g7 @@ -22,21 +23,17 @@ #endif #ifndef EX_LD -#define EX_LD(x) x +#define EX_LD(x,y) x #endif #ifndef EX_LD_FP -#define EX_LD_FP(x) x +#define EX_LD_FP(x,y) x #endif #ifndef EX_ST -#define EX_ST(x) x +#define EX_ST(x,y) x #endif #ifndef EX_ST_FP -#define EX_ST_FP(x) x -#endif - -#ifndef EX_RETVAL -#define EX_RETVAL(x) x +#define EX_ST_FP(x,y) x #endif #ifndef LOAD @@ -77,6 +74,87 @@ */ .text +#ifndef EX_RETVAL +#define EX_RETVAL(x) x +__restore_fp: + VISExitHalf + retl + nop +ENTRY(U3_retl_o2_plus_g2_plus_g1_plus_1_fp) + add %g1, 1, %g1 + add %g2, %g1, %g2 + ba,pt %xcc, __restore_fp + add %o2, %g2, %o0 +ENDPROC(U3_retl_o2_plus_g2_plus_g1_plus_1_fp) +ENTRY(U3_retl_o2_plus_g2_fp) + ba,pt %xcc, __restore_fp + add %o2, %g2, %o0 +ENDPROC(U3_retl_o2_plus_g2_fp) +ENTRY(U3_retl_o2_plus_g2_plus_8_fp) + add %g2, 8, %g2 + ba,pt %xcc, __restore_fp + add %o2, %g2, %o0 +ENDPROC(U3_retl_o2_plus_g2_plus_8_fp) +ENTRY(U3_retl_o2) + retl + mov %o2, %o0 +ENDPROC(U3_retl_o2) +ENTRY(U3_retl_o2_plus_1) + retl + add %o2, 1, %o0 +ENDPROC(U3_retl_o2_plus_1) +ENTRY(U3_retl_o2_plus_4) + retl + add %o2, 4, %o0 +ENDPROC(U3_retl_o2_plus_4) +ENTRY(U3_retl_o2_plus_8) + retl + add %o2, 8, %o0 +ENDPROC(U3_retl_o2_plus_8) +ENTRY(U3_retl_o2_plus_g1_plus_1) + add %g1, 1, %g1 + retl + add %o2, %g1, %o0 +ENDPROC(U3_retl_o2_plus_g1_plus_1) +ENTRY(U3_retl_o2_fp) + ba,pt %xcc, __restore_fp + mov %o2, %o0 +ENDPROC(U3_retl_o2_fp) +ENTRY(U3_retl_o2_plus_o3_sll_6_plus_0x80_fp) + sll %o3, 6, %o3 + add %o3, 0x80, %o3 + ba,pt %xcc, __restore_fp + add %o2, %o3, %o0 +ENDPROC(U3_retl_o2_plus_o3_sll_6_plus_0x80_fp) +ENTRY(U3_retl_o2_plus_o3_sll_6_plus_0x40_fp) + sll %o3, 6, %o3 + add %o3, 0x40, %o3 + ba,pt %xcc, __restore_fp + add %o2, %o3, %o0 +ENDPROC(U3_retl_o2_plus_o3_sll_6_plus_0x40_fp) +ENTRY(U3_retl_o2_plus_GS_plus_0x10) + add GLOBAL_SPARE, 0x10, GLOBAL_SPARE + retl + add %o2, GLOBAL_SPARE, %o0 +ENDPROC(U3_retl_o2_plus_GS_plus_0x10) +ENTRY(U3_retl_o2_plus_GS_plus_0x08) + add GLOBAL_SPARE, 0x08, GLOBAL_SPARE + retl + add %o2, GLOBAL_SPARE, %o0 +ENDPROC(U3_retl_o2_plus_GS_plus_0x08) +ENTRY(U3_retl_o2_and_7_plus_GS) + and %o2, 7, %o2 + retl + add %o2, GLOBAL_SPARE, %o2 +ENDPROC(U3_retl_o2_and_7_plus_GS) +ENTRY(U3_retl_o2_and_7_plus_GS_plus_8) + add GLOBAL_SPARE, 8, GLOBAL_SPARE + and %o2, 7, %o2 + retl + add %o2, GLOBAL_SPARE, %o2 +ENDPROC(U3_retl_o2_and_7_plus_GS_plus_8) +#endif + .align 64 /* The cheetah's flexible spine, oversized liver, enlarged heart, @@ -126,8 +204,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ and %g2, 0x38, %g2 1: subcc %g1, 0x1, %g1 - EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3)) - EX_ST_FP(STORE(stb, %o3, %o1 + GLOBAL_SPARE)) + EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U3_retl_o2_plus_g2_plus_g1_plus_1) + EX_ST_FP(STORE(stb, %o3, %o1 + GLOBAL_SPARE), U3_retl_o2_plus_g2_plus_g1_plus_1) bgu,pt %XCC, 1b add %o1, 0x1, %o1 @@ -138,20 +216,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ be,pt %icc, 3f alignaddr %o1, %g0, %o1 - EX_LD_FP(LOAD(ldd, %o1, %f4)) -1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6)) + EX_LD_FP(LOAD(ldd, %o1, %f4), U3_retl_o2_plus_g2) +1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U3_retl_o2_plus_g2) add %o1, 0x8, %o1 subcc %g2, 0x8, %g2 faligndata %f4, %f6, %f0 - EX_ST_FP(STORE(std, %f0, %o0)) + EX_ST_FP(STORE(std, %f0, %o0), U3_retl_o2_plus_g2_plus_8) be,pn %icc, 3f add %o0, 0x8, %o0 - EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4)) + EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U3_retl_o2_plus_g2) add %o1, 0x8, %o1 subcc %g2, 0x8, %g2 faligndata %f6, %f4, %f2 - EX_ST_FP(STORE(std, %f2, %o0)) + EX_ST_FP(STORE(std, %f2, %o0), U3_retl_o2_plus_g2_plus_8) bne,pt %icc, 1b add %o0, 0x8, %o0 @@ -161,25 +239,25 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ LOAD(prefetch, %o1 + 0x080, #one_read) LOAD(prefetch, %o1 + 0x0c0, #one_read) LOAD(prefetch, %o1 + 0x100, #one_read) - EX_LD_FP(LOAD(ldd, %o1 + 0x000, %f0)) + EX_LD_FP(LOAD(ldd, %o1 + 0x000, %f0), U3_retl_o2) LOAD(prefetch, %o1 + 0x140, #one_read) - EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2)) + EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2) LOAD(prefetch, %o1 + 0x180, #one_read) - EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4)) + EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2) LOAD(prefetch, %o1 + 0x1c0, #one_read) faligndata %f0, %f2, %f16 - EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6)) + EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2) faligndata %f2, %f4, %f18 - EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8)) + EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2) faligndata %f4, %f6, %f20 - EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10)) + EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2) faligndata %f6, %f8, %f22 - EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12)) + EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2) faligndata %f8, %f10, %f24 - EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14)) + EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2) faligndata %f10, %f12, %f26 - EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0)) + EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2) subcc GLOBAL_SPARE, 0x80, GLOBAL_SPARE add %o1, 0x40, %o1 @@ -190,26 +268,26 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ .align 64 1: - EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2)) + EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2_plus_o3_sll_6_plus_0x80) faligndata %f12, %f14, %f28 - EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4)) + EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2_plus_o3_sll_6_plus_0x80) faligndata %f14, %f0, %f30 - EX_ST_FP(STORE_BLK(%f16, %o0)) - EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6)) + EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x80) + EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f0, %f2, %f16 add %o0, 0x40, %o0 - EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8)) + EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f2, %f4, %f18 - EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10)) + EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f4, %f6, %f20 - EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12)) + EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2_plus_o3_sll_6_plus_0x40) subcc %o3, 0x01, %o3 faligndata %f6, %f8, %f22 - EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14)) + EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2_plus_o3_sll_6_plus_0x80) faligndata %f8, %f10, %f24 - EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0)) + EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2_plus_o3_sll_6_plus_0x80) LOAD(prefetch, %o1 + 0x1c0, #one_read) faligndata %f10, %f12, %f26 bg,pt %XCC, 1b @@ -217,29 +295,29 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ /* Finally we copy the last full 64-byte block. */ 2: - EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2)) + EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2_plus_o3_sll_6_plus_0x80) faligndata %f12, %f14, %f28 - EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4)) + EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2_plus_o3_sll_6_plus_0x80) faligndata %f14, %f0, %f30 - EX_ST_FP(STORE_BLK(%f16, %o0)) - EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6)) + EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x80) + EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f0, %f2, %f16 - EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8)) + EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f2, %f4, %f18 - EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10)) + EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f4, %f6, %f20 - EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12)) + EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f6, %f8, %f22 - EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14)) + EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f8, %f10, %f24 cmp %g1, 0 be,pt %XCC, 1f add %o0, 0x40, %o0 - EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0)) + EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2_plus_o3_sll_6_plus_0x40) 1: faligndata %f10, %f12, %f26 faligndata %f12, %f14, %f28 faligndata %f14, %f0, %f30 - EX_ST_FP(STORE_BLK(%f16, %o0)) + EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x40) add %o0, 0x40, %o0 add %o1, 0x40, %o1 membar #Sync @@ -259,20 +337,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ sub %o2, %g2, %o2 be,a,pt %XCC, 1f - EX_LD_FP(LOAD(ldd, %o1 + 0x00, %f0)) + EX_LD_FP(LOAD(ldd, %o1 + 0x00, %f0), U3_retl_o2_plus_g2) -1: EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f2)) +1: EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f2), U3_retl_o2_plus_g2) add %o1, 0x8, %o1 subcc %g2, 0x8, %g2 faligndata %f0, %f2, %f8 - EX_ST_FP(STORE(std, %f8, %o0)) + EX_ST_FP(STORE(std, %f8, %o0), U3_retl_o2_plus_g2_plus_8) be,pn %XCC, 2f add %o0, 0x8, %o0 - EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f0)) + EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f0), U3_retl_o2_plus_g2) add %o1, 0x8, %o1 subcc %g2, 0x8, %g2 faligndata %f2, %f0, %f8 - EX_ST_FP(STORE(std, %f8, %o0)) + EX_ST_FP(STORE(std, %f8, %o0), U3_retl_o2_plus_g2_plus_8) bne,pn %XCC, 1b add %o0, 0x8, %o0 @@ -292,30 +370,33 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ andcc %o2, 0x8, %g0 be,pt %icc, 1f nop - EX_LD(LOAD(ldx, %o1, %o5)) - EX_ST(STORE(stx, %o5, %o1 + %o3)) + EX_LD(LOAD(ldx, %o1, %o5), U3_retl_o2) + EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2) add %o1, 0x8, %o1 + sub %o2, 8, %o2 1: andcc %o2, 0x4, %g0 be,pt %icc, 1f nop - EX_LD(LOAD(lduw, %o1, %o5)) - EX_ST(STORE(stw, %o5, %o1 + %o3)) + EX_LD(LOAD(lduw, %o1, %o5), U3_retl_o2) + EX_ST(STORE(stw, %o5, %o1 + %o3), U3_retl_o2) add %o1, 0x4, %o1 + sub %o2, 4, %o2 1: andcc %o2, 0x2, %g0 be,pt %icc, 1f nop - EX_LD(LOAD(lduh, %o1, %o5)) - EX_ST(STORE(sth, %o5, %o1 + %o3)) + EX_LD(LOAD(lduh, %o1, %o5), U3_retl_o2) + EX_ST(STORE(sth, %o5, %o1 + %o3), U3_retl_o2) add %o1, 0x2, %o1 + sub %o2, 2, %o2 1: andcc %o2, 0x1, %g0 be,pt %icc, 85f nop - EX_LD(LOAD(ldub, %o1, %o5)) + EX_LD(LOAD(ldub, %o1, %o5), U3_retl_o2) ba,pt %xcc, 85f - EX_ST(STORE(stb, %o5, %o1 + %o3)) + EX_ST(STORE(stb, %o5, %o1 + %o3), U3_retl_o2) .align 64 70: /* 16 < len <= 64 */ @@ -326,26 +407,26 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ andn %o2, 0xf, GLOBAL_SPARE and %o2, 0xf, %o2 1: subcc GLOBAL_SPARE, 0x10, GLOBAL_SPARE - EX_LD(LOAD(ldx, %o1 + 0x00, %o5)) - EX_LD(LOAD(ldx, %o1 + 0x08, %g1)) - EX_ST(STORE(stx, %o5, %o1 + %o3)) + EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U3_retl_o2_plus_GS_plus_0x10) + EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U3_retl_o2_plus_GS_plus_0x10) + EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2_plus_GS_plus_0x10) add %o1, 0x8, %o1 - EX_ST(STORE(stx, %g1, %o1 + %o3)) + EX_ST(STORE(stx, %g1, %o1 + %o3), U3_retl_o2_plus_GS_plus_0x08) bgu,pt %XCC, 1b add %o1, 0x8, %o1 73: andcc %o2, 0x8, %g0 be,pt %XCC, 1f nop sub %o2, 0x8, %o2 - EX_LD(LOAD(ldx, %o1, %o5)) - EX_ST(STORE(stx, %o5, %o1 + %o3)) + EX_LD(LOAD(ldx, %o1, %o5), U3_retl_o2_plus_8) + EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2_plus_8) add %o1, 0x8, %o1 1: andcc %o2, 0x4, %g0 be,pt %XCC, 1f nop sub %o2, 0x4, %o2 - EX_LD(LOAD(lduw, %o1, %o5)) - EX_ST(STORE(stw, %o5, %o1 + %o3)) + EX_LD(LOAD(lduw, %o1, %o5), U3_retl_o2_plus_4) + EX_ST(STORE(stw, %o5, %o1 + %o3), U3_retl_o2_plus_4) add %o1, 0x4, %o1 1: cmp %o2, 0 be,pt %XCC, 85f @@ -361,8 +442,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ sub %o2, %g1, %o2 1: subcc %g1, 1, %g1 - EX_LD(LOAD(ldub, %o1, %o5)) - EX_ST(STORE(stb, %o5, %o1 + %o3)) + EX_LD(LOAD(ldub, %o1, %o5), U3_retl_o2_plus_g1_plus_1) + EX_ST(STORE(stb, %o5, %o1 + %o3), U3_retl_o2_plus_g1_plus_1) bgu,pt %icc, 1b add %o1, 1, %o1 @@ -378,16 +459,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 8: mov 64, %o3 andn %o1, 0x7, %o1 - EX_LD(LOAD(ldx, %o1, %g2)) + EX_LD(LOAD(ldx, %o1, %g2), U3_retl_o2) sub %o3, %g1, %o3 andn %o2, 0x7, GLOBAL_SPARE sllx %g2, %g1, %g2 -1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3)) +1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U3_retl_o2_and_7_plus_GS) subcc GLOBAL_SPARE, 0x8, GLOBAL_SPARE add %o1, 0x8, %o1 srlx %g3, %o3, %o5 or %o5, %g2, %o5 - EX_ST(STORE(stx, %o5, %o0)) + EX_ST(STORE(stx, %o5, %o0), U3_retl_o2_and_7_plus_GS_plus_8) add %o0, 0x8, %o0 bgu,pt %icc, 1b sllx %g3, %g1, %g2 @@ -407,8 +488,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 1: subcc %o2, 4, %o2 - EX_LD(LOAD(lduw, %o1, %g1)) - EX_ST(STORE(stw, %g1, %o1 + %o3)) + EX_LD(LOAD(lduw, %o1, %g1), U3_retl_o2_plus_4) + EX_ST(STORE(stw, %g1, %o1 + %o3), U3_retl_o2_plus_4) bgu,pt %XCC, 1b add %o1, 4, %o1 @@ -418,8 +499,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ .align 32 90: subcc %o2, 1, %o2 - EX_LD(LOAD(ldub, %o1, %g1)) - EX_ST(STORE(stb, %g1, %o1 + %o3)) + EX_LD(LOAD(ldub, %o1, %g1), U3_retl_o2_plus_1) + EX_ST(STORE(stb, %g1, %o1 + %o3), U3_retl_o2_plus_1) bgu,pt %XCC, 90b add %o1, 1, %o1 retl From 614da3d9685b67917cab48c8452fd8bf93de0867 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 24 Oct 2016 21:22:27 -0700 Subject: [PATCH 565/884] sparc64: Delete now unused user copy assembler helpers. All of __ret{,l}_mone{_asi,_fp,_asi_fpu} are now unused. Signed-off-by: David S. Miller --- arch/sparc/kernel/head_64.S | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index 5f17de6b5fc3..6aa3da152c20 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -931,36 +931,6 @@ ENTRY(__retl_efault) mov -EFAULT, %o0 ENDPROC(__retl_efault) -ENTRY(__retl_mone) - retl - mov -1, %o0 -ENDPROC(__retl_mone) - -ENTRY(__retl_mone_fp) - VISExitHalf - retl - mov 1, %o0 -ENDPROC(__retl_mone_fp) - -ENTRY(__ret_mone_asi) - wr %g0, ASI_AIUS, %asi - ret - restore %g0, 1, %o0 -ENDPROC(__ret_mone_asi) - -ENTRY(__retl_mone_asi) - wr %g0, ASI_AIUS, %asi - retl - mov 1, %o0 -ENDPROC(__retl_mone_asi) - -ENTRY(__retl_mone_asi_fp) - wr %g0, ASI_AIUS, %asi - VISExitHalf - retl - mov 1, %o0 -ENDPROC(__retl_mone_asi_fp) - ENTRY(__retl_o1) retl mov %o1, %o0 From 0fd0ff01d4c3c01e7fe69b762ee1a13236639acc Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 24 Oct 2016 21:25:31 -0700 Subject: [PATCH 566/884] sparc64: Delete now unused user copy fixup functions. Now that all of the user copy routines are converted to return accurate residual lengths when an exception occurs, we no longer need the broken fixup routines. Signed-off-by: David S. Miller --- arch/sparc/include/asm/uaccess_64.h | 36 ++------------- arch/sparc/lib/Makefile | 2 +- arch/sparc/lib/user_fixup.c | 71 ----------------------------- 3 files changed, 4 insertions(+), 105 deletions(-) delete mode 100644 arch/sparc/lib/user_fixup.c diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h index 0244012435c8..5373136c412b 100644 --- a/arch/sparc/include/asm/uaccess_64.h +++ b/arch/sparc/include/asm/uaccess_64.h @@ -188,64 +188,34 @@ int __get_user_bad(void); unsigned long __must_check ___copy_from_user(void *to, const void __user *from, unsigned long size); -unsigned long copy_from_user_fixup(void *to, const void __user *from, - unsigned long size); static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long size) { - unsigned long ret; - check_object_size(to, size, false); - ret = ___copy_from_user(to, from, size); - if (unlikely(ret)) { - if ((long)ret < 0) - ret = copy_from_user_fixup(to, from, size); - return ret; - } - - return ret; + return ___copy_from_user(to, from, size); } #define __copy_from_user copy_from_user unsigned long __must_check ___copy_to_user(void __user *to, const void *from, unsigned long size); -unsigned long copy_to_user_fixup(void __user *to, const void *from, - unsigned long size); static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long size) { - unsigned long ret; - check_object_size(from, size, true); - ret = ___copy_to_user(to, from, size); - if (unlikely(ret)) { - if ((long)ret < 0) - ret = copy_to_user_fixup(to, from, size); - return ret; - } - return ret; + return ___copy_to_user(to, from, size); } #define __copy_to_user copy_to_user unsigned long __must_check ___copy_in_user(void __user *to, const void __user *from, unsigned long size); -unsigned long copy_in_user_fixup(void __user *to, void __user *from, - unsigned long size); static inline unsigned long __must_check copy_in_user(void __user *to, void __user *from, unsigned long size) { - unsigned long ret = ___copy_in_user(to, from, size); - - if (unlikely(ret)) { - if ((long)ret < 0) - ret = copy_in_user_fixup(to, from, size); - return ret; - } - return ret; + return ___copy_in_user(to, from, size); } #define __copy_in_user copy_in_user diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index 885f00e81d1a..69912d2f8b54 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -38,7 +38,7 @@ lib-$(CONFIG_SPARC64) += NG4patch.o NG4copy_page.o NG4clear_page.o NG4memset.o lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o -lib-$(CONFIG_SPARC64) += copy_in_user.o user_fixup.o memmove.o +lib-$(CONFIG_SPARC64) += copy_in_user.o memmove.o lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o obj-$(CONFIG_SPARC64) += iomap.o diff --git a/arch/sparc/lib/user_fixup.c b/arch/sparc/lib/user_fixup.c deleted file mode 100644 index ac96ae236709..000000000000 --- a/arch/sparc/lib/user_fixup.c +++ /dev/null @@ -1,71 +0,0 @@ -/* user_fixup.c: Fix up user copy faults. - * - * Copyright (C) 2004 David S. Miller - */ - -#include -#include -#include -#include -#include - -#include - -/* Calculating the exact fault address when using - * block loads and stores can be very complicated. - * - * Instead of trying to be clever and handling all - * of the cases, just fix things up simply here. - */ - -static unsigned long compute_size(unsigned long start, unsigned long size, unsigned long *offset) -{ - unsigned long fault_addr = current_thread_info()->fault_address; - unsigned long end = start + size; - - if (fault_addr < start || fault_addr >= end) { - *offset = 0; - } else { - *offset = fault_addr - start; - size = end - fault_addr; - } - return size; -} - -unsigned long copy_from_user_fixup(void *to, const void __user *from, unsigned long size) -{ - unsigned long offset; - - size = compute_size((unsigned long) from, size, &offset); - if (likely(size)) - memset(to + offset, 0, size); - - return size; -} -EXPORT_SYMBOL(copy_from_user_fixup); - -unsigned long copy_to_user_fixup(void __user *to, const void *from, unsigned long size) -{ - unsigned long offset; - - return compute_size((unsigned long) to, size, &offset); -} -EXPORT_SYMBOL(copy_to_user_fixup); - -unsigned long copy_in_user_fixup(void __user *to, void __user *from, unsigned long size) -{ - unsigned long fault_addr = current_thread_info()->fault_address; - unsigned long start = (unsigned long) to; - unsigned long end = start + size; - - if (fault_addr >= start && fault_addr < end) - return end - fault_addr; - - start = (unsigned long) from; - end = start + size; - if (fault_addr >= start && fault_addr < end) - return end - fault_addr; - - return size; -} -EXPORT_SYMBOL(copy_in_user_fixup); From 407a3aee6ee2d2cb46d9ba3fc380bc29f35d020c Mon Sep 17 00:00:00 2001 From: Long Li Date: Wed, 5 Oct 2016 16:57:46 -0700 Subject: [PATCH 567/884] hv: do not lose pending heartbeat vmbus packets The host keeps sending heartbeat packets independent of the guest responding to them. Even though we respond to the heartbeat messages at interrupt level, we can have situations where there maybe multiple heartbeat messages pending that have not been responded to. For instance this occurs when the VM is paused and the host continues to send the heartbeat messages. Address this issue by draining and responding to all the heartbeat messages that maybe pending. Signed-off-by: Long Li Signed-off-by: K. Y. Srinivasan CC: Stable Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv_util.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c index 4aa3cb63fd41..bcd06306f3e8 100644 --- a/drivers/hv/hv_util.c +++ b/drivers/hv/hv_util.c @@ -314,10 +314,14 @@ static void heartbeat_onchannelcallback(void *context) u8 *hbeat_txf_buf = util_heartbeat.recv_buffer; struct icmsg_negotiate *negop = NULL; - vmbus_recvpacket(channel, hbeat_txf_buf, - PAGE_SIZE, &recvlen, &requestid); + while (1) { + + vmbus_recvpacket(channel, hbeat_txf_buf, + PAGE_SIZE, &recvlen, &requestid); + + if (!recvlen) + break; - if (recvlen > 0) { icmsghdrp = (struct icmsg_hdr *)&hbeat_txf_buf[ sizeof(struct vmbuspipe_hdr)]; From 1a3f099101b85cc93d864eb030d97e7725c72ea7 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 20 Oct 2016 12:14:51 +0200 Subject: [PATCH 568/884] ALSA: hda - Fix surround output pins for ASRock B150M mobo ASRock B150M Pro4/D3 mobo with ALC892 codec doesn't seem to provide proper pins for the surround outputs, hence we need to specify the pincfgs manually with a couple of other corrections. Reported-and-tested-by: Benjamin Valentin Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index b582d57fe184..2f909dd8b7b8 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6625,6 +6625,7 @@ enum { ALC891_FIXUP_HEADSET_MODE, ALC891_FIXUP_DELL_MIC_NO_PRESENCE, ALC662_FIXUP_ACER_VERITON, + ALC892_FIXUP_ASROCK_MOBO, }; static const struct hda_fixup alc662_fixups[] = { @@ -6901,6 +6902,16 @@ static const struct hda_fixup alc662_fixups[] = { { } } }, + [ALC892_FIXUP_ASROCK_MOBO] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x15, 0x40f000f0 }, /* disabled */ + { 0x16, 0x40f000f0 }, /* disabled */ + { 0x18, 0x01014011 }, /* LO */ + { 0x1a, 0x01014012 }, /* LO */ + { } + } + }, }; static const struct snd_pci_quirk alc662_fixup_tbl[] = { @@ -6938,6 +6949,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x144d, 0xc051, "Samsung R720", ALC662_FIXUP_IDEAPAD), SND_PCI_QUIRK(0x17aa, 0x38af, "Lenovo Ideapad Y550P", ALC662_FIXUP_IDEAPAD), SND_PCI_QUIRK(0x17aa, 0x3a0d, "Lenovo Ideapad Y550", ALC662_FIXUP_IDEAPAD), + SND_PCI_QUIRK(0x1849, 0x5892, "ASRock B150M", ALC892_FIXUP_ASROCK_MOBO), SND_PCI_QUIRK(0x19da, 0xa130, "Zotac Z68", ALC662_FIXUP_ZOTAC_Z68), SND_PCI_QUIRK(0x1b0a, 0x01b8, "ACER Veriton", ALC662_FIXUP_ACER_VERITON), SND_PCI_QUIRK(0x1b35, 0x2206, "CZC P10T", ALC662_FIXUP_CZC_P10T), From 991d5add50a5bb6ab8f12f2129f5c7487f6baaf6 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sat, 10 Sep 2016 12:53:21 +0000 Subject: [PATCH 569/884] usb: chipidea: host: fix NULL ptr dereference during shutdown After commit b09b5224fe86 ("usb: chipidea: implement platform shutdown callback") and commit 43a404577a93 ("usb: chipidea: host: set host to be null after hcd is freed") a NULL pointer dereference is caused on i.MX23 during shutdown. So ensure that role is set to CI_ROLE_END and we finish interrupt handling before the hcd is deallocated. This avoids the NULL pointer dereference. Suggested-by: Alan Stern Signed-off-by: Stefan Wahren Fixes: b09b5224fe86 ("usb: chipidea: implement platform shutdown callback") Signed-off-by: Peter Chen --- drivers/usb/chipidea/host.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/chipidea/host.c b/drivers/usb/chipidea/host.c index 96ae69502c86..111b0e0b8698 100644 --- a/drivers/usb/chipidea/host.c +++ b/drivers/usb/chipidea/host.c @@ -188,6 +188,8 @@ static void host_stop(struct ci_hdrc *ci) if (hcd) { usb_remove_hcd(hcd); + ci->role = CI_ROLE_END; + synchronize_irq(ci->irq); usb_put_hcd(hcd); if (ci->platdata->reg_vbus && !ci_otg_is_fsm_mode(ci) && (ci->platdata->flags & CI_HDRC_TURN_VBUS_EARLY_ON)) From 45c7a4908a307a023e237a64a3eadcafc4836493 Mon Sep 17 00:00:00 2001 From: Jaehoon Chung Date: Fri, 21 Oct 2016 19:57:57 +0900 Subject: [PATCH 570/884] mmc: dw_mmc-pltfm: fix the potential NULL pointer dereference platform_get_resource can be returned the NULL pointer. Then regs->start should be referred to NULL Pointer. devm_ioremap_resource() checks whether res is NULL or not. Signed-off-by: Jaehoon Chung Reviewed-by: Shawn Lin Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/dw_mmc-pltfm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/dw_mmc-pltfm.c b/drivers/mmc/host/dw_mmc-pltfm.c index c0bb0c793e84..dbbc4303bdd0 100644 --- a/drivers/mmc/host/dw_mmc-pltfm.c +++ b/drivers/mmc/host/dw_mmc-pltfm.c @@ -46,12 +46,13 @@ int dw_mci_pltfm_register(struct platform_device *pdev, host->pdata = pdev->dev.platform_data; regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); - /* Get registers' physical base address */ - host->phy_regs = regs->start; host->regs = devm_ioremap_resource(&pdev->dev, regs); if (IS_ERR(host->regs)) return PTR_ERR(host->regs); + /* Get registers' physical base address */ + host->phy_regs = regs->start; + platform_set_drvdata(pdev, host); return dw_mci_probe(host); } From ae824f00241f495e8d55ebdc0341f3ce61a77da6 Mon Sep 17 00:00:00 2001 From: Ruqiang Ju Date: Mon, 24 Oct 2016 16:39:49 +0800 Subject: [PATCH 571/884] i2c: hix5hd2: allow build with ARCH_HISI This driver should be buildable with ARCH_HISI, because some of other HiSilicon SoCs also use it. Signed-off-by: Ruqiang Ju Signed-off-by: Wolfram Sang --- drivers/i2c/busses/Kconfig | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 6d94e2ec5b4f..41abb20f082d 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -79,12 +79,12 @@ config I2C_AMD8111 config I2C_HIX5HD2 tristate "Hix5hd2 high-speed I2C driver" - depends on ARCH_HIX5HD2 || COMPILE_TEST + depends on ARCH_HISI || ARCH_HIX5HD2 || COMPILE_TEST help - Say Y here to include support for high-speed I2C controller in the - Hisilicon based hix5hd2 SoCs. + Say Y here to include support for the high-speed I2C controller + used in HiSilicon hix5hd2 SoCs. - This driver can also be built as a module. If so, the module + This driver can also be built as a module. If so, the module will be called i2c-hix5hd2. config I2C_I801 From 399c168ab5ab5e12ed55b6c91d61c24eb84c9164 Mon Sep 17 00:00:00 2001 From: David Wu Date: Sat, 22 Oct 2016 16:43:42 +0800 Subject: [PATCH 572/884] i2c: rk3x: Give the tuning value 0 during rk3x_i2c_v0_calc_timings We found a bug that i2c transfer sometimes failed on 3066a board with stabel-4.8, the con register would be updated by uninitialized tuning value, it made the i2c transfer failed. So give the tuning value to be zero during rk3x_i2c_v0_calc_timings. Signed-off-by: David Wu Tested-by: Andy Yan Reviewed-by: Douglas Anderson Signed-off-by: Wolfram Sang Cc: stable@kernel.org --- drivers/i2c/busses/i2c-rk3x.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/i2c/busses/i2c-rk3x.c b/drivers/i2c/busses/i2c-rk3x.c index 50702c7bb244..df220666d627 100644 --- a/drivers/i2c/busses/i2c-rk3x.c +++ b/drivers/i2c/busses/i2c-rk3x.c @@ -694,6 +694,8 @@ static int rk3x_i2c_v0_calc_timings(unsigned long clk_rate, t_calc->div_low--; t_calc->div_high--; + /* Give the tuning value 0, that would not update con register */ + t_calc->tuning = 0; /* Maximum divider supported by hw is 0xffff */ if (t_calc->div_low > 0xffff) { t_calc->div_low = 0xffff; From 7fbe6ac02485504b964b283aca62b36b4313ca79 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 24 Oct 2016 08:31:27 -0500 Subject: [PATCH 573/884] x86/unwind: Fix empty stack dereference in guess unwinder Vince Waver reported the following bug: WARNING: CPU: 0 PID: 21338 at arch/x86/mm/fault.c:435 vmalloc_fault+0x58/0x1f0 CPU: 0 PID: 21338 Comm: perf_fuzzer Not tainted 4.8.0+ #37 Hardware name: Hewlett-Packard HP Compaq Pro 6305 SFF/1850, BIOS K06 v02.57 08/16/2013 Call Trace: ? dump_stack+0x46/0x59 ? __warn+0xd5/0xee ? vmalloc_fault+0x58/0x1f0 ? __do_page_fault+0x6d/0x48e ? perf_log_throttle+0xa4/0xf4 ? trace_page_fault+0x22/0x30 ? __unwind_start+0x28/0x42 ? perf_callchain_kernel+0x75/0xac ? get_perf_callchain+0x13a/0x1f0 ? perf_callchain+0x6a/0x6c ? perf_prepare_sample+0x71/0x2eb ? perf_event_output_forward+0x1a/0x54 ? __default_send_IPI_shortcut+0x10/0x2d ? __perf_event_overflow+0xfb/0x167 ? x86_pmu_handle_irq+0x113/0x150 ? native_read_msr+0x6/0x34 ? perf_event_nmi_handler+0x22/0x39 ? perf_ibs_nmi_handler+0x4a/0x51 ? perf_event_nmi_handler+0x22/0x39 ? nmi_handle+0x4d/0xf0 ? perf_ibs_handle_irq+0x3d1/0x3d1 ? default_do_nmi+0x3c/0xd5 ? do_nmi+0x92/0x102 ? end_repeat_nmi+0x1a/0x1e ? entry_SYSCALL_64_after_swapgs+0x12/0x4a ? entry_SYSCALL_64_after_swapgs+0x12/0x4a ? entry_SYSCALL_64_after_swapgs+0x12/0x4a ^A4---[ end trace 632723104d47d31a ]--- BUG: stack guard page was hit at ffffc90008500000 (stack is ffffc900084fc000..ffffc900084fffff) kernel stack overflow (page fault): 0000 [#1] SMP ... The NMI hit in the entry code right after setting up the stack pointer from 'cpu_current_top_of_stack', so the kernel stack was empty. The 'guess' version of __unwind_start() attempted to dereference the "top of stack" pointer, which is not actually *on* the stack. Add a check in the guess unwinder to deal with an empty stack. (The frame pointer unwinder already has such a check.) Reported-by: Vince Weaver Signed-off-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 7c7900f89770 ("x86/unwind: Add new unwind interface and implementations") Link: http://lkml.kernel.org/r/20161024133127.e5evgeebdbohnmpb@treble Signed-off-by: Ingo Molnar --- arch/x86/kernel/unwind_guess.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/unwind_guess.c b/arch/x86/kernel/unwind_guess.c index 9298993dc8b7..2d721e533cf4 100644 --- a/arch/x86/kernel/unwind_guess.c +++ b/arch/x86/kernel/unwind_guess.c @@ -47,7 +47,14 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, get_stack_info(first_frame, state->task, &state->stack_info, &state->stack_mask); - if (!__kernel_text_address(*first_frame)) + /* + * The caller can provide the address of the first frame directly + * (first_frame) or indirectly (regs->sp) to indicate which stack frame + * to start unwinding at. Skip ahead until we reach it. + */ + if (!unwind_done(state) && + (!on_stack(&state->stack_info, first_frame, sizeof(long)) || + !__kernel_text_address(*first_frame))) unwind_next_frame(state); } EXPORT_SYMBOL_GPL(__unwind_start); From a2209b742e6cf978b85d4f31a25a269c3d3b062b Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 24 Oct 2016 09:00:12 -0600 Subject: [PATCH 574/884] x86/build: Fix build with older GCC versions Older GCC (observed with 4.1.x) doesn't support -Wno-override-init and also doesn't ignore unknown -Wno-* options. Signed-off-by: Jan Beulich Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Valdis Kletnieks Cc: Valdis.Kletnieks@vt.edu Fixes: 5e44258d16 "x86/build: Reduce the W=1 warnings noise when compiling x86 syscall tables" Link: http://lkml.kernel.org/r/580E3E1C02000078001191C4@prv-mh.provo.novell.com Signed-off-by: Ingo Molnar --- arch/x86/entry/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile index 77f28ce9c646..9976fcecd17e 100644 --- a/arch/x86/entry/Makefile +++ b/arch/x86/entry/Makefile @@ -5,8 +5,8 @@ OBJECT_FILES_NON_STANDARD_entry_$(BITS).o := y OBJECT_FILES_NON_STANDARD_entry_64_compat.o := y -CFLAGS_syscall_64.o += -Wno-override-init -CFLAGS_syscall_32.o += -Wno-override-init +CFLAGS_syscall_64.o += $(call cc-option,-Wno-override-init,) +CFLAGS_syscall_32.o += $(call cc-option,-Wno-override-init,) obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o obj-y += common.o From d320b9a5bd85f6178cc3ed8b0a1a9960f2b5bc7b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 24 Oct 2016 17:33:18 +0200 Subject: [PATCH 575/884] x86/quirks: Hide maybe-uninitialized warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gcc -Wmaybe-uninitialized detects that quirk_intel_brickland_xeon_ras_cap uses uninitialized data when CONFIG_PCI is not set: arch/x86/kernel/quirks.c: In function ‘quirk_intel_brickland_xeon_ras_cap’: arch/x86/kernel/quirks.c:641:13: error: ‘capid0’ is used uninitialized in this function [-Werror=uninitialized] However, the function is also not called in this configuration, so we can avoid the warning by moving the existing #ifdef to cover it as well. Signed-off-by: Arnd Bergmann Cc: Bjorn Helgaas Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-pci@vger.kernel.org Link: http://lkml.kernel.org/r/20161024153325.2752428-1-arnd@arndb.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/quirks.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 51402a7e4ca6..0bee04d41bed 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -625,8 +625,6 @@ static void amd_disable_seq_and_redirect_scrub(struct pci_dev *dev) DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3, amd_disable_seq_and_redirect_scrub); -#endif - #if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE) #include #include @@ -657,3 +655,4 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, quirk_intel_brickland_xeon_ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, quirk_intel_brickland_xeon_ras_cap); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2083, quirk_intel_purley_xeon_ras_cap); #endif +#endif From 6a676fb69dcbf3310b9e462c1db66c8e7f6ead38 Mon Sep 17 00:00:00 2001 From: Ralf Ramsauer Date: Mon, 17 Oct 2016 15:59:57 +0200 Subject: [PATCH 576/884] i2c: mark device nodes only in case of successful instantiation Instantiated I2C device nodes are marked with OF_POPULATE. This was introduced in 4f001fd30145a6. On unloading, loaded device nodes will of course be unmarked. The problem are nodes that fail during initialisation: If a node fails, it won't be unloaded and hence not be unmarked. If a I2C driver module is unloaded and reloaded, it will skip nodes that failed before. Skip device nodes that are already populated and mark them only in case of success. Fixes: 4f001fd30145a6 ("i2c: Mark instantiated device nodes with OF_POPULATE") Signed-off-by: Ralf Ramsauer Reviewed-by: Geert Uytterhoeven Acked-by: Pantelis Antoniou [wsa: use 14-digit commit sha] Signed-off-by: Wolfram Sang Cc: stable@kernel.org --- drivers/i2c/i2c-core.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index 5ab67219f71e..1704fc84d647 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -1681,6 +1681,7 @@ static struct i2c_client *of_i2c_register_device(struct i2c_adapter *adap, static void of_i2c_register_devices(struct i2c_adapter *adap) { struct device_node *bus, *node; + struct i2c_client *client; /* Only register child devices if the adapter has a node pointer set */ if (!adap->dev.of_node) @@ -1695,7 +1696,14 @@ static void of_i2c_register_devices(struct i2c_adapter *adap) for_each_available_child_of_node(bus, node) { if (of_node_test_and_set_flag(node, OF_POPULATED)) continue; - of_i2c_register_device(adap, node); + + client = of_i2c_register_device(adap, node); + if (IS_ERR(client)) { + dev_warn(&adap->dev, + "Failed to create I2C device for %s\n", + node->full_name); + of_node_clear_flag(node, OF_POPULATED); + } } of_node_put(bus); @@ -2299,6 +2307,7 @@ static int of_i2c_notify(struct notifier_block *nb, unsigned long action, if (IS_ERR(client)) { dev_err(&adap->dev, "failed to create client for '%s'\n", rd->dn->full_name); + of_node_clear_flag(rd->dn, OF_POPULATED); return notifier_from_errno(PTR_ERR(client)); } break; From 17791650c356b3cb220946d697d89afc1e32c315 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Mon, 17 Oct 2016 11:54:05 +1000 Subject: [PATCH 577/884] i2c: allow configuration of imx driver for ColdFire architecture The i2c controller used by Freescales iMX processors is the same hardware module used on Freescales ColdFire family of processors. We can use the existing i2c-imx driver on ColdFire family members. Modify the configuration to allow it to be selected when compiling for ColdFire targets. Signed-off-by: Greg Ungerer Signed-off-by: Wolfram Sang --- drivers/i2c/busses/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 41abb20f082d..d252276feadf 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -589,10 +589,10 @@ config I2C_IMG config I2C_IMX tristate "IMX I2C interface" - depends on ARCH_MXC || ARCH_LAYERSCAPE + depends on ARCH_MXC || ARCH_LAYERSCAPE || COLDFIRE help Say Y here if you want to use the IIC bus controller on - the Freescale i.MX/MXC or Layerscape processors. + the Freescale i.MX/MXC, Layerscape or ColdFire processors. This driver can also be built as a module. If so, the module will be called i2c-imx. From 3855ada848dbdbeae39fcf9eaaf6a7678315f153 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Tue, 18 Oct 2016 18:01:45 -0300 Subject: [PATCH 578/884] i2c: jz4780: Fix module autoload If the driver is built as a module, autoload won't work because the module alias information is not filled. So user-space can't match the registered device with the corresponding module. Export the module alias information using the MODULE_DEVICE_TABLE() macro. Signed-off-by: Javier Martinez Canillas Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-jz4780.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/i2c-jz4780.c b/drivers/i2c/busses/i2c-jz4780.c index b8ea62105f42..30132c3957cd 100644 --- a/drivers/i2c/busses/i2c-jz4780.c +++ b/drivers/i2c/busses/i2c-jz4780.c @@ -729,6 +729,7 @@ static const struct of_device_id jz4780_i2c_of_matches[] = { { .compatible = "ingenic,jz4780-i2c", }, { /* sentinel */ } }; +MODULE_DEVICE_TABLE(of, jz4780_i2c_of_matches); static int jz4780_i2c_probe(struct platform_device *pdev) { From 06e7b10a8711d10b4c4c21ab1aac3f1529a084df Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Tue, 18 Oct 2016 18:01:46 -0300 Subject: [PATCH 579/884] i2c: xlp9xx: Fix module autoload If the driver is built as a module, autoload won't work because the module alias information is not filled. So user-space can't match the registered device with the corresponding module. Export the module alias information using the MODULE_DEVICE_TABLE() macro. Signed-off-by: Javier Martinez Canillas Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-xlp9xx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/i2c-xlp9xx.c b/drivers/i2c/busses/i2c-xlp9xx.c index 2a972ed7aa0d..e29ff37a43bd 100644 --- a/drivers/i2c/busses/i2c-xlp9xx.c +++ b/drivers/i2c/busses/i2c-xlp9xx.c @@ -426,6 +426,7 @@ static const struct of_device_id xlp9xx_i2c_of_match[] = { { .compatible = "netlogic,xlp980-i2c", }, { /* sentinel */ }, }; +MODULE_DEVICE_TABLE(of, xlp9xx_i2c_of_match); #ifdef CONFIG_ACPI static const struct acpi_device_id xlp9xx_i2c_acpi_ids[] = { From 2cb496db3d56baa86d53022044c4d25ffe7fa038 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Tue, 18 Oct 2016 18:01:47 -0300 Subject: [PATCH 580/884] i2c: xlr: Fix module autoload for OF registration If the driver is built as a module, autoload won't work because the module alias information is not filled. So user-space can't match the registered device with the corresponding module. Export the module alias information using the MODULE_DEVICE_TABLE() macro. Signed-off-by: Javier Martinez Canillas Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-xlr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/i2c-xlr.c b/drivers/i2c/busses/i2c-xlr.c index 0968f59b6df5..ad17d88d8573 100644 --- a/drivers/i2c/busses/i2c-xlr.c +++ b/drivers/i2c/busses/i2c-xlr.c @@ -358,6 +358,7 @@ static const struct of_device_id xlr_i2c_dt_ids[] = { }, { } }; +MODULE_DEVICE_TABLE(of, xlr_i2c_dt_ids); static int xlr_i2c_probe(struct platform_device *pdev) { From 60a951af8e1656e2a17a96d64941aafe0668d750 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Tue, 18 Oct 2016 18:01:48 -0300 Subject: [PATCH 581/884] i2c: digicolor: Fix module autoload If the driver is built as a module, autoload won't work because the module alias information is not filled. So user-space can't match the registered device with the corresponding module. Export the module alias information using the MODULE_DEVICE_TABLE() macro. Signed-off-by: Javier Martinez Canillas Acked-by: Baruch Siach Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-digicolor.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/i2c-digicolor.c b/drivers/i2c/busses/i2c-digicolor.c index 9604024e0eb0..49f2084f7bb5 100644 --- a/drivers/i2c/busses/i2c-digicolor.c +++ b/drivers/i2c/busses/i2c-digicolor.c @@ -368,6 +368,7 @@ static const struct of_device_id dc_i2c_match[] = { { .compatible = "cnxt,cx92755-i2c" }, { }, }; +MODULE_DEVICE_TABLE(of, dc_i2c_match); static struct platform_driver dc_i2c_driver = { .probe = dc_i2c_probe, From 603616017c35f4d0fbdbcace72adf9bf949c4a65 Mon Sep 17 00:00:00 2001 From: Hoan Tran Date: Mon, 10 Oct 2016 10:13:10 -0700 Subject: [PATCH 582/884] i2c: xgene: Avoid dma_buffer overrun SMBus block command uses the first byte of buffer for the data length. The dma_buffer should be increased by 1 to avoid the overrun issue. Reported-by: Phil Endecott Signed-off-by: Hoan Tran Signed-off-by: Wolfram Sang Cc: stable@kernel.org --- drivers/i2c/busses/i2c-xgene-slimpro.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-xgene-slimpro.c b/drivers/i2c/busses/i2c-xgene-slimpro.c index 263685c7a512..05cf192ef1ac 100644 --- a/drivers/i2c/busses/i2c-xgene-slimpro.c +++ b/drivers/i2c/busses/i2c-xgene-slimpro.c @@ -105,7 +105,7 @@ struct slimpro_i2c_dev { struct mbox_chan *mbox_chan; struct mbox_client mbox_client; struct completion rd_complete; - u8 dma_buffer[I2C_SMBUS_BLOCK_MAX]; + u8 dma_buffer[I2C_SMBUS_BLOCK_MAX + 1]; /* dma_buffer[0] is used for length */ u32 *resp_msg; }; From ba9ad2af7019956b990ad654c56da5bac1e8b71b Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 11 Oct 2016 13:13:27 +0200 Subject: [PATCH 583/884] i2c: i801: Fix I2C Block Read on 8-Series/C220 and later Starting with the 8-Series/C220 PCH (Lynx Point), the SMBus controller includes a SPD EEPROM protection mechanism. Once the SPD Write Disable bit is set, only reads are allowed to slave addresses 0x50-0x57. However the legacy implementation of I2C Block Read since the ICH5 looks like a write, and is therefore blocked by the SPD protection mechanism. This causes the eeprom and at24 drivers to fail. So assume that I2C Block Read is implemented as an actual read on these chipsets. I tested it on my Q87 chipset and it seems to work just fine. Signed-off-by: Jean Delvare Tested-by: Jarkko Nikula [wsa: rebased to v4.9-rc2] Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-i801.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 08847e8b8998..eb3627f35d12 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -146,6 +146,7 @@ #define SMBHSTCFG_HST_EN 1 #define SMBHSTCFG_SMB_SMI_EN 2 #define SMBHSTCFG_I2C_EN 4 +#define SMBHSTCFG_SPD_WD 0x10 /* TCO configuration bits for TCOCTL */ #define TCOCTL_EN 0x0100 @@ -865,9 +866,16 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr, block = 1; break; case I2C_SMBUS_I2C_BLOCK_DATA: - /* NB: page 240 of ICH5 datasheet shows that the R/#W - * bit should be cleared here, even when reading */ - outb_p((addr & 0x7f) << 1, SMBHSTADD(priv)); + /* + * NB: page 240 of ICH5 datasheet shows that the R/#W + * bit should be cleared here, even when reading. + * However if SPD Write Disable is set (Lynx Point and later), + * the read will fail if we don't set the R/#W bit. + */ + outb_p(((addr & 0x7f) << 1) | + ((priv->original_hstcfg & SMBHSTCFG_SPD_WD) ? + (read_write & 0x01) : 0), + SMBHSTADD(priv)); if (read_write == I2C_SMBUS_READ) { /* NB: page 240 of ICH5 datasheet also shows * that DATA1 is the cmd field when reading */ @@ -1573,6 +1581,8 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) /* Disable SMBus interrupt feature if SMBus using SMI# */ priv->features &= ~FEATURE_IRQ; } + if (temp & SMBHSTCFG_SPD_WD) + dev_info(&dev->dev, "SPD Write Disable is set\n"); /* Clear special mode bits */ if (priv->features & (FEATURE_SMBUS_PEC | FEATURE_BLOCK_BUFFER)) From 171e23e150acfb285f1772cedf04d35694af740b Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Thu, 29 Sep 2016 16:04:59 +0300 Subject: [PATCH 584/884] i2c: designware: Avoid aborted transfers with fast reacting I2C slaves I2C DesignWare may abort transfer with arbitration lost if I2C slave pulls SDA down quickly after falling edge of SCL. Reason for this is unknown but after trial and error it was found this can be avoided by enabling non-zero SDA RX hold time for the receiver. By the specification SDA RX hold time extends incoming SDA low to high transition by n * ic_clk cycles but only when SCL is high. However it seems to help avoid above faulty arbitration lost error. Bits 23:16 in IC_SDA_HOLD register define the SDA RX hold time for the receiver. Be conservative and enable 1 ic_clk cycle long hold time in case boot firmware hasn't set it up. Reported-by: Jukka Laitinen Signed-off-by: Jarkko Nikula Tested-by: Jukka Laitinen Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-core.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-designware-core.c b/drivers/i2c/busses/i2c-designware-core.c index 1fe93c43215c..11e866d05368 100644 --- a/drivers/i2c/busses/i2c-designware-core.c +++ b/drivers/i2c/busses/i2c-designware-core.c @@ -95,6 +95,9 @@ #define DW_IC_STATUS_TFE BIT(2) #define DW_IC_STATUS_MST_ACTIVITY BIT(5) +#define DW_IC_SDA_HOLD_RX_SHIFT 16 +#define DW_IC_SDA_HOLD_RX_MASK GENMASK(23, DW_IC_SDA_HOLD_RX_SHIFT) + #define DW_IC_ERR_TX_ABRT 0x1 #define DW_IC_TAR_10BITADDR_MASTER BIT(12) @@ -420,12 +423,20 @@ int i2c_dw_init(struct dw_i2c_dev *dev) /* Configure SDA Hold Time if required */ reg = dw_readl(dev, DW_IC_COMP_VERSION); if (reg >= DW_IC_SDA_HOLD_MIN_VERS) { - if (dev->sda_hold_time) { - dw_writel(dev, dev->sda_hold_time, DW_IC_SDA_HOLD); - } else { + if (!dev->sda_hold_time) { /* Keep previous hold time setting if no one set it */ dev->sda_hold_time = dw_readl(dev, DW_IC_SDA_HOLD); } + /* + * Workaround for avoiding TX arbitration lost in case I2C + * slave pulls SDA down "too quickly" after falling egde of + * SCL by enabling non-zero SDA RX hold. Specification says it + * extends incoming SDA low to high transition while SCL is + * high but it apprears to help also above issue. + */ + if (!(dev->sda_hold_time & DW_IC_SDA_HOLD_RX_MASK)) + dev->sda_hold_time |= 1 << DW_IC_SDA_HOLD_RX_SHIFT; + dw_writel(dev, dev->sda_hold_time, DW_IC_SDA_HOLD); } else { dev_warn(dev->dev, "Hardware too old to adjust SDA hold time.\n"); From 533169d164c6b4c8571d0d48779f6ff6be593d72 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Mon, 26 Sep 2016 17:18:58 -0700 Subject: [PATCH 585/884] i2c: imx: defer probe if bus recovery GPIOs are not ready MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some SoC might load the GPIO driver after the I2C driver and using the I2C bus recovery mechanism via GPIOs. In this case it is crucial to defer probing if the GPIO request functions do so, otherwise the I2C driver gets loaded without recovery mechanisms enabled. Signed-off-by: Stefan Agner Acked-by: Uwe Kleine-König Acked-by: Li Yang Signed-off-by: Wolfram Sang Cc: stable@kernel.org --- drivers/i2c/busses/i2c-imx.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index 592a8f26a708..47fc1f1acff7 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -1009,10 +1009,13 @@ static int i2c_imx_init_recovery_info(struct imx_i2c_struct *i2c_imx, rinfo->sda_gpio = of_get_named_gpio(pdev->dev.of_node, "sda-gpios", 0); rinfo->scl_gpio = of_get_named_gpio(pdev->dev.of_node, "scl-gpios", 0); - if (!gpio_is_valid(rinfo->sda_gpio) || - !gpio_is_valid(rinfo->scl_gpio) || - IS_ERR(i2c_imx->pinctrl_pins_default) || - IS_ERR(i2c_imx->pinctrl_pins_gpio)) { + if (rinfo->sda_gpio == -EPROBE_DEFER || + rinfo->scl_gpio == -EPROBE_DEFER) { + return -EPROBE_DEFER; + } else if (!gpio_is_valid(rinfo->sda_gpio) || + !gpio_is_valid(rinfo->scl_gpio) || + IS_ERR(i2c_imx->pinctrl_pins_default) || + IS_ERR(i2c_imx->pinctrl_pins_gpio)) { dev_dbg(&pdev->dev, "recovery information incomplete\n"); return 0; } From 9b50898ad96c793a8f7cde9d8f281596d752a7dd Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 25 Oct 2016 15:56:35 +0200 Subject: [PATCH 586/884] ALSA: seq: Fix time account regression The recent rewrite of the sequencer time accounting using timespec64 in the commit [3915bf294652: ALSA: seq_timer: use monotonic times internally] introduced a bad regression. Namely, the time reported back doesn't increase but goes back and forth. The culprit was obvious: the delta is stored to the result (cur_time = delta), instead of adding the delta (cur_time += delta)! Let's fix it. Fixes: 3915bf294652 ('ALSA: seq_timer: use monotonic times internally') Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=177571 Reported-by: Yves Guillemot Cc: # v4.8+ Signed-off-by: Takashi Iwai --- sound/core/seq/seq_timer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/core/seq/seq_timer.c b/sound/core/seq/seq_timer.c index dcc102813aef..37d9cfbc29f9 100644 --- a/sound/core/seq/seq_timer.c +++ b/sound/core/seq/seq_timer.c @@ -448,8 +448,8 @@ snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr) ktime_get_ts64(&tm); tm = timespec64_sub(tm, tmr->last_update); - cur_time.tv_nsec = tm.tv_nsec; - cur_time.tv_sec = tm.tv_sec; + cur_time.tv_nsec += tm.tv_nsec; + cur_time.tv_sec += tm.tv_sec; snd_seq_sanity_real_time(&cur_time); } spin_unlock_irqrestore(&tmr->lock, flags); From b831275a3553c32091222ac619cfddd73a5553fb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 24 Oct 2016 11:41:56 +0200 Subject: [PATCH 587/884] timers: Plug locking race vs. timer migration Linus noticed that lock_timer_base() lacks a READ_ONCE() for accessing the timer flags. As a consequence the compiler is allowed to reload the flags between the initial check for TIMER_MIGRATION and the following timer base computation and the spin lock of the base. While this has not been observed (yet), we need to make sure that it never happens. Fixes: 0eeda71bc30d ("timer: Replace timer base by a cpu index") Reported-by: Linus Torvalds Signed-off-by: Thomas Gleixner Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1610241711220.4983@nanos Cc: stable@vger.kernel.org Cc: Andrew Morton Cc: Peter Zijlstra --- kernel/time/timer.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 2d47980a1bc4..0d4b91c5a374 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -943,7 +943,14 @@ static struct timer_base *lock_timer_base(struct timer_list *timer, { for (;;) { struct timer_base *base; - u32 tf = timer->flags; + u32 tf; + + /* + * We need to use READ_ONCE() here, otherwise the compiler + * might re-read @tf between the check for TIMER_MIGRATING + * and spin_lock(). + */ + tf = READ_ONCE(timer->flags); if (!(tf & TIMER_MIGRATING)) { base = get_timer_base(tf); From 4da9152a4308dcbf611cde399c695c359fc9145f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 24 Oct 2016 11:55:10 +0200 Subject: [PATCH 588/884] timers: Lock base for same bucket optimization Linus stumbled over the unlocked modification of the timer expiry value in mod_timer() which is an optimization for timers which stay in the same bucket - due to the bucket granularity - despite their expiry time getting updated. The optimization itself still makes sense even if we take the lock, because in case that the bucket stays the same, we avoid the pointless queue/enqueue dance. Make the check and the modification of timer->expires protected by the base lock and shuffle the remaining code around so we can keep the lock held when we actually have to requeue the timer to a different bucket. Fixes: f00c0afdfa62 ("timers: Implement optimization for same expiry time in mod_timer()") Reported-by: Linus Torvalds Signed-off-by: Thomas Gleixner Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1610241711220.4983@nanos Cc: stable@vger.kernel.org Cc: Andrew Morton Cc: Peter Zijlstra --- kernel/time/timer.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 0d4b91c5a374..ccf913038f9f 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -971,6 +971,8 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) unsigned long clk = 0, flags; int ret = 0; + BUG_ON(!timer->function); + /* * This is a common optimization triggered by the networking code - if * the timer is re-modified to have the same timeout or ends up in the @@ -979,13 +981,16 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) if (timer_pending(timer)) { if (timer->expires == expires) return 1; - /* - * Take the current timer_jiffies of base, but without holding - * the lock! - */ - base = get_timer_base(timer->flags); - clk = base->clk; + /* + * We lock timer base and calculate the bucket index right + * here. If the timer ends up in the same bucket, then we + * just update the expiry time and avoid the whole + * dequeue/enqueue dance. + */ + base = lock_timer_base(timer, &flags); + + clk = base->clk; idx = calc_wheel_index(expires, clk); /* @@ -995,14 +1000,14 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) */ if (idx == timer_get_idx(timer)) { timer->expires = expires; - return 1; + ret = 1; + goto out_unlock; } + } else { + base = lock_timer_base(timer, &flags); } timer_stats_timer_set_start_info(timer); - BUG_ON(!timer->function); - - base = lock_timer_base(timer, &flags); ret = detach_if_pending(timer, base, false); if (!ret && pending_only) @@ -1035,9 +1040,10 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) timer->expires = expires; /* * If 'idx' was calculated above and the base time did not advance - * between calculating 'idx' and taking the lock, only enqueue_timer() - * and trigger_dyntick_cpu() is required. Otherwise we need to - * (re)calculate the wheel index via internal_add_timer(). + * between calculating 'idx' and possibly switching the base, only + * enqueue_timer() and trigger_dyntick_cpu() is required. Otherwise + * we need to (re)calculate the wheel index via + * internal_add_timer(). */ if (idx != UINT_MAX && clk == base->clk) { enqueue_timer(base, timer, idx); From 041ad7bc758db259bb960ef795197dd14aab19a6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 22 Oct 2016 11:07:35 +0000 Subject: [PATCH 589/884] timers: Prevent base clock rewind when forwarding clock Ashton and Michael reported, that kernel versions 4.8 and later suffer from USB timeouts which are caused by the timer wheel rework. This is caused by a bug in the base clock forwarding mechanism, which leads to timers expiring early. The scenario which leads to this is: run_timers() while (jiffies >= base->clk) { collect_expired_timers(); base->clk++; expire_timers(); } So base->clk = jiffies + 1. Now the cpu goes idle: idle() get_next_timer_interrupt() nextevt = __next_time_interrupt(); if (time_after(nextevt, base->clk)) base->clk = jiffies; jiffies has not advanced since run_timers(), so this assignment effectively decrements base->clk by one. base->clk is the index into the timer wheel arrays. So let's assume the following state after the base->clk increment in run_timers(): jiffies = 0 base->clk = 1 A timer gets enqueued with an expiry delta of 63 ticks (which is the case with the USB timeout and HZ=250) so the resulting bucket index is: base->clk + delta = 1 + 63 = 64 The timer goes into the first wheel level. The array size is 64 so it ends up in bucket 0, which is correct as it takes 63 ticks to advance base->clk to index into bucket 0 again. If the cpu goes idle before jiffies advance, then the bug in the forwarding mechanism sets base->clk back to 0, so the next invocation of run_timers() at the next tick will index into bucket 0 and therefore expire the timer 62 ticks too early. Instead of blindly setting base->clk to jiffies we must make the forwarding conditional on jiffies > base->clk, but we cannot use jiffies for this as we might run into the following issue: if (time_after(jiffies, base->clk) { if (time_after(nextevt, base->clk)) base->clk = jiffies; jiffies can increment between the check and the assigment far enough to advance beyond nextevt. So we need to use a stable value for checking. get_next_timer_interrupt() has the basej argument which is the jiffies value snapshot taken in the calling code. So we can just that. Thanks to Ashton for bisecting and providing trace data! Fixes: a683f390b93f ("timers: Forward the wheel clock whenever possible") Reported-by: Ashton Holmes Reported-by: Michael Thayer Signed-off-by: Thomas Gleixner Cc: Michal Necasek Cc: Peter Zijlstra Cc: knut.osmundsen@oracle.com Cc: stable@vger.kernel.org Cc: stern@rowland.harvard.edu Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20161022110552.175308322@linutronix.de Signed-off-by: Thomas Gleixner --- kernel/time/timer.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index ccf913038f9f..7c446fb5163a 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1523,12 +1523,16 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) is_max_delta = (nextevt == base->clk + NEXT_TIMER_MAX_DELTA); base->next_expiry = nextevt; /* - * We have a fresh next event. Check whether we can forward the base: + * We have a fresh next event. Check whether we can forward the + * base. We can only do that when @basej is past base->clk + * otherwise we might rewind base->clk. */ - if (time_after(nextevt, jiffies)) - base->clk = jiffies; - else if (time_after(nextevt, base->clk)) - base->clk = nextevt; + if (time_after(basej, base->clk)) { + if (time_after(nextevt, basej)) + base->clk = basej; + else if (time_after(nextevt, base->clk)) + base->clk = nextevt; + } if (time_before_eq(nextevt, basej)) { expires = basem; From 6bad6bccf2d717f652d37e63cf261eaa23466009 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 22 Oct 2016 11:07:37 +0000 Subject: [PATCH 590/884] timers: Prevent base clock corruption when forwarding When a timer is enqueued we try to forward the timer base clock. This mechanism has two issues: 1) Forwarding a remote base unlocked The forwarding function is called from get_target_base() with the current timer base lock held. But if the new target base is a different base than the current base (can happen with NOHZ, sigh!) then the forwarding is done on an unlocked base. This can lead to corruption of base->clk. Solution is simple: Invoke the forwarding after the target base is locked. 2) Possible corruption due to jiffies advancing This is similar to the issue in get_net_timer_interrupt() which was fixed in the previous patch. jiffies can advance between check and assignement and therefore advancing base->clk beyond the next expiry value. So we need to read jiffies into a local variable once and do the checks and assignment with the local copy. Fixes: a683f390b93f("timers: Forward the wheel clock whenever possible") Reported-by: Ashton Holmes Reported-by: Michael Thayer Signed-off-by: Thomas Gleixner Cc: Michal Necasek Cc: Peter Zijlstra Cc: knut.osmundsen@oracle.com Cc: stable@vger.kernel.org Cc: stern@rowland.harvard.edu Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20161022110552.253640125@linutronix.de Signed-off-by: Thomas Gleixner --- kernel/time/timer.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 7c446fb5163a..c611c47de884 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -878,7 +878,7 @@ static inline struct timer_base *get_timer_base(u32 tflags) #ifdef CONFIG_NO_HZ_COMMON static inline struct timer_base * -__get_target_base(struct timer_base *base, unsigned tflags) +get_target_base(struct timer_base *base, unsigned tflags) { #ifdef CONFIG_SMP if ((tflags & TIMER_PINNED) || !base->migration_enabled) @@ -891,25 +891,27 @@ __get_target_base(struct timer_base *base, unsigned tflags) static inline void forward_timer_base(struct timer_base *base) { + unsigned long jnow = READ_ONCE(jiffies); + /* * We only forward the base when it's idle and we have a delta between * base clock and jiffies. */ - if (!base->is_idle || (long) (jiffies - base->clk) < 2) + if (!base->is_idle || (long) (jnow - base->clk) < 2) return; /* * If the next expiry value is > jiffies, then we fast forward to * jiffies otherwise we forward to the next expiry value. */ - if (time_after(base->next_expiry, jiffies)) - base->clk = jiffies; + if (time_after(base->next_expiry, jnow)) + base->clk = jnow; else base->clk = base->next_expiry; } #else static inline struct timer_base * -__get_target_base(struct timer_base *base, unsigned tflags) +get_target_base(struct timer_base *base, unsigned tflags) { return get_timer_this_cpu_base(tflags); } @@ -917,14 +919,6 @@ __get_target_base(struct timer_base *base, unsigned tflags) static inline void forward_timer_base(struct timer_base *base) { } #endif -static inline struct timer_base * -get_target_base(struct timer_base *base, unsigned tflags) -{ - struct timer_base *target = __get_target_base(base, tflags); - - forward_timer_base(target); - return target; -} /* * We are using hashed locking: Holding per_cpu(timer_bases[x]).lock means @@ -1037,6 +1031,9 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) } } + /* Try to forward a stale timer base clock */ + forward_timer_base(base); + timer->expires = expires; /* * If 'idx' was calculated above and the base time did not advance From 0ce57f8af1782fd12d3a81872a4ab97244989802 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 25 Oct 2016 14:04:34 +0200 Subject: [PATCH 591/884] ahci: fix the single MSI-X case in ahci_init_one We need to make sure hpriv->irq is set properly if we don't use per-port vectors, so switch from blindly assigning pdev->irq to using pci_irq_vector, which handles all interrupt types correctly. Signed-off-by: Christoph Hellwig Reported-by: Robert Richter Tested-by: Robert Richter Tested-by: David Daney Fixes: 0b9e2988ab22 ("ahci: use pci_alloc_irq_vectors") Signed-off-by: Tejun Heo --- drivers/ata/ahci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 60e42e2ed68f..9669fc7c19df 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -1620,7 +1620,7 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* legacy intx interrupts */ pci_intx(pdev, 1); } - hpriv->irq = pdev->irq; + hpriv->irq = pci_irq_vector(pdev, 0); if (!(hpriv->cap & HOST_CAP_SSS) || ahci_ignore_sss) host->flags |= ATA_HOST_PARALLEL_SCAN; From ef6239e0cf5e665e7b20fd659678b98331b3081d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 25 Oct 2016 09:37:37 -0400 Subject: [PATCH 592/884] drm/amdgpu/vce3: only enable 3 rings on new enough firmware (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Older firmware versions don't support 3 rings. fixes: https://bugs.freedesktop.org/show_bug.cgi?id=98016 v2: use define for fw version Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 8533269ec160..6feed726e299 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -52,6 +52,8 @@ #define VCE_V3_0_STACK_SIZE (64 * 1024) #define VCE_V3_0_DATA_SIZE ((16 * 1024 * AMDGPU_MAX_VCE_HANDLES) + (52 * 1024)) +#define FW_52_8_3 ((52 << 24) | (8 << 16) | (3 << 8)) + static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx); static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev); static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev); @@ -382,6 +384,10 @@ static int vce_v3_0_sw_init(void *handle) if (r) return r; + /* 52.8.3 required for 3 ring support */ + if (adev->vce.fw_version < FW_52_8_3) + adev->vce.num_rings = 2; + r = amdgpu_vce_resume(adev); if (r) return r; From 537b4b462caa8bfb9726d9695b8e56e2d5e6b41e Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Mon, 24 Oct 2016 23:32:04 +0200 Subject: [PATCH 593/884] drm/radeon: drop register readback in cayman_cp_int_cntl_setup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The read is taking a considerable amount of time (about 50us on this machine). The register does not ever hold anything other than the ring ID that is updated in this exact function, so there is no need for the read modify write cycle. This chops off a big chunk of the time spent in hardirq disabled context, as this function is called multiple times in the interrupt handler. With this change applied radeon won't show up in the list of the worst IRQ latency offenders anymore, where it was a regular before. Reviewed-by: Christian König Signed-off-by: Lucas Stach Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/ni.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 103fc8650197..a0d4a0522fdc 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1396,9 +1396,7 @@ static void cayman_pcie_gart_fini(struct radeon_device *rdev) void cayman_cp_int_cntl_setup(struct radeon_device *rdev, int ring, u32 cp_int_cntl) { - u32 srbm_gfx_cntl = RREG32(SRBM_GFX_CNTL) & ~3; - - WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl | (ring & 3)); + WREG32(SRBM_GFX_CNTL, RINGID(ring)); WREG32(CP_INT_CNTL, cp_int_cntl); } From b429ae4d5b565a71dfffd759dfcd4f6c093ced94 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 25 Oct 2016 16:23:26 -0700 Subject: [PATCH 594/884] sparc64: Fix illegal relative branches in hypervisor patched TLB code. When we copy code over to patch another piece of code, we can only use PC-relative branches that target code within that piece of code. Such PC-relative branches cannot be made to external symbols because the patch moves the location of the code and thus modifies the relative address of external symbols. Use an absolute jmpl to fix this problem. Signed-off-by: David S. Miller --- arch/sparc/mm/ultra.S | 65 +++++++++++++++++++++++++++++++++---------- 1 file changed, 51 insertions(+), 14 deletions(-) diff --git a/arch/sparc/mm/ultra.S b/arch/sparc/mm/ultra.S index b4f4733abc6e..85de139bfad6 100644 --- a/arch/sparc/mm/ultra.S +++ b/arch/sparc/mm/ultra.S @@ -30,7 +30,7 @@ .text .align 32 .globl __flush_tlb_mm -__flush_tlb_mm: /* 18 insns */ +__flush_tlb_mm: /* 19 insns */ /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */ ldxa [%o1] ASI_DMMU, %g2 cmp %g2, %o0 @@ -81,7 +81,7 @@ __flush_tlb_page: /* 22 insns */ .align 32 .globl __flush_tlb_pending -__flush_tlb_pending: /* 26 insns */ +__flush_tlb_pending: /* 27 insns */ /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ rdpr %pstate, %g7 sllx %o1, 3, %o1 @@ -113,7 +113,7 @@ __flush_tlb_pending: /* 26 insns */ .align 32 .globl __flush_tlb_kernel_range -__flush_tlb_kernel_range: /* 16 insns */ +__flush_tlb_kernel_range: /* 19 insns */ /* %o0=start, %o1=end */ cmp %o0, %o1 be,pn %xcc, 2f @@ -131,6 +131,9 @@ __flush_tlb_kernel_range: /* 16 insns */ retl nop nop + nop + nop + nop __spitfire_flush_tlb_mm_slow: rdpr %pstate, %g1 @@ -309,19 +312,28 @@ __hypervisor_tlb_tl0_error: ret restore -__hypervisor_flush_tlb_mm: /* 10 insns */ +__hypervisor_flush_tlb_mm: /* 19 insns */ mov %o0, %o2 /* ARG2: mmu context */ mov 0, %o0 /* ARG0: CPU lists unimplemented */ mov 0, %o1 /* ARG1: CPU lists unimplemented */ mov HV_MMU_ALL, %o3 /* ARG3: flags */ mov HV_FAST_MMU_DEMAP_CTX, %o5 ta HV_FAST_TRAP - brnz,pn %o0, __hypervisor_tlb_tl0_error + brnz,pn %o0, 1f mov HV_FAST_MMU_DEMAP_CTX, %o1 retl nop +1: sethi %hi(__hypervisor_tlb_tl0_error), %o5 + jmpl %o5 + %lo(__hypervisor_tlb_tl0_error), %g0 + nop + nop + nop + nop + nop + nop + nop -__hypervisor_flush_tlb_page: /* 11 insns */ +__hypervisor_flush_tlb_page: /* 22 insns */ /* %o0 = context, %o1 = vaddr */ mov %o0, %g2 mov %o1, %o0 /* ARG0: vaddr + IMMU-bit */ @@ -330,10 +342,21 @@ __hypervisor_flush_tlb_page: /* 11 insns */ srlx %o0, PAGE_SHIFT, %o0 sllx %o0, PAGE_SHIFT, %o0 ta HV_MMU_UNMAP_ADDR_TRAP - brnz,pn %o0, __hypervisor_tlb_tl0_error + brnz,pn %o0, 1f mov HV_MMU_UNMAP_ADDR_TRAP, %o1 retl nop +1: sethi %hi(__hypervisor_tlb_tl0_error), %o2 + jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0 + nop + nop + nop + nop + nop + nop + nop + nop + nop __hypervisor_flush_tlb_pending: /* 16 insns */ /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ @@ -347,14 +370,25 @@ __hypervisor_flush_tlb_pending: /* 16 insns */ srlx %o0, PAGE_SHIFT, %o0 sllx %o0, PAGE_SHIFT, %o0 ta HV_MMU_UNMAP_ADDR_TRAP - brnz,pn %o0, __hypervisor_tlb_tl0_error + brnz,pn %o0, 1f mov HV_MMU_UNMAP_ADDR_TRAP, %o1 brnz,pt %g1, 1b nop retl nop +1: sethi %hi(__hypervisor_tlb_tl0_error), %o2 + jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0 + nop + nop + nop + nop + nop + nop + nop + nop + nop -__hypervisor_flush_tlb_kernel_range: /* 16 insns */ +__hypervisor_flush_tlb_kernel_range: /* 19 insns */ /* %o0=start, %o1=end */ cmp %o0, %o1 be,pn %xcc, 2f @@ -366,12 +400,15 @@ __hypervisor_flush_tlb_kernel_range: /* 16 insns */ mov 0, %o1 /* ARG1: mmu context */ mov HV_MMU_ALL, %o2 /* ARG2: flags */ ta HV_MMU_UNMAP_ADDR_TRAP - brnz,pn %o0, __hypervisor_tlb_tl0_error + brnz,pn %o0, 3f mov HV_MMU_UNMAP_ADDR_TRAP, %o1 brnz,pt %g2, 1b sub %g2, %g3, %g2 2: retl nop +3: sethi %hi(__hypervisor_tlb_tl0_error), %o2 + jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0 + nop #ifdef DCACHE_ALIASING_POSSIBLE /* XXX Niagara and friends have an 8K cache, so no aliasing is @@ -819,28 +856,28 @@ hypervisor_patch_cachetlbops: sethi %hi(__hypervisor_flush_tlb_mm), %o1 or %o1, %lo(__hypervisor_flush_tlb_mm), %o1 call tlb_patch_one - mov 10, %o2 + mov 19, %o2 sethi %hi(__flush_tlb_page), %o0 or %o0, %lo(__flush_tlb_page), %o0 sethi %hi(__hypervisor_flush_tlb_page), %o1 or %o1, %lo(__hypervisor_flush_tlb_page), %o1 call tlb_patch_one - mov 11, %o2 + mov 22, %o2 sethi %hi(__flush_tlb_pending), %o0 or %o0, %lo(__flush_tlb_pending), %o0 sethi %hi(__hypervisor_flush_tlb_pending), %o1 or %o1, %lo(__hypervisor_flush_tlb_pending), %o1 call tlb_patch_one - mov 16, %o2 + mov 27, %o2 sethi %hi(__flush_tlb_kernel_range), %o0 or %o0, %lo(__flush_tlb_kernel_range), %o0 sethi %hi(__hypervisor_flush_tlb_kernel_range), %o1 or %o1, %lo(__hypervisor_flush_tlb_kernel_range), %o1 call tlb_patch_one - mov 16, %o2 + mov 19, %o2 #ifdef DCACHE_ALIASING_POSSIBLE sethi %hi(__flush_dcache_page), %o0 From a467a672cf097ec11332a9b22db6e31d3ef50359 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 26 Oct 2016 10:07:44 +1030 Subject: [PATCH 595/884] MAINTAINERS: Begin module maintainer transition Being a Linux kernel maintainer has been my proudest professional accomplishment, spanning the last 19 years. But now we have a surfeit of excellent hackers, and I can hand this over without regret. I'll still be around as co-maintainer for another cycle, but Jessica is now the one to convince if you want your patches applied. She rocks, and is far more timely than me too! Signed-off-by: Rusty Russell Acked-by: Jessica Yu --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index c44795306342..f30b8ea700fd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8100,6 +8100,7 @@ S: Maintained F: drivers/media/dvb-frontends/mn88473* MODULE SUPPORT +M: Jessica Yu M: Rusty Russell S: Maintained F: include/linux/module.h From 9d9fa230206a3aea6ef451646c97122f04777983 Mon Sep 17 00:00:00 2001 From: James Clarke Date: Mon, 24 Oct 2016 19:49:25 +0100 Subject: [PATCH 596/884] sparc: Handle negative offsets in arch_jump_label_transform Additionally, if the offset will overflow the immediate for a ba,pt instruction, fall back on a standard ba to get an extra 3 bits. Signed-off-by: James Clarke Signed-off-by: David S. Miller --- arch/sparc/kernel/jump_label.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/arch/sparc/kernel/jump_label.c b/arch/sparc/kernel/jump_label.c index 59bbeff55024..07933b9e9ce0 100644 --- a/arch/sparc/kernel/jump_label.c +++ b/arch/sparc/kernel/jump_label.c @@ -13,19 +13,30 @@ void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type) { - u32 val; u32 *insn = (u32 *) (unsigned long) entry->code; + u32 val; if (type == JUMP_LABEL_JMP) { s32 off = (s32)entry->target - (s32)entry->code; + bool use_v9_branch = false; + + BUG_ON(off & 3); #ifdef CONFIG_SPARC64 - /* ba,pt %xcc, . + (off << 2) */ - val = 0x10680000 | ((u32) off >> 2); -#else - /* ba . + (off << 2) */ - val = 0x10800000 | ((u32) off >> 2); + if (off <= 0xfffff && off >= -0x100000) + use_v9_branch = true; #endif + if (use_v9_branch) { + /* WDISP19 - target is . + immed << 2 */ + /* ba,pt %xcc, . + off */ + val = 0x10680000 | (((u32) off >> 2) & 0x7ffff); + } else { + /* WDISP22 - target is . + immed << 2 */ + BUG_ON(off > 0x7fffff); + BUG_ON(off < -0x800000); + /* ba . + off */ + val = 0x10800000 | (((u32) off >> 2) & 0x3fffff); + } } else { val = 0x01000000; } From 849c498766060a16aad5b0e0d03206726e7d2fa4 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 25 Oct 2016 19:43:17 -0700 Subject: [PATCH 597/884] sparc64: Handle extremely large kernel TSB range flushes sanely. If the number of pages we are flushing is more than twice the number of entries in the TSB, just scan the TSB table for matches rather than probing each and every page in the range. Based upon a patch and report by James Clarke. Signed-off-by: David S. Miller --- arch/sparc/mm/tsb.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c index f2b77112e9d8..e20fbbafb0b0 100644 --- a/arch/sparc/mm/tsb.c +++ b/arch/sparc/mm/tsb.c @@ -27,6 +27,20 @@ static inline int tag_compare(unsigned long tag, unsigned long vaddr) return (tag == (vaddr >> 22)); } +static void flush_tsb_kernel_range_scan(unsigned long start, unsigned long end) +{ + unsigned long idx; + + for (idx = 0; idx < KERNEL_TSB_NENTRIES; idx++) { + struct tsb *ent = &swapper_tsb[idx]; + unsigned long match = idx << 13; + + match |= (ent->tag << 22); + if (match >= start && match < end) + ent->tag = (1UL << TSB_TAG_INVALID_BIT); + } +} + /* TSB flushes need only occur on the processor initiating the address * space modification, not on each cpu the address space has run on. * Only the TLB flush needs that treatment. @@ -36,6 +50,9 @@ void flush_tsb_kernel_range(unsigned long start, unsigned long end) { unsigned long v; + if ((end - start) >> PAGE_SHIFT >= 2 * KERNEL_TSB_NENTRIES) + return flush_tsb_kernel_range_scan(start, end); + for (v = start; v < end; v += PAGE_SIZE) { unsigned long hash = tsb_hash(v, PAGE_SHIFT, KERNEL_TSB_NENTRIES); From 8ef4227615e158faa4ee85a1d6466782f7e22f2f Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 24 Oct 2016 15:27:59 +1000 Subject: [PATCH 598/884] x86/io: add interface to reserve io memtype for a resource range. (v1.1) A recent change to the mm code in: 87744ab3832b mm: fix cache mode tracking in vm_insert_mixed() started enforcing checking the memory type against the registered list for amixed pfn insertion mappings. It happens that the drm drivers for a number of gpus relied on this being broken. Currently the driver only inserted VRAM mappings into the tracking table when they came from the kernel, and userspace mappings never landed in the table. This led to a regression where all the mapping end up as UC instead of WC now. I've considered a number of solutions but since this needs to be fixed in fixes and not next, and some of the solutions were going to introduce overhead that hadn't been there before I didn't consider them viable at this stage. These mainly concerned hooking into the TTM io reserve APIs, but these API have a bunch of fast paths I didn't want to unwind to add this to. The solution I've decided on is to add a new API like the arch_phys_wc APIs (these would have worked but wc_del didn't take a range), and use them from the drivers to add a WC compatible mapping to the table for all VRAM on those GPUs. This means we can then create userspace mapping that won't get degraded to UC. v1.1: use CONFIG_X86_PAT + add some comments in io.h Cc: Toshi Kani Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Andy Lutomirski Cc: Denys Vlasenko Cc: Brian Gerst Cc: x86@kernel.org Cc: mcgrof@suse.com Cc: Dan Williams Acked-by: Ingo Molnar Reviewed-by: Thomas Gleixner Signed-off-by: Dave Airlie --- arch/x86/include/asm/io.h | 6 ++++++ arch/x86/mm/pat.c | 14 ++++++++++++++ include/linux/io.h | 22 ++++++++++++++++++++++ 3 files changed, 42 insertions(+) diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index de25aad07853..d34bd370074b 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -351,4 +351,10 @@ extern void arch_phys_wc_del(int handle); #define arch_phys_wc_add arch_phys_wc_add #endif +#ifdef CONFIG_X86_PAT +extern int arch_io_reserve_memtype_wc(resource_size_t start, resource_size_t size); +extern void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size); +#define arch_io_reserve_memtype_wc arch_io_reserve_memtype_wc +#endif + #endif /* _ASM_X86_IO_H */ diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 170cc4ff057b..83e701f160a9 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -730,6 +730,20 @@ void io_free_memtype(resource_size_t start, resource_size_t end) free_memtype(start, end); } +int arch_io_reserve_memtype_wc(resource_size_t start, resource_size_t size) +{ + enum page_cache_mode type = _PAGE_CACHE_MODE_WC; + + return io_reserve_memtype(start, start + size, &type); +} +EXPORT_SYMBOL(arch_io_reserve_memtype_wc); + +void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size) +{ + io_free_memtype(start, start + size); +} +EXPORT_SYMBOL(arch_io_free_memtype_wc); + pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot) { diff --git a/include/linux/io.h b/include/linux/io.h index e2c8419278c1..82ef36eac8a1 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -141,4 +141,26 @@ enum { void *memremap(resource_size_t offset, size_t size, unsigned long flags); void memunmap(void *addr); +/* + * On x86 PAT systems we have memory tracking that keeps track of + * the allowed mappings on memory ranges. This tracking works for + * all the in-kernel mapping APIs (ioremap*), but where the user + * wishes to map a range from a physical device into user memory + * the tracking won't be updated. This API is to be used by + * drivers which remap physical device pages into userspace, + * and wants to make sure they are mapped WC and not UC. + */ +#ifndef arch_io_reserve_memtype_wc +static inline int arch_io_reserve_memtype_wc(resource_size_t base, + resource_size_t size) +{ + return 0; +} + +static inline void arch_io_free_memtype_wc(resource_size_t base, + resource_size_t size) +{ +} +#endif + #endif /* _LINUX_IO_H */ From e1957dba5b54b9c9b1d16f9d5f3f8d41d82bee41 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 26 Oct 2016 07:56:59 +0200 Subject: [PATCH 599/884] cfg80211: process events caused by suspend before suspending When suspending without WoWLAN, cfg80211 will ask drivers to disconnect. Even when the driver does this synchronously, and immediately returns with a notification, cfg80211 schedules the handling thereof to a workqueue, and may then call back into the driver when the driver was already suspended/ing. Fix this by processing all events caused by cfg80211_leave_all() directly after that function returns. The driver still needs to do the right thing here and wait for the firmware response, but that is - at least - true for mwifiex where this occurred. Reported-by: Amitkumar Karwar Tested-by: Amitkumar Karwar Signed-off-by: Johannes Berg --- net/wireless/sysfs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 0082f4b01795..14b3f007826d 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -104,13 +104,16 @@ static int wiphy_suspend(struct device *dev) rtnl_lock(); if (rdev->wiphy.registered) { - if (!rdev->wiphy.wowlan_config) + if (!rdev->wiphy.wowlan_config) { cfg80211_leave_all(rdev); + cfg80211_process_rdev_events(rdev); + } if (rdev->ops->suspend) ret = rdev_suspend(rdev, rdev->wiphy.wowlan_config); if (ret == 1) { /* Driver refuse to configure wowlan */ cfg80211_leave_all(rdev); + cfg80211_process_rdev_events(rdev); ret = rdev_suspend(rdev, NULL); } } From b4f7f4ad425a84fd5d40da21aff8681997b25ff9 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 21 Oct 2016 15:57:23 +0300 Subject: [PATCH 600/884] mac80211: fix some sphinx warnings Signed-off-by: Jani Nikula Signed-off-by: Johannes Berg --- include/net/mac80211.h | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a810dfcb83c2..e2dba93e374f 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -811,14 +811,18 @@ enum mac80211_rate_control_flags { * in the control information, and it will be filled by the rate * control algorithm according to what should be sent. For example, * if this array contains, in the format { , } the - * information + * information:: + * * { 3, 2 }, { 2, 2 }, { 1, 4 }, { -1, 0 }, { -1, 0 } + * * then this means that the frame should be transmitted * up to twice at rate 3, up to twice at rate 2, and up to four * times at rate 1 if it doesn't get acknowledged. Say it gets * acknowledged by the peer after the fifth attempt, the status - * information should then contain + * information should then contain:: + * * { 3, 2 }, { 2, 2 }, { 1, 1 }, { -1, 0 } ... + * * since it was transmitted twice at rate 3, twice at rate 2 * and once at rate 1 after which we received an acknowledgement. */ @@ -1168,8 +1172,8 @@ enum mac80211_rx_vht_flags { * @rate_idx: index of data rate into band's supported rates or MCS index if * HT or VHT is used (%RX_FLAG_HT/%RX_FLAG_VHT) * @vht_nss: number of streams (VHT only) - * @flag: %RX_FLAG_* - * @vht_flag: %RX_VHT_FLAG_* + * @flag: %RX_FLAG_\* + * @vht_flag: %RX_VHT_FLAG_\* * @rx_flags: internal RX flags for mac80211 * @ampdu_reference: A-MPDU reference number, must be a different value for * each A-MPDU but the same for each subframe within one A-MPDU @@ -1432,7 +1436,7 @@ enum ieee80211_vif_flags { * @probe_req_reg: probe requests should be reported to mac80211 for this * interface. * @drv_priv: data area for driver use, will always be aligned to - * sizeof(void *). + * sizeof(void \*). * @txq: the multicast data TX queue (if driver uses the TXQ abstraction) */ struct ieee80211_vif { @@ -1743,7 +1747,7 @@ struct ieee80211_sta_rates { * @wme: indicates whether the STA supports QoS/WME (if local devices does, * otherwise always false) * @drv_priv: data area for driver use, will always be aligned to - * sizeof(void *), size is determined in hw information. + * sizeof(void \*), size is determined in hw information. * @uapsd_queues: bitmap of queues configured for uapsd. Only valid * if wme is supported. * @max_sp: max Service Period. Only valid if wme is supported. @@ -2146,12 +2150,12 @@ enum ieee80211_hw_flags { * * @radiotap_mcs_details: lists which MCS information can the HW * reports, by default it is set to _MCS, _GI and _BW but doesn't - * include _FMT. Use %IEEE80211_RADIOTAP_MCS_HAVE_* values, only + * include _FMT. Use %IEEE80211_RADIOTAP_MCS_HAVE_\* values, only * adding _BW is supported today. * * @radiotap_vht_details: lists which VHT MCS information the HW reports, * the default is _GI | _BANDWIDTH. - * Use the %IEEE80211_RADIOTAP_VHT_KNOWN_* values. + * Use the %IEEE80211_RADIOTAP_VHT_KNOWN_\* values. * * @radiotap_timestamp: Information for the radiotap timestamp field; if the * 'units_pos' member is set to a non-negative value it must be set to @@ -2486,6 +2490,7 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * in the software stack cares about, we will, in the future, have mac80211 * tell the driver which information elements are interesting in the sense * that we want to see changes in them. This will include + * * - a list of information element IDs * - a list of OUIs for the vendor information element * From 7cf321d118a825c1541b43ca45294126fd474efa Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 24 Oct 2016 15:37:48 +1000 Subject: [PATCH 601/884] drm/drivers: add support for using the arch wc mapping API. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes a regression in all these drivers since the cache mode tracking was fixed for mixed mappings. It uses the new arch API to add the VRAM range to the PAT mapping tracking tables. Fixes: 87744ab3832 (mm: fix cache mode tracking in vm_insert_mixed()) Reviewed-by: Christian König . Signed-off-by: Dave Airlie --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 5 +++++ drivers/gpu/drm/ast/ast_ttm.c | 6 ++++++ drivers/gpu/drm/cirrus/cirrus_ttm.c | 7 +++++++ drivers/gpu/drm/mgag200/mgag200_ttm.c | 7 +++++++ drivers/gpu/drm/nouveau/nouveau_ttm.c | 8 ++++++++ drivers/gpu/drm/radeon/radeon_object.c | 5 +++++ 6 files changed, 38 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index aa074fac0c7f..f3efb1c5dae9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -754,6 +754,10 @@ static const char *amdgpu_vram_names[] = { int amdgpu_bo_init(struct amdgpu_device *adev) { + /* reserve PAT memory space to WC for VRAM */ + arch_io_reserve_memtype_wc(adev->mc.aper_base, + adev->mc.aper_size); + /* Add an MTRR for the VRAM */ adev->mc.vram_mtrr = arch_phys_wc_add(adev->mc.aper_base, adev->mc.aper_size); @@ -769,6 +773,7 @@ void amdgpu_bo_fini(struct amdgpu_device *adev) { amdgpu_ttm_fini(adev); arch_phys_wc_del(adev->mc.vram_mtrr); + arch_io_free_memtype_wc(adev->mc.aper_base, adev->mc.aper_size); } int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo, diff --git a/drivers/gpu/drm/ast/ast_ttm.c b/drivers/gpu/drm/ast/ast_ttm.c index 608df4c90520..0743e65cb240 100644 --- a/drivers/gpu/drm/ast/ast_ttm.c +++ b/drivers/gpu/drm/ast/ast_ttm.c @@ -267,6 +267,8 @@ int ast_mm_init(struct ast_private *ast) return ret; } + arch_io_reserve_memtype_wc(pci_resource_start(dev->pdev, 0), + pci_resource_len(dev->pdev, 0)); ast->fb_mtrr = arch_phys_wc_add(pci_resource_start(dev->pdev, 0), pci_resource_len(dev->pdev, 0)); @@ -275,11 +277,15 @@ int ast_mm_init(struct ast_private *ast) void ast_mm_fini(struct ast_private *ast) { + struct drm_device *dev = ast->dev; + ttm_bo_device_release(&ast->ttm.bdev); ast_ttm_global_release(ast); arch_phys_wc_del(ast->fb_mtrr); + arch_io_free_memtype_wc(pci_resource_start(dev->pdev, 0), + pci_resource_len(dev->pdev, 0)); } void ast_ttm_placement(struct ast_bo *bo, int domain) diff --git a/drivers/gpu/drm/cirrus/cirrus_ttm.c b/drivers/gpu/drm/cirrus/cirrus_ttm.c index bb2438dd8733..5e7e63ce7bce 100644 --- a/drivers/gpu/drm/cirrus/cirrus_ttm.c +++ b/drivers/gpu/drm/cirrus/cirrus_ttm.c @@ -267,6 +267,9 @@ int cirrus_mm_init(struct cirrus_device *cirrus) return ret; } + arch_io_reserve_memtype_wc(pci_resource_start(dev->pdev, 0), + pci_resource_len(dev->pdev, 0)); + cirrus->fb_mtrr = arch_phys_wc_add(pci_resource_start(dev->pdev, 0), pci_resource_len(dev->pdev, 0)); @@ -276,6 +279,8 @@ int cirrus_mm_init(struct cirrus_device *cirrus) void cirrus_mm_fini(struct cirrus_device *cirrus) { + struct drm_device *dev = cirrus->dev; + if (!cirrus->mm_inited) return; @@ -285,6 +290,8 @@ void cirrus_mm_fini(struct cirrus_device *cirrus) arch_phys_wc_del(cirrus->fb_mtrr); cirrus->fb_mtrr = 0; + arch_io_free_memtype_wc(pci_resource_start(dev->pdev, 0), + pci_resource_len(dev->pdev, 0)); } void cirrus_ttm_placement(struct cirrus_bo *bo, int domain) diff --git a/drivers/gpu/drm/mgag200/mgag200_ttm.c b/drivers/gpu/drm/mgag200/mgag200_ttm.c index 919b35f2ad24..dcf7d11ac380 100644 --- a/drivers/gpu/drm/mgag200/mgag200_ttm.c +++ b/drivers/gpu/drm/mgag200/mgag200_ttm.c @@ -266,6 +266,9 @@ int mgag200_mm_init(struct mga_device *mdev) return ret; } + arch_io_reserve_memtype_wc(pci_resource_start(dev->pdev, 0), + pci_resource_len(dev->pdev, 0)); + mdev->fb_mtrr = arch_phys_wc_add(pci_resource_start(dev->pdev, 0), pci_resource_len(dev->pdev, 0)); @@ -274,10 +277,14 @@ int mgag200_mm_init(struct mga_device *mdev) void mgag200_mm_fini(struct mga_device *mdev) { + struct drm_device *dev = mdev->dev; + ttm_bo_device_release(&mdev->ttm.bdev); mgag200_ttm_global_release(mdev); + arch_io_free_memtype_wc(pci_resource_start(dev->pdev, 0), + pci_resource_len(dev->pdev, 0)); arch_phys_wc_del(mdev->fb_mtrr); mdev->fb_mtrr = 0; } diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c index 1825dbc33192..a6dbe8258040 100644 --- a/drivers/gpu/drm/nouveau/nouveau_ttm.c +++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c @@ -398,6 +398,9 @@ nouveau_ttm_init(struct nouveau_drm *drm) /* VRAM init */ drm->gem.vram_available = drm->device.info.ram_user; + arch_io_reserve_memtype_wc(device->func->resource_addr(device, 1), + device->func->resource_size(device, 1)); + ret = ttm_bo_init_mm(&drm->ttm.bdev, TTM_PL_VRAM, drm->gem.vram_available >> PAGE_SHIFT); if (ret) { @@ -430,6 +433,8 @@ nouveau_ttm_init(struct nouveau_drm *drm) void nouveau_ttm_fini(struct nouveau_drm *drm) { + struct nvkm_device *device = nvxx_device(&drm->device); + ttm_bo_clean_mm(&drm->ttm.bdev, TTM_PL_VRAM); ttm_bo_clean_mm(&drm->ttm.bdev, TTM_PL_TT); @@ -439,4 +444,7 @@ nouveau_ttm_fini(struct nouveau_drm *drm) arch_phys_wc_del(drm->ttm.mtrr); drm->ttm.mtrr = 0; + arch_io_free_memtype_wc(device->func->resource_addr(device, 1), + device->func->resource_size(device, 1)); + } diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index be30861afae9..41b72ce6613f 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -446,6 +446,10 @@ void radeon_bo_force_delete(struct radeon_device *rdev) int radeon_bo_init(struct radeon_device *rdev) { + /* reserve PAT memory space to WC for VRAM */ + arch_io_reserve_memtype_wc(rdev->mc.aper_base, + rdev->mc.aper_size); + /* Add an MTRR for the VRAM */ if (!rdev->fastfb_working) { rdev->mc.vram_mtrr = arch_phys_wc_add(rdev->mc.aper_base, @@ -463,6 +467,7 @@ void radeon_bo_fini(struct radeon_device *rdev) { radeon_ttm_fini(rdev); arch_phys_wc_del(rdev->mc.vram_mtrr); + arch_io_free_memtype_wc(rdev->mc.aper_base, rdev->mc.aper_size); } /* Returns how many bytes TTM can move per IB. From 2925d366f450dc1db0ac88f2509a0eec0fef4226 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 17 Oct 2016 17:16:02 -0700 Subject: [PATCH 602/884] extcon: qcom-spmi-misc: Sync the extcon state on interrupt The driver was changed after submission to use the new style APIs like extcon_set_state(). Unfortunately, that only sets the state, and doesn't notify any consumers that the cable state has changed. Use extcon_set_state_sync() here instead so that we notify cable consumers of the state change. This fixes USB host-device role switching on the db8074 platform. Fixes: 38085c987f52 ("extcon: Add support for qcom SPMI PMIC USB id detection hardware") Signed-off-by: Stephen Boyd Signed-off-by: Chanwoo Choi --- drivers/extcon/extcon-qcom-spmi-misc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/extcon/extcon-qcom-spmi-misc.c b/drivers/extcon/extcon-qcom-spmi-misc.c index ca957a5f4291..b8cde096a808 100644 --- a/drivers/extcon/extcon-qcom-spmi-misc.c +++ b/drivers/extcon/extcon-qcom-spmi-misc.c @@ -51,7 +51,7 @@ static void qcom_usb_extcon_detect_cable(struct work_struct *work) if (ret) return; - extcon_set_state(info->edev, EXTCON_USB_HOST, !id); + extcon_set_state_sync(info->edev, EXTCON_USB_HOST, !id); } static irqreturn_t qcom_usb_irq_handler(int irq, void *dev_id) From cac5fcedaabdadf150c8a9be9fee76defc8ba444 Mon Sep 17 00:00:00 2001 From: Felix Monninger Date: Tue, 25 Oct 2016 22:28:08 +0100 Subject: [PATCH 603/884] drm: Release reference from blob lookup after replacing property MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drm_property_lookup_blob() returns a reference to the returned blob, and drm_atomic_replace_property_blob() takes a references to the blob it stores, so afterwards we are left owning a reference to the new_blob that we never release, and thus leak memory every time we update a property such as during drm_atomic_helper_legacy_gamma_set(). v2: update credentials, drm_property_unreference_blob() is NULL safe and NULL is passed consistently to it throughout drm_atomic.c so do so here. Reported-by: Felix Monninger Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98420 Signed-off-by: Felix Monninger Signed-off-by: Chris Wilson Cc: stable@vger.kernel.org Fixes: 5488dc16fde7 ("drm: introduce pipe color correction properties") Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20161025212808.3908-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/drm_atomic.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 23739609427d..e6862a744210 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -420,18 +420,21 @@ drm_atomic_replace_property_blob_from_id(struct drm_crtc *crtc, ssize_t expected_size, bool *replaced) { - struct drm_device *dev = crtc->dev; struct drm_property_blob *new_blob = NULL; if (blob_id != 0) { - new_blob = drm_property_lookup_blob(dev, blob_id); + new_blob = drm_property_lookup_blob(crtc->dev, blob_id); if (new_blob == NULL) return -EINVAL; - if (expected_size > 0 && expected_size != new_blob->length) + + if (expected_size > 0 && expected_size != new_blob->length) { + drm_property_unreference_blob(new_blob); return -EINVAL; + } } drm_atomic_replace_property_blob(blob, new_blob, replaced); + drm_property_unreference_blob(new_blob); return 0; } From 87d3b6588f9bf205902868d3e5baf68e37ad4ae1 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 20 Oct 2016 17:05:30 +0200 Subject: [PATCH 604/884] drm/fb-helper: Don't call dirty callback for untouched clips MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since 4.7 kernel, we've seen the error messages like kernel: [TTM] Buffer eviction failed kernel: qxl 0000:00:02.0: object_init failed for (4026540032, 0x00000001) kernel: [drm:qxl_alloc_bo_reserved [qxl]] *ERROR* failed to allocate VRAM BO on QXL when switching and accessing on VT. The culprit was the generic deferred_io code (qxl driver switched to it since 4.7). There is a race between the dirty clip update and the call of callback. In drm_fb_helper_dirty(), the dirty clip is updated in the spinlock, while it kicks off the update worker outside the spinlock. Meanwhile the update worker clears the dirty clip in the spinlock, too. Thus, when drm_fb_helper_dirty() is called concurrently, schedule_work() is called after the clip is cleared in the first worker call. This patch addresses it by validating the clip before calling the dirty fb callback. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98322 Bugzilla: https://bugzilla.suse.com/show_bug.cgi?id=1003298 Fixes: eaa434defaca ('drm/fb-helper: Add fb_deferred_io support') Cc: Signed-off-by: Takashi Iwai Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20161020150530.5787-1-tiwai@suse.de --- drivers/gpu/drm/drm_fb_helper.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 03414bde1f15..aae7df01864d 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -644,7 +644,9 @@ static void drm_fb_helper_dirty_work(struct work_struct *work) clip->x2 = clip->y2 = 0; spin_unlock_irqrestore(&helper->dirty_lock, flags); - helper->fb->funcs->dirty(helper->fb, NULL, 0, 0, &clip_copy, 1); + /* call dirty callback only when it has been really touched */ + if (clip_copy.x1 < clip_copy.x2 && clip_copy.y1 < clip_copy.y2) + helper->fb->funcs->dirty(helper->fb, NULL, 0, 0, &clip_copy, 1); } /** From 62c61514191bfe5731b43619b9b1bf4b423beeb0 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sun, 23 Oct 2016 09:32:34 -0700 Subject: [PATCH 605/884] doc: Add missing parameter for msi_setup commit 92ca8d20dee2 ("genirq/msi: Switch to new irq spreading") introduced new parameter to msi_init_setup and but did not update docbook comments. Fixes 'make htmldocs' warning. Signed-off-by: Stephen Hemminger Cc: bhelgaas@google.com Cc: linux-pci@vger.kernel.org Signed-off-by: Thomas Gleixner --- drivers/pci/msi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index bfdd0744b686..ad70507cfb56 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -610,6 +610,7 @@ static int msi_verify_entries(struct pci_dev *dev) * msi_capability_init - configure device's MSI capability structure * @dev: pointer to the pci_dev data structure of MSI device function * @nvec: number of interrupts to allocate + * @affinity: flag to indicate cpu irq affinity mask should be set * * Setup the MSI capability structure of the device with the requested * number of interrupts. A return value of zero indicates the successful @@ -752,6 +753,7 @@ static void msix_program_entries(struct pci_dev *dev, * @dev: pointer to the pci_dev data structure of MSI-X device function * @entries: pointer to an array of struct msix_entry entries * @nvec: number of @entries + * @affinity: flag to indicate cpu irq affinity mask should be set * * Setup the MSI-X capability structure of device function with a * single MSI-X irq. A return of zero indicates the successful setup of From 5c0ba57744b1422d528f19430dd66d6803cea86f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 25 Oct 2016 22:57:10 +0200 Subject: [PATCH 606/884] spi: fsl-espi: avoid processing uninitalized data on error When we get a spurious interrupt in fsl_espi_irq, we end up processing four uninitalized bytes of data, as shown in this warning message: drivers/spi/spi-fsl-espi.c: In function 'fsl_espi_irq': drivers/spi/spi-fsl-espi.c:462:4: warning: 'rx_data' may be used uninitialized in this function [-Wmaybe-uninitialized] This adds another check so we skip the data in this case. Fixes: 6319a68011b8 ("spi/fsl-espi: avoid infinite loops on fsl_espi_cpu_irq()") Signed-off-by: Arnd Bergmann Signed-off-by: Mark Brown Cc: stable@vger.kernel.org --- drivers/spi/spi-fsl-espi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-fsl-espi.c b/drivers/spi/spi-fsl-espi.c index 7451585a080e..2c175b9495f7 100644 --- a/drivers/spi/spi-fsl-espi.c +++ b/drivers/spi/spi-fsl-espi.c @@ -458,7 +458,7 @@ static void fsl_espi_cpu_irq(struct mpc8xxx_spi *mspi, u32 events) mspi->len -= rx_nr_bytes; - if (mspi->rx) + if (rx_nr_bytes && mspi->rx) mspi->get_rx(rx_data, mspi); } From 5de0a8c0c240338cb5b73363b0673c6aa804bb1c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 24 Oct 2016 15:01:48 -0400 Subject: [PATCH 607/884] x86: Fix export for mcount and __fentry__ Commit 784d5699eddc5 ("x86: move exports to actual definitions") removed the EXPORT_SYMBOL(__fentry__) and EXPORT_SYMBOL(mcount) from x8664_ksyms_64.c, and added EXPORT_SYMBOL(function_hook) in mcount_64.S instead. The problem is that function_hook isn't a function at all, but a macro that is defined as either mcount or __fentry__ depending on the support from gcc. Originally, I thought this was a macro issue, like what __stringify() is used for. But the problem is a bit deeper. The Makefile.build has some magic that does post processing of files to create the CRC bindings. It does some searches for EXPORT_SYMBOL() and because it finds a macro name and not the actual functions, this causes function_hook not to be converted into mcount or __fentry__ and they are missed. Instead of adding more magic to Makefile.build, just add EXPORT_SYMBOL() for mcount and __fentry__ where the ifdef is used. Since this is assembly and not C, it doesn't require being set after the function is defined. Signed-off-by: Steven Rostedt Tested-by: Borislav Petkov Cc: Gabriel C Cc: Nicholas Piggin Cc: Al Viro Link: http://lkml.kernel.org/r/20161024150148.4f9d90e4@gandalf.local.home Signed-off-by: Thomas Gleixner --- arch/x86/kernel/mcount_64.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S index efe73aacf966..7b0d3da52fb4 100644 --- a/arch/x86/kernel/mcount_64.S +++ b/arch/x86/kernel/mcount_64.S @@ -18,8 +18,10 @@ #ifdef CC_USING_FENTRY # define function_hook __fentry__ +EXPORT_SYMBOL(__fentry__) #else # define function_hook mcount +EXPORT_SYMBOL(mcount) #endif /* All cases save the original rbp (8 bytes) */ @@ -295,7 +297,6 @@ trace: jmp fgraph_trace END(function_hook) #endif /* CONFIG_DYNAMIC_FTRACE */ -EXPORT_SYMBOL(function_hook) #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER From 9078210ef4016539c909fb67164d3f1c27323d8b Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 25 Oct 2016 16:08:19 +0100 Subject: [PATCH 608/884] KVM: MIPS: Fix lazy user ASID regenerate for SMP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kvm_mips_check_asids() runs before entering the guest and performs lazy regeneration of host ASID for guest usermode, using last_user_gasid to track the last guest ASID in the VCPU that was used by guest usermode on any host CPU. last_user_gasid is reset after performing the lazy ASID regeneration on the current CPU, and by kvm_arch_vcpu_load() if the host ASID for guest usermode is regenerated due to staleness (to cancel outstanding lazy ASID regenerations). Unfortunately neither case handles SMP hosts correctly: - When the lazy ASID regeneration is performed it should apply to all CPUs (as last_user_gasid does), so reset the ASID on other CPUs to zero to trigger regeneration when the VCPU is next loaded on those CPUs. - When the ASID is found to be stale on the current CPU, we should not cancel lazy ASID regenerations globally, so drop the reset of last_user_gasid altogether here. Both cases would require a guest ASID change and two host CPU migrations (and in the latter case one of the CPUs to start a new ASID cycle) before guest usermode could potentially access stale user pages from a previously running ASID in the same VCPU. Fixes: 25b08c7fb0e4 ("KVM: MIPS: Invalidate TLB by regenerating ASIDs") Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: "Radim Krčmář Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/mips/kvm/mips.c | 5 ++++- arch/mips/kvm/mmu.c | 4 ---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 622037d851a3..06a60b19acfb 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -426,7 +426,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, static void kvm_mips_check_asids(struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; - int cpu = smp_processor_id(); + int i, cpu = smp_processor_id(); unsigned int gasid; /* @@ -442,6 +442,9 @@ static void kvm_mips_check_asids(struct kvm_vcpu *vcpu) vcpu); vcpu->arch.guest_user_asid[cpu] = vcpu->arch.guest_user_mm.context.asid[cpu]; + for_each_possible_cpu(i) + if (i != cpu) + vcpu->arch.guest_user_asid[cpu] = 0; vcpu->arch.last_user_gasid = gasid; } } diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c index 03883ba806e2..3b677c851be0 100644 --- a/arch/mips/kvm/mmu.c +++ b/arch/mips/kvm/mmu.c @@ -260,13 +260,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if ((vcpu->arch.guest_user_asid[cpu] ^ asid_cache(cpu)) & asid_version_mask(cpu)) { - u32 gasid = kvm_read_c0_guest_entryhi(vcpu->arch.cop0) & - KVM_ENTRYHI_ASID; - kvm_get_new_mmu_context(&vcpu->arch.guest_user_mm, cpu, vcpu); vcpu->arch.guest_user_asid[cpu] = vcpu->arch.guest_user_mm.context.asid[cpu]; - vcpu->arch.last_user_gasid = gasid; newasid++; kvm_debug("[%d]: cpu_context: %#lx\n", cpu, From ede5f3e7b54a4347be4d8525269eae50902bd7cd Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 25 Oct 2016 16:11:11 +0100 Subject: [PATCH 609/884] KVM: MIPS: Make ERET handle ERL before EXL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ERET instruction to return from exception is used for returning from exception level (Status.EXL) and error level (Status.ERL). If both bits are set however we should be returning from ERL first, as ERL can interrupt EXL, for example when an NMI is taken. KVM however checks EXL first. Fix the order of the checks to match the pseudocode in the instruction set manual. Fixes: e685c689f3a8 ("KVM/MIPS32: Privileged instruction/target branch emulation.") Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: "Radim Krčmář Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org Cc: # 3.10.x- Signed-off-by: Paolo Bonzini --- arch/mips/kvm/emulate.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index 8770f32c9e0b..c45ef0f13dfa 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c @@ -790,15 +790,15 @@ enum emulation_result kvm_mips_emul_eret(struct kvm_vcpu *vcpu) struct mips_coproc *cop0 = vcpu->arch.cop0; enum emulation_result er = EMULATE_DONE; - if (kvm_read_c0_guest_status(cop0) & ST0_EXL) { + if (kvm_read_c0_guest_status(cop0) & ST0_ERL) { + kvm_clear_c0_guest_status(cop0, ST0_ERL); + vcpu->arch.pc = kvm_read_c0_guest_errorepc(cop0); + } else if (kvm_read_c0_guest_status(cop0) & ST0_EXL) { kvm_debug("[%#lx] ERET to %#lx\n", vcpu->arch.pc, kvm_read_c0_guest_epc(cop0)); kvm_clear_c0_guest_status(cop0, ST0_EXL); vcpu->arch.pc = kvm_read_c0_guest_epc(cop0); - } else if (kvm_read_c0_guest_status(cop0) & ST0_ERL) { - kvm_clear_c0_guest_status(cop0, ST0_ERL); - vcpu->arch.pc = kvm_read_c0_guest_errorepc(cop0); } else { kvm_err("[%#lx] ERET when MIPS_SR_EXL|MIPS_SR_ERL == 0\n", vcpu->arch.pc); From e1e575f6b026734be3b1f075e780e91ab08ca541 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 25 Oct 2016 16:11:12 +0100 Subject: [PATCH 610/884] KVM: MIPS: Precalculate MMIO load resume PC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The advancing of the PC when completing an MMIO load is done before re-entering the guest, i.e. before restoring the guest ASID. However if the load is in a branch delay slot it may need to access guest code to read the prior branch instruction. This isn't safe in TLB mapped code at the moment, nor in the future when we'll access unmapped guest segments using direct user accessors too, as it could read the branch from host user memory instead. Therefore calculate the resume PC in advance while we're still in the right context and save it in the new vcpu->arch.io_pc (replacing the no longer needed vcpu->arch.pending_load_cause), and restore it on MMIO completion. Fixes: e685c689f3a8 ("KVM/MIPS32: Privileged instruction/target branch emulation.") Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: "Radim Krčmář Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org Cc: # 3.10.x- Signed-off-by: Paolo Bonzini --- arch/mips/include/asm/kvm_host.h | 7 ++++--- arch/mips/kvm/emulate.c | 24 +++++++++++++++--------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 07f58cfc1ab9..bebec370324f 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -293,7 +293,10 @@ struct kvm_vcpu_arch { /* Host KSEG0 address of the EI/DI offset */ void *kseg0_commpage; - u32 io_gpr; /* GPR used as IO source/target */ + /* Resume PC after MMIO completion */ + unsigned long io_pc; + /* GPR used as IO source/target */ + u32 io_gpr; struct hrtimer comparecount_timer; /* Count timer control KVM register */ @@ -315,8 +318,6 @@ struct kvm_vcpu_arch { /* Bitmask of pending exceptions to be cleared */ unsigned long pending_exceptions_clr; - u32 pending_load_cause; - /* Save/Restore the entryhi register when are are preempted/scheduled back in */ unsigned long preempt_entryhi; diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index c45ef0f13dfa..aa0937423e28 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c @@ -1528,13 +1528,25 @@ enum emulation_result kvm_mips_emulate_load(union mips_instruction inst, struct kvm_vcpu *vcpu) { enum emulation_result er = EMULATE_DO_MMIO; + unsigned long curr_pc; u32 op, rt; u32 bytes; rt = inst.i_format.rt; op = inst.i_format.opcode; - vcpu->arch.pending_load_cause = cause; + /* + * Find the resume PC now while we have safe and easy access to the + * prior branch instruction, and save it for + * kvm_mips_complete_mmio_load() to restore later. + */ + curr_pc = vcpu->arch.pc; + er = update_pc(vcpu, cause); + if (er == EMULATE_FAIL) + return er; + vcpu->arch.io_pc = vcpu->arch.pc; + vcpu->arch.pc = curr_pc; + vcpu->arch.io_gpr = rt; switch (op) { @@ -2494,9 +2506,8 @@ enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu, goto done; } - er = update_pc(vcpu, vcpu->arch.pending_load_cause); - if (er == EMULATE_FAIL) - return er; + /* Restore saved resume PC */ + vcpu->arch.pc = vcpu->arch.io_pc; switch (run->mmio.len) { case 4: @@ -2518,11 +2529,6 @@ enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu, break; } - if (vcpu->arch.pending_load_cause & CAUSEF_BD) - kvm_debug("[%#lx] Completing %d byte BD Load to gpr %d (0x%08lx) type %d\n", - vcpu->arch.pc, run->mmio.len, vcpu->arch.io_gpr, *gpr, - vcpu->mmio_needed); - done: return er; } From 45c7ee43a5184ddbff652ee0d2e826f86f1b616b Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Wed, 26 Oct 2016 08:48:11 +0200 Subject: [PATCH 611/884] KVM: s390: Fix STHYI buffer alignment for diag224 Diag224 requires a page-aligned 4k buffer to store the name table into. kmalloc does not guarantee page alignment, hence we replace it with __get_free_page for the buffer allocation. Cc: stable@vger.kernel.org # v4.8+ Reported-by: Michael Holzheu Signed-off-by: Janosch Frank Reviewed-by: Cornelia Huck Signed-off-by: Christian Borntraeger --- arch/s390/kvm/sthyi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/s390/kvm/sthyi.c b/arch/s390/kvm/sthyi.c index bd98b7d25200..05c98bb853cf 100644 --- a/arch/s390/kvm/sthyi.c +++ b/arch/s390/kvm/sthyi.c @@ -315,7 +315,7 @@ static void fill_diag(struct sthyi_sctns *sctns) if (r < 0) goto out; - diag224_buf = kmalloc(PAGE_SIZE, GFP_KERNEL | GFP_DMA); + diag224_buf = (void *)__get_free_page(GFP_KERNEL | GFP_DMA); if (!diag224_buf || diag224(diag224_buf)) goto out; @@ -378,7 +378,7 @@ static void fill_diag(struct sthyi_sctns *sctns) sctns->par.infpval1 |= PAR_WGHT_VLD; out: - kfree(diag224_buf); + free_page((unsigned long)diag224_buf); vfree(diag204_buf); } From 36343f6ea721e003ed11b48a6a05d77a255b3a62 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 26 Oct 2016 13:35:56 +0200 Subject: [PATCH 612/884] KVM: fix OOPS on flush_work The conversion done by commit 3706feacd007 ("KVM: Remove deprecated create_singlethread_workqueue") is broken. It flushes a single work item &irqfd->shutdown instead of all of them, and even worse if there is no irqfd on the list then you get a NULL pointer dereference. Revert the virt/kvm/eventfd.c part of that patch; to avoid the deprecated function, just allocate our own workqueue---it does not even have to be unbound---with alloc_workqueue. Fixes: 3706feacd007 Reviewed-by: Cornelia Huck Signed-off-by: Paolo Bonzini --- virt/kvm/eventfd.c | 22 +++++++++++++++++++--- virt/kvm/kvm_main.c | 6 ++++++ 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index f397e9b20370..a29786dd9522 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -42,6 +42,7 @@ #ifdef CONFIG_HAVE_KVM_IRQFD +static struct workqueue_struct *irqfd_cleanup_wq; static void irqfd_inject(struct work_struct *work) @@ -167,7 +168,7 @@ irqfd_deactivate(struct kvm_kernel_irqfd *irqfd) list_del_init(&irqfd->list); - schedule_work(&irqfd->shutdown); + queue_work(irqfd_cleanup_wq, &irqfd->shutdown); } int __attribute__((weak)) kvm_arch_set_irq_inatomic( @@ -554,7 +555,7 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args) * so that we guarantee there will not be any more interrupts on this * gsi once this deassign function returns. */ - flush_work(&irqfd->shutdown); + flush_workqueue(irqfd_cleanup_wq); return 0; } @@ -591,7 +592,7 @@ kvm_irqfd_release(struct kvm *kvm) * Block until we know all outstanding shutdown jobs have completed * since we do not take a kvm* reference. */ - flush_work(&irqfd->shutdown); + flush_workqueue(irqfd_cleanup_wq); } @@ -621,8 +622,23 @@ void kvm_irq_routing_update(struct kvm *kvm) spin_unlock_irq(&kvm->irqfds.lock); } +/* + * create a host-wide workqueue for issuing deferred shutdown requests + * aggregated from all vm* instances. We need our own isolated + * queue to ease flushing work items when a VM exits. + */ +int kvm_irqfd_init(void) +{ + irqfd_cleanup_wq = alloc_workqueue("kvm-irqfd-cleanup", 0, 0); + if (!irqfd_cleanup_wq) + return -ENOMEM; + + return 0; +} + void kvm_irqfd_exit(void) { + destroy_workqueue(irqfd_cleanup_wq); } #endif diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 28510e72618a..d92c3d5b0fbe 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3846,7 +3846,12 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, * kvm_arch_init makes sure there's at most one caller * for architectures that support multiple implementations, * like intel and amd on x86. + * kvm_arch_init must be called before kvm_irqfd_init to avoid creating + * conflicts in case kvm is already setup for another implementation. */ + r = kvm_irqfd_init(); + if (r) + goto out_irqfd; if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) { r = -ENOMEM; @@ -3928,6 +3933,7 @@ out_free_0a: free_cpumask_var(cpus_hardware_enabled); out_free_0: kvm_irqfd_exit(); +out_irqfd: kvm_arch_exit(); out_fail: return r; From 7dfcb36a1f17e4c7c7c12b9d8a6902037c7d98dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 26 Oct 2016 12:05:52 +0300 Subject: [PATCH 613/884] drm/fb-helper: Fix connector ref leak on error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to drop the connector references already taken when we abort in the middle of drm_fb_helper_single_add_all_connectors() Cc: stable@vger.kernel.org Cc: Carlos Santa Cc: Kirill A. Shutemov Tested-by: Carlos Santa Tested-by: Kirill A. Shutemov Signed-off-by: Ville Syrjälä Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1477472755-15288-2-git-send-email-ville.syrjala@linux.intel.com --- drivers/gpu/drm/drm_fb_helper.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index aae7df01864d..efaccc2e1b4c 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -131,7 +131,12 @@ int drm_fb_helper_single_add_all_connectors(struct drm_fb_helper *fb_helper) return 0; fail: for (i = 0; i < fb_helper->connector_count; i++) { - kfree(fb_helper->connector_info[i]); + struct drm_fb_helper_connector *fb_helper_connector = + fb_helper->connector_info[i]; + + drm_connector_unreference(fb_helper_connector->connector); + + kfree(fb_helper_connector); fb_helper->connector_info[i] = NULL; } fb_helper->connector_count = 0; From 94d7dea448fae6cbb83395323c1d2fd7f19dc388 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 26 Oct 2016 16:57:15 +0800 Subject: [PATCH 614/884] block: flush: fix IO hang in case of flood fua req This patch fixes one issue reported by Kent, which can be triggered in bcachefs over sata disk. Actually it is a generic issue in block flush vs. blk-tag. Cc: Christoph Hellwig Reported-by: Kent Overstreet Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-flush.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/block/blk-flush.c b/block/blk-flush.c index 6a14b68b9135..3c882cbc7541 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -342,6 +342,34 @@ static void flush_data_end_io(struct request *rq, int error) struct request_queue *q = rq->q; struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL); + /* + * Updating q->in_flight[] here for making this tag usable + * early. Because in blk_queue_start_tag(), + * q->in_flight[BLK_RW_ASYNC] is used to limit async I/O and + * reserve tags for sync I/O. + * + * More importantly this way can avoid the following I/O + * deadlock: + * + * - suppose there are 40 fua requests comming to flush queue + * and queue depth is 31 + * - 30 rqs are scheduled then blk_queue_start_tag() can't alloc + * tag for async I/O any more + * - all the 30 rqs are completed before FLUSH_PENDING_TIMEOUT + * and flush_data_end_io() is called + * - the other rqs still can't go ahead if not updating + * q->in_flight[BLK_RW_ASYNC] here, meantime these rqs + * are held in flush data queue and make no progress of + * handling post flush rq + * - only after the post flush rq is handled, all these rqs + * can be completed + */ + + elv_completed_request(q, rq); + + /* for avoiding double accounting */ + rq->cmd_flags &= ~REQ_STARTED; + /* * After populating an empty queue, kick it to avoid stall. Read * the comment in flush_end_io(). From 38d868e41c4b9250d5a115c049dc2d48f4909581 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 10 Oct 2016 17:50:56 +0300 Subject: [PATCH 615/884] drm: Don't force all planes to be added to the state due to zpos MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't want all planes to be added to the state whenever a plane with fixed zpos gets enabled/disabled. This is true especially for eg. cursor planes on i915, as we want cursor updates to go through w/o throttling. Same holds for drivers that don't support zpos at all (i915 actually falls into this category right now since we've not yet added zpos support). Allow drivers more freedom by letting them deal with zpos themselves instead of doing it in drm_atomic_helper_check_planes() unconditionally. Let's just inline the required calls into all the driver that currently depend on this. v2: Inline the stuff into the drivers instead of adding another helper, document things better (Daniel) Cc: Daniel Vetter Cc: Marek Szyprowski Cc: Benjamin Gaignard Cc: Vincent Abriou Cc: Laurent Pinchart Cc: Inki Dae Cc: Joonyoung Shim Cc: Seung-Woo Kim Cc: Kyungmin Park Cc: Lyude Cc: Maarten Lankhorst Cc: stable@vger.kernel.org Fixes: 44d1240d006c ("drm: add generic zpos property") Signed-off-by: Ville Syrjälä Reviewed-by: Sean Paul Acked-by: Benjamin Gaignard Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1476111056-12734-1-git-send-email-ville.syrjala@linux.intel.com --- drivers/gpu/drm/drm_atomic_helper.c | 4 ---- drivers/gpu/drm/exynos/exynos_drm_drv.c | 20 ++++++++++++++++++++ drivers/gpu/drm/exynos/exynos_drm_drv.h | 1 + drivers/gpu/drm/exynos/exynos_drm_fb.c | 2 +- drivers/gpu/drm/rcar-du/rcar_du_kms.c | 12 ++++++++++-- drivers/gpu/drm/sti/sti_drv.c | 22 +++++++++++++++++++++- include/drm/drm_plane.h | 8 +++++++- 7 files changed, 60 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index c3f83476f996..21f992605541 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -594,10 +594,6 @@ drm_atomic_helper_check_planes(struct drm_device *dev, struct drm_plane_state *plane_state; int i, ret = 0; - ret = drm_atomic_normalize_zpos(dev, state); - if (ret) - return ret; - for_each_plane_in_state(state, plane, plane_state, i) { const struct drm_plane_helper_funcs *funcs; diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c index def78c8c1780..f86e7c846678 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.c +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c @@ -262,6 +262,26 @@ int exynos_atomic_commit(struct drm_device *dev, struct drm_atomic_state *state, return 0; } +int exynos_atomic_check(struct drm_device *dev, + struct drm_atomic_state *state) +{ + int ret; + + ret = drm_atomic_helper_check_modeset(dev, state); + if (ret) + return ret; + + ret = drm_atomic_normalize_zpos(dev, state); + if (ret) + return ret; + + ret = drm_atomic_helper_check_planes(dev, state); + if (ret) + return ret; + + return ret; +} + static int exynos_drm_open(struct drm_device *dev, struct drm_file *file) { struct drm_exynos_file_private *file_priv; diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h index d215149e737b..80c4d5b81689 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.h +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h @@ -301,6 +301,7 @@ static inline int exynos_dpi_bind(struct drm_device *dev, int exynos_atomic_commit(struct drm_device *dev, struct drm_atomic_state *state, bool nonblock); +int exynos_atomic_check(struct drm_device *dev, struct drm_atomic_state *state); extern struct platform_driver fimd_driver; diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.c b/drivers/gpu/drm/exynos/exynos_drm_fb.c index 40ce841eb952..23cce0a3f5fc 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fb.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fb.c @@ -190,7 +190,7 @@ dma_addr_t exynos_drm_fb_dma_addr(struct drm_framebuffer *fb, int index) static const struct drm_mode_config_funcs exynos_drm_mode_config_funcs = { .fb_create = exynos_user_fb_create, .output_poll_changed = exynos_drm_output_poll_changed, - .atomic_check = drm_atomic_helper_check, + .atomic_check = exynos_atomic_check, .atomic_commit = exynos_atomic_commit, }; diff --git a/drivers/gpu/drm/rcar-du/rcar_du_kms.c b/drivers/gpu/drm/rcar-du/rcar_du_kms.c index bd9c3bb9252c..392c7e6de042 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_kms.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_kms.c @@ -231,8 +231,16 @@ static int rcar_du_atomic_check(struct drm_device *dev, struct rcar_du_device *rcdu = dev->dev_private; int ret; - ret = drm_atomic_helper_check(dev, state); - if (ret < 0) + ret = drm_atomic_helper_check_modeset(dev, state); + if (ret) + return ret; + + ret = drm_atomic_normalize_zpos(dev, state); + if (ret) + return ret; + + ret = drm_atomic_helper_check_planes(dev, state); + if (ret) return ret; if (rcar_du_has(rcdu, RCAR_DU_FEATURE_VSP1_SOURCE)) diff --git a/drivers/gpu/drm/sti/sti_drv.c b/drivers/gpu/drm/sti/sti_drv.c index 2784919a7366..9df308565f6c 100644 --- a/drivers/gpu/drm/sti/sti_drv.c +++ b/drivers/gpu/drm/sti/sti_drv.c @@ -195,6 +195,26 @@ static void sti_atomic_work(struct work_struct *work) sti_atomic_complete(private, private->commit.state); } +static int sti_atomic_check(struct drm_device *dev, + struct drm_atomic_state *state) +{ + int ret; + + ret = drm_atomic_helper_check_modeset(dev, state); + if (ret) + return ret; + + ret = drm_atomic_normalize_zpos(dev, state); + if (ret) + return ret; + + ret = drm_atomic_helper_check_planes(dev, state); + if (ret) + return ret; + + return ret; +} + static int sti_atomic_commit(struct drm_device *drm, struct drm_atomic_state *state, bool nonblock) { @@ -248,7 +268,7 @@ static void sti_output_poll_changed(struct drm_device *ddev) static const struct drm_mode_config_funcs sti_mode_config_funcs = { .fb_create = drm_fb_cma_create, .output_poll_changed = sti_output_poll_changed, - .atomic_check = drm_atomic_helper_check, + .atomic_check = sti_atomic_check, .atomic_commit = sti_atomic_commit, }; diff --git a/include/drm/drm_plane.h b/include/drm/drm_plane.h index 43cf193e54d6..8b4dc62470ff 100644 --- a/include/drm/drm_plane.h +++ b/include/drm/drm_plane.h @@ -47,8 +47,14 @@ struct drm_crtc; * @src_h: height of visible portion of plane (in 16.16) * @rotation: rotation of the plane * @zpos: priority of the given plane on crtc (optional) + * Note that multiple active planes on the same crtc can have an identical + * zpos value. The rule to solving the conflict is to compare the plane + * object IDs; the plane with a higher ID must be stacked on top of a + * plane with a lower ID. * @normalized_zpos: normalized value of zpos: unique, range from 0 to N-1 - * where N is the number of active planes for given crtc + * where N is the number of active planes for given crtc. Note that + * the driver must call drm_atomic_normalize_zpos() to update this before + * it can be trusted. * @src: clipped source coordinates of the plane (in 16.16) * @dst: clipped destination coordinates of the plane * @visible: visibility of the plane From a2889606636d135148de101fe3311dfea67baf1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 26 Oct 2016 17:41:18 +0300 Subject: [PATCH 616/884] drm/fb-helper: Keep references for the current set of used connectors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The fbdev helper code keeps around two lists of connectors. One is the list of all connectors it could use, and that list already holds references for all the connectors. However the other list, or rather lists, is the one actively being used. That list is tracked per-crtc and currently doesn't hold any extra references. Let's grab those extra references to avoid oopsing when the connector vanishes. The list of all possible connectors should get updated when the hpd happens, but the list of actively used connectors would not get updated until the next time the fb-helper picks through the set of possible connectors. And so we need to hang on to the connectors until that time. Since we need to clean up in drm_fb_helper_crtc_free() as well, let's pull the code to a common place. And while at it let's pull in up the modeset->mode cleanup in there as well. The case of modeset->fb is a bit less clear. I'm thinking we should probably hold a reference to it, but for now I just slapped on a FIXME. v2: Cleanup things drm_fb_helper_crtc_free() too (Chris) v3: Don't leak modeset->connectors (Chris) Cc: Chris Wilson Cc: stable@vger.kernel.org Cc: Carlos Santa Cc: Kirill A. Shutemov Tested-by: Carlos Santa (v1) Tested-by: Kirill A. Shutemov (v1) Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97666 Signed-off-by: Ville Syrjälä Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1477492878-4990-1-git-send-email-ville.syrjala@linux.intel.com --- drivers/gpu/drm/drm_fb_helper.c | 57 +++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index efaccc2e1b4c..6c75e62c0b22 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -608,6 +608,24 @@ int drm_fb_helper_blank(int blank, struct fb_info *info) } EXPORT_SYMBOL(drm_fb_helper_blank); +static void drm_fb_helper_modeset_release(struct drm_fb_helper *helper, + struct drm_mode_set *modeset) +{ + int i; + + for (i = 0; i < modeset->num_connectors; i++) { + drm_connector_unreference(modeset->connectors[i]); + modeset->connectors[i] = NULL; + } + modeset->num_connectors = 0; + + drm_mode_destroy(helper->dev, modeset->mode); + modeset->mode = NULL; + + /* FIXME should hold a ref? */ + modeset->fb = NULL; +} + static void drm_fb_helper_crtc_free(struct drm_fb_helper *helper) { int i; @@ -617,10 +635,12 @@ static void drm_fb_helper_crtc_free(struct drm_fb_helper *helper) kfree(helper->connector_info[i]); } kfree(helper->connector_info); + for (i = 0; i < helper->crtc_count; i++) { - kfree(helper->crtc_info[i].mode_set.connectors); - if (helper->crtc_info[i].mode_set.mode) - drm_mode_destroy(helper->dev, helper->crtc_info[i].mode_set.mode); + struct drm_mode_set *modeset = &helper->crtc_info[i].mode_set; + + drm_fb_helper_modeset_release(helper, modeset); + kfree(modeset->connectors); } kfree(helper->crtc_info); } @@ -2095,7 +2115,6 @@ static void drm_setup_crtcs(struct drm_fb_helper *fb_helper) struct drm_fb_helper_crtc **crtcs; struct drm_display_mode **modes; struct drm_fb_offset *offsets; - struct drm_mode_set *modeset; bool *enabled; int width, height; int i; @@ -2143,45 +2162,35 @@ static void drm_setup_crtcs(struct drm_fb_helper *fb_helper) /* need to set the modesets up here for use later */ /* fill out the connector<->crtc mappings into the modesets */ - for (i = 0; i < fb_helper->crtc_count; i++) { - modeset = &fb_helper->crtc_info[i].mode_set; - modeset->num_connectors = 0; - modeset->fb = NULL; - } + for (i = 0; i < fb_helper->crtc_count; i++) + drm_fb_helper_modeset_release(fb_helper, + &fb_helper->crtc_info[i].mode_set); for (i = 0; i < fb_helper->connector_count; i++) { struct drm_display_mode *mode = modes[i]; struct drm_fb_helper_crtc *fb_crtc = crtcs[i]; struct drm_fb_offset *offset = &offsets[i]; - modeset = &fb_crtc->mode_set; + struct drm_mode_set *modeset = &fb_crtc->mode_set; if (mode && fb_crtc) { + struct drm_connector *connector = + fb_helper->connector_info[i]->connector; + DRM_DEBUG_KMS("desired mode %s set on crtc %d (%d,%d)\n", mode->name, fb_crtc->mode_set.crtc->base.id, offset->x, offset->y); + fb_crtc->desired_mode = mode; fb_crtc->x = offset->x; fb_crtc->y = offset->y; - if (modeset->mode) - drm_mode_destroy(dev, modeset->mode); modeset->mode = drm_mode_duplicate(dev, fb_crtc->desired_mode); - modeset->connectors[modeset->num_connectors++] = fb_helper->connector_info[i]->connector; + drm_connector_reference(connector); + modeset->connectors[modeset->num_connectors++] = connector; modeset->fb = fb_helper->fb; modeset->x = offset->x; modeset->y = offset->y; } } - - /* Clear out any old modes if there are no more connected outputs. */ - for (i = 0; i < fb_helper->crtc_count; i++) { - modeset = &fb_helper->crtc_info[i].mode_set; - if (modeset->num_connectors == 0) { - BUG_ON(modeset->fb); - if (modeset->mode) - drm_mode_destroy(dev, modeset->mode); - modeset->mode = NULL; - } - } out: kfree(crtcs); kfree(modes); From 36e3fa6a38e135e9478a2f75dec9bf6ff1e6480e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 26 Oct 2016 16:30:33 +0300 Subject: [PATCH 617/884] drm/dp/mst: Clear port->pdt when tearing down the i2c adapter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The i2c adapter is only relevant for some peer device types, so let's clear the pdt if it's still the same as the old_pdt when we tear down the i2c adapter. I don't really like this design pattern of updating port->whatever before doing the accompanying changes and passing around old_whatever to figure stuff out. Would make much more sense to me to the pass the new value around and only update the port->whatever when things are consistent. But let's try to work with what we have right now. Quoting a follow-up from Ville: "And naturally I forgot to amend the commit message w.r.t. this guy [the change in drm_dp_destroy_port]. We don't really need to do this here, but I figured I'd try to be a bit more consistent by having it, just to avoid accidental mistakes if/when someone changes this stuff again later." v2: Clear port->pdt in the caller, if needed (Daniel) Cc: Daniel Vetter Cc: stable@vger.kernel.org Cc: Carlos Santa Cc: Kirill A. Shutemov Tested-by: Carlos Santa (v1) Tested-by: Kirill A. Shutemov (v1) Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97666 Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1477488633-16544-1-git-send-email-ville.syrjala@linux.intel.com --- drivers/gpu/drm/drm_dp_mst_topology.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 04e457117980..ba13f9d8720b 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -914,6 +914,7 @@ static void drm_dp_destroy_port(struct kref *kref) /* no need to clean up vcpi * as if we have no connector we never setup a vcpi */ drm_dp_port_teardown_pdt(port, port->pdt); + port->pdt = DP_PEER_DEVICE_NONE; } kfree(port); } @@ -2919,6 +2920,7 @@ static void drm_dp_destroy_connector_work(struct work_struct *work) mgr->cbs->destroy_connector(mgr, port->connector); drm_dp_port_teardown_pdt(port, port->pdt); + port->pdt = DP_PEER_DEVICE_NONE; if (!port->input && port->vcpi.vcpi > 0) { drm_dp_mst_reset_vcpi_slots(mgr, port); From 4da5caa6a6f82cda3193bca855235b87debf78bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 26 Oct 2016 12:05:55 +0300 Subject: [PATCH 618/884] drm/dp/mst: Check peer device type before attempting EDID read MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only certain types of pdts have the DDC bus registered, so check for that before we attempt the EDID read. Othwewise we risk playing around with an i2c adapter that doesn't actually exist. Cc: stable@vger.kernel.org Cc: Carlos Santa Cc: Kirill A. Shutemov Tested-by: Carlos Santa Tested-by: Kirill A. Shutemov Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97666 Signed-off-by: Ville Syrjälä Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1477472755-15288-5-git-send-email-ville.syrjala@linux.intel.com --- drivers/gpu/drm/drm_dp_mst_topology.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index ba13f9d8720b..aa644487749c 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -1160,7 +1160,9 @@ static void drm_dp_add_port(struct drm_dp_mst_branch *mstb, drm_dp_put_port(port); goto out; } - if (port->port_num >= DP_MST_LOGICAL_PORT_0) { + if ((port->pdt == DP_PEER_DEVICE_DP_LEGACY_CONV || + port->pdt == DP_PEER_DEVICE_SST_SINK) && + port->port_num >= DP_MST_LOGICAL_PORT_0) { port->cached_edid = drm_get_edid(port->connector, &port->aux.ddc); drm_mode_connector_set_tile_property(port->connector); } From 830cda3f9855ff092b0e9610346d110846fc497c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 26 Oct 2016 10:08:22 -0700 Subject: [PATCH 619/884] sparc64: Fix instruction count in comment for __hypervisor_flush_tlb_pending. Noticed by James Clarke. Signed-off-by: David S. Miller --- arch/sparc/mm/ultra.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/mm/ultra.S b/arch/sparc/mm/ultra.S index 85de139bfad6..5128d38b1d1a 100644 --- a/arch/sparc/mm/ultra.S +++ b/arch/sparc/mm/ultra.S @@ -358,7 +358,7 @@ __hypervisor_flush_tlb_page: /* 22 insns */ nop nop -__hypervisor_flush_tlb_pending: /* 16 insns */ +__hypervisor_flush_tlb_pending: /* 27 insns */ /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ sllx %o1, 3, %g1 mov %o2, %g2 From a236441bb69723032db94128761a469030c3fe6d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 26 Oct 2016 10:20:14 -0700 Subject: [PATCH 620/884] sparc64: Fix illegal relative branches in hypervisor patched TLB cross-call code. Just like the non-cross-call TLB flush handlers, the cross-call ones need to avoid doing PC-relative branches outside of their code blocks. Signed-off-by: David S. Miller --- arch/sparc/mm/ultra.S | 42 ++++++++++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/arch/sparc/mm/ultra.S b/arch/sparc/mm/ultra.S index 5128d38b1d1a..0fa2e6202c1f 100644 --- a/arch/sparc/mm/ultra.S +++ b/arch/sparc/mm/ultra.S @@ -484,7 +484,7 @@ cheetah_patch_cachetlbops: */ .align 32 .globl xcall_flush_tlb_mm -xcall_flush_tlb_mm: /* 21 insns */ +xcall_flush_tlb_mm: /* 24 insns */ mov PRIMARY_CONTEXT, %g2 ldxa [%g2] ASI_DMMU, %g3 srlx %g3, CTX_PGSZ1_NUC_SHIFT, %g4 @@ -506,9 +506,12 @@ xcall_flush_tlb_mm: /* 21 insns */ nop nop nop + nop + nop + nop .globl xcall_flush_tlb_page -xcall_flush_tlb_page: /* 17 insns */ +xcall_flush_tlb_page: /* 20 insns */ /* %g5=context, %g1=vaddr */ mov PRIMARY_CONTEXT, %g4 ldxa [%g4] ASI_DMMU, %g2 @@ -527,9 +530,12 @@ xcall_flush_tlb_page: /* 17 insns */ retry nop nop + nop + nop + nop .globl xcall_flush_tlb_kernel_range -xcall_flush_tlb_kernel_range: /* 25 insns */ +xcall_flush_tlb_kernel_range: /* 28 insns */ sethi %hi(PAGE_SIZE - 1), %g2 or %g2, %lo(PAGE_SIZE - 1), %g2 andn %g1, %g2, %g1 @@ -555,6 +561,9 @@ xcall_flush_tlb_kernel_range: /* 25 insns */ nop nop nop + nop + nop + nop /* This runs in a very controlled environment, so we do * not need to worry about BH races etc. @@ -737,7 +746,7 @@ __hypervisor_tlb_xcall_error: ba,a,pt %xcc, rtrap .globl __hypervisor_xcall_flush_tlb_mm -__hypervisor_xcall_flush_tlb_mm: /* 21 insns */ +__hypervisor_xcall_flush_tlb_mm: /* 24 insns */ /* %g5=ctx, g1,g2,g3,g4,g7=scratch, %g6=unusable */ mov %o0, %g2 mov %o1, %g3 @@ -751,7 +760,7 @@ __hypervisor_xcall_flush_tlb_mm: /* 21 insns */ mov HV_FAST_MMU_DEMAP_CTX, %o5 ta HV_FAST_TRAP mov HV_FAST_MMU_DEMAP_CTX, %g6 - brnz,pn %o0, __hypervisor_tlb_xcall_error + brnz,pn %o0, 1f mov %o0, %g5 mov %g2, %o0 mov %g3, %o1 @@ -760,9 +769,12 @@ __hypervisor_xcall_flush_tlb_mm: /* 21 insns */ mov %g7, %o5 membar #Sync retry +1: sethi %hi(__hypervisor_tlb_xcall_error), %g4 + jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0 + nop .globl __hypervisor_xcall_flush_tlb_page -__hypervisor_xcall_flush_tlb_page: /* 17 insns */ +__hypervisor_xcall_flush_tlb_page: /* 20 insns */ /* %g5=ctx, %g1=vaddr */ mov %o0, %g2 mov %o1, %g3 @@ -774,16 +786,19 @@ __hypervisor_xcall_flush_tlb_page: /* 17 insns */ sllx %o0, PAGE_SHIFT, %o0 ta HV_MMU_UNMAP_ADDR_TRAP mov HV_MMU_UNMAP_ADDR_TRAP, %g6 - brnz,a,pn %o0, __hypervisor_tlb_xcall_error + brnz,a,pn %o0, 1f mov %o0, %g5 mov %g2, %o0 mov %g3, %o1 mov %g4, %o2 membar #Sync retry +1: sethi %hi(__hypervisor_tlb_xcall_error), %g4 + jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0 + nop .globl __hypervisor_xcall_flush_tlb_kernel_range -__hypervisor_xcall_flush_tlb_kernel_range: /* 25 insns */ +__hypervisor_xcall_flush_tlb_kernel_range: /* 28 insns */ /* %g1=start, %g7=end, g2,g3,g4,g5,g6=scratch */ sethi %hi(PAGE_SIZE - 1), %g2 or %g2, %lo(PAGE_SIZE - 1), %g2 @@ -800,7 +815,7 @@ __hypervisor_xcall_flush_tlb_kernel_range: /* 25 insns */ mov HV_MMU_ALL, %o2 /* ARG2: flags */ ta HV_MMU_UNMAP_ADDR_TRAP mov HV_MMU_UNMAP_ADDR_TRAP, %g6 - brnz,pn %o0, __hypervisor_tlb_xcall_error + brnz,pn %o0, 1f mov %o0, %g5 sethi %hi(PAGE_SIZE), %o2 brnz,pt %g3, 1b @@ -810,6 +825,9 @@ __hypervisor_xcall_flush_tlb_kernel_range: /* 25 insns */ mov %g7, %o2 membar #Sync retry +1: sethi %hi(__hypervisor_tlb_xcall_error), %g4 + jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0 + nop /* These just get rescheduled to PIL vectors. */ .globl xcall_call_function @@ -894,21 +912,21 @@ hypervisor_patch_cachetlbops: sethi %hi(__hypervisor_xcall_flush_tlb_mm), %o1 or %o1, %lo(__hypervisor_xcall_flush_tlb_mm), %o1 call tlb_patch_one - mov 21, %o2 + mov 24, %o2 sethi %hi(xcall_flush_tlb_page), %o0 or %o0, %lo(xcall_flush_tlb_page), %o0 sethi %hi(__hypervisor_xcall_flush_tlb_page), %o1 or %o1, %lo(__hypervisor_xcall_flush_tlb_page), %o1 call tlb_patch_one - mov 17, %o2 + mov 20, %o2 sethi %hi(xcall_flush_tlb_kernel_range), %o0 or %o0, %lo(xcall_flush_tlb_kernel_range), %o0 sethi %hi(__hypervisor_xcall_flush_tlb_kernel_range), %o1 or %o1, %lo(__hypervisor_xcall_flush_tlb_kernel_range), %o1 call tlb_patch_one - mov 25, %o2 + mov 28, %o2 #endif /* CONFIG_SMP */ ret From 26984c3bc29aa15b705475177842feddcd3d9df0 Mon Sep 17 00:00:00 2001 From: Yisheng Xie Date: Fri, 21 Oct 2016 16:13:55 +0800 Subject: [PATCH 621/884] arm64/numa: fix pcpu_cpu_distance() to get correct CPU proximity The pcpu_build_alloc_info() function group CPUs according to their proximity, by call callback function @cpu_distance_fn from different ARCHs. For arm64 the callback of @cpu_distance_fn is pcpu_cpu_distance(from, to) -> node_distance(from, to) The @from and @to for function node_distance() should be nid. However, pcpu_cpu_distance() in arch/arm64/mm/numa.c just past the cpu id for @from and @to, and didn't convert to numa node id. For this incorrect cpu proximity get from ARCH, it may cause each CPU in one group and make group_cnt out of bound: setup_per_cpu_areas() pcpu_embed_first_chunk() pcpu_build_alloc_info() in pcpu_build_alloc_info, since cpu_distance_fn will return REMOTE_DISTANCE if we pass cpu ids (0,1,2...), so cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE will wrongly be ture. This may results in triggering the BUG_ON(unit != nr_units) later: [ 0.000000] kernel BUG at mm/percpu.c:1916! [ 0.000000] Internal error: Oops - BUG: 0 [#1] PREEMPT SMP [ 0.000000] Modules linked in: [ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 4.9.0-rc1-00003-g14155ca-dirty #26 [ 0.000000] Hardware name: Hisilicon Hi1616 Evaluation Board (DT) [ 0.000000] task: ffff000008d6e900 task.stack: ffff000008d60000 [ 0.000000] PC is at pcpu_embed_first_chunk+0x420/0x704 [ 0.000000] LR is at pcpu_embed_first_chunk+0x3bc/0x704 [ 0.000000] pc : [] lr : [] pstate: 800000c5 [ 0.000000] sp : ffff000008d63eb0 [ 0.000000] x29: ffff000008d63eb0 [ 0.000000] x28: 0000000000000000 [ 0.000000] x27: 0000000000000040 [ 0.000000] x26: ffff8413fbfcef00 [ 0.000000] x25: 0000000000000042 [ 0.000000] x24: 0000000000000042 [ 0.000000] x23: 0000000000001000 [ 0.000000] x22: 0000000000000046 [ 0.000000] x21: 0000000000000001 [ 0.000000] x20: ffff000008cb3bc8 [ 0.000000] x19: ffff8413fbfcf570 [ 0.000000] x18: 0000000000000000 [ 0.000000] x17: ffff000008e49ae0 [ 0.000000] x16: 0000000000000003 [ 0.000000] x15: 000000000000001e [ 0.000000] x14: 0000000000000004 [ 0.000000] x13: 0000000000000000 [ 0.000000] x12: 000000000000006f [ 0.000000] x11: 00000413fbffff00 [ 0.000000] x10: 0000000000000004 [ 0.000000] x9 : 0000000000000000 [ 0.000000] x8 : 0000000000000001 [ 0.000000] x7 : ffff8413fbfcf63c [ 0.000000] x6 : ffff000008d65d28 [ 0.000000] x5 : ffff000008d65e50 [ 0.000000] x4 : 0000000000000000 [ 0.000000] x3 : ffff000008cb3cc8 [ 0.000000] x2 : 0000000000000040 [ 0.000000] x1 : 0000000000000040 [ 0.000000] x0 : 0000000000000000 [...] [ 0.000000] Call trace: [ 0.000000] Exception stack(0xffff000008d63ce0 to 0xffff000008d63e10) [ 0.000000] 3ce0: ffff8413fbfcf570 0001000000000000 ffff000008d63eb0 ffff000008c754f4 [ 0.000000] 3d00: ffff000008d63d50 ffff0000081af210 00000413fbfff010 0000000000001000 [ 0.000000] 3d20: ffff000008d63d50 ffff0000081af220 00000413fbfff010 0000000000001000 [ 0.000000] 3d40: 00000413fbfcef00 0000000000000004 ffff000008d63db0 ffff0000081af390 [ 0.000000] 3d60: 00000413fbfcef00 0000000000001000 0000000000000000 0000000000001000 [ 0.000000] 3d80: 0000000000000000 0000000000000040 0000000000000040 ffff000008cb3cc8 [ 0.000000] 3da0: 0000000000000000 ffff000008d65e50 ffff000008d65d28 ffff8413fbfcf63c [ 0.000000] 3dc0: 0000000000000001 0000000000000000 0000000000000004 00000413fbffff00 [ 0.000000] 3de0: 000000000000006f 0000000000000000 0000000000000004 000000000000001e [ 0.000000] 3e00: 0000000000000003 ffff000008e49ae0 [ 0.000000] [] pcpu_embed_first_chunk+0x420/0x704 [ 0.000000] [] setup_per_cpu_areas+0x38/0xc8 [ 0.000000] [] start_kernel+0x10c/0x390 [ 0.000000] [] __primary_switched+0x5c/0x64 [ 0.000000] Code: b8018660 17ffffd7 6b16037f 54000080 (d4210000) [ 0.000000] ---[ end trace 0000000000000000 ]--- [ 0.000000] Kernel panic - not syncing: Attempted to kill the idle task! Fix by getting cpu's node id with early_cpu_to_node() then pass it to node_distance() as the original intention. Fixes: 7af3a0a99252 ("arm64/numa: support HAVE_SETUP_PER_CPU_AREA") Signed-off-by: Yisheng Xie Signed-off-by: Hanjun Guo Cc: Catalin Marinas Cc: Lorenzo Pieralisi Cc: Will Deacon Cc: Zhen Lei Signed-off-by: Will Deacon --- arch/arm64/mm/numa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index 778a985c8a70..9a71d0678a08 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -147,7 +147,7 @@ static int __init early_cpu_to_node(int cpu) static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) { - return node_distance(from, to); + return node_distance(early_cpu_to_node(from), early_cpu_to_node(to)); } static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, From 3f7a09f44e5ef8a2629842f2d22892114e603fc1 Mon Sep 17 00:00:00 2001 From: Hanjun Guo Date: Fri, 21 Oct 2016 16:13:56 +0800 Subject: [PATCH 622/884] arm64/numa: fix incorrect log for memory-less node When booting on NUMA system with memory-less node (no memory dimm on this memory controller), the print for setup_node_data() is incorrect: NUMA: Initmem setup node 2 [mem 0x00000000-0xffffffffffffffff] It can be fixed by printing [mem 0x00000000-0x00000000] when end_pfn is 0, but print will be more useful. Fixes: 1a2db300348b ("arm64, numa: Add NUMA support for arm64 platforms.") Signed-off-by: Hanjun Guo Cc: Catalin Marinas Cc: Ganapatrao Kulkarni Cc: Lorenzo Pieralisi Cc: Mark Rutland Cc: Will Deacon Cc: Yisheng Xie Signed-off-by: Will Deacon --- arch/arm64/mm/numa.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index 9a71d0678a08..4b32168cf91a 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -223,8 +223,11 @@ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn) void *nd; int tnid; - pr_info("Initmem setup node %d [mem %#010Lx-%#010Lx]\n", - nid, start_pfn << PAGE_SHIFT, (end_pfn << PAGE_SHIFT) - 1); + if (start_pfn < end_pfn) + pr_info("Initmem setup node %d [mem %#010Lx-%#010Lx]\n", nid, + start_pfn << PAGE_SHIFT, (end_pfn << PAGE_SHIFT) - 1); + else + pr_info("Initmem setup node %d []\n", nid); nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid); nd = __va(nd_pa); From 3fa72fe9c614717d22ae75b84d45f41da65c10fe Mon Sep 17 00:00:00 2001 From: Neeraj Upadhyay Date: Fri, 21 Oct 2016 14:28:46 +0530 Subject: [PATCH 623/884] arm64: mm: fix __page_to_voff definition Fix parameter name for __page_to_voff, to match its definition. At present, we don't see any issue, as page_to_virt's caller declares 'page'. Fixes: 9f2875912dac ("arm64: mm: restrict virt_to_page() to the linear mapping") Acked-by: Mark Rutland Acked-by: Ard Biesheuvel Signed-off-by: Neeraj Upadhyay Signed-off-by: Will Deacon --- arch/arm64/include/asm/memory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index ba62df8c6e35..b71086d25195 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -217,7 +217,7 @@ static inline void *phys_to_virt(phys_addr_t x) #define _virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) #else #define __virt_to_pgoff(kaddr) (((u64)(kaddr) & ~PAGE_OFFSET) / PAGE_SIZE * sizeof(struct page)) -#define __page_to_voff(kaddr) (((u64)(page) & ~VMEMMAP_START) * PAGE_SIZE / sizeof(struct page)) +#define __page_to_voff(page) (((u64)(page) & ~VMEMMAP_START) * PAGE_SIZE / sizeof(struct page)) #define page_to_virt(page) ((void *)((__page_to_voff(page)) | PAGE_OFFSET)) #define virt_to_page(vaddr) ((struct page *)((__virt_to_pgoff(vaddr)) | VMEMMAP_START)) From 49a5d73fd03528289c315d345e4917a1ba2b48e7 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Fri, 21 Oct 2016 16:55:02 +0800 Subject: [PATCH 624/884] drm/amdgpu: fix s3 resume back, uvd dpm randomly can't disable. the value of last_mclk_dpm_enable_mask will be changed if other clients(vce,dal) trigger set power state between enable and disable uvd dpm. Signed-off-by: Rex Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/ci_dpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c index 29e6061b9087..5be788b269e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c @@ -4075,7 +4075,7 @@ static int ci_enable_uvd_dpm(struct amdgpu_device *adev, bool enable) pi->dpm_level_enable_mask.mclk_dpm_enable_mask); } } else { - if (pi->last_mclk_dpm_enable_mask & 0x1) { + if (pi->uvd_enabled) { pi->uvd_enabled = false; pi->dpm_level_enable_mask.mclk_dpm_enable_mask |= 1; amdgpu_ci_send_msg_to_smc_with_parameter(adev, From 7dc86ef5ac91642dfc3eb93ee0f0458e702a343e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 14 Oct 2016 16:38:02 -0400 Subject: [PATCH 625/884] drm/radeon/si_dpm: workaround for SI kickers Consolidate existing quirks. Fixes stability issues on some kickers. Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/si_dpm.c | 59 ++++++++++++++++++++++++--------- 1 file changed, 43 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index e22b11189628..c49934527a87 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -2999,6 +2999,49 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, int i; struct si_dpm_quirk *p = si_dpm_quirk_list; + /* limit all SI kickers */ + if (rdev->family == CHIP_PITCAIRN) { + if ((rdev->pdev->revision == 0x81) || + (rdev->pdev->device == 0x6810) || + (rdev->pdev->device == 0x6811) || + (rdev->pdev->device == 0x6816) || + (rdev->pdev->device == 0x6817) || + (rdev->pdev->device == 0x6806)) + max_mclk = 120000; + } else if (rdev->family == CHIP_VERDE) { + if ((rdev->pdev->revision == 0x81) || + (rdev->pdev->revision == 0x83) || + (rdev->pdev->revision == 0x87) || + (rdev->pdev->device == 0x6820) || + (rdev->pdev->device == 0x6821) || + (rdev->pdev->device == 0x6822) || + (rdev->pdev->device == 0x6823) || + (rdev->pdev->device == 0x682A) || + (rdev->pdev->device == 0x682B)) { + max_sclk = 75000; + max_mclk = 80000; + } + } else if (rdev->family == CHIP_OLAND) { + if ((rdev->pdev->revision == 0xC7) || + (rdev->pdev->revision == 0x80) || + (rdev->pdev->revision == 0x81) || + (rdev->pdev->revision == 0x83) || + (rdev->pdev->device == 0x6604) || + (rdev->pdev->device == 0x6605)) { + max_sclk = 75000; + max_mclk = 80000; + } + } else if (rdev->family == CHIP_HAINAN) { + if ((rdev->pdev->revision == 0x81) || + (rdev->pdev->revision == 0x83) || + (rdev->pdev->revision == 0xC3) || + (rdev->pdev->device == 0x6664) || + (rdev->pdev->device == 0x6665) || + (rdev->pdev->device == 0x6667)) { + max_sclk = 75000; + max_mclk = 80000; + } + } /* Apply dpm quirks */ while (p && p->chip_device != 0) { if (rdev->pdev->vendor == p->chip_vendor && @@ -3011,22 +3054,6 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, } ++p; } - /* limit mclk on all R7 370 parts for stability */ - if (rdev->pdev->device == 0x6811 && - rdev->pdev->revision == 0x81) - max_mclk = 120000; - /* limit sclk/mclk on Jet parts for stability */ - if (rdev->pdev->device == 0x6665 && - rdev->pdev->revision == 0xc3) { - max_sclk = 75000; - max_mclk = 80000; - } - /* limit clocks on HD8600 series */ - if (rdev->pdev->device == 0x6660 && - rdev->pdev->revision == 0x83) { - max_sclk = 75000; - max_mclk = 80000; - } if (rps->vce_active) { rps->evclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].evclk; From ad110449e285c75eb53357e87419a73c96ccb1b9 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Tue, 25 Oct 2016 14:42:51 +0100 Subject: [PATCH 626/884] PCI: qcom: Fix pp->dev usage before assignment Initialize pp->dev in qcom_pcie_probe() before calling get_resources(), which uses it. [bhelgaas: changelog] Fixes: e6a087eeaf91 ("PCI: qcom: Remove redundant struct qcom_pcie.dev") Signed-off-by: Srinivas Kandagatla Signed-off-by: Bjorn Helgaas --- drivers/pci/host/pcie-qcom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/host/pcie-qcom.c b/drivers/pci/host/pcie-qcom.c index ef0a84c7a588..35936409b2d4 100644 --- a/drivers/pci/host/pcie-qcom.c +++ b/drivers/pci/host/pcie-qcom.c @@ -533,11 +533,11 @@ static int qcom_pcie_probe(struct platform_device *pdev) if (IS_ERR(pcie->phy)) return PTR_ERR(pcie->phy); + pp->dev = dev; ret = pcie->ops->get_resources(pcie); if (ret) return ret; - pp->dev = dev; pp->root_bus_nr = -1; pp->ops = &qcom_pcie_dw_ops; From 05692d7005a364add85c6e25a6c4447ce08f913a Mon Sep 17 00:00:00 2001 From: Vlad Tsyrklevich Date: Wed, 12 Oct 2016 18:51:24 +0200 Subject: [PATCH 627/884] vfio/pci: Fix integer overflows, bitmask check The VFIO_DEVICE_SET_IRQS ioctl did not sufficiently sanitize user-supplied integers, potentially allowing memory corruption. This patch adds appropriate integer overflow checks, checks the range bounds for VFIO_IRQ_SET_DATA_NONE, and also verifies that only single element in the VFIO_IRQ_SET_DATA_TYPE_MASK bitmask is set. VFIO_IRQ_SET_ACTION_TYPE_MASK is already correctly checked later in vfio_pci_set_irqs_ioctl(). Furthermore, a kzalloc is changed to a kcalloc because the use of a kzalloc with an integer multiplication allowed an integer overflow condition to be reached without this patch. kcalloc checks for overflow and should prevent a similar occurrence. Signed-off-by: Vlad Tsyrklevich Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci.c | 33 ++++++++++++++++++++----------- drivers/vfio/pci/vfio_pci_intrs.c | 2 +- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index d624a527777f..031bc08d000d 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -829,8 +829,9 @@ static long vfio_pci_ioctl(void *device_data, } else if (cmd == VFIO_DEVICE_SET_IRQS) { struct vfio_irq_set hdr; + size_t size; u8 *data = NULL; - int ret = 0; + int max, ret = 0; minsz = offsetofend(struct vfio_irq_set, count); @@ -838,23 +839,31 @@ static long vfio_pci_ioctl(void *device_data, return -EFAULT; if (hdr.argsz < minsz || hdr.index >= VFIO_PCI_NUM_IRQS || + hdr.count >= (U32_MAX - hdr.start) || hdr.flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK | VFIO_IRQ_SET_ACTION_TYPE_MASK)) return -EINVAL; - if (!(hdr.flags & VFIO_IRQ_SET_DATA_NONE)) { - size_t size; - int max = vfio_pci_get_irq_count(vdev, hdr.index); + max = vfio_pci_get_irq_count(vdev, hdr.index); + if (hdr.start >= max || hdr.start + hdr.count > max) + return -EINVAL; - if (hdr.flags & VFIO_IRQ_SET_DATA_BOOL) - size = sizeof(uint8_t); - else if (hdr.flags & VFIO_IRQ_SET_DATA_EVENTFD) - size = sizeof(int32_t); - else - return -EINVAL; + switch (hdr.flags & VFIO_IRQ_SET_DATA_TYPE_MASK) { + case VFIO_IRQ_SET_DATA_NONE: + size = 0; + break; + case VFIO_IRQ_SET_DATA_BOOL: + size = sizeof(uint8_t); + break; + case VFIO_IRQ_SET_DATA_EVENTFD: + size = sizeof(int32_t); + break; + default: + return -EINVAL; + } - if (hdr.argsz - minsz < hdr.count * size || - hdr.start >= max || hdr.start + hdr.count > max) + if (size) { + if (hdr.argsz - minsz < hdr.count * size) return -EINVAL; data = memdup_user((void __user *)(arg + minsz), diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index c2e60893cd09..1c46045b0e7f 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -256,7 +256,7 @@ static int vfio_msi_enable(struct vfio_pci_device *vdev, int nvec, bool msix) if (!is_irq_none(vdev)) return -EINVAL; - vdev->ctx = kzalloc(nvec * sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL); + vdev->ctx = kcalloc(nvec, sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL); if (!vdev->ctx) return -ENOMEM; From 2876a34466ce382a76b9ffb34757bb48928ac743 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Tue, 18 Oct 2016 16:30:09 -0400 Subject: [PATCH 628/884] sunrpc: don't pass on-stack memory to sg_set_buf As of ac4e97abce9b "scatterlist: sg_set_buf() argument must be in linear mapping", sg_set_buf hits a BUG when make_checksum_v2->xdr_process_buf, among other callers, passes it memory on the stack. We only need a scatterlist to pass this to the crypto code, and it seems like overkill to require kmalloc'd memory just to encrypt a few bytes, but for now this seems the best fix. Many of these callers are in the NFS write paths, so we allocate with GFP_NOFS. It might be possible to do without allocations here entirely, but that would probably be a bigger project. Cc: Rusty Russell Signed-off-by: J. Bruce Fields --- net/sunrpc/auth_gss/auth_gss.c | 13 +++-- net/sunrpc/auth_gss/gss_krb5_crypto.c | 84 ++++++++++++++++----------- net/sunrpc/auth_gss/svcauth_gss.c | 21 ++++--- 3 files changed, 72 insertions(+), 46 deletions(-) diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index d8bd97a5a7c9..3dfd769dc5b5 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1616,7 +1616,7 @@ gss_validate(struct rpc_task *task, __be32 *p) { struct rpc_cred *cred = task->tk_rqstp->rq_cred; struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); - __be32 seq; + __be32 *seq = NULL; struct kvec iov; struct xdr_buf verf_buf; struct xdr_netobj mic; @@ -1631,9 +1631,12 @@ gss_validate(struct rpc_task *task, __be32 *p) goto out_bad; if (flav != RPC_AUTH_GSS) goto out_bad; - seq = htonl(task->tk_rqstp->rq_seqno); - iov.iov_base = &seq; - iov.iov_len = sizeof(seq); + seq = kmalloc(4, GFP_NOFS); + if (!seq) + goto out_bad; + *seq = htonl(task->tk_rqstp->rq_seqno); + iov.iov_base = seq; + iov.iov_len = 4; xdr_buf_from_iov(&iov, &verf_buf); mic.data = (u8 *)p; mic.len = len; @@ -1653,11 +1656,13 @@ gss_validate(struct rpc_task *task, __be32 *p) gss_put_ctx(ctx); dprintk("RPC: %5u %s: gss_verify_mic succeeded.\n", task->tk_pid, __func__); + kfree(seq); return p + XDR_QUADLEN(len); out_bad: gss_put_ctx(ctx); dprintk("RPC: %5u %s failed ret %ld.\n", task->tk_pid, __func__, PTR_ERR(ret)); + kfree(seq); return ret; } diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 244245bcbbd2..90115ceefd49 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -166,8 +166,8 @@ make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen, unsigned int usage, struct xdr_netobj *cksumout) { struct scatterlist sg[1]; - int err; - u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN]; + int err = -1; + u8 *checksumdata; u8 rc4salt[4]; struct crypto_ahash *md5; struct crypto_ahash *hmac_md5; @@ -187,23 +187,22 @@ make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen, return GSS_S_FAILURE; } + checksumdata = kmalloc(GSS_KRB5_MAX_CKSUM_LEN, GFP_NOFS); + if (!checksumdata) + return GSS_S_FAILURE; + md5 = crypto_alloc_ahash("md5", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(md5)) - return GSS_S_FAILURE; + goto out_free_cksum; hmac_md5 = crypto_alloc_ahash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(hmac_md5)) { - crypto_free_ahash(md5); - return GSS_S_FAILURE; - } + if (IS_ERR(hmac_md5)) + goto out_free_md5; req = ahash_request_alloc(md5, GFP_KERNEL); - if (!req) { - crypto_free_ahash(hmac_md5); - crypto_free_ahash(md5); - return GSS_S_FAILURE; - } + if (!req) + goto out_free_hmac_md5; ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL); @@ -232,11 +231,8 @@ make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen, ahash_request_free(req); req = ahash_request_alloc(hmac_md5, GFP_KERNEL); - if (!req) { - crypto_free_ahash(hmac_md5); - crypto_free_ahash(md5); - return GSS_S_FAILURE; - } + if (!req) + goto out_free_hmac_md5; ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL); @@ -258,8 +254,12 @@ make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen, cksumout->len = kctx->gk5e->cksumlength; out: ahash_request_free(req); - crypto_free_ahash(md5); +out_free_hmac_md5: crypto_free_ahash(hmac_md5); +out_free_md5: + crypto_free_ahash(md5); +out_free_cksum: + kfree(checksumdata); return err ? GSS_S_FAILURE : 0; } @@ -276,8 +276,8 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen, struct crypto_ahash *tfm; struct ahash_request *req; struct scatterlist sg[1]; - int err; - u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN]; + int err = -1; + u8 *checksumdata; unsigned int checksumlen; if (kctx->gk5e->ctype == CKSUMTYPE_HMAC_MD5_ARCFOUR) @@ -291,15 +291,17 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen, return GSS_S_FAILURE; } - tfm = crypto_alloc_ahash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(tfm)) + checksumdata = kmalloc(GSS_KRB5_MAX_CKSUM_LEN, GFP_NOFS); + if (checksumdata == NULL) return GSS_S_FAILURE; + tfm = crypto_alloc_ahash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) + goto out_free_cksum; + req = ahash_request_alloc(tfm, GFP_KERNEL); - if (!req) { - crypto_free_ahash(tfm); - return GSS_S_FAILURE; - } + if (!req) + goto out_free_ahash; ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL); @@ -349,7 +351,10 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen, cksumout->len = kctx->gk5e->cksumlength; out: ahash_request_free(req); +out_free_ahash: crypto_free_ahash(tfm); +out_free_cksum: + kfree(checksumdata); return err ? GSS_S_FAILURE : 0; } @@ -368,8 +373,8 @@ make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen, struct crypto_ahash *tfm; struct ahash_request *req; struct scatterlist sg[1]; - int err; - u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN]; + int err = -1; + u8 *checksumdata; unsigned int checksumlen; if (kctx->gk5e->keyed_cksum == 0) { @@ -383,16 +388,18 @@ make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen, return GSS_S_FAILURE; } + checksumdata = kmalloc(GSS_KRB5_MAX_CKSUM_LEN, GFP_NOFS); + if (!checksumdata) + return GSS_S_FAILURE; + tfm = crypto_alloc_ahash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm)) - return GSS_S_FAILURE; + goto out_free_cksum; checksumlen = crypto_ahash_digestsize(tfm); req = ahash_request_alloc(tfm, GFP_KERNEL); - if (!req) { - crypto_free_ahash(tfm); - return GSS_S_FAILURE; - } + if (!req) + goto out_free_ahash; ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL); @@ -433,7 +440,10 @@ make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen, } out: ahash_request_free(req); +out_free_ahash: crypto_free_ahash(tfm); +out_free_cksum: + kfree(checksumdata); return err ? GSS_S_FAILURE : 0; } @@ -666,14 +676,17 @@ gss_krb5_cts_crypt(struct crypto_skcipher *cipher, struct xdr_buf *buf, u32 ret; struct scatterlist sg[1]; SKCIPHER_REQUEST_ON_STACK(req, cipher); - u8 data[GSS_KRB5_MAX_BLOCKSIZE * 2]; + u8 *data; struct page **save_pages; u32 len = buf->len - offset; - if (len > ARRAY_SIZE(data)) { + if (len > GSS_KRB5_MAX_BLOCKSIZE * 2) { WARN_ON(0); return -ENOMEM; } + data = kmalloc(GSS_KRB5_MAX_BLOCKSIZE * 2, GFP_NOFS); + if (!data) + return -ENOMEM; /* * For encryption, we want to read from the cleartext @@ -708,6 +721,7 @@ gss_krb5_cts_crypt(struct crypto_skcipher *cipher, struct xdr_buf *buf, ret = write_bytes_to_xdr_buf(buf, offset, data, len); out: + kfree(data); return ret; } diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index d67f7e1bc82d..45662d7f0943 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -718,30 +718,37 @@ gss_write_null_verf(struct svc_rqst *rqstp) static int gss_write_verf(struct svc_rqst *rqstp, struct gss_ctx *ctx_id, u32 seq) { - __be32 xdr_seq; + __be32 *xdr_seq; u32 maj_stat; struct xdr_buf verf_data; struct xdr_netobj mic; __be32 *p; struct kvec iov; + int err = -1; svc_putnl(rqstp->rq_res.head, RPC_AUTH_GSS); - xdr_seq = htonl(seq); + xdr_seq = kmalloc(4, GFP_KERNEL); + if (!xdr_seq) + return -1; + *xdr_seq = htonl(seq); - iov.iov_base = &xdr_seq; - iov.iov_len = sizeof(xdr_seq); + iov.iov_base = xdr_seq; + iov.iov_len = 4; xdr_buf_from_iov(&iov, &verf_data); p = rqstp->rq_res.head->iov_base + rqstp->rq_res.head->iov_len; mic.data = (u8 *)(p + 1); maj_stat = gss_get_mic(ctx_id, &verf_data, &mic); if (maj_stat != GSS_S_COMPLETE) - return -1; + goto out; *p++ = htonl(mic.len); memset((u8 *)p + mic.len, 0, round_up_to_quad(mic.len) - mic.len); p += XDR_QUADLEN(mic.len); if (!xdr_ressize_check(rqstp, p)) - return -1; - return 0; + goto out; + err = 0; +out: + kfree(xdr_seq); + return err; } struct gss_domain { From 92d230dd8cafac417e130e404d4b64eafe2271de Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sat, 22 Oct 2016 14:31:06 +0000 Subject: [PATCH 629/884] rocker: fix error return code in rocker_world_check_init() Fix to return error code -EINVAL from the error handling case instead of 0, as done elsewhere in this function. Fixes: e420114eef4a ("rocker: introduce worlds infrastructure") Signed-off-by: Wei Yongjun Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/rocker/rocker_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index 5424fb341613..24b746406bc7 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -1471,7 +1471,7 @@ static int rocker_world_check_init(struct rocker_port *rocker_port) if (rocker->wops) { if (rocker->wops->mode != mode) { dev_err(&rocker->pdev->dev, "hardware has ports in different worlds, which is not supported\n"); - return err; + return -EINVAL; } return 0; } From 293de7dee4f9602676846dbeb84b1580123306b4 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sun, 23 Oct 2016 09:28:29 -0700 Subject: [PATCH 630/884] doc: update docbook annotations for socket and skb The skbuff and sock structure both had missing parameter annotation values. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 + include/net/sock.h | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 601258f6e621..32810f279f8e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -936,6 +936,7 @@ struct sk_buff_fclones { /** * skb_fclone_busy - check if fclone is busy + * @sk: socket * @skb: buffer * * Returns true if skb is a fast clone, and its clone is not freed. diff --git a/include/net/sock.h b/include/net/sock.h index ebf75db08e06..73c6b008f1b7 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -252,6 +252,7 @@ struct sock_common { * @sk_pacing_rate: Pacing rate (if supported by transport/packet scheduler) * @sk_max_pacing_rate: Maximum pacing rate (%SO_MAX_PACING_RATE) * @sk_sndbuf: size of send buffer in bytes + * @sk_padding: unused element for alignment * @sk_no_check_tx: %SO_NO_CHECK setting, set checksum in TX packets * @sk_no_check_rx: allow zero checksum in RX packets * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO) @@ -302,7 +303,8 @@ struct sock_common { * @sk_backlog_rcv: callback to process the backlog * @sk_destruct: called at sock freeing time, i.e. when all refcnt == 0 * @sk_reuseport_cb: reuseport group container - */ + * @sk_rcu: used during RCU grace period + */ struct sock { /* * Now struct inet_timewait_sock also uses sock_common, so please just From ecc515d7238f2cffac839069d56dc271141defa0 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 24 Oct 2016 01:01:09 +0800 Subject: [PATCH 631/884] sctp: fix the panic caused by route update Commit 7303a1475008 ("sctp: identify chunks that need to be fragmented at IP level") made the chunk be fragmented at IP level in the next round if it's size exceed PMTU. But there still is another case, PMTU can be updated if transport's dst expires and transport's pmtu_pending is set in sctp_packet_transmit. If the new PMTU is less than the chunk, the same issue with that commit can be triggered. So we should drop this packet and let it retransmit in another round where it would be fragmented at IP level. This patch is to fix it by checking the chunk size after PMTU may be updated and dropping this packet if it's size exceed PMTU. Fixes: 90017accff61 ("sctp: Add GSO support") Signed-off-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/output.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/sctp/output.c b/net/sctp/output.c index 2a5c1896d18f..6cb0df859195 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -418,6 +418,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) __u8 has_data = 0; int gso = 0; int pktcount = 0; + int auth_len = 0; struct dst_entry *dst; unsigned char *auth = NULL; /* pointer to auth in skb data */ @@ -510,7 +511,12 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) list_for_each_entry(chunk, &packet->chunk_list, list) { int padded = SCTP_PAD4(chunk->skb->len); - if (pkt_size + padded > tp->pathmtu) + if (chunk == packet->auth) + auth_len = padded; + else if (auth_len + padded + packet->overhead > + tp->pathmtu) + goto nomem; + else if (pkt_size + padded > tp->pathmtu) break; pkt_size += padded; } From 10df8e6152c6c400a563a673e9956320bfce1871 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 23 Oct 2016 18:03:06 -0700 Subject: [PATCH 632/884] udp: fix IP_CHECKSUM handling First bug was added in commit ad6f939ab193 ("ip: Add offset parameter to ip_cmsg_recv") : Tom missed that ipv4 udp messages could be received on AF_INET6 socket. ip_cmsg_recv(msg, skb) should have been replaced by ip_cmsg_recv_offset(msg, skb, sizeof(struct udphdr)); Then commit e6afc8ace6dd ("udp: remove headers from UDP packets before queueing") forgot to adjust the offsets now UDP headers are pulled before skb are put in receive queue. Fixes: ad6f939ab193 ("ip: Add offset parameter to ip_cmsg_recv") Fixes: e6afc8ace6dd ("udp: remove headers from UDP packets before queueing") Signed-off-by: Eric Dumazet Cc: Sam Kumar Cc: Willem de Bruijn Tested-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/net/ip.h | 4 ++-- net/ipv4/ip_sockglue.c | 11 ++++++----- net/ipv4/udp.c | 2 +- net/ipv6/udp.c | 3 ++- 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index c9d07988911e..5413883ac47f 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -578,7 +578,7 @@ int ip_options_rcv_srr(struct sk_buff *skb); */ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb); -void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb, int offset); +void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb, int tlen, int offset); int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, bool allow_ipv6); int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, @@ -600,7 +600,7 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 dport, static inline void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) { - ip_cmsg_recv_offset(msg, skb, 0); + ip_cmsg_recv_offset(msg, skb, 0, 0); } bool icmp_global_allow(void); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index af4919792b6a..b8a2d63d1fb8 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -98,7 +98,7 @@ static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb) } static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb, - int offset) + int tlen, int offset) { __wsum csum = skb->csum; @@ -106,8 +106,9 @@ static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb, return; if (offset != 0) - csum = csum_sub(csum, csum_partial(skb_transport_header(skb), - offset, 0)); + csum = csum_sub(csum, + csum_partial(skb_transport_header(skb) + tlen, + offset, 0)); put_cmsg(msg, SOL_IP, IP_CHECKSUM, sizeof(__wsum), &csum); } @@ -153,7 +154,7 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) } void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb, - int offset) + int tlen, int offset) { struct inet_sock *inet = inet_sk(skb->sk); unsigned int flags = inet->cmsg_flags; @@ -216,7 +217,7 @@ void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb, } if (flags & IP_CMSG_CHECKSUM) - ip_cmsg_recv_checksum(msg, skb, offset); + ip_cmsg_recv_checksum(msg, skb, tlen, offset); } EXPORT_SYMBOL(ip_cmsg_recv_offset); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 311613e413cb..d123d68f4d1d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1322,7 +1322,7 @@ try_again: *addr_len = sizeof(*sin); } if (inet->cmsg_flags) - ip_cmsg_recv_offset(msg, skb, sizeof(struct udphdr) + off); + ip_cmsg_recv_offset(msg, skb, sizeof(struct udphdr), off); err = copied; if (flags & MSG_TRUNC) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 9aa7c1c7a9ce..b2ef061e6836 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -427,7 +427,8 @@ try_again: if (is_udp4) { if (inet->cmsg_flags) - ip_cmsg_recv(msg, skb); + ip_cmsg_recv_offset(msg, skb, + sizeof(struct udphdr), off); } else { if (np->rxopt.all) ip6_datagram_recv_specific_ctl(sk, msg, skb); From 4d2b496f19f3c2cfaca1e8fa0710688b5ff3811d Mon Sep 17 00:00:00 2001 From: "Ewan D. Milne" Date: Wed, 26 Oct 2016 11:22:53 -0400 Subject: [PATCH 633/884] scsi: scsi_debug: Fix memory leak if LBP enabled and module is unloaded map_storep was not being vfree()'d in the module_exit call. Cc: Signed-off-by: Ewan D. Milne Reviewed-by: Laurence Oberman Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_debug.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index c905709707f0..cf04a364fd8b 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -5134,6 +5134,7 @@ static void __exit scsi_debug_exit(void) bus_unregister(&pseudo_lld_bus); root_device_unregister(pseudo_primary); + vfree(map_storep); vfree(dif_storep); vfree(fake_storep); kfree(sdebug_q_arr); From 2bf7dc8443e113844d078fd6541b7f4aa544f92f Mon Sep 17 00:00:00 2001 From: Ching Huang Date: Wed, 19 Oct 2016 17:50:26 +0800 Subject: [PATCH 634/884] scsi: arcmsr: Send SYNCHRONIZE_CACHE command to firmware The arcmsr driver failed to pass SYNCHRONIZE CACHE to controller firmware. Depending on how drive caches are handled internally by controller firmware this could potentially lead to data integrity problems. Ensure that cache flushes are passed to the controller. [mkp: applied by hand and removed unused vars] Cc: Signed-off-by: Ching Huang Reported-by: Tomas Henzl Signed-off-by: Martin K. Petersen --- drivers/scsi/arcmsr/arcmsr_hba.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/scsi/arcmsr/arcmsr_hba.c b/drivers/scsi/arcmsr/arcmsr_hba.c index 3d53d636b17b..f0cfb0451757 100644 --- a/drivers/scsi/arcmsr/arcmsr_hba.c +++ b/drivers/scsi/arcmsr/arcmsr_hba.c @@ -2636,18 +2636,9 @@ static int arcmsr_queue_command_lck(struct scsi_cmnd *cmd, struct AdapterControlBlock *acb = (struct AdapterControlBlock *) host->hostdata; struct CommandControlBlock *ccb; int target = cmd->device->id; - int lun = cmd->device->lun; - uint8_t scsicmd = cmd->cmnd[0]; cmd->scsi_done = done; cmd->host_scribble = NULL; cmd->result = 0; - if ((scsicmd == SYNCHRONIZE_CACHE) ||(scsicmd == SEND_DIAGNOSTIC)){ - if(acb->devstate[target][lun] == ARECA_RAID_GONE) { - cmd->result = (DID_NO_CONNECT << 16); - } - cmd->scsi_done(cmd); - return 0; - } if (target == 16) { /* virtual device for iop message transfer */ arcmsr_handle_virtual_command(acb, cmd); From e52fed7177f74382f742c27de2cc5314790aebb6 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sun, 23 Oct 2016 21:32:47 -0700 Subject: [PATCH 635/884] netvsc: fix incorrect receive checksum offloading The Hyper-V netvsc driver was looking at the incorrect status bits in the checksum info. It was setting the receive checksum unnecessary flag based on the IP header checksum being correct. The checksum flag is skb is about TCP and UDP checksum status. Because of this bug, any packet received with bad TCP checksum would be passed up the stack and to the application causing data corruption. The problem is reproducible via netcat and netem. This had a side effect of not doing receive checksum offload on IPv6. The driver was also also always doing checksum offload independent of the checksum setting done via ethtool. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 5d6e75a40d38..c71d966d905f 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -607,15 +607,18 @@ static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net, packet->total_data_buflen); skb->protocol = eth_type_trans(skb, net); - if (csum_info) { - /* We only look at the IP checksum here. - * Should we be dropping the packet if checksum - * failed? How do we deal with other checksums - TCP/UDP? - */ - if (csum_info->receive.ip_checksum_succeeded) + + /* skb is already created with CHECKSUM_NONE */ + skb_checksum_none_assert(skb); + + /* + * In Linux, the IP checksum is always checked. + * Do L4 checksum offload if enabled and present. + */ + if (csum_info && (net->features & NETIF_F_RXCSUM)) { + if (csum_info->receive.tcp_checksum_succeeded || + csum_info->receive.udp_checksum_succeeded) skb->ip_summed = CHECKSUM_UNNECESSARY; - else - skb->ip_summed = CHECKSUM_NONE; } if (vlan_tci & VLAN_TAG_PRESENT) From 67f0160fe34ec5391a428603b9832c9f99d8f3a1 Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Mon, 24 Oct 2016 08:48:09 +0300 Subject: [PATCH 636/884] MAINTAINERS: Update qlogic networking drivers Following Cavium's acquisition of qlogic we need to update all the qlogic drivers maintainer's entries to point to our new e-mail addresses, as well as update some of the driver's maintainers as those are no longer working for Cavium. I would like to thank Sony Chacko and Rajesh Borundia for their support and development of our various networking drivers. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- MAINTAINERS | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 1fc66f0aceb5..af299a77ab20 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2543,15 +2543,18 @@ S: Supported F: drivers/net/ethernet/broadcom/genet/ BROADCOM BNX2 GIGABIT ETHERNET DRIVER -M: Sony Chacko -M: Dept-HSGLinuxNICDev@qlogic.com +M: Rasesh Mody +M: Harish Patil +M: Dept-GELinuxNICDev@cavium.com L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/broadcom/bnx2.* F: drivers/net/ethernet/broadcom/bnx2_* BROADCOM BNX2X 10 GIGABIT ETHERNET DRIVER -M: Ariel Elior +M: Yuval Mintz +M: Ariel Elior +M: everest-linux-l2@cavium.com L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/broadcom/bnx2x/ @@ -2758,7 +2761,9 @@ S: Supported F: drivers/scsi/bfa/ BROCADE BNA 10 GIGABIT ETHERNET DRIVER -M: Rasesh Mody +M: Rasesh Mody +M: Sudarsana Kalluru +M: Dept-GELinuxNICDev@cavium.com L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/brocade/bna/ @@ -8492,11 +8497,10 @@ F: Documentation/devicetree/bindings/net/wireless/ F: drivers/net/wireless/ NETXEN (1/10) GbE SUPPORT -M: Manish Chopra -M: Sony Chacko -M: Rajesh Borundia +M: Manish Chopra +M: Rahul Verma +M: Dept-GELinuxNICDev@cavium.com L: netdev@vger.kernel.org -W: http://www.qlogic.com S: Supported F: drivers/net/ethernet/qlogic/netxen/ @@ -9873,33 +9877,32 @@ F: Documentation/scsi/LICENSE.qla4xxx F: drivers/scsi/qla4xxx/ QLOGIC QLA3XXX NETWORK DRIVER -M: Jitendra Kalsaria -M: Ron Mercer -M: linux-driver@qlogic.com +M: Dept-GELinuxNICDev@cavium.com L: netdev@vger.kernel.org S: Supported F: Documentation/networking/LICENSE.qla3xxx F: drivers/net/ethernet/qlogic/qla3xxx.* QLOGIC QLCNIC (1/10)Gb ETHERNET DRIVER -M: Dept-GELinuxNICDev@qlogic.com +M: Harish Patil +M: Manish Chopra +M: Dept-GELinuxNICDev@cavium.com L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/qlogic/qlcnic/ QLOGIC QLGE 10Gb ETHERNET DRIVER -M: Harish Patil -M: Sudarsana Kalluru -M: Dept-GELinuxNICDev@qlogic.com -M: linux-driver@qlogic.com +M: Harish Patil +M: Manish Chopra +M: Dept-GELinuxNICDev@cavium.com L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/qlogic/qlge/ QLOGIC QL4xxx ETHERNET DRIVER -M: Yuval Mintz -M: Ariel Elior -M: everest-linux-l2@qlogic.com +M: Yuval Mintz +M: Ariel Elior +M: everest-linux-l2@cavium.com L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/qlogic/qed/ From 03dab869b7b239c4e013ec82aea22e181e441cfc Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 26 Oct 2016 15:01:54 +0100 Subject: [PATCH 637/884] KEYS: Fix short sprintf buffer in /proc/keys show function This fixes CVE-2016-7042. Fix a short sprintf buffer in proc_keys_show(). If the gcc stack protector is turned on, this can cause a panic due to stack corruption. The problem is that xbuf[] is not big enough to hold a 64-bit timeout rendered as weeks: (gdb) p 0xffffffffffffffffULL/(60*60*24*7) $2 = 30500568904943 That's 14 chars plus NUL, not 11 chars plus NUL. Expand the buffer to 16 chars. I think the unpatched code apparently works if the stack-protector is not enabled because on a 32-bit machine the buffer won't be overflowed and on a 64-bit machine there's a 64-bit aligned pointer at one side and an int that isn't checked again on the other side. The panic incurred looks something like: Kernel panic - not syncing: stack-protector: Kernel stack is corrupted in: ffffffff81352ebe CPU: 0 PID: 1692 Comm: reproducer Not tainted 4.7.2-201.fc24.x86_64 #1 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 0000000000000086 00000000fbbd2679 ffff8800a044bc00 ffffffff813d941f ffffffff81a28d58 ffff8800a044bc98 ffff8800a044bc88 ffffffff811b2cb6 ffff880000000010 ffff8800a044bc98 ffff8800a044bc30 00000000fbbd2679 Call Trace: [] dump_stack+0x63/0x84 [] panic+0xde/0x22a [] ? proc_keys_show+0x3ce/0x3d0 [] __stack_chk_fail+0x19/0x30 [] proc_keys_show+0x3ce/0x3d0 [] ? key_validate+0x50/0x50 [] ? key_default_cmp+0x20/0x20 [] seq_read+0x2cc/0x390 [] proc_reg_read+0x42/0x70 [] __vfs_read+0x37/0x150 [] ? security_file_permission+0xa0/0xc0 [] vfs_read+0x96/0x130 [] SyS_read+0x55/0xc0 [] entry_SYSCALL_64_fastpath+0x1a/0xa4 Reported-by: Ondrej Kozina Signed-off-by: David Howells Tested-by: Ondrej Kozina cc: stable@vger.kernel.org Signed-off-by: James Morris --- security/keys/proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/keys/proc.c b/security/keys/proc.c index f0611a6368cd..b9f531c9e4fa 100644 --- a/security/keys/proc.c +++ b/security/keys/proc.c @@ -181,7 +181,7 @@ static int proc_keys_show(struct seq_file *m, void *v) struct timespec now; unsigned long timo; key_ref_t key_ref, skey_ref; - char xbuf[12]; + char xbuf[16]; int rc; struct keyring_search_context ctx = { From 7df3e59c3d1df4f87fe874c7956ef7a3d2f4d5fb Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 26 Oct 2016 15:02:01 +0100 Subject: [PATCH 638/884] KEYS: Sort out big_key initialisation big_key has two separate initialisation functions, one that registers the key type and one that registers the crypto. If the key type fails to register, there's no problem if the crypto registers successfully because there's no way to reach the crypto except through the key type. However, if the key type registers successfully but the crypto does not, big_key_rng and big_key_blkcipher may end up set to NULL - but the code neither checks for this nor unregisters the big key key type. Furthermore, since the key type is registered before the crypto, it is theoretically possible for the kernel to try adding a big_key before the crypto is set up, leading to the same effect. Fix this by merging big_key_crypto_init() and big_key_init() and calling the resulting function late. If they're going to be encrypted, we shouldn't be creating big_keys before we have the facilities to do the encryption available. The key type registration is also moved after the crypto initialisation. The fix also includes message printing on failure. If the big_key type isn't correctly set up, simply doing: dd if=/dev/zero bs=4096 count=1 | keyctl padd big_key a @s ought to cause an oops. Fixes: 13100a72f40f5748a04017e0ab3df4cf27c809ef ('Security: Keys: Big keys stored encrypted') Signed-off-by: David Howells cc: Peter Hlavaty cc: Kirill Marinushkin cc: Artem Savkov cc: stable@vger.kernel.org Signed-off-by: James Morris --- security/keys/big_key.c | 59 ++++++++++++++++++++++------------------- 1 file changed, 32 insertions(+), 27 deletions(-) diff --git a/security/keys/big_key.c b/security/keys/big_key.c index c0b3030b5634..835c1ab30d01 100644 --- a/security/keys/big_key.c +++ b/security/keys/big_key.c @@ -9,6 +9,7 @@ * 2 of the Licence, or (at your option) any later version. */ +#define pr_fmt(fmt) "big_key: "fmt #include #include #include @@ -341,44 +342,48 @@ error: */ static int __init big_key_init(void) { - return register_key_type(&key_type_big_key); -} + struct crypto_skcipher *cipher; + struct crypto_rng *rng; + int ret; -/* - * Initialize big_key crypto and RNG algorithms - */ -static int __init big_key_crypto_init(void) -{ - int ret = -EINVAL; - - /* init RNG */ - big_key_rng = crypto_alloc_rng(big_key_rng_name, 0, 0); - if (IS_ERR(big_key_rng)) { - big_key_rng = NULL; - return -EFAULT; + rng = crypto_alloc_rng(big_key_rng_name, 0, 0); + if (IS_ERR(rng)) { + pr_err("Can't alloc rng: %ld\n", PTR_ERR(rng)); + return PTR_ERR(rng); } + big_key_rng = rng; + /* seed RNG */ - ret = crypto_rng_reset(big_key_rng, NULL, crypto_rng_seedsize(big_key_rng)); - if (ret) - goto error; + ret = crypto_rng_reset(rng, NULL, crypto_rng_seedsize(rng)); + if (ret) { + pr_err("Can't reset rng: %d\n", ret); + goto error_rng; + } /* init block cipher */ - big_key_skcipher = crypto_alloc_skcipher(big_key_alg_name, - 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(big_key_skcipher)) { - big_key_skcipher = NULL; - ret = -EFAULT; - goto error; + cipher = crypto_alloc_skcipher(big_key_alg_name, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(cipher)) { + ret = PTR_ERR(cipher); + pr_err("Can't alloc crypto: %d\n", ret); + goto error_rng; + } + + big_key_skcipher = cipher; + + ret = register_key_type(&key_type_big_key); + if (ret < 0) { + pr_err("Can't register type: %d\n", ret); + goto error_cipher; } return 0; -error: +error_cipher: + crypto_free_skcipher(big_key_skcipher); +error_rng: crypto_free_rng(big_key_rng); - big_key_rng = NULL; return ret; } -device_initcall(big_key_init); -late_initcall(big_key_crypto_init); +late_initcall(big_key_init); From 31e6ec4519c0fe0ee4a2f6ba3ab278e9506b9500 Mon Sep 17 00:00:00 2001 From: Artem Savkov Date: Wed, 26 Oct 2016 15:02:09 +0100 Subject: [PATCH 639/884] security/keys: make BIG_KEYS dependent on stdrng. Since BIG_KEYS can't be compiled as module it requires one of the "stdrng" providers to be compiled into kernel. Otherwise big_key_crypto_init() fails on crypto_alloc_rng step and next dereference of big_key_skcipher (e.g. in big_key_preparse()) results in a NULL pointer dereference. Fixes: 13100a72f40f5748a04017e0ab3df4cf27c809ef ('Security: Keys: Big keys stored encrypted') Signed-off-by: Artem Savkov Signed-off-by: David Howells cc: Stephan Mueller cc: Kirill Marinushkin cc: stable@vger.kernel.org Signed-off-by: James Morris --- security/keys/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/keys/Kconfig b/security/keys/Kconfig index f826e8739023..d942c7c2bc0a 100644 --- a/security/keys/Kconfig +++ b/security/keys/Kconfig @@ -41,7 +41,7 @@ config BIG_KEYS bool "Large payload keys" depends on KEYS depends on TMPFS - select CRYPTO + depends on (CRYPTO_ANSI_CPRNG = y || CRYPTO_DRBG = y) select CRYPTO_AES select CRYPTO_ECB select CRYPTO_RNG From 56fb2d6eb63acd48b50437b415b6f7d2fcffe75d Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 26 Oct 2016 10:34:08 -0500 Subject: [PATCH 640/884] objtool: Fix rare switch jump table pattern detection The following commit: 3732710ff6f2 ("objtool: Improve rare switch jump table pattern detection") ... improved objtool's ability to detect GCC switch statement jump tables for GCC 6. However the check to allow short jumps with the scanned range of instructions wasn't quite right. The pattern detection should allow jumps to the indirect jump instruction itself. This fixes the following warning: drivers/infiniband/sw/rxe/rxe_comp.o: warning: objtool: rxe_completer()+0x315: sibling call from callable instruction with changed frame pointer Reported-by: Arnd Bergmann Signed-off-by: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 3732710ff6f2 ("objtool: Improve rare switch jump table pattern detection") Link: http://lkml.kernel.org/r/20161026153408.2rifnw7bvoc5sex7@treble Signed-off-by: Ingo Molnar --- tools/objtool/builtin-check.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 4490601a9235..e8a1f699058a 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -754,7 +754,7 @@ static struct rela *find_switch_table(struct objtool_file *file, if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest && (insn->jump_dest->offset <= insn->offset || - insn->jump_dest->offset >= orig_insn->offset)) + insn->jump_dest->offset > orig_insn->offset)) break; text_rela = find_rela_by_dest_range(insn->sec, insn->offset, From f5d6d2da0d9098a4aa0ebcc187aa0fc167045d6b Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Wed, 26 Oct 2016 13:37:04 +0200 Subject: [PATCH 641/884] sched/fair: Remove unused but set variable 'rq' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit: 8663e24d56dc ("sched/fair: Reorder cgroup creation code") ... the variable 'rq' in alloc_fair_sched_group() is set but no longer used. Remove it to fix the following GCC warning when building with 'W=1': kernel/sched/fair.c:8842:13: warning: variable ‘rq’ set but not used [-Wunused-but-set-variable] Signed-off-by: Tobias Klauser Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20161026113704.8981-1-tklauser@distanz.ch Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index d941c97dfbc3..c242944f5cbd 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -8839,7 +8839,6 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) { struct sched_entity *se; struct cfs_rq *cfs_rq; - struct rq *rq; int i; tg->cfs_rq = kzalloc(sizeof(cfs_rq) * nr_cpu_ids, GFP_KERNEL); @@ -8854,8 +8853,6 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) init_cfs_bandwidth(tg_cfs_bandwidth(tg)); for_each_possible_cpu(i) { - rq = cpu_rq(i); - cfs_rq = kzalloc_node(sizeof(struct cfs_rq), GFP_KERNEL, cpu_to_node(i)); if (!cfs_rq) From d1f63f0c81c22ba705fcd149a1fcec37b734d818 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 26 Oct 2016 15:04:41 +0000 Subject: [PATCH 642/884] mmc: sdhci-msm: Fix error return code in sdhci_msm_probe() Fix to return a negative error code from the platform_get_irq_byname() error handling case instead of 0, as done elsewhere in this function. Fixes: ad81d3871004 ("mmc: sdhci-msm: Add support for UHS cards") Signed-off-by: Wei Yongjun Acked-by: Adrian Hunter Acked-by: Georgi Djakov Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-msm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c index 8ef44a2a2fd9..90ed2e12d345 100644 --- a/drivers/mmc/host/sdhci-msm.c +++ b/drivers/mmc/host/sdhci-msm.c @@ -647,6 +647,7 @@ static int sdhci_msm_probe(struct platform_device *pdev) if (msm_host->pwr_irq < 0) { dev_err(&pdev->dev, "Get pwr_irq failed (%d)\n", msm_host->pwr_irq); + ret = msm_host->pwr_irq; goto clk_disable; } From 58e3948a87e39289aeda5753e9712092c8ca0745 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 13 Oct 2016 13:10:57 +0200 Subject: [PATCH 643/884] KVM: document lock orders This is long overdue, and not really hard. Signed-off-by: Paolo Bonzini Message-Id: <1476357057-17899-1-git-send-email-pbonzini@redhat.com> Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/locking.txt | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/Documentation/virtual/kvm/locking.txt b/Documentation/virtual/kvm/locking.txt index f2491a8c68b4..e5dd9f4d6100 100644 --- a/Documentation/virtual/kvm/locking.txt +++ b/Documentation/virtual/kvm/locking.txt @@ -4,7 +4,17 @@ KVM Lock Overview 1. Acquisition Orders --------------------- -(to be written) +The acquisition orders for mutexes are as follows: + +- kvm->lock is taken outside vcpu->mutex + +- kvm->lock is taken outside kvm->slots_lock and kvm->irq_lock + +- kvm->slots_lock is taken outside kvm->irq_lock, though acquiring + them together is quite rare. + +For spinlocks, kvm_lock is taken outside kvm->mmu_lock. Everything +else is a leaf: no other lock is taken inside the critical sections. 2: Exception ------------ From bdc3478f90cd4d2928197f36629d5cf93b64dbe9 Mon Sep 17 00:00:00 2001 From: Marcel Hasler Date: Thu, 27 Oct 2016 00:42:27 +0200 Subject: [PATCH 644/884] ALSA: usb-audio: Add quirk for Syntek STK1160 The stk1160 chip needs QUIRK_AUDIO_ALIGN_TRANSFER. This patch resolves the issue reported on the mailing list (http://marc.info/?l=linux-sound&m=139223599126215&w=2) and also fixes bug 180071 (https://bugzilla.kernel.org/show_bug.cgi?id=180071). Signed-off-by: Marcel Hasler Cc: Signed-off-by: Takashi Iwai --- sound/usb/quirks-table.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index c60a776e815d..8a59d4782a0f 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -2907,6 +2907,23 @@ AU0828_DEVICE(0x2040, 0x7260, "Hauppauge", "HVR-950Q"), AU0828_DEVICE(0x2040, 0x7213, "Hauppauge", "HVR-950Q"), AU0828_DEVICE(0x2040, 0x7270, "Hauppauge", "HVR-950Q"), +/* Syntek STK1160 */ +{ + .match_flags = USB_DEVICE_ID_MATCH_DEVICE | + USB_DEVICE_ID_MATCH_INT_CLASS | + USB_DEVICE_ID_MATCH_INT_SUBCLASS, + .idVendor = 0x05e1, + .idProduct = 0x0408, + .bInterfaceClass = USB_CLASS_AUDIO, + .bInterfaceSubClass = USB_SUBCLASS_AUDIOCONTROL, + .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { + .vendor_name = "Syntek", + .product_name = "STK1160", + .ifnum = QUIRK_ANY_INTERFACE, + .type = QUIRK_AUDIO_ALIGN_TRANSFER + } +}, + /* Digidesign Mbox */ { /* Thanks to Clemens Ladisch */ From 85c856b39b479dde410ddd09df1da745343010c9 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Wed, 26 Oct 2016 08:38:38 -0700 Subject: [PATCH 645/884] kvm: nVMX: Fix kernel panics induced by illegal INVEPT/INVVPID types Bitwise shifts by amounts greater than or equal to the width of the left operand are undefined. A malicious guest can exploit this to crash a 32-bit host, due to the BUG_ON(1)'s in handle_{invept,invvpid}. Signed-off-by: Jim Mattson Message-Id: <1477496318-17681-1-git-send-email-jmattson@google.com> [Change 1UL to 1, to match the range check on the shift count. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index cf1b16dbc98a..74a4df993a51 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7659,7 +7659,7 @@ static int handle_invept(struct kvm_vcpu *vcpu) types = (vmx->nested.nested_vmx_ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6; - if (!(types & (1UL << type))) { + if (type >= 32 || !(types & (1 << type))) { nested_vmx_failValid(vcpu, VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); skip_emulated_instruction(vcpu); @@ -7722,7 +7722,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) types = (vmx->nested.nested_vmx_vpid_caps >> 8) & 0x7; - if (!(types & (1UL << type))) { + if (type >= 32 || !(types & (1 << type))) { nested_vmx_failValid(vcpu, VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); skip_emulated_instruction(vcpu); From 39715bf972ed4fee18fe5409609a971fb16b1771 Mon Sep 17 00:00:00 2001 From: Valentin Rothberg Date: Wed, 5 Oct 2016 07:57:26 +0200 Subject: [PATCH 646/884] powerpc/process: Fix CONFIG_ALIVEC typo in restore_tm_state() It should be ALTIVEC, not ALIVEC. Cyril explains: If a thread performs a transaction with altivec and then gets preempted for whatever reason, this bug may cause the kernel to not re-enable altivec when that thread runs again. This will result in an altivec unavailable fault, when that fault happens inside a user transaction the kernel has no choice but to enable altivec and doom the transaction. The result is that transactions using altivec may get aborted more often than they should. The difficulty in catching this with a selftest is my deliberate use of the word may above. Optimisations to avoid FPU/altivec/VSX faults mean that the kernel will always leave them on for 255 switches. This code prevents the kernel turning it off if it got to the 256th switch (and userspace was transactional). Fixes: dc16b553c949 ("powerpc: Always restore FPU/VEC/VSX if hardware transactional memory in use") Reviewed-by: Cyril Bur Signed-off-by: Valentin Rothberg Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 9e7c10fe205f..ce6dc61b15b2 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1012,7 +1012,7 @@ void restore_tm_state(struct pt_regs *regs) /* Ensure that restore_math() will restore */ if (msr_diff & MSR_FP) current->thread.load_fp = 1; -#ifdef CONFIG_ALIVEC +#ifdef CONFIG_ALTIVEC if (cpu_has_feature(CPU_FTR_ALTIVEC) && msr_diff & MSR_VEC) current->thread.load_vec = 1; #endif From bd77c4498616e27d5725b5959d880ce2272fefa9 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 24 Oct 2016 08:50:43 +0530 Subject: [PATCH 647/884] powerpc/mm/radix: Use tlbiel only if we ever ran on the current cpu Before this patch, we used tlbiel, if we ever ran only on this core. That was mostly derived from the nohash usage of the same. But is incorrect, the ISA 3.0 clarifies tlbiel such that: "All TLB entries that have all of the following properties are made invalid on the thread executing the tlbiel instruction" ie. tlbiel only invalidates TLB entries on the current thread. So if the mm has been used on any other thread (aka. cpu) then we must broadcast the invalidate. This bug could lead to invalid TLB entries if a program runs on multiple threads of a core. Hence use tlbiel, if we only ever ran on only the current cpu. Fixes: 1a472c9dba6b ("powerpc/mm/radix: Add tlbflush routines") Cc: stable@vger.kernel.org # v4.7+ Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/tlb.h | 12 ++++++++++++ arch/powerpc/mm/tlb-radix.c | 8 ++++---- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h index f6f68f73e858..99e1397b71da 100644 --- a/arch/powerpc/include/asm/tlb.h +++ b/arch/powerpc/include/asm/tlb.h @@ -52,11 +52,23 @@ static inline int mm_is_core_local(struct mm_struct *mm) return cpumask_subset(mm_cpumask(mm), topology_sibling_cpumask(smp_processor_id())); } + +static inline int mm_is_thread_local(struct mm_struct *mm) +{ + return cpumask_equal(mm_cpumask(mm), + cpumask_of(smp_processor_id())); +} + #else static inline int mm_is_core_local(struct mm_struct *mm) { return 1; } + +static inline int mm_is_thread_local(struct mm_struct *mm) +{ + return 1; +} #endif #endif /* __KERNEL__ */ diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 0e49ec541ab5..bda8c43be78a 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -175,7 +175,7 @@ void radix__flush_tlb_mm(struct mm_struct *mm) if (unlikely(pid == MMU_NO_CONTEXT)) goto no_context; - if (!mm_is_core_local(mm)) { + if (!mm_is_thread_local(mm)) { int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); if (lock_tlbie) @@ -201,7 +201,7 @@ void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr) if (unlikely(pid == MMU_NO_CONTEXT)) goto no_context; - if (!mm_is_core_local(mm)) { + if (!mm_is_thread_local(mm)) { int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); if (lock_tlbie) @@ -226,7 +226,7 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, pid = mm ? mm->context.id : 0; if (unlikely(pid == MMU_NO_CONTEXT)) goto bail; - if (!mm_is_core_local(mm)) { + if (!mm_is_thread_local(mm)) { int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); if (lock_tlbie) @@ -321,7 +321,7 @@ void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start, { unsigned long pid; unsigned long addr; - int local = mm_is_core_local(mm); + int local = mm_is_thread_local(mm); unsigned long ap = mmu_get_ap(psize); int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); unsigned long page_size = 1UL << mmu_psize_defs[psize].shift; From fb479e44a9e240a23c2d208c2ace23542a47f41c Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 13 Oct 2016 13:17:14 +1100 Subject: [PATCH 648/884] powerpc/64s: relocation, register save fixes for system reset interrupt This patch does a couple of things. First of all, powernv immediately explodes when running a relocated kernel, because the system reset exception for handling sleeps does not do correct relocated branches. Secondly, the sleep handling code trashes the condition and cfar registers, which we would like to preserve for debugging purposes (for non-sleep case exception). This patch changes the exception to use the standard format that saves registers before any tests or branches are made. It adds the test for idle-wakeup as an "extra" to break out of the normal exception path. Then it branches to a relocated idle handler that calls the various idle handling functions. After this patch, POWER8 CPU simulator now boots powernv kernel that is running at non-zero. Fixes: 948cf67c4726 ("powerpc: Add NAP mode support on Power7 in HV mode") Cc: stable@vger.kernel.org # v3.0+ Signed-off-by: Nicholas Piggin Acked-by: Gautham R. Shenoy Acked-by: Balbir Singh Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/exception-64s.h | 16 ++++++++ arch/powerpc/kernel/exceptions-64s.S | 50 ++++++++++++++---------- 2 files changed, 45 insertions(+), 21 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 2e4e7d878c8e..84d49b197c32 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -93,6 +93,10 @@ ld reg,PACAKBASE(r13); /* get high part of &label */ \ ori reg,reg,(FIXED_SYMBOL_ABS_ADDR(label))@l; +#define __LOAD_HANDLER(reg, label) \ + ld reg,PACAKBASE(r13); \ + ori reg,reg,(ABS_ADDR(label))@l; + /* Exception register prefixes */ #define EXC_HV H #define EXC_STD @@ -208,6 +212,18 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define kvmppc_interrupt kvmppc_interrupt_pr #endif +#ifdef CONFIG_RELOCATABLE +#define BRANCH_TO_COMMON(reg, label) \ + __LOAD_HANDLER(reg, label); \ + mtctr reg; \ + bctr + +#else +#define BRANCH_TO_COMMON(reg, label) \ + b label + +#endif + #define __KVM_HANDLER_PROLOG(area, n) \ BEGIN_FTR_SECTION_NESTED(947) \ ld r10,area+EX_CFAR(r13); \ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index f129408c6022..08ba447a4b3d 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -95,19 +95,35 @@ __start_interrupts: /* No virt vectors corresponding with 0x0..0x100 */ EXC_VIRT_NONE(0x4000, 0x4100) + +#ifdef CONFIG_PPC_P7_NAP + /* + * If running native on arch 2.06 or later, check if we are waking up + * from nap/sleep/winkle, and branch to idle handler. + */ +#define IDLETEST(n) \ + BEGIN_FTR_SECTION ; \ + mfspr r10,SPRN_SRR1 ; \ + rlwinm. r10,r10,47-31,30,31 ; \ + beq- 1f ; \ + cmpwi cr3,r10,2 ; \ + BRANCH_TO_COMMON(r10, system_reset_idle_common) ; \ +1: \ + END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) +#else +#define IDLETEST NOTEST +#endif + EXC_REAL_BEGIN(system_reset, 0x100, 0x200) SET_SCRATCH0(r13) -#ifdef CONFIG_PPC_P7_NAP -BEGIN_FTR_SECTION - /* Running native on arch 2.06 or later, check if we are - * waking up from nap/sleep/winkle. - */ - mfspr r13,SPRN_SRR1 - rlwinm. r13,r13,47-31,30,31 - beq 9f + EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD, + IDLETEST, 0x100) - cmpwi cr3,r13,2 - GET_PACA(r13) +EXC_REAL_END(system_reset, 0x100, 0x200) +EXC_VIRT_NONE(0x4100, 0x4200) + +#ifdef CONFIG_PPC_P7_NAP +EXC_COMMON_BEGIN(system_reset_idle_common) bl pnv_restore_hyp_resource li r0,PNV_THREAD_RUNNING @@ -130,14 +146,8 @@ BEGIN_FTR_SECTION blt cr3,2f b pnv_wakeup_loss 2: b pnv_wakeup_noloss +#endif -9: -END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) -#endif /* CONFIG_PPC_P7_NAP */ - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD, - NOTEST, 0x100) -EXC_REAL_END(system_reset, 0x100, 0x200) -EXC_VIRT_NONE(0x4100, 0x4200) EXC_COMMON(system_reset_common, 0x100, system_reset_exception) #ifdef CONFIG_PPC_PSERIES @@ -817,10 +827,8 @@ EXC_VIRT(trap_0b, 0x4b00, 0x4c00, 0xb00) TRAMP_KVM(PACA_EXGEN, 0xb00) EXC_COMMON(trap_0b_common, 0xb00, unknown_exception) - -#define LOAD_SYSCALL_HANDLER(reg) \ - ld reg,PACAKBASE(r13); \ - ori reg,reg,(ABS_ADDR(system_call_common))@l; +#define LOAD_SYSCALL_HANDLER(reg) \ + __LOAD_HANDLER(reg, system_call_common) /* Syscall routine is used twice, in reloc-off and reloc-on paths */ #define SYSCALL_PSERIES_1 \ From 42acfc6615f47e465731c263bee0c799edb098f2 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 3 Oct 2016 11:00:17 +0200 Subject: [PATCH 649/884] tty: vt, fix bogus division in csi_J MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In csi_J(3), the third parameter of scr_memsetw (vc_screenbuf_size) is divided by 2 inappropriatelly. But scr_memsetw expects size, not count, because it divides the size by 2 on its own before doing actual memset-by-words. So remove the bogus division. Signed-off-by: Jiri Slaby Cc: Petr Písař Fixes: f8df13e0a9 (tty: Clean console safely) Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 06fb39c1d6dd..ae203c2cd15a 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -1176,7 +1176,7 @@ static void csi_J(struct vc_data *vc, int vpar) break; case 3: /* erase scroll-back buffer (and whole display) */ scr_memsetw(vc->vc_screenbuf, vc->vc_video_erase_char, - vc->vc_screenbuf_size >> 1); + vc->vc_screenbuf_size); set_origin(vc); if (con_is_visible(vc)) update_screen(vc); From b20fb13c7c093f9170fbf162fc7f333d7a5cf77a Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 6 Oct 2016 15:42:54 +0200 Subject: [PATCH 650/884] serial: stm32: Fix comparisons with undefined register MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/tty/serial/stm32-usart.c: In function ‘stm32_receive_chars’: drivers/tty/serial/stm32-usart.c:130: warning: comparison is always true due to limited range of data type drivers/tty/serial/stm32-usart.c: In function ‘stm32_tx_dma_complete’: drivers/tty/serial/stm32-usart.c:177: warning: comparison is always false due to limited range of data type stm32_usart_offsets.icr is u8, while UNDEF_REG = ~0 is int, and thus 0xffffffff. As all registers in stm32_usart_offsets are u8, change the definition of UNDEF_REG to 0xff to fix this. Fixes: ada8618ff3bfe183 ("serial: stm32: adding support for stm32f7") Signed-off-by: Geert Uytterhoeven Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/stm32-usart.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/stm32-usart.h b/drivers/tty/serial/stm32-usart.h index 41d974923102..cd97ceb76e4f 100644 --- a/drivers/tty/serial/stm32-usart.h +++ b/drivers/tty/serial/stm32-usart.h @@ -31,7 +31,7 @@ struct stm32_usart_info { struct stm32_usart_config cfg; }; -#define UNDEF_REG ~0 +#define UNDEF_REG 0xff /* Register offsets */ struct stm32_usart_info stm32f4_info = { From bc2a024f865d712bb5748aabe77cd515d7d5eea9 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 6 Oct 2016 15:55:24 +0200 Subject: [PATCH 651/884] serial: SERIAL_STM32 should depend on HAS_DMA If NO_DMA=y: drivers/built-in.o: In function `stm32_serial_remove': stm32-usart.c:(.text+0xcea1a): undefined reference to `bad_dma_ops' stm32-usart.c:(.text+0xcea7a): undefined reference to `bad_dma_ops' Add a dependency on HAS_DMA to fix this. Signed-off-by: Geert Uytterhoeven Acked-by: Alexandre Torgue Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig index c7831407a882..25c1d7bc0100 100644 --- a/drivers/tty/serial/Kconfig +++ b/drivers/tty/serial/Kconfig @@ -1625,6 +1625,7 @@ config SERIAL_SPRD_CONSOLE config SERIAL_STM32 tristate "STMicroelectronics STM32 serial port support" select SERIAL_CORE + depends on HAS_DMA depends on ARM || COMPILE_TEST help This driver is for the on-chip Serial Controller on From 0267a4ff9836a1a4e59044db5bb8cdaddb986d3f Mon Sep 17 00:00:00 2001 From: Nava kishore Manne Date: Wed, 12 Oct 2016 13:17:27 +0530 Subject: [PATCH 652/884] serial: xuartps: Add new compatible string for ZynqMP This patch Adds the new compatible string for ZynqMP SoC. Signed-off-by: Nava kishore Manne Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/xilinx_uartps.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/tty/serial/xilinx_uartps.c b/drivers/tty/serial/xilinx_uartps.c index f37edaa5ac75..dd4c02fa4820 100644 --- a/drivers/tty/serial/xilinx_uartps.c +++ b/drivers/tty/serial/xilinx_uartps.c @@ -1200,6 +1200,7 @@ static int __init cdns_early_console_setup(struct earlycon_device *device, OF_EARLYCON_DECLARE(cdns, "xlnx,xuartps", cdns_early_console_setup); OF_EARLYCON_DECLARE(cdns, "cdns,uart-r1p8", cdns_early_console_setup); OF_EARLYCON_DECLARE(cdns, "cdns,uart-r1p12", cdns_early_console_setup); +OF_EARLYCON_DECLARE(cdns, "xlnx,zynqmp-uart", cdns_early_console_setup); /** * cdns_uart_console_write - perform write operation @@ -1438,6 +1439,7 @@ static const struct of_device_id cdns_uart_of_match[] = { { .compatible = "xlnx,xuartps", }, { .compatible = "cdns,uart-r1p8", }, { .compatible = "cdns,uart-r1p12", .data = &zynqmp_uart_def }, + { .compatible = "xlnx,zynqmp-uart", .data = &zynqmp_uart_def }, {} }; MODULE_DEVICE_TABLE(of, cdns_uart_of_match); From 78c22449f2d32aafd8804047f7e3bee4926b52eb Mon Sep 17 00:00:00 2001 From: Nava kishore Manne Date: Wed, 12 Oct 2016 13:17:28 +0530 Subject: [PATCH 653/884] devicetree: bindings: uart: Add new compatible string for ZynqMP This patch Adds the new compatible string for ZynqMP SoC. Signed-off-by: Nava kishore Manne Acked-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/serial/cdns,uart.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/serial/cdns,uart.txt b/Documentation/devicetree/bindings/serial/cdns,uart.txt index a3eb154c32ca..227bb770b027 100644 --- a/Documentation/devicetree/bindings/serial/cdns,uart.txt +++ b/Documentation/devicetree/bindings/serial/cdns,uart.txt @@ -1,7 +1,9 @@ Binding for Cadence UART Controller Required properties: -- compatible : should be "cdns,uart-r1p8", or "xlnx,xuartps" +- compatible : + Use "xlnx,xuartps","cdns,uart-r1p8" for Zynq-7xxx SoC. + Use "xlnx,zynqmp-uart","cdns,uart-r1p12" for Zynq Ultrascale+ MPSoC. - reg: Should contain UART controller registers location and length. - interrupts: Should contain UART controller interrupts. - clocks: Must contain phandles to the UART clocks From beadba5e19e2c44ec3527d3d1fc3ac3eda957e09 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 23 Oct 2016 11:38:18 +0000 Subject: [PATCH 654/884] serial: pch_uart: add terminate entry for dmi_system_id tables Make sure dmi_system_id tables are NULL terminated. Signed-off-by: Wei Yongjun Acked-by: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/pch_uart.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/serial/pch_uart.c b/drivers/tty/serial/pch_uart.c index d391650b82e7..42caccb5e87e 100644 --- a/drivers/tty/serial/pch_uart.c +++ b/drivers/tty/serial/pch_uart.c @@ -419,6 +419,7 @@ static struct dmi_system_id pch_uart_dmi_table[] = { }, (void *)MINNOW_UARTCLK, }, + { } }; /* Return UART clock, checking for board specific clocks. */ From 0ead21ad25f53117a1e39f0bddcb363e38886996 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 24 Oct 2016 17:00:27 +0900 Subject: [PATCH 655/884] serial: 8250_uniphier: fix more unterminated string Commit 1681d2116c96 ("serial: 8250_uniphier: add "\n" at the end of error log") missed this. Signed-off-by: Denys Vlasenko [masahiro: add commit log] Signed-off-by: Masahiro Yamada Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_uniphier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/8250_uniphier.c b/drivers/tty/serial/8250/8250_uniphier.c index b8d9c8c9d02a..a8babb0cf659 100644 --- a/drivers/tty/serial/8250/8250_uniphier.c +++ b/drivers/tty/serial/8250/8250_uniphier.c @@ -199,7 +199,7 @@ static int uniphier_uart_probe(struct platform_device *pdev) regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!regs) { - dev_err(dev, "failed to get memory resource"); + dev_err(dev, "failed to get memory resource\n"); return -EINVAL; } From 09065c5f0f1ff4bfb309975e182b746989a869c5 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 24 Oct 2016 17:00:28 +0900 Subject: [PATCH 656/884] serial: 8250_uniphier: fix clearing divisor latch access bit At this point, 'value' is always a byte, then this code is clearing bit 15, which is already clear. I meant to clear bit 7. Signed-off-by: Masahiro Yamada Reported-by: Denys Vlasenko Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_uniphier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/8250_uniphier.c b/drivers/tty/serial/8250/8250_uniphier.c index a8babb0cf659..417d9e7038e1 100644 --- a/drivers/tty/serial/8250/8250_uniphier.c +++ b/drivers/tty/serial/8250/8250_uniphier.c @@ -99,7 +99,7 @@ static void uniphier_serial_out(struct uart_port *p, int offset, int value) case UART_LCR: valshift = UNIPHIER_UART_LCR_SHIFT; /* Divisor latch access bit does not exist. */ - value &= ~(UART_LCR_DLAB << valshift); + value &= ~UART_LCR_DLAB; /* fall through */ case UART_MCR: offset = UNIPHIER_UART_LCR_MCR; From be2c92b8f1648527620058fdac2bae12b07f1fe9 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 24 Oct 2016 15:56:49 -0500 Subject: [PATCH 657/884] serial: core: fix console problems on uart_close MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 761ed4a94582 ('tty: serial_core: convert uart_close to use tty_port_close') started setting the ttyport console flag for serial drivers. This is causing crashes, hangs, or garbage output on several platforms because the serial shutdown is skipped and IRQs are left enabled. Partially revert commit 761ed4a94582 and drop reporting UART tty_ports as a console leaving the console handling to the serial_core as it was before. Fixes: 761ed4a94582ab29 ("tty: serial_core: convert uart_close to use tty_port_close") Reported-by: Niklas Söderlund Reported-by: Mike Galbraith Reported-by: Mugunthan V N Cc: Peter Hurley Cc: Geert Uytterhoeven Cc: Alan Cox Cc: Greg Kroah-Hartman Cc: Jiri Slaby Cc: linux-serial@vger.kernel.org Signed-off-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_core.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 6e4f63627479..664c99aeeca5 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -2746,8 +2746,6 @@ int uart_add_one_port(struct uart_driver *drv, struct uart_port *uport) uport->cons = drv->cons; uport->minor = drv->tty_driver->minor_start + uport->line; - port->console = uart_console(uport); - /* * If this port is a console, then the spinlock is already * initialised. From f00a7c57569db04633818bc5e0c0e35d62733b02 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 10 Oct 2016 11:13:48 +0300 Subject: [PATCH 658/884] serial: 8250_lpss: enable MSI for sure The commit 4fe0d154880b ("PCI: Use positive flags in pci_alloc_irq_vectors()") replaces flags from negative to positive values which makes mandatory to have the last argument in pci_alloc_irq_vectors() non-zero (if we want to be no-op). This basically drops MSI enabling in 8250_lpss driver. Restore desired behaviour in 8250_lpss by passing PCI_IRQ_ALL_TYPES instead of 0 to pci_alloc_irq_vectors(). Fixes: 60a9244a5d14 ("serial: 8250_lpss: enable MSI for Intel Quark") Cc: Christoph Hellwig Signed-off-by: Andy Shevchenko Reviewed-by: Bryan O'Donoghue Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_lpss.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/8250_lpss.c b/drivers/tty/serial/8250/8250_lpss.c index 886fcf37f291..b9923464599f 100644 --- a/drivers/tty/serial/8250/8250_lpss.c +++ b/drivers/tty/serial/8250/8250_lpss.c @@ -213,7 +213,7 @@ static int qrk_serial_setup(struct lpss8250 *lpss, struct uart_port *port) struct pci_dev *pdev = to_pci_dev(port->dev); int ret; - ret = pci_alloc_irq_vectors(pdev, 1, 1, 0); + ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES); if (ret < 0) return ret; From d704b2d32c39c256dea659e142a31b875a13c63b Mon Sep 17 00:00:00 2001 From: Aaron Brice Date: Thu, 6 Oct 2016 15:13:04 -0700 Subject: [PATCH 659/884] tty: serial: fsl_lpuart: Fix Tx DMA edge case In the case where head == 0 on the circular buffer, there should be one DMA buffer, not two. The second zero-length buffer would break the lpuart driver, transfer would never complete. Signed-off-by: Aaron Brice Acked-by: Stefan Agner Tested-by: Stefan Agner Tested-by: Bhuvanchandra DV Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/fsl_lpuart.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index de9d5107c00a..76103f2c4a80 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -328,7 +328,7 @@ static void lpuart_dma_tx(struct lpuart_port *sport) sport->dma_tx_bytes = uart_circ_chars_pending(xmit); - if (xmit->tail < xmit->head) { + if (xmit->tail < xmit->head || xmit->head == 0) { sport->dma_tx_nents = 1; sg_init_one(sgl, xmit->buf + xmit->tail, sport->dma_tx_bytes); } else { @@ -359,7 +359,6 @@ static void lpuart_dma_tx(struct lpuart_port *sport) sport->dma_tx_in_progress = true; sport->dma_tx_cookie = dmaengine_submit(sport->dma_tx_desc); dma_async_issue_pending(sport->dma_tx_chan); - } static void lpuart_dma_tx_complete(void *arg) From 32b2921e6a7461fe63b71217067a6cf4bddb132f Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Fri, 14 Oct 2016 15:18:28 +0200 Subject: [PATCH 660/884] tty: limit terminal size to 4M chars Size of kmalloc() in vc_do_resize() is controlled by user. Too large kmalloc() size triggers WARNING message on console. Put a reasonable upper bound on terminal size to prevent WARNINGs. Signed-off-by: Dmitry Vyukov CC: David Rientjes Cc: One Thousand Gnomes Cc: Greg Kroah-Hartman Cc: Jiri Slaby Cc: Peter Hurley Cc: linux-kernel@vger.kernel.org Cc: syzkaller@googlegroups.com Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index ae203c2cd15a..26cda08bc611 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -870,6 +870,8 @@ static int vc_do_resize(struct tty_struct *tty, struct vc_data *vc, if (new_cols == vc->vc_cols && new_rows == vc->vc_rows) return 0; + if (new_screen_size > (4 << 20)) + return -EINVAL; newscreen = kmalloc(new_screen_size, GFP_USER); if (!newscreen) return -ENOMEM; From ecb988a3b7985913d1f0112f66667cdd15e40711 Mon Sep 17 00:00:00 2001 From: Steve Shih Date: Mon, 17 Oct 2016 09:51:05 -0700 Subject: [PATCH 661/884] tty: serial: 8250: 8250_core: NXP SC16C2552 workaround NXP SC16C2552 requires that we always write a reset to the RX FIFO and TX FIFO whenever we enable the FIFOs Cc: xe-kernel@external.cisco.com Signed-off-by: Steve Shih Signed-off-by: David Singleton Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_port.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 1bfb6fdbaa20..1731b98d2471 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -83,7 +83,8 @@ static const struct serial8250_config uart_config[] = { .name = "16550A", .fifo_size = 16, .tx_loadsz = 16, - .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, + .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10 | + UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT, .rxtrig_bytes = {1, 4, 8, 14}, .flags = UART_CAP_FIFO, }, From c03e1b8703a4a0c8bac7f6b65d8fcb539ec62d82 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Fri, 21 Oct 2016 23:00:43 +0300 Subject: [PATCH 662/884] sh-sci: document R8A7743/5 support Renesas RZ/G SoC also have the SCIF, SCIFA, SCIFB, and HSCIF ports and they seem compatible with the R-Car gen2 SoC in this respect... Document RZ/G1[ME] (also known as R8A774[35]) SoC bindings. Signed-off-by: Sergei Shtylyov Acked-by: Simon Horman Acked-by: Geert Uytterhoeven Signed-off-by: Greg Kroah-Hartman --- .../devicetree/bindings/serial/renesas,sci-serial.txt | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Documentation/devicetree/bindings/serial/renesas,sci-serial.txt b/Documentation/devicetree/bindings/serial/renesas,sci-serial.txt index 1e4000d83aee..8d27d1a603e7 100644 --- a/Documentation/devicetree/bindings/serial/renesas,sci-serial.txt +++ b/Documentation/devicetree/bindings/serial/renesas,sci-serial.txt @@ -9,6 +9,14 @@ Required properties: - "renesas,scifb-r8a73a4" for R8A73A4 (R-Mobile APE6) SCIFB compatible UART. - "renesas,scifa-r8a7740" for R8A7740 (R-Mobile A1) SCIFA compatible UART. - "renesas,scifb-r8a7740" for R8A7740 (R-Mobile A1) SCIFB compatible UART. + - "renesas,scif-r8a7743" for R8A7743 (RZ/G1M) SCIF compatible UART. + - "renesas,scifa-r8a7743" for R8A7743 (RZ/G1M) SCIFA compatible UART. + - "renesas,scifb-r8a7743" for R8A7743 (RZ/G1M) SCIFB compatible UART. + - "renesas,hscif-r8a7743" for R8A7743 (RZ/G1M) HSCIF compatible UART. + - "renesas,scif-r8a7745" for R8A7745 (RZ/G1E) SCIF compatible UART. + - "renesas,scifa-r8a7745" for R8A7745 (RZ/G1E) SCIFA compatible UART. + - "renesas,scifb-r8a7745" for R8A7745 (RZ/G1E) SCIFB compatible UART. + - "renesas,hscif-r8a7745" for R8A7745 (RZ/G1E) HSCIF compatible UART. - "renesas,scif-r8a7778" for R8A7778 (R-Car M1) SCIF compatible UART. - "renesas,scif-r8a7779" for R8A7779 (R-Car H1) SCIF compatible UART. - "renesas,scif-r8a7790" for R8A7790 (R-Car H2) SCIF compatible UART. From 03842c17397e14cb8bb1adc2015f5dce6c733ffe Mon Sep 17 00:00:00 2001 From: Francois Berder Date: Tue, 25 Oct 2016 13:24:13 +0100 Subject: [PATCH 663/884] sc16is7xx: always write state when configuring GPIO as an output The regmap_update first reads the IOState register and then triggers a write if needed. However, GPIOS might be configured as an input so the read to IOState on this GPIO is the current state which might be random. Signed-off-by: Francois Berder Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sc16is7xx.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index 2675792a8f59..fb0672554123 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -1130,9 +1130,13 @@ static int sc16is7xx_gpio_direction_output(struct gpio_chip *chip, { struct sc16is7xx_port *s = gpiochip_get_data(chip); struct uart_port *port = &s->p[0].port; + u8 state = sc16is7xx_port_read(port, SC16IS7XX_IOSTATE_REG); - sc16is7xx_port_update(port, SC16IS7XX_IOSTATE_REG, BIT(offset), - val ? BIT(offset) : 0); + if (val) + state |= BIT(offset); + else + state &= ~BIT(offset); + sc16is7xx_port_write(port, SC16IS7XX_IOSTATE_REG, state); sc16is7xx_port_update(port, SC16IS7XX_IODIR_REG, BIT(offset), BIT(offset)); From 009e39ae44f4191188aeb6dfbf661b771dbbe515 Mon Sep 17 00:00:00 2001 From: Scot Doyle Date: Thu, 13 Oct 2016 12:12:43 -0500 Subject: [PATCH 664/884] vt: clear selection before resizing When resizing a vt its selection may exceed the new size, resulting in an invalid memory access [1]. Clear the selection before resizing. [1] http://lkml.kernel.org/r/CACT4Y+acDTwy4umEvf5ROBGiRJNrxHN4Cn5szCXE5Jw-d1B=Xw@mail.gmail.com Reported-and-tested-by: Dmitry Vyukov Signed-off-by: Scot Doyle Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 26cda08bc611..8c3bf3d613c0 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -876,6 +876,9 @@ static int vc_do_resize(struct tty_struct *tty, struct vc_data *vc, if (!newscreen) return -ENOMEM; + if (vc == sel_cons) + clear_selection(); + old_rows = vc->vc_rows; old_row_size = vc->vc_size_row; From 2a9becdd4dbed499815938308bdab9aae70dd561 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 14 Oct 2016 10:56:42 -0700 Subject: [PATCH 665/884] kernfs: Add noop_fsync to supported kernfs_file_fops If you edit a kernfs backed file with vi(1), you see an ugly error message when you write the file because vi tries to fsync(2) the file after writing, which fails. We have noop_fsync() for this, use it. Signed-off-by: Tony Luck Signed-off-by: Greg Kroah-Hartman --- fs/kernfs/file.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index 2bcb86e6e6ca..78219d5644e9 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -911,6 +911,7 @@ const struct file_operations kernfs_file_fops = { .open = kernfs_fop_open, .release = kernfs_fop_release, .poll = kernfs_fop_poll, + .fsync = noop_fsync, }; /** From 248ff02165437864146d6fbd2d99b2359c3723e6 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Fri, 7 Oct 2016 09:09:30 -0700 Subject: [PATCH 666/884] driver core: Make Kconfig text for DEBUG_TEST_DRIVER_REMOVE stronger The current state of driver removal is not great. CONFIG_DEBUG_TEST_DRIVER_REMOVE finds lots of errors. The help text currently undersells exactly how many errors this option will find. Add a bit more description to indicate this option shouldn't be turned on unless you actually want to debug driver removal. The text can be changed later when more drivers are fixed up. Signed-off-by: Laura Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/base/Kconfig | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig index fdf44cac08e6..d02e7c0f5bfd 100644 --- a/drivers/base/Kconfig +++ b/drivers/base/Kconfig @@ -213,14 +213,16 @@ config DEBUG_DEVRES If you are unsure about this, Say N here. config DEBUG_TEST_DRIVER_REMOVE - bool "Test driver remove calls during probe" + bool "Test driver remove calls during probe (UNSTABLE)" depends on DEBUG_KERNEL help Say Y here if you want the Driver core to test driver remove functions by calling probe, remove, probe. This tests the remove path without having to unbind the driver or unload the driver module. - If you are unsure about this, say N here. + This option is expected to find errors and may render your system + unusable. You should say N here unless you are explicitly looking to + test this functionality. config SYS_HYPERVISOR bool From 7fe311302f7d52601cd799ad508a6f92cb3d748d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 27 Oct 2016 09:49:19 -0600 Subject: [PATCH 667/884] blk-mq: update hardware and software queues for sleeping alloc If we end up sleeping due to running out of requests, we should update the hardware and software queues in the map ctx structure. Otherwise we could end up having rq->mq_ctx point to the pre-sleep context, and risk corrupting ctx->rq_list since we'll be grabbing the wrong lock when inserting the request. Reported-by: Dave Jones Reported-by: Chris Mason Tested-by: Chris Mason Fixes: 63581af3f31e ("blk-mq: remove non-blocking pass in blk_mq_map_request") Signed-off-by: Jens Axboe --- block/blk-mq.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index ddc2eed64771..f3d27a6dee09 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1217,9 +1217,9 @@ static struct request *blk_mq_map_request(struct request_queue *q, blk_mq_set_alloc_data(&alloc_data, q, 0, ctx, hctx); rq = __blk_mq_alloc_request(&alloc_data, op, op_flags); - hctx->queued++; - data->hctx = hctx; - data->ctx = ctx; + data->hctx = alloc_data.hctx; + data->ctx = alloc_data.ctx; + data->hctx->queued++; return rq; } From a74ad5e660a9ee1d071665e7e8ad822784a2dc7f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 27 Oct 2016 09:04:54 -0700 Subject: [PATCH 668/884] sparc64: Handle extremely large kernel TLB range flushes more gracefully. When the vmalloc area gets fragmented, and because the firmware mapping area sits between where modules live and the vmalloc area, we can sometimes receive requests for enormous kernel TLB range flushes. When this happens the cpu just spins flushing billions of pages and this triggers the NMI watchdog and other problems. We took care of this on the TSB side by doing a linear scan of the table once we pass a certain threshold. Do something similar for the TLB flush, however we are limited by the TLB flush facilities provided by the different chip variants. First of all we use an (mostly arbitrary) cut-off of 256K which is about 32 pages. This can be tuned in the future. The huge range code path for each chip works as follows: 1) On spitfire we flush all non-locked TLB entries using diagnostic acceses. 2) On cheetah we use the "flush all" TLB flush. 3) On sun4v/hypervisor we do a TLB context flush on context 0, which unlike previous chips does not remove "permanent" or locked entries. We could probably do something better on spitfire, such as limiting the flush to kernel TLB entries or even doing range comparisons. However that probably isn't worth it since those chips are old and the TLB only had 64 entries. Reported-by: James Clarke Tested-by: James Clarke Signed-off-by: David S. Miller --- arch/sparc/mm/ultra.S | 283 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 228 insertions(+), 55 deletions(-) diff --git a/arch/sparc/mm/ultra.S b/arch/sparc/mm/ultra.S index 0fa2e6202c1f..5d2fd6cd3189 100644 --- a/arch/sparc/mm/ultra.S +++ b/arch/sparc/mm/ultra.S @@ -113,12 +113,14 @@ __flush_tlb_pending: /* 27 insns */ .align 32 .globl __flush_tlb_kernel_range -__flush_tlb_kernel_range: /* 19 insns */ +__flush_tlb_kernel_range: /* 31 insns */ /* %o0=start, %o1=end */ cmp %o0, %o1 be,pn %xcc, 2f + sub %o1, %o0, %o3 + srlx %o3, 18, %o4 + brnz,pn %o4, __spitfire_flush_tlb_kernel_range_slow sethi %hi(PAGE_SIZE), %o4 - sub %o1, %o0, %o3 sub %o3, %o4, %o3 or %o0, 0x20, %o0 ! Nucleus 1: stxa %g0, [%o0 + %o3] ASI_DMMU_DEMAP @@ -134,6 +136,38 @@ __flush_tlb_kernel_range: /* 19 insns */ nop nop nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + +__spitfire_flush_tlb_kernel_range_slow: + mov 63 * 8, %o4 +1: ldxa [%o4] ASI_ITLB_DATA_ACCESS, %o3 + andcc %o3, 0x40, %g0 /* _PAGE_L_4U */ + bne,pn %xcc, 2f + mov TLB_TAG_ACCESS, %o3 + stxa %g0, [%o3] ASI_IMMU + stxa %g0, [%o4] ASI_ITLB_DATA_ACCESS + membar #Sync +2: ldxa [%o4] ASI_DTLB_DATA_ACCESS, %o3 + andcc %o3, 0x40, %g0 + bne,pn %xcc, 2f + mov TLB_TAG_ACCESS, %o3 + stxa %g0, [%o3] ASI_DMMU + stxa %g0, [%o4] ASI_DTLB_DATA_ACCESS + membar #Sync +2: sub %o4, 8, %o4 + brgez,pt %o4, 1b + nop + retl + nop __spitfire_flush_tlb_mm_slow: rdpr %pstate, %g1 @@ -288,6 +322,40 @@ __cheetah_flush_tlb_pending: /* 27 insns */ retl wrpr %g7, 0x0, %pstate +__cheetah_flush_tlb_kernel_range: /* 31 insns */ + /* %o0=start, %o1=end */ + cmp %o0, %o1 + be,pn %xcc, 2f + sub %o1, %o0, %o3 + srlx %o3, 18, %o4 + brnz,pn %o4, 3f + sethi %hi(PAGE_SIZE), %o4 + sub %o3, %o4, %o3 + or %o0, 0x20, %o0 ! Nucleus +1: stxa %g0, [%o0 + %o3] ASI_DMMU_DEMAP + stxa %g0, [%o0 + %o3] ASI_IMMU_DEMAP + membar #Sync + brnz,pt %o3, 1b + sub %o3, %o4, %o3 +2: sethi %hi(KERNBASE), %o3 + flush %o3 + retl + nop +3: mov 0x80, %o4 + stxa %g0, [%o4] ASI_DMMU_DEMAP + membar #Sync + stxa %g0, [%o4] ASI_IMMU_DEMAP + membar #Sync + retl + nop + nop + nop + nop + nop + nop + nop + nop + #ifdef DCACHE_ALIASING_POSSIBLE __cheetah_flush_dcache_page: /* 11 insns */ sethi %hi(PAGE_OFFSET), %g1 @@ -388,13 +456,15 @@ __hypervisor_flush_tlb_pending: /* 27 insns */ nop nop -__hypervisor_flush_tlb_kernel_range: /* 19 insns */ +__hypervisor_flush_tlb_kernel_range: /* 31 insns */ /* %o0=start, %o1=end */ cmp %o0, %o1 be,pn %xcc, 2f - sethi %hi(PAGE_SIZE), %g3 - mov %o0, %g1 - sub %o1, %g1, %g2 + sub %o1, %o0, %g2 + srlx %g2, 18, %g3 + brnz,pn %g3, 4f + mov %o0, %g1 + sethi %hi(PAGE_SIZE), %g3 sub %g2, %g3, %g2 1: add %g1, %g2, %o0 /* ARG0: virtual address */ mov 0, %o1 /* ARG1: mmu context */ @@ -409,6 +479,16 @@ __hypervisor_flush_tlb_kernel_range: /* 19 insns */ 3: sethi %hi(__hypervisor_tlb_tl0_error), %o2 jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0 nop +4: mov 0, %o0 /* ARG0: CPU lists unimplemented */ + mov 0, %o1 /* ARG1: CPU lists unimplemented */ + mov 0, %o2 /* ARG2: mmu context == nucleus */ + mov HV_MMU_ALL, %o3 /* ARG3: flags */ + mov HV_FAST_MMU_DEMAP_CTX, %o5 + ta HV_FAST_TRAP + brnz,pn %o0, 3b + mov HV_FAST_MMU_DEMAP_CTX, %o1 + retl + nop #ifdef DCACHE_ALIASING_POSSIBLE /* XXX Niagara and friends have an 8K cache, so no aliasing is @@ -431,43 +511,6 @@ tlb_patch_one: retl nop - .globl cheetah_patch_cachetlbops -cheetah_patch_cachetlbops: - save %sp, -128, %sp - - sethi %hi(__flush_tlb_mm), %o0 - or %o0, %lo(__flush_tlb_mm), %o0 - sethi %hi(__cheetah_flush_tlb_mm), %o1 - or %o1, %lo(__cheetah_flush_tlb_mm), %o1 - call tlb_patch_one - mov 19, %o2 - - sethi %hi(__flush_tlb_page), %o0 - or %o0, %lo(__flush_tlb_page), %o0 - sethi %hi(__cheetah_flush_tlb_page), %o1 - or %o1, %lo(__cheetah_flush_tlb_page), %o1 - call tlb_patch_one - mov 22, %o2 - - sethi %hi(__flush_tlb_pending), %o0 - or %o0, %lo(__flush_tlb_pending), %o0 - sethi %hi(__cheetah_flush_tlb_pending), %o1 - or %o1, %lo(__cheetah_flush_tlb_pending), %o1 - call tlb_patch_one - mov 27, %o2 - -#ifdef DCACHE_ALIASING_POSSIBLE - sethi %hi(__flush_dcache_page), %o0 - or %o0, %lo(__flush_dcache_page), %o0 - sethi %hi(__cheetah_flush_dcache_page), %o1 - or %o1, %lo(__cheetah_flush_dcache_page), %o1 - call tlb_patch_one - mov 11, %o2 -#endif /* DCACHE_ALIASING_POSSIBLE */ - - ret - restore - #ifdef CONFIG_SMP /* These are all called by the slaves of a cross call, at * trap level 1, with interrupts fully disabled. @@ -535,13 +578,15 @@ xcall_flush_tlb_page: /* 20 insns */ nop .globl xcall_flush_tlb_kernel_range -xcall_flush_tlb_kernel_range: /* 28 insns */ +xcall_flush_tlb_kernel_range: /* 44 insns */ sethi %hi(PAGE_SIZE - 1), %g2 or %g2, %lo(PAGE_SIZE - 1), %g2 andn %g1, %g2, %g1 andn %g7, %g2, %g7 sub %g7, %g1, %g3 - add %g2, 1, %g2 + srlx %g3, 18, %g2 + brnz,pn %g2, 2f + add %g2, 1, %g2 sub %g3, %g2, %g3 or %g1, 0x20, %g1 ! Nucleus 1: stxa %g0, [%g1 + %g3] ASI_DMMU_DEMAP @@ -550,11 +595,25 @@ xcall_flush_tlb_kernel_range: /* 28 insns */ brnz,pt %g3, 1b sub %g3, %g2, %g3 retry - nop - nop - nop - nop - nop +2: mov 63 * 8, %g1 +1: ldxa [%g1] ASI_ITLB_DATA_ACCESS, %g2 + andcc %g2, 0x40, %g0 /* _PAGE_L_4U */ + bne,pn %xcc, 2f + mov TLB_TAG_ACCESS, %g2 + stxa %g0, [%g2] ASI_IMMU + stxa %g0, [%g1] ASI_ITLB_DATA_ACCESS + membar #Sync +2: ldxa [%g1] ASI_DTLB_DATA_ACCESS, %g2 + andcc %g2, 0x40, %g0 + bne,pn %xcc, 2f + mov TLB_TAG_ACCESS, %g2 + stxa %g0, [%g2] ASI_DMMU + stxa %g0, [%g1] ASI_DTLB_DATA_ACCESS + membar #Sync +2: sub %g1, 8, %g1 + brgez,pt %g1, 1b + nop + retry nop nop nop @@ -683,6 +742,52 @@ xcall_fetch_glob_pmu_n4: retry +__cheetah_xcall_flush_tlb_kernel_range: /* 44 insns */ + sethi %hi(PAGE_SIZE - 1), %g2 + or %g2, %lo(PAGE_SIZE - 1), %g2 + andn %g1, %g2, %g1 + andn %g7, %g2, %g7 + sub %g7, %g1, %g3 + srlx %g3, 18, %g2 + brnz,pn %g2, 2f + add %g2, 1, %g2 + sub %g3, %g2, %g3 + or %g1, 0x20, %g1 ! Nucleus +1: stxa %g0, [%g1 + %g3] ASI_DMMU_DEMAP + stxa %g0, [%g1 + %g3] ASI_IMMU_DEMAP + membar #Sync + brnz,pt %g3, 1b + sub %g3, %g2, %g3 + retry +2: mov 0x80, %g2 + stxa %g0, [%g2] ASI_DMMU_DEMAP + membar #Sync + stxa %g0, [%g2] ASI_IMMU_DEMAP + membar #Sync + retry + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + #ifdef DCACHE_ALIASING_POSSIBLE .align 32 .globl xcall_flush_dcache_page_cheetah @@ -798,18 +903,20 @@ __hypervisor_xcall_flush_tlb_page: /* 20 insns */ nop .globl __hypervisor_xcall_flush_tlb_kernel_range -__hypervisor_xcall_flush_tlb_kernel_range: /* 28 insns */ +__hypervisor_xcall_flush_tlb_kernel_range: /* 44 insns */ /* %g1=start, %g7=end, g2,g3,g4,g5,g6=scratch */ sethi %hi(PAGE_SIZE - 1), %g2 or %g2, %lo(PAGE_SIZE - 1), %g2 andn %g1, %g2, %g1 andn %g7, %g2, %g7 sub %g7, %g1, %g3 + srlx %g3, 18, %g7 add %g2, 1, %g2 sub %g3, %g2, %g3 mov %o0, %g2 mov %o1, %g4 - mov %o2, %g7 + brnz,pn %g7, 2f + mov %o2, %g7 1: add %g1, %g3, %o0 /* ARG0: virtual address */ mov 0, %o1 /* ARG1: mmu context */ mov HV_MMU_ALL, %o2 /* ARG2: flags */ @@ -820,7 +927,7 @@ __hypervisor_xcall_flush_tlb_kernel_range: /* 28 insns */ sethi %hi(PAGE_SIZE), %o2 brnz,pt %g3, 1b sub %g3, %o2, %g3 - mov %g2, %o0 +5: mov %g2, %o0 mov %g4, %o1 mov %g7, %o2 membar #Sync @@ -828,6 +935,20 @@ __hypervisor_xcall_flush_tlb_kernel_range: /* 28 insns */ 1: sethi %hi(__hypervisor_tlb_xcall_error), %g4 jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0 nop +2: mov %o3, %g1 + mov %o5, %g3 + mov 0, %o0 /* ARG0: CPU lists unimplemented */ + mov 0, %o1 /* ARG1: CPU lists unimplemented */ + mov 0, %o2 /* ARG2: mmu context == nucleus */ + mov HV_MMU_ALL, %o3 /* ARG3: flags */ + mov HV_FAST_MMU_DEMAP_CTX, %o5 + ta HV_FAST_TRAP + mov %g1, %o3 + brz,pt %o0, 5b + mov %g3, %o5 + mov HV_FAST_MMU_DEMAP_CTX, %g6 + ba,pt %xcc, 1b + clr %g5 /* These just get rescheduled to PIL vectors. */ .globl xcall_call_function @@ -864,6 +985,58 @@ xcall_kgdb_capture: #endif /* CONFIG_SMP */ + .globl cheetah_patch_cachetlbops +cheetah_patch_cachetlbops: + save %sp, -128, %sp + + sethi %hi(__flush_tlb_mm), %o0 + or %o0, %lo(__flush_tlb_mm), %o0 + sethi %hi(__cheetah_flush_tlb_mm), %o1 + or %o1, %lo(__cheetah_flush_tlb_mm), %o1 + call tlb_patch_one + mov 19, %o2 + + sethi %hi(__flush_tlb_page), %o0 + or %o0, %lo(__flush_tlb_page), %o0 + sethi %hi(__cheetah_flush_tlb_page), %o1 + or %o1, %lo(__cheetah_flush_tlb_page), %o1 + call tlb_patch_one + mov 22, %o2 + + sethi %hi(__flush_tlb_pending), %o0 + or %o0, %lo(__flush_tlb_pending), %o0 + sethi %hi(__cheetah_flush_tlb_pending), %o1 + or %o1, %lo(__cheetah_flush_tlb_pending), %o1 + call tlb_patch_one + mov 27, %o2 + + sethi %hi(__flush_tlb_kernel_range), %o0 + or %o0, %lo(__flush_tlb_kernel_range), %o0 + sethi %hi(__cheetah_flush_tlb_kernel_range), %o1 + or %o1, %lo(__cheetah_flush_tlb_kernel_range), %o1 + call tlb_patch_one + mov 31, %o2 + +#ifdef DCACHE_ALIASING_POSSIBLE + sethi %hi(__flush_dcache_page), %o0 + or %o0, %lo(__flush_dcache_page), %o0 + sethi %hi(__cheetah_flush_dcache_page), %o1 + or %o1, %lo(__cheetah_flush_dcache_page), %o1 + call tlb_patch_one + mov 11, %o2 +#endif /* DCACHE_ALIASING_POSSIBLE */ + +#ifdef CONFIG_SMP + sethi %hi(xcall_flush_tlb_kernel_range), %o0 + or %o0, %lo(xcall_flush_tlb_kernel_range), %o0 + sethi %hi(__cheetah_xcall_flush_tlb_kernel_range), %o1 + or %o1, %lo(__cheetah_xcall_flush_tlb_kernel_range), %o1 + call tlb_patch_one + mov 44, %o2 +#endif /* CONFIG_SMP */ + + ret + restore .globl hypervisor_patch_cachetlbops hypervisor_patch_cachetlbops: @@ -895,7 +1068,7 @@ hypervisor_patch_cachetlbops: sethi %hi(__hypervisor_flush_tlb_kernel_range), %o1 or %o1, %lo(__hypervisor_flush_tlb_kernel_range), %o1 call tlb_patch_one - mov 19, %o2 + mov 31, %o2 #ifdef DCACHE_ALIASING_POSSIBLE sethi %hi(__flush_dcache_page), %o0 @@ -926,7 +1099,7 @@ hypervisor_patch_cachetlbops: sethi %hi(__hypervisor_xcall_flush_tlb_kernel_range), %o1 or %o1, %lo(__hypervisor_xcall_flush_tlb_kernel_range), %o1 call tlb_patch_one - mov 28, %o2 + mov 44, %o2 #endif /* CONFIG_SMP */ ret From 9dcb8b685fc30813b35ab4b4bf39244430753190 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 26 Oct 2016 10:15:30 -0700 Subject: [PATCH 669/884] mm: remove per-zone hashtable of bitlock waitqueues The per-zone waitqueues exist because of a scalability issue with the page waitqueues on some NUMA machines, but it turns out that they hurt normal loads, and now with the vmalloced stacks they also end up breaking gfs2 that uses a bit_wait on a stack object: wait_on_bit(&gh->gh_iflags, HIF_WAIT, TASK_UNINTERRUPTIBLE) where 'gh' can be a reference to the local variable 'mount_gh' on the stack of fill_super(). The reason the per-zone hash table breaks for this case is that there is no "zone" for virtual allocations, and trying to look up the physical page to get at it will fail (with a BUG_ON()). It turns out that I actually complained to the mm people about the per-zone hash table for another reason just a month ago: the zone lookup also hurts the regular use of "unlock_page()" a lot, because the zone lookup ends up forcing several unnecessary cache misses and generates horrible code. As part of that earlier discussion, we had a much better solution for the NUMA scalability issue - by just making the page lock have a separate contention bit, the waitqueue doesn't even have to be looked at for the normal case. Peter Zijlstra already has a patch for that, but let's see if anybody even notices. In the meantime, let's fix the actual gfs2 breakage by simplifying the bitlock waitqueues and removing the per-zone issue. Reported-by: Andreas Gruenbacher Tested-by: Bob Peterson Acked-by: Mel Gorman Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Steven Whitehouse Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 30 +---------- kernel/sched/core.c | 16 ++++++ kernel/sched/wait.c | 10 ---- mm/filemap.c | 4 +- mm/memory_hotplug.c | 28 ---------- mm/page_alloc.c | 115 +---------------------------------------- 6 files changed, 21 insertions(+), 182 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 7f2ae99e5daf..0f088f3a2fed 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -440,33 +440,7 @@ struct zone { seqlock_t span_seqlock; #endif - /* - * wait_table -- the array holding the hash table - * wait_table_hash_nr_entries -- the size of the hash table array - * wait_table_bits -- wait_table_size == (1 << wait_table_bits) - * - * The purpose of all these is to keep track of the people - * waiting for a page to become available and make them - * runnable again when possible. The trouble is that this - * consumes a lot of space, especially when so few things - * wait on pages at a given time. So instead of using - * per-page waitqueues, we use a waitqueue hash table. - * - * The bucket discipline is to sleep on the same queue when - * colliding and wake all in that wait queue when removing. - * When something wakes, it must check to be sure its page is - * truly available, a la thundering herd. The cost of a - * collision is great, but given the expected load of the - * table, they should be so rare as to be outweighed by the - * benefits from the saved space. - * - * __wait_on_page_locked() and unlock_page() in mm/filemap.c, are the - * primary users of these fields, and in mm/page_alloc.c - * free_area_init_core() performs the initialization of them. - */ - wait_queue_head_t *wait_table; - unsigned long wait_table_hash_nr_entries; - unsigned long wait_table_bits; + int initialized; /* Write-intensive fields used from the page allocator */ ZONE_PADDING(_pad1_) @@ -546,7 +520,7 @@ static inline bool zone_spans_pfn(const struct zone *zone, unsigned long pfn) static inline bool zone_is_initialized(struct zone *zone) { - return !!zone->wait_table; + return zone->initialized; } static inline bool zone_is_empty(struct zone *zone) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 94732d1ab00a..42d4027f9e26 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7515,11 +7515,27 @@ static struct kmem_cache *task_group_cache __read_mostly; DECLARE_PER_CPU(cpumask_var_t, load_balance_mask); DECLARE_PER_CPU(cpumask_var_t, select_idle_mask); +#define WAIT_TABLE_BITS 8 +#define WAIT_TABLE_SIZE (1 << WAIT_TABLE_BITS) +static wait_queue_head_t bit_wait_table[WAIT_TABLE_SIZE] __cacheline_aligned; + +wait_queue_head_t *bit_waitqueue(void *word, int bit) +{ + const int shift = BITS_PER_LONG == 32 ? 5 : 6; + unsigned long val = (unsigned long)word << shift | bit; + + return bit_wait_table + hash_long(val, WAIT_TABLE_BITS); +} +EXPORT_SYMBOL(bit_waitqueue); + void __init sched_init(void) { int i, j; unsigned long alloc_size = 0, ptr; + for (i = 0; i < WAIT_TABLE_SIZE; i++) + init_waitqueue_head(bit_wait_table + i); + #ifdef CONFIG_FAIR_GROUP_SCHED alloc_size += 2 * nr_cpu_ids * sizeof(void **); #endif diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 4f7053579fe3..9453efe9b25a 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -480,16 +480,6 @@ void wake_up_bit(void *word, int bit) } EXPORT_SYMBOL(wake_up_bit); -wait_queue_head_t *bit_waitqueue(void *word, int bit) -{ - const int shift = BITS_PER_LONG == 32 ? 5 : 6; - const struct zone *zone = page_zone(virt_to_page(word)); - unsigned long val = (unsigned long)word << shift | bit; - - return &zone->wait_table[hash_long(val, zone->wait_table_bits)]; -} -EXPORT_SYMBOL(bit_waitqueue); - /* * Manipulate the atomic_t address to produce a better bit waitqueue table hash * index (we're keying off bit -1, but that would produce a horrible hash diff --git a/mm/filemap.c b/mm/filemap.c index 849f459ad078..c7fe2f16503f 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -790,9 +790,7 @@ EXPORT_SYMBOL(__page_cache_alloc); */ wait_queue_head_t *page_waitqueue(struct page *page) { - const struct zone *zone = page_zone(page); - - return &zone->wait_table[hash_ptr(page, zone->wait_table_bits)]; + return bit_waitqueue(page, 0); } EXPORT_SYMBOL(page_waitqueue); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 962927309b6e..b18dab401be6 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -268,7 +268,6 @@ void __init register_page_bootmem_info_node(struct pglist_data *pgdat) unsigned long i, pfn, end_pfn, nr_pages; int node = pgdat->node_id; struct page *page; - struct zone *zone; nr_pages = PAGE_ALIGN(sizeof(struct pglist_data)) >> PAGE_SHIFT; page = virt_to_page(pgdat); @@ -276,19 +275,6 @@ void __init register_page_bootmem_info_node(struct pglist_data *pgdat) for (i = 0; i < nr_pages; i++, page++) get_page_bootmem(node, page, NODE_INFO); - zone = &pgdat->node_zones[0]; - for (; zone < pgdat->node_zones + MAX_NR_ZONES - 1; zone++) { - if (zone_is_initialized(zone)) { - nr_pages = zone->wait_table_hash_nr_entries - * sizeof(wait_queue_head_t); - nr_pages = PAGE_ALIGN(nr_pages) >> PAGE_SHIFT; - page = virt_to_page(zone->wait_table); - - for (i = 0; i < nr_pages; i++, page++) - get_page_bootmem(node, page, NODE_INFO); - } - } - pfn = pgdat->node_start_pfn; end_pfn = pgdat_end_pfn(pgdat); @@ -2158,20 +2144,6 @@ void try_offline_node(int nid) */ node_set_offline(nid); unregister_one_node(nid); - - /* free waittable in each zone */ - for (i = 0; i < MAX_NR_ZONES; i++) { - struct zone *zone = pgdat->node_zones + i; - - /* - * wait_table may be allocated from boot memory, - * here only free if it's allocated by vmalloc. - */ - if (is_vmalloc_addr(zone->wait_table)) { - vfree(zone->wait_table); - zone->wait_table = NULL; - } - } } EXPORT_SYMBOL(try_offline_node); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 2b3bf6767d54..de7c6e43b1c9 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4976,72 +4976,6 @@ void __ref build_all_zonelists(pg_data_t *pgdat, struct zone *zone) #endif } -/* - * Helper functions to size the waitqueue hash table. - * Essentially these want to choose hash table sizes sufficiently - * large so that collisions trying to wait on pages are rare. - * But in fact, the number of active page waitqueues on typical - * systems is ridiculously low, less than 200. So this is even - * conservative, even though it seems large. - * - * The constant PAGES_PER_WAITQUEUE specifies the ratio of pages to - * waitqueues, i.e. the size of the waitq table given the number of pages. - */ -#define PAGES_PER_WAITQUEUE 256 - -#ifndef CONFIG_MEMORY_HOTPLUG -static inline unsigned long wait_table_hash_nr_entries(unsigned long pages) -{ - unsigned long size = 1; - - pages /= PAGES_PER_WAITQUEUE; - - while (size < pages) - size <<= 1; - - /* - * Once we have dozens or even hundreds of threads sleeping - * on IO we've got bigger problems than wait queue collision. - * Limit the size of the wait table to a reasonable size. - */ - size = min(size, 4096UL); - - return max(size, 4UL); -} -#else -/* - * A zone's size might be changed by hot-add, so it is not possible to determine - * a suitable size for its wait_table. So we use the maximum size now. - * - * The max wait table size = 4096 x sizeof(wait_queue_head_t). ie: - * - * i386 (preemption config) : 4096 x 16 = 64Kbyte. - * ia64, x86-64 (no preemption): 4096 x 20 = 80Kbyte. - * ia64, x86-64 (preemption) : 4096 x 24 = 96Kbyte. - * - * The maximum entries are prepared when a zone's memory is (512K + 256) pages - * or more by the traditional way. (See above). It equals: - * - * i386, x86-64, powerpc(4K page size) : = ( 2G + 1M)byte. - * ia64(16K page size) : = ( 8G + 4M)byte. - * powerpc (64K page size) : = (32G +16M)byte. - */ -static inline unsigned long wait_table_hash_nr_entries(unsigned long pages) -{ - return 4096UL; -} -#endif - -/* - * This is an integer logarithm so that shifts can be used later - * to extract the more random high bits from the multiplicative - * hash function before the remainder is taken. - */ -static inline unsigned long wait_table_bits(unsigned long size) -{ - return ffz(~size); -} - /* * Initially all pages are reserved - free ones are freed * up by free_all_bootmem() once the early boot process is @@ -5304,49 +5238,6 @@ void __init setup_per_cpu_pageset(void) alloc_percpu(struct per_cpu_nodestat); } -static noinline __ref -int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages) -{ - int i; - size_t alloc_size; - - /* - * The per-page waitqueue mechanism uses hashed waitqueues - * per zone. - */ - zone->wait_table_hash_nr_entries = - wait_table_hash_nr_entries(zone_size_pages); - zone->wait_table_bits = - wait_table_bits(zone->wait_table_hash_nr_entries); - alloc_size = zone->wait_table_hash_nr_entries - * sizeof(wait_queue_head_t); - - if (!slab_is_available()) { - zone->wait_table = (wait_queue_head_t *) - memblock_virt_alloc_node_nopanic( - alloc_size, zone->zone_pgdat->node_id); - } else { - /* - * This case means that a zone whose size was 0 gets new memory - * via memory hot-add. - * But it may be the case that a new node was hot-added. In - * this case vmalloc() will not be able to use this new node's - * memory - this wait_table must be initialized to use this new - * node itself as well. - * To use this new node's memory, further consideration will be - * necessary. - */ - zone->wait_table = vmalloc(alloc_size); - } - if (!zone->wait_table) - return -ENOMEM; - - for (i = 0; i < zone->wait_table_hash_nr_entries; ++i) - init_waitqueue_head(zone->wait_table + i); - - return 0; -} - static __meminit void zone_pcp_init(struct zone *zone) { /* @@ -5367,10 +5258,7 @@ int __meminit init_currently_empty_zone(struct zone *zone, unsigned long size) { struct pglist_data *pgdat = zone->zone_pgdat; - int ret; - ret = zone_wait_table_init(zone, size); - if (ret) - return ret; + pgdat->nr_zones = zone_idx(zone) + 1; zone->zone_start_pfn = zone_start_pfn; @@ -5382,6 +5270,7 @@ int __meminit init_currently_empty_zone(struct zone *zone, zone_start_pfn, (zone_start_pfn + size)); zone_init_free_lists(zone); + zone->initialized = 1; return 0; } From 570dd45042a7c8a7aba1ee029c5dd0f5ccf41b9b Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 27 Oct 2016 10:42:20 -0700 Subject: [PATCH 670/884] btrfs: fix races on root_log_ctx lists btrfs_remove_all_log_ctxs takes a shortcut where it avoids walking the list because it knows all of the waiters are patiently waiting for the commit to finish. But, there's a small race where btrfs_sync_log can remove itself from the list if it finds a log commit is already done. Also, it uses list_del_init() to remove itself from the list, but there's no way to know if btrfs_remove_all_log_ctxs has already run, so we don't know for sure if it is safe to call list_del_init(). This gets rid of all the shortcuts for btrfs_remove_all_log_ctxs(), and just calls it with the proper locking. This is part two of the corruption fixed by cbd60aa7cd1. I should have done this in the first place, but convinced myself the optimizations were safe. A 12 hour run of dbench 2048 will eventually trigger a list debug WARN_ON for the list_del_init() in btrfs_sync_log(). Fixes: d1433debe7f4346cf9fc0dafc71c3137d2a97bc4 Reported-by: Dave Jones cc: stable@vger.kernel.org # 3.15+ Signed-off-by: Chris Mason --- fs/btrfs/tree-log.c | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 688df71c1bf7..4b1c0a6eee04 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2713,14 +2713,12 @@ static inline void btrfs_remove_all_log_ctxs(struct btrfs_root *root, int index, int error) { struct btrfs_log_ctx *ctx; + struct btrfs_log_ctx *safe; - if (!error) { - INIT_LIST_HEAD(&root->log_ctxs[index]); - return; - } - - list_for_each_entry(ctx, &root->log_ctxs[index], list) + list_for_each_entry_safe(ctx, safe, &root->log_ctxs[index], list) { + list_del_init(&ctx->list); ctx->log_ret = error; + } INIT_LIST_HEAD(&root->log_ctxs[index]); } @@ -2961,13 +2959,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, mutex_unlock(&root->log_mutex); out_wake_log_root: - /* - * We needn't get log_mutex here because we are sure all - * the other tasks are blocked. - */ + mutex_lock(&log_root_tree->log_mutex); btrfs_remove_all_log_ctxs(log_root_tree, index2, ret); - mutex_lock(&log_root_tree->log_mutex); log_root_tree->log_transid_committed++; atomic_set(&log_root_tree->log_commit[index2], 0); mutex_unlock(&log_root_tree->log_mutex); @@ -2978,10 +2972,8 @@ out_wake_log_root: if (waitqueue_active(&log_root_tree->log_commit_wait[index2])) wake_up(&log_root_tree->log_commit_wait[index2]); out: - /* See above. */ - btrfs_remove_all_log_ctxs(root, index1, ret); - mutex_lock(&root->log_mutex); + btrfs_remove_all_log_ctxs(root, index1, ret); root->log_transid_committed++; atomic_set(&root->log_commit[index1], 0); mutex_unlock(&root->log_mutex); From 71451bdfc2f7bc67cf0b7a5d51bc1489aaa55701 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 26 Oct 2016 15:27:45 -0400 Subject: [PATCH 671/884] drm/amdgpu/si_dpm: workaround for SI kickers Consolidate existing quirks. Fixes stability issues on some kickers. Acked-by: Huang Rui Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/si_dpm.c | 59 +++++++++++++++++++++-------- 1 file changed, 43 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c index ee77e9cb55b1..d6f85b1a0b93 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c @@ -3477,6 +3477,49 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev, int i; struct si_dpm_quirk *p = si_dpm_quirk_list; + /* limit all SI kickers */ + if (adev->asic_type == CHIP_PITCAIRN) { + if ((adev->pdev->revision == 0x81) || + (adev->pdev->device == 0x6810) || + (adev->pdev->device == 0x6811) || + (adev->pdev->device == 0x6816) || + (adev->pdev->device == 0x6817) || + (adev->pdev->device == 0x6806)) + max_mclk = 120000; + } else if (adev->asic_type == CHIP_VERDE) { + if ((adev->pdev->revision == 0x81) || + (adev->pdev->revision == 0x83) || + (adev->pdev->revision == 0x87) || + (adev->pdev->device == 0x6820) || + (adev->pdev->device == 0x6821) || + (adev->pdev->device == 0x6822) || + (adev->pdev->device == 0x6823) || + (adev->pdev->device == 0x682A) || + (adev->pdev->device == 0x682B)) { + max_sclk = 75000; + max_mclk = 80000; + } + } else if (adev->asic_type == CHIP_OLAND) { + if ((adev->pdev->revision == 0xC7) || + (adev->pdev->revision == 0x80) || + (adev->pdev->revision == 0x81) || + (adev->pdev->revision == 0x83) || + (adev->pdev->device == 0x6604) || + (adev->pdev->device == 0x6605)) { + max_sclk = 75000; + max_mclk = 80000; + } + } else if (adev->asic_type == CHIP_HAINAN) { + if ((adev->pdev->revision == 0x81) || + (adev->pdev->revision == 0x83) || + (adev->pdev->revision == 0xC3) || + (adev->pdev->device == 0x6664) || + (adev->pdev->device == 0x6665) || + (adev->pdev->device == 0x6667)) { + max_sclk = 75000; + max_mclk = 80000; + } + } /* Apply dpm quirks */ while (p && p->chip_device != 0) { if (adev->pdev->vendor == p->chip_vendor && @@ -3489,22 +3532,6 @@ static void si_apply_state_adjust_rules(struct amdgpu_device *adev, } ++p; } - /* limit mclk on all R7 370 parts for stability */ - if (adev->pdev->device == 0x6811 && - adev->pdev->revision == 0x81) - max_mclk = 120000; - /* limit sclk/mclk on Jet parts for stability */ - if (adev->pdev->device == 0x6665 && - adev->pdev->revision == 0xc3) { - max_sclk = 75000; - max_mclk = 80000; - } - /* Limit clocks for some HD8600 parts */ - if (adev->pdev->device == 0x6660 && - adev->pdev->revision == 0x83) { - max_sclk = 75000; - max_mclk = 80000; - } if (rps->vce_active) { rps->evclk = adev->pm.dpm.vce_states[adev->pm.dpm.vce_level].evclk; From a2941d01267437b6edcd3e769ae9a461fe36ae62 Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Wed, 26 Oct 2016 12:56:07 +0800 Subject: [PATCH 672/884] drm/amd/powerplay: fix bug get wrong evv voltage of Polaris. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rex Zhu Acked-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c | 2 +- drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c index 1167205057b3..2ba7937d2545 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c @@ -716,7 +716,7 @@ int phm_get_voltage_evv_on_sclk(struct pp_hwmgr *hwmgr, uint8_t voltage_type, *voltage = 1150; } else { ret = atomctrl_get_voltage_evv_on_sclk_ai(hwmgr, voltage_type, sclk, id, &vol); - *voltage = (uint16_t)vol/100; + *voltage = (uint16_t)(vol/100); } return ret; } diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c index 1126bd4f74dc..0894527d932f 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c @@ -1320,7 +1320,8 @@ int atomctrl_get_voltage_evv_on_sclk_ai(struct pp_hwmgr *hwmgr, uint8_t voltage_ if (0 != result) return result; - *voltage = le32_to_cpu(((GET_EVV_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_3 *)(&get_voltage_info_param_space))->ulVoltageLevel); + *voltage = le32_to_cpu(((GET_EVV_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_3 *) + (&get_voltage_info_param_space))->ulVoltageLevel); return result; } From 796f4687bd622b0f8076b6695001ab5cdc2b722a Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 27 Oct 2016 20:14:45 +0200 Subject: [PATCH 673/884] kvm/x86: Show WRMSR data is in hex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the "0x" prefix to the error messages format to make it unambiguous about what kind of value we're talking about. Signed-off-by: Borislav Petkov Cc: Paolo Bonzini Cc: "Radim Krčmář" Message-Id: <20161027181445.25319-1-bp@alien8.de> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e375235d81c9..0b2087981c06 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2262,7 +2262,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) /* Drop writes to this legacy MSR -- see rdmsr * counterpart for further detail. */ - vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", msr, data); + vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", msr, data); break; case MSR_AMD64_OSVW_ID_LENGTH: if (!guest_cpuid_has_osvw(vcpu)) @@ -2280,11 +2280,11 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (kvm_pmu_is_valid_msr(vcpu, msr)) return kvm_pmu_set_msr(vcpu, msr_info); if (!ignore_msrs) { - vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", + vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data 0x%llx\n", msr, data); return 1; } else { - vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", + vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", msr, data); break; } From 4fc6d239ee5640909932c47f3c7b93dd8e415fea Mon Sep 17 00:00:00 2001 From: Zefir Kurtisi Date: Mon, 24 Oct 2016 12:40:53 +0200 Subject: [PATCH 674/884] Revert "at803x: fix suspend/resume for SGMII link" This reverts commit 98267311fe3b334ae7c107fa0e2413adcf3ba735. Suspending the SGMII alongside the copper side made the at803x inaccessable while powered down, e.g. it can't be re-probed after suspend. Signed-off-by: Zefir Kurtisi Signed-off-by: David S. Miller --- drivers/net/phy/at803x.c | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index f279a897a5c7..cf74d108953e 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -52,9 +52,6 @@ #define AT803X_DEBUG_REG_5 0x05 #define AT803X_DEBUG_TX_CLK_DLY_EN BIT(8) -#define AT803X_REG_CHIP_CONFIG 0x1f -#define AT803X_BT_BX_REG_SEL 0x8000 - #define ATH8030_PHY_ID 0x004dd076 #define ATH8031_PHY_ID 0x004dd074 #define ATH8035_PHY_ID 0x004dd072 @@ -209,7 +206,6 @@ static int at803x_suspend(struct phy_device *phydev) { int value; int wol_enabled; - int ccr; mutex_lock(&phydev->lock); @@ -225,16 +221,6 @@ static int at803x_suspend(struct phy_device *phydev) phy_write(phydev, MII_BMCR, value); - if (phydev->interface != PHY_INTERFACE_MODE_SGMII) - goto done; - - /* also power-down SGMII interface */ - ccr = phy_read(phydev, AT803X_REG_CHIP_CONFIG); - phy_write(phydev, AT803X_REG_CHIP_CONFIG, ccr & ~AT803X_BT_BX_REG_SEL); - phy_write(phydev, MII_BMCR, phy_read(phydev, MII_BMCR) | BMCR_PDOWN); - phy_write(phydev, AT803X_REG_CHIP_CONFIG, ccr | AT803X_BT_BX_REG_SEL); - -done: mutex_unlock(&phydev->lock); return 0; @@ -243,7 +229,6 @@ done: static int at803x_resume(struct phy_device *phydev) { int value; - int ccr; mutex_lock(&phydev->lock); @@ -251,17 +236,6 @@ static int at803x_resume(struct phy_device *phydev) value &= ~(BMCR_PDOWN | BMCR_ISOLATE); phy_write(phydev, MII_BMCR, value); - if (phydev->interface != PHY_INTERFACE_MODE_SGMII) - goto done; - - /* also power-up SGMII interface */ - ccr = phy_read(phydev, AT803X_REG_CHIP_CONFIG); - phy_write(phydev, AT803X_REG_CHIP_CONFIG, ccr & ~AT803X_BT_BX_REG_SEL); - value = phy_read(phydev, MII_BMCR) & ~(BMCR_PDOWN | BMCR_ISOLATE); - phy_write(phydev, MII_BMCR, value); - phy_write(phydev, AT803X_REG_CHIP_CONFIG, ccr | AT803X_BT_BX_REG_SEL); - -done: mutex_unlock(&phydev->lock); return 0; From f62265b53ef34a372b657c99e23d32e95b464316 Mon Sep 17 00:00:00 2001 From: Zefir Kurtisi Date: Mon, 24 Oct 2016 12:40:54 +0200 Subject: [PATCH 675/884] at803x: double check SGMII side autoneg In SGMII mode, we observed an autonegotiation issue after power-down-up cycles where the copper side reports successful link establishment but the SGMII side's link is down. This happened in a setup where the at8031 is connected over SGMII to a eTSEC (fsl gianfar), but so far could not be reproduced with other Ethernet device / driver combinations. This commit adds a wrapper function for at8031 that in case of operating in SGMII mode double checks SGMII link state when generic aneg_done() succeeds. It prints a warning on failure but intentionally does not try to recover from this state. As a result, if you ever see a warning '803x_aneg_done: SGMII link is not ok' you will end up having an Ethernet link up but won't get any data through. This should not happen, if it does, please contact the module maintainer. Signed-off-by: Zefir Kurtisi Signed-off-by: David S. Miller --- drivers/net/phy/at803x.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index cf74d108953e..a52b560e428b 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -42,10 +42,18 @@ #define AT803X_MMD_ACCESS_CONTROL 0x0D #define AT803X_MMD_ACCESS_CONTROL_DATA 0x0E #define AT803X_FUNC_DATA 0x4003 +#define AT803X_REG_CHIP_CONFIG 0x1f +#define AT803X_BT_BX_REG_SEL 0x8000 #define AT803X_DEBUG_ADDR 0x1D #define AT803X_DEBUG_DATA 0x1E +#define AT803X_MODE_CFG_MASK 0x0F +#define AT803X_MODE_CFG_SGMII 0x01 + +#define AT803X_PSSR 0x11 /*PHY-Specific Status Register*/ +#define AT803X_PSSR_MR_AN_COMPLETE 0x0200 + #define AT803X_DEBUG_REG_0 0x00 #define AT803X_DEBUG_RX_CLK_DLY_EN BIT(15) @@ -355,6 +363,36 @@ static void at803x_link_change_notify(struct phy_device *phydev) } } +static int at803x_aneg_done(struct phy_device *phydev) +{ + int ccr; + + int aneg_done = genphy_aneg_done(phydev); + if (aneg_done != BMSR_ANEGCOMPLETE) + return aneg_done; + + /* + * in SGMII mode, if copper side autoneg is successful, + * also check SGMII side autoneg result + */ + ccr = phy_read(phydev, AT803X_REG_CHIP_CONFIG); + if ((ccr & AT803X_MODE_CFG_MASK) != AT803X_MODE_CFG_SGMII) + return aneg_done; + + /* switch to SGMII/fiber page */ + phy_write(phydev, AT803X_REG_CHIP_CONFIG, ccr & ~AT803X_BT_BX_REG_SEL); + + /* check if the SGMII link is OK. */ + if (!(phy_read(phydev, AT803X_PSSR) & AT803X_PSSR_MR_AN_COMPLETE)) { + pr_warn("803x_aneg_done: SGMII link is not ok\n"); + aneg_done = 0; + } + /* switch back to copper page */ + phy_write(phydev, AT803X_REG_CHIP_CONFIG, ccr | AT803X_BT_BX_REG_SEL); + + return aneg_done; +} + static struct phy_driver at803x_driver[] = { { /* ATHEROS 8035 */ @@ -406,6 +444,7 @@ static struct phy_driver at803x_driver[] = { .flags = PHY_HAS_INTERRUPT, .config_aneg = genphy_config_aneg, .read_status = genphy_read_status, + .aneg_done = at803x_aneg_done, .ack_interrupt = &at803x_ack_interrupt, .config_intr = &at803x_config_intr, } }; From e2796541ef1d82c73334bfde89b97c9174c8d057 Mon Sep 17 00:00:00 2001 From: Cyrille Pitchen Date: Wed, 19 Oct 2016 17:34:49 +0200 Subject: [PATCH 676/884] MAINTAINERS: add a maintainer for the SPI NOR subsystem I would like to volunteer as a maintainer for the SPI NOR part of the MTD subsystem. Over the last months, a significant number of SPI NOR related patches have been submitted, some of them have been reviewed, but very few have finally been merged. Hence, the number of pending SPI NOR related patches continues to increase over the time. Through my work on SPI NOR memories from many manufacturers over the last two years, I've gained a solid understanding of this technology. I've already helped by reviewing patches from other contributors on the mailing list, and would like to help getting those patches integrated by volunteering as a maintainer for this specific area. Boris Brezillon has already stepped up as a maintainer for the NAND sub-subsystem in MTD, and the SPI NOR sub-subsystem could be handled in the same way: I would be reviewing patches touching this area, collecting them and sending pull requests to Brian Norris. Also Marek Vasut has volunteered as well as maintainer for the SPI NOR subsystem. Signed-off-by: Cyrille Pitchen Acked-by: Boris Brezillon Acked-by: Marek Vasut Acked-by: Jagan Teki Signed-off-by: Brian Norris --- MAINTAINERS | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 1cd38a7e0064..2e87047515f8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11392,6 +11392,17 @@ W: http://www.st.com/spear S: Maintained F: drivers/clk/spear/ +SPI NOR SUBSYSTEM +M: Cyrille Pitchen +M: Marek Vasut +L: linux-mtd@lists.infradead.org +W: http://www.linux-mtd.infradead.org/ +Q: http://patchwork.ozlabs.org/project/linux-mtd/list/ +T: git git://github.com/spi-nor/linux.git +S: Maintained +F: drivers/mtd/spi-nor/ +F: include/linux/mtd/spi-nor.h + SPI SUBSYSTEM M: Mark Brown L: linux-spi@vger.kernel.org From e0f841f5cbf2a195c63f3441f3d8ef1cd2bdeeed Mon Sep 17 00:00:00 2001 From: Tobias Brunner Date: Mon, 24 Oct 2016 15:44:26 +0200 Subject: [PATCH 677/884] macsec: Fix header length if SCI is added if explicitly disabled Even if sending SCIs is explicitly disabled, the code that creates the Security Tag might still decide to add it (e.g. if multiple RX SCs are defined on the MACsec interface). But because the header length so far only depended on the configuration option the SCI overwrote the original frame's contents (EtherType and e.g. the beginning of the IP header) and if encrypted did not visibly end up in the packet, while the SC flag in the TCI field of the Security Tag was still set, resulting in invalid MACsec frames. Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver") Signed-off-by: Tobias Brunner Acked-by: Sabrina Dubroca Signed-off-by: David S. Miller --- drivers/net/macsec.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 3ea47f28e143..d2e61e002926 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -397,6 +397,14 @@ static struct macsec_cb *macsec_skb_cb(struct sk_buff *skb) #define DEFAULT_ENCRYPT false #define DEFAULT_ENCODING_SA 0 +static bool send_sci(const struct macsec_secy *secy) +{ + const struct macsec_tx_sc *tx_sc = &secy->tx_sc; + + return tx_sc->send_sci || + (secy->n_rx_sc > 1 && !tx_sc->end_station && !tx_sc->scb); +} + static sci_t make_sci(u8 *addr, __be16 port) { sci_t sci; @@ -437,15 +445,15 @@ static unsigned int macsec_extra_len(bool sci_present) /* Fill SecTAG according to IEEE 802.1AE-2006 10.5.3 */ static void macsec_fill_sectag(struct macsec_eth_header *h, - const struct macsec_secy *secy, u32 pn) + const struct macsec_secy *secy, u32 pn, + bool sci_present) { const struct macsec_tx_sc *tx_sc = &secy->tx_sc; - memset(&h->tci_an, 0, macsec_sectag_len(tx_sc->send_sci)); + memset(&h->tci_an, 0, macsec_sectag_len(sci_present)); h->eth.h_proto = htons(ETH_P_MACSEC); - if (tx_sc->send_sci || - (secy->n_rx_sc > 1 && !tx_sc->end_station && !tx_sc->scb)) { + if (sci_present) { h->tci_an |= MACSEC_TCI_SC; memcpy(&h->secure_channel_id, &secy->sci, sizeof(h->secure_channel_id)); @@ -650,6 +658,7 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb, struct macsec_tx_sc *tx_sc; struct macsec_tx_sa *tx_sa; struct macsec_dev *macsec = macsec_priv(dev); + bool sci_present; u32 pn; secy = &macsec->secy; @@ -687,7 +696,8 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb, unprotected_len = skb->len; eth = eth_hdr(skb); - hh = (struct macsec_eth_header *)skb_push(skb, macsec_extra_len(tx_sc->send_sci)); + sci_present = send_sci(secy); + hh = (struct macsec_eth_header *)skb_push(skb, macsec_extra_len(sci_present)); memmove(hh, eth, 2 * ETH_ALEN); pn = tx_sa_update_pn(tx_sa, secy); @@ -696,7 +706,7 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb, kfree_skb(skb); return ERR_PTR(-ENOLINK); } - macsec_fill_sectag(hh, secy, pn); + macsec_fill_sectag(hh, secy, pn, sci_present); macsec_set_shortlen(hh, unprotected_len - 2 * ETH_ALEN); skb_put(skb, secy->icv_len); @@ -726,10 +736,10 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb, skb_to_sgvec(skb, sg, 0, skb->len); if (tx_sc->encrypt) { - int len = skb->len - macsec_hdr_len(tx_sc->send_sci) - + int len = skb->len - macsec_hdr_len(sci_present) - secy->icv_len; aead_request_set_crypt(req, sg, sg, len, iv); - aead_request_set_ad(req, macsec_hdr_len(tx_sc->send_sci)); + aead_request_set_ad(req, macsec_hdr_len(sci_present)); } else { aead_request_set_crypt(req, sg, sg, 0, iv); aead_request_set_ad(req, skb->len - secy->icv_len); From 30656167bd2347bf867a09cbae2705bd927d3983 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 24 Oct 2016 14:35:26 +0200 Subject: [PATCH 678/884] MAINTAINERS: add more people to the MTD maintainer team Brian has been maintaining the MTD subsystem alone for several years now, and maintaining such a subsystem can really be time consuming. Create a maintainer team formed of the most active MTD contributors to help Brian with this task, which will hopefully improve the subsystem reactivity. Signed-off-by: Boris Brezillon Acked-by: Cyrille Pitchen Acked-by: Marek Vasut Acked-by: Richard Weinberger Signed-off-by: Brian Norris --- MAINTAINERS | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 2e87047515f8..11a1c065ea91 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7912,6 +7912,10 @@ F: mm/ MEMORY TECHNOLOGY DEVICES (MTD) M: David Woodhouse M: Brian Norris +M: Boris Brezillon +M: Marek Vasut +M: Richard Weinberger +M: Cyrille Pitchen L: linux-mtd@lists.infradead.org W: http://www.linux-mtd.infradead.org/ Q: http://patchwork.ozlabs.org/project/linux-mtd/list/ From e30520c2b075fae3977d482a31b54b0256160a57 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 24 Oct 2016 17:54:18 +0200 Subject: [PATCH 679/884] kalmia: avoid potential uninitialized variable use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The kalmia_send_init_packet() returns zero or a negative return code, but gcc has no way of knowing that there cannot be a positive return code, so it determines that copying the ethernet address at the end of kalmia_bind() will access uninitialized data: drivers/net/usb/kalmia.c: In function ‘kalmia_bind’: arch/x86/include/asm/string_32.h:78:22: error: ‘*((void *)ðernet_addr+4)’ may be used uninitialized in this function [-Werror=maybe-uninitialized] *((short *)to + 2) = *((short *)from + 2); ^ drivers/net/usb/kalmia.c:138:5: note: ‘*((void *)ðernet_addr+4)’ was declared here This warning is harmless, but for consistency, we should make the check for the return code match what the driver does everywhere else and just progate it, which then gets rid of the warning. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/usb/kalmia.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/kalmia.c b/drivers/net/usb/kalmia.c index 5662babf0583..3e37724d30ae 100644 --- a/drivers/net/usb/kalmia.c +++ b/drivers/net/usb/kalmia.c @@ -151,7 +151,7 @@ kalmia_bind(struct usbnet *dev, struct usb_interface *intf) status = kalmia_init_and_get_ethernet_addr(dev, ethernet_addr); - if (status < 0) { + if (status) { usb_set_intfdata(intf, NULL); usb_driver_release_interface(driver_of(intf), intf); return status; From 830218c1add1da16519b71909e5cf21522b7d062 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 24 Oct 2016 10:52:35 -0700 Subject: [PATCH 680/884] net: ipv6: Fix processing of RAs in presence of VRF rt6_add_route_info and rt6_add_dflt_router were updated to pull the FIB table from the device index, but the corresponding rt6_get_route_info and rt6_get_dflt_router functions were not leading to the failure to process RA's: ICMPv6: RA: ndisc_router_discovery failed to add default route Fix the 'get' functions by using the table id associated with the device when applicable. Also, now that default routes can be added to tables other than the default table, rt6_purge_dflt_routers needs to be updated as well to look at all tables. To handle that efficiently, add a flag to the table denoting if it is has a default route via RA. Fixes: ca254490c8dfd ("net: Add VRF support to IPv6 stack") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 2 ++ net/ipv6/route.c | 68 ++++++++++++++++++++++++++++++------------- 2 files changed, 50 insertions(+), 20 deletions(-) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index fb961a576abe..a74e2aa40ef4 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -230,6 +230,8 @@ struct fib6_table { rwlock_t tb6_lock; struct fib6_node tb6_root; struct inet_peer_base tb6_peers; + unsigned int flags; +#define RT6_TABLE_HAS_DFLT_ROUTER BIT(0) }; #define RT6_TABLE_UNSPEC RT_TABLE_UNSPEC diff --git a/net/ipv6/route.c b/net/ipv6/route.c index bdbc38e8bf29..3ac19eb81a86 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -102,11 +102,13 @@ static int rt6_score_route(struct rt6_info *rt, int oif, int strict); #ifdef CONFIG_IPV6_ROUTE_INFO static struct rt6_info *rt6_add_route_info(struct net *net, const struct in6_addr *prefix, int prefixlen, - const struct in6_addr *gwaddr, int ifindex, + const struct in6_addr *gwaddr, + struct net_device *dev, unsigned int pref); static struct rt6_info *rt6_get_route_info(struct net *net, const struct in6_addr *prefix, int prefixlen, - const struct in6_addr *gwaddr, int ifindex); + const struct in6_addr *gwaddr, + struct net_device *dev); #endif struct uncached_list { @@ -803,7 +805,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, rt = rt6_get_dflt_router(gwaddr, dev); else rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, - gwaddr, dev->ifindex); + gwaddr, dev); if (rt && !lifetime) { ip6_del_rt(rt); @@ -811,8 +813,8 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, } if (!rt && lifetime) - rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex, - pref); + rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, + dev, pref); else if (rt) rt->rt6i_flags = RTF_ROUTEINFO | (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); @@ -2325,13 +2327,16 @@ static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort) #ifdef CONFIG_IPV6_ROUTE_INFO static struct rt6_info *rt6_get_route_info(struct net *net, const struct in6_addr *prefix, int prefixlen, - const struct in6_addr *gwaddr, int ifindex) + const struct in6_addr *gwaddr, + struct net_device *dev) { + u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO; + int ifindex = dev->ifindex; struct fib6_node *fn; struct rt6_info *rt = NULL; struct fib6_table *table; - table = fib6_get_table(net, RT6_TABLE_INFO); + table = fib6_get_table(net, tb_id); if (!table) return NULL; @@ -2357,12 +2362,13 @@ out: static struct rt6_info *rt6_add_route_info(struct net *net, const struct in6_addr *prefix, int prefixlen, - const struct in6_addr *gwaddr, int ifindex, + const struct in6_addr *gwaddr, + struct net_device *dev, unsigned int pref) { struct fib6_config cfg = { .fc_metric = IP6_RT_PRIO_USER, - .fc_ifindex = ifindex, + .fc_ifindex = dev->ifindex, .fc_dst_len = prefixlen, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref), @@ -2371,7 +2377,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net, .fc_nlinfo.nl_net = net, }; - cfg.fc_table = l3mdev_fib_table_by_index(net, ifindex) ? : RT6_TABLE_INFO; + cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO, cfg.fc_dst = *prefix; cfg.fc_gateway = *gwaddr; @@ -2381,16 +2387,17 @@ static struct rt6_info *rt6_add_route_info(struct net *net, ip6_route_add(&cfg); - return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex); + return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev); } #endif struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev) { + u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT; struct rt6_info *rt; struct fib6_table *table; - table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT); + table = fib6_get_table(dev_net(dev), tb_id); if (!table) return NULL; @@ -2424,20 +2431,20 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, cfg.fc_gateway = *gwaddr; - ip6_route_add(&cfg); + if (!ip6_route_add(&cfg)) { + struct fib6_table *table; + + table = fib6_get_table(dev_net(dev), cfg.fc_table); + if (table) + table->flags |= RT6_TABLE_HAS_DFLT_ROUTER; + } return rt6_get_dflt_router(gwaddr, dev); } -void rt6_purge_dflt_routers(struct net *net) +static void __rt6_purge_dflt_routers(struct fib6_table *table) { struct rt6_info *rt; - struct fib6_table *table; - - /* NOTE: Keep consistent with rt6_get_dflt_router */ - table = fib6_get_table(net, RT6_TABLE_DFLT); - if (!table) - return; restart: read_lock_bh(&table->tb6_lock); @@ -2451,6 +2458,27 @@ restart: } } read_unlock_bh(&table->tb6_lock); + + table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER; +} + +void rt6_purge_dflt_routers(struct net *net) +{ + struct fib6_table *table; + struct hlist_head *head; + unsigned int h; + + rcu_read_lock(); + + for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { + head = &net->ipv6.fib_table_hash[h]; + hlist_for_each_entry_rcu(table, head, tb6_hlist) { + if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER) + __rt6_purge_dflt_routers(table); + } + } + + rcu_read_unlock(); } static void rtmsg_to_fib6_config(struct net *net, From d5d32e4b76687f4df9ad3ba8d3702b7347f51fa6 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 24 Oct 2016 12:27:23 -0700 Subject: [PATCH 681/884] net: ipv6: Do not consider link state for nexthop validation Similar to IPv4, do not consider link state when validating next hops. Currently, if the link is down default routes can fail to insert: $ ip -6 ro add vrf blue default via 2100:2::64 dev eth2 RTNETLINK answers: No route to host With this patch the command succeeds. Fixes: 8c14586fc320 ("net: ipv6: Use passed in table for nexthop lookups") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip6_route.h | 1 + net/ipv6/route.c | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index e0cd318d5103..f83e78d071a3 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -32,6 +32,7 @@ struct route_info { #define RT6_LOOKUP_F_SRCPREF_TMP 0x00000008 #define RT6_LOOKUP_F_SRCPREF_PUBLIC 0x00000010 #define RT6_LOOKUP_F_SRCPREF_COA 0x00000020 +#define RT6_LOOKUP_F_IGNORE_LINKSTATE 0x00000040 /* We do not (yet ?) support IPv6 jumbograms (RFC 2675) * Unlike IPv4, hdr->seg_len doesn't include the IPv6 header diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 3ac19eb81a86..947ed1ded026 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -658,7 +658,8 @@ static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict, struct net_device *dev = rt->dst.dev; if (dev && !netif_carrier_ok(dev) && - idev->cnf.ignore_routes_with_linkdown) + idev->cnf.ignore_routes_with_linkdown && + !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE)) goto out; if (rt6_check_expired(rt)) @@ -1052,6 +1053,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int strict = 0; strict |= flags & RT6_LOOKUP_F_IFACE; + strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE; if (net->ipv6.devconf_all->forwarding == 0) strict |= RT6_LOOKUP_F_REACHABLE; @@ -1791,7 +1793,7 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net, }; struct fib6_table *table; struct rt6_info *rt; - int flags = RT6_LOOKUP_F_IFACE; + int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE; table = fib6_get_table(net, cfg->fc_table); if (!table) From bc72f3dd89e087e33afe8c490fbe132e3dcd9afe Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 24 Oct 2016 23:40:30 +0200 Subject: [PATCH 682/884] flow_dissector: fix vlan tag handling gcc warns about an uninitialized pointer dereference in the vlan priority handling: net/core/flow_dissector.c: In function '__skb_flow_dissect': net/core/flow_dissector.c:281:61: error: 'vlan' may be used uninitialized in this function [-Werror=maybe-uninitialized] As pointed out by Jiri Pirko, the variable is never actually used without being initialized first as the only way it end up uninitialized is with skb_vlan_tag_present(skb)==true, and that means it does not get accessed. However, the warning hints at some related issues that I'm addressing here: - the second check for the vlan tag is different from the first one that tests the skb for being NULL first, causing both the warning and a possible NULL pointer dereference that was not entirely fixed. - The same patch that introduced the NULL pointer check dropped an earlier optimization that skipped the repeated check of the protocol type - The local '_vlan' variable is referenced through the 'vlan' pointer but the variable has gone out of scope by the time that it is accessed, causing undefined behavior Caching the result of the 'skb && skb_vlan_tag_present(skb)' check in a local variable allows the compiler to further optimize the later check. With those changes, the warning also disappears. Fixes: 3805a938a6c2 ("flow_dissector: Check skb for VLAN only if skb specified.") Fixes: d5709f7ab776 ("flow_dissector: For stripped vlan, get vlan info from skb->vlan_tci") Signed-off-by: Arnd Bergmann Acked-by: Jiri Pirko Acked-by: Eric Garver Signed-off-by: David S. Miller --- net/core/flow_dissector.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 44e6ba9d3a6b..ab193e5def07 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -246,13 +246,13 @@ ipv6: case htons(ETH_P_8021AD): case htons(ETH_P_8021Q): { const struct vlan_hdr *vlan; + struct vlan_hdr _vlan; + bool vlan_tag_present = skb && skb_vlan_tag_present(skb); - if (skb && skb_vlan_tag_present(skb)) + if (vlan_tag_present) proto = skb->protocol; - if (eth_type_vlan(proto)) { - struct vlan_hdr _vlan; - + if (!vlan_tag_present || eth_type_vlan(skb->protocol)) { vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan), data, hlen, &_vlan); if (!vlan) @@ -270,7 +270,7 @@ ipv6: FLOW_DISSECTOR_KEY_VLAN, target_container); - if (skb_vlan_tag_present(skb)) { + if (vlan_tag_present) { key_vlan->vlan_id = skb_vlan_tag_get_id(skb); key_vlan->vlan_priority = (skb_vlan_tag_get_prio(skb) >> VLAN_PRIO_SHIFT); From c121f72a66c5f92fbe2fc53baa274eef39875cec Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 24 Oct 2016 23:46:18 +0100 Subject: [PATCH 683/884] net: bgmac: fix spelling mistake: "connecton" -> "connection" trivial fix to spelling mistake in dev_err message Signed-off-by: Colin Ian King Acked-by: Jon Mason Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bgmac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index 856379cbb402..31ca204b38d2 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -1449,7 +1449,7 @@ static int bgmac_phy_connect(struct bgmac *bgmac) phy_dev = phy_connect(bgmac->net_dev, bus_id, &bgmac_adjust_link, PHY_INTERFACE_MODE_MII); if (IS_ERR(phy_dev)) { - dev_err(bgmac->dev, "PHY connecton failed\n"); + dev_err(bgmac->dev, "PHY connection failed\n"); return PTR_ERR(phy_dev); } From a3b8cb1f84a0de95323902c76bab245675d6d218 Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Thu, 22 Sep 2016 15:06:52 -0700 Subject: [PATCH 684/884] ixgbe: fix panic when using macvlan with l2-fwd-offload enabled Fix NULL pointer dereference in the case where a macvlan interface is brought up while the PF is still down: BUG: unable to handle kernel NULL pointer dereference at 0000000000000010 IP: [] ixgbe_alloc_rx_buffers+0x42/0x1a0 [ixgbe] Call Trace: [] ixgbe_configure_rx_ring+0x2eb/0x3d0 [ixgbe] [] ixgbe_fwd_ring_up+0xd1/0x380 [ixgbe] [] ixgbe_fwd_add+0x149/0x230 [ixgbe] [] macvlan_open+0x260/0x2b0 [macvlan] Reported-by: Matthew Garrett Signed-off-by: Emil Tantilov Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index a244d9a67264..bd93d823cc25 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -9135,10 +9135,14 @@ static void *ixgbe_fwd_add(struct net_device *pdev, struct net_device *vdev) goto fwd_add_err; fwd_adapter->pool = pool; fwd_adapter->real_adapter = adapter; - err = ixgbe_fwd_ring_up(vdev, fwd_adapter); - if (err) - goto fwd_add_err; - netif_tx_start_all_queues(vdev); + + if (netif_running(pdev)) { + err = ixgbe_fwd_ring_up(vdev, fwd_adapter); + if (err) + goto fwd_add_err; + netif_tx_start_all_queues(vdev); + } + return fwd_adapter; fwd_add_err: /* unwind counter and free adapter struct */ From ea6acb7ef78960e4b6f1cd8c4162a5e490e83dcd Mon Sep 17 00:00:00 2001 From: David Ertman Date: Tue, 20 Sep 2016 07:10:50 -0700 Subject: [PATCH 685/884] i40e: Fix configure TCs after initial DCB disable in commit a036244c068612a43fa8c0f33a0eb4daa4d8dba0 a fix was put into place to avoid a kernel panic when a non- supported traffic class configuration was put into place and then lldp was enabled/disabled on the link partner switch. This fix caused it to be necessary to unload/reload the driver to reenable DCB once a supported TC config was in place. The root cause of the original panic was that the function i40e_pf_get_default_tc was allowing for a default TC other than TC 0, and only TC 0 is supported as a default. This patch removes the get_default_tc function and replaces it with a #define since there is only one TC supported as a default. Change-Id: I448371974e946386d0a7718d73668b450b7c72ef Signed-off-by: Dave Ertman Tested-by: Ronald Bynoe Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e.h | 1 + drivers/net/ethernet/intel/i40e/i40e_main.c | 31 +++------------------ 2 files changed, 5 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 2030d7c1dc94..6d61e443bdf8 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -92,6 +92,7 @@ #define I40E_AQ_LEN 256 #define I40E_AQ_WORK_LIMIT 66 /* max number of VFs + a little */ #define I40E_MAX_USER_PRIORITY 8 +#define I40E_DEFAULT_TRAFFIC_CLASS BIT(0) #define I40E_DEFAULT_MSG_ENABLE 4 #define I40E_QUEUE_WAIT_RETRY_LIMIT 10 #define I40E_INT_NAME_STR_LEN (IFNAMSIZ + 16) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index ac1faee2a5b8..050d005d2ede 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -4640,29 +4640,6 @@ static u8 i40e_pf_get_num_tc(struct i40e_pf *pf) return num_tc; } -/** - * i40e_pf_get_default_tc - Get bitmap for first enabled TC - * @pf: PF being queried - * - * Return a bitmap for first enabled traffic class for this PF. - **/ -static u8 i40e_pf_get_default_tc(struct i40e_pf *pf) -{ - u8 enabled_tc = pf->hw.func_caps.enabled_tcmap; - u8 i = 0; - - if (!enabled_tc) - return 0x1; /* TC0 */ - - /* Find the first enabled TC */ - for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { - if (enabled_tc & BIT(i)) - break; - } - - return BIT(i); -} - /** * i40e_pf_get_pf_tc_map - Get bitmap for enabled traffic classes * @pf: PF being queried @@ -4673,7 +4650,7 @@ static u8 i40e_pf_get_tc_map(struct i40e_pf *pf) { /* If DCB is not enabled for this PF then just return default TC */ if (!(pf->flags & I40E_FLAG_DCB_ENABLED)) - return i40e_pf_get_default_tc(pf); + return I40E_DEFAULT_TRAFFIC_CLASS; /* SFP mode we want PF to be enabled for all TCs */ if (!(pf->flags & I40E_FLAG_MFP_ENABLED)) @@ -4683,7 +4660,7 @@ static u8 i40e_pf_get_tc_map(struct i40e_pf *pf) if (pf->hw.func_caps.iscsi) return i40e_get_iscsi_tc_map(pf); else - return i40e_pf_get_default_tc(pf); + return I40E_DEFAULT_TRAFFIC_CLASS; } /** @@ -5029,7 +5006,7 @@ static void i40e_dcb_reconfigure(struct i40e_pf *pf) if (v == pf->lan_vsi) tc_map = i40e_pf_get_tc_map(pf); else - tc_map = i40e_pf_get_default_tc(pf); + tc_map = I40E_DEFAULT_TRAFFIC_CLASS; #ifdef I40E_FCOE if (pf->vsi[v]->type == I40E_VSI_FCOE) tc_map = i40e_get_fcoe_tc_map(pf); @@ -5717,7 +5694,7 @@ static int i40e_handle_lldp_event(struct i40e_pf *pf, u8 type; /* Not DCB capable or capability disabled */ - if (!(pf->flags & I40E_FLAG_DCB_ENABLED)) + if (!(pf->flags & I40E_FLAG_DCB_CAPABLE)) return ret; /* Ignore if event is not for Nearest Bridge */ From 4c95aa5d8fc34be18fcab01b1c6251c8c2b61520 Mon Sep 17 00:00:00 2001 From: Guilherme G Piccoli Date: Thu, 22 Sep 2016 10:03:58 -0300 Subject: [PATCH 686/884] i40e: disable MSI-X interrupts if we cannot reserve enough vectors If we fail on allocating enough MSI-X interrupts, we should disable them since they were previously enabled in this point of code. Not disabling them can lead to WARN_ON() being triggered and subsequent failure in enabling MSI as a fallback; the below message was shown without this patch while we played with interrupt allocation in i40e driver: [ 21.461346] sysfs: cannot create duplicate filename '/devices/pci0007:00/0007:00:00.0/0007:01:00.3/msi_irqs' [ 21.461459] ------------[ cut here ]------------ [ 21.461514] WARNING: CPU: 64 PID: 1155 at fs/sysfs/dir.c:31 sysfs_warn_dup+0x88/0xc0 Also, we noticed that without this patch, if we modprobe the module without enough MSI-X interrupts (triggering the above warning), unload the module and re-load it again, we got a crash on the system. Signed-off-by: Guilherme G Piccoli Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 050d005d2ede..6abc1301a8e4 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -7684,6 +7684,7 @@ static int i40e_init_msix(struct i40e_pf *pf) pf->flags &= ~I40E_FLAG_MSIX_ENABLED; kfree(pf->msix_entries); pf->msix_entries = NULL; + pci_disable_msix(pf->pdev); return -ENODEV; } else if (v_actual == I40E_MIN_MSIX) { From 9ee7837449b3d6f0fcf9132c6b5e5aaa58cc67d4 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Mon, 24 Oct 2016 20:18:27 -0400 Subject: [PATCH 687/884] net sched filters: fix notification of filter delete with proper handle Daniel says: While trying out [1][2], I noticed that tc monitor doesn't show the correct handle on delete: $ tc monitor qdisc clsact ffff: dev eno1 parent ffff:fff1 filter dev eno1 ingress protocol all pref 49152 bpf handle 0x2a [...] deleted filter dev eno1 ingress protocol all pref 49152 bpf handle 0xf3be0c80 some context to explain the above: The user identity of any tc filter is represented by a 32-bit identifier encoded in tcm->tcm_handle. Example 0x2a in the bpf filter above. A user wishing to delete, get or even modify a specific filter uses this handle to reference it. Every classifier is free to provide its own semantics for the 32 bit handle. Example: classifiers like u32 use schemes like 800:1:801 to describe the semantics of their filters represented as hash table, bucket and node ids etc. Classifiers also have internal per-filter representation which is different from this externally visible identity. Most classifiers set this internal representation to be a pointer address (which allows fast retrieval of said filters in their implementations). This internal representation is referenced with the "fh" variable in the kernel control code. When a user successfuly deletes a specific filter, by specifying the correct tcm->tcm_handle, an event is generated to user space which indicates which specific filter was deleted. Before this patch, the "fh" value was sent to user space as the identity. As an example what is shown in the sample bpf filter delete event above is 0xf3be0c80. This is infact a 32-bit truncation of 0xffff8807f3be0c80 which happens to be a 64-bit memory address of the internal filter representation (address of the corresponding filter's struct cls_bpf_prog); After this patch the appropriate user identifiable handle as encoded in the originating request tcm->tcm_handle is generated in the event. One of the cardinal rules of netlink rules is to be able to take an event (such as a delete in this case) and reflect it back to the kernel and successfully delete the filter. This patch achieves that. Note, this issue has existed since the original TC action infrastructure code patch back in 2004 as found in: https://git.kernel.org/cgit/linux/kernel/git/history/history.git/commit/ [1] http://patchwork.ozlabs.org/patch/682828/ [2] http://patchwork.ozlabs.org/patch/682829/ Fixes: 4e54c4816bfe ("[NET]: Add tc extensions infrastructure.") Reported-by: Daniel Borkmann Acked-by: Cong Wang Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/cls_api.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 2ee29a3375f6..2b2a7974e4bb 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -345,7 +345,8 @@ replay: if (err == 0) { struct tcf_proto *next = rtnl_dereference(tp->next); - tfilter_notify(net, skb, n, tp, fh, + tfilter_notify(net, skb, n, tp, + t->tcm_handle, RTM_DELTFILTER, false); if (tcf_destroy(tp, false)) RCU_INIT_POINTER(*back, next); From 599b076d15ee3ead7af20fc907079df00b2d59a0 Mon Sep 17 00:00:00 2001 From: Huaibin Wang Date: Mon, 26 Sep 2016 09:51:18 +0200 Subject: [PATCH 688/884] i40e: fix call of ndo_dflt_bridge_getlink() Order of arguments is wrong. The wrong code has been introduced by commit 7d4f8d871ab1, but is compiled only since commit 9df70b66418e. Note that this may break netlink dumps. Fixes: 9df70b66418e ("i40e: Remove incorrect #ifdef's") Fixes: 7d4f8d871ab1 ("switchdev; add VLAN support for port's bridge_getlink") CC: Carolyn Wyborny Signed-off-by: Huaibin Wang Signed-off-by: Nicolas Dichtel Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/i40e/i40e_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 6abc1301a8e4..31c97e3937a4 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -9034,7 +9034,7 @@ static int i40e_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, return 0; return ndo_dflt_bridge_getlink(skb, pid, seq, dev, veb->bridge_mode, - nlflags, 0, 0, filter_mask, NULL); + 0, 0, nlflags, filter_mask, NULL); } /* Hardware supports L4 tunnel length of 128B (=2^7) which includes From 9db4f36e82c2394c958d8e42a498fb664684bc22 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 27 Oct 2016 15:49:12 -0700 Subject: [PATCH 689/884] mm: remove unused variable in memory hotplug When I removed the per-zone bitlock hashed waitqueues in commit 9dcb8b685fc3 ("mm: remove per-zone hashtable of bitlock waitqueues"), I removed all the magic hotplug memory initialization of said waitqueues too. But when I actually _tested_ the resulting build, I stupidly assumed that "allmodconfig" would enable memory hotplug. And it doesn't, because it enables KASAN instead, which then disables hotplug memory support. As a result, my build test of the per-zone waitqueues was totally broken, and I didn't notice that the compiler warns about the now unused iterator variable 'i'. I guess I should be happy that that seems to be the worst breakage from my clearly horribly failed test coverage. Reported-by: Stephen Rothwell Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 1 - 1 file changed, 1 deletion(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index b18dab401be6..cad4b9125695 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -2117,7 +2117,6 @@ void try_offline_node(int nid) unsigned long start_pfn = pgdat->node_start_pfn; unsigned long end_pfn = start_pfn + pgdat->node_spanned_pages; unsigned long pfn; - int i; for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { unsigned long section_nr = pfn_to_section_nr(pfn); From 867dfe342118b1ea0256a85f7c0d9ceb0ead032a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 25 Oct 2016 17:52:04 +0200 Subject: [PATCH 690/884] nvdimm: make CONFIG_NVDIMM_DAX 'bool' A bugfix just tried to address a randconfig build problem and introduced a variant of the same problem: with CONFIG_LIBNVDIMM=y and CONFIG_NVDIMM_DAX=m, the nvdimm module now fails to link: drivers/nvdimm/built-in.o: In function `to_nd_device_type': bus.c:(.text+0x1b5d): undefined reference to `is_nd_dax' drivers/nvdimm/built-in.o: In function `nd_region_notify_driver_action.constprop.2': region_devs.c:(.text+0x6b6c): undefined reference to `is_nd_dax' region_devs.c:(.text+0x6b8c): undefined reference to `to_nd_dax' drivers/nvdimm/built-in.o: In function `nd_region_probe': region.c:(.text+0x70f3): undefined reference to `nd_dax_create' drivers/nvdimm/built-in.o: In function `mode_show': namespace_devs.c:(.text+0xa196): undefined reference to `is_nd_dax' drivers/nvdimm/built-in.o: In function `nvdimm_namespace_common_probe': (.text+0xa55f): undefined reference to `is_nd_dax' drivers/nvdimm/built-in.o: In function `nvdimm_namespace_common_probe': (.text+0xa56e): undefined reference to `to_nd_dax' This reverts the earlier fix, making NVDIMM_DAX a 'bool' option again as it should be (it gets linked into the libnvdimm module). To fix the original problem, I'm adding a dependency on LIBNVDIMM to DEV_DAX_PMEM, which ensures we can't have that one built-in if the rest is a module. Fixes: 4e65e9381c7a ("/dev/dax: fix Kconfig dependency build breakage") Signed-off-by: Arnd Bergmann Reviewed-by: Ross Zwisler Signed-off-by: Dan Williams --- drivers/dax/Kconfig | 2 +- drivers/nvdimm/Kconfig | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig index daadd20aa936..3e2ab3b14eea 100644 --- a/drivers/dax/Kconfig +++ b/drivers/dax/Kconfig @@ -14,7 +14,7 @@ if DEV_DAX config DEV_DAX_PMEM tristate "PMEM DAX: direct access to persistent memory" - depends on NVDIMM_DAX + depends on LIBNVDIMM && NVDIMM_DAX default DEV_DAX help Support raw access to persistent memory. Note that this diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig index 8b2b740d6679..124c2432ac9c 100644 --- a/drivers/nvdimm/Kconfig +++ b/drivers/nvdimm/Kconfig @@ -89,7 +89,7 @@ config NVDIMM_PFN Select Y if unsure config NVDIMM_DAX - tristate "NVDIMM DAX: Raw access to persistent memory" + bool "NVDIMM DAX: Raw access to persistent memory" default LIBNVDIMM depends on NVDIMM_PFN help From 67463e54beb63114965c3d2c7cb81d1d524e2697 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 27 Oct 2016 16:23:01 -0700 Subject: [PATCH 691/884] Allow KASAN and HOTPLUG_MEMORY to co-exist when doing build testing No, KASAN may not be able to co-exist with HOTPLUG_MEMORY at runtime, but for build testing there is no reason not to allow them together. This hopefully means better build coverage and fewer embarrasing silly problems like the one fixed by commit 9db4f36e82c2 ("mm: remove unused variable in memory hotplug") in the future. Cc: Stephen Rothwell Cc: Andrey Ryabinin Cc: Alexander Potapenko Signed-off-by: Linus Torvalds --- mm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/Kconfig b/mm/Kconfig index be0ee11fa0d9..86e3e0e74d20 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -187,7 +187,7 @@ config MEMORY_HOTPLUG bool "Allow for memory hot-add" depends on SPARSEMEM || X86_64_ACPI_NUMA depends on ARCH_ENABLE_MEMORY_HOTPLUG - depends on !KASAN + depends on COMPILE_TEST || !KASAN config MEMORY_HOTPLUG_SPARSE def_bool y From 52e73eb2872c9af6f382b2b22954ca8214397a4e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 27 Oct 2016 17:04:05 -0700 Subject: [PATCH 692/884] device-dax: fix percpu_ref_exit ordering We need to wait until the percpu_ref is released before exit. Otherwise, we sometimes lose the race and trigger this new warning that was added in v4.9 (commit a67823c1ed10 "percpu-refcount: init ->confirm_switch member properly"): WARNING: CPU: 0 PID: 3629 at lib/percpu-refcount.c:107 percpu_ref_exit+0x51/0x60 [..] Call Trace: [] dump_stack+0x85/0xc2 [] __warn+0xcb/0xf0 [] warn_slowpath_null+0x1d/0x20 [] percpu_ref_exit+0x51/0x60 [] dax_pmem_percpu_exit+0x1a/0x50 [dax_pmem] [] devm_action_release+0xf/0x20 Cc: Fixes: ab68f2622136 ("/dev/dax, pmem: direct access to persistent memory") Signed-off-by: Dan Williams --- drivers/dax/pmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dax/pmem.c b/drivers/dax/pmem.c index 9630d8837ba9..4a15fa5df98b 100644 --- a/drivers/dax/pmem.c +++ b/drivers/dax/pmem.c @@ -44,7 +44,6 @@ static void dax_pmem_percpu_exit(void *data) dev_dbg(dax_pmem->dev, "%s\n", __func__); percpu_ref_exit(ref); - wait_for_completion(&dax_pmem->cmp); } static void dax_pmem_percpu_kill(void *data) @@ -54,6 +53,7 @@ static void dax_pmem_percpu_kill(void *data) dev_dbg(dax_pmem->dev, "%s\n", __func__); percpu_ref_kill(ref); + wait_for_completion(&dax_pmem->cmp); } static int dax_pmem_probe(struct device *dev) From 86d9f48534e800e4d62cdc1b5aaf539f4c1d47d6 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Thu, 27 Oct 2016 17:46:18 -0700 Subject: [PATCH 693/884] mm/slab: fix kmemcg cache creation delayed issue There is a bug report that SLAB makes extreme load average due to over 2000 kworker thread. https://bugzilla.kernel.org/show_bug.cgi?id=172981 This issue is caused by kmemcg feature that try to create new set of kmem_caches for each memcg. Recently, kmem_cache creation is slowed by synchronize_sched() and futher kmem_cache creation is also delayed since kmem_cache creation is synchronized by a global slab_mutex lock. So, the number of kworker that try to create kmem_cache increases quietly. synchronize_sched() is for lockless access to node's shared array but it's not needed when a new kmem_cache is created. So, this patch rules out that case. Fixes: 801faf0db894 ("mm/slab: lockless decision to grow cache") Link: http://lkml.kernel.org/r/1475734855-4837-1-git-send-email-iamjoonsoo.kim@lge.com Reported-by: Doug Smythies Tested-by: Doug Smythies Signed-off-by: Joonsoo Kim Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/slab.c b/mm/slab.c index 090fb26b3a39..c451e3f6bbf6 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -966,7 +966,7 @@ static int setup_kmem_cache_node(struct kmem_cache *cachep, * guaranteed to be valid until irq is re-enabled, because it will be * freed after synchronize_sched(). */ - if (force_change) + if (old_shared && force_change) synchronize_sched(); fail: From b274c0bb394c6a69ac12feac7c2db81f5aff5a55 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Thu, 27 Oct 2016 17:46:21 -0700 Subject: [PATCH 694/884] kcov: properly check if we are in an interrupt in_interrupt() returns a nonzero value when we are either in an interrupt or have bh disabled via local_bh_disable(). Since we are interested in only ignoring coverage from actual interrupts, do a proper check instead of just calling in_interrupt(). As a result of this change, kcov will start to collect coverage from within local_bh_disable()/local_bh_enable() sections. Link: http://lkml.kernel.org/r/1476115803-20712-1-git-send-email-andreyknvl@google.com Signed-off-by: Andrey Konovalov Acked-by: Dmitry Vyukov Cc: Nicolai Stange Cc: Andrey Ryabinin Cc: Kees Cook Cc: James Morse Cc: Vegard Nossum Cc: Quentin Casasnovas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kcov.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/kernel/kcov.c b/kernel/kcov.c index 8d44b3fea9d0..30e6d05aa5a9 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -53,8 +53,15 @@ void notrace __sanitizer_cov_trace_pc(void) /* * We are interested in code coverage as a function of a syscall inputs, * so we ignore code executed in interrupts. + * The checks for whether we are in an interrupt are open-coded, because + * 1. We can't use in_interrupt() here, since it also returns true + * when we are inside local_bh_disable() section. + * 2. We don't want to use (in_irq() | in_serving_softirq() | in_nmi()), + * since that leads to slower generated code (three separate tests, + * one for each of the flags). */ - if (!t || in_interrupt()) + if (!t || (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_OFFSET + | NMI_MASK))) return; mode = READ_ONCE(t->kcov_mode); if (mode == KCOV_MODE_TRACE) { From 21753583056d48a5fad964d6f272e28168426845 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 27 Oct 2016 17:46:24 -0700 Subject: [PATCH 695/884] h8300: fix syscall restarting Back in commit f56141e3e2d9 ("all arches, signal: move restart_block to struct task_struct"), all architectures and core code were changed to use task_struct::restart_block. However, when h8300 support was subsequently restored in v4.2, it was not updated to account for this, and maintains thread_info::restart_block, which is not kept in sync. This patch drops the redundant restart_block from thread_info, and moves h8300 to the common one in task_struct, ensuring that syscall restarting always works as expected. Fixes: f56141e3e2d9 ("all arches, signal: move restart_block to struct task_struct") Link: http://lkml.kernel.org/r/1476714934-11635-1-git-send-email-mark.rutland@arm.com Signed-off-by: Mark Rutland Cc: Andy Lutomirski Cc: Yoshinori Sato Cc: uclinux-h8-devel@lists.sourceforge.jp Cc: [4.2+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/h8300/include/asm/thread_info.h | 4 ---- arch/h8300/kernel/signal.c | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/h8300/include/asm/thread_info.h b/arch/h8300/include/asm/thread_info.h index b408fe660cf8..3cef06875f5c 100644 --- a/arch/h8300/include/asm/thread_info.h +++ b/arch/h8300/include/asm/thread_info.h @@ -31,7 +31,6 @@ struct thread_info { int cpu; /* cpu we're on */ int preempt_count; /* 0 => preemptable, <0 => BUG */ mm_segment_t addr_limit; - struct restart_block restart_block; }; /* @@ -44,9 +43,6 @@ struct thread_info { .cpu = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ } #define init_thread_info (init_thread_union.thread_info) diff --git a/arch/h8300/kernel/signal.c b/arch/h8300/kernel/signal.c index ad1f81f574e5..7138303cbbf2 100644 --- a/arch/h8300/kernel/signal.c +++ b/arch/h8300/kernel/signal.c @@ -79,7 +79,7 @@ restore_sigcontext(struct sigcontext *usc, int *pd0) unsigned int er0; /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; /* restore passed registers */ #define COPY(r) do { err |= get_user(regs->r, &usc->sc_##r); } while (0) From 1bc11d70b5db7c6bb1414b283d7f09b1fe1ac0d0 Mon Sep 17 00:00:00 2001 From: Alexander Polakov Date: Thu, 27 Oct 2016 17:46:27 -0700 Subject: [PATCH 696/884] mm/list_lru.c: avoid error-path NULL pointer deref As described in https://bugzilla.kernel.org/show_bug.cgi?id=177821: After some analysis it seems to be that the problem is in alloc_super(). In case list_lru_init_memcg() fails it goes into destroy_super(), which calls list_lru_destroy(). And in list_lru_init() we see that in case memcg_init_list_lru() fails, lru->node is freed, but not set NULL, which then leads list_lru_destroy() to believe it is initialized and call memcg_destroy_list_lru(). memcg_destroy_list_lru() in turn can access lru->node[i].memcg_lrus, which is NULL. [akpm@linux-foundation.org: add comment] Signed-off-by: Alexander Polakov Acked-by: Vladimir Davydov Cc: Al Viro Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/list_lru.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mm/list_lru.c b/mm/list_lru.c index 1d05cb9d363d..234676e31edd 100644 --- a/mm/list_lru.c +++ b/mm/list_lru.c @@ -554,6 +554,8 @@ int __list_lru_init(struct list_lru *lru, bool memcg_aware, err = memcg_init_list_lru(lru, memcg_aware); if (err) { kfree(lru->node); + /* Do this so a list_lru_destroy() doesn't crash: */ + lru->node = NULL; goto out; } From 1f84a18fc010d7a62667199c9be35872bbf31526 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 27 Oct 2016 17:46:29 -0700 Subject: [PATCH 697/884] mm: page_alloc: use KERN_CONT where appropriate Recent changes to printk require KERN_CONT uses to continue logging messages. So add KERN_CONT where necessary. [akpm@linux-foundation.org: coding-style fixes] Fixes: 4bcc595ccd80 ("printk: reinstate KERN_CONT for printing continuation lines") Link: http://lkml.kernel.org/r/c7df37c8665134654a17aaeb8b9f6ace1d6db58b.1476239034.git.joe@perches.com Reported-by: Mark Rutland Signed-off-by: Joe Perches Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index de7c6e43b1c9..8fd42aa7c4bd 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4224,7 +4224,7 @@ static void show_migration_types(unsigned char type) } *p = '\0'; - printk("(%s) ", tmp); + printk(KERN_CONT "(%s) ", tmp); } /* @@ -4335,7 +4335,8 @@ void show_free_areas(unsigned int filter) free_pcp += per_cpu_ptr(zone->pageset, cpu)->pcp.count; show_node(zone); - printk("%s" + printk(KERN_CONT + "%s" " free:%lukB" " min:%lukB" " low:%lukB" @@ -4382,8 +4383,8 @@ void show_free_areas(unsigned int filter) K(zone_page_state(zone, NR_FREE_CMA_PAGES))); printk("lowmem_reserve[]:"); for (i = 0; i < MAX_NR_ZONES; i++) - printk(" %ld", zone->lowmem_reserve[i]); - printk("\n"); + printk(KERN_CONT " %ld", zone->lowmem_reserve[i]); + printk(KERN_CONT "\n"); } for_each_populated_zone(zone) { @@ -4394,7 +4395,7 @@ void show_free_areas(unsigned int filter) if (skip_free_areas_node(filter, zone_to_nid(zone))) continue; show_node(zone); - printk("%s: ", zone->name); + printk(KERN_CONT "%s: ", zone->name); spin_lock_irqsave(&zone->lock, flags); for (order = 0; order < MAX_ORDER; order++) { @@ -4412,11 +4413,12 @@ void show_free_areas(unsigned int filter) } spin_unlock_irqrestore(&zone->lock, flags); for (order = 0; order < MAX_ORDER; order++) { - printk("%lu*%lukB ", nr[order], K(1UL) << order); + printk(KERN_CONT "%lu*%lukB ", + nr[order], K(1UL) << order); if (nr[order]) show_migration_types(types[order]); } - printk("= %lukB\n", K(total)); + printk(KERN_CONT "= %lukB\n", K(total)); } hugetlb_show_meminfo(); From 07a63c41fa1f6533f5668e5b33a295bfd63aa534 Mon Sep 17 00:00:00 2001 From: Aruna Ramakrishna Date: Thu, 27 Oct 2016 17:46:32 -0700 Subject: [PATCH 698/884] mm/slab: improve performance of gathering slabinfo stats On large systems, when some slab caches grow to millions of objects (and many gigabytes), running 'cat /proc/slabinfo' can take up to 1-2 seconds. During this time, interrupts are disabled while walking the slab lists (slabs_full, slabs_partial, and slabs_free) for each node, and this sometimes causes timeouts in other drivers (for instance, Infiniband). This patch optimizes 'cat /proc/slabinfo' by maintaining a counter for total number of allocated slabs per node, per cache. This counter is updated when a slab is created or destroyed. This enables us to skip traversing the slabs_full list while gathering slabinfo statistics, and since slabs_full tends to be the biggest list when the cache is large, it results in a dramatic performance improvement. Getting slabinfo statistics now only requires walking the slabs_free and slabs_partial lists, and those lists are usually much smaller than slabs_full. We tested this after growing the dentry cache to 70GB, and the performance improved from 2s to 5ms. Link: http://lkml.kernel.org/r/1472517876-26814-1-git-send-email-aruna.ramakrishna@oracle.com Signed-off-by: Aruna Ramakrishna Acked-by: David Rientjes Cc: Mike Kravetz Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab.c | 43 +++++++++++++++++++++++++++---------------- mm/slab.h | 1 + 2 files changed, 28 insertions(+), 16 deletions(-) diff --git a/mm/slab.c b/mm/slab.c index c451e3f6bbf6..0b0550ca85b4 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -233,6 +233,7 @@ static void kmem_cache_node_init(struct kmem_cache_node *parent) spin_lock_init(&parent->list_lock); parent->free_objects = 0; parent->free_touched = 0; + parent->num_slabs = 0; } #define MAKE_LIST(cachep, listp, slab, nodeid) \ @@ -1382,24 +1383,27 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid) for_each_kmem_cache_node(cachep, node, n) { unsigned long active_objs = 0, num_objs = 0, free_objects = 0; unsigned long active_slabs = 0, num_slabs = 0; + unsigned long num_slabs_partial = 0, num_slabs_free = 0; + unsigned long num_slabs_full; spin_lock_irqsave(&n->list_lock, flags); - list_for_each_entry(page, &n->slabs_full, lru) { - active_objs += cachep->num; - active_slabs++; - } + num_slabs = n->num_slabs; list_for_each_entry(page, &n->slabs_partial, lru) { active_objs += page->active; - active_slabs++; + num_slabs_partial++; } list_for_each_entry(page, &n->slabs_free, lru) - num_slabs++; + num_slabs_free++; free_objects += n->free_objects; spin_unlock_irqrestore(&n->list_lock, flags); - num_slabs += active_slabs; num_objs = num_slabs * cachep->num; + active_slabs = num_slabs - num_slabs_free; + num_slabs_full = num_slabs - + (num_slabs_partial + num_slabs_free); + active_objs += (num_slabs_full * cachep->num); + pr_warn(" node %d: slabs: %ld/%ld, objs: %ld/%ld, free: %ld\n", node, active_slabs, num_slabs, active_objs, num_objs, free_objects); @@ -2314,6 +2318,7 @@ static int drain_freelist(struct kmem_cache *cache, page = list_entry(p, struct page, lru); list_del(&page->lru); + n->num_slabs--; /* * Safe to drop the lock. The slab is no longer linked * to the cache. @@ -2752,6 +2757,8 @@ static void cache_grow_end(struct kmem_cache *cachep, struct page *page) list_add_tail(&page->lru, &(n->slabs_free)); else fixup_slab_list(cachep, n, page, &list); + + n->num_slabs++; STATS_INC_GROWN(cachep); n->free_objects += cachep->num - page->active; spin_unlock(&n->list_lock); @@ -3443,6 +3450,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp, page = list_last_entry(&n->slabs_free, struct page, lru); list_move(&page->lru, list); + n->num_slabs--; } } @@ -4099,6 +4107,8 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo) unsigned long num_objs; unsigned long active_slabs = 0; unsigned long num_slabs, free_objects = 0, shared_avail = 0; + unsigned long num_slabs_partial = 0, num_slabs_free = 0; + unsigned long num_slabs_full = 0; const char *name; char *error = NULL; int node; @@ -4111,33 +4121,34 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo) check_irq_on(); spin_lock_irq(&n->list_lock); - list_for_each_entry(page, &n->slabs_full, lru) { - if (page->active != cachep->num && !error) - error = "slabs_full accounting error"; - active_objs += cachep->num; - active_slabs++; - } + num_slabs += n->num_slabs; + list_for_each_entry(page, &n->slabs_partial, lru) { if (page->active == cachep->num && !error) error = "slabs_partial accounting error"; if (!page->active && !error) error = "slabs_partial accounting error"; active_objs += page->active; - active_slabs++; + num_slabs_partial++; } + list_for_each_entry(page, &n->slabs_free, lru) { if (page->active && !error) error = "slabs_free accounting error"; - num_slabs++; + num_slabs_free++; } + free_objects += n->free_objects; if (n->shared) shared_avail += n->shared->avail; spin_unlock_irq(&n->list_lock); } - num_slabs += active_slabs; num_objs = num_slabs * cachep->num; + active_slabs = num_slabs - num_slabs_free; + num_slabs_full = num_slabs - (num_slabs_partial + num_slabs_free); + active_objs += (num_slabs_full * cachep->num); + if (num_objs - active_objs != free_objects && !error) error = "free_objects accounting error"; diff --git a/mm/slab.h b/mm/slab.h index 9653f2e2591a..bc05fdc3edce 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -432,6 +432,7 @@ struct kmem_cache_node { struct list_head slabs_partial; /* partial list first, better asm code */ struct list_head slabs_full; struct list_head slabs_free; + unsigned long num_slabs; unsigned long free_objects; unsigned int free_limit; unsigned int colour_next; /* Per-node cache coloring */ From 8c8d4d45204902e144abc0f15b7c658828028fa1 Mon Sep 17 00:00:00 2001 From: Aristeu Rozanski Date: Thu, 27 Oct 2016 17:46:35 -0700 Subject: [PATCH 699/884] ipc: account for kmem usage on mqueue and msg When kmem accounting switched from account by default to only account if flagged by __GFP_ACCOUNT, IPC mqueue and messages was left out. The production use case at hand is that mqueues should be customizable via sysctls in Docker containers in a Kubernetes cluster. This can only be safely allowed to the users of the cluster (without the risk that they can cause resource shortage on a node, influencing other users' containers) if all resources they control are bounded, i.e. accounted for. Link: http://lkml.kernel.org/r/1476806075-1210-1-git-send-email-arozansk@redhat.com Signed-off-by: Aristeu Rozanski Reported-by: Stefan Schimanski Acked-by: Davidlohr Bueso Cc: Alexey Dobriyan Cc: Johannes Weiner Cc: Michal Hocko Cc: Vladimir Davydov Cc: Stefan Schimanski Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- ipc/msgutil.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ipc/msgutil.c b/ipc/msgutil.c index a521999de4f1..bf74eaa5c39f 100644 --- a/ipc/msgutil.c +++ b/ipc/msgutil.c @@ -53,7 +53,7 @@ static struct msg_msg *alloc_msg(size_t len) size_t alen; alen = min(len, DATALEN_MSG); - msg = kmalloc(sizeof(*msg) + alen, GFP_KERNEL); + msg = kmalloc(sizeof(*msg) + alen, GFP_KERNEL_ACCOUNT); if (msg == NULL) return NULL; @@ -65,7 +65,7 @@ static struct msg_msg *alloc_msg(size_t len) while (len > 0) { struct msg_msgseg *seg; alen = min(len, DATALEN_SEG); - seg = kmalloc(sizeof(*seg) + alen, GFP_KERNEL); + seg = kmalloc(sizeof(*seg) + alen, GFP_KERNEL_ACCOUNT); if (seg == NULL) goto out_err; *pseg = seg; From c0a0aba8e478229b2f0956918542152fbad3f794 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 27 Oct 2016 17:46:38 -0700 Subject: [PATCH 700/884] kconfig.h: remove config_enabled() macro The use of config_enabled() is ambiguous. For config options, IS_ENABLED(), IS_REACHABLE(), etc. will make intention clearer. Sometimes config_enabled() has been used for non-config options because it is useful to check whether the given symbol is defined or not. I have been tackling on deprecating config_enabled(), and now is the time to finish this work. Some new users have appeared for v4.9-rc1, but it is trivial to replace them: - arch/x86/mm/kaslr.c replace config_enabled() with IS_ENABLED() because CONFIG_X86_ESPFIX64 and CONFIG_EFI are boolean. - include/asm-generic/export.h replace config_enabled() with __is_defined(). Then, config_enabled() can be removed now. Going forward, please use IS_ENABLED(), IS_REACHABLE(), etc. for config options, and __is_defined() for non-config symbols. Link: http://lkml.kernel.org/r/1476616078-32252-1-git-send-email-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Acked-by: Ingo Molnar Acked-by: Nicolas Pitre Cc: Peter Oberparleiter Cc: Arnd Bergmann Cc: Kees Cook Cc: Michal Marek Cc: "H. Peter Anvin" Cc: Thomas Gleixner Cc: Thomas Garnier Cc: Paul Bolle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/kaslr.c | 6 +++--- include/asm-generic/export.h | 2 +- include/linux/kconfig.h | 5 ++--- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index ddd2661c4502..887e57182716 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -104,10 +104,10 @@ void __init kernel_randomize_memory(void) * consistent with the vaddr_start/vaddr_end variables. */ BUILD_BUG_ON(vaddr_start >= vaddr_end); - BUILD_BUG_ON(config_enabled(CONFIG_X86_ESPFIX64) && + BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_ESPFIX64) && vaddr_end >= EFI_VA_START); - BUILD_BUG_ON((config_enabled(CONFIG_X86_ESPFIX64) || - config_enabled(CONFIG_EFI)) && + BUILD_BUG_ON((IS_ENABLED(CONFIG_X86_ESPFIX64) || + IS_ENABLED(CONFIG_EFI)) && vaddr_end >= __START_KERNEL_map); BUILD_BUG_ON(vaddr_end > __START_KERNEL_map); diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h index 43199a049da5..63554e9f6e0c 100644 --- a/include/asm-generic/export.h +++ b/include/asm-generic/export.h @@ -70,7 +70,7 @@ KSYM(__kcrctab_\name): #include #define __EXPORT_SYMBOL(sym, val, sec) \ - __cond_export_sym(sym, val, sec, config_enabled(__KSYM_##sym)) + __cond_export_sym(sym, val, sec, __is_defined(__KSYM_##sym)) #define __cond_export_sym(sym, val, sec, conf) \ ___cond_export_sym(sym, val, sec, conf) #define ___cond_export_sym(sym, val, sec, enabled) \ diff --git a/include/linux/kconfig.h b/include/linux/kconfig.h index 15ec117ec537..8f2e059e4d45 100644 --- a/include/linux/kconfig.h +++ b/include/linux/kconfig.h @@ -31,7 +31,6 @@ * When CONFIG_BOOGER is not defined, we generate a (... 1, 0) pair, and when * the last step cherry picks the 2nd arg, we get a zero. */ -#define config_enabled(cfg) ___is_defined(cfg) #define __is_defined(x) ___is_defined(x) #define ___is_defined(val) ____is_defined(__ARG_PLACEHOLDER_##val) #define ____is_defined(arg1_or_junk) __take_second_arg(arg1_or_junk 1, 0) @@ -41,13 +40,13 @@ * otherwise. For boolean options, this is equivalent to * IS_ENABLED(CONFIG_FOO). */ -#define IS_BUILTIN(option) config_enabled(option) +#define IS_BUILTIN(option) __is_defined(option) /* * IS_MODULE(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'm', 0 * otherwise. */ -#define IS_MODULE(option) config_enabled(option##_MODULE) +#define IS_MODULE(option) __is_defined(option##_MODULE) /* * IS_REACHABLE(CONFIG_FOO) evaluates to 1 if the currently compiled From 0e07f663c90154079b287931257defbdb1d5405a Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 27 Oct 2016 17:46:41 -0700 Subject: [PATCH 701/884] latent_entropy: raise CONFIG_FRAME_WARN by default When building with the latent_entropy plugin, set the default CONFIG_FRAME_WARN to 2048, since some __init functions have many basic blocks that, when instrumented by the latent_entropy plugin, grow beyond 1024 byte stack size on 32-bit builds. Link: http://lkml.kernel.org/r/20161018211216.GA39687@beast Signed-off-by: Kees Cook Reported-by: kbuild test robot Cc: Emese Revfy Cc: Ingo Molnar Cc: Michal Marek Cc: "Paul E. McKenney" Cc: Dan Williams Cc: Andrey Ryabinin Cc: Josh Poimboeuf Cc: Tejun Heo Cc: Nikolay Aleksandrov Cc: Dmitry Vyukov Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 33bc56cf60d7..b01e547d4d04 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -198,6 +198,7 @@ config FRAME_WARN int "Warn for stack frames larger than (needs gcc 4.4)" range 0 8192 default 0 if KASAN + default 2048 if GCC_PLUGIN_LATENT_ENTROPY default 1024 if !64BIT default 2048 if 64BIT help From 02754e0a484a50a92d44c38879f2cb2792ebc572 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Thu, 27 Oct 2016 17:46:44 -0700 Subject: [PATCH 702/884] lib/stackdepot.c: bump stackdepot capacity from 16MB to 128MB KASAN uses stackdepot to memorize stacks for all kmalloc/kfree calls. Current stackdepot capacity is 16MB (1024 top level entries x 4 pages on second level). Size of each stack is (num_frames + 3) * sizeof(long). Which gives us ~84K stacks. This capacity was chosen empirically and it is enough to run kernel normally. However, when lots of configs are enabled and a fuzzer tries to maximize code coverage, it easily hits the limit within tens of minutes. I've tested for long a time with number of top level entries bumped 4x (4096). And I think I've seen overflow only once. But I don't have all configs enabled and code coverage has not reached maximum yet. So bump it 8x to 8192. Since we have two-level table, memory cost of this is very moderate -- currently the top-level table is 8KB, with this patch it is 64KB, which is negligible under KASAN. Here is some approx math. 128MB allows us to memorize ~670K stacks (assuming stack is ~200b). I've grepped kernel for kmalloc|kfree|kmem_cache_alloc|kmem_cache_free| kzalloc|kstrdup|kstrndup|kmemdup and it gives ~60K matches. Most of alloc/free call sites are reachable with only one stack. But some utility functions can have large fanout. Assuming average fanout is 5x, total number of alloc/free stacks is ~300K. Link: http://lkml.kernel.org/r/1476458416-122131-1-git-send-email-dvyukov@google.com Signed-off-by: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Joonsoo Kim Cc: Baozeng Ding Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/stackdepot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 60f77f1d470a..4d830e299989 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -50,7 +50,7 @@ STACK_ALLOC_ALIGN) #define STACK_ALLOC_INDEX_BITS (DEPOT_STACK_BITS - \ STACK_ALLOC_NULL_PROTECTION_BITS - STACK_ALLOC_OFFSET_BITS) -#define STACK_ALLOC_SLABS_CAP 1024 +#define STACK_ALLOC_SLABS_CAP 8192 #define STACK_ALLOC_MAX_SLABS \ (((1LL << (STACK_ALLOC_INDEX_BITS)) < STACK_ALLOC_SLABS_CAP) ? \ (1LL << (STACK_ALLOC_INDEX_BITS)) : STACK_ALLOC_SLABS_CAP) From 37df49f433bc3a11f5716fe65aaec5189c6402cb Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 27 Oct 2016 17:46:47 -0700 Subject: [PATCH 703/884] mm: kmemleak: ensure that the task stack is not freed during scanning Commit 68f24b08ee89 ("sched/core: Free the stack early if CONFIG_THREAD_INFO_IN_TASK") may cause the task->stack to be freed during kmemleak_scan() execution, leading to either a NULL pointer fault (if task->stack is NULL) or kmemleak accessing already freed memory. This patch uses the new try_get_task_stack() API to ensure that the task stack is not freed during kmemleak stack scanning. Addresses https://bugzilla.kernel.org/show_bug.cgi?id=173901. Fixes: 68f24b08ee89 ("sched/core: Free the stack early if CONFIG_THREAD_INFO_IN_TASK") Link: http://lkml.kernel.org/r/1476266223-14325-1-git-send-email-catalin.marinas@arm.com Signed-off-by: Catalin Marinas Reported-by: CAI Qian Tested-by: CAI Qian Acked-by: Michal Hocko Cc: Andy Lutomirski Cc: CAI Qian Cc: Hillf Danton Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kmemleak.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index a5e453cf05c4..e5355a5b423f 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1453,8 +1453,11 @@ static void kmemleak_scan(void) read_lock(&tasklist_lock); do_each_thread(g, p) { - scan_block(task_stack_page(p), task_stack_page(p) + - THREAD_SIZE, NULL); + void *stack = try_get_task_stack(p); + if (stack) { + scan_block(stack, stack + THREAD_SIZE, NULL); + put_task_stack(p); + } } while_each_thread(g, p); read_unlock(&tasklist_lock); } From 06b2849d103f4a91212876a211d0d7df227a9513 Mon Sep 17 00:00:00 2001 From: Leon Yu Date: Thu, 27 Oct 2016 17:46:50 -0700 Subject: [PATCH 704/884] proc: fix NULL dereference when reading /proc//auxv Reading auxv of any kernel thread results in NULL pointer dereferencing in auxv_read() where mm can be NULL. Fix that by checking for NULL mm and bailing out early. This is also the original behavior changed by recent commit c5317167854e ("proc: switch auxv to use of __mem_open()"). # cat /proc/2/auxv Unable to handle kernel NULL pointer dereference at virtual address 000000a8 Internal error: Oops: 17 [#1] PREEMPT SMP ARM CPU: 3 PID: 113 Comm: cat Not tainted 4.9.0-rc1-ARCH+ #1 Hardware name: BCM2709 task: ea3b0b00 task.stack: e99b2000 PC is at auxv_read+0x24/0x4c LR is at do_readv_writev+0x2fc/0x37c Process cat (pid: 113, stack limit = 0xe99b2210) Call chain: auxv_read do_readv_writev vfs_readv default_file_splice_read splice_direct_to_actor do_splice_direct do_sendfile SyS_sendfile64 ret_fast_syscall Fixes: c5317167854e ("proc: switch auxv to use of __mem_open()") Link: http://lkml.kernel.org/r/1476966200-14457-1-git-send-email-chianglungyu@gmail.com Signed-off-by: Leon Yu Acked-by: Oleg Nesterov Acked-by: Michal Hocko Cc: Al Viro Cc: Kees Cook Cc: John Stultz Cc: Mateusz Guzik Cc: Janis Danisevskis Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/proc/base.c b/fs/proc/base.c index adfc5b4986f5..ca651ac00660 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1012,6 +1012,9 @@ static ssize_t auxv_read(struct file *file, char __user *buf, { struct mm_struct *mm = file->private_data; unsigned int nwords = 0; + + if (!mm) + return 0; do { nwords += 2; } while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */ From 8f72cb4ef90c63bcb5111c2e3ec7ea2727eab2f8 Mon Sep 17 00:00:00 2001 From: Martin Kepplinger Date: Thu, 27 Oct 2016 17:46:53 -0700 Subject: [PATCH 705/884] CREDITS: update credit information for Martin Kepplinger Content and employer changed. Link: http://lkml.kernel.org/r/1477304102-28830-1-git-send-email-martin.kepplinger@ginzinger.com Signed-off-by: Martin Kepplinger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- CREDITS | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/CREDITS b/CREDITS index 513aaa3546bf..837367624e45 100644 --- a/CREDITS +++ b/CREDITS @@ -1864,10 +1864,11 @@ S: The Netherlands N: Martin Kepplinger E: martink@posteo.de -E: martin.kepplinger@theobroma-systems.com +E: martin.kepplinger@ginzinger.com W: http://www.martinkepplinger.com D: mma8452 accelerators iio driver -D: Kernel cleanups +D: pegasus_notetaker input driver +D: Kernel fixes and cleanups S: Garnisonstraße 26 S: 4020 Linz S: Austria From 89a2848381b5fcd9c4d9c0cd97680e3b28730e31 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 27 Oct 2016 17:46:56 -0700 Subject: [PATCH 706/884] mm: memcontrol: do not recurse in direct reclaim On 4.0, we saw a stack corruption from a page fault entering direct memory cgroup reclaim, calling into btrfs_releasepage(), which then tried to allocate an extent and recursed back into a kmem charge ad nauseam: [...] btrfs_releasepage+0x2c/0x30 try_to_release_page+0x32/0x50 shrink_page_list+0x6da/0x7a0 shrink_inactive_list+0x1e5/0x510 shrink_lruvec+0x605/0x7f0 shrink_zone+0xee/0x320 do_try_to_free_pages+0x174/0x440 try_to_free_mem_cgroup_pages+0xa7/0x130 try_charge+0x17b/0x830 memcg_charge_kmem+0x40/0x80 new_slab+0x2d9/0x5a0 __slab_alloc+0x2fd/0x44f kmem_cache_alloc+0x193/0x1e0 alloc_extent_state+0x21/0xc0 __clear_extent_bit+0x2b5/0x400 try_release_extent_mapping+0x1a3/0x220 __btrfs_releasepage+0x31/0x70 btrfs_releasepage+0x2c/0x30 try_to_release_page+0x32/0x50 shrink_page_list+0x6da/0x7a0 shrink_inactive_list+0x1e5/0x510 shrink_lruvec+0x605/0x7f0 shrink_zone+0xee/0x320 do_try_to_free_pages+0x174/0x440 try_to_free_mem_cgroup_pages+0xa7/0x130 try_charge+0x17b/0x830 mem_cgroup_try_charge+0x65/0x1c0 handle_mm_fault+0x117f/0x1510 __do_page_fault+0x177/0x420 do_page_fault+0xc/0x10 page_fault+0x22/0x30 On later kernels, kmem charging is opt-in rather than opt-out, and that particular kmem allocation in btrfs_releasepage() is no longer being charged and won't recurse and overrun the stack anymore. But it's not impossible for an accounted allocation to happen from the memcg direct reclaim context, and we needed to reproduce this crash many times before we even got a useful stack trace out of it. Like other direct reclaimers, mark tasks in memcg reclaim PF_MEMALLOC to avoid recursing into any other form of direct reclaim. Then let recursive charges from PF_MEMALLOC contexts bypass the cgroup limit. Link: http://lkml.kernel.org/r/20161025141050.GA13019@cmpxchg.org Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Cc: Vladimir Davydov Cc: Tejun Heo Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 9 +++++++++ mm/vmscan.c | 2 ++ 2 files changed, 11 insertions(+) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index ae052b5e3315..0f870ba43942 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1917,6 +1917,15 @@ retry: current->flags & PF_EXITING)) goto force; + /* + * Prevent unbounded recursion when reclaim operations need to + * allocate memory. This might exceed the limits temporarily, + * but we prefer facilitating memory reclaim and getting back + * under the limit over triggering OOM kills in these cases. + */ + if (unlikely(current->flags & PF_MEMALLOC)) + goto force; + if (unlikely(task_in_memcg_oom(current))) goto nomem; diff --git a/mm/vmscan.c b/mm/vmscan.c index 744f926af442..76fda2268148 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -3043,7 +3043,9 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, sc.gfp_mask, sc.reclaim_idx); + current->flags |= PF_MEMALLOC; nr_reclaimed = do_try_to_free_pages(zonelist, &sc); + current->flags &= ~PF_MEMALLOC; trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed); From 62e931fac45b17c2a42549389879411572f75804 Mon Sep 17 00:00:00 2001 From: Daniel Mentz Date: Thu, 27 Oct 2016 17:46:59 -0700 Subject: [PATCH 707/884] lib/genalloc.c: start search from start of chunk gen_pool_alloc_algo() iterates over the chunks of a pool trying to find a contiguous block of memory that satisfies the allocation request. The shortcut if (size > atomic_read(&chunk->avail)) continue; makes the loop skip over chunks that do not have enough bytes left to fulfill the request. There are two situations, though, where an allocation might still fail: (1) The available memory is not contiguous, i.e. the request cannot be fulfilled due to external fragmentation. (2) A race condition. Another thread runs the same code concurrently and is quicker to grab the available memory. In those situations, the loop calls pool->algo() to search the entire chunk, and pool->algo() returns some value that is >= end_bit to indicate that the search failed. This return value is then assigned to start_bit. The variables start_bit and end_bit describe the range that should be searched, and this range should be reset for every chunk that is searched. Today, the code fails to reset start_bit to 0. As a result, prefixes of subsequent chunks are ignored. Memory allocations might fail even though there is plenty of room left in these prefixes of those other chunks. Fixes: 7f184275aa30 ("lib, Make gen_pool memory allocator lockless") Link: http://lkml.kernel.org/r/1477420604-28918-1-git-send-email-danielmentz@google.com Signed-off-by: Daniel Mentz Reviewed-by: Mathieu Desnoyers Acked-by: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/genalloc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/genalloc.c b/lib/genalloc.c index 0a1139644d32..144fe6b1a03e 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -292,7 +292,7 @@ unsigned long gen_pool_alloc_algo(struct gen_pool *pool, size_t size, struct gen_pool_chunk *chunk; unsigned long addr = 0; int order = pool->min_alloc_order; - int nbits, start_bit = 0, end_bit, remain; + int nbits, start_bit, end_bit, remain; #ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG BUG_ON(in_nmi()); @@ -307,6 +307,7 @@ unsigned long gen_pool_alloc_algo(struct gen_pool *pool, size_t size, if (size > atomic_read(&chunk->avail)) continue; + start_bit = 0; end_bit = chunk_size(chunk) >> order; retry: start_bit = algo(chunk->bits, end_bit, start_bit, From 14f947c87a2164b19c7f8c02234f4f348e03f409 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 27 Oct 2016 17:47:02 -0700 Subject: [PATCH 708/884] fs: exofs: print a hex number after a 0x prefix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It makes the message hard to interpret correctly if a base 10 number is prefixed by 0x. So change to a hex number. Link: http://lkml.kernel.org/r/20161026125658.25728-2-u.kleine-koenig@pengutronix.de Signed-off-by: Uwe Kleine-König Cc: Boaz Harrosh Cc: Benny Halevy Cc: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exofs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c index 79101651fe9e..42f9a0a0c4ca 100644 --- a/fs/exofs/dir.c +++ b/fs/exofs/dir.c @@ -137,7 +137,7 @@ Espan: bad_entry: EXOFS_ERR( "ERROR [exofs_check_page]: bad entry in directory(0x%lx): %s - " - "offset=%lu, inode=0x%llu, rec_len=%d, name_len=%d\n", + "offset=%lu, inode=0x%llx, rec_len=%d, name_len=%d\n", dir->i_ino, error, (page->index<inode_no)), rec_len, p->name_len); From ee52c44dee63ff2686a7b0d98fff7c80852ac022 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 27 Oct 2016 17:47:04 -0700 Subject: [PATCH 709/884] block: DAC960: print a hex number after a 0x prefix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It makes the message hard to interpret correctly if a base 10 number is prefixed by 0x. So change to a hex number. Link: http://lkml.kernel.org/r/20161026125658.25728-3-u.kleine-koenig@pengutronix.de Signed-off-by: Uwe Kleine-König Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/DAC960.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index 811e11c82f32..0809cda93cc0 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -2954,7 +2954,7 @@ DAC960_DetectController(struct pci_dev *PCI_Device, case DAC960_PD_Controller: if (!request_region(Controller->IO_Address, 0x80, Controller->FullModelName)) { - DAC960_Error("IO port 0x%d busy for Controller at\n", + DAC960_Error("IO port 0x%lx busy for Controller at\n", Controller, Controller->IO_Address); goto Failure; } @@ -2990,7 +2990,7 @@ DAC960_DetectController(struct pci_dev *PCI_Device, case DAC960_P_Controller: if (!request_region(Controller->IO_Address, 0x80, Controller->FullModelName)){ - DAC960_Error("IO port 0x%d busy for Controller at\n", + DAC960_Error("IO port 0x%lx busy for Controller at\n", Controller, Controller->IO_Address); goto Failure; } From 9105585d13bd2946f0eb2a664e32ec9a9b23bf1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 27 Oct 2016 17:47:07 -0700 Subject: [PATCH 710/884] ipack: print a hex number after a 0x prefix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It makes the result hard to interpret correctly if a base 10 number is prefixed by 0x. So change to a hex number. Link: http://lkml.kernel.org/r/20161026125658.25728-4-u.kleine-koenig@pengutronix.de Signed-off-by: Uwe Kleine-König Cc: Samuel Iglesias Gonsalvez Cc: Jens Taprogge Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/ipack/ipack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ipack/ipack.c b/drivers/ipack/ipack.c index c0e7b624ce54..12102448fddd 100644 --- a/drivers/ipack/ipack.c +++ b/drivers/ipack/ipack.c @@ -178,7 +178,7 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, idev->id_vendor, idev->id_device); } -ipack_device_attr(id_format, "0x%hhu\n"); +ipack_device_attr(id_format, "0x%hhx\n"); static DEVICE_ATTR_RO(id); static DEVICE_ATTR_RO(id_device); From 17a88939568be28a8ea5195b55ef3a84a469777e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Thu, 27 Oct 2016 17:47:10 -0700 Subject: [PATCH 711/884] cris/arch-v32: cryptocop: print a hex number after a 0x prefix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It makes the result hard to interpret correctly if a base 10 number is prefixed by 0x. So change to a hex number. Link: http://lkml.kernel.org/r/20161026125658.25728-6-u.kleine-koenig@pengutronix.de Signed-off-by: Uwe Kleine-König Cc: Mikael Starvik Cc: Jesper Nilsson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/cris/arch-v32/drivers/cryptocop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/cris/arch-v32/drivers/cryptocop.c b/arch/cris/arch-v32/drivers/cryptocop.c index 099e170a93ee..0068fd411a84 100644 --- a/arch/cris/arch-v32/drivers/cryptocop.c +++ b/arch/cris/arch-v32/drivers/cryptocop.c @@ -3149,7 +3149,7 @@ static void print_dma_descriptors(struct cryptocop_int_operation *iop) printk("print_dma_descriptors start\n"); printk("iop:\n"); - printk("\tsid: 0x%lld\n", iop->sid); + printk("\tsid: 0x%llx\n", iop->sid); printk("\tcdesc_out: 0x%p\n", iop->cdesc_out); printk("\tcdesc_in: 0x%p\n", iop->cdesc_in); From 8e819101ce6fcc58801c9a813ea99c4da0255eef Mon Sep 17 00:00:00 2001 From: Dimitri Sivanich Date: Thu, 27 Oct 2016 17:47:12 -0700 Subject: [PATCH 712/884] drivers/misc/sgi-gru/grumain.c: remove bogus 0x prefix from printk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Would like to have this be a decimal number. Link: http://lkml.kernel.org/r/20161026134746.GA30169@sgi.com Signed-off-by: Dimitri Sivanich Reported-by: Uwe Kleine-König Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/misc/sgi-gru/grumain.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c index 1525870f460a..33741ad4a74a 100644 --- a/drivers/misc/sgi-gru/grumain.c +++ b/drivers/misc/sgi-gru/grumain.c @@ -283,7 +283,7 @@ static void gru_unload_mm_tracker(struct gru_state *gru, spin_lock(&gru->gs_asid_lock); BUG_ON((asids->mt_ctxbitmap & ctxbitmap) != ctxbitmap); asids->mt_ctxbitmap ^= ctxbitmap; - gru_dbg(grudev, "gid %d, gts %p, gms %p, ctxnum 0x%d, asidmap 0x%lx\n", + gru_dbg(grudev, "gid %d, gts %p, gms %p, ctxnum %d, asidmap 0x%lx\n", gru->gs_gid, gts, gms, gts->ts_ctxnum, gms->ms_asidmap[0]); spin_unlock(&gru->gs_asid_lock); spin_unlock(&gms->ms_asid_lock); From ce93bedb5ed2b16c08c6df4c797865f9ead600a3 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Sat, 22 Oct 2016 14:15:22 -0200 Subject: [PATCH 713/884] mtd: nand: gpmi: disable the clocks on errors We should disable the previously enabled GPMI clocks in the error paths. Also, when gpmi_enable_clk() fails simply return the error code immediately rather than jumping to to the 'err_out' label. Signed-off-by: Fabio Estevam Reviewed-by: Marek Vasut Acked-by: Han Xu Signed-off-by: Boris Brezillon --- drivers/mtd/nand/gpmi-nand/gpmi-lib.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c index 0f68a99fc4ad..141bd70a49c2 100644 --- a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c +++ b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c @@ -161,7 +161,7 @@ int gpmi_init(struct gpmi_nand_data *this) ret = gpmi_enable_clk(this); if (ret) - goto err_out; + return ret; ret = gpmi_reset_block(r->gpmi_regs, false); if (ret) goto err_out; @@ -197,6 +197,7 @@ int gpmi_init(struct gpmi_nand_data *this) gpmi_disable_clk(this); return 0; err_out: + gpmi_disable_clk(this); return ret; } @@ -270,7 +271,7 @@ int bch_set_geometry(struct gpmi_nand_data *this) ret = gpmi_enable_clk(this); if (ret) - goto err_out; + return ret; /* * Due to erratum #2847 of the MX23, the BCH cannot be soft reset on this @@ -308,6 +309,7 @@ int bch_set_geometry(struct gpmi_nand_data *this) gpmi_disable_clk(this); return 0; err_out: + gpmi_disable_clk(this); return ret; } From 73f907fd5fa56b0066d199bdd7126bbd04f6cd7b Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 24 Oct 2016 16:46:20 +0200 Subject: [PATCH 714/884] mtd: nand: Fix data interface configuration logic When changing from one data interface setting to another, one has to ensure a specific sequence which is described in the ONFI spec. One of these constraints is that the CE line has go high after a reset before a command can be sent with the new data interface setting, which is not guaranteed by the current implementation. Rework the nand_reset() function and all the call sites to make sure the CE line is asserted and released when required. Also make sure to actually apply the new data interface setting on the first die. Signed-off-by: Boris Brezillon Fixes: d8e725dd8311 ("mtd: nand: automate NAND timings selection") Reviewed-by: Sascha Hauer Tested-by: Marc Gonzalez --- drivers/mtd/nand/nand_base.c | 60 +++++++++++++++++++++++++----------- include/linux/mtd/nand.h | 2 +- 2 files changed, 43 insertions(+), 19 deletions(-) diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index e5718e5ecf92..3bde96a3f7bf 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -1095,10 +1095,11 @@ static void nand_release_data_interface(struct nand_chip *chip) /** * nand_reset - Reset and initialize a NAND device * @chip: The NAND chip + * @chipnr: Internal die id * * Returns 0 for success or negative error code otherwise */ -int nand_reset(struct nand_chip *chip) +int nand_reset(struct nand_chip *chip, int chipnr) { struct mtd_info *mtd = nand_to_mtd(chip); int ret; @@ -1107,9 +1108,17 @@ int nand_reset(struct nand_chip *chip) if (ret) return ret; + /* + * The CS line has to be released before we can apply the new NAND + * interface settings, hence this weird ->select_chip() dance. + */ + chip->select_chip(mtd, chipnr); chip->cmdfunc(mtd, NAND_CMD_RESET, -1, -1); + chip->select_chip(mtd, -1); + chip->select_chip(mtd, chipnr); ret = nand_setup_data_interface(chip); + chip->select_chip(mtd, -1); if (ret) return ret; @@ -1185,8 +1194,6 @@ int nand_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len) /* Shift to get chip number */ chipnr = ofs >> chip->chip_shift; - chip->select_chip(mtd, chipnr); - /* * Reset the chip. * If we want to check the WP through READ STATUS and check the bit 7 @@ -1194,7 +1201,9 @@ int nand_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len) * some operation can also clear the bit 7 of status register * eg. erase/program a locked block */ - nand_reset(chip); + nand_reset(chip, chipnr); + + chip->select_chip(mtd, chipnr); /* Check, if it is write protected */ if (nand_check_wp(mtd)) { @@ -1244,8 +1253,6 @@ int nand_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len) /* Shift to get chip number */ chipnr = ofs >> chip->chip_shift; - chip->select_chip(mtd, chipnr); - /* * Reset the chip. * If we want to check the WP through READ STATUS and check the bit 7 @@ -1253,7 +1260,9 @@ int nand_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len) * some operation can also clear the bit 7 of status register * eg. erase/program a locked block */ - nand_reset(chip); + nand_reset(chip, chipnr); + + chip->select_chip(mtd, chipnr); /* Check, if it is write protected */ if (nand_check_wp(mtd)) { @@ -2940,10 +2949,6 @@ static int nand_do_write_oob(struct mtd_info *mtd, loff_t to, } chipnr = (int)(to >> chip->chip_shift); - chip->select_chip(mtd, chipnr); - - /* Shift to get page */ - page = (int)(to >> chip->page_shift); /* * Reset the chip. Some chips (like the Toshiba TC5832DC found in one @@ -2951,7 +2956,12 @@ static int nand_do_write_oob(struct mtd_info *mtd, loff_t to, * if we don't do this. I have no clue why, but I seem to have 'fixed' * it in the doc2000 driver in August 1999. dwmw2. */ - nand_reset(chip); + nand_reset(chip, chipnr); + + chip->select_chip(mtd, chipnr); + + /* Shift to get page */ + page = (int)(to >> chip->page_shift); /* Check, if it is write protected */ if (nand_check_wp(mtd)) { @@ -3984,14 +3994,14 @@ static struct nand_flash_dev *nand_get_flash_type(struct mtd_info *mtd, int i, maf_idx; u8 id_data[8]; - /* Select the device */ - chip->select_chip(mtd, 0); - /* * Reset the chip, required by some chips (e.g. Micron MT29FxGxxxxx) * after power-up. */ - nand_reset(chip); + nand_reset(chip, 0); + + /* Select the device */ + chip->select_chip(mtd, 0); /* Send the command for reading device ID */ chip->cmdfunc(mtd, NAND_CMD_READID, 0x00, -1); @@ -4329,17 +4339,31 @@ int nand_scan_ident(struct mtd_info *mtd, int maxchips, return PTR_ERR(type); } + /* Initialize the ->data_interface field. */ ret = nand_init_data_interface(chip); if (ret) return ret; + /* + * Setup the data interface correctly on the chip and controller side. + * This explicit call to nand_setup_data_interface() is only required + * for the first die, because nand_reset() has been called before + * ->data_interface and ->default_onfi_timing_mode were set. + * For the other dies, nand_reset() will automatically switch to the + * best mode for us. + */ + ret = nand_setup_data_interface(chip); + if (ret) + return ret; + chip->select_chip(mtd, -1); /* Check for a chip array */ for (i = 1; i < maxchips; i++) { - chip->select_chip(mtd, i); /* See comment in nand_get_flash_type for reset */ - nand_reset(chip); + nand_reset(chip, i); + + chip->select_chip(mtd, i); /* Send the command for reading device ID */ chip->cmdfunc(mtd, NAND_CMD_READID, 0x00, -1); /* Read manufacturer and device IDs */ diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index c5d3d5024fc8..d8905a229f34 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -1184,7 +1184,7 @@ int nand_read_oob_syndrome(struct mtd_info *mtd, struct nand_chip *chip, int page); /* Reset and initialize a NAND device */ -int nand_reset(struct nand_chip *chip); +int nand_reset(struct nand_chip *chip, int chipnr); /* Free resources held by the NAND device */ void nand_cleanup(struct nand_chip *chip); From 8ff0513bdcdd71e84aa561cce216675d43fb41b8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 18 Oct 2016 00:05:31 +0200 Subject: [PATCH 715/884] mtd: mtk: avoid warning in mtk_ecc_encode When building with -Wmaybe-uninitialized, gcc produces a silly false positive warning for the mtk_ecc_encode function: drivers/mtd/nand/mtk_ecc.c: In function 'mtk_ecc_encode': drivers/mtd/nand/mtk_ecc.c:402:15: error: 'val' may be used uninitialized in this function [-Werror=maybe-uninitialized] The function for some reason contains a double byte swap on big-endian builds to get the OOB data into the correct order again, and is written in a slightly confusing way. Using a simple memcpy32_fromio() to read the data simplifies it a lot so it becomes more readable and produces no warning. However, the output might not have 32-bit alignment, so we have to use another memcpy to avoid taking alignment faults or writing beyond the end of the array. Signed-off-by: Arnd Bergmann Tested-by: RogerCC Lin Signed-off-by: Boris Brezillon --- drivers/mtd/nand/mtk_ecc.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/mtd/nand/mtk_ecc.c b/drivers/mtd/nand/mtk_ecc.c index d54f666417e1..dbf256217b3e 100644 --- a/drivers/mtd/nand/mtk_ecc.c +++ b/drivers/mtd/nand/mtk_ecc.c @@ -86,6 +86,8 @@ struct mtk_ecc { struct completion done; struct mutex lock; u32 sectors; + + u8 eccdata[112]; }; static inline void mtk_ecc_wait_idle(struct mtk_ecc *ecc, @@ -366,9 +368,8 @@ int mtk_ecc_encode(struct mtk_ecc *ecc, struct mtk_ecc_config *config, u8 *data, u32 bytes) { dma_addr_t addr; - u8 *p; - u32 len, i, val; - int ret = 0; + u32 len; + int ret; addr = dma_map_single(ecc->dev, data, bytes, DMA_TO_DEVICE); ret = dma_mapping_error(ecc->dev, addr); @@ -393,14 +394,12 @@ int mtk_ecc_encode(struct mtk_ecc *ecc, struct mtk_ecc_config *config, /* Program ECC bytes to OOB: per sector oob = FDM + ECC + SPARE */ len = (config->strength * ECC_PARITY_BITS + 7) >> 3; - p = data + bytes; - /* write the parity bytes generated by the ECC back to the OOB region */ - for (i = 0; i < len; i++) { - if ((i % 4) == 0) - val = readl(ecc->regs + ECC_ENCPAR(i / 4)); - p[i] = (val >> ((i % 4) * 8)) & 0xff; - } + /* write the parity bytes generated by the ECC back to temp buffer */ + __ioread32_copy(ecc->eccdata, ecc->regs + ECC_ENCPAR(0), round_up(len, 4)); + + /* copy into possibly unaligned OOB region with actual length */ + memcpy(data + bytes, ecc->eccdata, len); timeout: dma_unmap_single(ecc->dev, addr, bytes, DMA_TO_DEVICE); From 1c27f646b18fb56308dff82784ca61951bad0b48 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 27 Oct 2016 14:36:23 +0200 Subject: [PATCH 716/884] x86/microcode/AMD: Fix more fallout from CONFIG_RANDOMIZE_MEMORY=y We needed the physical address of the container in order to compute the offset within the relocated ramdisk. And we did this by doing __pa() on the virtual address. However, __pa() does checks whether the physical address is within PAGE_OFFSET and __START_KERNEL_map - see __phys_addr() - which fail if we have CONFIG_RANDOMIZE_MEMORY enabled: we feed a virtual address which *doesn't* have the randomization offset into a function which uses PAGE_OFFSET which *does* have that offset. This makes this check fire: VIRTUAL_BUG_ON((x > y) || !phys_addr_valid(x)); ^^^^^^ due to the randomization offset. The fix is as simple as using __pa_nodebug() because we do that randomization offset accounting later in that function ourselves. Reported-by: Bob Peterson Tested-by: Bob Peterson Signed-off-by: Borislav Petkov Cc: Andreas Gruenbacher Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Mel Gorman Cc: Peter Zijlstra Cc: Steven Whitehouse Cc: Thomas Gleixner Cc: linux-mm Cc: stable@vger.kernel.org # 4.9 Link: http://lkml.kernel.org/r/20161027123623.j2jri5bandimboff@pd.tnic Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/microcode/amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 620ab06bcf45..017bda12caae 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -429,7 +429,7 @@ int __init save_microcode_in_initrd_amd(void) * We need the physical address of the container for both bitness since * boot_params.hdr.ramdisk_image is a physical address. */ - cont = __pa(container); + cont = __pa_nodebug(container); cont_va = container; #endif From 0933840acf7b65d6d30a5b6089d882afea57aca3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 20 Oct 2016 13:10:11 +0200 Subject: [PATCH 717/884] perf/core: Protect PMU device removal with a 'pmu_bus_running' check, to fix CONFIG_DEBUG_TEST_DRIVER_REMOVE=y kernel panic CAI Qian reported a crash in the PMU uncore device removal code, enabled by the CONFIG_DEBUG_TEST_DRIVER_REMOVE=y option: https://marc.info/?l=linux-kernel&m=147688837328451 The reason for the crash is that perf_pmu_unregister() tries to remove a PMU device which is not added at this point. We add PMU devices only after pmu_bus is registered, which happens in the perf_event_sysfs_init() call and sets the 'pmu_bus_running' flag. The fix is to get the 'pmu_bus_running' flag state at the point the PMU is taken out of the PMU list and remove the device later only if it's set. Reported-by: CAI Qian Tested-by: CAI Qian Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Greg Kroah-Hartman Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rob Herring Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20161020111011.GA13361@krava Signed-off-by: Ingo Molnar --- kernel/events/core.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index c6e47e97b33f..a5d2e62faf7e 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8855,7 +8855,10 @@ EXPORT_SYMBOL_GPL(perf_pmu_register); void perf_pmu_unregister(struct pmu *pmu) { + int remove_device; + mutex_lock(&pmus_lock); + remove_device = pmu_bus_running; list_del_rcu(&pmu->entry); mutex_unlock(&pmus_lock); @@ -8869,10 +8872,12 @@ void perf_pmu_unregister(struct pmu *pmu) free_percpu(pmu->pmu_disable_count); if (pmu->type >= PERF_TYPE_MAX) idr_remove(&pmu_idr, pmu->type); - if (pmu->nr_addr_filters) - device_remove_file(pmu->dev, &dev_attr_nr_addr_filters); - device_del(pmu->dev); - put_device(pmu->dev); + if (remove_device) { + if (pmu->nr_addr_filters) + device_remove_file(pmu->dev, &dev_attr_nr_addr_filters); + device_del(pmu->dev); + put_device(pmu->dev); + } free_pmu_context(pmu); } EXPORT_SYMBOL_GPL(perf_pmu_unregister); From 5aab90ce1ec449912a2ebc4d45e0c85dac29e9dd Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 26 Oct 2016 11:48:24 +0200 Subject: [PATCH 718/884] perf/powerpc: Don't call perf_event_disable() from atomic context The trinity syscall fuzzer triggered following WARN() on powerpc: WARNING: CPU: 9 PID: 2998 at arch/powerpc/kernel/hw_breakpoint.c:278 ... NIP [c00000000093aedc] .hw_breakpoint_handler+0x28c/0x2b0 LR [c00000000093aed8] .hw_breakpoint_handler+0x288/0x2b0 Call Trace: [c0000002f7933580] [c00000000093aed8] .hw_breakpoint_handler+0x288/0x2b0 (unreliable) [c0000002f7933630] [c0000000000f671c] .notifier_call_chain+0x7c/0xf0 [c0000002f79336d0] [c0000000000f6abc] .__atomic_notifier_call_chain+0xbc/0x1c0 [c0000002f7933780] [c0000000000f6c40] .notify_die+0x70/0xd0 [c0000002f7933820] [c00000000001a74c] .do_break+0x4c/0x100 [c0000002f7933920] [c0000000000089fc] handle_dabr_fault+0x14/0x48 Followed by a lockdep warning: =============================== [ INFO: suspicious RCU usage. ] 4.8.0-rc5+ #7 Tainted: G W ------------------------------- ./include/linux/rcupdate.h:556 Illegal context switch in RCU read-side critical section! other info that might help us debug this: rcu_scheduler_active = 1, debug_locks = 0 2 locks held by ls/2998: #0: (rcu_read_lock){......}, at: [] .__atomic_notifier_call_chain+0x0/0x1c0 #1: (rcu_read_lock){......}, at: [] .hw_breakpoint_handler+0x0/0x2b0 stack backtrace: CPU: 9 PID: 2998 Comm: ls Tainted: G W 4.8.0-rc5+ #7 Call Trace: [c0000002f7933150] [c00000000094b1f8] .dump_stack+0xe0/0x14c (unreliable) [c0000002f79331e0] [c00000000013c468] .lockdep_rcu_suspicious+0x138/0x180 [c0000002f7933270] [c0000000001005d8] .___might_sleep+0x278/0x2e0 [c0000002f7933300] [c000000000935584] .mutex_lock_nested+0x64/0x5a0 [c0000002f7933410] [c00000000023084c] .perf_event_ctx_lock_nested+0x16c/0x380 [c0000002f7933500] [c000000000230a80] .perf_event_disable+0x20/0x60 [c0000002f7933580] [c00000000093aeec] .hw_breakpoint_handler+0x29c/0x2b0 [c0000002f7933630] [c0000000000f671c] .notifier_call_chain+0x7c/0xf0 [c0000002f79336d0] [c0000000000f6abc] .__atomic_notifier_call_chain+0xbc/0x1c0 [c0000002f7933780] [c0000000000f6c40] .notify_die+0x70/0xd0 [c0000002f7933820] [c00000000001a74c] .do_break+0x4c/0x100 [c0000002f7933920] [c0000000000089fc] handle_dabr_fault+0x14/0x48 While it looks like the first WARN() is probably valid, the other one is triggered by disabling event via perf_event_disable() from atomic context. The event is disabled here in case we were not able to emulate the instruction that hit the breakpoint. By disabling the event we unschedule the event and make sure it's not scheduled back. But we can't call perf_event_disable() from atomic context, instead we need to use the event's pending_disable irq_work method to disable it. Reported-by: Jan Stancek Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Huang Ying Cc: Jiri Olsa Cc: Linus Torvalds Cc: Michael Neuling Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20161026094824.GA21397@krava Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/hw_breakpoint.c | 2 +- include/linux/perf_event.h | 1 + kernel/events/core.c | 10 ++++++++-- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 9781c69eae57..03d089b3ed72 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -275,7 +275,7 @@ int hw_breakpoint_handler(struct die_args *args) if (!stepped) { WARN(1, "Unable to handle hardware breakpoint. Breakpoint at " "0x%lx will be disabled.", info->address); - perf_event_disable(bp); + perf_event_disable_inatomic(bp); goto out; } /* diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 060d0ede88df..4741ecdb9817 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1257,6 +1257,7 @@ extern u64 perf_swevent_set_period(struct perf_event *event); extern void perf_event_enable(struct perf_event *event); extern void perf_event_disable(struct perf_event *event); extern void perf_event_disable_local(struct perf_event *event); +extern void perf_event_disable_inatomic(struct perf_event *event); extern void perf_event_task_tick(void); #else /* !CONFIG_PERF_EVENTS: */ static inline void * diff --git a/kernel/events/core.c b/kernel/events/core.c index a5d2e62faf7e..0e292132efac 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1960,6 +1960,12 @@ void perf_event_disable(struct perf_event *event) } EXPORT_SYMBOL_GPL(perf_event_disable); +void perf_event_disable_inatomic(struct perf_event *event) +{ + event->pending_disable = 1; + irq_work_queue(&event->pending); +} + static void perf_set_shadow_time(struct perf_event *event, struct perf_event_context *ctx, u64 tstamp) @@ -7075,8 +7081,8 @@ static int __perf_event_overflow(struct perf_event *event, if (events && atomic_dec_and_test(&event->event_limit)) { ret = 1; event->pending_kill = POLL_HUP; - event->pending_disable = 1; - irq_work_queue(&event->pending); + + perf_event_disable_inatomic(event); } READ_ONCE(event->overflow_handler)(event, data, regs); From f92b7604149a55cb601fc0b52911b1e11f0f2514 Mon Sep 17 00:00:00 2001 From: Imre Palik Date: Fri, 21 Oct 2016 01:18:59 -0700 Subject: [PATCH 719/884] perf/x86/intel: Honour the CPUID for number of fixed counters in hypervisors perf doesn't seem to honour the number of fixed counters specified by CPUID leaf 0xa. It always assumes that Intel CPUs have at least 3 fixed counters. So if some of the fixed counters are masked out by the hypervisor, it still tries to check/set them. This patch makes perf behave nicer when the kernel is running under a hypervisor that doesn't expose all the counters. This patch contains some ideas from Matt Wilson. Signed-off-by: Imre Palik Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Andi Kleen Cc: Alexander Kozyrev Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Artyom Kuanbekov Cc: David Carrillo-Cisneros Cc: David Woodhouse Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Matt Wilson Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1477037939-15605-1-git-send-email-imrep.amz@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index eab0915f5995..a74a2dbc0180 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3607,10 +3607,14 @@ __init int intel_pmu_init(void) /* * Quirk: v2 perfmon does not report fixed-purpose events, so - * assume at least 3 events: + * assume at least 3 events, when not running in a hypervisor: */ - if (version > 1) - x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3); + if (version > 1) { + int assume = 3 * !boot_cpu_has(X86_FEATURE_HYPERVISOR); + + x86_pmu.num_counters_fixed = + max((int)edx.split.num_counters_fixed, assume); + } if (boot_cpu_has(X86_FEATURE_PDCM)) { u64 capabilities; From bd768e146624cbec7122ed15dead8daa137d909d Mon Sep 17 00:00:00 2001 From: Ido Yariv Date: Fri, 21 Oct 2016 12:39:57 -0400 Subject: [PATCH 720/884] KVM: x86: fix wbinvd_dirty_mask use-after-free MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vcpu->arch.wbinvd_dirty_mask may still be used after freeing it, corrupting memory. For example, the following call trace may set a bit in an already freed cpu mask: kvm_arch_vcpu_load vcpu_load vmx_free_vcpu_nested vmx_free_vcpu kvm_arch_vcpu_free Fix this by deferring freeing of wbinvd_dirty_mask. Cc: stable@vger.kernel.org Signed-off-by: Ido Yariv Reviewed-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- arch/x86/kvm/x86.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0b2087981c06..e954be8a3185 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7410,10 +7410,12 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) { + void *wbinvd_dirty_mask = vcpu->arch.wbinvd_dirty_mask; + kvmclock_reset(vcpu); - free_cpumask_var(vcpu->arch.wbinvd_dirty_mask); kvm_x86_ops->vcpu_free(vcpu); + free_cpumask_var(wbinvd_dirty_mask); } struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, From 9bcffe7575b721d7b6d9b3090fe18809d9806e78 Mon Sep 17 00:00:00 2001 From: Richard Genoud Date: Thu, 27 Oct 2016 18:04:06 +0200 Subject: [PATCH 721/884] tty/serial: at91: fix hardware handshake on Atmel platforms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After commit 1cf6e8fc8341 ("tty/serial: at91: fix RTS line management when hardware handshake is enabled"), the hardware handshake wasn't functional anymore on Atmel platforms (beside SAMA5D2). To understand why, one has to understand the flag ATMEL_US_USMODE_HWHS first: Before commit 1cf6e8fc8341 ("tty/serial: at91: fix RTS line management when hardware handshake is enabled"), this flag was never set. Thus, the CTS/RTS where only handled by serial_core (and everything worked just fine). This commit introduced the use of the ATMEL_US_USMODE_HWHS flag, enabling it for all boards when the user space enables flow control. When the ATMEL_US_USMODE_HWHS is set, the Atmel USART controller handles a part of the flow control job: - disable the transmitter when the CTS pin gets high. - drive the RTS pin high when the DMA buffer transfer is completed or PDC RX buffer full or RX FIFO is beyond threshold. (depending on the controller version). NB: This feature is *not* mandatory for the flow control to work. (Nevertheless, it's very useful if low latencies are needed.) Now, the specifics of the ATMEL_US_USMODE_HWHS flag: - For platforms with DMAC and no FIFOs (sam9x25, sam9x35, sama5D3, sama5D4, sam9g15, sam9g25, sam9g35)* this feature simply doesn't work. ( source: https://lkml.org/lkml/2016/9/7/598 ) Tested it on sam9g35, the RTS pins always stays up, even when RXEN=1 or a new DMA transfer descriptor is set. => ATMEL_US_USMODE_HWHS must not be used for those platforms - For platforms with a PDC (sam926{0,1,3}, sam9g10, sam9g20, sam9g45, sam9g46)*, there's another kind of problem. Once the flag ATMEL_US_USMODE_HWHS is set, the RTS pin can't be driven anymore via RTSEN/RTSDIS in USART Control Register. The RTS pin can only be driven by enabling/disabling the receiver or setting RCR=RNCR=0 in the PDC (Receive (Next) Counter Register). => Doing this is beyond the scope of this patch and could add other bugs, so the original (and working) behaviour should be set for those platforms (meaning ATMEL_US_USMODE_HWHS flag should be unset). - For platforms with a FIFO (sama5d2)*, the RTS pin is driven according to the RX FIFO thresholds, and can be also driven by RTSEN/RTSDIS in USART Control Register. No problem here. (This was the use case of commit 1cf6e8fc8341 ("tty/serial: at91: fix RTS line management when hardware handshake is enabled")) NB: If the CTS pin declared as a GPIO in the DTS, (for instance cts-gpios = <&pioA PIN_PB31 GPIO_ACTIVE_LOW>), the transmitter will be disabled. => ATMEL_US_USMODE_HWHS flag can be set for this platform ONLY IF the CTS pin is not a GPIO. So, the only case when ATMEL_US_USMODE_HWHS can be enabled is when (atmel_use_fifo(port) && !mctrl_gpio_to_gpiod(atmel_port->gpios, UART_GPIO_CTS)) Tested on all Atmel USART controller flavours: AT91SAM9G35-CM (DMAC flavour), AT91SAM9G20-EK (PDC flavour), SAMA5D2xplained (FIFO flavour). * the list may not be exhaustive Cc: #4.4+ (beware, missing atmel_port variable) Fixes: 1cf6e8fc8341 ("tty/serial: at91: fix RTS line management when hardware handshake is enabled") Signed-off-by: Richard Genoud Acked-by: Alexandre Belloni Acked-by: Cyrille Pitchen Acked-by: Uwe Kleine-König Acked-by: Nicolas Ferre Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/atmel_serial.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index fd8aa1f4ba78..168b10cad47b 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -2132,11 +2132,29 @@ static void atmel_set_termios(struct uart_port *port, struct ktermios *termios, mode |= ATMEL_US_USMODE_RS485; } else if (termios->c_cflag & CRTSCTS) { /* RS232 with hardware handshake (RTS/CTS) */ - if (atmel_use_dma_rx(port) && !atmel_use_fifo(port)) { - dev_info(port->dev, "not enabling hardware flow control because DMA is used"); - termios->c_cflag &= ~CRTSCTS; - } else { + if (atmel_use_fifo(port) && + !mctrl_gpio_to_gpiod(atmel_port->gpios, UART_GPIO_CTS)) { + /* + * with ATMEL_US_USMODE_HWHS set, the controller will + * be able to drive the RTS pin high/low when the RX + * FIFO is above RXFTHRES/below RXFTHRES2. + * It will also disable the transmitter when the CTS + * pin is high. + * This mode is not activated if CTS pin is a GPIO + * because in this case, the transmitter is always + * disabled (there must be an internal pull-up + * responsible for this behaviour). + * If the RTS pin is a GPIO, the controller won't be + * able to drive it according to the FIFO thresholds, + * but it will be handled by the driver. + */ mode |= ATMEL_US_USMODE_HWHS; + } else { + /* + * For platforms without FIFO, the flow control is + * handled by the driver. + */ + mode |= ATMEL_US_USMODE_NORMAL; } } else { /* RS232 without hadware handshake */ From 4dda864d73079a1eb01fab4ec29b97db150163bf Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 28 Oct 2016 07:07:47 -0500 Subject: [PATCH 722/884] tty: serial_core: Fix serial console crash on port shutdown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The port->console flag is always false, as uart_console() is called before the serial console has been registered. Hence for a serial port used as the console, uart_tty_port_shutdown() will still be called when userspace closes the port, powering it down. This may lead to a system lock up when the serial console driver writes to the serial port's registers. To fix this, move the setting of port->console after the call to uart_configure_port(), which registers the serial console. Fixes: 761ed4a94582ab29 ("tty: serial_core: convert uart_close to use tty_port_close") Reported-by: Niklas Söderlund Signed-off-by: Geert Uytterhoeven Acked-by: Rob Herring Tested-by: Mugunthan V N Tested-by: Niklas Söderlund [robh: rebased on tty-linus] Signed-off-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 664c99aeeca5..ce8899c13af3 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -2759,6 +2759,8 @@ int uart_add_one_port(struct uart_driver *drv, struct uart_port *uport) uart_configure_port(drv, state, uport); + port->console = uart_console(uport); + num_groups = 2; if (uport->attr_group) num_groups++; From d0f4bce2bce7e998abc906f3590e9032af7a41ba Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 28 Oct 2016 07:07:48 -0500 Subject: [PATCH 723/884] tty: serial_core: fix NULL struct tty pointer access in uart_write_wakeup Since commit 761ed4a94582ab29 ("tty: serial_core: convert uart_close to use tty_port_close"), the serial console is broken on various systems and typing "reboot" splats the following on the serial console: INIT: Sending p[ 427.863916] BUG: unable to handle kernel NULL pointer dereference at 00000000000001e0 [ 427.885156] IP: [] tty_wakeup+0xc/0x70 [ 427.898337] PGD 0 [ 427.902051] [ 427.907498] Oops: 0000 [#1] PREEMPT SMP [ 427.917635] Modules linked in: nfsv3 nfs_acl nfs fscache lockd sunrpc grace edd af_packet cpufreq_conservative cpufreq_userspace cpufreq_powersave fuse loop md_mod dm_mod joydev hid_generic usbhid ipmi_ssif ohci_pci ohci_hcd ehci_pci ehci_hcd e1000e ptp firewire_ohci edac_core pps_core tpm_infineon sp5100_tco firewire_core acpi_cpufreq serio_raw pcspkr fjes usbcore shpchp edac_mce_amd tpm_tis ipmi_si tpm_tis_core i2c_piix4 k10temp sg ipmi_msghandler tpm sr_mod button cdrom kvm_amd kvm irqbypass crc_itu_t ast ttm drm_kms_helper drm fb_sys_fops sysimgblt sysfillrect syscopyarea i2c_algo_bit scsi_dh_rdac scsi_dh_alua scsi_dh_emc scsi_dh_hp_sw ata_generic pata_atiixp [ 428.054179] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.9.0-rc1-1.g73e3f23-default #1 [ 428.072868] Hardware name: System manufacturer System Product Name/KGP(M)E-D16, BIOS 0902 12/03/2010 [ 428.094755] task: ffffffffa2c0d500 task.stack: ffffffffa2c00000 [ 428.109717] RIP: 0010:[] [] tty_wakeup+0xc/0x70 [ 428.128407] RSP: 0018:ffff9a1a5fc03df8 EFLAGS: 00010086 [ 428.142184] RAX: ffff9a1857258000 RBX: ffffffffa3050ea0 RCX: 0000000000000000 [ 428.159649] RDX: 000000000000001b RSI: 0000000000000000 RDI: 0000000000000000 [ 428.177109] RBP: ffff9a1a5fc03e08 R08: 0000000000000000 R09: 0000000000000000 [ 428.194547] R10: 0000000000021c77 R11: 0000000000000000 R12: ffff9a1857258000 [ 428.212002] R13: 0000000000000000 R14: 0000000000000020 R15: 0000000000000020 [ 428.229481] FS: 0000000000000000(0000) GS:ffff9a1a5fc00000(0000) knlGS:0000000000000000 [ 428.248938] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 428.263726] CR2: 00000000000001e0 CR3: 0000000390c06000 CR4: 00000000000006f0 [ 428.281331] Stack: [ 428.288696] ffffffffa3050ea0 ffff9a1857258000 ffff9a1a5fc03e18 ffffffffa24e0ab1 [ 428.307064] ffff9a1a5fc03e40 ffffffffa24e8865 ffffffffa3050ea0 00000000000000c2 [ 428.325456] 0000000000000046 ffff9a1a5fc03e78 ffffffffa24e8a5f ffffffffa3050ea0 [ 428.343905] Call Trace: [ 428.352319] [ 428.356216] [] uart_write_wakeup+0x21/0x30 The problem is for console ports, the serial port is not shutdown and interrupts may fire after the struct tty is gone. Simply calling the tty_port helper tty_port_tty_wakeup instead of tty_wakeup directly will ensure there is a valid struct tty. Fixes: 761ed4a94582ab29 ("tty: serial_core: convert uart_close to use tty_port_close") Reported-by: Borislav Petkov Reported-by: Mike Galbraith Cc: Jiri Slaby Cc: Greg Kroah-Hartman Cc: linux-serial@vger.kernel.org Signed-off-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index ce8899c13af3..f2303f390345 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -111,7 +111,7 @@ void uart_write_wakeup(struct uart_port *port) * closed. No cookie for you. */ BUG_ON(!state); - tty_wakeup(state->port.tty); + tty_port_tty_wakeup(&state->port); } static void uart_stop(struct tty_struct *tty) @@ -632,7 +632,7 @@ static void uart_flush_buffer(struct tty_struct *tty) if (port->ops->flush_buffer) port->ops->flush_buffer(port); uart_port_unlock(port, flags); - tty_wakeup(tty); + tty_port_tty_wakeup(&state->port); } /* From 9cccc76bb49ffcc0370a37a6c01368d69cc3b65b Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 15 Sep 2016 16:28:54 +0300 Subject: [PATCH 724/884] drm/i915: keep declarations in i915_drv.h Fix sparse warnings: drivers/gpu/drm/i915/i915_drv.c:1179:5: warning: symbol 'i915_driver_load' was not declared. Should it be static? drivers/gpu/drm/i915/i915_drv.c:1267:6: warning: symbol 'i915_driver_unload' was not declared. Should it be static? drivers/gpu/drm/i915/i915_drv.c:2444:25: warning: symbol 'i915_pm_ops' was not declared. Should it be static? Fixes: 42f5551d2769 ("drm/i915: Split out the PCI driver interface to i915_pci.c") Cc: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Daniel Vetter Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1473946137-1931-3-git-send-email-jani.nikula@intel.com (cherry picked from commit efab0698f94dd71fac5d946ad664a280441daedb) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_drv.h | 5 +++++ drivers/gpu/drm/i915/i915_pci.c | 7 ------- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8b9ee4e390c0..685e9e065287 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2883,6 +2883,11 @@ __i915_printk(struct drm_i915_private *dev_priv, const char *level, extern long i915_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); #endif +extern const struct dev_pm_ops i915_pm_ops; + +extern int i915_driver_load(struct pci_dev *pdev, + const struct pci_device_id *ent); +extern void i915_driver_unload(struct drm_device *dev); extern int intel_gpu_reset(struct drm_i915_private *dev_priv, u32 engine_mask); extern bool intel_has_gpu_reset(struct drm_i915_private *dev_priv); extern void i915_reset(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 687c768833b3..31e6edd08dd0 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -431,9 +431,6 @@ static const struct pci_device_id pciidlist[] = { }; MODULE_DEVICE_TABLE(pci, pciidlist); -extern int i915_driver_load(struct pci_dev *pdev, - const struct pci_device_id *ent); - static int i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { struct intel_device_info *intel_info = @@ -463,8 +460,6 @@ static int i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return i915_driver_load(pdev, ent); } -extern void i915_driver_unload(struct drm_device *dev); - static void i915_pci_remove(struct pci_dev *pdev) { struct drm_device *dev = pci_get_drvdata(pdev); @@ -473,8 +468,6 @@ static void i915_pci_remove(struct pci_dev *pdev) drm_dev_unref(dev); } -extern const struct dev_pm_ops i915_pm_ops; - static struct pci_driver i915_pci_driver = { .name = DRIVER_NAME, .id_table = pciidlist, From 7a0e17bd5d5a20784431df24601bfafec9d8ec61 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 4 Oct 2016 12:54:12 +0300 Subject: [PATCH 725/884] drm/i915: workaround sparse warning on variable length arrays Fix sparse warning: drivers/gpu/drm/i915/intel_device_info.c:195:31: warning: Variable length array is used. In truth the array does have constant length, but sparse is too dumb to realize. This is a bit ugly, but silence the warning no matter what. Fixes: 91bedd34abf0 ("drm/i915/bdw: Check for slice, subslice and EU count for BDW") Reviewed-by: Joonas Lahtinen Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1475574853-4178-1-git-send-email-jani.nikula@intel.com (cherry picked from commit ff64aa1e630087381511c4d25de0657824f40efa) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_device_info.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 73b6858600ac..1b20e160bc1f 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -192,7 +192,7 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv) struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu; const int s_max = 3, ss_max = 3, eu_max = 8; int s, ss; - u32 fuse2, eu_disable[s_max]; + u32 fuse2, eu_disable[3]; /* s_max */ fuse2 = I915_READ(GEN8_FUSE2); sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT; From 01c72d6c17dc524f04d4dbe361d214e423b35457 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Tue, 4 Oct 2016 14:37:32 -0300 Subject: [PATCH 726/884] drm/i915/gen9: fix DDB partitioning for multi-screen cases With the previous code we were only recomputing the DDB partitioning for the CRTCs included in the atomic commit, so any other active CRTCs would end up having their DDB registers zeroed. In this patch we make sure that the computed state starts as a copy of the current partitioning, and then we only zero the DDBs that we're actually going to recompute. How to reproduce the bug: 1 - Enable the primary plane on pipe A 2 - Enable the primary plane on pipe B 3 - Enable the cursor or sprite plane on pipe A Step 3 will zero the DDB partitioning for pipe B since it's not included in the commit that enabled the cursor or sprite for pipe A. I expect this to fix many FIFO underrun problems on gen9+. v2: - Mention the cursor on the steps to reproduce the problem (Paulo). - Add Testcase tag provided by Maarten (Maarten). Testcase: kms_cursor_legacy.cursorA-vs-flipB-atomic-transitions Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96226 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96828 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97450 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97596 Bugzilla: https://www.phoronix.com/scan.php?page=news_item&px=Intel-Skylake-Multi-Screen-Woes Cc: stable@vger.kernel.org Signed-off-by: Paulo Zanoni Reviewed-by: Lyude Link: http://patchwork.freedesktop.org/patch/msgid/1475602652-17326-1-git-send-email-paulo.r.zanoni@intel.com (cherry picked from commit 5a920b85f2c6e3fd7d9dd9bb3f3345e9085e2360) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_pm.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index a2f751cd187a..1a11ab87ec5d 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3362,13 +3362,15 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, int num_active; int id, i; + /* Clear the partitioning for disabled planes. */ + memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe])); + memset(ddb->y_plane[pipe], 0, sizeof(ddb->y_plane[pipe])); + if (WARN_ON(!state)) return 0; if (!cstate->base.active) { ddb->pipe[pipe].start = ddb->pipe[pipe].end = 0; - memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe])); - memset(ddb->y_plane[pipe], 0, sizeof(ddb->y_plane[pipe])); return 0; } @@ -4050,6 +4052,12 @@ skl_compute_ddb(struct drm_atomic_state *state) intel_state->wm_results.dirty_pipes = ~0; } + /* + * We're not recomputing for the pipes not included in the commit, so + * make sure we start with the current state. + */ + memcpy(ddb, &dev_priv->wm.skl_hw.ddb, sizeof(*ddb)); + for_each_intel_crtc_mask(dev, intel_crtc, realloc_pipes) { struct intel_crtc_state *cstate; From 47ed32483e1f1fc391c78466f5fd66f03078e9e4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 12 Oct 2016 12:48:26 +0100 Subject: [PATCH 727/884] drm/i915: Use fence_write() from rpm resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During rpm resume we restore the fences, but we do not have the protection of struct_mutex. This rules out updating the activity tracking on the fences, and requires us to rely on the rpm as the serialisation barrier instead. [ 350.298052] [drm:intel_runtime_resume [i915]] Resuming device [ 350.308606] [ 350.310520] =============================== [ 350.315560] [ INFO: suspicious RCU usage. ] [ 350.320554] 4.8.0-rc8-bsw-rapl+ #3133 Tainted: G U W [ 350.327208] ------------------------------- [ 350.331977] ../drivers/gpu/drm/i915/i915_gem_request.h:371 suspicious rcu_dereference_protected() usage! [ 350.342619] [ 350.342619] other info that might help us debug this: [ 350.342619] [ 350.351593] [ 350.351593] rcu_scheduler_active = 1, debug_locks = 0 [ 350.358952] 3 locks held by Xorg/320: [ 350.363077] #0: (&dev->mode_config.mutex){+.+.+.}, at: [] drm_modeset_lock_all+0x3c/0xd0 [drm] [ 350.375162] #1: (crtc_ww_class_acquire){+.+.+.}, at: [] drm_modeset_lock_all+0x46/0xd0 [drm] [ 350.387022] #2: (crtc_ww_class_mutex){+.+.+.}, at: [] drm_modeset_lock+0x36/0x110 [drm] [ 350.398236] [ 350.398236] stack backtrace: [ 350.403196] CPU: 1 PID: 320 Comm: Xorg Tainted: G U W 4.8.0-rc8-bsw-rapl+ #3133 [ 350.412457] Hardware name: Intel Corporation CHERRYVIEW C0 PLATFORM/Braswell CRB, BIOS BRAS.X64.X088.R00.1510270350 10/27/2015 [ 350.425212] 0000000000000000 ffff8801680a78c8 ffffffff81332187 ffff88016c5c5000 [ 350.433611] 0000000000000001 ffff8801680a78f8 ffffffff810ca6da ffff88016cc8b0f0 [ 350.442012] ffff88016cc80000 ffff88016cc80000 ffff880177ad0000 ffff8801680a7948 [ 350.450409] Call Trace: [ 350.453165] [] dump_stack+0x67/0x90 [ 350.458931] [] lockdep_rcu_suspicious+0xea/0x120 [ 350.466002] [] fence_update+0xbd/0x670 [i915] [ 350.472766] [] i915_gem_restore_fences+0x52/0x70 [i915] [ 350.480496] [] vlv_resume_prepare+0x72/0x570 [i915] [ 350.487839] [] intel_runtime_resume+0x102/0x210 [i915] [ 350.495442] [] pci_pm_runtime_resume+0x7f/0xb0 [ 350.502274] [] ? pci_restore_standard_config+0x40/0x40 [ 350.509883] [] __rpm_callback+0x35/0x70 [ 350.516037] [] ? pci_restore_standard_config+0x40/0x40 [ 350.523646] [] rpm_callback+0x24/0x80 [ 350.529604] [] ? pci_restore_standard_config+0x40/0x40 [ 350.537212] [] rpm_resume+0x4ad/0x740 [ 350.543161] [] __pm_runtime_resume+0x51/0x80 [ 350.549824] [] intel_runtime_pm_get+0x28/0x90 [i915] [ 350.557265] [] intel_display_power_get+0x23/0x50 [i915] [ 350.565001] [] intel_atomic_commit_tail+0xdfd/0x10b0 [i915] [ 350.573106] [] ? drm_atomic_helper_swap_state+0x159/0x300 [drm_kms_helper] [ 350.582659] [] ? _raw_spin_unlock+0x31/0x50 [ 350.589205] [] ? drm_atomic_helper_swap_state+0x159/0x300 [drm_kms_helper] [ 350.598787] [] intel_atomic_commit+0x3b5/0x500 [i915] [ 350.606319] [] ? drm_atomic_set_crtc_for_connector+0xcc/0x100 [drm] [ 350.615209] [] drm_atomic_commit+0x49/0x50 [drm] [ 350.622242] [] drm_atomic_helper_set_config+0x88/0xc0 [drm_kms_helper] [ 350.631419] [] drm_mode_set_config_internal+0x6c/0x120 [drm] [ 350.639623] [] drm_mode_setcrtc+0x22c/0x4d0 [drm] [ 350.646760] [] drm_ioctl+0x209/0x460 [drm] [ 350.653217] [] ? drm_mode_getcrtc+0x150/0x150 [drm] [ 350.660536] [] ? __lock_is_held+0x4a/0x70 [ 350.666885] [] do_vfs_ioctl+0x93/0x6b0 [ 350.672939] [] ? __fget+0x113/0x200 [ 350.678797] [] ? __fget+0x5/0x200 [ 350.684361] [] SyS_ioctl+0x44/0x80 [ 350.690030] [] do_syscall_64+0x5b/0x120 [ 350.696184] [] entry_SYSCALL64_slow_path+0x25/0x25 Note we also have to remember the lesson from commit 4fc788f5ee3d ("drm/i915: Flush delayed fence releases after reset") where we have to flush any changes to the fence on restore. v2: Replace call to release user mmaps with an assertion that they have already been zapped. Fixes: 49ef5294cda2 ("drm/i915: Move fence tracking from object to vma") Reported-by: Ville Syrjälä Signed-off-by: Chris Wilson Cc: Ville Syrjälä Cc: Joonas Lahtinen Cc: Mika Kuoppala Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161012114827.17031-1-chris@chris-wilson.co.uk (cherry picked from commit 4676dc838b37ed8c6f3da4571cb4a04cbd604801) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem_fence.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c index 8df1fa7234e8..2c7ba0ee127c 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence.c +++ b/drivers/gpu/drm/i915/i915_gem_fence.c @@ -290,6 +290,8 @@ i915_vma_put_fence(struct i915_vma *vma) { struct drm_i915_fence_reg *fence = vma->fence; + assert_rpm_wakelock_held(to_i915(vma->vm->dev)); + if (!fence) return 0; @@ -341,6 +343,8 @@ i915_vma_get_fence(struct i915_vma *vma) struct drm_i915_fence_reg *fence; struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL; + assert_rpm_wakelock_held(to_i915(vma->vm->dev)); + /* Just update our place in the LRU if our fence is getting reused. */ if (vma->fence) { fence = vma->fence; @@ -371,6 +375,12 @@ void i915_gem_restore_fences(struct drm_device *dev) struct drm_i915_private *dev_priv = to_i915(dev); int i; + /* Note that this may be called outside of struct_mutex, by + * runtime suspend/resume. The barrier we require is enforced by + * rpm itself - all access to fences/GTT are only within an rpm + * wakeref, and to acquire that wakeref you must pass through here. + */ + for (i = 0; i < dev_priv->num_fence_regs; i++) { struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; struct i915_vma *vma = reg->vma; @@ -379,10 +389,17 @@ void i915_gem_restore_fences(struct drm_device *dev) * Commit delayed tiling changes if we have an object still * attached to the fence, otherwise just clear the fence. */ - if (vma && !i915_gem_object_is_tiled(vma->obj)) - vma = NULL; + if (vma && !i915_gem_object_is_tiled(vma->obj)) { + GEM_BUG_ON(!reg->dirty); + GEM_BUG_ON(vma->obj->fault_mappable); - fence_update(reg, vma); + list_move(®->link, &dev_priv->mm.fence_list); + vma->fence = NULL; + vma = NULL; + } + + fence_write(reg, vma); + reg->vma = vma; } } From be4dd651ff79b9fddc46ef330afeedc6560fd1ca Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 13 Oct 2016 11:55:08 +0300 Subject: [PATCH 728/884] drm/i915: fix a read size argument We want to read 3 bytes here, but because the parenthesis are in the wrong place we instead read: sizeof(intel_dp->edp_dpcd) == sizeof(intel_dp->edp_dpcd) which is one byte. Fixes: fe5a66f91c88 ("drm/i915: Read PSR caps/intermediate freqs/etc. only once on eDP") Reviewed-by: Chris Wilson Cc: Signed-off-by: Dan Carpenter Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20161013085508.GJ16198@mwanda (cherry picked from commit f7170e2eb8f6bf7ef2032cc0659cd38740bf5b97) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_dp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 14a3cf0b7213..ee8aa95967de 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -3551,8 +3551,8 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp) /* Read the eDP Display control capabilities registers */ if ((intel_dp->dpcd[DP_EDP_CONFIGURATION_CAP] & DP_DPCD_DISPLAY_CONTROL_CAPABLE) && drm_dp_dpcd_read(&intel_dp->aux, DP_EDP_DPCD_REV, - intel_dp->edp_dpcd, sizeof(intel_dp->edp_dpcd) == - sizeof(intel_dp->edp_dpcd))) + intel_dp->edp_dpcd, sizeof(intel_dp->edp_dpcd)) == + sizeof(intel_dp->edp_dpcd)) DRM_DEBUG_KMS("EDP DPCD : %*ph\n", (int) sizeof(intel_dp->edp_dpcd), intel_dp->edp_dpcd); From fd58753ead86ee289ea89fe26e1842f36e54b36c Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Thu, 13 Oct 2016 14:34:06 +0300 Subject: [PATCH 729/884] drm/i915: Fix mismatched INIT power domain disabling during suspend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently the display INIT power domain disabling/enabling happens in a mismatched way in the suspend/resume_early hooks respectively. This can leave display power wells incorrectly disabled in the resume hook if the suspend sequence is aborted for some reason resulting in the suspend/resume hooks getting called but the suspend_late/resume_early hooks being skipped. In particular this change fixes "Unclaimed read from register 0x1e1204" on BYT/BSW triggered from i915_drm_resume()-> intel_pps_unlock_regs_wa() when suspending with /sys/power/pm_test set to devices. Fixes: 85e90679335f ("drm/i915: disable power wells on suspend") Cc: Ville Syrjälä Cc: David Weinehall Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1476358446-11621-1-git-send-email-imre.deak@intel.com (cherry picked from commit 4c494a5769cd0de92638b25960ba0158c36088a6) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index bfb2efd8d4d4..18dfdd5c1b3b 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1447,8 +1447,6 @@ static int i915_drm_suspend(struct drm_device *dev) dev_priv->suspend_count++; - intel_display_set_init_power(dev_priv, false); - intel_csr_ucode_suspend(dev_priv); out: @@ -1466,6 +1464,8 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation) disable_rpm_wakeref_asserts(dev_priv); + intel_display_set_init_power(dev_priv, false); + fw_csr = !IS_BROXTON(dev_priv) && suspend_to_idle(dev_priv) && dev_priv->csr.dmc_payload; /* From 5e33791e1f27c3207e7b44071e7c94a487d1eb39 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Fri, 7 Oct 2016 17:28:57 -0300 Subject: [PATCH 730/884] drm/i915/gen9: fix watermarks when using the pipe scaler Luckily, the necessary adjustments for when we're using the scaler are exactly the same as the ones needed on ILK+, so just reuse the function we already have. v2: Invert the patch order so stable backports get easier. Cc: stable@vger.kernel.org Signed-off-by: Paulo Zanoni Reviewed-by: Matt Roper Link: http://patchwork.freedesktop.org/patch/msgid/1475872138-16194-1-git-send-email-paulo.r.zanoni@intel.com (cherry picked from commit cfd7e3a20251b9ac95651d64556f87f86128a966) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_pm.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 1a11ab87ec5d..db24f898853c 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3470,12 +3470,6 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, return 0; } -static uint32_t skl_pipe_pixel_rate(const struct intel_crtc_state *config) -{ - /* TODO: Take into account the scalers once we support them */ - return config->base.adjusted_mode.crtc_clock; -} - /* * The max latency should be 257 (max the punit can code is 255 and we add 2us * for the read latency) and cpp should always be <= 8, so that @@ -3526,7 +3520,7 @@ static uint32_t skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cst * Adjusted plane pixel rate is just the pipe's adjusted pixel rate * with additional adjustments for plane-specific scaling. */ - adjusted_pixel_rate = skl_pipe_pixel_rate(cstate); + adjusted_pixel_rate = ilk_pipe_pixel_rate(cstate); downscale_amount = skl_plane_downscale_amount(pstate); pixel_rate = adjusted_pixel_rate * downscale_amount >> 16; @@ -3738,11 +3732,11 @@ skl_compute_linetime_wm(struct intel_crtc_state *cstate) if (!cstate->base.active) return 0; - if (WARN_ON(skl_pipe_pixel_rate(cstate) == 0)) + if (WARN_ON(ilk_pipe_pixel_rate(cstate) == 0)) return 0; return DIV_ROUND_UP(8 * cstate->base.adjusted_mode.crtc_htotal * 1000, - skl_pipe_pixel_rate(cstate)); + ilk_pipe_pixel_rate(cstate)); } static void skl_compute_transition_wm(struct intel_crtc_state *cstate, From 198c5ee3c60169f8b64bcd330a34593be80699aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 11 Oct 2016 20:52:45 +0300 Subject: [PATCH 731/884] drm/i915: Respect alternate_aux_channel for all DDI ports MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The VBT provides the platform a way to mix and match the DDI ports vs. AUX channels. Currently we only trust the VBT for DDI E, which has no corresponding AUX channel of its own. However it is possible that some board might use some non-standard DDI vs. AUX port routing even for the other ports. Perhaps for signal routing reasons or something, So let's generalize this and trust the VBT for all ports. For now we'll limit this to DDI platforms, as we trust the VBT a bit more there anyway when it comes to the DDI ports. I've structured the code in a way that would allow us to easily expand this to other platforms as well, by simply filling in the ddi_port_info. v2: Drop whitespace changes, keep MISSING_CASE() for unknown aux ch assignment, include a commit message, include debug message during init Cc: stable@vger.kernel.org Cc: Maarten Maathuis Tested-by: Maarten Maathuis References: https://bugs.freedesktop.org/show_bug.cgi?id=97877 Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1476208368-5710-2-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Jim Bride (cherry picked from commit 8f7ce038f1178057733b7e765bf9160a2f9be14b) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_dp.c | 71 +++++++++++++++++++-------------- 1 file changed, 40 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index ee8aa95967de..3581b5a7f716 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1108,6 +1108,44 @@ intel_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) return ret; } +static enum port intel_aux_port(struct drm_i915_private *dev_priv, + enum port port) +{ + const struct ddi_vbt_port_info *info = + &dev_priv->vbt.ddi_port_info[port]; + enum port aux_port; + + if (!info->alternate_aux_channel) { + DRM_DEBUG_KMS("using AUX %c for port %c (platform default)\n", + port_name(port), port_name(port)); + return port; + } + + switch (info->alternate_aux_channel) { + case DP_AUX_A: + aux_port = PORT_A; + break; + case DP_AUX_B: + aux_port = PORT_B; + break; + case DP_AUX_C: + aux_port = PORT_C; + break; + case DP_AUX_D: + aux_port = PORT_D; + break; + default: + MISSING_CASE(info->alternate_aux_channel); + aux_port = PORT_A; + break; + } + + DRM_DEBUG_KMS("using AUX %c for port %c (VBT)\n", + port_name(aux_port), port_name(port)); + + return aux_port; +} + static i915_reg_t g4x_aux_ctl_reg(struct drm_i915_private *dev_priv, enum port port) { @@ -1168,36 +1206,9 @@ static i915_reg_t ilk_aux_data_reg(struct drm_i915_private *dev_priv, } } -/* - * On SKL we don't have Aux for port E so we rely - * on VBT to set a proper alternate aux channel. - */ -static enum port skl_porte_aux_port(struct drm_i915_private *dev_priv) -{ - const struct ddi_vbt_port_info *info = - &dev_priv->vbt.ddi_port_info[PORT_E]; - - switch (info->alternate_aux_channel) { - case DP_AUX_A: - return PORT_A; - case DP_AUX_B: - return PORT_B; - case DP_AUX_C: - return PORT_C; - case DP_AUX_D: - return PORT_D; - default: - MISSING_CASE(info->alternate_aux_channel); - return PORT_A; - } -} - static i915_reg_t skl_aux_ctl_reg(struct drm_i915_private *dev_priv, enum port port) { - if (port == PORT_E) - port = skl_porte_aux_port(dev_priv); - switch (port) { case PORT_A: case PORT_B: @@ -1213,9 +1224,6 @@ static i915_reg_t skl_aux_ctl_reg(struct drm_i915_private *dev_priv, static i915_reg_t skl_aux_data_reg(struct drm_i915_private *dev_priv, enum port port, int index) { - if (port == PORT_E) - port = skl_porte_aux_port(dev_priv); - switch (port) { case PORT_A: case PORT_B: @@ -1253,7 +1261,8 @@ static i915_reg_t intel_aux_data_reg(struct drm_i915_private *dev_priv, static void intel_aux_reg_init(struct intel_dp *intel_dp) { struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); - enum port port = dp_to_dig_port(intel_dp)->port; + enum port port = intel_aux_port(dev_priv, + dp_to_dig_port(intel_dp)->port); int i; intel_dp->aux_ch_ctl_reg = intel_aux_ctl_reg(dev_priv, port); From 0ce140d45a8398b501934ac289aef0eb7f47c596 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 11 Oct 2016 20:52:47 +0300 Subject: [PATCH 732/884] drm/i915: Clean up DDI DDC/AUX CH sanitation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that we use the AUX and GMBUS assignment from VBT for all ports, let's clean up the sanitization of the port information a bit. Previosuly we only did this for port E, and only complained about a non-standard assignment for the other ports. But as we know that non-standard assignments are a fact of life, let's expand the sanitization to all the ports. v2: Include a commit message, fix up the comments a bit v3: Don't clobber other ports if the current port has no alternate aux ch/ddc pin Cc: stable@vger.kernel.org Cc: Maarten Maathuis Tested-by: Maarten Maathuis References: https://bugs.freedesktop.org/show_bug.cgi?id=97877 Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1476208368-5710-4-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Jim Bride (v2) (cherry picked from commit 9454fa871edf15c20a0371548b3ec0d6d944a498) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_bios.c | 122 +++++++++++++++++++----------- 1 file changed, 77 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index c6e69e4cfa83..1f8af87c6294 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -1031,6 +1031,77 @@ static u8 translate_iboost(u8 val) return mapping[val]; } +static void sanitize_ddc_pin(struct drm_i915_private *dev_priv, + enum port port) +{ + const struct ddi_vbt_port_info *info = + &dev_priv->vbt.ddi_port_info[port]; + enum port p; + + if (!info->alternate_ddc_pin) + return; + + for_each_port_masked(p, (1 << port) - 1) { + struct ddi_vbt_port_info *i = &dev_priv->vbt.ddi_port_info[p]; + + if (info->alternate_ddc_pin != i->alternate_ddc_pin) + continue; + + DRM_DEBUG_KMS("port %c trying to use the same DDC pin (0x%x) as port %c, " + "disabling port %c DVI/HDMI support\n", + port_name(p), i->alternate_ddc_pin, + port_name(port), port_name(p)); + + /* + * If we have multiple ports supposedly sharing the + * pin, then dvi/hdmi couldn't exist on the shared + * port. Otherwise they share the same ddc bin and + * system couldn't communicate with them separately. + * + * Due to parsing the ports in alphabetical order, + * a higher port will always clobber a lower one. + */ + i->supports_dvi = false; + i->supports_hdmi = false; + i->alternate_ddc_pin = 0; + } +} + +static void sanitize_aux_ch(struct drm_i915_private *dev_priv, + enum port port) +{ + const struct ddi_vbt_port_info *info = + &dev_priv->vbt.ddi_port_info[port]; + enum port p; + + if (!info->alternate_aux_channel) + return; + + for_each_port_masked(p, (1 << port) - 1) { + struct ddi_vbt_port_info *i = &dev_priv->vbt.ddi_port_info[p]; + + if (info->alternate_aux_channel != i->alternate_aux_channel) + continue; + + DRM_DEBUG_KMS("port %c trying to use the same AUX CH (0x%x) as port %c, " + "disabling port %c DP support\n", + port_name(p), i->alternate_aux_channel, + port_name(port), port_name(p)); + + /* + * If we have multiple ports supposedlt sharing the + * aux channel, then DP couldn't exist on the shared + * port. Otherwise they share the same aux channel + * and system couldn't communicate with them separately. + * + * Due to parsing the ports in alphabetical order, + * a higher port will always clobber a lower one. + */ + i->supports_dp = false; + i->alternate_aux_channel = 0; + } +} + static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, const struct bdb_header *bdb) { @@ -1105,54 +1176,15 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, DRM_DEBUG_KMS("Port %c is internal DP\n", port_name(port)); if (is_dvi) { - if (port == PORT_E) { - info->alternate_ddc_pin = ddc_pin; - /* if DDIE share ddc pin with other port, then - * dvi/hdmi couldn't exist on the shared port. - * Otherwise they share the same ddc bin and system - * couldn't communicate with them seperately. */ - if (ddc_pin == DDC_PIN_B) { - dev_priv->vbt.ddi_port_info[PORT_B].supports_dvi = 0; - dev_priv->vbt.ddi_port_info[PORT_B].supports_hdmi = 0; - } else if (ddc_pin == DDC_PIN_C) { - dev_priv->vbt.ddi_port_info[PORT_C].supports_dvi = 0; - dev_priv->vbt.ddi_port_info[PORT_C].supports_hdmi = 0; - } else if (ddc_pin == DDC_PIN_D) { - dev_priv->vbt.ddi_port_info[PORT_D].supports_dvi = 0; - dev_priv->vbt.ddi_port_info[PORT_D].supports_hdmi = 0; - } - } else if (ddc_pin == DDC_PIN_B && port != PORT_B) - DRM_DEBUG_KMS("Unexpected DDC pin for port B\n"); - else if (ddc_pin == DDC_PIN_C && port != PORT_C) - DRM_DEBUG_KMS("Unexpected DDC pin for port C\n"); - else if (ddc_pin == DDC_PIN_D && port != PORT_D) - DRM_DEBUG_KMS("Unexpected DDC pin for port D\n"); + info->alternate_ddc_pin = ddc_pin; + + sanitize_ddc_pin(dev_priv, port); } if (is_dp) { - if (port == PORT_E) { - info->alternate_aux_channel = aux_channel; - /* if DDIE share aux channel with other port, then - * DP couldn't exist on the shared port. Otherwise - * they share the same aux channel and system - * couldn't communicate with them seperately. */ - if (aux_channel == DP_AUX_A) - dev_priv->vbt.ddi_port_info[PORT_A].supports_dp = 0; - else if (aux_channel == DP_AUX_B) - dev_priv->vbt.ddi_port_info[PORT_B].supports_dp = 0; - else if (aux_channel == DP_AUX_C) - dev_priv->vbt.ddi_port_info[PORT_C].supports_dp = 0; - else if (aux_channel == DP_AUX_D) - dev_priv->vbt.ddi_port_info[PORT_D].supports_dp = 0; - } - else if (aux_channel == DP_AUX_A && port != PORT_A) - DRM_DEBUG_KMS("Unexpected AUX channel for port A\n"); - else if (aux_channel == DP_AUX_B && port != PORT_B) - DRM_DEBUG_KMS("Unexpected AUX channel for port B\n"); - else if (aux_channel == DP_AUX_C && port != PORT_C) - DRM_DEBUG_KMS("Unexpected AUX channel for port C\n"); - else if (aux_channel == DP_AUX_D && port != PORT_D) - DRM_DEBUG_KMS("Unexpected AUX channel for port D\n"); + info->alternate_aux_channel = aux_channel; + + sanitize_aux_ch(dev_priv, port); } if (bdb->version >= 158) { From 1fb3672eaf6ec95fb34c22734feffd6041531c5b Mon Sep 17 00:00:00 2001 From: Daniel Stone Date: Fri, 21 Oct 2016 15:44:54 +0100 Subject: [PATCH 733/884] drm: i915: Wait for fences on new fb, not old The previous code would wait for fences on the framebuffer from the old plane state to complete, rather than the new, so you would see tearing everywhere. Fix this to wait on the new state before we make it active. Signed-off-by: Daniel Stone Fixes: 94f050246b42 ("drm/i915: nonblocking commit") Cc: stable@vger.kernel.org Cc: Daniel Vetter Cc: Maarten Lankhorst Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20161021144454.6288-1-daniels@collabora.com (cherry picked from commit 2d2c5ad83f772d7d7b0bb8348ecea42e88f89ab0) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index fbcfed63a76e..319191e6988c 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14310,7 +14310,7 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) for_each_plane_in_state(state, plane, plane_state, i) { struct intel_plane_state *intel_plane_state = - to_intel_plane_state(plane_state); + to_intel_plane_state(plane->state); if (!intel_plane_state->wait_req) continue; From e3b9e6e3a989904ae062e7a48a9431edc837ea6b Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Fri, 21 Oct 2016 13:55:45 -0200 Subject: [PATCH 734/884] drm/i915/fbc: fix CFB size calculation for gen8+ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Broadwell and newer actually compress up to 2560 lines instead of 2048 (as documented in the FBC_CTL page). If we don't take this into consideration we end up reserving too little stolen memory for the CFB, so we may allocate something else (such as a ring) right after what we reserved, and the hardware will overwrite it with the contents of the CFB when FBC is active, causing GPU hangs. Another possibility is that the CFB may be allocated at the very end of the available space, so the CFB will overlap the reserved stolen area, leading to FIFO underruns. This bug has always been a problem on BDW (the only affected platform where FBC is enabled by default), but it's much easier to reproduce since the following commit: commit c58b735fc762e891481e92af7124b85cb0a51fce Author: Chris Wilson Date: Thu Aug 18 17:16:57 2016 +0100 drm/i915: Allocate rings from stolen Of course, you can only reproduce the bug if your screen is taller than 2048 lines. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98213 Fixes: a98ee79317b4 ("drm/i915/fbc: enable FBC by default on HSW and BDW") Cc: # v4.6+ Signed-off-by: Paulo Zanoni Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1477065346-13736-1-git-send-email-paulo.r.zanoni@intel.com (cherry picked from commit 79f2624b1b9f776b173b41d743fb3dd7374b3827) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_fbc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index faa67624e1ed..c43dd9abce79 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -104,8 +104,10 @@ static int intel_fbc_calculate_cfb_size(struct drm_i915_private *dev_priv, int lines; intel_fbc_get_plane_source_size(cache, NULL, &lines); - if (INTEL_INFO(dev_priv)->gen >= 7) + if (INTEL_GEN(dev_priv) == 7) lines = min(lines, 2048); + else if (INTEL_GEN(dev_priv) >= 8) + lines = min(lines, 2560); /* Hardware needs the full buffer stride, not just the active area. */ return lines * cache->fb.stride; From 6d9deb910c8eee8728129326dc88ec109b03b135 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 25 Oct 2016 17:40:35 +0100 Subject: [PATCH 735/884] drm/i915: Rotated view does not need a fence We do not need to set up a fence for the rotated view. Display does not need it and no one can access it. v2: Move code to __i915_vma_set_map_and_fenceable. (Chris Wilson) Signed-off-by: Tvrtko Ursulin Fixes: 05a20d098db1 ("drm/i915: Move map-and-fenceable tracking to the VMA") Cc: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Chris Wilson (cherry picked from commit 07ee2bce6a516e0218dba22581803cb8f11bcf82) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 947e82c2b175..e1053c4949f4 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3745,7 +3745,12 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) mappable = (vma->node.start + fence_size <= dev_priv->ggtt.mappable_end); - if (mappable && fenceable) + /* + * Explicitly disable for rotated VMA since the display does not + * need the fence and the VMA is not accessible to other users. + */ + if (mappable && fenceable && + vma->ggtt_view.type != I915_GGTT_VIEW_ROTATED) vma->flags |= I915_VMA_CAN_FENCE; else vma->flags &= ~I915_VMA_CAN_FENCE; From 7e9b3f95d64165f37c6448fe624d835c5adf17b9 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 25 Oct 2016 17:41:12 +0100 Subject: [PATCH 736/884] drm/i915: Remove two invalid warns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Objects can have multiple VMAs used for display in which case assertion that objects must not be pinned for display more times than the current VMA is incorrect. v2: Commit message update. (Chris Wilson) Signed-off-by: Tvrtko Ursulin Fixes: 058d88c4330f ("drm/i915: Track pinned VMA") Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Ville Syrjälä Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1477413635-3876-1-git-send-email-tvrtko.ursulin@linux.intel.com (cherry picked from commit 3299e7e43484a85eeb5c7ec09958bff05c9d0543) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e1053c4949f4..23960de81b57 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3550,8 +3550,6 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, vma->display_alignment = max_t(u64, vma->display_alignment, alignment); - WARN_ON(obj->pin_display > i915_vma_pin_count(vma)); - i915_gem_object_flush_cpu_write_domain(obj); old_write_domain = obj->base.write_domain; @@ -3588,7 +3586,6 @@ i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) list_move_tail(&vma->vm_link, &vma->vm->inactive_list); i915_vma_unpin(vma); - WARN_ON(vma->obj->pin_display > i915_vma_pin_count(vma)); } /** From c89d5454d48e3b6ecae1c9f8bb14b3fac64d9fc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 24 Oct 2016 19:13:04 +0300 Subject: [PATCH 737/884] drm/i915: Fix SKL+ 90/270 degree rotated plane coordinate computation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass the framebuffer size in .16 fixed point coordinates to drm_rect_rotate() since that's what the source coordinates are as well at this stage. We used to do this part of the computation in integer coordinates, but that got changed when moving the computation to happen in the check phase of the operation. Unfortunately I forgot to shift up the fb width and height appropriately. With the bogus size we ended up with some negative fb offset, which when added to the vma offset caused out scanout to start at an offset earlier than we inteded. Eg. when testing on my SKL I saw a row of incorrect tiles at the top of my screen. Cc: Tvrtko Ursulin Cc: Sivakumar Thulasimani Cc: drm-intel-fixes@lists.freedesktop.org Fixes: b63a16f6cd89 ("drm/i915: Compute display surface offset in the plane check hook for SKL+") Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1477325584-23679-1-git-send-email-ville.syrjala@linux.intel.com Tested-by: Tvrtko Ursulin Reviewed-by: Chris Wilson (cherry picked from commit da064b47c0b8d0dff1905b38c76e7e51fb5a9547) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_display.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 319191e6988c..0ad1879bfd9d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2978,7 +2978,8 @@ int skl_check_plane_surface(struct intel_plane_state *plane_state) /* Rotate src coordinates to match rotated GTT view */ if (intel_rotation_90_or_270(rotation)) drm_rect_rotate(&plane_state->base.src, - fb->width, fb->height, DRM_ROTATE_270); + fb->width << 16, fb->height << 16, + DRM_ROTATE_270); /* * Handle the AUX surface first since From 6ad37567b6b886121e250036e489d82cde5e5e94 Mon Sep 17 00:00:00 2001 From: Martyn Welch Date: Fri, 21 Oct 2016 17:36:19 +0100 Subject: [PATCH 738/884] vme: vme_get_size potentially returning incorrect value on failure The function vme_get_size returns the size of the window to the caller, however it doesn't check the return value of the call to vme_master_get. Return 0 on failure rather than anything else. Suggested-by: Dan Carpenter Signed-off-by: Martyn Welch Signed-off-by: Greg Kroah-Hartman --- drivers/vme/vme.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/vme/vme.c b/drivers/vme/vme.c index 15b64076bc26..bdbadaa47ef3 100644 --- a/drivers/vme/vme.c +++ b/drivers/vme/vme.c @@ -156,12 +156,16 @@ size_t vme_get_size(struct vme_resource *resource) case VME_MASTER: retval = vme_master_get(resource, &enabled, &base, &size, &aspace, &cycle, &dwidth); + if (retval) + return 0; return size; break; case VME_SLAVE: retval = vme_slave_get(resource, &enabled, &base, &size, &buf_base, &aspace, &cycle); + if (retval) + return 0; return size; break; From a7a7aeefbca2982586ba2c9fd7739b96416a6d1d Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Wed, 19 Oct 2016 12:29:41 +0200 Subject: [PATCH 739/884] GenWQE: Fix bad page access during abort of resource allocation When interrupting an application which was allocating DMAable memory, it was possible, that the DMA memory was deallocated twice, leading to the error symptoms below. Thanks to Gerald, who analyzed the problem and provided this patch. I agree with his analysis of the problem: ddcb_cmd_fixups() -> genwqe_alloc_sync_sgl() (fails in f/lpage, but sgl->sgl != NULL and f/lpage maybe also != NULL) -> ddcb_cmd_cleanup() -> genwqe_free_sync_sgl() (double free, because sgl->sgl != NULL and f/lpage maybe also != NULL) In this scenario we would have exactly the kind of double free that would explain the WARNING / Bad page state, and as expected it is caused by broken error handling (cleanup). Using the Ubuntu git source, tag Ubuntu-4.4.0-33.52, he was able to reproduce the "Bad page state" issue, and with the patch on top he could not reproduce it any more. ------------[ cut here ]------------ WARNING: at /build/linux-o03cxz/linux-4.4.0/arch/s390/include/asm/pci_dma.h:141 Modules linked in: qeth_l2 ghash_s390 prng aes_s390 des_s390 des_generic sha512_s390 sha256_s390 sha1_s390 sha_common genwqe_card qeth crc_itu_t qdio ccwgroup vmur dm_multipath dasd_eckd_mod dasd_mod CPU: 2 PID: 3293 Comm: genwqe_gunzip Not tainted 4.4.0-33-generic #52-Ubuntu task: 0000000032c7e270 ti: 00000000324e4000 task.ti: 00000000324e4000 Krnl PSW : 0404c00180000000 0000000000156346 (dma_update_cpu_trans+0x9e/0xa8) R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 EA:3 Krnl GPRS: 00000000324e7bcd 0000000000c3c34a 0000000027628298 000000003215b400 0000000000000400 0000000000001fff 0000000000000400 0000000116853000 07000000324e7b1e 0000000000000001 0000000000000001 0000000000000001 0000000000001000 0000000116854000 0000000000156402 00000000324e7a38 Krnl Code: 000000000015633a: 95001000 cli 0(%r1),0 000000000015633e: a774ffc3 brc 7,1562c4 #0000000000156342: a7f40001 brc 15,156344 >0000000000156346: 92011000 mvi 0(%r1),1 000000000015634a: a7f4ffbd brc 15,1562c4 000000000015634e: 0707 bcr 0,%r7 0000000000156350: c00400000000 brcl 0,156350 0000000000156356: eb7ff0500024 stmg %r7,%r15,80(%r15) Call Trace: ([<00000000001563e0>] dma_update_trans+0x90/0x228) [<00000000001565dc>] s390_dma_unmap_pages+0x64/0x160 [<00000000001567c2>] s390_dma_free+0x62/0x98 [<000003ff801310ce>] __genwqe_free_consistent+0x56/0x70 [genwqe_card] [<000003ff801316d0>] genwqe_free_sync_sgl+0xf8/0x160 [genwqe_card] [<000003ff8012bd6e>] ddcb_cmd_cleanup+0x86/0xa8 [genwqe_card] [<000003ff8012c1c0>] do_execute_ddcb+0x110/0x348 [genwqe_card] [<000003ff8012c914>] genwqe_ioctl+0x51c/0xc20 [genwqe_card] [<000000000032513a>] do_vfs_ioctl+0x3b2/0x518 [<0000000000325344>] SyS_ioctl+0xa4/0xb8 [<00000000007b86c6>] system_call+0xd6/0x264 [<000003ff9e8e520a>] 0x3ff9e8e520a Last Breaking-Event-Address: [<0000000000156342>] dma_update_cpu_trans+0x9a/0xa8 ---[ end trace 35996336235145c8 ]--- BUG: Bad page state in process jbd2/dasdb1-8 pfn:3215b page:000003d100c856c0 count:-1 mapcount:0 mapping: (null) index:0x0 flags: 0x3fffc0000000000() page dumped because: nonzero _count Signed-off-by: Gerald Schaefer Signed-off-by: Frank Haverkamp Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/misc/genwqe/card_utils.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/misc/genwqe/card_utils.c b/drivers/misc/genwqe/card_utils.c index 8a679ecc8fd1..fc2794b513fa 100644 --- a/drivers/misc/genwqe/card_utils.c +++ b/drivers/misc/genwqe/card_utils.c @@ -352,17 +352,27 @@ int genwqe_alloc_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl, if (copy_from_user(sgl->lpage, user_addr + user_size - sgl->lpage_size, sgl->lpage_size)) { rc = -EFAULT; - goto err_out1; + goto err_out2; } } return 0; + err_out2: + __genwqe_free_consistent(cd, PAGE_SIZE, sgl->lpage, + sgl->lpage_dma_addr); + sgl->lpage = NULL; + sgl->lpage_dma_addr = 0; err_out1: __genwqe_free_consistent(cd, PAGE_SIZE, sgl->fpage, sgl->fpage_dma_addr); + sgl->fpage = NULL; + sgl->fpage_dma_addr = 0; err_out: __genwqe_free_consistent(cd, sgl->sgl_size, sgl->sgl, sgl->sgl_dma_addr); + sgl->sgl = NULL; + sgl->sgl_dma_addr = 0; + sgl->sgl_size = 0; return -ENOMEM; } From eb94cd68abd9b7c92bf70ddc452d65f1a84c46e2 Mon Sep 17 00:00:00 2001 From: Jorgen Hansen Date: Thu, 6 Oct 2016 04:43:08 -0700 Subject: [PATCH 740/884] VMCI: Doorbell create and destroy fixes This change consists of two changes: 1) If vmci_doorbell_create is called when neither guest nor host personality as been initialized, vmci_get_context_id will return VMCI_INVALID_ID. In that case, we should fail the create call. 2) In doorbell destroy, we assume that vmci_guest_code_active() has the same return value on create and destroy. That may not be the case, so we may end up with the wrong refcount. Instead, destroy should check explicitly whether the doorbell is in the index table as an indicator of whether the guest code was active at create time. Reviewed-by: Adit Ranadive Signed-off-by: Jorgen Hansen Signed-off-by: Greg Kroah-Hartman --- drivers/misc/vmw_vmci/vmci_doorbell.c | 8 +++++++- drivers/misc/vmw_vmci/vmci_driver.c | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/misc/vmw_vmci/vmci_doorbell.c b/drivers/misc/vmw_vmci/vmci_doorbell.c index a8cee33ae8d2..b3fa738ae005 100644 --- a/drivers/misc/vmw_vmci/vmci_doorbell.c +++ b/drivers/misc/vmw_vmci/vmci_doorbell.c @@ -431,6 +431,12 @@ int vmci_doorbell_create(struct vmci_handle *handle, if (vmci_handle_is_invalid(*handle)) { u32 context_id = vmci_get_context_id(); + if (context_id == VMCI_INVALID_ID) { + pr_warn("Failed to get context ID\n"); + result = VMCI_ERROR_NO_RESOURCES; + goto free_mem; + } + /* Let resource code allocate a free ID for us */ new_handle = vmci_make_handle(context_id, VMCI_INVALID_ID); } else { @@ -525,7 +531,7 @@ int vmci_doorbell_destroy(struct vmci_handle handle) entry = container_of(resource, struct dbell_entry, resource); - if (vmci_guest_code_active()) { + if (!hlist_unhashed(&entry->node)) { int result; dbell_index_table_remove(entry); diff --git a/drivers/misc/vmw_vmci/vmci_driver.c b/drivers/misc/vmw_vmci/vmci_driver.c index 896be150e28f..d7eaf1eb11e7 100644 --- a/drivers/misc/vmw_vmci/vmci_driver.c +++ b/drivers/misc/vmw_vmci/vmci_driver.c @@ -113,5 +113,5 @@ module_exit(vmci_drv_exit); MODULE_AUTHOR("VMware, Inc."); MODULE_DESCRIPTION("VMware Virtual Machine Communication Interface."); -MODULE_VERSION("1.1.4.0-k"); +MODULE_VERSION("1.1.5.0-k"); MODULE_LICENSE("GPL v2"); From a7d5afe82d4931289250d6e807e35db3885b3b12 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Tue, 4 Oct 2016 15:26:48 -0300 Subject: [PATCH 741/884] MAINTAINERS: Add entry for genwqe driver Frank and I maintain this Signed-off-by: Gabriel Krisman Bertazi Cc: haver@linux.vnet.ibm.com Acked-by: Frank Haverkamp = Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index c44795306342..61f201b7cd95 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5287,6 +5287,12 @@ M: Joe Perches S: Maintained F: scripts/get_maintainer.pl +GENWQE (IBM Generic Workqueue Card) +M: Frank Haverkamp +M: Gabriel Krisman Bertazi +S: Supported +F: drivers/misc/genwqe/ + GFS2 FILE SYSTEM M: Steven Whitehouse M: Bob Peterson From 40b6e61ac72e99672e47cdb99c8d7d226004169b Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 28 Oct 2016 11:08:44 +0200 Subject: [PATCH 742/884] ubi: fastmap: Fix add_vol() return value test in ubi_attach_fastmap() Commit e96a8a3bb671 ("UBI: Fastmap: Do not add vol if it already exists") introduced a bug by changing the possible error codes returned by add_vol(): - this function no longer returns NULL in case of allocation failure but return ERR_PTR(-ENOMEM) - when a duplicate entry in the volume RB tree is found it returns ERR_PTR(-EEXIST) instead of ERR_PTR(-EINVAL) Fix the tests done on add_vol() return val to match this new behavior. Fixes: e96a8a3bb671 ("UBI: Fastmap: Do not add vol if it already exists") Reported-by: Dan Carpenter Signed-off-by: Boris Brezillon Acked-by: Sheng Yong Signed-off-by: Richard Weinberger --- drivers/mtd/ubi/fastmap.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c index 2ff62157d3bb..c1f5c29e458e 100644 --- a/drivers/mtd/ubi/fastmap.c +++ b/drivers/mtd/ubi/fastmap.c @@ -707,11 +707,11 @@ static int ubi_attach_fastmap(struct ubi_device *ubi, fmvhdr->vol_type, be32_to_cpu(fmvhdr->last_eb_bytes)); - if (!av) - goto fail_bad; - if (PTR_ERR(av) == -EINVAL) { - ubi_err(ubi, "volume (ID %i) already exists", - fmvhdr->vol_id); + if (IS_ERR(av)) { + if (PTR_ERR(av) == -EEXIST) + ubi_err(ubi, "volume (ID %i) already exists", + fmvhdr->vol_id); + goto fail_bad; } From a00052a296e54205cf238c75bd98d17d5d02a6db Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Fri, 28 Oct 2016 11:49:03 +0200 Subject: [PATCH 743/884] ubifs: Fix regression in ubifs_readdir() Commit c83ed4c9dbb35 ("ubifs: Abort readdir upon error") broke overlayfs support because the fix exposed an internal error code to VFS. Reported-by: Peter Rosin Tested-by: Peter Rosin Reported-by: Ralph Sennhauser Tested-by: Ralph Sennhauser Fixes: c83ed4c9dbb35 ("ubifs: Abort readdir upon error") Cc: stable@vger.kernel.org Signed-off-by: Richard Weinberger --- fs/ubifs/dir.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index bd4a5e8ce441..ca16c5d7bab1 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -543,6 +543,14 @@ out: if (err != -ENOENT) ubifs_err(c, "cannot find next direntry, error %d", err); + else + /* + * -ENOENT is a non-fatal error in this context, the TNC uses + * it to indicate that the cursor moved past the current directory + * and readdir() has to stop. + */ + err = 0; + /* 2 is a special value indicating that there are no more direntries */ ctx->pos = 2; From 711c1f2671174c918045e2cb20aece976ac516cd Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 13 Oct 2016 15:53:02 -0700 Subject: [PATCH 744/884] ARCv2: boot log: print IOC exists as well as enabled status Previously we would not print the case when IOC existed but was not enabled. And while at it, reduce one line off boot printing by consolidating the Peripheral address space and IO-Coherency which in a way applies to them Signed-off-by: Vineet Gupta --- arch/arc/include/asm/setup.h | 1 + arch/arc/kernel/setup.c | 4 +--- arch/arc/mm/cache.c | 9 +++------ 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/arch/arc/include/asm/setup.h b/arch/arc/include/asm/setup.h index 48b37c693db3..bdc43df922c9 100644 --- a/arch/arc/include/asm/setup.h +++ b/arch/arc/include/asm/setup.h @@ -43,5 +43,6 @@ void __init setup_arch_memory(void); #define IS_USED_RUN(v) ((v) ? "" : "(not used) ") #define IS_USED_CFG(cfg) IS_USED_RUN(IS_ENABLED(cfg)) #define IS_AVAIL2(v, s, cfg) IS_AVAIL1(v, s), IS_AVAIL1(v, IS_USED_CFG(cfg)) +#define IS_AVAIL3(v, v2, s) IS_AVAIL1(v, s), IS_AVAIL1(v, IS_DISABLED_RUN(v2)) #endif /* __ASMARC_SETUP_H */ diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 75e540972135..a77efa173df0 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -272,9 +272,7 @@ static char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len) FIX_PTR(cpu); - n += scnprintf(buf + n, len - n, - "Vector Table\t: %#x\nPeripherals\t: %#lx:%#lx\n", - cpu->vec_base, perip_base, perip_end); + n += scnprintf(buf + n, len - n, "Vector Table\t: %#x\n", cpu->vec_base); if (cpu->extn.fpu_sp || cpu->extn.fpu_dp) n += scnprintf(buf + n, len - n, "FPU\t\t: %s%s\n", diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 518ff76771f3..2b96cfc3be75 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -53,18 +53,15 @@ char *arc_cache_mumbojumbo(int c, char *buf, int len) PR_CACHE(&cpuinfo_arc700[c].icache, CONFIG_ARC_HAS_ICACHE, "I-Cache"); PR_CACHE(&cpuinfo_arc700[c].dcache, CONFIG_ARC_HAS_DCACHE, "D-Cache"); - if (!is_isa_arcv2()) - return buf; - p = &cpuinfo_arc700[c].slc; if (p->ver) n += scnprintf(buf + n, len - n, "SLC\t\t: %uK, %uB Line%s\n", p->sz_k, p->line_len, IS_USED_RUN(slc_enable)); - if (ioc_exists) - n += scnprintf(buf + n, len - n, "IOC\t\t:%s\n", - IS_DISABLED_RUN(ioc_enable)); + n += scnprintf(buf + n, len - n, "Peripherals\t: %#lx%s%s\n", + perip_base, + IS_AVAIL3(ioc_exists, ioc_enable, ", IO-Coherency ")); return buf; } From 73e284d2572581d848267c74552215f95f0f0996 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 20 Oct 2016 17:49:15 -0700 Subject: [PATCH 745/884] ARC: boot log: refactor printing abt features not captured in BCRs On older arc700 cores, some of the features configured were not present in Build config registers. To print about them at boot, we just use the Kconfig option i.e. whether linux is built to use them or not. So yes this seems bogus, but what else can be done. Moreover if linux is booting with these enabled, then the Kconfig info is a good indicator anyways. Over time these "hacks" accumulated in read_arc_build_cfg_regs() as well as arc_cpu_mumbojumbo(). so refactor and move all of those in a single place: read_arc_build_cfg_regs(). This causes some code redcution too: | bloat-o-meter2 arch/arc/kernel/setup.o.0 arch/arc/kernel/setup.o.1 | add/remove: 0/0 grow/shrink: 2/1 up/down: 64/-132 (-68) | function old new delta | setup_processor 610 670 +60 | cpuinfo_arc700 76 80 +4 | arc_cpu_mumbojumbo 752 620 -132 Signed-off-by: Vineet Gupta --- arch/arc/include/asm/arcregs.h | 1 + arch/arc/kernel/setup.c | 89 ++++++++++++++++------------------ 2 files changed, 44 insertions(+), 46 deletions(-) diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index db25c65155cb..819b44c1a719 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -349,6 +349,7 @@ struct cpuinfo_arc { struct cpuinfo_arc_bpu bpu; struct bcr_identity core; struct bcr_isa isa; + const char *details; unsigned int vec_base; struct cpuinfo_arc_ccm iccm, dccm; struct { diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index a77efa173df0..0170d94f3860 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -40,6 +40,20 @@ struct task_struct *_current_task[NR_CPUS]; /* For stack switching */ struct cpuinfo_arc cpuinfo_arc700[NR_CPUS]; +static const struct cpuinfo_data arc_cpu_tbl[] = { +#ifdef CONFIG_ISA_ARCOMPACT + { {0x20, "ARC 600" }, 0x2F}, + { {0x30, "ARC 700" }, 0x33}, + { {0x34, "ARC 700 R4.10"}, 0x34}, + { {0x35, "ARC 700 R4.11"}, 0x35}, +#else + { {0x50, "ARC HS38 R2.0"}, 0x51}, + { {0x52, "ARC HS38 R2.1"}, 0x52}, + { {0x53, "ARC HS38 R3.0"}, 0x53}, +#endif + { {0x00, NULL } } +}; + static void read_decode_ccm_bcr(struct cpuinfo_arc *cpu) { if (is_isa_arcompact()) { @@ -92,11 +106,24 @@ static void read_arc_build_cfg_regs(void) struct bcr_timer timer; struct bcr_generic bcr; struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()]; + const struct cpuinfo_data *tbl; + FIX_PTR(cpu); READ_BCR(AUX_IDENTITY, cpu->core); READ_BCR(ARC_REG_ISA_CFG_BCR, cpu->isa); + for (tbl = &arc_cpu_tbl[0]; tbl->info.id != 0; tbl++) { + if ((cpu->core.family >= tbl->info.id) && + (cpu->core.family <= tbl->up_range)) { + cpu->details = tbl->info.str; + break; + } + } + + if (tbl->info.id == 0) + cpu->details = "UNKNOWN"; + READ_BCR(ARC_REG_TIMERS_BCR, timer); cpu->extn.timer0 = timer.t0; cpu->extn.timer1 = timer.t1; @@ -160,64 +187,34 @@ static void read_arc_build_cfg_regs(void) cpu->extn.rtt = bcr.ver ? 1 : 0; cpu->extn.debug = cpu->extn.ap | cpu->extn.smart | cpu->extn.rtt; + + /* some hacks for lack of feature BCR info in old ARC700 cores */ + if (is_isa_arcompact()) { + if (!cpu->isa.ver) /* ISA BCR absent, use Kconfig info */ + cpu->isa.atomic = IS_ENABLED(CONFIG_ARC_HAS_LLSC); + else + cpu->isa.atomic = cpu->isa.atomic1; + + cpu->isa.be = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN); + } } -static const struct cpuinfo_data arc_cpu_tbl[] = { -#ifdef CONFIG_ISA_ARCOMPACT - { {0x20, "ARC 600" }, 0x2F}, - { {0x30, "ARC 700" }, 0x33}, - { {0x34, "ARC 700 R4.10"}, 0x34}, - { {0x35, "ARC 700 R4.11"}, 0x35}, -#else - { {0x50, "ARC HS38 R2.0"}, 0x51}, - { {0x52, "ARC HS38 R2.1"}, 0x52}, - { {0x53, "ARC HS38 R3.0"}, 0x53}, -#endif - { {0x00, NULL } } -}; - - static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) { struct cpuinfo_arc *cpu = &cpuinfo_arc700[cpu_id]; struct bcr_identity *core = &cpu->core; - const struct cpuinfo_data *tbl; - char *isa_nm; - int i, be, atomic; - int n = 0; + int i, n = 0; FIX_PTR(cpu); - if (is_isa_arcompact()) { - isa_nm = "ARCompact"; - be = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN); - - atomic = cpu->isa.atomic1; - if (!cpu->isa.ver) /* ISA BCR absent, use Kconfig info */ - atomic = IS_ENABLED(CONFIG_ARC_HAS_LLSC); - } else { - isa_nm = "ARCv2"; - be = cpu->isa.be; - atomic = cpu->isa.atomic; - } - n += scnprintf(buf + n, len - n, "\nIDENTITY\t: ARCVER [%#02x] ARCNUM [%#02x] CHIPID [%#4x]\n", core->family, core->cpu_id, core->chip_id); - for (tbl = &arc_cpu_tbl[0]; tbl->info.id != 0; tbl++) { - if ((core->family >= tbl->info.id) && - (core->family <= tbl->up_range)) { - n += scnprintf(buf + n, len - n, - "processor [%d]\t: %s (%s ISA) %s\n", - cpu_id, tbl->info.str, isa_nm, - IS_AVAIL1(be, "[Big-Endian]")); - break; - } - } - - if (tbl->info.id == 0) - n += scnprintf(buf + n, len - n, "UNKNOWN ARC Processor\n"); + n += scnprintf(buf + n, len - n, "processor [%d]\t: %s (%s ISA) %s\n", + cpu_id, cpu->details, + is_isa_arcompact() ? "ARCompact" : "ARCv2", + IS_AVAIL1(cpu->isa.be, "[Big-Endian]")); n += scnprintf(buf + n, len - n, "Timers\t\t: %s%s%s%s\nISA Extn\t: ", IS_AVAIL1(cpu->extn.timer0, "Timer0 "), @@ -226,7 +223,7 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) CONFIG_ARC_HAS_RTC)); n += i = scnprintf(buf + n, len - n, "%s%s%s%s%s", - IS_AVAIL2(atomic, "atomic ", CONFIG_ARC_HAS_LLSC), + IS_AVAIL2(cpu->isa.atomic, "atomic ", CONFIG_ARC_HAS_LLSC), IS_AVAIL2(cpu->isa.ldd, "ll64 ", CONFIG_ARC_HAS_LL64), IS_AVAIL1(cpu->isa.unalign, "unalign (not used)")); From a024fd9bc4d0b102b8aa66b8ecba678d2d32fdcf Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 20 Oct 2016 18:08:10 -0700 Subject: [PATCH 746/884] ARC: boot log: don't assume SWAPE instruction support This came to light when helping a customer with oldish ARC750 core who were getting instruction errors because of lack of SWAPE but boot log was incorrectly printing it as being present Signed-off-by: Vineet Gupta --- arch/arc/include/asm/arcregs.h | 2 +- arch/arc/kernel/setup.c | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index 819b44c1a719..b8d29b136b96 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -353,7 +353,7 @@ struct cpuinfo_arc { unsigned int vec_base; struct cpuinfo_arc_ccm iccm, dccm; struct { - unsigned int swap:1, norm:1, minmax:1, barrel:1, crc:1, pad1:3, + unsigned int swap:1, norm:1, minmax:1, barrel:1, crc:1, swape:1, pad1:2, fpu_sp:1, fpu_dp:1, pad2:6, debug:1, ap:1, smart:1, rtt:1, pad3:4, timer0:1, timer1:1, rtc:1, gfrc:1, pad4:4; diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 0170d94f3860..156981aecd74 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -138,6 +138,9 @@ static void read_arc_build_cfg_regs(void) cpu->extn.swap = read_aux_reg(ARC_REG_SWAP_BCR) ? 1 : 0; /* 1,3 */ cpu->extn.crc = read_aux_reg(ARC_REG_CRC_BCR) ? 1 : 0; cpu->extn.minmax = read_aux_reg(ARC_REG_MIXMAX_BCR) > 1 ? 1 : 0; /* 2 */ + cpu->extn.swape = (cpu->core.family >= 0x34) ? 1 : + IS_ENABLED(CONFIG_ARC_HAS_SWAPE); + READ_BCR(ARC_REG_XY_MEM_BCR, cpu->extn_xymem); /* Read CCM BCRs for boot reporting even if not enabled in Kconfig */ @@ -250,7 +253,7 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) IS_AVAIL1(cpu->extn.swap, "swap "), IS_AVAIL1(cpu->extn.minmax, "minmax "), IS_AVAIL1(cpu->extn.crc, "crc "), - IS_AVAIL2(1, "swape", CONFIG_ARC_HAS_SWAPE)); + IS_AVAIL2(cpu->extn.swape, "swape", CONFIG_ARC_HAS_SWAPE)); if (cpu->bpu.ver) n += scnprintf(buf + n, len - n, From d7c46114e356fe41b7291ebff70d7ca09c0f0ac9 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 25 Oct 2016 13:45:11 -0700 Subject: [PATCH 747/884] ARC: boot log: remove awkward space comma from MMU line Signed-off-by: Vineet Gupta --- arch/arc/mm/tlb.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index ec868a9081a1..bdb295e09160 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -793,16 +793,16 @@ char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len) char super_pg[64] = ""; if (p_mmu->s_pg_sz_m) - scnprintf(super_pg, 64, "%dM Super Page%s, ", + scnprintf(super_pg, 64, "%dM Super Page %s", p_mmu->s_pg_sz_m, IS_USED_CFG(CONFIG_TRANSPARENT_HUGEPAGE)); n += scnprintf(buf + n, len - n, - "MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d %s%s\n", + "MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d%s%s\n", p_mmu->ver, p_mmu->pg_sz_k, super_pg, p_mmu->sets * p_mmu->ways, p_mmu->sets, p_mmu->ways, p_mmu->u_dtlb, p_mmu->u_itlb, - IS_AVAIL2(p_mmu->pae, "PAE40 ", CONFIG_ARC_HAS_PAE40)); + IS_AVAIL2(p_mmu->pae, ", PAE40 ", CONFIG_ARC_HAS_PAE40)); return buf; } From d975cbc8acb6f4a52ac46a57b13bd6a7f871b5e9 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 27 Oct 2016 14:33:19 -0700 Subject: [PATCH 748/884] ARC: boot log: refactor cpu name/release printing The motivation is to identify ARC750 vs. ARC770 (we currently print generic "ARC700"). A given ARC700 release could be 750 or 770, with same ARCNUM (or family identifier which is unfortunate). The existing arc_cpu_tbl[] kept a single concatenated string for core name and release which thus doesn't work for 750 vs. 770 identification. So split this into 2 tables, one with core names and other with release. And while we are at it, get rid of the range checking for family numbers. We just document the known to exist cores running Linux and ditch others. With this in place, we add detection of ARC750 which is - cores 0x33 and before - cores 0x34 and later with MMUv2 Signed-off-by: Vineet Gupta --- arch/arc/include/asm/arcregs.h | 2 +- arch/arc/include/asm/setup.h | 5 ---- arch/arc/kernel/setup.c | 51 ++++++++++++++++++++++------------ 3 files changed, 34 insertions(+), 24 deletions(-) diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index b8d29b136b96..7f3f9f63708c 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -349,7 +349,7 @@ struct cpuinfo_arc { struct cpuinfo_arc_bpu bpu; struct bcr_identity core; struct bcr_isa isa; - const char *details; + const char *details, *name; unsigned int vec_base; struct cpuinfo_arc_ccm iccm, dccm; struct { diff --git a/arch/arc/include/asm/setup.h b/arch/arc/include/asm/setup.h index bdc43df922c9..cb954cdab070 100644 --- a/arch/arc/include/asm/setup.h +++ b/arch/arc/include/asm/setup.h @@ -27,11 +27,6 @@ struct id_to_str { const char *str; }; -struct cpuinfo_data { - struct id_to_str info; - int up_range; -}; - extern int root_mountflags, end_mem; void setup_processor(void); diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 156981aecd74..0385df77a697 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -40,18 +40,27 @@ struct task_struct *_current_task[NR_CPUS]; /* For stack switching */ struct cpuinfo_arc cpuinfo_arc700[NR_CPUS]; -static const struct cpuinfo_data arc_cpu_tbl[] = { +static const struct id_to_str arc_cpu_rel[] = { #ifdef CONFIG_ISA_ARCOMPACT - { {0x20, "ARC 600" }, 0x2F}, - { {0x30, "ARC 700" }, 0x33}, - { {0x34, "ARC 700 R4.10"}, 0x34}, - { {0x35, "ARC 700 R4.11"}, 0x35}, + { 0x34, "R4.10"}, + { 0x35, "R4.11"}, #else - { {0x50, "ARC HS38 R2.0"}, 0x51}, - { {0x52, "ARC HS38 R2.1"}, 0x52}, - { {0x53, "ARC HS38 R3.0"}, 0x53}, + { 0x51, "R2.0" }, + { 0x52, "R2.1" }, + { 0x53, "R3.0" }, #endif - { {0x00, NULL } } + { 0x00, NULL } +}; + +static const struct id_to_str arc_cpu_nm[] = { +#ifdef CONFIG_ISA_ARCOMPACT + { 0x20, "ARC 600" }, + { 0x30, "ARC 770" }, /* 750 identified seperately */ +#else + { 0x40, "ARC EM" }, + { 0x50, "ARC HS38" }, +#endif + { 0x00, "Unknown" } }; static void read_decode_ccm_bcr(struct cpuinfo_arc *cpu) @@ -106,23 +115,25 @@ static void read_arc_build_cfg_regs(void) struct bcr_timer timer; struct bcr_generic bcr; struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()]; - const struct cpuinfo_data *tbl; + const struct id_to_str *tbl; FIX_PTR(cpu); READ_BCR(AUX_IDENTITY, cpu->core); READ_BCR(ARC_REG_ISA_CFG_BCR, cpu->isa); - for (tbl = &arc_cpu_tbl[0]; tbl->info.id != 0; tbl++) { - if ((cpu->core.family >= tbl->info.id) && - (cpu->core.family <= tbl->up_range)) { - cpu->details = tbl->info.str; + for (tbl = &arc_cpu_rel[0]; tbl->id != 0; tbl++) { + if (cpu->core.family == tbl->id) { + cpu->details = tbl->str; break; } } - if (tbl->info.id == 0) - cpu->details = "UNKNOWN"; + for (tbl = &arc_cpu_nm[0]; tbl->id != 0; tbl++) { + if ((cpu->core.family & 0xF0) == tbl->id) + break; + } + cpu->name = tbl->str; READ_BCR(ARC_REG_TIMERS_BCR, timer); cpu->extn.timer0 = timer.t0; @@ -199,6 +210,10 @@ static void read_arc_build_cfg_regs(void) cpu->isa.atomic = cpu->isa.atomic1; cpu->isa.be = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN); + + /* there's no direct way to distinguish 750 vs. 770 */ + if (unlikely(cpu->core.family < 0x34 || cpu->mmu.ver < 3)) + cpu->name = "ARC750"; } } @@ -214,8 +229,8 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) "\nIDENTITY\t: ARCVER [%#02x] ARCNUM [%#02x] CHIPID [%#4x]\n", core->family, core->cpu_id, core->chip_id); - n += scnprintf(buf + n, len - n, "processor [%d]\t: %s (%s ISA) %s\n", - cpu_id, cpu->details, + n += scnprintf(buf + n, len - n, "processor [%d]\t: %s %s (%s ISA) %s\n", + cpu_id, cpu->name, cpu->details, is_isa_arcompact() ? "ARCompact" : "ARCv2", IS_AVAIL1(cpu->isa.be, "[Big-Endian]")); From c3005475889c7c730638f95d13be3360f0b33e98 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 21 Oct 2016 16:04:37 -0700 Subject: [PATCH 749/884] ARC: build: retire old toggles These are really ancient toggles and tools no longer require them to be passed. This paves way for deprecating them in long run. Signed-off-by: Vineet Gupta --- arch/arc/Makefile | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arc/Makefile b/arch/arc/Makefile index aa82d13d4213..864adad52280 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -50,9 +50,6 @@ atleast_gcc44 := $(call cc-ifversion, -ge, 0404, y) cflags-$(atleast_gcc44) += -fsection-anchors -cflags-$(CONFIG_ARC_HAS_LLSC) += -mlock -cflags-$(CONFIG_ARC_HAS_SWAPE) += -mswape - ifdef CONFIG_ISA_ARCV2 ifndef CONFIG_ARC_HAS_LL64 From f644e3688855902ad11549029098a62cbbc8f558 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 25 Oct 2016 08:58:17 -0700 Subject: [PATCH 750/884] ARC: mm: retire ARC_DBG_TLB_MISS_COUNT... ... given that we have perf counters abel to do the same thing non intrusively Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 8 --- arch/arc/kernel/troubleshoot.c | 110 --------------------------------- arch/arc/mm/tlbex.S | 21 ------- 3 files changed, 139 deletions(-) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index ac0b309aced5..bd204bfa29ed 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -540,14 +540,6 @@ config ARC_DBG_TLB_PARANOIA bool "Paranoia Checks in Low Level TLB Handlers" default n -config ARC_DBG_TLB_MISS_COUNT - bool "Profile TLB Misses" - default n - select DEBUG_FS - help - Counts number of I and D TLB Misses and exports them via Debugfs - The counters can be cleared via Debugfs as well - endif config ARC_UBOOT_SUPPORT diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c index 934150e7ac48..82f9bc819f4a 100644 --- a/arch/arc/kernel/troubleshoot.c +++ b/arch/arc/kernel/troubleshoot.c @@ -237,113 +237,3 @@ void show_kernel_fault_diag(const char *str, struct pt_regs *regs, if (!user_mode(regs)) show_stacktrace(current, regs); } - -#ifdef CONFIG_DEBUG_FS - -#include -#include -#include -#include -#include -#include -#include - -static struct dentry *test_dentry; -static struct dentry *test_dir; -static struct dentry *test_u32_dentry; - -static u32 clr_on_read = 1; - -#ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT -u32 numitlb, numdtlb, num_pte_not_present; - -static int fill_display_data(char *kbuf) -{ - size_t num = 0; - num += sprintf(kbuf + num, "I-TLB Miss %x\n", numitlb); - num += sprintf(kbuf + num, "D-TLB Miss %x\n", numdtlb); - num += sprintf(kbuf + num, "PTE not present %x\n", num_pte_not_present); - - if (clr_on_read) - numitlb = numdtlb = num_pte_not_present = 0; - - return num; -} - -static int tlb_stats_open(struct inode *inode, struct file *file) -{ - file->private_data = (void *)__get_free_page(GFP_KERNEL); - return 0; -} - -/* called on user read(): display the counters */ -static ssize_t tlb_stats_output(struct file *file, /* file descriptor */ - char __user *user_buf, /* user buffer */ - size_t len, /* length of buffer */ - loff_t *offset) /* offset in the file */ -{ - size_t num; - char *kbuf = (char *)file->private_data; - - /* All of the data can he shoved in one iteration */ - if (*offset != 0) - return 0; - - num = fill_display_data(kbuf); - - /* simple_read_from_buffer() is helper for copy to user space - It copies up to @2 (num) bytes from kernel buffer @4 (kbuf) at offset - @3 (offset) into the user space address starting at @1 (user_buf). - @5 (len) is max size of user buffer - */ - return simple_read_from_buffer(user_buf, num, offset, kbuf, len); -} - -/* called on user write : clears the counters */ -static ssize_t tlb_stats_clear(struct file *file, const char __user *user_buf, - size_t length, loff_t *offset) -{ - numitlb = numdtlb = num_pte_not_present = 0; - return length; -} - -static int tlb_stats_close(struct inode *inode, struct file *file) -{ - free_page((unsigned long)(file->private_data)); - return 0; -} - -static const struct file_operations tlb_stats_file_ops = { - .read = tlb_stats_output, - .write = tlb_stats_clear, - .open = tlb_stats_open, - .release = tlb_stats_close -}; -#endif - -static int __init arc_debugfs_init(void) -{ - test_dir = debugfs_create_dir("arc", NULL); - -#ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT - test_dentry = debugfs_create_file("tlb_stats", 0444, test_dir, NULL, - &tlb_stats_file_ops); -#endif - - test_u32_dentry = - debugfs_create_u32("clr_on_read", 0444, test_dir, &clr_on_read); - - return 0; -} - -module_init(arc_debugfs_init); - -static void __exit arc_debugfs_exit(void) -{ - debugfs_remove(test_u32_dentry); - debugfs_remove(test_dentry); - debugfs_remove(test_dir); -} -module_exit(arc_debugfs_exit); - -#endif diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S index f1967eeb32e7..b30e4e36bb00 100644 --- a/arch/arc/mm/tlbex.S +++ b/arch/arc/mm/tlbex.S @@ -237,15 +237,6 @@ ex_saved_reg1: 2: -#ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT - and.f 0, r0, _PAGE_PRESENT - bz 1f - ld r3, [num_pte_not_present] - add r3, r3, 1 - st r3, [num_pte_not_present] -1: -#endif - .endm ;----------------------------------------------------------------- @@ -309,12 +300,6 @@ ENTRY(EV_TLBMissI) TLBMISS_FREEUP_REGS -#ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT - ld r0, [@numitlb] - add r0, r0, 1 - st r0, [@numitlb] -#endif - ;---------------------------------------------------------------- ; Get the PTE corresponding to V-addr accessed, r2 is setup with EFA LOAD_FAULT_PTE @@ -349,12 +334,6 @@ ENTRY(EV_TLBMissD) TLBMISS_FREEUP_REGS -#ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT - ld r0, [@numdtlb] - add r0, r0, 1 - st r0, [@numdtlb] -#endif - ;---------------------------------------------------------------- ; Get the PTE corresponding to V-addr accessed ; If PTE exists, it will setup, r0 = PTE, r1 = Ptr to PTE, r2 = EFA From d65283f7b695b5d04ca1ab58b6bb41f443b96286 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 25 Oct 2016 10:43:20 -0700 Subject: [PATCH 751/884] ARC: module: elide loop to save reference to .eh_frame The loop was really needed in .debug_frame regime where wanted make it as SH_ALLOC so that apply_relocate_add() would process it. That's not needed for .eh_frame, so we check this in apply_relocate_add() which gets called for each section. Note that we need to save reference to "section name strings" section in module_frob_arch_sections() since apply_relocate_add() doesn't get that Signed-off-by: Vineet Gupta --- arch/arc/include/asm/module.h | 1 + arch/arc/kernel/module.c | 18 ++++++++---------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/arch/arc/include/asm/module.h b/arch/arc/include/asm/module.h index 518222bb3f8e..6e91d8b339c3 100644 --- a/arch/arc/include/asm/module.h +++ b/arch/arc/include/asm/module.h @@ -18,6 +18,7 @@ struct mod_arch_specific { void *unw_info; int unw_sec_idx; + const char *secstr; }; #endif diff --git a/arch/arc/kernel/module.c b/arch/arc/kernel/module.c index 9a2849756022..24bd2ffb90b7 100644 --- a/arch/arc/kernel/module.c +++ b/arch/arc/kernel/module.c @@ -30,17 +30,9 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, char *secstr, struct module *mod) { #ifdef CONFIG_ARC_DW2_UNWIND - int i; - mod->arch.unw_sec_idx = 0; mod->arch.unw_info = NULL; - - for (i = 1; i < hdr->e_shnum; i++) { - if (strcmp(secstr+sechdrs[i].sh_name, ".eh_frame") == 0) { - mod->arch.unw_sec_idx = i; - break; - } - } + mod->arch.secstr = secstr; #endif return 0; } @@ -66,8 +58,10 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, Elf32_Addr location; Elf32_Addr sec_to_patch; int relo_type; + unsigned int tgtsec; - sec_to_patch = sechdrs[sechdrs[relsec].sh_info].sh_addr; + tgtsec = sechdrs[relsec].sh_info; + sec_to_patch = sechdrs[tgtsec].sh_addr; sym_sec = (Elf32_Sym *) sechdrs[symindex].sh_addr; n = sechdrs[relsec].sh_size / sizeof(*rel_entry); @@ -111,6 +105,10 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, goto relo_err; } + + if (strcmp(module->arch.secstr+sechdrs[tgtsec].sh_name, ".eh_frame") == 0) + module->arch.unw_sec_idx = tgtsec; + return 0; relo_err: From b75dcd9c7d352c7d9ea9010e95c708595094896a Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 25 Oct 2016 11:23:19 -0700 Subject: [PATCH 752/884] ARC: module: print pretty section names Now that we have referece to section name string table in apply_relocate_add(), use it to - print the name of section being relocated - print symbol with NULL name (since it refers to a section) before | Section to fixup 7000a060 | ========================================================= | rela->r_off | rela->addend | sym->st_value | ADDR | VALUE | ========================================================= | 1c 0 7000e000 7000a07c 7000e000 [] | 40 0 7000a000 7000a0a0 7000a000 [] after | Section to fixup .eh_frame @7000a060 | ========================================================= | r_off r_add st_value ADDRESS VALUE | ========================================================= | 1c 0 7000e000 7000a07c 7000e000 [.init.text] | 40 0 7000a000 7000a0a0 7000a000 [.exit.text] Signed-off-by: Vineet Gupta --- arch/arc/kernel/module.c | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/arch/arc/kernel/module.c b/arch/arc/kernel/module.c index 24bd2ffb90b7..42e964db2967 100644 --- a/arch/arc/kernel/module.c +++ b/arch/arc/kernel/module.c @@ -51,31 +51,33 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, unsigned int relsec, /* sec index for relo sec */ struct module *module) { - int i, n; + int i, n, relo_type; Elf32_Rela *rel_entry = (void *)sechdrs[relsec].sh_addr; Elf32_Sym *sym_entry, *sym_sec; - Elf32_Addr relocation; - Elf32_Addr location; - Elf32_Addr sec_to_patch; - int relo_type; + Elf32_Addr relocation, location, tgt_addr; unsigned int tgtsec; + /* + * @relsec has relocations e.g. .rela.init.text + * @tgtsec is section to patch e.g. .init.text + */ tgtsec = sechdrs[relsec].sh_info; - sec_to_patch = sechdrs[tgtsec].sh_addr; + tgt_addr = sechdrs[tgtsec].sh_addr; sym_sec = (Elf32_Sym *) sechdrs[symindex].sh_addr; n = sechdrs[relsec].sh_size / sizeof(*rel_entry); - pr_debug("\n========== Module Sym reloc ===========================\n"); - pr_debug("Section to fixup %x\n", sec_to_patch); + pr_debug("\nSection to fixup %s @%x\n", + module->arch.secstr + sechdrs[tgtsec].sh_name, tgt_addr); pr_debug("=========================================================\n"); - pr_debug("rela->r_off | rela->addend | sym->st_value | ADDR | VALUE\n"); + pr_debug("r_off\tr_add\tst_value ADDRESS VALUE\n"); pr_debug("=========================================================\n"); /* Loop thru entries in relocation section */ for (i = 0; i < n; i++) { + const char *s; /* This is where to make the change */ - location = sec_to_patch + rel_entry[i].r_offset; + location = tgt_addr + rel_entry[i].r_offset; /* This is the symbol it is referring to. Note that all undefined symbols have been resolved. */ @@ -83,10 +85,15 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, relocation = sym_entry->st_value + rel_entry[i].r_addend; - pr_debug("\t%x\t\t%x\t\t%x %x %x [%s]\n", - rel_entry[i].r_offset, rel_entry[i].r_addend, - sym_entry->st_value, location, relocation, - strtab + sym_entry->st_name); + if (sym_entry->st_name == 0 && ELF_ST_TYPE (sym_entry->st_info) == STT_SECTION) { + s = module->arch.secstr + sechdrs[sym_entry->st_shndx].sh_name; + } else { + s = strtab + sym_entry->st_name; + } + + pr_debug(" %x\t%x\t%x %x %x [%s]\n", + rel_entry[i].r_offset, rel_entry[i].r_addend, + sym_entry->st_value, location, relocation, s); /* This assumes modules are built with -mlong-calls * so any branches/jumps are absolute 32 bit jmps From 72193a953ada9058e46970838cc42cbd18bf4eba Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Fri, 28 Oct 2016 11:38:53 +0100 Subject: [PATCH 753/884] regmap: Rename ret variable in regmap_read_poll_timeout As almost all of the callers of the regmap_read_poll_timeout macro will include a local ret variable we will always get a Sparse warning about the duplication of the ret variable: warning: symbol 'ret' shadows an earlier one Simply rename the ret variable in the marco to pollret to make this significantly less likely to happen. Signed-off-by: Charles Keepax Signed-off-by: Mark Brown --- include/linux/regmap.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 9adc7b21903d..fc9e3c576f9c 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -116,22 +116,22 @@ struct reg_sequence { #define regmap_read_poll_timeout(map, addr, val, cond, sleep_us, timeout_us) \ ({ \ ktime_t timeout = ktime_add_us(ktime_get(), timeout_us); \ - int ret; \ + int pollret; \ might_sleep_if(sleep_us); \ for (;;) { \ - ret = regmap_read((map), (addr), &(val)); \ - if (ret) \ + pollret = regmap_read((map), (addr), &(val)); \ + if (pollret) \ break; \ if (cond) \ break; \ if (timeout_us && ktime_compare(ktime_get(), timeout) > 0) { \ - ret = regmap_read((map), (addr), &(val)); \ + pollret = regmap_read((map), (addr), &(val)); \ break; \ } \ if (sleep_us) \ usleep_range((sleep_us >> 2) + 1, sleep_us); \ } \ - ret ?: ((cond) ? 0 : -ETIMEDOUT); \ + pollret ?: ((cond) ? 0 : -ETIMEDOUT); \ }) #ifdef CONFIG_REGMAP From ba14fa1a57c07cca9d520ceded8d3da5beb6175f Mon Sep 17 00:00:00 2001 From: "H. Nikolaus Schaller" Date: Thu, 27 Oct 2016 14:31:39 +0200 Subject: [PATCH 754/884] regulator: core: silence warning: "VDD1: ramp_delay not set" commit 73e705bf81ce ("regulator: core: Add set_voltage_time op") introduced a new rdev_warn() if the ramp_delay is 0. Apparently, on omap3/twl4030 platforms with dynamic voltage management this results in non-ending spurious messages like [ 511.143066] VDD1: ramp_delay not set [ 511.662322] VDD1: ramp_delay not set [ 513.903625] VDD1: ramp_delay not set [ 514.222198] VDD1: ramp_delay not set [ 517.062835] VDD1: ramp_delay not set [ 517.382568] VDD1: ramp_delay not set [ 520.142791] VDD1: ramp_delay not set [ 520.502593] VDD1: ramp_delay not set [ 523.062896] VDD1: ramp_delay not set [ 523.362701] VDD1: ramp_delay not set [ 526.143035] VDD1: ramp_delay not set I have observed this on GTA04 while it is reported to occur on N900 as well: https://bugzilla.kernel.org/show_bug.cgi?id=178371 This patch makes the warning appear only in debugging mode. Signed-off-by: H. Nikolaus Schaller Signed-off-by: Mark Brown --- drivers/regulator/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 67426c0477d3..5c1519b229e0 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -2754,7 +2754,7 @@ static int _regulator_set_voltage_time(struct regulator_dev *rdev, ramp_delay = rdev->desc->ramp_delay; if (ramp_delay == 0) { - rdev_warn(rdev, "ramp_delay not set\n"); + rdev_dbg(rdev, "ramp_delay not set\n"); return 0; } From 2083d36790e328a364896a2833fbedbf6e1d2317 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 25 Oct 2016 11:25:56 +0200 Subject: [PATCH 755/884] mlxsw: spectrum_router: Save requested prefix bitlist when creating tree Currently, the prefix bitlist is not saved for LPM trees, causing the compare to always fail which causes the tree to be destroyed and created for every inserted and removed FIB entry. So fix this by saving the bitlist as it should have been done from the very beginning. Fixes: 53342023eed9 ("mlxsw: spectrum_router: Implement LPM trees management") Signed-off-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index f3d50d369bbe..4cff4c043d11 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -320,6 +320,8 @@ mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp, lpm_tree); if (err) goto err_left_struct_set; + memcpy(&lpm_tree->prefix_usage, prefix_usage, + sizeof(lpm_tree->prefix_usage)); return lpm_tree; err_left_struct_set: From 8b99becdc89ee7635137725e9fa76f304bb168f5 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 25 Oct 2016 11:25:57 +0200 Subject: [PATCH 756/884] mlxsw: spectrum_router: Compare only trees which are in use during tree get Only trees which are in use should be compared to requested prefix usage. Fixes: 53342023eed9 ("mlxsw: spectrum_router: Implement LPM trees management") Signed-off-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 4cff4c043d11..4573da2c5560 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -345,7 +345,8 @@ mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp, for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) { lpm_tree = &mlxsw_sp->router.lpm_trees[i]; - if (lpm_tree->proto == proto && + if (lpm_tree->ref_count != 0 && + lpm_tree->proto == proto && mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage, prefix_usage)) goto inc_ref_count; From 18e601d6adae5042f82d105ccd3d4498050f2ebf Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 24 Oct 2016 20:33:23 -0400 Subject: [PATCH 757/884] sunrpc: fix some missing rq_rbuffer assignments We've been seeing some crashes in testing that look like this: BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] memcpy_orig+0x29/0x110 PGD 212ca2067 PUD 212ca3067 PMD 0 Oops: 0002 [#1] SMP Modules linked in: rpcsec_gss_krb5 nfsv4 dns_resolver nfs fscache ppdev parport_pc i2c_piix4 sg parport i2c_core virtio_balloon pcspkr acpi_cpufreq nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c sd_mod ata_generic pata_acpi virtio_scsi 8139too ata_piix libata 8139cp mii virtio_pci floppy virtio_ring serio_raw virtio CPU: 1 PID: 1540 Comm: nfsd Not tainted 4.9.0-rc1 #39 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2007 task: ffff88020d7ed200 task.stack: ffff880211838000 RIP: 0010:[] [] memcpy_orig+0x29/0x110 RSP: 0018:ffff88021183bdd0 EFLAGS: 00010206 RAX: 0000000000000000 RBX: ffff88020d7fa000 RCX: 000000f400000000 RDX: 0000000000000014 RSI: ffff880212927020 RDI: 0000000000000000 RBP: ffff88021183be30 R08: 01000000ef896996 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff880211704ca8 R13: ffff88021473f000 R14: 00000000ef896996 R15: ffff880211704800 FS: 0000000000000000(0000) GS:ffff88021fc80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 0000000212ca1000 CR4: 00000000000006e0 Stack: ffffffffa01ea087 ffffffff63400001 ffff880215145e00 ffff880211bacd00 ffff88021473f2b8 0000000000000004 00000000d0679d67 ffff880211bacd00 ffff88020d7fa000 ffff88021473f000 0000000000000000 ffff88020d7faa30 Call Trace: [] ? svc_tcp_recvfrom+0x5a7/0x790 [sunrpc] [] svc_recv+0xad8/0xbd0 [sunrpc] [] nfsd+0xde/0x160 [nfsd] [] ? nfsd_destroy+0x60/0x60 [nfsd] [] kthread+0xd8/0xf0 [] ret_from_fork+0x1f/0x40 [] ? kthread_park+0x60/0x60 Code: 00 00 48 89 f8 48 83 fa 20 72 7e 40 38 fe 7c 35 48 83 ea 20 48 83 ea 20 4c 8b 06 4c 8b 4e 08 4c 8b 56 10 4c 8b 5e 18 48 8d 76 20 <4c> 89 07 4c 89 4f 08 4c 89 57 10 4c 89 5f 18 48 8d 7f 20 73 d4 RIP [] memcpy_orig+0x29/0x110 RSP CR2: 0000000000000000 Both Bruce and Eryu ran a bisect here and found that the problematic patch was 68778945e46 (SUNRPC: Separate buffer pointers for RPC Call and Reply messages). That patch changed rpc_xdr_encode to use a new rq_rbuffer pointer to set up the receive buffer, but didn't change all of the necessary codepaths to set it properly. In particular the backchannel setup was missing. We need to set rq_rbuffer whenever rq_buffer is set. Ensure that it is. Reviewed-by: Chuck Lever Tested-by: Chuck Lever Reported-by: Eryu Guan Tested-by: Eryu Guan Fixes: 68778945e46 "SUNRPC: Separate buffer pointers..." Reported-by: J. Bruce Fields Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 1 + net/sunrpc/xprtsock.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index 2d8545c34095..fc4535ead7c2 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -182,6 +182,7 @@ xprt_rdma_bc_allocate(struct rpc_task *task) return -ENOMEM; rqst->rq_buffer = page_address(page); + rqst->rq_rbuffer = (char *)rqst->rq_buffer + rqst->rq_callsize; return 0; } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 0137af1c0916..e01c825bc683 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2563,6 +2563,7 @@ static int bc_malloc(struct rpc_task *task) buf->len = PAGE_SIZE; rqst->rq_buffer = buf->data; + rqst->rq_rbuffer = (char *)rqst->rq_buffer + rqst->rq_callsize; return 0; } From 25ccd2429fd0af5b9c2c136c8c593491aeabf162 Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Wed, 26 Oct 2016 15:40:12 +0800 Subject: [PATCH 758/884] ACPICA: Dispatcher: Fix order issue of method termination The last step of the method termination should be the end of the method serialization. Otherwise, the steps happening after it will face the race issues that cannot be protected by the method serialization mechanism. This patch fixes this issue by moving the per-method-object deletion code prior than the end of the method serialization. Otherwise, the possible race issues may result in AE_ALREADY_EXISTS error in a parallel environment. Fixes: 74f51b80a0c4 (ACPICA: Namespace: Fix dynamic table loading issues) Reported-and-tested-by: Imre Deak Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/dsmethod.c | 40 +++++++++++++++++----------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/drivers/acpi/acpica/dsmethod.c b/drivers/acpi/acpica/dsmethod.c index 32e9ddc0cf2b..c4028a8dacd5 100644 --- a/drivers/acpi/acpica/dsmethod.c +++ b/drivers/acpi/acpica/dsmethod.c @@ -730,26 +730,6 @@ acpi_ds_terminate_control_method(union acpi_operand_object *method_desc, acpi_ds_method_data_delete_all(walk_state); - /* - * If method is serialized, release the mutex and restore the - * current sync level for this thread - */ - if (method_desc->method.mutex) { - - /* Acquisition Depth handles recursive calls */ - - method_desc->method.mutex->mutex.acquisition_depth--; - if (!method_desc->method.mutex->mutex.acquisition_depth) { - walk_state->thread->current_sync_level = - method_desc->method.mutex->mutex. - original_sync_level; - - acpi_os_release_mutex(method_desc->method. - mutex->mutex.os_mutex); - method_desc->method.mutex->mutex.thread_id = 0; - } - } - /* * Delete any namespace objects created anywhere within the * namespace by the execution of this method. Unless: @@ -786,6 +766,26 @@ acpi_ds_terminate_control_method(union acpi_operand_object *method_desc, ~ACPI_METHOD_MODIFIED_NAMESPACE; } } + + /* + * If method is serialized, release the mutex and restore the + * current sync level for this thread + */ + if (method_desc->method.mutex) { + + /* Acquisition Depth handles recursive calls */ + + method_desc->method.mutex->mutex.acquisition_depth--; + if (!method_desc->method.mutex->mutex.acquisition_depth) { + walk_state->thread->current_sync_level = + method_desc->method.mutex->mutex. + original_sync_level; + + acpi_os_release_mutex(method_desc->method. + mutex->mutex.os_mutex); + method_desc->method.mutex->mutex.thread_id = 0; + } + } } /* Decrement the thread count on the method */ From 8121aa26e32012ca89afafa5e503b879950ac0fe Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Wed, 26 Oct 2016 15:40:20 +0800 Subject: [PATCH 759/884] ACPICA: Dispatcher: Fix an unbalanced lock exit path in acpi_ds_auto_serialize_method() There is a lock unbalanced exit path in acpi_ds_initialize_method(), this patch corrects it. Fixes: 441ad11d078f (ACPICA: Dispatcher: Fix a mutex issue for method auto serialization) Tested-by: Imre Deak Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/dsmethod.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/acpica/dsmethod.c b/drivers/acpi/acpica/dsmethod.c index c4028a8dacd5..5997e592e5a6 100644 --- a/drivers/acpi/acpica/dsmethod.c +++ b/drivers/acpi/acpica/dsmethod.c @@ -128,7 +128,7 @@ acpi_ds_auto_serialize_method(struct acpi_namespace_node *node, if (ACPI_FAILURE(status)) { acpi_ds_delete_walk_state(walk_state); acpi_ps_free_op(op); - return_ACPI_STATUS(status); + goto unlock; } walk_state->descending_callback = acpi_ds_detect_named_opcodes; From 8633db6b027952449e155a316f4ae3a530bbe18f Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Wed, 26 Oct 2016 15:42:01 +0800 Subject: [PATCH 760/884] ACPICA: Dispatcher: Fix interpreter locking around acpi_ev_initialize_region() In the code path of acpi_ev_initialize_region(), there is namespace modification code unlocked. This patch tunes the code to make sure such modification are always locked. Fixes: 74f51b80a0c4 (ACPICA: Namespace: Fix dynamic table loading issues) Tested-by: Imre Deak Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/dsinit.c | 11 +++-------- drivers/acpi/acpica/dsmethod.c | 12 +++--------- drivers/acpi/acpica/dswload2.c | 2 -- drivers/acpi/acpica/evrgnini.c | 3 +++ drivers/acpi/acpica/nsload.c | 2 ++ 5 files changed, 11 insertions(+), 19 deletions(-) diff --git a/drivers/acpi/acpica/dsinit.c b/drivers/acpi/acpica/dsinit.c index f1e6dcc7a827..54d48b90de2c 100644 --- a/drivers/acpi/acpica/dsinit.c +++ b/drivers/acpi/acpica/dsinit.c @@ -46,6 +46,7 @@ #include "acdispat.h" #include "acnamesp.h" #include "actables.h" +#include "acinterp.h" #define _COMPONENT ACPI_DISPATCHER ACPI_MODULE_NAME("dsinit") @@ -214,23 +215,17 @@ acpi_ds_initialize_objects(u32 table_index, /* Walk entire namespace from the supplied root */ - status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE); - if (ACPI_FAILURE(status)) { - return_ACPI_STATUS(status); - } - /* * We don't use acpi_walk_namespace since we do not want to acquire * the namespace reader lock. */ status = acpi_ns_walk_namespace(ACPI_TYPE_ANY, start_node, ACPI_UINT32_MAX, - ACPI_NS_WALK_UNLOCK, acpi_ds_init_one_object, - NULL, &info, NULL); + 0, acpi_ds_init_one_object, NULL, &info, + NULL); if (ACPI_FAILURE(status)) { ACPI_EXCEPTION((AE_INFO, status, "During WalkNamespace")); } - (void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE); status = acpi_get_table_by_index(table_index, &table); if (ACPI_FAILURE(status)) { diff --git a/drivers/acpi/acpica/dsmethod.c b/drivers/acpi/acpica/dsmethod.c index 5997e592e5a6..2b3210f42a46 100644 --- a/drivers/acpi/acpica/dsmethod.c +++ b/drivers/acpi/acpica/dsmethod.c @@ -99,14 +99,11 @@ acpi_ds_auto_serialize_method(struct acpi_namespace_node *node, "Method auto-serialization parse [%4.4s] %p\n", acpi_ut_get_node_name(node), node)); - acpi_ex_enter_interpreter(); - /* Create/Init a root op for the method parse tree */ op = acpi_ps_alloc_op(AML_METHOD_OP, obj_desc->method.aml_start); if (!op) { - status = AE_NO_MEMORY; - goto unlock; + return_ACPI_STATUS(AE_NO_MEMORY); } acpi_ps_set_name(op, node->name.integer); @@ -118,8 +115,7 @@ acpi_ds_auto_serialize_method(struct acpi_namespace_node *node, acpi_ds_create_walk_state(node->owner_id, NULL, NULL, NULL); if (!walk_state) { acpi_ps_free_op(op); - status = AE_NO_MEMORY; - goto unlock; + return_ACPI_STATUS(AE_NO_MEMORY); } status = acpi_ds_init_aml_walk(walk_state, op, node, @@ -128,7 +124,7 @@ acpi_ds_auto_serialize_method(struct acpi_namespace_node *node, if (ACPI_FAILURE(status)) { acpi_ds_delete_walk_state(walk_state); acpi_ps_free_op(op); - goto unlock; + return_ACPI_STATUS(status); } walk_state->descending_callback = acpi_ds_detect_named_opcodes; @@ -138,8 +134,6 @@ acpi_ds_auto_serialize_method(struct acpi_namespace_node *node, status = acpi_ps_parse_aml(walk_state); acpi_ps_delete_parse_tree(op); -unlock: - acpi_ex_exit_interpreter(); return_ACPI_STATUS(status); } diff --git a/drivers/acpi/acpica/dswload2.c b/drivers/acpi/acpica/dswload2.c index 028b22a3154e..e36218206bb0 100644 --- a/drivers/acpi/acpica/dswload2.c +++ b/drivers/acpi/acpica/dswload2.c @@ -607,11 +607,9 @@ acpi_status acpi_ds_load2_end_op(struct acpi_walk_state *walk_state) } } - acpi_ex_exit_interpreter(); status = acpi_ev_initialize_region (acpi_ns_get_attached_object(node), FALSE); - acpi_ex_enter_interpreter(); if (ACPI_FAILURE(status)) { /* diff --git a/drivers/acpi/acpica/evrgnini.c b/drivers/acpi/acpica/evrgnini.c index 3843f1fc5dbb..75ddd160a716 100644 --- a/drivers/acpi/acpica/evrgnini.c +++ b/drivers/acpi/acpica/evrgnini.c @@ -45,6 +45,7 @@ #include "accommon.h" #include "acevents.h" #include "acnamesp.h" +#include "acinterp.h" #define _COMPONENT ACPI_EVENTS ACPI_MODULE_NAME("evrgnini") @@ -597,9 +598,11 @@ acpi_ev_initialize_region(union acpi_operand_object *region_obj, } } + acpi_ex_exit_interpreter(); status = acpi_ev_execute_reg_method(region_obj, ACPI_REG_CONNECT); + acpi_ex_enter_interpreter(); if (acpi_ns_locked) { status = diff --git a/drivers/acpi/acpica/nsload.c b/drivers/acpi/acpica/nsload.c index 334d3c5ba617..d1f20143bb11 100644 --- a/drivers/acpi/acpica/nsload.c +++ b/drivers/acpi/acpica/nsload.c @@ -137,7 +137,9 @@ unlock: ACPI_DEBUG_PRINT((ACPI_DB_INFO, "**** Begin Table Object Initialization\n")); + acpi_ex_enter_interpreter(); status = acpi_ds_initialize_objects(table_index, node); + acpi_ex_exit_interpreter(); ACPI_DEBUG_PRINT((ACPI_DB_INFO, "**** Completed Table Object Initialization\n")); From 9a8b27fac5bbb77337cc2e5d31d37c9936782d87 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 27 Oct 2016 15:22:13 -0700 Subject: [PATCH 761/884] raid5-cache: correct condition for empty metadata write As long as we recover one metadata block, we should write the empty metadata write. The original code could make recovery corrupted if only one meta is valid. Reported-by: Zhengyuan Liu Signed-off-by: Shaohua Li --- drivers/md/raid5-cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 981f85515191..a227a9f3ee65 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -1087,7 +1087,7 @@ static int r5l_recovery_log(struct r5l_log *log) * 1's seq + 10 and let superblock points to meta2. The same recovery will * not think meta 3 is a valid meta, because its seq doesn't match */ - if (ctx.seq > log->last_cp_seq + 1) { + if (ctx.seq > log->last_cp_seq) { int ret; ret = r5l_log_write_empty_meta_block(log, ctx.pos, ctx.seq + 10); From 7449f699b2fb23bdee0a0f03aa4efb5f96fd403f Mon Sep 17 00:00:00 2001 From: Tomasz Majchrzak Date: Fri, 28 Oct 2016 14:45:58 +0200 Subject: [PATCH 762/884] raid1: handle read error also in readonly mode If write is the first operation on a disk and it happens not to be aligned to page size, block layer sends read request first. If read operation fails, the disk is set as failed as no attempt to fix the error is made because array is in auto-readonly mode. Similarily, the disk is set as failed for read-only array. Take the same approach as in raid10. Don't fail the disk if array is in readonly or auto-readonly mode. Try to redirect the request first and if unsuccessful, return a read error. Signed-off-by: Tomasz Majchrzak Signed-off-by: Shaohua Li --- drivers/md/raid1.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index db536a68b2ee..29e2df5cd77b 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -2297,17 +2297,23 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio) * This is all done synchronously while the array is * frozen */ + + bio = r1_bio->bios[r1_bio->read_disk]; + bdevname(bio->bi_bdev, b); + bio_put(bio); + r1_bio->bios[r1_bio->read_disk] = NULL; + if (mddev->ro == 0) { freeze_array(conf, 1); fix_read_error(conf, r1_bio->read_disk, r1_bio->sector, r1_bio->sectors); unfreeze_array(conf); - } else - md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev); + } else { + r1_bio->bios[r1_bio->read_disk] = IO_BLOCKED; + } + rdev_dec_pending(conf->mirrors[r1_bio->read_disk].rdev, conf->mddev); - bio = r1_bio->bios[r1_bio->read_disk]; - bdevname(bio->bi_bdev, b); read_more: disk = read_balance(conf, r1_bio, &max_sectors); if (disk == -1) { @@ -2318,11 +2324,6 @@ read_more: } else { const unsigned long do_sync = r1_bio->master_bio->bi_opf & REQ_SYNC; - if (bio) { - r1_bio->bios[r1_bio->read_disk] = - mddev->ro ? IO_BLOCKED : NULL; - bio_put(bio); - } r1_bio->read_disk = disk; bio = bio_clone_mddev(r1_bio->master_bio, GFP_NOIO, mddev); bio_trim(bio, r1_bio->sector - bio->bi_iter.bi_sector, From 1217e1d1999ed6c9c1e1b1acae0a74ab70464ae2 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 28 Oct 2016 15:59:41 +1100 Subject: [PATCH 763/884] md: be careful not lot leak internal curr_resync value into metadata. -- (all) mddev->curr_resync usually records where the current resync is up to, but during the starting phase it has some "magic" values. 1 - means that the array is trying to start a resync, but has yielded to another array which shares physical devices, and also needs to start a resync 2 - means the array is trying to start resync, but has found another array which shares physical devices and has already started resync. 3 - means that resync has commensed, but it is possible that nothing has actually been resynced yet. It is important that this value not be visible to user-space and particularly that it doesn't get written to the metadata, as the resync or recovery checkpoint. In part, this is because it may be slightly higher than the correct value, though this is very rare. In part, because it is not a multiple of 4K, and some devices only support 4K aligned accesses. There are two places where this value is propagates into either ->curr_resync_completed or ->recovery_cp or ->recovery_offset. These currently avoid the propagation of values 1 and 3, but will allow 3 to leak through. Change them to only propagate the value if it is > 3. As this can cause an array to fail, the patch is suitable for -stable. Cc: stable@vger.kernel.org (v3.7+) Reported-by: Viswesh Signed-off-by: NeilBrown Signed-off-by: Shaohua Li --- drivers/md/md.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 25b57a55db1a..2089d46b0eb8 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -8144,14 +8144,14 @@ void md_do_sync(struct md_thread *thread) if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && !test_bit(MD_RECOVERY_INTR, &mddev->recovery) && - mddev->curr_resync > 2) { + mddev->curr_resync > 3) { mddev->curr_resync_completed = mddev->curr_resync; sysfs_notify(&mddev->kobj, NULL, "sync_completed"); } mddev->pers->sync_request(mddev, max_sectors, &skipped); if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) && - mddev->curr_resync > 2) { + mddev->curr_resync > 3) { if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { if (mddev->curr_resync >= mddev->recovery_cp) { From 1e90a13d0c3dc94512af1ccb2b6563e8297838fa Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 29 Oct 2016 13:42:42 +0200 Subject: [PATCH 764/884] x86/smpboot: Init apic mapping before usage The recent changes, which forced the registration of the boot cpu on UP systems, which do not have ACPI tables, have been fixed for systems w/o local APIC, but left a wreckage for systems which have neither ACPI nor mptables, but the CPU has an APIC, e.g. virtualbox. The boot process crashes in prefill_possible_map() as it wants to register the boot cpu, which needs to access the local apic, but the local APIC is not yet mapped. There is no reason why init_apic_mapping() can't be invoked before prefill_possible_map(). So instead of playing another silly early mapping game, as the ACPI/mptables code does, we just move init_apic_mapping() before the call to prefill_possible_map(). In hindsight, I should have noticed that combination earlier. Sorry for the churn (also in stable)! Fixes: ff8560512b8d ("x86/boot/smp: Don't try to poke disabled/non-existent APIC") Reported-and-debugged-by: Michal Necasek Reported-and-tested-by: Wolfgang Bauer Cc: prarit@redhat.com Cc: ville.syrjala@linux.intel.com Cc: michael.thayer@oracle.com Cc: knut.osmundsen@oracle.com Cc: frank.mehnert@oracle.com Cc: Borislav Petkov Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1610282114380.5053@nanos Signed-off-by: Thomas Gleixner --- arch/x86/kernel/setup.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index bbfbca5fea0c..9c337b0e8ba7 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1221,11 +1221,16 @@ void __init setup_arch(char **cmdline_p) */ get_smp_config(); + /* + * Systems w/o ACPI and mptables might not have it mapped the local + * APIC yet, but prefill_possible_map() might need to access it. + */ + init_apic_mappings(); + prefill_possible_map(); init_cpu_to_node(); - init_apic_mappings(); io_apic_init_mappings(); kvm_guest_init(); From bf911e985d6bbaa328c20c3e05f4eb03de11fdd6 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Tue, 25 Oct 2016 14:27:39 -0200 Subject: [PATCH 765/884] sctp: validate chunk len before actually using it Andrey Konovalov reported that KASAN detected that SCTP was using a slab beyond the boundaries. It was caused because when handling out of the blue packets in function sctp_sf_ootb() it was checking the chunk len only after already processing the first chunk, validating only for the 2nd and subsequent ones. The fix is to just move the check upwards so it's also validated for the 1st chunk. Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Signed-off-by: Marcelo Ricardo Leitner Reviewed-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/sm_statefuns.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 026e3bca4a94..8ec20a64a3f8 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3422,6 +3422,12 @@ sctp_disposition_t sctp_sf_ootb(struct net *net, return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); + /* Report violation if chunk len overflows */ + ch_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length)); + if (ch_end > skb_tail_pointer(skb)) + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, + commands); + /* Now that we know we at least have a chunk header, * do things that are type appropriate. */ @@ -3453,12 +3459,6 @@ sctp_disposition_t sctp_sf_ootb(struct net *net, } } - /* Report violation if chunk len overflows */ - ch_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length)); - if (ch_end > skb_tail_pointer(skb)) - return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, - commands); - ch = (sctp_chunkhdr_t *) ch_end; } while (ch_end < skb_tail_pointer(skb)); From b47bd6ea40636362a8b6605de51207cc387ba0b8 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Tue, 25 Oct 2016 18:36:24 +0300 Subject: [PATCH 766/884] {net, ib}/mlx5: Make cache line size determination at runtime. ARM 64B cache line systems have L1_CACHE_BYTES set to 128. cache_line_size() will return the correct size. Fixes: cf50b5efa2fe('net/mlx5_core/ib: New device capabilities handling.') Signed-off-by: Daniel Jurgens Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx5/main.c | 2 +- drivers/infiniband/hw/mlx5/qp.c | 1 - .../net/ethernet/mellanox/mlx5/core/alloc.c | 31 ++++++++++++++++--- include/linux/mlx5/driver.h | 11 ------- 4 files changed, 27 insertions(+), 18 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 22174774dbb8..63036c731626 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1019,7 +1019,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp); if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf)) resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size); - resp.cache_line_size = L1_CACHE_BYTES; + resp.cache_line_size = cache_line_size(); resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq); resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq); resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 41f4c2afbcdd..7ce97daf26c6 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -52,7 +52,6 @@ enum { enum { MLX5_IB_SQ_STRIDE = 6, - MLX5_IB_CACHE_LINE_SIZE = 64, }; static const u32 mlx5_ib_opcode[] = { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c index 6cb38304669f..2c6e3c7b7417 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c @@ -41,6 +41,13 @@ #include "mlx5_core.h" +struct mlx5_db_pgdir { + struct list_head list; + unsigned long *bitmap; + __be32 *db_page; + dma_addr_t db_dma; +}; + /* Handling for queue buffers -- we allocate a bunch of memory and * register it in a memory region at HCA virtual address 0. */ @@ -102,17 +109,28 @@ EXPORT_SYMBOL_GPL(mlx5_buf_free); static struct mlx5_db_pgdir *mlx5_alloc_db_pgdir(struct mlx5_core_dev *dev, int node) { + u32 db_per_page = PAGE_SIZE / cache_line_size(); struct mlx5_db_pgdir *pgdir; pgdir = kzalloc(sizeof(*pgdir), GFP_KERNEL); if (!pgdir) return NULL; - bitmap_fill(pgdir->bitmap, MLX5_DB_PER_PAGE); + pgdir->bitmap = kcalloc(BITS_TO_LONGS(db_per_page), + sizeof(unsigned long), + GFP_KERNEL); + + if (!pgdir->bitmap) { + kfree(pgdir); + return NULL; + } + + bitmap_fill(pgdir->bitmap, db_per_page); pgdir->db_page = mlx5_dma_zalloc_coherent_node(dev, PAGE_SIZE, &pgdir->db_dma, node); if (!pgdir->db_page) { + kfree(pgdir->bitmap); kfree(pgdir); return NULL; } @@ -123,18 +141,19 @@ static struct mlx5_db_pgdir *mlx5_alloc_db_pgdir(struct mlx5_core_dev *dev, static int mlx5_alloc_db_from_pgdir(struct mlx5_db_pgdir *pgdir, struct mlx5_db *db) { + u32 db_per_page = PAGE_SIZE / cache_line_size(); int offset; int i; - i = find_first_bit(pgdir->bitmap, MLX5_DB_PER_PAGE); - if (i >= MLX5_DB_PER_PAGE) + i = find_first_bit(pgdir->bitmap, db_per_page); + if (i >= db_per_page) return -ENOMEM; __clear_bit(i, pgdir->bitmap); db->u.pgdir = pgdir; db->index = i; - offset = db->index * L1_CACHE_BYTES; + offset = db->index * cache_line_size(); db->db = pgdir->db_page + offset / sizeof(*pgdir->db_page); db->dma = pgdir->db_dma + offset; @@ -181,14 +200,16 @@ EXPORT_SYMBOL_GPL(mlx5_db_alloc); void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db) { + u32 db_per_page = PAGE_SIZE / cache_line_size(); mutex_lock(&dev->priv.pgdir_mutex); __set_bit(db->index, db->u.pgdir->bitmap); - if (bitmap_full(db->u.pgdir->bitmap, MLX5_DB_PER_PAGE)) { + if (bitmap_full(db->u.pgdir->bitmap, db_per_page)) { dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE, db->u.pgdir->db_page, db->u.pgdir->db_dma); list_del(&db->u.pgdir->list); + kfree(db->u.pgdir->bitmap); kfree(db->u.pgdir); } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 85c4786427e4..5dbda60a09f4 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -625,10 +625,6 @@ struct mlx5_db { int index; }; -enum { - MLX5_DB_PER_PAGE = PAGE_SIZE / L1_CACHE_BYTES, -}; - enum { MLX5_COMP_EQ_SIZE = 1024, }; @@ -638,13 +634,6 @@ enum { MLX5_PTYS_EN = 1 << 2, }; -struct mlx5_db_pgdir { - struct list_head list; - DECLARE_BITMAP(bitmap, MLX5_DB_PER_PAGE); - __be32 *db_page; - dma_addr_t db_dma; -}; - typedef void (*mlx5_cmd_cbk_t)(int status, void *context); struct mlx5_cmd_work_ent { From bba1574c2f11d674bf343f3cf307b33d19f73d9d Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Tue, 25 Oct 2016 18:36:25 +0300 Subject: [PATCH 767/884] net/mlx5: Always Query HCA caps after setting them Always query the HCA caps after setting them to update the capablities data structures. Not doing so results in incorrect capabilities being reported including max_dc, max_qp and several others. Fixes: 59211bd3b632 ("net/mlx5: Split the load/unload flow into hardware and software flows") Signed-off-by: Daniel Jurgens Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index d9c3c70b29e4..8a63910bfccf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -844,12 +844,6 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv) struct pci_dev *pdev = dev->pdev; int err; - err = mlx5_query_hca_caps(dev); - if (err) { - dev_err(&pdev->dev, "query hca failed\n"); - goto out; - } - err = mlx5_query_board_id(dev); if (err) { dev_err(&pdev->dev, "query board id failed\n"); @@ -1023,6 +1017,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, mlx5_start_health_poll(dev); + err = mlx5_query_hca_caps(dev); + if (err) { + dev_err(&pdev->dev, "query hca failed\n"); + goto err_stop_poll; + } + if (boot && mlx5_init_once(dev, priv)) { dev_err(&pdev->dev, "sw objs init failed\n"); goto err_stop_poll; From eccec8da3b4e6f403040c7187338a46d2ea27c20 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 25 Oct 2016 18:36:26 +0300 Subject: [PATCH 768/884] net/mlx5: Keep autogroups list ordered Finding a new autogroup range is done by going over a group list sorted by each group start index. The search is stopped after finding the first free range. Adding the newly created group to the list is wrongly added to the end of the list regardless of its start index as the parameter of where to insert it is ignored. This commit makes sure to use that unused parameter to insert it where requested. Fixes: f0d22d187473 ('net/mlx5_core: Introduce flow steering autogrouped flow table') Signed-off-by: Paul Blakey Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 5da2cc878582..17559c842905 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -879,7 +879,7 @@ static struct mlx5_flow_group *create_flow_group_common(struct mlx5_flow_table * tree_init_node(&fg->node, !is_auto_fg, del_flow_group); tree_add_node(&fg->node, &ft->node); /* Add node to group list */ - list_add(&fg->node.list, ft->node.children.prev); + list_add(&fg->node.list, prev_fg); return fg; } @@ -893,7 +893,7 @@ struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, return ERR_PTR(-EPERM); lock_ref_node(&ft->node); - fg = create_flow_group_common(ft, fg_in, &ft->node.children, false); + fg = create_flow_group_common(ft, fg_in, ft->node.children.prev, false); unlock_ref_node(&ft->node); return fg; @@ -1012,7 +1012,7 @@ static struct mlx5_flow_group *create_autogroup(struct mlx5_flow_table *ft, u32 *match_criteria) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); - struct list_head *prev = &ft->node.children; + struct list_head *prev = ft->node.children.prev; unsigned int candidate_index = 0; struct mlx5_flow_group *fg; void *match_criteria_addr; From 32dba76a7a3104b2815f31b1755f4c0ba1f01a78 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 25 Oct 2016 18:36:27 +0300 Subject: [PATCH 769/884] net/mlx5: Fix autogroups groups num not decreasing Autogroups groups num is increased when creating a new flow group, but is never decreased. Now decreasing it when deleting a flow group. Fixes: f0d22d187473 ('net/mlx5_core: Introduce flow steering autogrouped flow table') Signed-off-by: Paul Blakey Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 17559c842905..89696048b045 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -436,6 +436,9 @@ static void del_flow_group(struct fs_node *node) fs_get_obj(ft, fg->node.parent); dev = get_dev(&ft->node); + if (ft->autogroup.active) + ft->autogroup.num_groups--; + if (mlx5_cmd_destroy_flow_group(dev, ft, fg->id)) mlx5_core_warn(dev, "flow steering can't destroy fg %d of ft %d\n", fg->id, ft->id); From e83d6955fe422f120b0e98e4f02496af89845eef Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 25 Oct 2016 18:36:28 +0300 Subject: [PATCH 770/884] net/mlx5: Correctly initialize last use of flow counters Currently, last use timestamp is initialized to zero. This is not the expected value by higher layers such as when we do TC action offloading. To fix that, set it to the current time, e.g when the counter/rule is offloaded. This is the same behaviour of non-offloaded TC actions. Fixes: 43a335e055bb ('mlx5_core: Flow counters infrastructure') Signed-off-by: Paul Blakey Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index 3a9195b4169d..3b026c151cf2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -218,6 +218,7 @@ struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging) goto err_out; if (aging) { + counter->cache.lastuse = jiffies; counter->aging = true; spin_lock(&fc_stats->addlist_lock); From 2b029556667a7fc1aea731c5828e501656f87653 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Tue, 25 Oct 2016 18:36:29 +0300 Subject: [PATCH 771/884] net/mlx5e: Choose best nearest LRO timeout Instead of predicting the index of the wanted LRO timeout value from hardware capabilities, look for the nearest LRO timeout value. Fixes: 5c50368f3831 ('net/mlx5e: Light-weight netdev open/stop') Signed-off-by: Saeed Mahameed Signed-off-by: Mohamad Haj Yahia Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 5 +++++ .../net/ethernet/mellanox/mlx5/core/en_main.c | 19 ++++++++++++++++--- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 460363b66cb1..7a43502a89cc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -85,6 +85,9 @@ #define MLX5_MPWRQ_SMALL_PACKET_THRESHOLD (128) #define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ (64 * 1024) +#define MLX5E_DEFAULT_LRO_TIMEOUT 32 +#define MLX5E_LRO_TIMEOUT_ARR_SIZE 4 + #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC 0x10 #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE 0x3 #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS 0x20 @@ -221,6 +224,7 @@ struct mlx5e_params { struct ieee_ets ets; #endif bool rx_am_enabled; + u32 lro_timeout; }; struct mlx5e_tstamp { @@ -888,5 +892,6 @@ int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev); void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev); struct rtnl_link_stats64 * mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats); +u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout); #endif /* __MLX5_EN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 7eaf38020a8f..5c8ab3eabe9c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1971,9 +1971,7 @@ static void mlx5e_build_tir_ctx_lro(void *tirc, struct mlx5e_priv *priv) MLX5_SET(tirc, tirc, lro_max_ip_payload_size, (priv->params.lro_wqe_sz - ROUGH_MAX_L2_L3_HDR_SZ) >> 8); - MLX5_SET(tirc, tirc, lro_timeout_period_usecs, - MLX5_CAP_ETH(priv->mdev, - lro_timer_supported_periods[2])); + MLX5_SET(tirc, tirc, lro_timeout_period_usecs, priv->params.lro_timeout); } void mlx5e_build_tir_ctx_hash(void *tirc, struct mlx5e_priv *priv) @@ -3401,6 +3399,18 @@ static void mlx5e_query_min_inline(struct mlx5_core_dev *mdev, } } +u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout) +{ + int i; + + /* The supported periods are organized in ascending order */ + for (i = 0; i < MLX5E_LRO_TIMEOUT_ARR_SIZE - 1; i++) + if (MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]) >= wanted_timeout) + break; + + return MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]); +} + static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, struct net_device *netdev, const struct mlx5e_profile *profile, @@ -3419,6 +3429,9 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, priv->profile = profile; priv->ppriv = ppriv; + priv->params.lro_timeout = + mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT); + priv->params.log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; /* set CQE compression */ From 5e1e93c7047381b81236f82f60c15d49c510d1a7 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Tue, 25 Oct 2016 18:36:30 +0300 Subject: [PATCH 772/884] net/mlx5e: Unregister netdev before detaching it Detaching the netdev before unregistering it cause some netdev cleanup ndos to fail because they check presence of the netdev, so we need to unregister the netdev first. Fixes: 26e59d8077a3 ('net/mlx5e: Implement mlx5e interface attach/detach callbacks') Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 5c8ab3eabe9c..f4c687ce4c59 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4048,7 +4048,6 @@ void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv) const struct mlx5e_profile *profile = priv->profile; struct net_device *netdev = priv->netdev; - unregister_netdev(netdev); destroy_workqueue(priv->wq); if (profile->cleanup) profile->cleanup(priv); @@ -4065,6 +4064,7 @@ static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv) for (vport = 1; vport < total_vfs; vport++) mlx5_eswitch_unregister_vport_rep(esw, vport); + unregister_netdev(priv->netdev); mlx5e_detach(mdev, vpriv); mlx5e_destroy_netdev(mdev, priv); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 3c97da103d30..7fe6559e4ab3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -457,6 +457,7 @@ void mlx5e_vport_rep_unload(struct mlx5_eswitch *esw, struct mlx5e_priv *priv = rep->priv_data; struct net_device *netdev = priv->netdev; + unregister_netdev(netdev); mlx5e_detach_netdev(esw->dev, netdev); mlx5e_destroy_netdev(esw->dev, priv); } From 247f139cdae73b4f47bd348d05dff1afd40b84b6 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Tue, 25 Oct 2016 18:36:31 +0300 Subject: [PATCH 773/884] net/mlx5: Change the acl enable prototype to return status The Ingress/Egress ACL enable function may fail and it should return status to its caller to avoid NULL pointer dereference. Fixes: f942380c1239 ('net/mlx5: E-Switch, Vport ingress/egress ACLs rules for spoofchk') Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 50 +++++++++++++------ 1 file changed, 34 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index abbf2c369923..be1f7333ab7f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -931,8 +931,8 @@ static void esw_vport_change_handler(struct work_struct *work) mutex_unlock(&esw->state_lock); } -static void esw_vport_enable_egress_acl(struct mlx5_eswitch *esw, - struct mlx5_vport *vport) +static int esw_vport_enable_egress_acl(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_flow_group *vlan_grp = NULL; @@ -949,9 +949,11 @@ static void esw_vport_enable_egress_acl(struct mlx5_eswitch *esw, int table_size = 2; int err = 0; - if (!MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support) || - !IS_ERR_OR_NULL(vport->egress.acl)) - return; + if (!MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) + return -EOPNOTSUPP; + + if (!IS_ERR_OR_NULL(vport->egress.acl)) + return 0; esw_debug(dev, "Create vport[%d] egress ACL log_max_size(%d)\n", vport->vport, MLX5_CAP_ESW_EGRESS_ACL(dev, log_max_ft_size)); @@ -959,12 +961,12 @@ static void esw_vport_enable_egress_acl(struct mlx5_eswitch *esw, root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_EGRESS); if (!root_ns) { esw_warn(dev, "Failed to get E-Switch egress flow namespace\n"); - return; + return -EIO; } flow_group_in = mlx5_vzalloc(inlen); if (!flow_group_in) - return; + return -ENOMEM; acl = mlx5_create_vport_flow_table(root_ns, 0, table_size, 0, vport->vport); if (IS_ERR(acl)) { @@ -1009,6 +1011,7 @@ out: mlx5_destroy_flow_group(vlan_grp); if (err && !IS_ERR_OR_NULL(acl)) mlx5_destroy_flow_table(acl); + return err; } static void esw_vport_cleanup_egress_rules(struct mlx5_eswitch *esw, @@ -1041,8 +1044,8 @@ static void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw, vport->egress.acl = NULL; } -static void esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, - struct mlx5_vport *vport) +static int esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_core_dev *dev = esw->dev; @@ -1063,9 +1066,11 @@ static void esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, int table_size = 4; int err = 0; - if (!MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support) || - !IS_ERR_OR_NULL(vport->ingress.acl)) - return; + if (!MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) + return -EOPNOTSUPP; + + if (!IS_ERR_OR_NULL(vport->ingress.acl)) + return 0; esw_debug(dev, "Create vport[%d] ingress ACL log_max_size(%d)\n", vport->vport, MLX5_CAP_ESW_INGRESS_ACL(dev, log_max_ft_size)); @@ -1073,12 +1078,12 @@ static void esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS); if (!root_ns) { esw_warn(dev, "Failed to get E-Switch ingress flow namespace\n"); - return; + return -EIO; } flow_group_in = mlx5_vzalloc(inlen); if (!flow_group_in) - return; + return -ENOMEM; acl = mlx5_create_vport_flow_table(root_ns, 0, table_size, 0, vport->vport); if (IS_ERR(acl)) { @@ -1167,6 +1172,7 @@ out: } kvfree(flow_group_in); + return err; } static void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw, @@ -1225,7 +1231,13 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, return 0; } - esw_vport_enable_ingress_acl(esw, vport); + err = esw_vport_enable_ingress_acl(esw, vport); + if (err) { + mlx5_core_warn(esw->dev, + "failed to enable ingress acl (%d) on vport[%d]\n", + err, vport->vport); + return err; + } esw_debug(esw->dev, "vport[%d] configure ingress rules, vlan(%d) qos(%d)\n", @@ -1299,7 +1311,13 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, return 0; } - esw_vport_enable_egress_acl(esw, vport); + err = esw_vport_enable_egress_acl(esw, vport); + if (err) { + mlx5_core_warn(esw->dev, + "failed to enable egress acl (%d) on vport[%d]\n", + err, vport->vport); + return err; + } esw_debug(esw->dev, "vport[%d] configure egress rules, vlan(%d) qos(%d)\n", From 2241007b3d783cbdbaa78c30bdb1994278b6f9b9 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Tue, 25 Oct 2016 18:36:32 +0300 Subject: [PATCH 774/884] net/mlx5: Clear health sick bit when starting health poll The health sick status should be cleared when we start the health poll. This is crucial for driver reload (unload + load) in order to behave right in case of health issue. Fixes: fd76ee4da55a ('net/mlx5_core: Fix internal error detection conditions') Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/health.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 1a05fb965c8d..2cb4094c9c49 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -281,6 +281,7 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev) struct mlx5_core_health *health = &dev->priv.health; init_timer(&health->timer); + health->sick = 0; health->health = &dev->iseg->health; health->health_counter = &dev->iseg->health_counter; From 05ac2c0b7438ea08c5d54b48797acf9b22cb2f6f Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Tue, 25 Oct 2016 18:36:33 +0300 Subject: [PATCH 775/884] net/mlx5: Fix race between PCI error handlers and health work Currently there is a race between the health care work and the kernel pci error handlers because both of them detect the error, the first one to be called will do the error handling. There is a chance that health care will disable the pci after resuming pci slot. Also create a separate WQ because now we will have two types of health works, one for the error detection and one for the recovery. Fixes: 89d44f0a6c73 ('net/mlx5_core: Add pci error handlers to mlx5_core driver') Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/health.c | 30 +++++++++++++++++-- .../net/ethernet/mellanox/mlx5/core/main.c | 9 ++++-- include/linux/mlx5/driver.h | 4 +++ 3 files changed, 38 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 2cb4094c9c49..2d00022c92d8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -64,6 +64,10 @@ enum { MLX5_NIC_IFC_NO_DRAM_NIC = 2 }; +enum { + MLX5_DROP_NEW_HEALTH_WORK, +}; + static u8 get_nic_interface(struct mlx5_core_dev *dev) { return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 3; @@ -272,7 +276,13 @@ static void poll_health(unsigned long data) if (in_fatal(dev) && !health->sick) { health->sick = true; print_health_info(dev); - schedule_work(&health->work); + spin_lock(&health->wq_lock); + if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags)) + queue_work(health->wq, &health->work); + else + dev_err(&dev->pdev->dev, + "new health works are not permitted at this stage\n"); + spin_unlock(&health->wq_lock); } } @@ -282,6 +292,7 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev) init_timer(&health->timer); health->sick = 0; + clear_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); health->health = &dev->iseg->health; health->health_counter = &dev->iseg->health_counter; @@ -298,11 +309,21 @@ void mlx5_stop_health_poll(struct mlx5_core_dev *dev) del_timer_sync(&health->timer); } +void mlx5_drain_health_wq(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + + spin_lock(&health->wq_lock); + set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); + spin_unlock(&health->wq_lock); + cancel_work_sync(&health->work); +} + void mlx5_health_cleanup(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; - flush_work(&health->work); + destroy_workqueue(health->wq); } int mlx5_health_init(struct mlx5_core_dev *dev) @@ -317,8 +338,11 @@ int mlx5_health_init(struct mlx5_core_dev *dev) strcpy(name, "mlx5_health"); strcat(name, dev_name(&dev->pdev->dev)); + health->wq = create_singlethread_workqueue(name); kfree(name); - + if (!health->wq) + return -ENOMEM; + spin_lock_init(&health->wq_lock); INIT_WORK(&health->work, health_care); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 8a63910bfccf..9f90226bc120 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1315,8 +1315,13 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, dev_info(&pdev->dev, "%s was called\n", __func__); mlx5_enter_error_state(dev); mlx5_unload_one(dev, priv, false); - pci_save_state(pdev); - mlx5_pci_disable_device(dev); + /* In case of kernel call save the pci state and drain health wq */ + if (state) { + pci_save_state(pdev); + mlx5_drain_health_wq(dev); + mlx5_pci_disable_device(dev); + } + return state == pci_channel_io_perm_failure ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET; } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 5dbda60a09f4..7d9a5d08eb59 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -418,7 +418,10 @@ struct mlx5_core_health { u32 prev; int miss_counter; bool sick; + /* wq spinlock to synchronize draining */ + spinlock_t wq_lock; struct workqueue_struct *wq; + unsigned long flags; struct work_struct work; }; @@ -778,6 +781,7 @@ void mlx5_health_cleanup(struct mlx5_core_dev *dev); int mlx5_health_init(struct mlx5_core_dev *dev); void mlx5_start_health_poll(struct mlx5_core_dev *dev); void mlx5_stop_health_poll(struct mlx5_core_dev *dev); +void mlx5_drain_health_wq(struct mlx5_core_dev *dev); int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf, int node); int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf); From 04c0c1ab38e95105d950db5b84e727637e149ce7 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Tue, 25 Oct 2016 18:36:34 +0300 Subject: [PATCH 776/884] net/mlx5: PCI error recovery health care simulation In case that the kernel PCI error handlers are not called, we will trigger our own recovery flow. The health work will give priority to the kernel pci error handlers to recover the PCI by waiting for a small period, if the pci error handlers are not triggered the manual recovery flow will be executed. We don't save pci state in case of manual recovery because it will ruin the pci configuration space and we will lose dma sync. Fixes: 89d44f0a6c73 ('net/mlx5_core: Add pci error handlers to mlx5_core driver') Signed-off-by: Mohamad Haj Yahia Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/health.c | 45 +++++++++++++++++-- .../net/ethernet/mellanox/mlx5/core/main.c | 18 +++++--- .../ethernet/mellanox/mlx5/core/mlx5_core.h | 1 + include/linux/mlx5/driver.h | 1 + 4 files changed, 56 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 2d00022c92d8..5bcf93422ee0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -61,14 +61,15 @@ enum { enum { MLX5_NIC_IFC_FULL = 0, MLX5_NIC_IFC_DISABLED = 1, - MLX5_NIC_IFC_NO_DRAM_NIC = 2 + MLX5_NIC_IFC_NO_DRAM_NIC = 2, + MLX5_NIC_IFC_INVALID = 3 }; enum { MLX5_DROP_NEW_HEALTH_WORK, }; -static u8 get_nic_interface(struct mlx5_core_dev *dev) +static u8 get_nic_state(struct mlx5_core_dev *dev) { return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 3; } @@ -101,7 +102,7 @@ static int in_fatal(struct mlx5_core_dev *dev) struct mlx5_core_health *health = &dev->priv.health; struct health_buffer __iomem *h = health->health; - if (get_nic_interface(dev) == MLX5_NIC_IFC_DISABLED) + if (get_nic_state(dev) == MLX5_NIC_IFC_DISABLED) return 1; if (ioread32be(&h->fw_ver) == 0xffffffff) @@ -131,7 +132,7 @@ unlock: static void mlx5_handle_bad_state(struct mlx5_core_dev *dev) { - u8 nic_interface = get_nic_interface(dev); + u8 nic_interface = get_nic_state(dev); switch (nic_interface) { case MLX5_NIC_IFC_FULL: @@ -153,8 +154,34 @@ static void mlx5_handle_bad_state(struct mlx5_core_dev *dev) mlx5_disable_device(dev); } +static void health_recover(struct work_struct *work) +{ + struct mlx5_core_health *health; + struct delayed_work *dwork; + struct mlx5_core_dev *dev; + struct mlx5_priv *priv; + u8 nic_state; + + dwork = container_of(work, struct delayed_work, work); + health = container_of(dwork, struct mlx5_core_health, recover_work); + priv = container_of(health, struct mlx5_priv, health); + dev = container_of(priv, struct mlx5_core_dev, priv); + + nic_state = get_nic_state(dev); + if (nic_state == MLX5_NIC_IFC_INVALID) { + dev_err(&dev->pdev->dev, "health recovery flow aborted since the nic state is invalid\n"); + return; + } + + dev_err(&dev->pdev->dev, "starting health recovery flow\n"); + mlx5_recover_device(dev); +} + +/* How much time to wait until health resetting the driver (in msecs) */ +#define MLX5_RECOVERY_DELAY_MSECS 60000 static void health_care(struct work_struct *work) { + unsigned long recover_delay = msecs_to_jiffies(MLX5_RECOVERY_DELAY_MSECS); struct mlx5_core_health *health; struct mlx5_core_dev *dev; struct mlx5_priv *priv; @@ -164,6 +191,14 @@ static void health_care(struct work_struct *work) dev = container_of(priv, struct mlx5_core_dev, priv); mlx5_core_warn(dev, "handling bad device here\n"); mlx5_handle_bad_state(dev); + + spin_lock(&health->wq_lock); + if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags)) + schedule_delayed_work(&health->recover_work, recover_delay); + else + dev_err(&dev->pdev->dev, + "new health works are not permitted at this stage\n"); + spin_unlock(&health->wq_lock); } static const char *hsynd_str(u8 synd) @@ -316,6 +351,7 @@ void mlx5_drain_health_wq(struct mlx5_core_dev *dev) spin_lock(&health->wq_lock); set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); spin_unlock(&health->wq_lock); + cancel_delayed_work_sync(&health->recover_work); cancel_work_sync(&health->work); } @@ -344,6 +380,7 @@ int mlx5_health_init(struct mlx5_core_dev *dev) return -ENOMEM; spin_lock_init(&health->wq_lock); INIT_WORK(&health->work, health_care); + INIT_DELAYED_WORK(&health->recover_work, health_recover); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 9f90226bc120..d5433c49b2b0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1313,6 +1313,7 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, struct mlx5_priv *priv = &dev->priv; dev_info(&pdev->dev, "%s was called\n", __func__); + mlx5_enter_error_state(dev); mlx5_unload_one(dev, priv, false); /* In case of kernel call save the pci state and drain health wq */ @@ -1378,11 +1379,6 @@ static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev) return PCI_ERS_RESULT_RECOVERED; } -void mlx5_disable_device(struct mlx5_core_dev *dev) -{ - mlx5_pci_err_detected(dev->pdev, 0); -} - static void mlx5_pci_resume(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); @@ -1432,6 +1428,18 @@ static const struct pci_device_id mlx5_core_pci_table[] = { MODULE_DEVICE_TABLE(pci, mlx5_core_pci_table); +void mlx5_disable_device(struct mlx5_core_dev *dev) +{ + mlx5_pci_err_detected(dev->pdev, 0); +} + +void mlx5_recover_device(struct mlx5_core_dev *dev) +{ + mlx5_pci_disable_device(dev); + if (mlx5_pci_slot_reset(dev->pdev) == PCI_ERS_RESULT_RECOVERED) + mlx5_pci_resume(dev->pdev); +} + static struct pci_driver mlx5_core_driver = { .name = DRIVER_NAME, .id_table = mlx5_core_pci_table, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 3d0cfb9f18f9..187662c8ea96 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -83,6 +83,7 @@ void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, unsigned long param); void mlx5_enter_error_state(struct mlx5_core_dev *dev); void mlx5_disable_device(struct mlx5_core_dev *dev); +void mlx5_recover_device(struct mlx5_core_dev *dev); int mlx5_sriov_init(struct mlx5_core_dev *dev); void mlx5_sriov_cleanup(struct mlx5_core_dev *dev); int mlx5_sriov_attach(struct mlx5_core_dev *dev); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 7d9a5d08eb59..ecc451d89ccd 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -423,6 +423,7 @@ struct mlx5_core_health { struct workqueue_struct *wq; unsigned long flags; struct work_struct work; + struct delayed_work recover_work; }; struct mlx5_cq_table { From 6b276190c50a12511d889d9079ffb901ff94a822 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Tue, 25 Oct 2016 18:36:35 +0300 Subject: [PATCH 777/884] net/mlx5: Avoid passing dma address 0 to firmware Currently the firmware can't work with a page with dma address 0. Passing such an address to the firmware will cause the give_pages command to fail. To avoid this, in case we get a 0 dma address of a page from the dma engine, we avoid passing it to FW by remapping to get an address other than 0. Fixes: bf0bf77f6519 ('mlx5: Support communicating arbitrary host...') Signed-off-by: Noa Osherovich Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlx5/core/pagealloc.c | 26 +++++++++++++------ 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index cc4fd61914d3..a57d5a81eb05 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -209,6 +209,7 @@ static void free_4k(struct mlx5_core_dev *dev, u64 addr) static int alloc_system_page(struct mlx5_core_dev *dev, u16 func_id) { struct page *page; + u64 zero_addr = 1; u64 addr; int err; int nid = dev_to_node(&dev->pdev->dev); @@ -218,26 +219,35 @@ static int alloc_system_page(struct mlx5_core_dev *dev, u16 func_id) mlx5_core_warn(dev, "failed to allocate page\n"); return -ENOMEM; } +map: addr = dma_map_page(&dev->pdev->dev, page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); if (dma_mapping_error(&dev->pdev->dev, addr)) { mlx5_core_warn(dev, "failed dma mapping page\n"); err = -ENOMEM; - goto out_alloc; + goto err_mapping; } + + /* Firmware doesn't support page with physical address 0 */ + if (addr == 0) { + zero_addr = addr; + goto map; + } + err = insert_page(dev, addr, page, func_id); if (err) { mlx5_core_err(dev, "failed to track allocated page\n"); - goto out_mapping; + dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, + DMA_BIDIRECTIONAL); } - return 0; +err_mapping: + if (err) + __free_page(page); -out_mapping: - dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL); - -out_alloc: - __free_page(page); + if (zero_addr == 0) + dma_unmap_page(&dev->pdev->dev, zero_addr, PAGE_SIZE, + DMA_BIDIRECTIONAL); return err; } From e4cabca54911a6be6f2e80e48fa497c7e19019a5 Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Tue, 25 Oct 2016 18:08:49 -0400 Subject: [PATCH 778/884] inet: Fix missing return value in inet6_hash As part of a series to implement faster SO_REUSEPORT lookups, commit 086c653f5862 ("sock: struct proto hash function may error") added return values to protocol hash functions and commit 496611d7b5ea ("inet: create IPv6-equivalent inet_hash function") implemented a new hash function for IPv6. However, the latter does not respect the former's convention. This properly propagates the hash errors in the IPv6 case. Fixes: 496611d7b5ea ("inet: create IPv6-equivalent inet_hash function") Reported-by: Soheil Hassas Yeganeh Signed-off-by: Craig Gallek Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv6/inet6_hashtables.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 2fd0374a35b1..02761c9fe43e 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -264,13 +264,15 @@ EXPORT_SYMBOL_GPL(inet6_hash_connect); int inet6_hash(struct sock *sk) { + int err = 0; + if (sk->sk_state != TCP_CLOSE) { local_bh_disable(); - __inet_hash(sk, NULL, ipv6_rcv_saddr_equal); + err = __inet_hash(sk, NULL, ipv6_rcv_saddr_equal); local_bh_enable(); } - return 0; + return err; } EXPORT_SYMBOL_GPL(inet6_hash); From 96a8eb1eeed2c3485cdba198fab3a2faaec386d3 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 26 Oct 2016 00:37:53 +0200 Subject: [PATCH 779/884] bpf: fix samples to add fake KBUILD_MODNAME Some of the sample files are causing issues when they are loaded with tc and cls_bpf, meaning tc bails out while trying to parse the resulting ELF file as program/map/etc sections are not present, which can be easily spotted with readelf(1). Currently, BPF samples are including some of the kernel headers and mid term we should change them to refrain from this, really. When dynamic debugging is enabled, we bail out due to undeclared KBUILD_MODNAME, which is easily overlooked in the build as clang spills this along with other noisy warnings from various header includes, and llc still generates an ELF file with mentioned characteristics. For just playing around with BPF examples, this can be a bit of a hurdle to take. Just add a fake KBUILD_MODNAME as a band-aid to fix the issue, same is done in xdp*_kern samples already. Fixes: 65d472fb007d ("samples/bpf: add 'pointer to packet' tests") Fixes: 6afb1e28b859 ("samples/bpf: Add tunnel set/get tests.") Fixes: a3f74617340b ("cgroup: bpf: Add an example to do cgroup checking in BPF") Reported-by: Chandrasekar Kannan Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- samples/bpf/parse_ldabs.c | 1 + samples/bpf/parse_simple.c | 1 + samples/bpf/parse_varlen.c | 1 + samples/bpf/tcbpf1_kern.c | 1 + samples/bpf/tcbpf2_kern.c | 1 + samples/bpf/test_cgrp2_tc_kern.c | 1 + 6 files changed, 6 insertions(+) diff --git a/samples/bpf/parse_ldabs.c b/samples/bpf/parse_ldabs.c index d17550198d06..6db6b21fdc6d 100644 --- a/samples/bpf/parse_ldabs.c +++ b/samples/bpf/parse_ldabs.c @@ -4,6 +4,7 @@ * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. */ +#define KBUILD_MODNAME "foo" #include #include #include diff --git a/samples/bpf/parse_simple.c b/samples/bpf/parse_simple.c index cf2511c33905..10af53d33cc2 100644 --- a/samples/bpf/parse_simple.c +++ b/samples/bpf/parse_simple.c @@ -4,6 +4,7 @@ * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. */ +#define KBUILD_MODNAME "foo" #include #include #include diff --git a/samples/bpf/parse_varlen.c b/samples/bpf/parse_varlen.c index edab34dce79b..95c16324760c 100644 --- a/samples/bpf/parse_varlen.c +++ b/samples/bpf/parse_varlen.c @@ -4,6 +4,7 @@ * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. */ +#define KBUILD_MODNAME "foo" #include #include #include diff --git a/samples/bpf/tcbpf1_kern.c b/samples/bpf/tcbpf1_kern.c index fa051b3d53ee..274c884c87fe 100644 --- a/samples/bpf/tcbpf1_kern.c +++ b/samples/bpf/tcbpf1_kern.c @@ -1,3 +1,4 @@ +#define KBUILD_MODNAME "foo" #include #include #include diff --git a/samples/bpf/tcbpf2_kern.c b/samples/bpf/tcbpf2_kern.c index 3303bb85593b..9c823a609e75 100644 --- a/samples/bpf/tcbpf2_kern.c +++ b/samples/bpf/tcbpf2_kern.c @@ -5,6 +5,7 @@ * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. */ +#define KBUILD_MODNAME "foo" #include #include #include diff --git a/samples/bpf/test_cgrp2_tc_kern.c b/samples/bpf/test_cgrp2_tc_kern.c index 10ff73404e3a..1547b36a7b7b 100644 --- a/samples/bpf/test_cgrp2_tc_kern.c +++ b/samples/bpf/test_cgrp2_tc_kern.c @@ -4,6 +4,7 @@ * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. */ +#define KBUILD_MODNAME "foo" #include #include #include From ae148b085876fa771d9ef2c05f85d4b4bf09ce0d Mon Sep 17 00:00:00 2001 From: Eli Cooper Date: Wed, 26 Oct 2016 10:11:09 +0800 Subject: [PATCH 780/884] ip6_tunnel: Update skb->protocol to ETH_P_IPV6 in ip6_tnl_xmit() This patch updates skb->protocol to ETH_P_IPV6 in ip6_tnl_xmit() when an IPv6 header is installed to a socket buffer. This is not a cosmetic change. Without updating this value, GSO packets transmitted through an ipip6 tunnel have the protocol of ETH_P_IP and skb_mac_gso_segment() will attempt to call gso_segment() for IPv4, which results in the packets being dropped. Fixes: b8921ca83eed ("ip4ip6: Support for GSO/GRO") Signed-off-by: Eli Cooper Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 5692d6b8da95..87784560dc46 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1172,6 +1172,7 @@ route_lookup: if (err) return err; + skb->protocol = htons(ETH_P_IPV6); skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); ipv6h = ipv6_hdr(skb); From 5ee67b587a2b0092bdea3123349c8c3c43e8e46e Mon Sep 17 00:00:00 2001 From: Yuan Yao Date: Mon, 17 Oct 2016 18:02:34 +0800 Subject: [PATCH 781/884] spi: dspi: clear SPI_SR before enable interrupt Once dspi is used in uboot, the SPI_SR have been set by some value. At this time, if kernel enable the interrupt before clear the status flag, that will trigger the wrong interrupt. Signed-off-by: Yuan Yao Signed-off-by: Mark Brown --- drivers/spi/spi-fsl-dspi.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c index 35c0dd945668..a67b0ff6a362 100644 --- a/drivers/spi/spi-fsl-dspi.c +++ b/drivers/spi/spi-fsl-dspi.c @@ -70,6 +70,7 @@ #define SPI_SR 0x2c #define SPI_SR_EOQF 0x10000000 #define SPI_SR_TCFQF 0x80000000 +#define SPI_SR_CLEAR 0xdaad0000 #define SPI_RSER 0x30 #define SPI_RSER_EOQFE 0x10000000 @@ -646,6 +647,11 @@ static const struct regmap_config dspi_regmap_config = { .max_register = 0x88, }; +static void dspi_init(struct fsl_dspi *dspi) +{ + regmap_write(dspi->regmap, SPI_SR, SPI_SR_CLEAR); +} + static int dspi_probe(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; @@ -709,6 +715,7 @@ static int dspi_probe(struct platform_device *pdev) return PTR_ERR(dspi->regmap); } + dspi_init(dspi); dspi->irq = platform_get_irq(pdev, 0); if (dspi->irq < 0) { dev_err(&pdev->dev, "can't get platform irq\n"); From a4256bc9ec36159e84d710c7c44aaa244a6380a8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 25 Oct 2016 18:16:20 +0200 Subject: [PATCH 782/884] IB/mlx4: avoid a -Wmaybe-uninitialize warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is an old warning about mlx4_SW2HW_EQ_wrapper on x86: ethernet/mellanox/mlx4/resource_tracker.c: In function ‘mlx4_SW2HW_EQ_wrapper’: ethernet/mellanox/mlx4/resource_tracker.c:3071:10: error: ‘eq’ may be used uninitialized in this function [-Werror=maybe-uninitialized] The problem here is that gcc won't track the state of the variable across a spin_unlock. Moving the assignment out of the lock is safe here and avoids the warning. Signed-off-by: Arnd Bergmann Reviewed-by: Yishai Hadas Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 84d7857ccc27..c548beaaf910 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -1605,13 +1605,14 @@ static int eq_res_start_move_to(struct mlx4_dev *dev, int slave, int index, r->com.from_state = r->com.state; r->com.to_state = state; r->com.state = RES_EQ_BUSY; - if (eq) - *eq = r; } } spin_unlock_irq(mlx4_tlock(dev)); + if (!err && eq) + *eq = r; + return err; } From 166e6045cacccb3046883a1a4c977f173fec5ccd Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Wed, 26 Oct 2016 13:26:38 +0530 Subject: [PATCH 783/884] cxgb4: Fix error handling in alloc_uld_rxqs(). Fix to release resources properly in error handling path of alloc_uld_rxqs(), This patch also removes unwanted arguments and avoids calling the same function twice. Fixes: 94cdb8bb993a (cxgb4: Add support for dynamic allocation of resources for ULD Signed-off-by: Ganesh Goudar Signed-off-by: David S. Miller --- .../net/ethernet/chelsio/cxgb4/cxgb4_uld.c | 40 +++++++++---------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c index 0945fa49a5dd..2471ff465d5c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c @@ -135,15 +135,17 @@ static int uldrx_handler(struct sge_rspq *q, const __be64 *rsp, } static int alloc_uld_rxqs(struct adapter *adap, - struct sge_uld_rxq_info *rxq_info, - unsigned int nq, unsigned int offset, bool lro) + struct sge_uld_rxq_info *rxq_info, bool lro) { struct sge *s = &adap->sge; - struct sge_ofld_rxq *q = rxq_info->uldrxq + offset; - unsigned short *ids = rxq_info->rspq_id + offset; - unsigned int per_chan = nq / adap->params.nports; + unsigned int nq = rxq_info->nrxq + rxq_info->nciq; + struct sge_ofld_rxq *q = rxq_info->uldrxq; + unsigned short *ids = rxq_info->rspq_id; unsigned int bmap_idx = 0; - int i, err, msi_idx; + unsigned int per_chan; + int i, err, msi_idx, que_idx = 0; + + per_chan = rxq_info->nrxq / adap->params.nports; if (adap->flags & USING_MSIX) msi_idx = 1; @@ -151,12 +153,18 @@ static int alloc_uld_rxqs(struct adapter *adap, msi_idx = -((int)s->intrq.abs_id + 1); for (i = 0; i < nq; i++, q++) { + if (i == rxq_info->nrxq) { + /* start allocation of concentrator queues */ + per_chan = rxq_info->nciq / adap->params.nports; + que_idx = 0; + } + if (msi_idx >= 0) { bmap_idx = get_msix_idx_from_bmap(adap); msi_idx = adap->msix_info_ulds[bmap_idx].idx; } err = t4_sge_alloc_rxq(adap, &q->rspq, false, - adap->port[i / per_chan], + adap->port[que_idx++ / per_chan], msi_idx, q->fl.size ? &q->fl : NULL, uldrx_handler, @@ -165,29 +173,19 @@ static int alloc_uld_rxqs(struct adapter *adap, if (err) goto freeout; if (msi_idx >= 0) - rxq_info->msix_tbl[i + offset] = bmap_idx; + rxq_info->msix_tbl[i] = bmap_idx; memset(&q->stats, 0, sizeof(q->stats)); if (ids) ids[i] = q->rspq.abs_id; } return 0; freeout: - q = rxq_info->uldrxq + offset; + q = rxq_info->uldrxq; for ( ; i; i--, q++) { if (q->rspq.desc) free_rspq_fl(adap, &q->rspq, q->fl.size ? &q->fl : NULL); } - - /* We need to free rxq also in case of ciq allocation failure */ - if (offset) { - q = rxq_info->uldrxq + offset; - for ( ; i; i--, q++) { - if (q->rspq.desc) - free_rspq_fl(adap, &q->rspq, - q->fl.size ? &q->fl : NULL); - } - } return err; } @@ -205,9 +203,7 @@ setup_sge_queues_uld(struct adapter *adap, unsigned int uld_type, bool lro) return -ENOMEM; } - ret = !(!alloc_uld_rxqs(adap, rxq_info, rxq_info->nrxq, 0, lro) && - !alloc_uld_rxqs(adap, rxq_info, rxq_info->nciq, - rxq_info->nrxq, lro)); + ret = !(!alloc_uld_rxqs(adap, rxq_info, lro)); /* Tell uP to route control queue completions to rdma rspq */ if (adap->flags & FULL_INIT_DONE && From 4700e9ce6ed8526a14e6cf9d7e319f675c16d0a7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 26 Oct 2016 14:44:33 +0200 Subject: [PATCH 784/884] net_sched actions: use nla_parse_nested() Use nla_parse_nested instead of open-coding the call to nla_parse() with the attribute data/len. Signed-off-by: Johannes Berg Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_api.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index a512b18c0088..f893d180da1c 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -1028,8 +1028,7 @@ static struct nlattr *find_dump_kind(const struct nlmsghdr *n) if (tb[1] == NULL) return NULL; - if (nla_parse(tb2, TCA_ACT_MAX, nla_data(tb[1]), - nla_len(tb[1]), NULL) < 0) + if (nla_parse_nested(tb2, TCA_ACT_MAX, tb[1], NULL) < 0) return NULL; kind = tb2[TCA_ACT_KIND]; From 104ba78c98808ae837d1f63aae58c183db5505df Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Wed, 26 Oct 2016 11:23:07 -0400 Subject: [PATCH 785/884] packet: on direct_xmit, limit tso and csum to supported devices When transmitting on a packet socket with PACKET_VNET_HDR and PACKET_QDISC_BYPASS, validate device support for features requested in vnet_hdr. Drop TSO packets sent to devices that do not support TSO or have the feature disabled. Note that the latter currently do process those packets correctly, regardless of not advertising the feature. Because of SKB_GSO_DODGY, it is not sufficient to test device features with netif_needs_gso. Full validate_xmit_skb is needed. Switch to software checksum for non-TSO packets that request checksum offload if that device feature is unsupported or disabled. Note that similar to the TSO case, device drivers may perform checksum offload correctly even when not advertising it. When switching to software checksum, packets hit skb_checksum_help, which has two BUG_ON checksum not in linear segment. Packet sockets always allocate at least up to csum_start + csum_off + 2 as linear. Tested by running github.com/wdebruij/kerneltools/psock_txring_vnet.c ethtool -K eth0 tso off tx on psock_txring_vnet -d $dst -s $src -i eth0 -l 2000 -n 1 -q -v psock_txring_vnet -d $dst -s $src -i eth0 -l 2000 -n 1 -q -v -N ethtool -K eth0 tx off psock_txring_vnet -d $dst -s $src -i eth0 -l 1000 -n 1 -q -v -G psock_txring_vnet -d $dst -s $src -i eth0 -l 1000 -n 1 -q -v -G -N v2: - add EXPORT_SYMBOL_GPL(validate_xmit_skb_list) Fixes: d346a3fae3ff ("packet: introduce PACKET_QDISC_BYPASS socket option") Signed-off-by: Willem de Bruijn Acked-by: Eric Dumazet Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/core/dev.c | 1 + net/packet/af_packet.c | 9 ++++----- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index dbc871306910..f745112f7efa 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3035,6 +3035,7 @@ struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *d } return head; } +EXPORT_SYMBOL_GPL(validate_xmit_skb_list); static void qdisc_pkt_len_init(struct sk_buff *skb) { diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 11db0d619c00..d2238b204691 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -250,7 +250,7 @@ static void __fanout_link(struct sock *sk, struct packet_sock *po); static int packet_direct_xmit(struct sk_buff *skb) { struct net_device *dev = skb->dev; - netdev_features_t features; + struct sk_buff *orig_skb = skb; struct netdev_queue *txq; int ret = NETDEV_TX_BUSY; @@ -258,9 +258,8 @@ static int packet_direct_xmit(struct sk_buff *skb) !netif_carrier_ok(dev))) goto drop; - features = netif_skb_features(skb); - if (skb_needs_linearize(skb, features) && - __skb_linearize(skb)) + skb = validate_xmit_skb_list(skb, dev); + if (skb != orig_skb) goto drop; txq = skb_get_tx_queue(dev, skb); @@ -280,7 +279,7 @@ static int packet_direct_xmit(struct sk_buff *skb) return ret; drop: atomic_long_inc(&dev->tx_dropped); - kfree_skb(skb); + kfree_skb_list(skb); return NET_XMIT_DROP; } From e934f684856393a0b2aecc7fdd8357a48b79c535 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 26 Oct 2016 08:30:29 -0700 Subject: [PATCH 786/884] Revert "hv_netvsc: report vmbus name in ethtool" This reverts commit e3f74b841d48 ("hv_netvsc: report vmbus name in ethtool")' because of problem introduced by commit f9a56e5d6a0ba ("Drivers: hv: make VMBus bus ids persistent"). This changed the format of the vmbus name and this new format is too long to fit in the bus_info field of ethtool. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/netvsc_drv.c | 4 ---- include/linux/hyperv.h | 7 ------- 2 files changed, 11 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index c71d966d905f..f6382150b16a 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -699,12 +699,8 @@ int netvsc_recv_callback(struct hv_device *device_obj, static void netvsc_get_drvinfo(struct net_device *net, struct ethtool_drvinfo *info) { - struct net_device_context *net_device_ctx = netdev_priv(net); - struct hv_device *dev = net_device_ctx->device_ctx; - strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver)); strlcpy(info->fw_version, "N/A", sizeof(info->fw_version)); - strlcpy(info->bus_info, vmbus_dev_name(dev), sizeof(info->bus_info)); } static void netvsc_get_channels(struct net_device *net, diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 6824556d37ed..cd184bdca58f 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1169,13 +1169,6 @@ int __must_check __vmbus_driver_register(struct hv_driver *hv_driver, const char *mod_name); void vmbus_driver_unregister(struct hv_driver *hv_driver); -static inline const char *vmbus_dev_name(const struct hv_device *device_obj) -{ - const struct kobject *kobj = &device_obj->device.kobj; - - return kobj->name; -} - void vmbus_hvsock_device_unregister(struct vmbus_channel *channel); int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, From fd33b2447bec7926ceaf4a4b74b68ea2466f2ae0 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 26 Oct 2016 11:47:02 -0600 Subject: [PATCH 787/884] net: mv643xx_eth: Fetch the phy connection type from DT The MAC is capable of RGMII mode and that is probably a more typical connection type than GMII today (eg it is used by Marvell Reference designs for several SOCs). Let DT users specify the standard phy-connection-type = "rgmii-id"; On a phy node. Signed-off-by: Jason Gunthorpe Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- .../bindings/net/marvell-orion-net.txt | 1 + drivers/net/ethernet/marvell/mv643xx_eth.c | 23 +++++++++++++++---- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/Documentation/devicetree/bindings/net/marvell-orion-net.txt b/Documentation/devicetree/bindings/net/marvell-orion-net.txt index bce52b2ec55e..6fd988c84c4f 100644 --- a/Documentation/devicetree/bindings/net/marvell-orion-net.txt +++ b/Documentation/devicetree/bindings/net/marvell-orion-net.txt @@ -49,6 +49,7 @@ Optional port properties: and - phy-handle: See ethernet.txt file in the same directory. + - phy-mode: See ethernet.txt file in the same directory. or diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index 55831188bc32..bf5cc55ba24c 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -2968,6 +2968,22 @@ static void set_params(struct mv643xx_eth_private *mp, mp->txq_count = pd->tx_queue_count ? : 1; } +static int get_phy_mode(struct mv643xx_eth_private *mp) +{ + struct device *dev = mp->dev->dev.parent; + int iface = -1; + + if (dev->of_node) + iface = of_get_phy_mode(dev->of_node); + + /* Historical default if unspecified. We could also read/write + * the interface state in the PSC1 + */ + if (iface < 0) + iface = PHY_INTERFACE_MODE_GMII; + return iface; +} + static struct phy_device *phy_scan(struct mv643xx_eth_private *mp, int phy_addr) { @@ -2994,7 +3010,7 @@ static struct phy_device *phy_scan(struct mv643xx_eth_private *mp, "orion-mdio-mii", addr); phydev = phy_connect(mp->dev, phy_id, mv643xx_eth_adjust_link, - PHY_INTERFACE_MODE_GMII); + get_phy_mode(mp)); if (!IS_ERR(phydev)) { phy_addr_set(mp, addr); break; @@ -3090,6 +3106,7 @@ static int mv643xx_eth_probe(struct platform_device *pdev) if (!dev) return -ENOMEM; + SET_NETDEV_DEV(dev, &pdev->dev); mp = netdev_priv(dev); platform_set_drvdata(pdev, mp); @@ -3129,7 +3146,7 @@ static int mv643xx_eth_probe(struct platform_device *pdev) if (pd->phy_node) { mp->phy = of_phy_connect(mp->dev, pd->phy_node, mv643xx_eth_adjust_link, 0, - PHY_INTERFACE_MODE_GMII); + get_phy_mode(mp)); if (!mp->phy) err = -ENODEV; else @@ -3187,8 +3204,6 @@ static int mv643xx_eth_probe(struct platform_device *pdev) dev->priv_flags |= IFF_UNICAST_FLT; dev->gso_max_segs = MV643XX_MAX_TSO_SEGS; - SET_NETDEV_DEV(dev, &pdev->dev); - if (mp->shared->win_protect) wrl(mp, WINDOW_PROTECT(mp->port_num), mp->shared->win_protect); From 8d7533e5aaad1c94386a8101a36b0617987966b7 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Wed, 26 Oct 2016 13:57:38 -0500 Subject: [PATCH 788/884] ibmvnic: Fix releasing of sub-CRQ IRQs in interrupt context Schedule these XPORT event tasks in the shared workqueue so that IRQs are not freed in an interrupt context when sub-CRQs are released. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 35 ++++++++++++++++++++---------- drivers/net/ethernet/ibm/ibmvnic.h | 1 + 2 files changed, 25 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 213162df1a9b..0459d19a5f16 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -3232,6 +3232,27 @@ static void ibmvnic_free_inflight(struct ibmvnic_adapter *adapter) spin_unlock_irqrestore(&adapter->inflight_lock, flags); } +static void ibmvnic_xport_event(struct work_struct *work) +{ + struct ibmvnic_adapter *adapter = container_of(work, + struct ibmvnic_adapter, + ibmvnic_xport); + struct device *dev = &adapter->vdev->dev; + int rc; + + ibmvnic_free_inflight(adapter); + release_sub_crqs(adapter); + if (adapter->migrated) { + rc = ibmvnic_reenable_crq_queue(adapter); + if (rc) + dev_err(dev, "Error after enable rc=%ld\n", rc); + adapter->migrated = false; + rc = ibmvnic_send_crq_init(adapter); + if (rc) + dev_err(dev, "Error sending init rc=%ld\n", rc); + } +} + static void ibmvnic_handle_crq(union ibmvnic_crq *crq, struct ibmvnic_adapter *adapter) { @@ -3267,15 +3288,7 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq, if (gen_crq->cmd == IBMVNIC_PARTITION_MIGRATED) { dev_info(dev, "Re-enabling adapter\n"); adapter->migrated = true; - ibmvnic_free_inflight(adapter); - release_sub_crqs(adapter); - rc = ibmvnic_reenable_crq_queue(adapter); - if (rc) - dev_err(dev, "Error after enable rc=%ld\n", rc); - adapter->migrated = false; - rc = ibmvnic_send_crq_init(adapter); - if (rc) - dev_err(dev, "Error sending init rc=%ld\n", rc); + schedule_work(&adapter->ibmvnic_xport); } else if (gen_crq->cmd == IBMVNIC_DEVICE_FAILOVER) { dev_info(dev, "Backing device failover detected\n"); netif_carrier_off(netdev); @@ -3284,8 +3297,7 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq, /* The adapter lost the connection */ dev_err(dev, "Virtual Adapter failed (rc=%d)\n", gen_crq->cmd); - ibmvnic_free_inflight(adapter); - release_sub_crqs(adapter); + schedule_work(&adapter->ibmvnic_xport); } return; case IBMVNIC_CRQ_CMD_RSP: @@ -3726,6 +3738,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) SET_NETDEV_DEV(netdev, &dev->dev); INIT_WORK(&adapter->vnic_crq_init, handle_crq_init_rsp); + INIT_WORK(&adapter->ibmvnic_xport, ibmvnic_xport_event); spin_lock_init(&adapter->stats_lock); diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index 878e2c059f5c..dd775d951b73 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -1048,5 +1048,6 @@ struct ibmvnic_adapter { u8 map_id; struct work_struct vnic_crq_init; + struct work_struct ibmvnic_xport; bool failover; }; From ff57087f314be8d458dca6bdd41be5b4236482a0 Mon Sep 17 00:00:00 2001 From: shamir rabinovitch Date: Thu, 27 Oct 2016 05:46:38 -0400 Subject: [PATCH 789/884] rds: debug messages are enabled by default rds use Kconfig option called "RDS_DEBUG" to enable rds debug messages. This option cause the rds Makefile to add -DDEBUG to the rds gcc command line. When CONFIG_DYNAMIC_DEBUG is enabled, the "DEBUG" macro is used by include/linux/dynamic_debug.h to decide if dynamic debug prints should be sent by default to the kernel log. rds should not enable this macro for production builds. rds dynamic debug work as expected follow this fix. Signed-off-by: Shamir Rabinovitch Acked-by: Santosh Shilimkar Reviewed-by: Wengang Wang Signed-off-by: David S. Miller --- net/rds/Makefile | 2 +- net/rds/rds.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/rds/Makefile b/net/rds/Makefile index 0e72bec1529f..56c7d27eefee 100644 --- a/net/rds/Makefile +++ b/net/rds/Makefile @@ -13,5 +13,5 @@ obj-$(CONFIG_RDS_TCP) += rds_tcp.o rds_tcp-y := tcp.o tcp_connect.o tcp_listen.o tcp_recv.o \ tcp_send.o tcp_stats.o -ccflags-$(CONFIG_RDS_DEBUG) := -DDEBUG +ccflags-$(CONFIG_RDS_DEBUG) := -DRDS_DEBUG diff --git a/net/rds/rds.h b/net/rds/rds.h index fd0bccb2f9f9..67ba67c058b1 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -33,7 +33,7 @@ #define KERNEL_HAS_ATOMIC64 #endif -#ifdef DEBUG +#ifdef RDS_DEBUG #define rdsdebug(fmt, args...) pr_debug("%s(): " fmt, __func__ , ##args) #else /* sigh, pr_debug() causes unused variable warnings */ From aa0c08feae8161b945520ada753d0dfe62b14fe7 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 27 Oct 2016 16:27:13 +0300 Subject: [PATCH 790/884] net/mlx4_core: Fix the resource-type enum in res tracker to conform to FW spec The resource type enum in the resource tracker was incorrect. RES_EQ was put in the position of RES_NPORT_ID (a FC resource). Since the remaining resources maintain their current values, and RES_EQ is not passed from slaves to the hypervisor in any FW command, this change affects only the hypervisor. Therefore, there is no backwards-compatibility issue. Fixes: 623ed84b1f95 ("mlx4_core: initial header-file changes for SRIOV support") Signed-off-by: Jack Morgenstein Signed-off-by: Moshe Shemesh Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index e4878f31e45d..7aad07644289 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -145,9 +145,10 @@ enum mlx4_resource { RES_MTT, RES_MAC, RES_VLAN, - RES_EQ, + RES_NPORT_ID, RES_COUNTER, RES_FS_RULE, + RES_EQ, MLX4_NUM_OF_RESOURCE_TYPE }; From 33a1f8b196dca933313c001866c4df3f3ca11f78 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 27 Oct 2016 16:27:14 +0300 Subject: [PATCH 791/884] net/mlx4_core: Avoid setting ports to auto when only one port type is supported When only one port type is supported, it should be read only. We reject changing requests, even to the auto sense mode. Fixes: 27bf91d6a0d5 ("mlx4_core: Add link type autosensing") Signed-off-by: Maor Gottlieb Signed-off-by: Moshe Shemesh Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/main.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 7183ac4135d2..6f4e67bc3538 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -1102,6 +1102,14 @@ static int __set_port_type(struct mlx4_port_info *info, int i; int err = 0; + if ((port_type & mdev->caps.supported_type[info->port]) != port_type) { + mlx4_err(mdev, + "Requested port type for port %d is not supported on this HCA\n", + info->port); + err = -EINVAL; + goto err_sup; + } + mlx4_stop_sense(mdev); mutex_lock(&priv->port_mutex); info->tmp_type = port_type; @@ -1147,7 +1155,7 @@ static int __set_port_type(struct mlx4_port_info *info, out: mlx4_start_sense(mdev); mutex_unlock(&priv->port_mutex); - +err_sup: return err; } From 72da2e911f79d4c7132d7431a97d46659ee862be Mon Sep 17 00:00:00 2001 From: Moshe Lazer Date: Thu, 27 Oct 2016 16:27:15 +0300 Subject: [PATCH 792/884] net/mlx4_core: Change the default value of enable_qos Change the default status of quality of service back to disabled, as it hurts performance in some cases. Fixes: 38438f7c7e8c ("net/mlx4: Set enhanced QoS support by default when ...") Signed-off-by: Moshe Lazer Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/fw.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index c41ab31a39f8..84bab9f0732e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -49,9 +49,9 @@ enum { extern void __buggy_use_of_MLX4_GET(void); extern void __buggy_use_of_MLX4_PUT(void); -static bool enable_qos = true; +static bool enable_qos; module_param(enable_qos, bool, 0444); -MODULE_PARM_DESC(enable_qos, "Enable Enhanced QoS support (default: on)"); +MODULE_PARM_DESC(enable_qos, "Enable Enhanced QoS support (default: off)"); #define MLX4_GET(dest, source, offset) \ do { \ From 4850cf4581578216468b7b3c3d06cc5abb0a697d Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Thu, 27 Oct 2016 16:27:16 +0300 Subject: [PATCH 793/884] net/mlx4_en: Resolve dividing by zero in 32-bit system When doing roundup_pow_of_two for large enough number with bit 31, an overflow will occur and a value equal to 1 will be returned. In this case 1 will be subtracted from the return value and division by zero will be reached. Fixes: 31c128b66e5b ("net/mlx4_en: Choose time-stamping shift value according to HW frequency") Signed-off-by: Eugenia Emantayev Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_clock.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c index 08fc5fc56d43..a5fc46bbcbe2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c @@ -245,8 +245,11 @@ static u32 freq_to_shift(u16 freq) { u32 freq_khz = freq * 1000; u64 max_val_cycles = freq_khz * 1000 * MLX4_EN_WRAP_AROUND_SEC; + u64 tmp_rounded = + roundup_pow_of_two(max_val_cycles) > max_val_cycles ? + roundup_pow_of_two(max_val_cycles) - 1 : UINT_MAX; u64 max_val_cycles_rounded = is_power_of_2(max_val_cycles + 1) ? - max_val_cycles : roundup_pow_of_two(max_val_cycles) - 1; + max_val_cycles : tmp_rounded; /* calculate max possible multiplier in order to fit in 64bit */ u64 max_mul = div_u64(0xffffffffffffffffULL, max_val_cycles_rounded); From 8d59de8f7bb3db296331c665779c653b0c8d13ba Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Thu, 27 Oct 2016 16:27:17 +0300 Subject: [PATCH 794/884] net/mlx4_en: Process all completions in RX rings after port goes up Currently there is a race between incoming traffic and initialization flow. HW is able to receive the packets after INIT_PORT is done and unicast steering is configured. Before we set priv->port_up NAPI is not scheduled and receive queues become full. Therefore we never get new interrupts about the completions. This issue could happen if running heavy traffic during bringing port up. The resolution is to schedule NAPI once port_up is set. If receive queues were full this will process all cqes and release them. Fixes: c27a02cd94d6 ("mlx4_en: Add driver for Mellanox ConnectX 10GbE NIC") Signed-off-by: Erez Shitrit Signed-off-by: Eugenia Emantayev Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 7e703bed7b82..e25c11dff525 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1733,6 +1733,13 @@ int mlx4_en_start_port(struct net_device *dev) udp_tunnel_get_rx_info(dev); priv->port_up = true; + + /* Process all completions if exist to prevent + * the queues freezing if they are full + */ + for (i = 0; i < priv->rx_ring_num; i++) + napi_schedule(&priv->rx_cq[i]->napi); + netif_tx_start_all_queues(dev); netif_device_attach(dev); From 9d2afba058722d40cc02f430229c91611c0e8d16 Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Thu, 27 Oct 2016 16:27:18 +0300 Subject: [PATCH 795/884] net/mlx4_en: Fix panic during reboot Fix a kernel panic that occurs as a result of an asynchronous event handled in roce_gid_mgmt: mlx4_en_get_drvinfo is called and accesses freed resources. This happens in a shutdown flow only, since pci device is destroyed while netdevice is still alive. Fixes: c27a02cd94d6 ("mlx4_en: Add driver for Mellanox ConnectX 10GbE NIC") Signed-off-by: Eugenia Emantayev Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index e25c11dff525..314f54c8dbed 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2201,6 +2201,7 @@ void mlx4_en_destroy_netdev(struct net_device *dev) if (!shutdown) free_netdev(dev); + dev->ethtool_ops = NULL; } static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu) From 81d184199e328fdad5633da139a10337327154e0 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 27 Oct 2016 16:27:19 +0300 Subject: [PATCH 796/884] net/mlx4_core: Do not access comm channel if it has not yet been initialized In the Hypervisor, there are several FW commands which are invoked before the comm channel is initialized (in mlx4_multi_func_init). These include MOD_STAT_CONFIG, QUERY_DEV_CAP, INIT_HCA, and others. If any of these commands fails, say with a timeout, the Hypervisor driver enters the internal error reset flow. In this flow, the driver attempts to notify all slaves via the comm channel that an internal error has occurred. Since the comm channel has not yet been initialized (i.e., mapped via ioremap), this will cause dereferencing a NULL pointer. To fix this, do not access the comm channel in the internal error flow if it has not yet been initialized. Fixes: 55ad359225b2 ("net/mlx4_core: Enable device recovery flow with SRIOV") Fixes: ab9c17a009ee ("mlx4_core: Modify driver initialization flow to accommodate SRIOV for Ethernet") Signed-off-by: Jack Morgenstein Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index b1cef7a0f7ca..e36bebcab3f2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -2469,6 +2469,7 @@ err_comm_admin: kfree(priv->mfunc.master.slave_state); err_comm: iounmap(priv->mfunc.comm); + priv->mfunc.comm = NULL; err_vhcr: dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE, priv->mfunc.vhcr, @@ -2537,6 +2538,13 @@ void mlx4_report_internal_err_comm_event(struct mlx4_dev *dev) int slave; u32 slave_read; + /* If the comm channel has not yet been initialized, + * skip reporting the internal error event to all + * the communication channels. + */ + if (!priv->mfunc.comm) + return; + /* Report an internal error event to all * communication channels. */ @@ -2571,6 +2579,7 @@ void mlx4_multi_func_cleanup(struct mlx4_dev *dev) } iounmap(priv->mfunc.comm); + priv->mfunc.comm = NULL; } void mlx4_cmd_cleanup(struct mlx4_dev *dev, int cleanup_mask) From 6f2e0d2c3bf0f8d322ab7516c57340c7189cca02 Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Thu, 27 Oct 2016 16:27:20 +0300 Subject: [PATCH 797/884] net/mlx4: Fix firmware command timeout during interrupt test Currently interrupt test that is part of ethtool selftest runs the check over all interrupt vectors of the device. In mlx4_en package part of interrupt vectors are uninitialized since mlx4_ib doesn't exist. This causes NOP FW command to time out. Change logic to test current port interrupt vectors only. Signed-off-by: Eugenia Emantayev Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx4/en_selftest.c | 26 +++++++- drivers/net/ethernet/mellanox/mlx4/eq.c | 62 +++++++++---------- include/linux/mlx4/device.h | 3 +- 3 files changed, 55 insertions(+), 36 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_selftest.c b/drivers/net/ethernet/mellanox/mlx4/en_selftest.c index b66e03d9711f..c06346a82496 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_selftest.c @@ -118,6 +118,29 @@ mlx4_en_test_loopback_exit: return !loopback_ok; } +static int mlx4_en_test_interrupts(struct mlx4_en_priv *priv) +{ + struct mlx4_en_dev *mdev = priv->mdev; + int err = 0; + int i = 0; + + err = mlx4_test_async(mdev->dev); + /* When not in MSI_X or slave, test only async */ + if (!(mdev->dev->flags & MLX4_FLAG_MSI_X) || mlx4_is_slave(mdev->dev)) + return err; + + /* A loop over all completion vectors of current port, + * for each vector check whether it works by mapping command + * completions to that vector and performing a NOP command + */ + for (i = 0; i < priv->rx_ring_num; i++) { + err = mlx4_test_interrupt(mdev->dev, priv->rx_cq[i]->vector); + if (err) + break; + } + + return err; +} static int mlx4_en_test_link(struct mlx4_en_priv *priv) { @@ -151,7 +174,6 @@ static int mlx4_en_test_speed(struct mlx4_en_priv *priv) void mlx4_en_ex_selftest(struct net_device *dev, u32 *flags, u64 *buf) { struct mlx4_en_priv *priv = netdev_priv(dev); - struct mlx4_en_dev *mdev = priv->mdev; int i, carrier_ok; memset(buf, 0, sizeof(u64) * MLX4_EN_NUM_SELF_TEST); @@ -177,7 +199,7 @@ void mlx4_en_ex_selftest(struct net_device *dev, u32 *flags, u64 *buf) netif_carrier_on(dev); } - buf[0] = mlx4_test_interrupts(mdev->dev); + buf[0] = mlx4_en_test_interrupts(priv); buf[1] = mlx4_en_test_link(priv); buf[2] = mlx4_en_test_speed(priv); diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index cf8f8a72a801..cd3638e6fe25 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -1361,53 +1361,49 @@ void mlx4_cleanup_eq_table(struct mlx4_dev *dev) kfree(priv->eq_table.uar_map); } -/* A test that verifies that we can accept interrupts on all - * the irq vectors of the device. +/* A test that verifies that we can accept interrupts + * on the vector allocated for asynchronous events + */ +int mlx4_test_async(struct mlx4_dev *dev) +{ + return mlx4_NOP(dev); +} +EXPORT_SYMBOL(mlx4_test_async); + +/* A test that verifies that we can accept interrupts + * on the given irq vector of the tested port. * Interrupts are checked using the NOP command. */ -int mlx4_test_interrupts(struct mlx4_dev *dev) +int mlx4_test_interrupt(struct mlx4_dev *dev, int vector) { struct mlx4_priv *priv = mlx4_priv(dev); - int i; int err; - err = mlx4_NOP(dev); - /* When not in MSI_X, there is only one irq to check */ - if (!(dev->flags & MLX4_FLAG_MSI_X) || mlx4_is_slave(dev)) - return err; + /* Temporary use polling for command completions */ + mlx4_cmd_use_polling(dev); - /* A loop over all completion vectors, for each vector we will check - * whether it works by mapping command completions to that vector - * and performing a NOP command - */ - for(i = 0; !err && (i < dev->caps.num_comp_vectors); ++i) { - /* Make sure request_irq was called */ - if (!priv->eq_table.eq[i].have_irq) - continue; - - /* Temporary use polling for command completions */ - mlx4_cmd_use_polling(dev); - - /* Map the new eq to handle all asynchronous events */ - err = mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 0, - priv->eq_table.eq[i].eqn); - if (err) { - mlx4_warn(dev, "Failed mapping eq for interrupt test\n"); - mlx4_cmd_use_events(dev); - break; - } - - /* Go back to using events */ - mlx4_cmd_use_events(dev); - err = mlx4_NOP(dev); + /* Map the new eq to handle all asynchronous events */ + err = mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 0, + priv->eq_table.eq[MLX4_CQ_TO_EQ_VECTOR(vector)].eqn); + if (err) { + mlx4_warn(dev, "Failed mapping eq for interrupt test\n"); + goto out; } + /* Go back to using events */ + mlx4_cmd_use_events(dev); + err = mlx4_NOP(dev); + /* Return to default */ + mlx4_cmd_use_polling(dev); +out: mlx4_MAP_EQ(dev, get_async_ev_mask(dev), 0, priv->eq_table.eq[MLX4_EQ_ASYNC].eqn); + mlx4_cmd_use_events(dev); + return err; } -EXPORT_SYMBOL(mlx4_test_interrupts); +EXPORT_SYMBOL(mlx4_test_interrupt); bool mlx4_is_eq_vector_valid(struct mlx4_dev *dev, u8 port, int vector) { diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index f6a164297358..3be7abd6e722 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1399,7 +1399,8 @@ void mlx4_fmr_unmap(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u32 *lkey, u32 *rkey); int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr); int mlx4_SYNC_TPT(struct mlx4_dev *dev); -int mlx4_test_interrupts(struct mlx4_dev *dev); +int mlx4_test_interrupt(struct mlx4_dev *dev, int vector); +int mlx4_test_async(struct mlx4_dev *dev); int mlx4_query_diag_counters(struct mlx4_dev *dev, u8 op_modifier, const u32 offset[], u32 value[], size_t array_len, u8 port); From d2582a03939ed0a80ffcd3ea5345505bc8067c54 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 27 Oct 2016 16:27:21 +0300 Subject: [PATCH 798/884] net/mlx4_en: Fix potential deadlock in port statistics flow mlx4_en_DUMP_ETH_STATS took the *counter mutex* and then called the FW command, with WRAPPED attribute. As a result, the fw command is wrapped on the Hypervisor when it calls mlx4_en_DUMP_ETH_STATS. The FW command wrapper flow on the hypervisor takes the *slave_cmd_mutex* during processing. At the same time, a VF could be in the process of coming up, and could call mlx4_QUERY_FUNC_CAP. On the hypervisor, the command flow takes the *slave_cmd_mutex*, then executes mlx4_QUERY_FUNC_CAP_wrapper. mlx4_QUERY_FUNC_CAP wrapper calls mlx4_get_default_counter_index(), which takes the *counter mutex*. DEADLOCK. The fix is that the DUMP_ETH_STATS fw command should be called with the NATIVE attribute, so that on the hypervisor, this command does not enter the wrapper flow. Since the Hypervisor no longer goes through the wrapper code, we also simply return 0 in mlx4_DUMP_ETH_STATS_wrapper (i.e.the function succeeds, but the returned data will be all zeroes). No need to test if it is the Hypervisor going through the wrapper. Fixes: f9baff509f8a ("mlx4_core: Add "native" argument to mlx4_cmd ...") Signed-off-by: Jack Morgenstein Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_port.c | 4 ++-- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 2 -- drivers/net/ethernet/mellanox/mlx4/port.c | 13 +------------ 3 files changed, 3 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.c b/drivers/net/ethernet/mellanox/mlx4/en_port.c index 5aa8b751f417..59473a0ebcdf 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_port.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_port.c @@ -166,7 +166,7 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) return PTR_ERR(mailbox); err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, in_mod, 0, MLX4_CMD_DUMP_ETH_STATS, MLX4_CMD_TIME_CLASS_B, - MLX4_CMD_WRAPPED); + MLX4_CMD_NATIVE); if (err) goto out; @@ -322,7 +322,7 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, in_mod | MLX4_DUMP_ETH_STATS_FLOW_CONTROL, 0, MLX4_CMD_DUMP_ETH_STATS, - MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); if (err) goto out; } diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 7aad07644289..88ee7d8a5923 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -1330,8 +1330,6 @@ int mlx4_SET_VLAN_FLTR_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_info *cmd); int mlx4_common_set_vlan_fltr(struct mlx4_dev *dev, int function, int port, void *buf); -int mlx4_common_dump_eth_stats(struct mlx4_dev *dev, int slave, u32 in_mod, - struct mlx4_cmd_mailbox *outbox); int mlx4_DUMP_ETH_STATS_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index c5b2064297a1..b656dd5772e5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -1728,24 +1728,13 @@ int mlx4_SET_VLAN_FLTR_wrapper(struct mlx4_dev *dev, int slave, return err; } -int mlx4_common_dump_eth_stats(struct mlx4_dev *dev, int slave, - u32 in_mod, struct mlx4_cmd_mailbox *outbox) -{ - return mlx4_cmd_box(dev, 0, outbox->dma, in_mod, 0, - MLX4_CMD_DUMP_ETH_STATS, MLX4_CMD_TIME_CLASS_B, - MLX4_CMD_NATIVE); -} - int mlx4_DUMP_ETH_STATS_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { - if (slave != dev->caps.function) - return 0; - return mlx4_common_dump_eth_stats(dev, slave, - vhcr->in_modifier, outbox); + return 0; } int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, From eb4b678825992aa434d32b2f615d2090281e0f88 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 27 Oct 2016 16:27:22 +0300 Subject: [PATCH 799/884] net/mlx4_en: Save slave ethtool stats command Following the previous patch, as an optimization, the slave will not even bother sending the DUMP_ETH_STATS command over the comm channel. Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 314f54c8dbed..12c99a2655f2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1917,8 +1917,9 @@ static void mlx4_en_clear_stats(struct net_device *dev) struct mlx4_en_dev *mdev = priv->mdev; int i; - if (mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 1)) - en_dbg(HW, priv, "Failed dumping statistics\n"); + if (!mlx4_is_slave(mdev->dev)) + if (mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 1)) + en_dbg(HW, priv, "Failed dumping statistics\n"); memset(&priv->pstats, 0, sizeof(priv->pstats)); memset(&priv->pkstats, 0, sizeof(priv->pkstats)); From a909d3e636995ba7c349e2ca5dbb528154d4ac30 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 29 Oct 2016 13:52:02 -0700 Subject: [PATCH 800/884] Linux 4.9-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 93beca4312c4..a2650f9c6a25 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 9 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Psychotic Stoned Sheep # *DOCUMENTATION* From f9d4286b9516b02e795214412d36885f572b57ad Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Thu, 27 Oct 2016 16:30:06 +0200 Subject: [PATCH 801/884] arch/powerpc: Update parameters for csum_tcpudp_magic & csum_tcpudp_nofold Commit 01cfbad "ipv4: Update parameters for csum_tcpudp_magic to their original types" changed parameters for csum_tcpudp_magic and csum_tcpudp_nofold for many platforms but not for PowerPC. Fixes: 01cfbad "ipv4: Update parameters for csum_tcpudp_magic to their original types" Cc: Alexander Duyck Signed-off-by: Ivan Vecera Acked-by: Alexander Duyck Signed-off-by: David S. Miller --- arch/powerpc/include/asm/checksum.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index ee655ed1ff1b..1e8fceb308a5 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -53,10 +53,8 @@ static inline __sum16 csum_fold(__wsum sum) return (__force __sum16)(~((__force u32)sum + tmp) >> 16); } -static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, - unsigned short len, - unsigned short proto, - __wsum sum) +static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, + __u8 proto, __wsum sum) { #ifdef __powerpc64__ unsigned long s = (__force u32)sum; @@ -83,10 +81,8 @@ static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, * computes the checksum of the TCP/UDP pseudo-header * returns a 16-bit checksum, already complemented */ -static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, - unsigned short len, - unsigned short proto, - __wsum sum) +static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, + __u8 proto, __wsum sum) { return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum)); } From dbc34e73c2bee4ff66c3a6b0ea5d65c25a6b6994 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 29 Oct 2016 17:18:17 -0400 Subject: [PATCH 802/884] Revert "ibmvnic: Fix releasing of sub-CRQ IRQs in interrupt context" This reverts commit 8d7533e5aaad1c94386a8101a36b0617987966b7. It introduced kbuild failures, new version coming. Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 35 ++++++++++-------------------- drivers/net/ethernet/ibm/ibmvnic.h | 1 - 2 files changed, 11 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 0459d19a5f16..213162df1a9b 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -3232,27 +3232,6 @@ static void ibmvnic_free_inflight(struct ibmvnic_adapter *adapter) spin_unlock_irqrestore(&adapter->inflight_lock, flags); } -static void ibmvnic_xport_event(struct work_struct *work) -{ - struct ibmvnic_adapter *adapter = container_of(work, - struct ibmvnic_adapter, - ibmvnic_xport); - struct device *dev = &adapter->vdev->dev; - int rc; - - ibmvnic_free_inflight(adapter); - release_sub_crqs(adapter); - if (adapter->migrated) { - rc = ibmvnic_reenable_crq_queue(adapter); - if (rc) - dev_err(dev, "Error after enable rc=%ld\n", rc); - adapter->migrated = false; - rc = ibmvnic_send_crq_init(adapter); - if (rc) - dev_err(dev, "Error sending init rc=%ld\n", rc); - } -} - static void ibmvnic_handle_crq(union ibmvnic_crq *crq, struct ibmvnic_adapter *adapter) { @@ -3288,7 +3267,15 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq, if (gen_crq->cmd == IBMVNIC_PARTITION_MIGRATED) { dev_info(dev, "Re-enabling adapter\n"); adapter->migrated = true; - schedule_work(&adapter->ibmvnic_xport); + ibmvnic_free_inflight(adapter); + release_sub_crqs(adapter); + rc = ibmvnic_reenable_crq_queue(adapter); + if (rc) + dev_err(dev, "Error after enable rc=%ld\n", rc); + adapter->migrated = false; + rc = ibmvnic_send_crq_init(adapter); + if (rc) + dev_err(dev, "Error sending init rc=%ld\n", rc); } else if (gen_crq->cmd == IBMVNIC_DEVICE_FAILOVER) { dev_info(dev, "Backing device failover detected\n"); netif_carrier_off(netdev); @@ -3297,7 +3284,8 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq, /* The adapter lost the connection */ dev_err(dev, "Virtual Adapter failed (rc=%d)\n", gen_crq->cmd); - schedule_work(&adapter->ibmvnic_xport); + ibmvnic_free_inflight(adapter); + release_sub_crqs(adapter); } return; case IBMVNIC_CRQ_CMD_RSP: @@ -3738,7 +3726,6 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) SET_NETDEV_DEV(netdev, &dev->dev); INIT_WORK(&adapter->vnic_crq_init, handle_crq_init_rsp); - INIT_WORK(&adapter->ibmvnic_xport, ibmvnic_xport_event); spin_lock_init(&adapter->stats_lock); diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index dd775d951b73..878e2c059f5c 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -1048,6 +1048,5 @@ struct ibmvnic_adapter { u8 map_id; struct work_struct vnic_crq_init; - struct work_struct ibmvnic_xport; bool failover; }; From 9888d7b02c7793cbbcbdd05dd9e14cc0e78d1db7 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Thu, 27 Oct 2016 12:28:51 -0500 Subject: [PATCH 803/884] ibmvnic: Fix releasing of sub-CRQ IRQs in interrupt context Schedule these XPORT event tasks in the shared workqueue so that IRQs are not freed in an interrupt context when sub-CRQs are released. Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 35 ++++++++++++++++++++---------- drivers/net/ethernet/ibm/ibmvnic.h | 1 + 2 files changed, 25 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 213162df1a9b..a922fb94fceb 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -3232,6 +3232,27 @@ static void ibmvnic_free_inflight(struct ibmvnic_adapter *adapter) spin_unlock_irqrestore(&adapter->inflight_lock, flags); } +static void ibmvnic_xport_event(struct work_struct *work) +{ + struct ibmvnic_adapter *adapter = container_of(work, + struct ibmvnic_adapter, + ibmvnic_xport); + struct device *dev = &adapter->vdev->dev; + long rc; + + ibmvnic_free_inflight(adapter); + release_sub_crqs(adapter); + if (adapter->migrated) { + rc = ibmvnic_reenable_crq_queue(adapter); + if (rc) + dev_err(dev, "Error after enable rc=%ld\n", rc); + adapter->migrated = false; + rc = ibmvnic_send_crq_init(adapter); + if (rc) + dev_err(dev, "Error sending init rc=%ld\n", rc); + } +} + static void ibmvnic_handle_crq(union ibmvnic_crq *crq, struct ibmvnic_adapter *adapter) { @@ -3267,15 +3288,7 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq, if (gen_crq->cmd == IBMVNIC_PARTITION_MIGRATED) { dev_info(dev, "Re-enabling adapter\n"); adapter->migrated = true; - ibmvnic_free_inflight(adapter); - release_sub_crqs(adapter); - rc = ibmvnic_reenable_crq_queue(adapter); - if (rc) - dev_err(dev, "Error after enable rc=%ld\n", rc); - adapter->migrated = false; - rc = ibmvnic_send_crq_init(adapter); - if (rc) - dev_err(dev, "Error sending init rc=%ld\n", rc); + schedule_work(&adapter->ibmvnic_xport); } else if (gen_crq->cmd == IBMVNIC_DEVICE_FAILOVER) { dev_info(dev, "Backing device failover detected\n"); netif_carrier_off(netdev); @@ -3284,8 +3297,7 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq, /* The adapter lost the connection */ dev_err(dev, "Virtual Adapter failed (rc=%d)\n", gen_crq->cmd); - ibmvnic_free_inflight(adapter); - release_sub_crqs(adapter); + schedule_work(&adapter->ibmvnic_xport); } return; case IBMVNIC_CRQ_CMD_RSP: @@ -3726,6 +3738,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) SET_NETDEV_DEV(netdev, &dev->dev); INIT_WORK(&adapter->vnic_crq_init, handle_crq_init_rsp); + INIT_WORK(&adapter->ibmvnic_xport, ibmvnic_xport_event); spin_lock_init(&adapter->stats_lock); diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index 878e2c059f5c..dd775d951b73 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -1048,5 +1048,6 @@ struct ibmvnic_adapter { u8 map_id; struct work_struct vnic_crq_init; + struct work_struct ibmvnic_xport; bool failover; }; From 8bf371e6adff29758cc3c57c17df4486513081f8 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Thu, 27 Oct 2016 12:28:52 -0500 Subject: [PATCH 804/884] ibmvnic: Fix missing brackets in init_sub_crq_irqs Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index a922fb94fceb..5f44c5520fbc 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1461,14 +1461,16 @@ static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter) return rc; req_rx_irq_failed: - for (j = 0; j < i; j++) + for (j = 0; j < i; j++) { free_irq(adapter->rx_scrq[j]->irq, adapter->rx_scrq[j]); irq_dispose_mapping(adapter->rx_scrq[j]->irq); + } i = adapter->req_tx_queues; req_tx_irq_failed: - for (j = 0; j < i; j++) + for (j = 0; j < i; j++) { free_irq(adapter->tx_scrq[j]->irq, adapter->tx_scrq[j]); irq_dispose_mapping(adapter->rx_scrq[j]->irq); + } release_sub_crqs_no_irqs(adapter); return rc; } From 06bd2b1ed04ca9fdbc767859885944a1e8b86b40 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Thu, 27 Oct 2016 18:51:55 -0400 Subject: [PATCH 805/884] tipc: fix broadcast link synchronization problem In commit 2d18ac4ba745 ("tipc: extend broadcast link initialization criteria") we tried to fix a problem with the initial synchronization of broadcast link acknowledge values. Unfortunately that solution is not sufficient to solve the issue. We have seen it happen that LINK_PROTOCOL/STATE packets with a valid non-zero unicast acknowledge number may bypass BCAST_PROTOCOL initialization, NAME_DISTRIBUTOR and other STATE packets with invalid broadcast acknowledge numbers, leading to premature opening of the broadcast link. When the bypassed packets finally arrive, they are inadvertently accepted, and the already correctly initialized acknowledge number in the broadcast receive link is overwritten by the invalid (zero) value of the said packets. After this the broadcast link goes stale. We now fix this by marking the packets where we know the acknowledge value is or may be invalid, and then ignoring the acks from those. To this purpose, we claim an unused bit in the header to indicate that the value is invalid. We set the bit to 1 in the initial BCAST_PROTOCOL synchronization packet and all initial ("bulk") NAME_DISTRIBUTOR packets, plus those LINK_PROTOCOL packets sent out before the broadcast links are fully synchronized. This minor protocol update is fully backwards compatible. Reported-by: John Thompson Tested-by: John Thompson Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/bcast.c | 14 ++++++++++---- net/tipc/bcast.h | 3 ++- net/tipc/link.c | 2 ++ net/tipc/msg.h | 17 +++++++++++++++++ net/tipc/name_distr.c | 1 + net/tipc/node.c | 2 +- 6 files changed, 33 insertions(+), 6 deletions(-) diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 753f774cb46f..aa1babbea385 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -247,11 +247,17 @@ int tipc_bcast_rcv(struct net *net, struct tipc_link *l, struct sk_buff *skb) * * RCU is locked, no other locks set */ -void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, u32 acked) +void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, + struct tipc_msg *hdr) { struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq; + u16 acked = msg_bcast_ack(hdr); struct sk_buff_head xmitq; + /* Ignore bc acks sent by peer before bcast synch point was received */ + if (msg_bc_ack_invalid(hdr)) + return; + __skb_queue_head_init(&xmitq); tipc_bcast_lock(net); @@ -279,11 +285,11 @@ int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l, __skb_queue_head_init(&xmitq); tipc_bcast_lock(net); - if (msg_type(hdr) == STATE_MSG) { + if (msg_type(hdr) != STATE_MSG) { + tipc_link_bc_init_rcv(l, hdr); + } else if (!msg_bc_ack_invalid(hdr)) { tipc_link_bc_ack_rcv(l, msg_bcast_ack(hdr), &xmitq); rc = tipc_link_bc_sync_rcv(l, hdr, &xmitq); - } else { - tipc_link_bc_init_rcv(l, hdr); } tipc_bcast_unlock(net); diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 5ffe34472ccd..855d53c64ab3 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -55,7 +55,8 @@ void tipc_bcast_dec_bearer_dst_cnt(struct net *net, int bearer_id); int tipc_bcast_get_mtu(struct net *net); int tipc_bcast_xmit(struct net *net, struct sk_buff_head *list); int tipc_bcast_rcv(struct net *net, struct tipc_link *l, struct sk_buff *skb); -void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, u32 acked); +void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, + struct tipc_msg *hdr); int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l, struct tipc_msg *hdr); int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg); diff --git a/net/tipc/link.c b/net/tipc/link.c index b36e16cdc945..1055164c6232 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1312,6 +1312,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, msg_set_next_sent(hdr, l->snd_nxt); msg_set_ack(hdr, l->rcv_nxt - 1); msg_set_bcast_ack(hdr, bcl->rcv_nxt - 1); + msg_set_bc_ack_invalid(hdr, !node_up); msg_set_last_bcast(hdr, l->bc_sndlink->snd_nxt - 1); msg_set_link_tolerance(hdr, tolerance); msg_set_linkprio(hdr, priority); @@ -1574,6 +1575,7 @@ static void tipc_link_build_bc_init_msg(struct tipc_link *l, __skb_queue_head_init(&list); if (!tipc_link_build_bc_proto_msg(l->bc_rcvlink, false, 0, &list)) return; + msg_set_bc_ack_invalid(buf_msg(skb_peek(&list)), true); tipc_link_xmit(l, &list, xmitq); } diff --git a/net/tipc/msg.h b/net/tipc/msg.h index c3832cdf2278..50a739860d37 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -714,6 +714,23 @@ static inline void msg_set_peer_stopping(struct tipc_msg *m, u32 s) msg_set_bits(m, 5, 13, 0x1, s); } +static inline bool msg_bc_ack_invalid(struct tipc_msg *m) +{ + switch (msg_user(m)) { + case BCAST_PROTOCOL: + case NAME_DISTRIBUTOR: + case LINK_PROTOCOL: + return msg_bits(m, 5, 14, 0x1); + default: + return false; + } +} + +static inline void msg_set_bc_ack_invalid(struct tipc_msg *m, bool invalid) +{ + msg_set_bits(m, 5, 14, 0x1, invalid); +} + static inline char *msg_media_addr(struct tipc_msg *m) { return (char *)&m->hdr[TIPC_MEDIA_INFO_OFFSET]; diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index a04fe9be1c60..c1cfd92de17a 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -156,6 +156,7 @@ static void named_distribute(struct net *net, struct sk_buff_head *list, pr_warn("Bulk publication failure\n"); return; } + msg_set_bc_ack_invalid(buf_msg(skb), true); item = (struct distr_item *)msg_data(buf_msg(skb)); } diff --git a/net/tipc/node.c b/net/tipc/node.c index 7ef14e2d2356..9d2f4c2b08ab 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1535,7 +1535,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) if (unlikely(usr == LINK_PROTOCOL)) tipc_node_bc_sync_rcv(n, hdr, bearer_id, &xmitq); else if (unlikely(tipc_link_acked(n->bc_entry.link) != bc_ack)) - tipc_bcast_ack_rcv(net, n->bc_entry.link, bc_ack); + tipc_bcast_ack_rcv(net, n->bc_entry.link, hdr); /* Receive packet directly if conditions permit */ tipc_node_read_lock(n); From 9fe1c98ac90023842ae7cd921badfa1029e45bd1 Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan Date: Thu, 27 Oct 2016 16:01:03 -0700 Subject: [PATCH 806/884] enic: fix rq disable When MTU is changed from 9000 to 1500 while there is burst of inbound 9000 bytes packets, adaptor sometimes delivers 9000 bytes packets to 1500 bytes buffers. This causes memory corruption and sometimes crash. This is because of a race condition in adaptor between "RQ disable" clearing descriptor mini-cache and mini-cache valid bit being set by completion of descriptor fetch. This can result in stale RQ desc being cached and used when packets arrive. In this case, the stale descriptor have old MTU value. Solution is to write RQ->disable twice. The first write will stop any further desc fetches, allowing the second disable to clear the mini-cache valid bit without danger of a race. Also, the check for rq->running becoming 0 after writing rq->enable to 0 is not done properly. When incoming packets are flooding the interface, rq->running will pulse high for each dropped packet. Since the driver was waiting for 10us between each poll, it is possible to see rq->running = 1 1000 times in a row, even though it is not actually stuck running. This results in false failure of vnic_rq_disable(). Fix is to try more than 1000 time without delay between polls to ensure we do not miss when running goes low. In old adaptors rq->enable needs to be re-written to 0 when posted_index is reset in vnic_rq_clean() in order to keep rq->prefetch_index in sync. Signed-off-by: Govindarajulu Varadarajan <_govind@gmx.com> Signed-off-by: David S. Miller --- drivers/net/ethernet/cisco/enic/vnic_rq.c | 32 ++++++++++++++++------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/cisco/enic/vnic_rq.c b/drivers/net/ethernet/cisco/enic/vnic_rq.c index e572a527b18d..36bc2c71fba9 100644 --- a/drivers/net/ethernet/cisco/enic/vnic_rq.c +++ b/drivers/net/ethernet/cisco/enic/vnic_rq.c @@ -169,19 +169,28 @@ int vnic_rq_disable(struct vnic_rq *rq) { unsigned int wait; struct vnic_dev *vdev = rq->vdev; + int i; - iowrite32(0, &rq->ctrl->enable); + /* Due to a race condition with clearing RQ "mini-cache" in hw, we need + * to disable the RQ twice to guarantee that stale descriptors are not + * used when this RQ is re-enabled. + */ + for (i = 0; i < 2; i++) { + iowrite32(0, &rq->ctrl->enable); - /* Wait for HW to ACK disable request */ - for (wait = 0; wait < 1000; wait++) { - if (!(ioread32(&rq->ctrl->running))) - return 0; - udelay(10); + /* Wait for HW to ACK disable request */ + for (wait = 20000; wait > 0; wait--) + if (!ioread32(&rq->ctrl->running)) + break; + if (!wait) { + vdev_neterr(vdev, "Failed to disable RQ[%d]\n", + rq->index); + + return -ETIMEDOUT; + } } - vdev_neterr(vdev, "Failed to disable RQ[%d]\n", rq->index); - - return -ETIMEDOUT; + return 0; } void vnic_rq_clean(struct vnic_rq *rq, @@ -212,6 +221,11 @@ void vnic_rq_clean(struct vnic_rq *rq, [fetch_index % VNIC_RQ_BUF_BLK_ENTRIES(count)]; iowrite32(fetch_index, &rq->ctrl->posted_index); + /* Anytime we write fetch_index, we need to re-write 0 to rq->enable + * to re-sync internal VIC state. + */ + iowrite32(0, &rq->ctrl->enable); + vnic_dev_clear_desc_ring(&rq->ring); } From 3034783472f5353f71af44ed52ad9ee65f9f6d17 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Fri, 28 Oct 2016 12:40:20 +0300 Subject: [PATCH 807/884] net: phy: dp83848: add dp83822 PHY support This PHY has a compatible register set with DP83848x so add support for it. Acked-by: Andrew F. Davis Signed-off-by: Roger Quadros Signed-off-by: David S. Miller --- drivers/net/phy/dp83848.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/phy/dp83848.c b/drivers/net/phy/dp83848.c index 03d54c4adc88..800b39f06279 100644 --- a/drivers/net/phy/dp83848.c +++ b/drivers/net/phy/dp83848.c @@ -19,6 +19,7 @@ #define TI_DP83848C_PHY_ID 0x20005ca0 #define NS_DP83848C_PHY_ID 0x20005c90 #define TLK10X_PHY_ID 0x2000a210 +#define TI_DP83822_PHY_ID 0x2000a240 /* Registers */ #define DP83848_MICR 0x11 /* MII Interrupt Control Register */ @@ -77,6 +78,7 @@ static struct mdio_device_id __maybe_unused dp83848_tbl[] = { { TI_DP83848C_PHY_ID, 0xfffffff0 }, { NS_DP83848C_PHY_ID, 0xfffffff0 }, { TLK10X_PHY_ID, 0xfffffff0 }, + { TI_DP83822_PHY_ID, 0xfffffff0 }, { } }; MODULE_DEVICE_TABLE(mdio, dp83848_tbl); @@ -105,6 +107,7 @@ static struct phy_driver dp83848_driver[] = { DP83848_PHY_DRIVER(TI_DP83848C_PHY_ID, "TI DP83848C 10/100 Mbps PHY"), DP83848_PHY_DRIVER(NS_DP83848C_PHY_ID, "NS DP83848C 10/100 Mbps PHY"), DP83848_PHY_DRIVER(TLK10X_PHY_ID, "TI TLK10X 10/100 Mbps PHY"), + DP83848_PHY_DRIVER(TI_DP83822_PHY_ID, "TI DP83822 10/100 Mbps PHY"), }; module_phy_driver(dp83848_driver); From 087892d29b75c025086d99b29d385a3dac0169fc Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Sat, 29 Oct 2016 17:04:35 +0300 Subject: [PATCH 808/884] qede: Fix out-of-bound fastpath memory access Driver allocates a shadow array for transmitted SKBs with X entries; That means valid indices are {0,...,X - 1}. [X == 8191] Problem is the driver also uses X as a mask for a producer/consumer in order to choose the right entry in the array which allows access to entry X which is out of bounds. To fix this, simply allocate X + 1 entries in the shadow array. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qede/qede_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 444b271059b2..7def29aaf65c 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -2940,7 +2940,7 @@ static int qede_alloc_mem_txq(struct qede_dev *edev, struct qede_tx_queue *txq) txq->num_tx_buffers = edev->q_num_tx_buffers; /* Allocate the parallel driver ring for Tx buffers */ - size = sizeof(*txq->sw_tx_ring) * NUM_TX_BDS_MAX; + size = sizeof(*txq->sw_tx_ring) * TX_RING_SIZE; txq->sw_tx_ring = kzalloc(size, GFP_KERNEL); if (!txq->sw_tx_ring) { DP_NOTICE(edev, "Tx buffers ring allocation failed\n"); @@ -2951,7 +2951,7 @@ static int qede_alloc_mem_txq(struct qede_dev *edev, struct qede_tx_queue *txq) QED_CHAIN_USE_TO_CONSUME_PRODUCE, QED_CHAIN_MODE_PBL, QED_CHAIN_CNT_TYPE_U16, - NUM_TX_BDS_MAX, + TX_RING_SIZE, sizeof(*p_virt), &txq->tx_pbl); if (rc) goto err; From c6fcc4fc5f8b592600c7409e769ab68da0fb1eca Mon Sep 17 00:00:00 2001 From: pravin shelar Date: Fri, 28 Oct 2016 09:59:15 -0700 Subject: [PATCH 809/884] vxlan: avoid using stale vxlan socket. When vxlan device is closed vxlan socket is freed. This operation can race with vxlan-xmit function which dereferences vxlan socket. Following patch uses RCU mechanism to avoid this situation. Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 80 ++++++++++++++++++++++++++++----------------- include/net/vxlan.h | 4 +-- 2 files changed, 52 insertions(+), 32 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index c1639a3e95a4..f3c2fa3ab0d5 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -943,17 +943,20 @@ static bool vxlan_snoop(struct net_device *dev, static bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev) { struct vxlan_dev *vxlan; + struct vxlan_sock *sock4; + struct vxlan_sock *sock6 = NULL; unsigned short family = dev->default_dst.remote_ip.sa.sa_family; + sock4 = rtnl_dereference(dev->vn4_sock); + /* The vxlan_sock is only used by dev, leaving group has * no effect on other vxlan devices. */ - if (family == AF_INET && dev->vn4_sock && - atomic_read(&dev->vn4_sock->refcnt) == 1) + if (family == AF_INET && sock4 && atomic_read(&sock4->refcnt) == 1) return false; #if IS_ENABLED(CONFIG_IPV6) - if (family == AF_INET6 && dev->vn6_sock && - atomic_read(&dev->vn6_sock->refcnt) == 1) + sock6 = rtnl_dereference(dev->vn6_sock); + if (family == AF_INET6 && sock6 && atomic_read(&sock6->refcnt) == 1) return false; #endif @@ -961,10 +964,12 @@ static bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev) if (!netif_running(vxlan->dev) || vxlan == dev) continue; - if (family == AF_INET && vxlan->vn4_sock != dev->vn4_sock) + if (family == AF_INET && + rtnl_dereference(vxlan->vn4_sock) != sock4) continue; #if IS_ENABLED(CONFIG_IPV6) - if (family == AF_INET6 && vxlan->vn6_sock != dev->vn6_sock) + if (family == AF_INET6 && + rtnl_dereference(vxlan->vn6_sock) != sock6) continue; #endif @@ -1005,22 +1010,25 @@ static bool __vxlan_sock_release_prep(struct vxlan_sock *vs) static void vxlan_sock_release(struct vxlan_dev *vxlan) { - bool ipv4 = __vxlan_sock_release_prep(vxlan->vn4_sock); + struct vxlan_sock *sock4 = rtnl_dereference(vxlan->vn4_sock); #if IS_ENABLED(CONFIG_IPV6) - bool ipv6 = __vxlan_sock_release_prep(vxlan->vn6_sock); + struct vxlan_sock *sock6 = rtnl_dereference(vxlan->vn6_sock); + + rcu_assign_pointer(vxlan->vn6_sock, NULL); #endif + rcu_assign_pointer(vxlan->vn4_sock, NULL); synchronize_net(); - if (ipv4) { - udp_tunnel_sock_release(vxlan->vn4_sock->sock); - kfree(vxlan->vn4_sock); + if (__vxlan_sock_release_prep(sock4)) { + udp_tunnel_sock_release(sock4->sock); + kfree(sock4); } #if IS_ENABLED(CONFIG_IPV6) - if (ipv6) { - udp_tunnel_sock_release(vxlan->vn6_sock->sock); - kfree(vxlan->vn6_sock); + if (__vxlan_sock_release_prep(sock6)) { + udp_tunnel_sock_release(sock6->sock); + kfree(sock6); } #endif } @@ -1036,18 +1044,21 @@ static int vxlan_igmp_join(struct vxlan_dev *vxlan) int ret = -EINVAL; if (ip->sa.sa_family == AF_INET) { + struct vxlan_sock *sock4 = rtnl_dereference(vxlan->vn4_sock); struct ip_mreqn mreq = { .imr_multiaddr.s_addr = ip->sin.sin_addr.s_addr, .imr_ifindex = ifindex, }; - sk = vxlan->vn4_sock->sock->sk; + sk = sock4->sock->sk; lock_sock(sk); ret = ip_mc_join_group(sk, &mreq); release_sock(sk); #if IS_ENABLED(CONFIG_IPV6) } else { - sk = vxlan->vn6_sock->sock->sk; + struct vxlan_sock *sock6 = rtnl_dereference(vxlan->vn6_sock); + + sk = sock6->sock->sk; lock_sock(sk); ret = ipv6_stub->ipv6_sock_mc_join(sk, ifindex, &ip->sin6.sin6_addr); @@ -1067,18 +1078,21 @@ static int vxlan_igmp_leave(struct vxlan_dev *vxlan) int ret = -EINVAL; if (ip->sa.sa_family == AF_INET) { + struct vxlan_sock *sock4 = rtnl_dereference(vxlan->vn4_sock); struct ip_mreqn mreq = { .imr_multiaddr.s_addr = ip->sin.sin_addr.s_addr, .imr_ifindex = ifindex, }; - sk = vxlan->vn4_sock->sock->sk; + sk = sock4->sock->sk; lock_sock(sk); ret = ip_mc_leave_group(sk, &mreq); release_sock(sk); #if IS_ENABLED(CONFIG_IPV6) } else { - sk = vxlan->vn6_sock->sock->sk; + struct vxlan_sock *sock6 = rtnl_dereference(vxlan->vn6_sock); + + sk = sock6->sock->sk; lock_sock(sk); ret = ipv6_stub->ipv6_sock_mc_drop(sk, ifindex, &ip->sin6.sin6_addr); @@ -1828,11 +1842,15 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan, struct dst_cache *dst_cache, const struct ip_tunnel_info *info) { + struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock); bool use_cache = ip_tunnel_dst_cache_usable(skb, info); struct dst_entry *ndst; struct flowi6 fl6; int err; + if (!sock6) + return ERR_PTR(-EIO); + if (tos && !info) use_cache = false; if (use_cache) { @@ -1850,7 +1868,7 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan, fl6.flowi6_proto = IPPROTO_UDP; err = ipv6_stub->ipv6_dst_lookup(vxlan->net, - vxlan->vn6_sock->sock->sk, + sock6->sock->sk, &ndst, &fl6); if (err < 0) return ERR_PTR(err); @@ -1995,9 +2013,11 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, } if (dst->sa.sa_family == AF_INET) { - if (!vxlan->vn4_sock) + struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock); + + if (!sock4) goto drop; - sk = vxlan->vn4_sock->sock->sk; + sk = sock4->sock->sk; rt = vxlan_get_route(vxlan, skb, rdst ? rdst->remote_ifindex : 0, tos, @@ -2050,12 +2070,13 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, src_port, dst_port, xnet, !udp_sum); #if IS_ENABLED(CONFIG_IPV6) } else { + struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock); struct dst_entry *ndst; u32 rt6i_flags; - if (!vxlan->vn6_sock) + if (!sock6) goto drop; - sk = vxlan->vn6_sock->sock->sk; + sk = sock6->sock->sk; ndst = vxlan6_get_route(vxlan, skb, rdst ? rdst->remote_ifindex : 0, tos, @@ -2415,9 +2436,10 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) dport = info->key.tp_dst ? : vxlan->cfg.dst_port; if (ip_tunnel_info_af(info) == AF_INET) { + struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock); struct rtable *rt; - if (!vxlan->vn4_sock) + if (!sock4) return -EINVAL; rt = vxlan_get_route(vxlan, skb, 0, info->key.tos, info->key.u.ipv4.dst, @@ -2429,8 +2451,6 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) #if IS_ENABLED(CONFIG_IPV6) struct dst_entry *ndst; - if (!vxlan->vn6_sock) - return -EINVAL; ndst = vxlan6_get_route(vxlan, skb, 0, info->key.tos, info->key.label, &info->key.u.ipv6.dst, &info->key.u.ipv6.src, NULL, info); @@ -2740,10 +2760,10 @@ static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6) return PTR_ERR(vs); #if IS_ENABLED(CONFIG_IPV6) if (ipv6) - vxlan->vn6_sock = vs; + rcu_assign_pointer(vxlan->vn6_sock, vs); else #endif - vxlan->vn4_sock = vs; + rcu_assign_pointer(vxlan->vn4_sock, vs); vxlan_vs_add_dev(vs, vxlan); return 0; } @@ -2754,9 +2774,9 @@ static int vxlan_sock_add(struct vxlan_dev *vxlan) bool metadata = vxlan->flags & VXLAN_F_COLLECT_METADATA; int ret = 0; - vxlan->vn4_sock = NULL; + RCU_INIT_POINTER(vxlan->vn4_sock, NULL); #if IS_ENABLED(CONFIG_IPV6) - vxlan->vn6_sock = NULL; + RCU_INIT_POINTER(vxlan->vn6_sock, NULL); if (ipv6 || metadata) ret = __vxlan_sock_add(vxlan, true); #endif diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 0255613a54a4..308adc4154f4 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -225,9 +225,9 @@ struct vxlan_config { struct vxlan_dev { struct hlist_node hlist; /* vni hash table */ struct list_head next; /* vxlan's per namespace list */ - struct vxlan_sock *vn4_sock; /* listening socket for IPv4 */ + struct vxlan_sock __rcu *vn4_sock; /* listening socket for IPv4 */ #if IS_ENABLED(CONFIG_IPV6) - struct vxlan_sock *vn6_sock; /* listening socket for IPv6 */ + struct vxlan_sock __rcu *vn6_sock; /* listening socket for IPv6 */ #endif struct net_device *dev; struct net *net; /* netns for packet i/o */ From fceb9c3e38252992bbf1a3028cc2f7b871211533 Mon Sep 17 00:00:00 2001 From: pravin shelar Date: Fri, 28 Oct 2016 09:59:16 -0700 Subject: [PATCH 810/884] geneve: avoid using stale geneve socket. This patch is similar to earlier vxlan patch. Geneve device close operation frees geneve socket. This operation can race with geneve-xmit function which dereferences geneve socket. Following patch uses RCU mechanism to avoid this situation. Signed-off-by: Pravin B Shelar Acked-by: John W. Linville Signed-off-by: David S. Miller --- drivers/net/geneve.c | 45 +++++++++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 16af1ce99233..42edd7b7902f 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -58,9 +58,9 @@ struct geneve_dev { struct hlist_node hlist; /* vni hash table */ struct net *net; /* netns for packet i/o */ struct net_device *dev; /* netdev for geneve tunnel */ - struct geneve_sock *sock4; /* IPv4 socket used for geneve tunnel */ + struct geneve_sock __rcu *sock4; /* IPv4 socket used for geneve tunnel */ #if IS_ENABLED(CONFIG_IPV6) - struct geneve_sock *sock6; /* IPv6 socket used for geneve tunnel */ + struct geneve_sock __rcu *sock6; /* IPv6 socket used for geneve tunnel */ #endif u8 vni[3]; /* virtual network ID for tunnel */ u8 ttl; /* TTL override */ @@ -543,9 +543,19 @@ static void __geneve_sock_release(struct geneve_sock *gs) static void geneve_sock_release(struct geneve_dev *geneve) { - __geneve_sock_release(geneve->sock4); + struct geneve_sock *gs4 = rtnl_dereference(geneve->sock4); #if IS_ENABLED(CONFIG_IPV6) - __geneve_sock_release(geneve->sock6); + struct geneve_sock *gs6 = rtnl_dereference(geneve->sock6); + + rcu_assign_pointer(geneve->sock6, NULL); +#endif + + rcu_assign_pointer(geneve->sock4, NULL); + synchronize_net(); + + __geneve_sock_release(gs4); +#if IS_ENABLED(CONFIG_IPV6) + __geneve_sock_release(gs6); #endif } @@ -586,10 +596,10 @@ out: gs->flags = geneve->flags; #if IS_ENABLED(CONFIG_IPV6) if (ipv6) - geneve->sock6 = gs; + rcu_assign_pointer(geneve->sock6, gs); else #endif - geneve->sock4 = gs; + rcu_assign_pointer(geneve->sock4, gs); hash = geneve_net_vni_hash(geneve->vni); hlist_add_head_rcu(&geneve->hlist, &gs->vni_list[hash]); @@ -603,9 +613,7 @@ static int geneve_open(struct net_device *dev) bool metadata = geneve->collect_md; int ret = 0; - geneve->sock4 = NULL; #if IS_ENABLED(CONFIG_IPV6) - geneve->sock6 = NULL; if (ipv6 || metadata) ret = geneve_sock_add(geneve, true); #endif @@ -720,6 +728,9 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb, struct rtable *rt = NULL; __u8 tos; + if (!rcu_dereference(geneve->sock4)) + return ERR_PTR(-EIO); + memset(fl4, 0, sizeof(*fl4)); fl4->flowi4_mark = skb->mark; fl4->flowi4_proto = IPPROTO_UDP; @@ -772,11 +783,15 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb, { bool use_cache = ip_tunnel_dst_cache_usable(skb, info); struct geneve_dev *geneve = netdev_priv(dev); - struct geneve_sock *gs6 = geneve->sock6; struct dst_entry *dst = NULL; struct dst_cache *dst_cache; + struct geneve_sock *gs6; __u8 prio; + gs6 = rcu_dereference(geneve->sock6); + if (!gs6) + return ERR_PTR(-EIO); + memset(fl6, 0, sizeof(*fl6)); fl6->flowi6_mark = skb->mark; fl6->flowi6_proto = IPPROTO_UDP; @@ -842,7 +857,7 @@ static netdev_tx_t geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, struct ip_tunnel_info *info) { struct geneve_dev *geneve = netdev_priv(dev); - struct geneve_sock *gs4 = geneve->sock4; + struct geneve_sock *gs4; struct rtable *rt = NULL; const struct iphdr *iip; /* interior IP header */ int err = -EINVAL; @@ -853,6 +868,10 @@ static netdev_tx_t geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, bool xnet = !net_eq(geneve->net, dev_net(geneve->dev)); u32 flags = geneve->flags; + gs4 = rcu_dereference(geneve->sock4); + if (!gs4) + goto tx_error; + if (geneve->collect_md) { if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX))) { netdev_dbg(dev, "no tunnel metadata\n"); @@ -932,9 +951,9 @@ static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, struct ip_tunnel_info *info) { struct geneve_dev *geneve = netdev_priv(dev); - struct geneve_sock *gs6 = geneve->sock6; struct dst_entry *dst = NULL; const struct iphdr *iip; /* interior IP header */ + struct geneve_sock *gs6; int err = -EINVAL; struct flowi6 fl6; __u8 prio, ttl; @@ -943,6 +962,10 @@ static netdev_tx_t geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, bool xnet = !net_eq(geneve->net, dev_net(geneve->dev)); u32 flags = geneve->flags; + gs6 = rcu_dereference(geneve->sock6); + if (!gs6) + goto tx_error; + if (geneve->collect_md) { if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX))) { netdev_dbg(dev, "no tunnel metadata\n"); From a0be1db4304f51c99af6b6e515549794182a94df Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 14 Sep 2016 17:33:26 +0100 Subject: [PATCH 811/884] virtio_pci: Limit DMA mask to 44 bits for legacy virtio devices Legacy virtio defines the virtqueue base using a 32-bit PFN field, with a read-only register indicating a fixed page size of 4k. This can cause problems for DMA allocators that allocate top down from the DMA mask, which is set to 64 bits. In this case, the addresses are silently truncated to 44-bit, leading to IOMMU faults, failure to read from the queue or data corruption. This patch restricts the coherent DMA mask for legacy PCI virtio devices to 44 bits, which matches the specification. Cc: stable@vger.kernel.org Cc: Andy Lutomirski Cc: Michael S. Tsirkin Cc: Benjamin Serebrin Signed-off-by: Will Deacon Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_pci_legacy.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c index 8c4e61783441..6d9e5173d5fa 100644 --- a/drivers/virtio/virtio_pci_legacy.c +++ b/drivers/virtio/virtio_pci_legacy.c @@ -212,10 +212,18 @@ int virtio_pci_legacy_probe(struct virtio_pci_device *vp_dev) return -ENODEV; } - rc = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(64)); - if (rc) - rc = dma_set_mask_and_coherent(&pci_dev->dev, - DMA_BIT_MASK(32)); + rc = dma_set_mask(&pci_dev->dev, DMA_BIT_MASK(64)); + if (rc) { + rc = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(32)); + } else { + /* + * The virtio ring base address is expressed as a 32-bit PFN, + * with a page size of 1 << VIRTIO_PCI_QUEUE_ADDR_SHIFT. + */ + dma_set_coherent_mask(&pci_dev->dev, + DMA_BIT_MASK(32 + VIRTIO_PCI_QUEUE_ADDR_SHIFT)); + } + if (rc) dev_warn(&pci_dev->dev, "Failed to enable 64-bit or 32-bit DMA. Trying to continue, but this might not work.\n"); From 0ea1e4a6d9b62cf29e210d2b4ba9fd43917522e3 Mon Sep 17 00:00:00 2001 From: Ladi Prosek Date: Wed, 31 Aug 2016 14:00:04 +0200 Subject: [PATCH 812/884] virtio_ring: Make interrupt suppression spec compliant According to the spec, if the VIRTIO_RING_F_EVENT_IDX feature bit is negotiated the driver MUST set flags to 0. Not dirtying the available ring in virtqueue_disable_cb also has a minor positive performance impact, improving L1 dcache load missed by ~0.5% in vring_bench. Writes to the used event field (vring_used_event) are still unconditional. Cc: Michael S. Tsirkin Cc: # f277ec4 virtio_ring: shadow available Cc: Signed-off-by: Ladi Prosek Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_ring.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index ed9c9eeedfe5..6b2cd922d322 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -732,7 +732,8 @@ void virtqueue_disable_cb(struct virtqueue *_vq) if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) { vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; - vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow); + if (!vq->event) + vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow); } } @@ -764,7 +765,8 @@ unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq) * entry. Always do both to keep code simple. */ if (vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { vq->avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; - vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow); + if (!vq->event) + vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow); } vring_used_event(&vq->vring) = cpu_to_virtio16(_vq->vdev, last_used_idx = vq->last_used_idx); END_USE(vq); @@ -832,10 +834,11 @@ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) * more to do. */ /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to * either clear the flags bit or point the event index at the next - * entry. Always do both to keep code simple. */ + * entry. Always update the event index to keep code simple. */ if (vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) { vq->avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT; - vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow); + if (!vq->event) + vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow); } /* TODO: tune this threshold */ bufs = (u16)(vq->avail_idx_shadow - vq->last_used_idx) * 3 / 4; @@ -953,7 +956,8 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, /* No callback? Tell other side not to bother us. */ if (!callback) { vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT; - vq->vring.avail->flags = cpu_to_virtio16(vdev, vq->avail_flags_shadow); + if (!vq->event) + vq->vring.avail->flags = cpu_to_virtio16(vdev, vq->avail_flags_shadow); } /* Put everything in free lists. */ From 8424af5336b34043a705d66bdf2c1428048ef085 Mon Sep 17 00:00:00 2001 From: Konstantin Neumoin Date: Thu, 29 Sep 2016 13:17:12 +0300 Subject: [PATCH 813/884] virtio: update balloon size in balloon "probe" The following commit 'fad7b7b27b6a (virtio_balloon: Use a workqueue instead of "vballoon" kthread)' has added a regression. Original code with kthread starts the thread inside probe and checks the necessity to update balloon inside the thread immediately. Nowadays the code behaves differently. Work is queued only on the first command from the host after the negotiation. Thus there is a window especially at the guest startup or the module reloading when the balloon size is not updated until the notification from the host. This patch adds balloon size check at the end of the probe to match original behaviour. Signed-off-by: Konstantin Neumoin Signed-off-by: Denis V. Lunev Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_balloon.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 4e7003db12c4..181793f07852 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -577,6 +577,8 @@ static int virtballoon_probe(struct virtio_device *vdev) virtio_device_ready(vdev); + if (towards_target(vb)) + virtballoon_changed(vdev); return 0; out_del_vqs: From 44d65ea1615099ae252407f2554338d450cfdb1c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 6 Oct 2016 11:39:10 +0200 Subject: [PATCH 814/884] ringtest: use link-time optimization By using -flto and -fwhole-program, all functions from the ring implementation can be treated as static and possibly inlined. Force this to happen through the GCC flatten attribute. Signed-off-by: Paolo Bonzini Signed-off-by: Michael S. Tsirkin --- tools/virtio/ringtest/Makefile | 4 ++-- tools/virtio/ringtest/main.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/virtio/ringtest/Makefile b/tools/virtio/ringtest/Makefile index 877a8a4721b6..c012edbdb13b 100644 --- a/tools/virtio/ringtest/Makefile +++ b/tools/virtio/ringtest/Makefile @@ -3,8 +3,8 @@ all: all: ring virtio_ring_0_9 virtio_ring_poll virtio_ring_inorder ptr_ring noring CFLAGS += -Wall -CFLAGS += -pthread -O2 -ggdb -LDFLAGS += -pthread -O2 -ggdb +CFLAGS += -pthread -O2 -ggdb -flto -fwhole-program +LDFLAGS += -pthread -O2 -ggdb -flto -fwhole-program main.o: main.c main.h ring.o: ring.c main.h diff --git a/tools/virtio/ringtest/main.c b/tools/virtio/ringtest/main.c index 147abb452a6c..bda7f0dad981 100644 --- a/tools/virtio/ringtest/main.c +++ b/tools/virtio/ringtest/main.c @@ -96,7 +96,7 @@ void set_affinity(const char *arg) assert(!ret); } -static void run_guest(void) +static void __attribute__((__flatten__)) run_guest(void) { int completed_before; int completed = 0; @@ -149,7 +149,7 @@ static void run_guest(void) } } -static void run_host(void) +static void __attribute__((__flatten__)) run_host(void) { int completed_before; int completed = 0; From d3c3589b8b3cd4fabf4cd137facb42a7fb36bd7f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 6 Oct 2016 11:39:11 +0200 Subject: [PATCH 815/884] ringtest: commonize implementation of poll_avail/poll_used Provide new primitives used_empty/avail_empty and build poll_avail/poll_used on top of it. Signed-off-by: Paolo Bonzini Signed-off-by: Michael S. Tsirkin --- tools/virtio/ringtest/main.c | 12 +++++ tools/virtio/ringtest/main.h | 4 +- tools/virtio/ringtest/noring.c | 6 ++- tools/virtio/ringtest/ptr_ring.c | 22 ++------- tools/virtio/ringtest/ring.c | 18 +++---- tools/virtio/ringtest/virtio_ring_0_9.c | 64 ++++++------------------- 6 files changed, 43 insertions(+), 83 deletions(-) diff --git a/tools/virtio/ringtest/main.c b/tools/virtio/ringtest/main.c index bda7f0dad981..b00ecd619969 100644 --- a/tools/virtio/ringtest/main.c +++ b/tools/virtio/ringtest/main.c @@ -96,6 +96,12 @@ void set_affinity(const char *arg) assert(!ret); } +void poll_used(void) +{ + while (used_empty()) + busy_wait(); +} + static void __attribute__((__flatten__)) run_guest(void) { int completed_before; @@ -149,6 +155,12 @@ static void __attribute__((__flatten__)) run_guest(void) } } +void poll_avail(void) +{ + while (avail_empty()) + busy_wait(); +} + static void __attribute__((__flatten__)) run_host(void) { int completed_before; diff --git a/tools/virtio/ringtest/main.h b/tools/virtio/ringtest/main.h index 16917acb0ade..34e63cc4c572 100644 --- a/tools/virtio/ringtest/main.h +++ b/tools/virtio/ringtest/main.h @@ -56,15 +56,15 @@ void alloc_ring(void); int add_inbuf(unsigned, void *, void *); void *get_buf(unsigned *, void **); void disable_call(); +bool used_empty(); bool enable_call(); void kick_available(); -void poll_used(); /* host side */ void disable_kick(); +bool avail_empty(); bool enable_kick(); bool use_buf(unsigned *, void **); void call_used(); -void poll_avail(); /* implemented by main */ extern bool do_sleep; diff --git a/tools/virtio/ringtest/noring.c b/tools/virtio/ringtest/noring.c index eda2f4824130..b8d1c1daac7c 100644 --- a/tools/virtio/ringtest/noring.c +++ b/tools/virtio/ringtest/noring.c @@ -24,8 +24,9 @@ void *get_buf(unsigned *lenp, void **bufp) return "Buffer"; } -void poll_used(void) +bool used_empty() { + return false; } void disable_call() @@ -54,8 +55,9 @@ bool enable_kick() assert(0); } -void poll_avail(void) +bool avail_empty() { + return false; } bool use_buf(unsigned *lenp, void **bufp) diff --git a/tools/virtio/ringtest/ptr_ring.c b/tools/virtio/ringtest/ptr_ring.c index bd2ad1d3b7a9..635b07b4fdd3 100644 --- a/tools/virtio/ringtest/ptr_ring.c +++ b/tools/virtio/ringtest/ptr_ring.c @@ -133,18 +133,9 @@ void *get_buf(unsigned *lenp, void **bufp) return datap; } -void poll_used(void) +bool used_empty() { - void *b; - - do { - if (tailcnt == headcnt || __ptr_ring_full(&array)) { - b = NULL; - barrier(); - } else { - b = "Buffer\n"; - } - } while (!b); + return (tailcnt == headcnt || __ptr_ring_full(&array)); } void disable_call() @@ -173,14 +164,9 @@ bool enable_kick() assert(0); } -void poll_avail(void) +bool avail_empty() { - void *b; - - do { - barrier(); - b = __ptr_ring_peek(&array); - } while (!b); + return !__ptr_ring_peek(&array); } bool use_buf(unsigned *lenp, void **bufp) diff --git a/tools/virtio/ringtest/ring.c b/tools/virtio/ringtest/ring.c index c25c8d248b6b..747c5dd47be8 100644 --- a/tools/virtio/ringtest/ring.c +++ b/tools/virtio/ringtest/ring.c @@ -163,12 +163,11 @@ void *get_buf(unsigned *lenp, void **bufp) return datap; } -void poll_used(void) +bool used_empty() { unsigned head = (ring_size - 1) & guest.last_used_idx; - while (ring[head].flags & DESC_HW) - busy_wait(); + return (ring[head].flags & DESC_HW); } void disable_call() @@ -180,13 +179,11 @@ void disable_call() bool enable_call() { - unsigned head = (ring_size - 1) & guest.last_used_idx; - event->call_index = guest.last_used_idx; /* Flush call index write */ /* Barrier D (for pairing) */ smp_mb(); - return ring[head].flags & DESC_HW; + return used_empty(); } void kick_available(void) @@ -213,20 +210,17 @@ void disable_kick() bool enable_kick() { - unsigned head = (ring_size - 1) & host.used_idx; - event->kick_index = host.used_idx; /* Barrier C (for pairing) */ smp_mb(); - return !(ring[head].flags & DESC_HW); + return avail_empty(); } -void poll_avail(void) +bool avail_empty() { unsigned head = (ring_size - 1) & host.used_idx; - while (!(ring[head].flags & DESC_HW)) - busy_wait(); + return !(ring[head].flags & DESC_HW); } bool use_buf(unsigned *lenp, void **bufp) diff --git a/tools/virtio/ringtest/virtio_ring_0_9.c b/tools/virtio/ringtest/virtio_ring_0_9.c index 761866212aac..bbc3043b2fb1 100644 --- a/tools/virtio/ringtest/virtio_ring_0_9.c +++ b/tools/virtio/ringtest/virtio_ring_0_9.c @@ -194,24 +194,16 @@ void *get_buf(unsigned *lenp, void **bufp) return datap; } -void poll_used(void) +bool used_empty() { + unsigned short last_used_idx = guest.last_used_idx; #ifdef RING_POLL - unsigned head = (ring_size - 1) & guest.last_used_idx; + unsigned short head = last_used_idx & (ring_size - 1); + unsigned index = ring.used->ring[head].id; - for (;;) { - unsigned index = ring.used->ring[head].id; - - if ((index ^ guest.last_used_idx ^ 0x8000) & ~(ring_size - 1)) - busy_wait(); - else - break; - } + return (index ^ last_used_idx ^ 0x8000) & ~(ring_size - 1); #else - unsigned head = guest.last_used_idx; - - while (ring.used->idx == head) - busy_wait(); + return ring.used->idx == last_used_idx; #endif } @@ -224,22 +216,11 @@ void disable_call() bool enable_call() { - unsigned short last_used_idx; - - vring_used_event(&ring) = (last_used_idx = guest.last_used_idx); + vring_used_event(&ring) = guest.last_used_idx; /* Flush call index write */ /* Barrier D (for pairing) */ smp_mb(); -#ifdef RING_POLL - { - unsigned short head = last_used_idx & (ring_size - 1); - unsigned index = ring.used->ring[head].id; - - return (index ^ last_used_idx ^ 0x8000) & ~(ring_size - 1); - } -#else - return ring.used->idx == last_used_idx; -#endif + return used_empty(); } void kick_available(void) @@ -266,36 +247,21 @@ void disable_kick() bool enable_kick() { - unsigned head = host.used_idx; - - vring_avail_event(&ring) = head; + vring_avail_event(&ring) = host.used_idx; /* Barrier C (for pairing) */ smp_mb(); -#ifdef RING_POLL - { - unsigned index = ring.avail->ring[head & (ring_size - 1)]; - - return (index ^ head ^ 0x8000) & ~(ring_size - 1); - } -#else - return head == ring.avail->idx; -#endif + return avail_empty(); } -void poll_avail(void) +bool avail_empty() { unsigned head = host.used_idx; #ifdef RING_POLL - for (;;) { - unsigned index = ring.avail->ring[head & (ring_size - 1)]; - if ((index ^ head ^ 0x8000) & ~(ring_size - 1)) - busy_wait(); - else - break; - } + unsigned index = ring.avail->ring[head & (ring_size - 1)]; + + return ((index ^ head ^ 0x8000) & ~(ring_size - 1)); #else - while (ring.avail->idx == head) - busy_wait(); + return head == ring.avail->idx; #endif } From 948a8ac2964f39161ebf832dea0aa1ced90101bb Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 6 Oct 2016 11:39:12 +0200 Subject: [PATCH 816/884] ringtest: poll for new buffers once before updating event index Updating the event index has a memory barrier and causes more work on the other side to actually signal the event. It is unnecessary if a new buffer has already appeared on the ring, so poll once before doing the update. The effect of this on the 0.9 ring implementation is pretty much invisible, but on the new-style ring it provides a consistent 3% performance improvement. Signed-off-by: Paolo Bonzini Signed-off-by: Michael S. Tsirkin --- tools/virtio/ringtest/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/virtio/ringtest/main.c b/tools/virtio/ringtest/main.c index b00ecd619969..f31353fac541 100644 --- a/tools/virtio/ringtest/main.c +++ b/tools/virtio/ringtest/main.c @@ -147,7 +147,7 @@ static void __attribute__((__flatten__)) run_guest(void) assert(completed <= bufs); assert(started <= bufs); if (do_sleep) { - if (enable_call()) + if (used_empty() && enable_call()) wait_for_call(); } else { poll_used(); @@ -172,7 +172,7 @@ static void __attribute__((__flatten__)) run_host(void) for (;;) { if (do_sleep) { - if (enable_kick()) + if (avail_empty() && enable_kick()) wait_for_kick(); } else { poll_avail(); From 34563769e438d2881f62cf4d9badc4e589ac0ec0 Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Tue, 11 Oct 2016 12:05:15 +0100 Subject: [PATCH 817/884] virtio: console: Unlock vqs while freeing buffers Commit c6017e793b93 ("virtio: console: add locks around buffer removal in port unplug path") added locking around the freeing of buffers in the vq. However, when free_buf() is called with can_sleep = true and rproc is enabled, it calls dma_free_coherent() directly, requiring interrupts to be enabled. Currently a WARNING is triggered due to the spin locking around free_buf, with a call stack like this: WARNING: CPU: 3 PID: 121 at ./include/linux/dma-mapping.h:433 free_buf+0x1a8/0x288 Call Trace: [<8040c538>] show_stack+0x74/0xc0 [<80757240>] dump_stack+0xd0/0x110 [<80430d98>] __warn+0xfc/0x130 [<80430ee0>] warn_slowpath_null+0x2c/0x3c [<807e7c6c>] free_buf+0x1a8/0x288 [<807ea590>] remove_port_data+0x50/0xac [<807ea6a0>] unplug_port+0xb4/0x1bc [<807ea858>] virtcons_remove+0xb0/0xfc [<807b6734>] virtio_dev_remove+0x58/0xc0 [<807f918c>] __device_release_driver+0xac/0x134 [<807f924c>] device_release_driver+0x38/0x50 [<807f7edc>] bus_remove_device+0xfc/0x130 [<807f4b74>] device_del+0x17c/0x21c [<807f4c38>] device_unregister+0x24/0x38 [<807b6b50>] unregister_virtio_device+0x28/0x44 Fix this by restructuring the loops to allow the locks to only be taken where it is necessary to protect the vqs, and release it while the buffer is being freed. Fixes: c6017e793b93 ("virtio: console: add locks around buffer removal in port unplug path") Cc: stable@vger.kernel.org Signed-off-by: Matt Redfearn Signed-off-by: Michael S. Tsirkin --- drivers/char/virtio_console.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index d433b1db1fdd..5649234b7316 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -1539,19 +1539,29 @@ static void remove_port_data(struct port *port) spin_lock_irq(&port->inbuf_lock); /* Remove unused data this port might have received. */ discard_port_data(port); + spin_unlock_irq(&port->inbuf_lock); /* Remove buffers we queued up for the Host to send us data in. */ - while ((buf = virtqueue_detach_unused_buf(port->in_vq))) - free_buf(buf, true); - spin_unlock_irq(&port->inbuf_lock); + do { + spin_lock_irq(&port->inbuf_lock); + buf = virtqueue_detach_unused_buf(port->in_vq); + spin_unlock_irq(&port->inbuf_lock); + if (buf) + free_buf(buf, true); + } while (buf); spin_lock_irq(&port->outvq_lock); reclaim_consumed_buffers(port); + spin_unlock_irq(&port->outvq_lock); /* Free pending buffers from the out-queue. */ - while ((buf = virtqueue_detach_unused_buf(port->out_vq))) - free_buf(buf, true); - spin_unlock_irq(&port->outvq_lock); + do { + spin_lock_irq(&port->outvq_lock); + buf = virtqueue_detach_unused_buf(port->out_vq); + spin_unlock_irq(&port->outvq_lock); + if (buf) + free_buf(buf, true); + } while (buf); } /* From 3dae2c6152fbbc6224343551158b61aad585cedf Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 11 Oct 2016 11:02:14 +0200 Subject: [PATCH 818/884] virtio: remove config.c Remove unused file config.c Signed-off-by: Juergen Gross Signed-off-by: Michael S. Tsirkin --- drivers/virtio/config.c | 12 ------------ 1 file changed, 12 deletions(-) delete mode 100644 drivers/virtio/config.c diff --git a/drivers/virtio/config.c b/drivers/virtio/config.c deleted file mode 100644 index f70bcd2ff98f..000000000000 --- a/drivers/virtio/config.c +++ /dev/null @@ -1,12 +0,0 @@ -/* Configuration space parsing helpers for virtio. - * - * The configuration is [type][len][... len bytes ...] fields. - * - * Copyright 2007 Rusty Russell, IBM Corporation. - * GPL v2 or later. - */ -#include -#include -#include -#include - From 668866b6e8dffa5583d8694f1d8ddd89a8bee745 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Tue, 13 Sep 2016 11:32:22 +0200 Subject: [PATCH 819/884] virtio_blk: Use kmalloc_array() in init_vq() Multiplications for the size determination of memory allocations indicated that array data structures should be processed. Thus use the corresponding function "kmalloc_array". This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: Michael S. Tsirkin --- drivers/block/virtio_blk.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 2dc5c96c186a..b2bc62b9fbe6 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -390,13 +390,13 @@ static int init_vq(struct virtio_blk *vblk) if (err) num_vqs = 1; - vblk->vqs = kmalloc(sizeof(*vblk->vqs) * num_vqs, GFP_KERNEL); + vblk->vqs = kmalloc_array(num_vqs, sizeof(*vblk->vqs), GFP_KERNEL); if (!vblk->vqs) return -ENOMEM; - names = kmalloc(sizeof(*names) * num_vqs, GFP_KERNEL); - callbacks = kmalloc(sizeof(*callbacks) * num_vqs, GFP_KERNEL); - vqs = kmalloc(sizeof(*vqs) * num_vqs, GFP_KERNEL); + names = kmalloc_array(num_vqs, sizeof(*names), GFP_KERNEL); + callbacks = kmalloc_array(num_vqs, sizeof(*callbacks), GFP_KERNEL); + vqs = kmalloc_array(num_vqs, sizeof(*vqs), GFP_KERNEL); if (!names || !callbacks || !vqs) { err = -ENOMEM; goto out; From 2ff98449ee883d6e7484f5937370df42d6789e07 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Tue, 13 Sep 2016 13:43:50 +0200 Subject: [PATCH 820/884] virtio_blk: Delete an unnecessary initialisation in init_vq() The local variable "err" will be set to an appropriate value by a following statement. Thus omit the explicit initialisation at the beginning. Signed-off-by: Markus Elfring Signed-off-by: Michael S. Tsirkin --- drivers/block/virtio_blk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index b2bc62b9fbe6..5545a679abd8 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -376,7 +376,7 @@ static void virtblk_config_changed(struct virtio_device *vdev) static int init_vq(struct virtio_blk *vblk) { - int err = 0; + int err; int i; vq_callback_t **callbacks; const char **names; From 678ff27d25c62e3a25fa069914182a8a6391e121 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 14 Aug 2016 20:57:14 +0300 Subject: [PATCH 821/884] virtio/vhost: add Jason to list of maintainers Jason's been one of the mst active contributors to virtio and vhost, it will help to formalize this and list him as co-maintainer. Signed-off-by: Michael S. Tsirkin --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 3d838cf49f81..eee287e0b740 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12780,6 +12780,7 @@ F: include/uapi/linux/virtio_console.h VIRTIO CORE, NET AND BLOCK DRIVERS M: "Michael S. Tsirkin" +M: Jason Wang L: virtualization@lists.linux-foundation.org S: Maintained F: Documentation/devicetree/bindings/virtio/ @@ -12810,6 +12811,7 @@ F: include/uapi/linux/virtio_gpu.h VIRTIO HOST (VHOST) M: "Michael S. Tsirkin" +M: Jason Wang L: kvm@vger.kernel.org L: virtualization@lists.linux-foundation.org L: netdev@vger.kernel.org From 75bfa81bf0897ba87f1e1b9b576a07536029b86a Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 31 Oct 2016 00:38:21 +0200 Subject: [PATCH 822/884] virtio_ring: mark vring_dma_dev inline This inline function is unused on configurations where dma_map/unmap are empty macros. Make the function inline to avoid gcc errors because of an unused static function. Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_ring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 6b2cd922d322..489bfc61cf30 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -167,7 +167,7 @@ static bool vring_use_dma_api(struct virtio_device *vdev) * making all of the arch DMA ops work on the vring device itself * is a mess. For now, we use the parent device for DMA ops. */ -static struct device *vring_dma_dev(const struct vring_virtqueue *vq) +static inline struct device *vring_dma_dev(const struct vring_virtqueue *vq) { return vq->vq.vdev->dev.parent; } From fd3220d37b1f6f0cab6142d98b0e6c4082e63299 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 31 Oct 2016 14:42:14 +0100 Subject: [PATCH 823/884] ovl: update S_ISGID when setting posix ACLs This change fixes xfstest generic/375, which failed to clear the setgid bit in the following test case on overlayfs: touch $testfile chown 100:100 $testfile chmod 2755 $testfile _runas -u 100 -g 101 -- setfacl -m u::rwx,g::rwx,o::rwx $testfile Reported-by: Amir Goldstein Signed-off-by: Miklos Szeredi Tested-by: Amir Goldstein Fixes: d837a49bd57f ("ovl: fix POSIX ACL setting") Cc: # v4.8 --- fs/overlayfs/super.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index bcf3965be819..edd46a0e951d 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1037,6 +1037,21 @@ ovl_posix_acl_xattr_set(const struct xattr_handler *handler, posix_acl_release(acl); + /* + * Check if sgid bit needs to be cleared (actual setacl operation will + * be done with mounter's capabilities and so that won't do it for us). + */ + if (unlikely(inode->i_mode & S_ISGID) && + handler->flags == ACL_TYPE_ACCESS && + !in_group_p(inode->i_gid) && + !capable_wrt_inode_uidgid(inode, CAP_FSETID)) { + struct iattr iattr = { .ia_valid = ATTR_KILL_SGID }; + + err = ovl_setattr(dentry, &iattr); + if (err) + return err; + } + err = ovl_xattr_set(dentry, handler->name, value, size, flags); if (!err) ovl_copyattr(ovl_inode_real(inode, NULL), inode); From b93d4a0eb308d4400b84c8b24c1b80e09a9497d0 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 31 Oct 2016 14:42:14 +0100 Subject: [PATCH 824/884] ovl: fix get_acl() on tmpfs tmpfs doesn't have ->get_acl() because it only uses cached acls. This fixes the acl tests in pjdfstest when tmpfs is used as the upper layer of the overlay. Reported-by: Amir Goldstein Signed-off-by: Miklos Szeredi Fixes: 39a25b2b3762 ("ovl: define ->get_acl() for overlay inodes") Cc: # v4.8 --- fs/overlayfs/inode.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index c58f01babf30..7fb53d055537 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -270,9 +270,6 @@ struct posix_acl *ovl_get_acl(struct inode *inode, int type) if (!IS_ENABLED(CONFIG_FS_POSIX_ACL) || !IS_POSIXACL(realinode)) return NULL; - if (!realinode->i_op->get_acl) - return NULL; - old_cred = ovl_override_creds(inode->i_sb); acl = get_acl(realinode, type); revert_creds(old_cred); From 641089c1549d8d3df0b047b5de7e9a111362cdce Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 31 Oct 2016 14:42:14 +0100 Subject: [PATCH 825/884] ovl: fsync after copy-up Make sure the copied up file hits the disk before renaming to the final destination. If this is not done then the copy-up may corrupt the data in the file in case of a crash. Signed-off-by: Miklos Szeredi Cc: --- fs/overlayfs/copy_up.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index aeb60f791418..36795eed40b0 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -178,6 +178,8 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len) len -= bytes; } + if (!error) + error = vfs_fsync(new_file, 0); fput(new_file); out_fput: fput(old_file); From befd99656c5eb765fe9d96045c4cba099fd938db Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Tue, 4 Oct 2016 09:49:47 +0300 Subject: [PATCH 826/884] tpm: remove invalid min length check from tpm_do_selftest() Removal of this check was not properly amended to the original commit. Cc: stable@vger.kernel.org Fixes: 0c541332231e ("tpm: use tpm_pcr_read_dev() in tpm_do_selftest()") Signed-off-by: Jarkko Sakkinen Signed-off-by: James Morris --- drivers/char/tpm/tpm-interface.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c index 8de61876f633..3a9149cf0110 100644 --- a/drivers/char/tpm/tpm-interface.c +++ b/drivers/char/tpm/tpm-interface.c @@ -813,9 +813,6 @@ int tpm_do_selftest(struct tpm_chip *chip) continue; } - if (rc < TPM_HEADER_SIZE) - return -EFAULT; - if (rc == TPM_ERR_DISABLED || rc == TPM_ERR_DEACTIVATED) { dev_info(&chip->dev, "TPM is disabled/deactivated (0x%X)\n", rc); From da7389ac6c83e7aa8b04ebe5ba546df2a7873c5c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 31 Oct 2016 10:40:13 -0700 Subject: [PATCH 827/884] gcc-plugins: Export symbols needed by gcc This explicitly exports symbols that gcc expects from plugins. Based on code from Emese Revfy. Signed-off-by: Kees Cook --- scripts/gcc-plugins/cyc_complexity_plugin.c | 4 ++-- scripts/gcc-plugins/gcc-common.h | 1 + scripts/gcc-plugins/latent_entropy_plugin.c | 6 +++--- scripts/gcc-plugins/sancov_plugin.c | 4 ++-- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/scripts/gcc-plugins/cyc_complexity_plugin.c b/scripts/gcc-plugins/cyc_complexity_plugin.c index 34df974c6ba3..8af7db06122d 100644 --- a/scripts/gcc-plugins/cyc_complexity_plugin.c +++ b/scripts/gcc-plugins/cyc_complexity_plugin.c @@ -20,7 +20,7 @@ #include "gcc-common.h" -int plugin_is_GPL_compatible; +__visible int plugin_is_GPL_compatible; static struct plugin_info cyc_complexity_plugin_info = { .version = "20160225", @@ -49,7 +49,7 @@ static unsigned int cyc_complexity_execute(void) #include "gcc-generate-gimple-pass.h" -int plugin_init(struct plugin_name_args *plugin_info, struct plugin_gcc_version *version) +__visible int plugin_init(struct plugin_name_args *plugin_info, struct plugin_gcc_version *version) { const char * const plugin_name = plugin_info->base_name; struct register_pass_info cyc_complexity_pass_info; diff --git a/scripts/gcc-plugins/gcc-common.h b/scripts/gcc-plugins/gcc-common.h index 172850bcd0d9..950fd2e64bb7 100644 --- a/scripts/gcc-plugins/gcc-common.h +++ b/scripts/gcc-plugins/gcc-common.h @@ -130,6 +130,7 @@ extern void dump_gimple_stmt(pretty_printer *, gimple, int, int); #endif #define __unused __attribute__((__unused__)) +#define __visible __attribute__((visibility("default"))) #define DECL_NAME_POINTER(node) IDENTIFIER_POINTER(DECL_NAME(node)) #define DECL_NAME_LENGTH(node) IDENTIFIER_LENGTH(DECL_NAME(node)) diff --git a/scripts/gcc-plugins/latent_entropy_plugin.c b/scripts/gcc-plugins/latent_entropy_plugin.c index ff1939b804ae..9d3d4adf3be8 100644 --- a/scripts/gcc-plugins/latent_entropy_plugin.c +++ b/scripts/gcc-plugins/latent_entropy_plugin.c @@ -77,7 +77,7 @@ #include "gcc-common.h" -int plugin_is_GPL_compatible; +__visible int plugin_is_GPL_compatible; static GTY(()) tree latent_entropy_decl; @@ -584,8 +584,8 @@ static void latent_entropy_start_unit(void *gcc_data __unused, | TODO_update_ssa #include "gcc-generate-gimple-pass.h" -int plugin_init(struct plugin_name_args *plugin_info, - struct plugin_gcc_version *version) +__visible int plugin_init(struct plugin_name_args *plugin_info, + struct plugin_gcc_version *version) { bool enabled = true; const char * const plugin_name = plugin_info->base_name; diff --git a/scripts/gcc-plugins/sancov_plugin.c b/scripts/gcc-plugins/sancov_plugin.c index aedd6113cb73..7ea0b3f50739 100644 --- a/scripts/gcc-plugins/sancov_plugin.c +++ b/scripts/gcc-plugins/sancov_plugin.c @@ -21,7 +21,7 @@ #include "gcc-common.h" -int plugin_is_GPL_compatible; +__visible int plugin_is_GPL_compatible; tree sancov_fndecl; @@ -86,7 +86,7 @@ static void sancov_start_unit(void __unused *gcc_data, void __unused *user_data) #endif } -int plugin_init(struct plugin_name_args *plugin_info, struct plugin_gcc_version *version) +__visible int plugin_init(struct plugin_name_args *plugin_info, struct plugin_gcc_version *version) { int i; struct register_pass_info sancov_plugin_pass_info; From 58bea4144d235cee5bb51203b032ddafd6d1cf8d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 19 Oct 2016 00:08:04 +0200 Subject: [PATCH 828/884] latent_entropy: Fix wrong gcc code generation with 64 bit variables The stack frame size could grow too large when the plugin used long long on 32-bit architectures when the given function had too many basic blocks. The gcc warning was: drivers/pci/hotplug/ibmphp_ebda.c: In function 'ibmphp_access_ebda': drivers/pci/hotplug/ibmphp_ebda.c:409:1: warning: the frame size of 1108 bytes is larger than 1024 bytes [-Wframe-larger-than=] This switches latent_entropy from u64 to unsigned long. Thanks to PaX Team and Emese Revfy for the patch. Signed-off-by: Kees Cook --- mm/page_alloc.c | 2 +- scripts/gcc-plugins/latent_entropy_plugin.c | 19 +++++++++---------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 2b3bf6767d54..1b10c14de5db 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -92,7 +92,7 @@ int _node_numa_mem_[MAX_NUMNODES]; #endif #ifdef CONFIG_GCC_PLUGIN_LATENT_ENTROPY -volatile u64 latent_entropy __latent_entropy; +volatile unsigned long latent_entropy __latent_entropy; EXPORT_SYMBOL(latent_entropy); #endif diff --git a/scripts/gcc-plugins/latent_entropy_plugin.c b/scripts/gcc-plugins/latent_entropy_plugin.c index 9d3d4adf3be8..8160f1c1b56e 100644 --- a/scripts/gcc-plugins/latent_entropy_plugin.c +++ b/scripts/gcc-plugins/latent_entropy_plugin.c @@ -340,7 +340,7 @@ static enum tree_code get_op(tree *rhs) break; } if (rhs) - *rhs = build_int_cstu(unsigned_intDI_type_node, random_const); + *rhs = build_int_cstu(long_unsigned_type_node, random_const); return op; } @@ -372,7 +372,7 @@ static void __perturb_latent_entropy(gimple_stmt_iterator *gsi, enum tree_code op; /* 1. create temporary copy of latent_entropy */ - temp = create_var(unsigned_intDI_type_node, "tmp_latent_entropy"); + temp = create_var(long_unsigned_type_node, "temp_latent_entropy"); /* 2. read... */ add_referenced_var(latent_entropy_decl); @@ -459,13 +459,13 @@ static void init_local_entropy(basic_block bb, tree local_entropy) gsi_insert_before(&gsi, call, GSI_NEW_STMT); update_stmt(call); - udi_frame_addr = fold_convert(unsigned_intDI_type_node, frame_addr); + udi_frame_addr = fold_convert(long_unsigned_type_node, frame_addr); assign = gimple_build_assign(local_entropy, udi_frame_addr); gsi_insert_after(&gsi, assign, GSI_NEW_STMT); update_stmt(assign); /* 3. create temporary copy of latent_entropy */ - tmp = create_var(unsigned_intDI_type_node, "tmp_latent_entropy"); + tmp = create_var(long_unsigned_type_node, "temp_latent_entropy"); /* 4. read the global entropy variable into local entropy */ add_referenced_var(latent_entropy_decl); @@ -480,7 +480,7 @@ static void init_local_entropy(basic_block bb, tree local_entropy) update_stmt(assign); rand_cst = get_random_const(); - rand_const = build_int_cstu(unsigned_intDI_type_node, rand_cst); + rand_const = build_int_cstu(long_unsigned_type_node, rand_cst); op = get_op(NULL); assign = create_assign(op, local_entropy, local_entropy, rand_const); gsi_insert_after(&gsi, assign, GSI_NEW_STMT); @@ -529,7 +529,7 @@ static unsigned int latent_entropy_execute(void) } /* 1. create the local entropy variable */ - local_entropy = create_var(unsigned_intDI_type_node, "local_entropy"); + local_entropy = create_var(long_unsigned_type_node, "local_entropy"); /* 2. initialize the local entropy variable */ init_local_entropy(bb, local_entropy); @@ -561,10 +561,9 @@ static void latent_entropy_start_unit(void *gcc_data __unused, if (in_lto_p) return; - /* extern volatile u64 latent_entropy */ - gcc_assert(TYPE_PRECISION(long_long_unsigned_type_node) == 64); - quals = TYPE_QUALS(long_long_unsigned_type_node) | TYPE_QUAL_VOLATILE; - type = build_qualified_type(long_long_unsigned_type_node, quals); + /* extern volatile unsigned long latent_entropy */ + quals = TYPE_QUALS(long_unsigned_type_node) | TYPE_QUAL_VOLATILE; + type = build_qualified_type(long_unsigned_type_node, quals); id = get_identifier("latent_entropy"); latent_entropy_decl = build_decl(UNKNOWN_LOCATION, VAR_DECL, id, type); From 953b956a2e6d35298e684f251bad98ea6c96f982 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Mon, 24 Oct 2016 13:59:15 +0200 Subject: [PATCH 829/884] gpio: GPIO_GET_LINE{HANDLE,EVENT}_IOCTL: Fix file descriptor leak When allocating a new line handle or event a file is allocated that it is associated to. The file is attached to a file descriptor of the current process and the file descriptor is returned to userspace using copy_to_user(). If this copy operation fails the line handle or event allocation is aborted, all acquired resources are freed and an error is returned. But the file struct is not freed and left attached to the userspace application and even though the file descriptor number was not copied it is trivial to guess. If a userspace application performs a IOCTL on such a left over file descriptor it will trigger a use-after-free and if the file descriptor is closed (latest when the application exits) a double-free is triggered. anon_inode_getfd() performs 3 tasks, allocate a file struct, allocate a file descriptor for the current process and install the file struct in the file descriptor. As soon as the file struct is installed in the file descriptor it is accessible by userspace (even if the IOCTL itself hasn't completed yet), this means uninstalling the fd on the error path is not an option, since userspace might already got a reference to the file. Instead anon_inode_getfd() needs to be broken into its individual steps. The allocation of the file struct and file descriptor is done first, then the copy_to_user() is executed and only if it succeeds the file is installed. Since the file struct is reference counted it can not be just freed, but its reference needs to be dropped, which will also call the release() callback, which will free the state attached to the file. So in this case the normal error cleanup path should not be taken. Cc: stable@vger.kernel.org Fixes: d932cd49182f ("gpio: free handles in fringe cases") Signed-off-by: Lars-Peter Clausen Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib.c | 57 +++++++++++++++++++++++++++++++++--------- 1 file changed, 45 insertions(+), 12 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 20e09b7c2de3..93ed0e00c578 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -423,6 +424,7 @@ static int linehandle_create(struct gpio_device *gdev, void __user *ip) { struct gpiohandle_request handlereq; struct linehandle_state *lh; + struct file *file; int fd, i, ret; if (copy_from_user(&handlereq, ip, sizeof(handlereq))) @@ -499,26 +501,41 @@ static int linehandle_create(struct gpio_device *gdev, void __user *ip) i--; lh->numdescs = handlereq.lines; - fd = anon_inode_getfd("gpio-linehandle", - &linehandle_fileops, - lh, - O_RDONLY | O_CLOEXEC); + fd = get_unused_fd_flags(O_RDONLY | O_CLOEXEC); if (fd < 0) { ret = fd; goto out_free_descs; } + file = anon_inode_getfile("gpio-linehandle", + &linehandle_fileops, + lh, + O_RDONLY | O_CLOEXEC); + if (IS_ERR(file)) { + ret = PTR_ERR(file); + goto out_put_unused_fd; + } + handlereq.fd = fd; if (copy_to_user(ip, &handlereq, sizeof(handlereq))) { - ret = -EFAULT; - goto out_free_descs; + /* + * fput() will trigger the release() callback, so do not go onto + * the regular error cleanup path here. + */ + fput(file); + put_unused_fd(fd); + return -EFAULT; } + fd_install(fd, file); + dev_dbg(&gdev->dev, "registered chardev handle for %d lines\n", lh->numdescs); return 0; +out_put_unused_fd: + put_unused_fd(fd); out_free_descs: for (; i >= 0; i--) gpiod_free(lh->descs[i]); @@ -721,6 +738,7 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip) struct gpioevent_request eventreq; struct lineevent_state *le; struct gpio_desc *desc; + struct file *file; u32 offset; u32 lflags; u32 eflags; @@ -815,23 +833,38 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip) if (ret) goto out_free_desc; - fd = anon_inode_getfd("gpio-event", - &lineevent_fileops, - le, - O_RDONLY | O_CLOEXEC); + fd = get_unused_fd_flags(O_RDONLY | O_CLOEXEC); if (fd < 0) { ret = fd; goto out_free_irq; } + file = anon_inode_getfile("gpio-event", + &lineevent_fileops, + le, + O_RDONLY | O_CLOEXEC); + if (IS_ERR(file)) { + ret = PTR_ERR(file); + goto out_put_unused_fd; + } + eventreq.fd = fd; if (copy_to_user(ip, &eventreq, sizeof(eventreq))) { - ret = -EFAULT; - goto out_free_irq; + /* + * fput() will trigger the release() callback, so do not go onto + * the regular error cleanup path here. + */ + fput(file); + put_unused_fd(fd); + return -EFAULT; } + fd_install(fd, file); + return 0; +out_put_unused_fd: + put_unused_fd(fd); out_free_irq: free_irq(le->irq, le); out_free_desc: From c7e9d39831a31682285cc31ddf7dd06c0fe59138 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 25 Oct 2016 10:47:44 +0900 Subject: [PATCH 830/884] gpio: of: fix GPIO drivers with multiple gpio_chip for a single node Sylvain Lemieux reports the LPC32xx GPIO driver is broken since commit 762c2e46c059 ("gpio: of: remove of_gpiochip_and_xlate() and struct gg_data"). Probably, gpio-etraxfs.c and gpio-davinci.c are broken too. Those drivers register multiple gpio_chip that are associated to a single OF node, and their own .of_xlate() checks if the passed gpio_chip is valid. Now, the problem is of_find_gpiochip_by_node() returns the first gpio_chip found to match the given node. So, .of_xlate() fails, except for the first GPIO bank. Reverting the commit could be a solution, but I do not want to go back to the mess of struct gg_data. Another solution here is to take the match by a node pointer and the success of .of_xlate(). It is a bit clumsy to call .of_xlate twice; for gpio_chip matching and for really getting the gpio_desc index. Perhaps, our long-term goal might be to convert the drivers to single chip registration, but this commit will solve the problem until then. Fixes: 762c2e46c059 ("gpio: of: remove of_gpiochip_and_xlate() and struct gg_data") Signed-off-by: Masahiro Yamada Reported-by: Sylvain Lemieux Tested-by: David Lechner Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib-of.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index ecad3f0e3b77..193f15d50bba 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -26,14 +26,18 @@ #include "gpiolib.h" -static int of_gpiochip_match_node(struct gpio_chip *chip, void *data) +static int of_gpiochip_match_node_and_xlate(struct gpio_chip *chip, void *data) { - return chip->gpiodev->dev.of_node == data; + struct of_phandle_args *gpiospec = data; + + return chip->gpiodev->dev.of_node == gpiospec->np && + chip->of_xlate(chip, gpiospec, NULL) >= 0; } -static struct gpio_chip *of_find_gpiochip_by_node(struct device_node *np) +static struct gpio_chip *of_find_gpiochip_by_xlate( + struct of_phandle_args *gpiospec) { - return gpiochip_find(np, of_gpiochip_match_node); + return gpiochip_find(gpiospec, of_gpiochip_match_node_and_xlate); } static struct gpio_desc *of_xlate_and_get_gpiod_flags(struct gpio_chip *chip, @@ -79,7 +83,7 @@ struct gpio_desc *of_get_named_gpiod_flags(struct device_node *np, return ERR_PTR(ret); } - chip = of_find_gpiochip_by_node(gpiospec.np); + chip = of_find_gpiochip_by_xlate(&gpiospec); if (!chip) { desc = ERR_PTR(-EPROBE_DEFER); goto out; From b0a6af8b34c9ad20894aa46f85f4bf59d444f286 Mon Sep 17 00:00:00 2001 From: Peter Wu Date: Mon, 31 Oct 2016 23:48:22 +0100 Subject: [PATCH 831/884] drm/nouveau/acpi: fix check for power resources support Check whether the kernel really supports power resources for a device, otherwise the power might not be removed when the device is runtime suspended (DSM should still work in these cases where PR does not). This is a workaround for a problem where ACPICA and Windows 10 differ in behavior. ACPICA does not correctly enumerate power resources within a conditional block (due to delayed execution of such blocks) and as a result power_resources is set to false even if _PR3 exists. Fixes: 692a17dcc292 ("drm/nouveau/acpi: fix lockup with PCIe runtime PM") Link: https://bugs.freedesktop.org/show_bug.cgi?id=98398 Reported-and-tested-by: Rick Kerkhof Reviewed-by: Mika Westerberg Cc: stable@vger.kernel.org # v4.8+ Signed-off-by: Peter Wu Signed-off-by: Dave Airlie --- drivers/gpu/drm/nouveau/nouveau_acpi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c index dc57b628e074..193573d191e5 100644 --- a/drivers/gpu/drm/nouveau/nouveau_acpi.c +++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c @@ -240,7 +240,8 @@ static bool nouveau_pr3_present(struct pci_dev *pdev) if (!parent_adev) return false; - return acpi_has_method(parent_adev->handle, "_PR3"); + return parent_adev->power.flags.power_resources && + acpi_has_method(parent_adev->handle, "_PR3"); } static void nouveau_dsm_pci_probe(struct pci_dev *pdev, acpi_handle *dhandle_out, From 405c0759712f57b680f66aee9c55cd06ad1cbdef Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 31 Oct 2016 08:11:43 -0700 Subject: [PATCH 832/884] fork: Add task stack refcounting sanity check and prevent premature task stack freeing If something goes wrong with task stack refcounting and a stack refcount hits zero too early, warn and leak it rather than potentially freeing it early (and silently). Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/f29119c783a9680a4b4656e751b6123917ace94b.1477926663.git.luto@kernel.org Signed-off-by: Ingo Molnar --- kernel/fork.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/fork.c b/kernel/fork.c index 623259fc794d..997ac1d584f7 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -315,6 +315,9 @@ static void account_kernel_stack(struct task_struct *tsk, int account) static void release_task_stack(struct task_struct *tsk) { + if (WARN_ON(tsk->state != TASK_DEAD)) + return; /* Better to leak the stack than to free prematurely */ + account_kernel_stack(tsk, -1); arch_release_thread_stack(tsk->stack); free_thread_stack(tsk); @@ -1862,6 +1865,7 @@ bad_fork_cleanup_count: atomic_dec(&p->cred->user->processes); exit_creds(p); bad_fork_free: + p->state = TASK_DEAD; put_task_stack(p); free_task(p); fork_out: From 812d47889a8e418d7bea9bec383581a34c19183e Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 19 Oct 2016 15:03:41 -0600 Subject: [PATCH 833/884] gpio/mvebu: Use irq_domain_add_linear This fixes the irq allocation in this driver to not print: irq: Cannot allocate irq_descs @ IRQ34, assuming pre-allocated irq: Cannot allocate irq_descs @ IRQ66, assuming pre-allocated Which happens because the driver already called irq_alloc_descs() and so the change to use irq_domain_add_simple resulted in calling irq_alloc_descs() twice. Modernize the irq allocation in this driver to use the irq_domain_add_linear flow directly and eliminate the use of irq_domain_add_simple/legacy Fixes: ce931f571b6d ("gpio/mvebu: convert to use irq_domain_add_simple()") Signed-off-by: Jason Gunthorpe Signed-off-by: Linus Walleij --- drivers/gpio/gpio-mvebu.c | 92 ++++++++++++++++++--------------------- 1 file changed, 43 insertions(+), 49 deletions(-) diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c index cd5dc27320a2..1ed6132b993c 100644 --- a/drivers/gpio/gpio-mvebu.c +++ b/drivers/gpio/gpio-mvebu.c @@ -293,10 +293,10 @@ static void mvebu_gpio_irq_ack(struct irq_data *d) { struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); struct mvebu_gpio_chip *mvchip = gc->private; - u32 mask = ~(1 << (d->irq - gc->irq_base)); + u32 mask = d->mask; irq_gc_lock(gc); - writel_relaxed(mask, mvebu_gpioreg_edge_cause(mvchip)); + writel_relaxed(~mask, mvebu_gpioreg_edge_cause(mvchip)); irq_gc_unlock(gc); } @@ -305,7 +305,7 @@ static void mvebu_gpio_edge_irq_mask(struct irq_data *d) struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); struct mvebu_gpio_chip *mvchip = gc->private; struct irq_chip_type *ct = irq_data_get_chip_type(d); - u32 mask = 1 << (d->irq - gc->irq_base); + u32 mask = d->mask; irq_gc_lock(gc); ct->mask_cache_priv &= ~mask; @@ -319,8 +319,7 @@ static void mvebu_gpio_edge_irq_unmask(struct irq_data *d) struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); struct mvebu_gpio_chip *mvchip = gc->private; struct irq_chip_type *ct = irq_data_get_chip_type(d); - - u32 mask = 1 << (d->irq - gc->irq_base); + u32 mask = d->mask; irq_gc_lock(gc); ct->mask_cache_priv |= mask; @@ -333,8 +332,7 @@ static void mvebu_gpio_level_irq_mask(struct irq_data *d) struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); struct mvebu_gpio_chip *mvchip = gc->private; struct irq_chip_type *ct = irq_data_get_chip_type(d); - - u32 mask = 1 << (d->irq - gc->irq_base); + u32 mask = d->mask; irq_gc_lock(gc); ct->mask_cache_priv &= ~mask; @@ -347,8 +345,7 @@ static void mvebu_gpio_level_irq_unmask(struct irq_data *d) struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); struct mvebu_gpio_chip *mvchip = gc->private; struct irq_chip_type *ct = irq_data_get_chip_type(d); - - u32 mask = 1 << (d->irq - gc->irq_base); + u32 mask = d->mask; irq_gc_lock(gc); ct->mask_cache_priv |= mask; @@ -462,7 +459,7 @@ static void mvebu_gpio_irq_handler(struct irq_desc *desc) for (i = 0; i < mvchip->chip.ngpio; i++) { int irq; - irq = mvchip->irqbase + i; + irq = irq_find_mapping(mvchip->domain, i); if (!(cause & (1 << i))) continue; @@ -655,6 +652,7 @@ static int mvebu_gpio_probe(struct platform_device *pdev) struct irq_chip_type *ct; struct clk *clk; unsigned int ngpios; + bool have_irqs; int soc_variant; int i, cpu, id; int err; @@ -665,6 +663,9 @@ static int mvebu_gpio_probe(struct platform_device *pdev) else soc_variant = MVEBU_GPIO_SOC_VARIANT_ORION; + /* Some gpio controllers do not provide irq support */ + have_irqs = of_irq_count(np) != 0; + mvchip = devm_kzalloc(&pdev->dev, sizeof(struct mvebu_gpio_chip), GFP_KERNEL); if (!mvchip) @@ -697,7 +698,8 @@ static int mvebu_gpio_probe(struct platform_device *pdev) mvchip->chip.get = mvebu_gpio_get; mvchip->chip.direction_output = mvebu_gpio_direction_output; mvchip->chip.set = mvebu_gpio_set; - mvchip->chip.to_irq = mvebu_gpio_to_irq; + if (have_irqs) + mvchip->chip.to_irq = mvebu_gpio_to_irq; mvchip->chip.base = id * MVEBU_MAX_GPIO_PER_BANK; mvchip->chip.ngpio = ngpios; mvchip->chip.can_sleep = false; @@ -758,34 +760,30 @@ static int mvebu_gpio_probe(struct platform_device *pdev) devm_gpiochip_add_data(&pdev->dev, &mvchip->chip, mvchip); /* Some gpio controllers do not provide irq support */ - if (!of_irq_count(np)) + if (!have_irqs) return 0; - /* Setup the interrupt handlers. Each chip can have up to 4 - * interrupt handlers, with each handler dealing with 8 GPIO - * pins. */ - for (i = 0; i < 4; i++) { - int irq = platform_get_irq(pdev, i); - - if (irq < 0) - continue; - irq_set_chained_handler_and_data(irq, mvebu_gpio_irq_handler, - mvchip); + mvchip->domain = + irq_domain_add_linear(np, ngpios, &irq_generic_chip_ops, NULL); + if (!mvchip->domain) { + dev_err(&pdev->dev, "couldn't allocate irq domain %s (DT).\n", + mvchip->chip.label); + return -ENODEV; } - mvchip->irqbase = irq_alloc_descs(-1, 0, ngpios, -1); - if (mvchip->irqbase < 0) { - dev_err(&pdev->dev, "no irqs\n"); - return mvchip->irqbase; - } - - gc = irq_alloc_generic_chip("mvebu_gpio_irq", 2, mvchip->irqbase, - mvchip->membase, handle_level_irq); - if (!gc) { - dev_err(&pdev->dev, "Cannot allocate generic irq_chip\n"); - return -ENOMEM; + err = irq_alloc_domain_generic_chips( + mvchip->domain, ngpios, 2, np->name, handle_level_irq, + IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_LEVEL, 0, 0); + if (err) { + dev_err(&pdev->dev, "couldn't allocate irq chips %s (DT).\n", + mvchip->chip.label); + goto err_domain; } + /* NOTE: The common accessors cannot be used because of the percpu + * access to the mask registers + */ + gc = irq_get_domain_generic_chip(mvchip->domain, 0); gc->private = mvchip; ct = &gc->chip_types[0]; ct->type = IRQ_TYPE_LEVEL_HIGH | IRQ_TYPE_LEVEL_LOW; @@ -803,27 +801,23 @@ static int mvebu_gpio_probe(struct platform_device *pdev) ct->handler = handle_edge_irq; ct->chip.name = mvchip->chip.label; - irq_setup_generic_chip(gc, IRQ_MSK(ngpios), 0, - IRQ_NOREQUEST, IRQ_LEVEL | IRQ_NOPROBE); + /* Setup the interrupt handlers. Each chip can have up to 4 + * interrupt handlers, with each handler dealing with 8 GPIO + * pins. + */ + for (i = 0; i < 4; i++) { + int irq = platform_get_irq(pdev, i); - /* Setup irq domain on top of the generic chip. */ - mvchip->domain = irq_domain_add_simple(np, mvchip->chip.ngpio, - mvchip->irqbase, - &irq_domain_simple_ops, - mvchip); - if (!mvchip->domain) { - dev_err(&pdev->dev, "couldn't allocate irq domain %s (DT).\n", - mvchip->chip.label); - err = -ENODEV; - goto err_generic_chip; + if (irq < 0) + continue; + irq_set_chained_handler_and_data(irq, mvebu_gpio_irq_handler, + mvchip); } return 0; -err_generic_chip: - irq_remove_generic_chip(gc, IRQ_MSK(ngpios), IRQ_NOREQUEST, - IRQ_LEVEL | IRQ_NOPROBE); - kfree(gc); +err_domain: + irq_domain_remove(mvchip->domain); return err; } From 8d42629be0cb6c58a5c77cd02e47974c0e724e63 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 28 Oct 2016 22:22:33 -0400 Subject: [PATCH 834/884] svcrdma: backchannel cannot share a page for send and rcv buffers The underlying transport releases the page pointed to by rq_buffer during xprt_rdma_bc_send_request. When the backchannel reply arrives, rq_rbuffer then points to freed memory. Fixes: 68778945e46f ('SUNRPC: Separate buffer pointers for RPC ...') Signed-off-by: Chuck Lever Cc: Jeff Layton Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index fc4535ead7c2..20027f8de129 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -177,19 +177,26 @@ xprt_rdma_bc_allocate(struct rpc_task *task) return -EINVAL; } + /* svc_rdma_sendto releases this page */ page = alloc_page(RPCRDMA_DEF_GFP); if (!page) return -ENOMEM; - rqst->rq_buffer = page_address(page); - rqst->rq_rbuffer = (char *)rqst->rq_buffer + rqst->rq_callsize; + + rqst->rq_rbuffer = kmalloc(rqst->rq_rcvsize, RPCRDMA_DEF_GFP); + if (!rqst->rq_rbuffer) { + put_page(page); + return -ENOMEM; + } return 0; } static void xprt_rdma_bc_free(struct rpc_task *task) { - /* No-op: ctxt and page have already been freed. */ + struct rpc_rqst *rqst = task->tk_rqstp; + + kfree(rqst->rq_rbuffer); } static int From f46c445b79906a9da55c13e0a6f6b6a006b892fe Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 29 Oct 2016 18:19:03 -0400 Subject: [PATCH 835/884] nfsd: Fix general protection fault in release_lock_stateid() When I push NFSv4.1 / RDMA hard, (xfstests generic/089, for example), I get this crash on the server: Oct 28 22:04:30 klimt kernel: general protection fault: 0000 [#1] SMP DEBUG_PAGEALLOC Oct 28 22:04:30 klimt kernel: Modules linked in: cts rpcsec_gss_krb5 iTCO_wdt iTCO_vendor_support sb_edac edac_core x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm btrfs irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel lrw gf128mul glue_helper ablk_helper cryptd xor pcspkr raid6_pq i2c_i801 i2c_smbus lpc_ich mfd_core sg mei_me mei ioatdma shpchp wmi ipmi_si ipmi_msghandler rpcrdma ib_ipoib rdma_ucm acpi_power_meter acpi_pad ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c mlx4_ib mlx4_en ib_core sr_mod cdrom sd_mod ast drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm crc32c_intel igb ahci libahci ptp mlx4_core pps_core dca libata i2c_algo_bit i2c_core dm_mirror dm_region_hash dm_log dm_mod Oct 28 22:04:30 klimt kernel: CPU: 7 PID: 1558 Comm: nfsd Not tainted 4.9.0-rc2-00005-g82cd754 #8 Oct 28 22:04:30 klimt kernel: Hardware name: Supermicro Super Server/X10SRL-F, BIOS 1.0c 09/09/2015 Oct 28 22:04:30 klimt kernel: task: ffff880835c3a100 task.stack: ffff8808420d8000 Oct 28 22:04:30 klimt kernel: RIP: 0010:[] [] release_lock_stateid+0x1f/0x60 [nfsd] Oct 28 22:04:30 klimt kernel: RSP: 0018:ffff8808420dbce0 EFLAGS: 00010246 Oct 28 22:04:30 klimt kernel: RAX: ffff88084e6660f0 RBX: ffff88084e667020 RCX: 0000000000000000 Oct 28 22:04:30 klimt kernel: RDX: 0000000000000007 RSI: 0000000000000000 RDI: ffff88084e667020 Oct 28 22:04:30 klimt kernel: RBP: ffff8808420dbcf8 R08: 0000000000000001 R09: 0000000000000000 Oct 28 22:04:30 klimt kernel: R10: ffff880835c3a100 R11: ffff880835c3aca8 R12: 6b6b6b6b6b6b6b6b Oct 28 22:04:30 klimt kernel: R13: ffff88084e6670d8 R14: ffff880835f546f0 R15: ffff880835f1c548 Oct 28 22:04:30 klimt kernel: FS: 0000000000000000(0000) GS:ffff88087bdc0000(0000) knlGS:0000000000000000 Oct 28 22:04:30 klimt kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 Oct 28 22:04:30 klimt kernel: CR2: 00007ff020389000 CR3: 0000000001c06000 CR4: 00000000001406e0 Oct 28 22:04:30 klimt kernel: Stack: Oct 28 22:04:30 klimt kernel: ffff88084e667020 0000000000000000 ffff88084e6670d8 ffff8808420dbd20 Oct 28 22:04:30 klimt kernel: ffffffffa05ac80d ffff880835f54548 ffff88084e640008 ffff880835f545b0 Oct 28 22:04:30 klimt kernel: ffff8808420dbd70 ffffffffa059803d ffff880835f1c768 0000000000000870 Oct 28 22:04:30 klimt kernel: Call Trace: Oct 28 22:04:30 klimt kernel: [] nfsd4_free_stateid+0xfd/0x1b0 [nfsd] Oct 28 22:04:30 klimt kernel: [] nfsd4_proc_compound+0x40d/0x690 [nfsd] Oct 28 22:04:30 klimt kernel: [] nfsd_dispatch+0xd4/0x1d0 [nfsd] Oct 28 22:04:30 klimt kernel: [] svc_process_common+0x3d9/0x700 [sunrpc] Oct 28 22:04:30 klimt kernel: [] svc_process+0xf4/0x330 [sunrpc] Oct 28 22:04:30 klimt kernel: [] nfsd+0xfa/0x160 [nfsd] Oct 28 22:04:30 klimt kernel: [] ? nfsd_destroy+0x170/0x170 [nfsd] Oct 28 22:04:30 klimt kernel: [] kthread+0x10b/0x120 Oct 28 22:04:30 klimt kernel: [] ? kthread_stop+0x280/0x280 Oct 28 22:04:30 klimt kernel: [] ret_from_fork+0x2a/0x40 Oct 28 22:04:30 klimt kernel: Code: c3 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 55 48 89 e5 41 55 41 54 53 48 8b 87 b0 00 00 00 48 89 fb 4c 8b a0 98 00 00 00 <49> 8b 44 24 20 48 8d b8 80 03 00 00 e8 10 66 1a e1 48 89 df e8 Oct 28 22:04:30 klimt kernel: RIP [] release_lock_stateid+0x1f/0x60 [nfsd] Oct 28 22:04:30 klimt kernel: RSP Oct 28 22:04:30 klimt kernel: ---[ end trace cf5d0b371973e167 ]--- Jeff Layton says: > Hm...now that I look though, this is a little suspicious: > > struct nfs4_openowner *oo = openowner(stp->st_openstp->st_stateowner); > > I wonder if it's possible for the openstateid to have already been > destroyed at this point. > > We might be better off doing something like this to get the client pointer: > > stp->st_stid.sc_client; > > ...which should be more direct and less dependent on other stateids > staying valid. With the suggested change, I am no longer able to reproduce the above oops. v2: Fix unhash_lock_stateid() as well Fix-suggested-by: Jeff Layton Fixes: 42691398be08 ('nfsd: Fix race between FREE_STATEID and LOCK') Signed-off-by: Chuck Lever Reviewed-by: Jeff Layton Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 95474196a17e..4b4beaaa4eaa 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1227,9 +1227,7 @@ static void put_ol_stateid_locked(struct nfs4_ol_stateid *stp, static bool unhash_lock_stateid(struct nfs4_ol_stateid *stp) { - struct nfs4_openowner *oo = openowner(stp->st_openstp->st_stateowner); - - lockdep_assert_held(&oo->oo_owner.so_client->cl_lock); + lockdep_assert_held(&stp->st_stid.sc_client->cl_lock); list_del_init(&stp->st_locks); nfs4_unhash_stid(&stp->st_stid); @@ -1238,12 +1236,12 @@ static bool unhash_lock_stateid(struct nfs4_ol_stateid *stp) static void release_lock_stateid(struct nfs4_ol_stateid *stp) { - struct nfs4_openowner *oo = openowner(stp->st_openstp->st_stateowner); + struct nfs4_client *clp = stp->st_stid.sc_client; bool unhashed; - spin_lock(&oo->oo_owner.so_client->cl_lock); + spin_lock(&clp->cl_lock); unhashed = unhash_lock_stateid(stp); - spin_unlock(&oo->oo_owner.so_client->cl_lock); + spin_unlock(&clp->cl_lock); if (unhashed) nfs4_put_stid(&stp->st_stid); } From ea26e4ec08d4727e3a9e48a6b74695861effcbd9 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 1 Nov 2016 00:39:48 +0100 Subject: [PATCH 836/884] KVM: x86: drop TSC offsetting kvm_x86_ops to fix KVM_GET/SET_CLOCK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit a545ab6a0085 ("kvm: x86: add tsc_offset field to struct kvm_vcpu_arch", 2016-09-07) the offset between host and L1 TSC is cached and need not be fished out of the VMCS or VMCB. This means that we can implement adjust_tsc_offset_guest and read_l1_tsc entirely in generic code. The simplification is particularly significant for VMX code, where vmx->nested.vmcs01_tsc_offset was duplicating what is now in vcpu->arch.tsc_offset. Therefore the vmcs01_tsc_offset can be dropped completely. More importantly, this fixes KVM_GET_CLOCK/KVM_SET_CLOCK which, after commit 108b249c453d ("KVM: x86: introduce get_kvmclock_ns", 2016-09-01) called read_l1_tsc while the VMCS was not loaded. It thus returned bogus values on Intel CPUs. Fixes: 108b249c453dd7132599ab6dc7e435a7036c193f Reported-by: Roman Kagan Reviewed-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 3 --- arch/x86/kvm/svm.c | 23 ------------------- arch/x86/kvm/vmx.c | 39 +++------------------------------ arch/x86/kvm/x86.c | 6 ++--- 4 files changed, 6 insertions(+), 65 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4b20f7304b9c..bdde80731f49 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -948,7 +948,6 @@ struct kvm_x86_ops { int (*get_lpage_level)(void); bool (*rdtscp_supported)(void); bool (*invpcid_supported)(void); - void (*adjust_tsc_offset_guest)(struct kvm_vcpu *vcpu, s64 adjustment); void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); @@ -958,8 +957,6 @@ struct kvm_x86_ops { void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); - u64 (*read_l1_tsc)(struct kvm_vcpu *vcpu, u64 host_tsc); - void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2); int (*check_intercept)(struct kvm_vcpu *vcpu, diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index f8157a36ab09..8ca1eca5038d 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1138,21 +1138,6 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) mark_dirty(svm->vmcb, VMCB_INTERCEPTS); } -static void svm_adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, s64 adjustment) -{ - struct vcpu_svm *svm = to_svm(vcpu); - - svm->vmcb->control.tsc_offset += adjustment; - if (is_guest_mode(vcpu)) - svm->nested.hsave->control.tsc_offset += adjustment; - else - trace_kvm_write_tsc_offset(vcpu->vcpu_id, - svm->vmcb->control.tsc_offset - adjustment, - svm->vmcb->control.tsc_offset); - - mark_dirty(svm->vmcb, VMCB_INTERCEPTS); -} - static void avic_init_vmcb(struct vcpu_svm *svm) { struct vmcb *vmcb = svm->vmcb; @@ -3449,12 +3434,6 @@ static int cr8_write_interception(struct vcpu_svm *svm) return 0; } -static u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) -{ - struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu)); - return vmcb->control.tsc_offset + host_tsc; -} - static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { struct vcpu_svm *svm = to_svm(vcpu); @@ -5422,8 +5401,6 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .has_wbinvd_exit = svm_has_wbinvd_exit, .write_tsc_offset = svm_write_tsc_offset, - .adjust_tsc_offset_guest = svm_adjust_tsc_offset_guest, - .read_l1_tsc = svm_read_l1_tsc, .set_tdp_cr3 = set_tdp_cr3, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 74a4df993a51..754c3a7f444a 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -421,7 +421,6 @@ struct nested_vmx { /* vmcs02_list cache of VMCSs recently used to run L2 guests */ struct list_head vmcs02_pool; int vmcs02_num; - u64 vmcs01_tsc_offset; bool change_vmcs01_virtual_x2apic_mode; /* L2 must run next, and mustn't decide to exit to L1. */ bool nested_run_pending; @@ -2604,20 +2603,6 @@ static u64 guest_read_tsc(struct kvm_vcpu *vcpu) return kvm_scale_tsc(vcpu, host_tsc) + tsc_offset; } -/* - * Like guest_read_tsc, but always returns L1's notion of the timestamp - * counter, even if a nested guest (L2) is currently running. - */ -static u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) -{ - u64 tsc_offset; - - tsc_offset = is_guest_mode(vcpu) ? - to_vmx(vcpu)->nested.vmcs01_tsc_offset : - vmcs_read64(TSC_OFFSET); - return host_tsc + tsc_offset; -} - /* * writes 'offset' into guest's timestamp counter offset register */ @@ -2631,7 +2616,6 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) * to the newly set TSC to get L2's TSC. */ struct vmcs12 *vmcs12; - to_vmx(vcpu)->nested.vmcs01_tsc_offset = offset; /* recalculate vmcs02.TSC_OFFSET: */ vmcs12 = get_vmcs12(vcpu); vmcs_write64(TSC_OFFSET, offset + @@ -2644,19 +2628,6 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) } } -static void vmx_adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, s64 adjustment) -{ - u64 offset = vmcs_read64(TSC_OFFSET); - - vmcs_write64(TSC_OFFSET, offset + adjustment); - if (is_guest_mode(vcpu)) { - /* Even when running L2, the adjustment needs to apply to L1 */ - to_vmx(vcpu)->nested.vmcs01_tsc_offset += adjustment; - } else - trace_kvm_write_tsc_offset(vcpu->vcpu_id, offset, - offset + adjustment); -} - static bool guest_cpuid_has_vmx(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best = kvm_find_cpuid_entry(vcpu, 1, 0); @@ -10061,9 +10032,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) vmcs_write64(TSC_OFFSET, - vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset); + vcpu->arch.tsc_offset + vmcs12->tsc_offset); else - vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset); + vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset); if (kvm_has_tsc_control) decache_tsc_multiplier(vmx); @@ -10293,8 +10264,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) enter_guest_mode(vcpu); - vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET); - if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); @@ -10818,7 +10787,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, load_vmcs12_host_state(vcpu, vmcs12); /* Update any VMCS fields that might have changed while L2 ran */ - vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset); + vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset); if (vmx->hv_deadline_tsc == -1) vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL, PIN_BASED_VMX_PREEMPTION_TIMER); @@ -11339,8 +11308,6 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, .write_tsc_offset = vmx_write_tsc_offset, - .adjust_tsc_offset_guest = vmx_adjust_tsc_offset_guest, - .read_l1_tsc = vmx_read_l1_tsc, .set_tdp_cr3 = vmx_set_cr3, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e954be8a3185..3017de0431bd 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1409,7 +1409,7 @@ static u64 kvm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) { - return kvm_x86_ops->read_l1_tsc(vcpu, kvm_scale_tsc(vcpu, host_tsc)); + return vcpu->arch.tsc_offset + kvm_scale_tsc(vcpu, host_tsc); } EXPORT_SYMBOL_GPL(kvm_read_l1_tsc); @@ -1547,7 +1547,7 @@ EXPORT_SYMBOL_GPL(kvm_write_tsc); static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, s64 adjustment) { - kvm_x86_ops->adjust_tsc_offset_guest(vcpu, adjustment); + kvm_vcpu_write_tsc_offset(vcpu, vcpu->arch.tsc_offset + adjustment); } static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment) @@ -1555,7 +1555,7 @@ static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment) if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio) WARN_ON(adjustment < 0); adjustment = kvm_scale_tsc(vcpu, (u64) adjustment); - kvm_x86_ops->adjust_tsc_offset_guest(vcpu, adjustment); + adjust_tsc_offset_guest(vcpu, adjustment); } #ifdef CONFIG_X86_64 From 355f4fb1405ec29d0fac49b4d41fcd78cbd455d5 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Fri, 28 Oct 2016 08:29:39 -0700 Subject: [PATCH 837/884] kvm: nVMX: VMCLEAR an active shadow VMCS after last use After a successful VM-entry with the "VMCS shadowing" VM-execution control set, the shadow VMCS referenced by the VMCS link pointer field in the current VMCS becomes active on the logical processor. A VMCS that is made active on more than one logical processor may become corrupted. Therefore, before an active VMCS can be migrated to another logical processor, the first logical processor must execute a VMCLEAR for the active VMCS. VMCLEAR both ensures that all VMCS data are written to memory and makes the VMCS inactive. Signed-off-by: Jim Mattson Reviewed-By: David Matlack Message-Id: <1477668579-22555-1-git-send-email-jmattson@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 754c3a7f444a..5382b82462fc 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -187,6 +187,7 @@ struct vmcs { */ struct loaded_vmcs { struct vmcs *vmcs; + struct vmcs *shadow_vmcs; int cpu; int launched; struct list_head loaded_vmcss_on_cpu_link; @@ -411,7 +412,6 @@ struct nested_vmx { * memory during VMXOFF, VMCLEAR, VMPTRLD. */ struct vmcs12 *cached_vmcs12; - struct vmcs *current_shadow_vmcs; /* * Indicates if the shadow vmcs must be updated with the * data hold by vmcs12 @@ -1418,6 +1418,8 @@ static void vmcs_clear(struct vmcs *vmcs) static inline void loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs) { vmcs_clear(loaded_vmcs->vmcs); + if (loaded_vmcs->shadow_vmcs && loaded_vmcs->launched) + vmcs_clear(loaded_vmcs->shadow_vmcs); loaded_vmcs->cpu = -1; loaded_vmcs->launched = 0; } @@ -3533,6 +3535,7 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) loaded_vmcs_clear(loaded_vmcs); free_vmcs(loaded_vmcs->vmcs); loaded_vmcs->vmcs = NULL; + WARN_ON(loaded_vmcs->shadow_vmcs != NULL); } static void free_kvm_area(void) @@ -6667,6 +6670,7 @@ static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx) if (!item) return NULL; item->vmcs02.vmcs = alloc_vmcs(); + item->vmcs02.shadow_vmcs = NULL; if (!item->vmcs02.vmcs) { kfree(item); return NULL; @@ -7043,7 +7047,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu) shadow_vmcs->revision_id |= (1u << 31); /* init shadow vmcs */ vmcs_clear(shadow_vmcs); - vmx->nested.current_shadow_vmcs = shadow_vmcs; + vmx->vmcs01.shadow_vmcs = shadow_vmcs; } INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool)); @@ -7145,8 +7149,11 @@ static void free_nested(struct vcpu_vmx *vmx) free_page((unsigned long)vmx->nested.msr_bitmap); vmx->nested.msr_bitmap = NULL; } - if (enable_shadow_vmcs) - free_vmcs(vmx->nested.current_shadow_vmcs); + if (enable_shadow_vmcs) { + vmcs_clear(vmx->vmcs01.shadow_vmcs); + free_vmcs(vmx->vmcs01.shadow_vmcs); + vmx->vmcs01.shadow_vmcs = NULL; + } kfree(vmx->nested.cached_vmcs12); /* Unpin physical memory we referred to in current vmcs02 */ if (vmx->nested.apic_access_page) { @@ -7323,7 +7330,7 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx) int i; unsigned long field; u64 field_value; - struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs; + struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs; const unsigned long *fields = shadow_read_write_fields; const int num_fields = max_shadow_read_write_fields; @@ -7372,7 +7379,7 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx) int i, q; unsigned long field; u64 field_value = 0; - struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs; + struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs; vmcs_load(shadow_vmcs); @@ -7562,7 +7569,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS); vmcs_write64(VMCS_LINK_POINTER, - __pa(vmx->nested.current_shadow_vmcs)); + __pa(vmx->vmcs01.shadow_vmcs)); vmx->nested.sync_shadow_vmcs = true; } } @@ -9127,6 +9134,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) vmx->loaded_vmcs = &vmx->vmcs01; vmx->loaded_vmcs->vmcs = alloc_vmcs(); + vmx->loaded_vmcs->shadow_vmcs = NULL; if (!vmx->loaded_vmcs->vmcs) goto free_msrs; if (!vmm_exclusive) From d9092f52d7e61dd1557f2db2400ddb430e85937e Mon Sep 17 00:00:00 2001 From: Owen Hofmann Date: Thu, 27 Oct 2016 11:25:52 -0700 Subject: [PATCH 838/884] kvm: x86: Check memopp before dereference (CVE-2016-8630) Commit 41061cdb98 ("KVM: emulate: do not initialize memopp") removes a check for non-NULL under incorrect assumptions. An undefined instruction with a ModR/M byte with Mod=0 and R/M-5 (e.g. 0xc7 0x15) will attempt to dereference a null pointer here. Fixes: 41061cdb98a0bec464278b4db8e894a3121671f5 Message-Id: <1477592752-126650-2-git-send-email-osh@google.com> Signed-off-by: Owen Hofmann Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 4e95d3eb2955..cbd7b92585bb 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -5045,7 +5045,7 @@ done_prefixes: /* Decode and fetch the destination operand: register or memory. */ rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask); - if (ctxt->rip_relative) + if (ctxt->rip_relative && likely(ctxt->memopp)) ctxt->memopp->addr.mem.ea = address_mask(ctxt, ctxt->memopp->addr.mem.ea + ctxt->_eip); From 6f63d0f6be4e48b3042df9541694f6f2c405017e Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 28 Oct 2016 20:56:07 +0200 Subject: [PATCH 839/884] parisc: use KERN_CONT when printing device inventory Recent changes to printk require KERN_CONT uses to continue logging messages. So add KERN_CONT to output of device inventory. Signed-off-by: Helge Deller --- arch/parisc/kernel/drivers.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c index f8150669b8c6..700e2d2da096 100644 --- a/arch/parisc/kernel/drivers.c +++ b/arch/parisc/kernel/drivers.c @@ -873,11 +873,11 @@ static void print_parisc_device(struct parisc_device *dev) if (dev->num_addrs) { int k; - printk(", additional addresses: "); + pr_cont(", additional addresses: "); for (k = 0; k < dev->num_addrs; k++) - printk("0x%lx ", dev->addr[k]); + pr_cont("0x%lx ", dev->addr[k]); } - printk("\n"); + pr_cont("\n"); } /** From f4125cfdb3008363137f744c101e5d76ead760ba Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Fri, 28 Oct 2016 22:13:42 +0200 Subject: [PATCH 840/884] parisc: Avoid trashing sr2 and sr3 in LWS code There is no need to trash sr2 and sr3 in the Light-weight syscall (LWS). sr2 already points to kernel space (it's zero in userspace, otherwise syscalls wouldn't work), and since the LWS code is executed in userspace, we can simply ignore to preload sr3. Signed-off-by: John David Anglin Signed-off-by: Helge Deller --- arch/parisc/kernel/syscall.S | 53 ++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 29 deletions(-) diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index d03422e5f188..40190dbecdc0 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -474,11 +474,6 @@ lws_start: comiclr,>> __NR_lws_entries, %r20, %r0 b,n lws_exit_nosys - /* WARNING: Trashing sr2 and sr3 */ - mfsp %sr7,%r1 /* get userspace into sr3 */ - mtsp %r1,%sr3 - mtsp %r0,%sr2 /* get kernel space into sr2 */ - /* Load table start */ ldil L%lws_table, %r1 ldo R%lws_table(%r1), %r28 /* Scratch use of r28 */ @@ -627,9 +622,9 @@ cas_action: stw %r1, 4(%sr2,%r20) #endif /* The load and store could fail */ -1: ldw,ma 0(%sr3,%r26), %r28 +1: ldw,ma 0(%r26), %r28 sub,<> %r28, %r25, %r0 -2: stw,ma %r24, 0(%sr3,%r26) +2: stw,ma %r24, 0(%r26) /* Free lock */ stw,ma %r20, 0(%sr2,%r20) #if ENABLE_LWS_DEBUG @@ -706,9 +701,9 @@ lws_compare_and_swap_2: nop /* 8bit load */ -4: ldb 0(%sr3,%r25), %r25 +4: ldb 0(%r25), %r25 b cas2_lock_start -5: ldb 0(%sr3,%r24), %r24 +5: ldb 0(%r24), %r24 nop nop nop @@ -716,9 +711,9 @@ lws_compare_and_swap_2: nop /* 16bit load */ -6: ldh 0(%sr3,%r25), %r25 +6: ldh 0(%r25), %r25 b cas2_lock_start -7: ldh 0(%sr3,%r24), %r24 +7: ldh 0(%r24), %r24 nop nop nop @@ -726,9 +721,9 @@ lws_compare_and_swap_2: nop /* 32bit load */ -8: ldw 0(%sr3,%r25), %r25 +8: ldw 0(%r25), %r25 b cas2_lock_start -9: ldw 0(%sr3,%r24), %r24 +9: ldw 0(%r24), %r24 nop nop nop @@ -737,14 +732,14 @@ lws_compare_and_swap_2: /* 64bit load */ #ifdef CONFIG_64BIT -10: ldd 0(%sr3,%r25), %r25 -11: ldd 0(%sr3,%r24), %r24 +10: ldd 0(%r25), %r25 +11: ldd 0(%r24), %r24 #else /* Load new value into r22/r23 - high/low */ -10: ldw 0(%sr3,%r25), %r22 -11: ldw 4(%sr3,%r25), %r23 +10: ldw 0(%r25), %r22 +11: ldw 4(%r25), %r23 /* Load new value into fr4 for atomic store later */ -12: flddx 0(%sr3,%r24), %fr4 +12: flddx 0(%r24), %fr4 #endif cas2_lock_start: @@ -794,30 +789,30 @@ cas2_action: ldo 1(%r0),%r28 /* 8bit CAS */ -13: ldb,ma 0(%sr3,%r26), %r29 +13: ldb,ma 0(%r26), %r29 sub,= %r29, %r25, %r0 b,n cas2_end -14: stb,ma %r24, 0(%sr3,%r26) +14: stb,ma %r24, 0(%r26) b cas2_end copy %r0, %r28 nop nop /* 16bit CAS */ -15: ldh,ma 0(%sr3,%r26), %r29 +15: ldh,ma 0(%r26), %r29 sub,= %r29, %r25, %r0 b,n cas2_end -16: sth,ma %r24, 0(%sr3,%r26) +16: sth,ma %r24, 0(%r26) b cas2_end copy %r0, %r28 nop nop /* 32bit CAS */ -17: ldw,ma 0(%sr3,%r26), %r29 +17: ldw,ma 0(%r26), %r29 sub,= %r29, %r25, %r0 b,n cas2_end -18: stw,ma %r24, 0(%sr3,%r26) +18: stw,ma %r24, 0(%r26) b cas2_end copy %r0, %r28 nop @@ -825,22 +820,22 @@ cas2_action: /* 64bit CAS */ #ifdef CONFIG_64BIT -19: ldd,ma 0(%sr3,%r26), %r29 +19: ldd,ma 0(%r26), %r29 sub,*= %r29, %r25, %r0 b,n cas2_end -20: std,ma %r24, 0(%sr3,%r26) +20: std,ma %r24, 0(%r26) copy %r0, %r28 #else /* Compare first word */ -19: ldw,ma 0(%sr3,%r26), %r29 +19: ldw,ma 0(%r26), %r29 sub,= %r29, %r22, %r0 b,n cas2_end /* Compare second word */ -20: ldw,ma 4(%sr3,%r26), %r29 +20: ldw,ma 4(%r26), %r29 sub,= %r29, %r23, %r0 b,n cas2_end /* Perform the store */ -21: fstdx %fr4, 0(%sr3,%r26) +21: fstdx %fr4, 0(%r26) copy %r0, %r28 #endif From 6ed518328d0189e0fdf1bb7c73290d546143ea66 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Fri, 28 Oct 2016 23:00:34 -0400 Subject: [PATCH 841/884] parisc: Ensure consistent state when switching to kernel stack at syscall entry We have one critical section in the syscall entry path in which we switch from the userspace stack to kernel stack. In the event of an external interrupt, the interrupt code distinguishes between those two states by analyzing the value of sr7. If sr7 is zero, it uses the kernel stack. Therefore it's important, that the value of sr7 is in sync with the currently enabled stack. This patch now disables interrupts while executing the critical section. This prevents the interrupt handler to possibly see an inconsistent state which in the worst case can lead to crashes. Interestingly, in the syscall exit path interrupts were already disabled in the critical section which switches back to the userspace stack. Cc: Signed-off-by: John David Anglin Signed-off-by: Helge Deller --- arch/parisc/kernel/syscall.S | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index 40190dbecdc0..deec1f8465e5 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -106,8 +106,6 @@ linux_gateway_entry: mtsp %r0,%sr4 /* get kernel space into sr4 */ mtsp %r0,%sr5 /* get kernel space into sr5 */ mtsp %r0,%sr6 /* get kernel space into sr6 */ - mfsp %sr7,%r1 /* save user sr7 */ - mtsp %r1,%sr3 /* and store it in sr3 */ #ifdef CONFIG_64BIT /* for now we can *always* set the W bit on entry to the syscall @@ -133,6 +131,14 @@ linux_gateway_entry: depdi 0, 31, 32, %r21 1: #endif + + /* We use a rsm/ssm pair to prevent sr3 from being clobbered + * by external interrupts. + */ + mfsp %sr7,%r1 /* save user sr7 */ + rsm PSW_SM_I, %r0 /* disable interrupts */ + mtsp %r1,%sr3 /* and store it in sr3 */ + mfctl %cr30,%r1 xor %r1,%r30,%r30 /* ye olde xor trick */ xor %r1,%r30,%r1 @@ -147,6 +153,7 @@ linux_gateway_entry: */ mtsp %r0,%sr7 /* get kernel space into sr7 */ + ssm PSW_SM_I, %r0 /* enable interrupts */ STREGM %r1,FRAME_SIZE(%r30) /* save r1 (usp) here for now */ mfctl %cr30,%r1 /* get task ptr in %r1 */ LDREG TI_TASK(%r1),%r1 From 6a6e2a14bbc06a627acc71108808fdc866078db4 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 29 Oct 2016 23:52:43 +0200 Subject: [PATCH 842/884] parisc: Use LINUX_GATEWAY_ADDR define instead of hardcoded value LINUX_GATEWAY_ADDR is defined in unistd.h. Let's use it. Signed-off-by: Helge Deller --- arch/parisc/kernel/syscall.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index deec1f8465e5..23de307c3052 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -100,7 +100,7 @@ set_thread_pointer: .endr /* This address must remain fixed at 0x100 for glibc's syscalls to work */ - .align 256 + .align LINUX_GATEWAY_ADDR linux_gateway_entry: gate .+8, %r0 /* become privileged */ mtsp %r0,%sr4 /* get kernel space into sr4 */ From 18088db042dd9ae25e8a1e069eb3a16db601ef8a Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 29 Oct 2016 23:47:57 +0200 Subject: [PATCH 843/884] parisc: Ignore the pkey system calls for now Signed-off-by: Helge Deller --- arch/parisc/include/uapi/asm/unistd.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/parisc/include/uapi/asm/unistd.h b/arch/parisc/include/uapi/asm/unistd.h index a9b9407f38f7..6b0741e7a7ed 100644 --- a/arch/parisc/include/uapi/asm/unistd.h +++ b/arch/parisc/include/uapi/asm/unistd.h @@ -368,7 +368,9 @@ #define __IGNORE_select /* newselect */ #define __IGNORE_fadvise64 /* fadvise64_64 */ - +#define __IGNORE_pkey_mprotect +#define __IGNORE_pkey_alloc +#define __IGNORE_pkey_free #define LINUX_GATEWAY_ADDR 0x100 From eed6f0eda0a009c282bcdd828c273e1239ae0cac Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Tue, 18 Oct 2016 15:32:30 -0200 Subject: [PATCH 844/884] virtio-gpu: fix vblank events virtio-gpu sends vblank events in virtio_gpu_crtc_atomic_flush, and because of that it must be called for disabled planes too. Ask drm_atomic_helper_commit_planes to do that. v2: update to use new drm_atomic_helper_commit_planes() API. Signed-off-by: Gerd Hoffmann Signed-off-by: Gustavo Padovan Signed-off-by: Dave Airlie --- drivers/gpu/drm/virtio/virtgpu_display.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/virtio/virtgpu_display.c b/drivers/gpu/drm/virtio/virtgpu_display.c index 7cf3678623c3..58048709c34e 100644 --- a/drivers/gpu/drm/virtio/virtgpu_display.c +++ b/drivers/gpu/drm/virtio/virtgpu_display.c @@ -338,8 +338,7 @@ static void vgdev_atomic_commit_tail(struct drm_atomic_state *state) drm_atomic_helper_commit_modeset_disables(dev, state); drm_atomic_helper_commit_modeset_enables(dev, state); - drm_atomic_helper_commit_planes(dev, state, - DRM_PLANE_COMMIT_ACTIVE_ONLY); + drm_atomic_helper_commit_planes(dev, state, 0); drm_atomic_helper_commit_hw_done(state); From 382005027fedc50b28d40ae64ef1461cca38953e Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Wed, 2 Nov 2016 19:50:29 +0900 Subject: [PATCH 845/884] sched/core: Fix oops in sched_show_task() When CONFIG_THREAD_INFO_IN_TASK=y, it is possible that an exited thread remains in the task list after its stack pointer was already set to NULL. Therefore, thread_saved_pc() and stack_not_used() in sched_show_task() will trigger NULL pointer dereference if an attempt to dump such thread's traces (e.g. SysRq-t, khungtaskd) is made. Since show_stack() in sched_show_task() calls try_get_task_stack() and sched_show_task() is called from interrupt context, calling try_get_task_stack() from sched_show_task() will be safe as well. Signed-off-by: Tetsuo Handa Acked-by: Andy Lutomirski Acked-by: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: bp@alien8.de Cc: brgerst@gmail.com Cc: jann@thejh.net Cc: keescook@chromium.org Cc: linux-api@vger.kernel.org Cc: tycho.andersen@canonical.com Link: http://lkml.kernel.org/r/201611021950.FEJ34368.HFFJOOMLtQOVSF@I-love.SAKURA.ne.jp Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 42d4027f9e26..9abf66b6c271 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5192,6 +5192,8 @@ void sched_show_task(struct task_struct *p) int ppid; unsigned long state = p->state; + if (!try_get_task_stack(p)) + return; if (state) state = __ffs(state) + 1; printk(KERN_INFO "%-15.15s %c", p->comm, @@ -5221,6 +5223,7 @@ void sched_show_task(struct task_struct *p) print_worker_info(KERN_INFO, p); show_stack(p, NULL); + put_task_stack(p); } void show_state_filter(unsigned long state_filter) From 8243d5597793b5e85143c9a935e1b971c59740a9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 1 Nov 2016 17:47:18 -0600 Subject: [PATCH 846/884] sched/core: Remove pointless printout in sched_show_task() In sched_show_task() we print out a useless hex number, not even a symbol, and there's a big question mark whether this even makes sense anyway, I suspect we should just remove it all. Signed-off-by: Linus Torvalds Acked-by: Andy Lutomirski Cc: Peter Zijlstra Cc: Tetsuo Handa Cc: Thomas Gleixner Cc: bp@alien8.de Cc: brgerst@gmail.com Cc: jann@thejh.net Cc: keescook@chromium.org Cc: linux-api@vger.kernel.org Cc: tycho.andersen@canonical.com Link: http://lkml.kernel.org/r/CA+55aFzphURPFzAvU4z6Moy7ZmimcwPuUdYU8bj9z0J+S8X1rw@mail.gmail.com Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 9abf66b6c271..154fd689fe02 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5198,17 +5198,8 @@ void sched_show_task(struct task_struct *p) state = __ffs(state) + 1; printk(KERN_INFO "%-15.15s %c", p->comm, state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?'); -#if BITS_PER_LONG == 32 - if (state == TASK_RUNNING) - printk(KERN_CONT " running "); - else - printk(KERN_CONT " %08lx ", thread_saved_pc(p)); -#else if (state == TASK_RUNNING) printk(KERN_CONT " running task "); - else - printk(KERN_CONT " %016lx ", thread_saved_pc(p)); -#endif #ifdef CONFIG_DEBUG_STACK_USAGE free = stack_not_used(p); #endif From 667121ace9dbafb368618dbabcf07901c962ddac Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sat, 29 Oct 2016 21:28:18 +0200 Subject: [PATCH 847/884] firewire: net: guard against rx buffer overflows The IP-over-1394 driver firewire-net lacked input validation when handling incoming fragmented datagrams. A maliciously formed fragment with a respectively large datagram_offset would cause a memcpy past the datagram buffer. So, drop any packets carrying a fragment with offset + length larger than datagram_size. In addition, ensure that - GASP header, unfragmented encapsulation header, or fragment encapsulation header actually exists before we access it, - the encapsulated datagram or fragment is of nonzero size. Reported-by: Eyal Itkin Reviewed-by: Eyal Itkin Fixes: CVE 2016-8633 Cc: stable@vger.kernel.org Signed-off-by: Stefan Richter --- drivers/firewire/net.c | 51 +++++++++++++++++++++++++++++------------- 1 file changed, 35 insertions(+), 16 deletions(-) diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c index 309311b1faae..cbaa8d776d83 100644 --- a/drivers/firewire/net.c +++ b/drivers/firewire/net.c @@ -578,6 +578,9 @@ static int fwnet_incoming_packet(struct fwnet_device *dev, __be32 *buf, int len, int retval; u16 ether_type; + if (len <= RFC2374_UNFRAG_HDR_SIZE) + return 0; + hdr.w0 = be32_to_cpu(buf[0]); lf = fwnet_get_hdr_lf(&hdr); if (lf == RFC2374_HDR_UNFRAG) { @@ -602,7 +605,12 @@ static int fwnet_incoming_packet(struct fwnet_device *dev, __be32 *buf, int len, return fwnet_finish_incoming_packet(net, skb, source_node_id, is_broadcast, ether_type); } + /* A datagram fragment has been received, now the fun begins. */ + + if (len <= RFC2374_FRAG_HDR_SIZE) + return 0; + hdr.w1 = ntohl(buf[1]); buf += 2; len -= RFC2374_FRAG_HDR_SIZE; @@ -616,6 +624,9 @@ static int fwnet_incoming_packet(struct fwnet_device *dev, __be32 *buf, int len, datagram_label = fwnet_get_hdr_dgl(&hdr); dg_size = fwnet_get_hdr_dg_size(&hdr); /* ??? + 1 */ + if (fg_off + len > dg_size) + return 0; + spin_lock_irqsave(&dev->lock, flags); peer = fwnet_peer_find_by_node_id(dev, source_node_id, generation); @@ -722,6 +733,22 @@ static void fwnet_receive_packet(struct fw_card *card, struct fw_request *r, fw_send_response(card, r, rcode); } +static int gasp_source_id(__be32 *p) +{ + return be32_to_cpu(p[0]) >> 16; +} + +static u32 gasp_specifier_id(__be32 *p) +{ + return (be32_to_cpu(p[0]) & 0xffff) << 8 | + (be32_to_cpu(p[1]) & 0xff000000) >> 24; +} + +static u32 gasp_version(__be32 *p) +{ + return be32_to_cpu(p[1]) & 0xffffff; +} + static void fwnet_receive_broadcast(struct fw_iso_context *context, u32 cycle, size_t header_length, void *header, void *data) { @@ -731,9 +758,6 @@ static void fwnet_receive_broadcast(struct fw_iso_context *context, __be32 *buf_ptr; int retval; u32 length; - u16 source_node_id; - u32 specifier_id; - u32 ver; unsigned long offset; unsigned long flags; @@ -750,22 +774,17 @@ static void fwnet_receive_broadcast(struct fw_iso_context *context, spin_unlock_irqrestore(&dev->lock, flags); - specifier_id = (be32_to_cpu(buf_ptr[0]) & 0xffff) << 8 - | (be32_to_cpu(buf_ptr[1]) & 0xff000000) >> 24; - ver = be32_to_cpu(buf_ptr[1]) & 0xffffff; - source_node_id = be32_to_cpu(buf_ptr[0]) >> 16; - - if (specifier_id == IANA_SPECIFIER_ID && - (ver == RFC2734_SW_VERSION + if (length > IEEE1394_GASP_HDR_SIZE && + gasp_specifier_id(buf_ptr) == IANA_SPECIFIER_ID && + (gasp_version(buf_ptr) == RFC2734_SW_VERSION #if IS_ENABLED(CONFIG_IPV6) - || ver == RFC3146_SW_VERSION + || gasp_version(buf_ptr) == RFC3146_SW_VERSION #endif - )) { - buf_ptr += 2; - length -= IEEE1394_GASP_HDR_SIZE; - fwnet_incoming_packet(dev, buf_ptr, length, source_node_id, + )) + fwnet_incoming_packet(dev, buf_ptr + 2, + length - IEEE1394_GASP_HDR_SIZE, + gasp_source_id(buf_ptr), context->card->generation, true); - } packet.payload_length = dev->rcv_buffer_size; packet.interrupt = 1; From e9300a4b7bbae83af1f7703938c94cf6dc6d308f Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sun, 30 Oct 2016 17:32:01 +0100 Subject: [PATCH 848/884] firewire: net: fix fragmented datagram_size off-by-one RFC 2734 defines the datagram_size field in fragment encapsulation headers thus: datagram_size: The encoded size of the entire IP datagram. The value of datagram_size [...] SHALL be one less than the value of Total Length in the datagram's IP header (see STD 5, RFC 791). Accordingly, the eth1394 driver of Linux 2.6.36 and older set and got this field with a -/+1 offset: ether1394_tx() /* transmit */ ether1394_encapsulate_prep() hdr->ff.dg_size = dg_size - 1; ether1394_data_handler() /* receive */ if (hdr->common.lf == ETH1394_HDR_LF_FF) dg_size = hdr->ff.dg_size + 1; else dg_size = hdr->sf.dg_size + 1; Likewise, I observe OS X 10.4 and Windows XP Pro SP3 to transmit 1500 byte sized datagrams in fragments with datagram_size=1499 if link fragmentation is required. Only firewire-net sets and gets datagram_size without this offset. The result is lacking interoperability of firewire-net with OS X, Windows XP, and presumably Linux' eth1394. (I did not test with the latter.) For example, FTP data transfers to a Linux firewire-net box with max_rec smaller than the 1500 bytes MTU - from OS X fail entirely, - from Win XP start out with a bunch of fragmented datagrams which time out, then continue with unfragmented datagrams because Win XP temporarily reduces the MTU to 576 bytes. So let's fix firewire-net's datagram_size accessors. Note that firewire-net thereby loses interoperability with unpatched firewire-net, but only if link fragmentation is employed. (This happens with large broadcast datagrams, and with large datagrams on several FireWire CardBus cards with smaller max_rec than equivalent PCI cards, and it can be worked around by setting a small enough MTU.) Cc: stable@vger.kernel.org Signed-off-by: Stefan Richter --- drivers/firewire/net.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c index cbaa8d776d83..15475892af0c 100644 --- a/drivers/firewire/net.c +++ b/drivers/firewire/net.c @@ -73,13 +73,13 @@ struct rfc2734_header { #define fwnet_get_hdr_lf(h) (((h)->w0 & 0xc0000000) >> 30) #define fwnet_get_hdr_ether_type(h) (((h)->w0 & 0x0000ffff)) -#define fwnet_get_hdr_dg_size(h) (((h)->w0 & 0x0fff0000) >> 16) +#define fwnet_get_hdr_dg_size(h) ((((h)->w0 & 0x0fff0000) >> 16) + 1) #define fwnet_get_hdr_fg_off(h) (((h)->w0 & 0x00000fff)) #define fwnet_get_hdr_dgl(h) (((h)->w1 & 0xffff0000) >> 16) -#define fwnet_set_hdr_lf(lf) ((lf) << 30) +#define fwnet_set_hdr_lf(lf) ((lf) << 30) #define fwnet_set_hdr_ether_type(et) (et) -#define fwnet_set_hdr_dg_size(dgs) ((dgs) << 16) +#define fwnet_set_hdr_dg_size(dgs) (((dgs) - 1) << 16) #define fwnet_set_hdr_fg_off(fgo) (fgo) #define fwnet_set_hdr_dgl(dgl) ((dgl) << 16) @@ -622,7 +622,7 @@ static int fwnet_incoming_packet(struct fwnet_device *dev, __be32 *buf, int len, fg_off = fwnet_get_hdr_fg_off(&hdr); } datagram_label = fwnet_get_hdr_dgl(&hdr); - dg_size = fwnet_get_hdr_dg_size(&hdr); /* ??? + 1 */ + dg_size = fwnet_get_hdr_dg_size(&hdr); if (fg_off + len > dg_size) return 0; From 682c1e52215da4a3e89c14aad60bfc0d400b025f Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Sat, 15 Oct 2016 23:03:43 +0100 Subject: [PATCH 849/884] MIPS: CPC: Provide default mips_cpc_default_phys_base to ignore CPC Provide a default implementation of mips_cpc_default_phys_base() which simply returns 0, and adjust mips_cpc_phys_base() to allow for mips_cpc_default_phys_base() returning 0. This allows kernels which include CPC support to be built without platform code & simply ignore the CPC if it wasn't already enabled by the bootloader. This fixes link failures such as the following from generic defconfigs: arch/mips/built-in.o: In function `mips_cpc_phys_base': arch/mips/kernel/mips-cpc.c:47: undefined reference to `mips_cpc_default_phys_base' [ralf@linux-mips.org: changed prototype for coding style compliance.] Signed-off-by: Paul Burton Reported-by: kbuild test robot Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14401/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/mips-cpc.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/mips/kernel/mips-cpc.c b/arch/mips/kernel/mips-cpc.c index 2a45867d3b4f..a4964c334cab 100644 --- a/arch/mips/kernel/mips-cpc.c +++ b/arch/mips/kernel/mips-cpc.c @@ -21,6 +21,11 @@ static DEFINE_PER_CPU_ALIGNED(spinlock_t, cpc_core_lock); static DEFINE_PER_CPU_ALIGNED(unsigned long, cpc_core_lock_flags); +phys_addr_t __weak mips_cpc_default_phys_base(void) +{ + return 0; +} + /** * mips_cpc_phys_base - retrieve the physical base address of the CPC * @@ -43,8 +48,12 @@ static phys_addr_t mips_cpc_phys_base(void) if (cpc_base & CM_GCR_CPC_BASE_CPCEN_MSK) return cpc_base & CM_GCR_CPC_BASE_CPCBASE_MSK; - /* Otherwise, give it the default address & enable it */ + /* Otherwise, use the default address */ cpc_base = mips_cpc_default_phys_base(); + if (!cpc_base) + return cpc_base; + + /* Enable the CPC, mapped at the default address */ write_gcr_cpc_base(cpc_base | CM_GCR_CPC_BASE_CPCEN_MSK); return cpc_base; } From 93032e31a5f12df847639db3ad0b7f300bf44b7b Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 14 Oct 2016 10:17:32 +0100 Subject: [PATCH 850/884] MIPS: Malta: Fixup reboot Commit 10b6ea0959de ("MIPS: Malta: Use syscon-reboot driver to reboot") converted the Malta board to use the generic syscon-reboot driver to handle reboots, but incorrectly used the value 0x4d rather than 0x42 as the magic to write to the reboot register. I also incorrectly believed that syscon/regmap would default to native endianness, but this isn't the case. Force this by specifying with a native-endian property in the devicetree. Signed-off-by: Paul Burton Fixes: 10b6ea0959de ("MIPS: Malta: Use syscon-reboot driver to reboot") Reported-by: Guenter Roeck Cc: linux-mips@linux-mips.org Tested-by: Guenter Roeck Tested-by: Maciej W. Rozycki Patchwork: https://patchwork.linux-mips.org/patch/14396/ Signed-off-by: Ralf Baechle --- arch/mips/boot/dts/mti/malta.dts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/mips/boot/dts/mti/malta.dts b/arch/mips/boot/dts/mti/malta.dts index f604a272d91d..ffe3a1508e72 100644 --- a/arch/mips/boot/dts/mti/malta.dts +++ b/arch/mips/boot/dts/mti/malta.dts @@ -84,12 +84,13 @@ fpga_regs: system-controller@1f000000 { compatible = "mti,malta-fpga", "syscon", "simple-mfd"; reg = <0x1f000000 0x1000>; + native-endian; reboot { compatible = "syscon-reboot"; regmap = <&fpga_regs>; offset = <0x500>; - mask = <0x4d>; + mask = <0x42>; }; }; From 4736697963385e6257ee8e260e97347e858cd962 Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Mon, 17 Oct 2016 17:21:46 +0100 Subject: [PATCH 851/884] MIPS: KASLR: Fix handling of NULL FDT If platform code returns a NULL pointer to the FDT, initial_boot_params will not get set to a valid pointer and attempting to find the /chosen node in it will cause a NULL pointer dereference and the kernel to crash immediately on startup - with no output to the console. Fix this by checking that initial_boot_params is valid before using it. Fixes: 405bc8fd12f5 ("MIPS: Kernel: Implement KASLR using CONFIG_RELOCATABLE") Cc: stable@vger.kernel.org # 4.7+ Signed-off-by: Matt Redfearn Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/14414/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/relocate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/relocate.c b/arch/mips/kernel/relocate.c index ca1cc30c0891..1958910b75c0 100644 --- a/arch/mips/kernel/relocate.c +++ b/arch/mips/kernel/relocate.c @@ -200,7 +200,7 @@ static inline __init unsigned long get_random_boot(void) #if defined(CONFIG_USE_OF) /* Get any additional entropy passed in device tree */ - { + if (initial_boot_params) { int node, len; u64 *prop; From 9a59061cfd7ac00f21111d2e8aa7f4ab11d27f6c Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Mon, 17 Oct 2016 17:25:24 +0100 Subject: [PATCH 852/884] MIPS: generic: Fix KASLR for generic kernel. The KASLR code requires that the plat_get_fdt() function return the address of the device tree, and it must be available early in the boot, before prom_init() is called. Move the code determining the address of the device tree into plat_get_fdt, and call that from prom_init(). The fdt pointer will be set up by plat_get_fdt() called from relocate_kernel initially and once the relocated kernel has started, prom_init() will use it again to determine the address in the relocated image. Fixes: eed0eabd12ef ("MIPS: generic: Introduce generic DT-based board support") Signed-off-by: Matt Redfearn Reviewed-by: James Hogan Reviewed-by: Paul Burton Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/14415/ Signed-off-by: Ralf Baechle --- arch/mips/generic/init.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/mips/generic/init.c b/arch/mips/generic/init.c index 0ea73e845440..d493ccbf274a 100644 --- a/arch/mips/generic/init.c +++ b/arch/mips/generic/init.c @@ -29,10 +29,20 @@ static __initdata const struct mips_machine *mach; static __initdata const void *mach_match_data; void __init prom_init(void) +{ + plat_get_fdt(); + BUG_ON(!fdt); +} + +void __init *plat_get_fdt(void) { const struct mips_machine *check_mach; const struct of_device_id *match; + if (fdt) + /* Already set up */ + return (void *)fdt; + if ((fw_arg0 == -2) && !fdt_check_header((void *)fw_arg1)) { /* * We booted using the UHI boot protocol, so we have been @@ -75,12 +85,6 @@ void __init prom_init(void) /* Retrieve the machine's FDT */ fdt = mach->fdt; } - - BUG_ON(!fdt); -} - -void __init *plat_get_fdt(void) -{ return (void *)fdt; } From 818f38c5b7c4482abd71c64ac4d49911fbefaf9e Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Mon, 17 Oct 2016 10:09:39 +0100 Subject: [PATCH 853/884] MIPS: Fix build of compressed image Changes introduced to arch/mips/Makefile for the generic kernel resulted in build errors when making a compressed image if platform-y has multiple values, like this: make[2]: *** No rule to make target `alchemy/'. make[1]: *** [vmlinuz] Error 2 make[1]: Target `_all' not remade because of errors. make: *** [sub-make] Error 2 make: Target `_all' not remade because of errors. Fix this by quoting $(platform-y) as it is passed to the Makefile in arch/mips/boot/compressed/Makefile Reported-by: kernelci.org bot Link: https://storage.kernelci.org/next/next-20161017/mips-gpr_defconfig/build.log Signed-off-by: Matt Redfearn Reviewed-by: Paul Burton Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/14405/ Signed-off-by: Ralf Baechle --- arch/mips/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/Makefile b/arch/mips/Makefile index fbf40d3c8123..1a6bac7b076f 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -263,7 +263,7 @@ KBUILD_CPPFLAGS += -DDATAOFFSET=$(if $(dataoffset-y),$(dataoffset-y),0) bootvars-y = VMLINUX_LOAD_ADDRESS=$(load-y) \ VMLINUX_ENTRY_ADDRESS=$(entry-y) \ - PLATFORM=$(platform-y) + PLATFORM="$(platform-y)" ifdef CONFIG_32BIT bootvars-y += ADDR_BITS=32 endif From bcf084de5d429c0a321de6e2c508440148da1884 Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Wed, 19 Oct 2016 14:33:20 +0100 Subject: [PATCH 854/884] MIPS: traps: Fix output of show_backtrace Since commit 4bcc595ccd80 ("printk: reinstate KERN_CONT for printing continuation lines") the output from show_backtrace on MIPS has been pretty unreadable due to the lack of KERN_CONT markers. Use pr_cont to provide the appropriate markers & restore the expected output. Signed-off-by: Matt Redfearn Cc: Maciej W. Rozycki Cc: James Hogan Cc: Paul Burton Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/14429/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/traps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 1f5fdee1dfc3..6ae5ea752d4c 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -156,7 +156,7 @@ static void show_backtrace(struct task_struct *task, const struct pt_regs *regs) print_ip_sym(pc); pc = unwind_stack(task, &sp, pc, &ra); } while (pc); - printk("\n"); + pr_cont("\n"); } /* From fe4e09e701213213cf2024e2979ea227c2e36c60 Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Wed, 19 Oct 2016 14:33:21 +0100 Subject: [PATCH 855/884] MIPS: traps: Fix output of show_stacktrace Since commit 4bcc595ccd80 ("printk: reinstate KERN_CONT for printing continuation lines") the output from show_stacktrace on MIPS has been pretty unreadable due to the lack of KERN_CONT markers. Use pr_cont to provide the appropriate markers & restore the expected output. Also start a new line with printk such that the presence of timing information does not interfere with output. Signed-off-by: Matt Redfearn Cc: Maciej W. Rozycki Cc: James Hogan Cc: Paul Burton Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/14430/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/traps.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 6ae5ea752d4c..25ce866b2895 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -174,22 +174,24 @@ static void show_stacktrace(struct task_struct *task, printk("Stack :"); i = 0; while ((unsigned long) sp & (PAGE_SIZE - 1)) { - if (i && ((i % (64 / field)) == 0)) - printk("\n "); + if (i && ((i % (64 / field)) == 0)) { + pr_cont("\n"); + printk(" "); + } if (i > 39) { - printk(" ..."); + pr_cont(" ..."); break; } if (__get_user(stackdata, sp++)) { - printk(" (Bad stack address)"); + pr_cont(" (Bad stack address)"); break; } - printk(" %0*lx", field, stackdata); + pr_cont(" %0*lx", field, stackdata); i++; } - printk("\n"); + pr_cont("\n"); show_backtrace(task, regs); } From 41000c5819ee5aea8c5c3b388e4e21e679c1b95c Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Wed, 19 Oct 2016 14:33:22 +0100 Subject: [PATCH 856/884] MIPS: traps: Fix output of show_code Since commit 4bcc595ccd80 ("printk: reinstate KERN_CONT for printing continuation lines") the output from show_code on MIPS has been pretty unreadable due to the lack of KERN_CONT markers. Use pr_cont to provide the appropriate markers & restore the expected output. Signed-off-by: Matt Redfearn Cc: Maciej W. Rozycki Cc: James Hogan Cc: Paul Burton Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/14431/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/traps.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 25ce866b2895..273b4a419f76 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -231,18 +231,19 @@ static void show_code(unsigned int __user *pc) long i; unsigned short __user *pc16 = NULL; - printk("\nCode:"); + printk("Code:"); if ((unsigned long)pc & 1) pc16 = (unsigned short __user *)((unsigned long)pc & ~1); for(i = -3 ; i < 6 ; i++) { unsigned int insn; if (pc16 ? __get_user(insn, pc16 + i) : __get_user(insn, pc + i)) { - printk(" (Bad address in epc)\n"); + pr_cont(" (Bad address in epc)\n"); break; } - printk("%c%0*x%c", (i?' ':'<'), pc16 ? 4 : 8, insn, (i?' ':'>')); + pr_cont("%c%0*x%c", (i?' ':'<'), pc16 ? 4 : 8, insn, (i?' ':'>')); } + pr_cont("\n"); } static void __show_regs(const struct pt_regs *regs) From 752f5499823edb0c13c594f739363527178f714d Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Wed, 19 Oct 2016 14:33:23 +0100 Subject: [PATCH 857/884] MIPS: Fix __show_regs() output Since commit 4bcc595ccd80 ("printk: reinstate KERN_CONT for printing continuation lines") the output from __show_regs() on MIPS has been pretty unreadable due to the lack of KERN_CONT markers. Use pr_cont to provide the appropriate markers & restore the expected register output. Signed-off-by: Paul Burton Signed-off-by: Matt Redfearn Cc: Maciej W. Rozycki Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/14432/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/traps.c | 42 ++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 273b4a419f76..b9a910b208f9 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -262,15 +262,15 @@ static void __show_regs(const struct pt_regs *regs) if ((i % 4) == 0) printk("$%2d :", i); if (i == 0) - printk(" %0*lx", field, 0UL); + pr_cont(" %0*lx", field, 0UL); else if (i == 26 || i == 27) - printk(" %*s", field, ""); + pr_cont(" %*s", field, ""); else - printk(" %0*lx", field, regs->regs[i]); + pr_cont(" %0*lx", field, regs->regs[i]); i++; if ((i % 4) == 0) - printk("\n"); + pr_cont("\n"); } #ifdef CONFIG_CPU_HAS_SMARTMIPS @@ -291,46 +291,46 @@ static void __show_regs(const struct pt_regs *regs) if (cpu_has_3kex) { if (regs->cp0_status & ST0_KUO) - printk("KUo "); + pr_cont("KUo "); if (regs->cp0_status & ST0_IEO) - printk("IEo "); + pr_cont("IEo "); if (regs->cp0_status & ST0_KUP) - printk("KUp "); + pr_cont("KUp "); if (regs->cp0_status & ST0_IEP) - printk("IEp "); + pr_cont("IEp "); if (regs->cp0_status & ST0_KUC) - printk("KUc "); + pr_cont("KUc "); if (regs->cp0_status & ST0_IEC) - printk("IEc "); + pr_cont("IEc "); } else if (cpu_has_4kex) { if (regs->cp0_status & ST0_KX) - printk("KX "); + pr_cont("KX "); if (regs->cp0_status & ST0_SX) - printk("SX "); + pr_cont("SX "); if (regs->cp0_status & ST0_UX) - printk("UX "); + pr_cont("UX "); switch (regs->cp0_status & ST0_KSU) { case KSU_USER: - printk("USER "); + pr_cont("USER "); break; case KSU_SUPERVISOR: - printk("SUPERVISOR "); + pr_cont("SUPERVISOR "); break; case KSU_KERNEL: - printk("KERNEL "); + pr_cont("KERNEL "); break; default: - printk("BAD_MODE "); + pr_cont("BAD_MODE "); break; } if (regs->cp0_status & ST0_ERL) - printk("ERL "); + pr_cont("ERL "); if (regs->cp0_status & ST0_EXL) - printk("EXL "); + pr_cont("EXL "); if (regs->cp0_status & ST0_IE) - printk("IE "); + pr_cont("IE "); } - printk("\n"); + pr_cont("\n"); exccode = (cause & CAUSEF_EXCCODE) >> CAUSEB_EXCCODE; printk("Cause : %08x (ExcCode %02x)\n", cause, exccode); From 8a98495c7008d5bdede3d53e05e69ecdc0bc614b Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 21 Oct 2016 20:06:40 +0100 Subject: [PATCH 858/884] MIPS: dump_tlb: Fix printk continuations Since commit 4bcc595ccd80 ("printk: reinstate KERN_CONT for printing continuation lines") the output from TLB dumps on MIPS has been pretty unreadable due to the lack of KERN_CONT markers. Use pr_cont to provide the appropriate markers & restore the expected output. Continuation is also used for the second line of each TLB entry printed in dump_tlb.c even though it has a newline, since it is a continuation of the interpretation of the same TLB entry. For example: [ 46.371884] Index: 0 pgmask=16kb va=77654000 asid=73 gid=00 [ri=0 xi=0 pa=ffc18000 c=5 d=0 v=1 g=0] [ri=0 xi=0 pa=ffc1c000 c=5 d=0 v=1 g=0] [ 46.385380] Index: 12 pgmask=16kb va=004b4000 asid=73 gid=00 [ri=0 xi=0 pa=00000000 c=0 d=0 v=0 g=0] [ri=0 xi=0 pa=ffb00000 c=5 d=1 v=1 g=0] Signed-off-by: James Hogan Cc: Maciej W. Rozycki Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14444/ Signed-off-by: Ralf Baechle --- arch/mips/lib/dump_tlb.c | 44 ++++++++++++++++++------------------ arch/mips/lib/r3k_dump_tlb.c | 18 +++++++-------- 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/arch/mips/lib/dump_tlb.c b/arch/mips/lib/dump_tlb.c index 0f80b936e75e..6eb50a7137db 100644 --- a/arch/mips/lib/dump_tlb.c +++ b/arch/mips/lib/dump_tlb.c @@ -135,42 +135,42 @@ static void dump_tlb(int first, int last) c0 = (entrylo0 & ENTRYLO_C) >> ENTRYLO_C_SHIFT; c1 = (entrylo1 & ENTRYLO_C) >> ENTRYLO_C_SHIFT; - printk("va=%0*lx asid=%0*lx", - vwidth, (entryhi & ~0x1fffUL), - asidwidth, entryhi & asidmask); + pr_cont("va=%0*lx asid=%0*lx", + vwidth, (entryhi & ~0x1fffUL), + asidwidth, entryhi & asidmask); if (cpu_has_guestid) - printk(" gid=%02lx", - (guestctl1 & MIPS_GCTL1_RID) + pr_cont(" gid=%02lx", + (guestctl1 & MIPS_GCTL1_RID) >> MIPS_GCTL1_RID_SHIFT); /* RI/XI are in awkward places, so mask them off separately */ pa = entrylo0 & ~(MIPS_ENTRYLO_RI | MIPS_ENTRYLO_XI); if (xpa) pa |= (unsigned long long)readx_c0_entrylo0() << 30; pa = (pa << 6) & PAGE_MASK; - printk("\n\t["); + pr_cont("\n\t["); if (cpu_has_rixi) - printk("ri=%d xi=%d ", - (entrylo0 & MIPS_ENTRYLO_RI) ? 1 : 0, - (entrylo0 & MIPS_ENTRYLO_XI) ? 1 : 0); - printk("pa=%0*llx c=%d d=%d v=%d g=%d] [", - pwidth, pa, c0, - (entrylo0 & ENTRYLO_D) ? 1 : 0, - (entrylo0 & ENTRYLO_V) ? 1 : 0, - (entrylo0 & ENTRYLO_G) ? 1 : 0); + pr_cont("ri=%d xi=%d ", + (entrylo0 & MIPS_ENTRYLO_RI) ? 1 : 0, + (entrylo0 & MIPS_ENTRYLO_XI) ? 1 : 0); + pr_cont("pa=%0*llx c=%d d=%d v=%d g=%d] [", + pwidth, pa, c0, + (entrylo0 & ENTRYLO_D) ? 1 : 0, + (entrylo0 & ENTRYLO_V) ? 1 : 0, + (entrylo0 & ENTRYLO_G) ? 1 : 0); /* RI/XI are in awkward places, so mask them off separately */ pa = entrylo1 & ~(MIPS_ENTRYLO_RI | MIPS_ENTRYLO_XI); if (xpa) pa |= (unsigned long long)readx_c0_entrylo1() << 30; pa = (pa << 6) & PAGE_MASK; if (cpu_has_rixi) - printk("ri=%d xi=%d ", - (entrylo1 & MIPS_ENTRYLO_RI) ? 1 : 0, - (entrylo1 & MIPS_ENTRYLO_XI) ? 1 : 0); - printk("pa=%0*llx c=%d d=%d v=%d g=%d]\n", - pwidth, pa, c1, - (entrylo1 & ENTRYLO_D) ? 1 : 0, - (entrylo1 & ENTRYLO_V) ? 1 : 0, - (entrylo1 & ENTRYLO_G) ? 1 : 0); + pr_cont("ri=%d xi=%d ", + (entrylo1 & MIPS_ENTRYLO_RI) ? 1 : 0, + (entrylo1 & MIPS_ENTRYLO_XI) ? 1 : 0); + pr_cont("pa=%0*llx c=%d d=%d v=%d g=%d]\n", + pwidth, pa, c1, + (entrylo1 & ENTRYLO_D) ? 1 : 0, + (entrylo1 & ENTRYLO_V) ? 1 : 0, + (entrylo1 & ENTRYLO_G) ? 1 : 0); } printk("\n"); diff --git a/arch/mips/lib/r3k_dump_tlb.c b/arch/mips/lib/r3k_dump_tlb.c index 744f4a7bc49d..85b4086e553e 100644 --- a/arch/mips/lib/r3k_dump_tlb.c +++ b/arch/mips/lib/r3k_dump_tlb.c @@ -53,15 +53,15 @@ static void dump_tlb(int first, int last) */ printk("Index: %2d ", i); - printk("va=%08lx asid=%08lx" - " [pa=%06lx n=%d d=%d v=%d g=%d]", - entryhi & PAGE_MASK, - entryhi & asid_mask, - entrylo0 & PAGE_MASK, - (entrylo0 & R3K_ENTRYLO_N) ? 1 : 0, - (entrylo0 & R3K_ENTRYLO_D) ? 1 : 0, - (entrylo0 & R3K_ENTRYLO_V) ? 1 : 0, - (entrylo0 & R3K_ENTRYLO_G) ? 1 : 0); + pr_cont("va=%08lx asid=%08lx" + " [pa=%06lx n=%d d=%d v=%d g=%d]", + entryhi & PAGE_MASK, + entryhi & asid_mask, + entrylo0 & PAGE_MASK, + (entrylo0 & R3K_ENTRYLO_N) ? 1 : 0, + (entrylo0 & R3K_ENTRYLO_D) ? 1 : 0, + (entrylo0 & R3K_ENTRYLO_V) ? 1 : 0, + (entrylo0 & R3K_ENTRYLO_G) ? 1 : 0); } } printk("\n"); From c9e5603974573367c4d80964a845237a2297228c Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Fri, 28 Oct 2016 08:20:09 +0100 Subject: [PATCH 859/884] MIPS: ptrace: Also initialize the FP context on individual FCSR writes Complement commit ac9ad83bc318 ("MIPS: prevent FP context set via ptrace being discarded") and also initialize the FP context whenever FCSR alone is written with a PTRACE_POKEUSR request addressing FPC_CSR, rather than along with the full FPU register set in the case of the PTRACE_SETFPREGS request. Signed-off-by: Maciej W. Rozycki Cc: Paul Burton Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/14459/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/ptrace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 6103b24d1bfc..fd3a9580661a 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -817,6 +817,7 @@ long arch_ptrace(struct task_struct *child, long request, break; #endif case FPC_CSR: + init_fp_ctx(child); ptrace_setfcr31(child, data); break; case DSP_BASE ... DSP_BASE + 5: { From 5a1aca4469fdccd5b74ba0b4e490173b2b447895 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Fri, 28 Oct 2016 08:21:03 +0100 Subject: [PATCH 860/884] MIPS: Fix FCSR Cause bit handling for correct SIGFPE issue Sanitize FCSR Cause bit handling, following a trail of past attempts: * commit 4249548454f7 ("MIPS: ptrace: Fix FP context restoration FCSR regression"), * commit 443c44032a54 ("MIPS: Always clear FCSR cause bits after emulation"), * commit 64bedffe4968 ("MIPS: Clear [MSA]FPE CSR.Cause after notify_die()"), * commit b1442d39fac2 ("MIPS: Prevent user from setting FCSR cause bits"), * commit b54d2901517d ("Properly handle branch delay slots in connection with signals."). Specifically do not mask these bits out in ptrace(2) processing and send a SIGFPE signal instead whenever a matching pair of an FCSR Cause and Enable bit is seen as execution of an affected context is about to resume. Only then clear Cause bits, and even then do not clear any bits that are set but masked with the respective Enable bits. Adjust Cause bit clearing throughout code likewise, except within the FPU emulator proper where they are set according to IEEE 754 exceptions raised as the operation emulated executed. Do so so that any IEEE 754 exceptions subject to their default handling are recorded like with operations executed by FPU hardware. Signed-off-by: Maciej W. Rozycki Cc: Paul Burton Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/14460/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/fpu_emulator.h | 13 +++++ arch/mips/include/asm/switch_to.h | 18 +++++++ arch/mips/kernel/mips-r2-to-r6-emul.c | 10 ++-- arch/mips/kernel/ptrace.c | 7 ++- arch/mips/kernel/traps.c | 72 +++++++++++++++------------ 5 files changed, 78 insertions(+), 42 deletions(-) diff --git a/arch/mips/include/asm/fpu_emulator.h b/arch/mips/include/asm/fpu_emulator.h index 355dc25172e7..c05369e0b8d6 100644 --- a/arch/mips/include/asm/fpu_emulator.h +++ b/arch/mips/include/asm/fpu_emulator.h @@ -63,6 +63,8 @@ do { \ extern int fpu_emulator_cop1Handler(struct pt_regs *xcp, struct mips_fpu_struct *ctx, int has_fpu, void *__user *fault_addr); +void force_fcr31_sig(unsigned long fcr31, void __user *fault_addr, + struct task_struct *tsk); int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcr31); int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, @@ -81,4 +83,15 @@ static inline void fpu_emulator_init_fpu(void) set_fpr64(&t->thread.fpu.fpr[i], 0, SIGNALLING_NAN); } +/* + * Mask the FCSR Cause bits according to the Enable bits, observing + * that Unimplemented is always enabled. + */ +static inline unsigned long mask_fcr31_x(unsigned long fcr31) +{ + return fcr31 & (FPU_CSR_UNI_X | + ((fcr31 & FPU_CSR_ALL_E) << + (ffs(FPU_CSR_ALL_X) - ffs(FPU_CSR_ALL_E)))); +} + #endif /* _ASM_FPU_EMULATOR_H */ diff --git a/arch/mips/include/asm/switch_to.h b/arch/mips/include/asm/switch_to.h index ebb5c0f2f90d..c0ae27971e31 100644 --- a/arch/mips/include/asm/switch_to.h +++ b/arch/mips/include/asm/switch_to.h @@ -75,6 +75,22 @@ do { if (cpu_has_rw_llb) { \ } \ } while (0) +/* + * Check FCSR for any unmasked exceptions pending set with `ptrace', + * clear them and send a signal. + */ +#define __sanitize_fcr31(next) \ +do { \ + unsigned long fcr31 = mask_fcr31_x(next->thread.fpu.fcr31); \ + void __user *pc; \ + \ + if (unlikely(fcr31)) { \ + pc = (void __user *)task_pt_regs(next)->cp0_epc; \ + next->thread.fpu.fcr31 &= ~fcr31; \ + force_fcr31_sig(fcr31, pc, next); \ + } \ +} while (0) + /* * For newly created kernel threads switch_to() will return to * ret_from_kernel_thread, newly created user threads to ret_from_fork. @@ -85,6 +101,8 @@ do { if (cpu_has_rw_llb) { \ do { \ __mips_mt_fpaff_switch_to(prev); \ lose_fpu_inatomic(1, prev); \ + if (tsk_used_math(next)) \ + __sanitize_fcr31(next); \ if (cpu_has_dsp) { \ __save_dsp(prev); \ __restore_dsp(next); \ diff --git a/arch/mips/kernel/mips-r2-to-r6-emul.c b/arch/mips/kernel/mips-r2-to-r6-emul.c index 22dedd62818a..bd09853aecdf 100644 --- a/arch/mips/kernel/mips-r2-to-r6-emul.c +++ b/arch/mips/kernel/mips-r2-to-r6-emul.c @@ -899,7 +899,7 @@ static inline int mipsr2_find_op_func(struct pt_regs *regs, u32 inst, * mipsr2_decoder: Decode and emulate a MIPS R2 instruction * @regs: Process register set * @inst: Instruction to decode and emulate - * @fcr31: Floating Point Control and Status Register returned + * @fcr31: Floating Point Control and Status Register Cause bits returned */ int mipsr2_decoder(struct pt_regs *regs, u32 inst, unsigned long *fcr31) { @@ -1172,13 +1172,13 @@ fpu_emul: err = fpu_emulator_cop1Handler(regs, ¤t->thread.fpu, 0, &fault_addr); - *fcr31 = current->thread.fpu.fcr31; /* - * We can't allow the emulated instruction to leave any of - * the cause bits set in $fcr31. + * We can't allow the emulated instruction to leave any + * enabled Cause bits set in $fcr31. */ - current->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X; + *fcr31 = res = mask_fcr31_x(current->thread.fpu.fcr31); + current->thread.fpu.fcr31 &= ~res; /* * this is a tricky issue - lose_fpu() uses LL/SC atomics diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index fd3a9580661a..a92994d60e91 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -79,16 +79,15 @@ void ptrace_disable(struct task_struct *child) } /* - * Poke at FCSR according to its mask. Don't set the cause bits as - * this is currently not handled correctly in FP context restoration - * and will cause an oops if a corresponding enable bit is set. + * Poke at FCSR according to its mask. Set the Cause bits even + * if a corresponding Enable bit is set. This will be noticed at + * the time the thread is switched to and SIGFPE thrown accordingly. */ static void ptrace_setfcr31(struct task_struct *child, u32 value) { u32 fcr31; u32 mask; - value &= ~FPU_CSR_ALL_X; fcr31 = child->thread.fpu.fcr31; mask = boot_cpu_data.fpu_msk31; child->thread.fpu.fcr31 = (value & ~mask) | (fcr31 & mask); diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index b9a910b208f9..3905003dfe2b 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -708,6 +708,32 @@ asmlinkage void do_ov(struct pt_regs *regs) exception_exit(prev_state); } +/* + * Send SIGFPE according to FCSR Cause bits, which must have already + * been masked against Enable bits. This is impotant as Inexact can + * happen together with Overflow or Underflow, and `ptrace' can set + * any bits. + */ +void force_fcr31_sig(unsigned long fcr31, void __user *fault_addr, + struct task_struct *tsk) +{ + struct siginfo si = { .si_addr = fault_addr, .si_signo = SIGFPE }; + + if (fcr31 & FPU_CSR_INV_X) + si.si_code = FPE_FLTINV; + else if (fcr31 & FPU_CSR_DIV_X) + si.si_code = FPE_FLTDIV; + else if (fcr31 & FPU_CSR_OVF_X) + si.si_code = FPE_FLTOVF; + else if (fcr31 & FPU_CSR_UDF_X) + si.si_code = FPE_FLTUND; + else if (fcr31 & FPU_CSR_INE_X) + si.si_code = FPE_FLTRES; + else + si.si_code = __SI_FAULT; + force_sig_info(SIGFPE, &si, tsk); +} + int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcr31) { struct siginfo si = { 0 }; @@ -718,27 +744,7 @@ int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcr31) return 0; case SIGFPE: - si.si_addr = fault_addr; - si.si_signo = sig; - /* - * Inexact can happen together with Overflow or Underflow. - * Respect the mask to deliver the correct exception. - */ - fcr31 &= (fcr31 & FPU_CSR_ALL_E) << - (ffs(FPU_CSR_ALL_X) - ffs(FPU_CSR_ALL_E)); - if (fcr31 & FPU_CSR_INV_X) - si.si_code = FPE_FLTINV; - else if (fcr31 & FPU_CSR_DIV_X) - si.si_code = FPE_FLTDIV; - else if (fcr31 & FPU_CSR_OVF_X) - si.si_code = FPE_FLTOVF; - else if (fcr31 & FPU_CSR_UDF_X) - si.si_code = FPE_FLTUND; - else if (fcr31 & FPU_CSR_INE_X) - si.si_code = FPE_FLTRES; - else - si.si_code = __SI_FAULT; - force_sig_info(sig, &si, current); + force_fcr31_sig(fcr31, fault_addr, current); return 1; case SIGBUS: @@ -802,13 +808,13 @@ static int simulate_fp(struct pt_regs *regs, unsigned int opcode, /* Run the emulator */ sig = fpu_emulator_cop1Handler(regs, ¤t->thread.fpu, 1, &fault_addr); - fcr31 = current->thread.fpu.fcr31; /* - * We can't allow the emulated instruction to leave any of - * the cause bits set in $fcr31. + * We can't allow the emulated instruction to leave any + * enabled Cause bits set in $fcr31. */ - current->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X; + fcr31 = mask_fcr31_x(current->thread.fpu.fcr31); + current->thread.fpu.fcr31 &= ~fcr31; /* Restore the hardware register state */ own_fpu(1); @@ -834,7 +840,7 @@ asmlinkage void do_fpe(struct pt_regs *regs, unsigned long fcr31) goto out; /* Clear FCSR.Cause before enabling interrupts */ - write_32bit_cp1_register(CP1_STATUS, fcr31 & ~FPU_CSR_ALL_X); + write_32bit_cp1_register(CP1_STATUS, fcr31 & ~mask_fcr31_x(fcr31)); local_irq_enable(); die_if_kernel("FP exception in kernel code", regs); @@ -856,13 +862,13 @@ asmlinkage void do_fpe(struct pt_regs *regs, unsigned long fcr31) /* Run the emulator */ sig = fpu_emulator_cop1Handler(regs, ¤t->thread.fpu, 1, &fault_addr); - fcr31 = current->thread.fpu.fcr31; /* - * We can't allow the emulated instruction to leave any of - * the cause bits set in $fcr31. + * We can't allow the emulated instruction to leave any + * enabled Cause bits set in $fcr31. */ - current->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X; + fcr31 = mask_fcr31_x(current->thread.fpu.fcr31); + current->thread.fpu.fcr31 &= ~fcr31; /* Restore the hardware register state */ own_fpu(1); /* Using the FPU again. */ @@ -1427,13 +1433,13 @@ asmlinkage void do_cpu(struct pt_regs *regs) sig = fpu_emulator_cop1Handler(regs, ¤t->thread.fpu, 0, &fault_addr); - fcr31 = current->thread.fpu.fcr31; /* * We can't allow the emulated instruction to leave - * any of the cause bits set in $fcr31. + * any enabled Cause bits set in $fcr31. */ - current->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X; + fcr31 = mask_fcr31_x(current->thread.fpu.fcr31); + current->thread.fpu.fcr31 &= ~fcr31; /* Send a signal if required. */ if (!process_fpemu_return(sig, fault_addr, fcr31) && !err) From 35938a00ba86ae7a7404b66b526968ca2b8d3127 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 31 Oct 2016 16:25:44 +0000 Subject: [PATCH 861/884] MIPS: Fix ISA I FP sigcontext access violation handling Complement commit 0ae8dceaebe3 ("Merge with 2.3.10.") and use the local `fault' handler to recover from FP sigcontext access violation faults, like corresponding code does in r4k_fpu.S. The `bad_stack' handler is in syscall.c and is not suitable here as we want to propagate the error condition up through the caller rather than killing the thread outright. Signed-off-by: Maciej W. Rozycki Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/14474/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/r2300_fpu.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/r2300_fpu.S b/arch/mips/kernel/r2300_fpu.S index b4ac6374a38f..c4c8c1b65be9 100644 --- a/arch/mips/kernel/r2300_fpu.S +++ b/arch/mips/kernel/r2300_fpu.S @@ -21,7 +21,7 @@ #define EX(a,b) \ 9: a,##b; \ .section __ex_table,"a"; \ - PTR 9b,bad_stack; \ + PTR 9b,fault; \ .previous .set noreorder From 6daaa3266db9cc488612690e42c23b0763e2b49a Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 31 Oct 2016 16:26:24 +0000 Subject: [PATCH 862/884] MIPS: Remove FIR from ISA I FP signal context Complement commit e50c0a8fa60d ("Support the MIPS32 / MIPS64 DSP ASE.") and remove the Floating Point Implementation Register (FIR) from the FP register set recorded in a signal context with MIPS I processors too, in line with the change applied to r4k_fpu.S. The `sc_fpc_eir' slot is unused according to our current ABI and the FIR register is read-only and always directly accessible from user software. [ralf@linux-mips.org: This is also required because the next commit depends on it.] Signed-off-by: Maciej W. Rozycki Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/14475/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/r2300_fpu.S | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/mips/kernel/r2300_fpu.S b/arch/mips/kernel/r2300_fpu.S index c4c8c1b65be9..ce249eae91ce 100644 --- a/arch/mips/kernel/r2300_fpu.S +++ b/arch/mips/kernel/r2300_fpu.S @@ -64,13 +64,9 @@ LEAF(_save_fp_context) EX(swc1 $f29,(SC_FPREGS+232)(a0)) EX(swc1 $f30,(SC_FPREGS+240)(a0)) EX(swc1 $f31,(SC_FPREGS+248)(a0)) - EX(sw t1,(SC_FPC_CSR)(a0)) - cfc1 t0,$0 # implementation/version jr ra + EX(sw t1,(SC_FPC_CSR)(a0)) .set pop - .set nomacro - EX(sw t0,(SC_FPC_EIR)(a0)) - .set macro END(_save_fp_context) /* From 758ef0a939d4c003381d2a97d9fb51b2d6d7e162 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 31 Oct 2016 16:27:01 +0000 Subject: [PATCH 863/884] MIPS: Fix ISA I/II FP signal context offsets Fix a regression introduced with commit 2db9ca0a3551 ("MIPS: Use struct mips_abi offsets to save FP context") for MIPS I/I FP signal contexts, by converting save/restore code to the updated internal API. Start FGR offsets from 0 rather than SC_FPREGS from $a0 and use $a1 rather than the offset of SC_FPC_CSR from $a0 for the Floating Point Control/Status Register (FCSR). Document the new internal API and adjust assembly code formatting for consistency. Signed-off-by: Maciej W. Rozycki Cc: Paul Burton Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/14476/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/r2300_fpu.S | 159 ++++++++++++++++++----------------- arch/mips/kernel/r6000_fpu.S | 89 +++++++++++--------- 2 files changed, 131 insertions(+), 117 deletions(-) diff --git a/arch/mips/kernel/r2300_fpu.S b/arch/mips/kernel/r2300_fpu.S index ce249eae91ce..70ca63752cfa 100644 --- a/arch/mips/kernel/r2300_fpu.S +++ b/arch/mips/kernel/r2300_fpu.S @@ -26,97 +26,104 @@ .set noreorder .set mips1 - /* Save floating point context */ + +/** + * _save_fp_context() - save FP context from the FPU + * @a0 - pointer to fpregs field of sigcontext + * @a1 - pointer to fpc_csr field of sigcontext + * + * Save FP context, including the 32 FP data registers and the FP + * control & status register, from the FPU to signal context. + */ LEAF(_save_fp_context) .set push SET_HARDFLOAT li v0, 0 # assume success - cfc1 t1,fcr31 - EX(swc1 $f0,(SC_FPREGS+0)(a0)) - EX(swc1 $f1,(SC_FPREGS+8)(a0)) - EX(swc1 $f2,(SC_FPREGS+16)(a0)) - EX(swc1 $f3,(SC_FPREGS+24)(a0)) - EX(swc1 $f4,(SC_FPREGS+32)(a0)) - EX(swc1 $f5,(SC_FPREGS+40)(a0)) - EX(swc1 $f6,(SC_FPREGS+48)(a0)) - EX(swc1 $f7,(SC_FPREGS+56)(a0)) - EX(swc1 $f8,(SC_FPREGS+64)(a0)) - EX(swc1 $f9,(SC_FPREGS+72)(a0)) - EX(swc1 $f10,(SC_FPREGS+80)(a0)) - EX(swc1 $f11,(SC_FPREGS+88)(a0)) - EX(swc1 $f12,(SC_FPREGS+96)(a0)) - EX(swc1 $f13,(SC_FPREGS+104)(a0)) - EX(swc1 $f14,(SC_FPREGS+112)(a0)) - EX(swc1 $f15,(SC_FPREGS+120)(a0)) - EX(swc1 $f16,(SC_FPREGS+128)(a0)) - EX(swc1 $f17,(SC_FPREGS+136)(a0)) - EX(swc1 $f18,(SC_FPREGS+144)(a0)) - EX(swc1 $f19,(SC_FPREGS+152)(a0)) - EX(swc1 $f20,(SC_FPREGS+160)(a0)) - EX(swc1 $f21,(SC_FPREGS+168)(a0)) - EX(swc1 $f22,(SC_FPREGS+176)(a0)) - EX(swc1 $f23,(SC_FPREGS+184)(a0)) - EX(swc1 $f24,(SC_FPREGS+192)(a0)) - EX(swc1 $f25,(SC_FPREGS+200)(a0)) - EX(swc1 $f26,(SC_FPREGS+208)(a0)) - EX(swc1 $f27,(SC_FPREGS+216)(a0)) - EX(swc1 $f28,(SC_FPREGS+224)(a0)) - EX(swc1 $f29,(SC_FPREGS+232)(a0)) - EX(swc1 $f30,(SC_FPREGS+240)(a0)) - EX(swc1 $f31,(SC_FPREGS+248)(a0)) + cfc1 t1, fcr31 + EX(swc1 $f0, 0(a0)) + EX(swc1 $f1, 8(a0)) + EX(swc1 $f2, 16(a0)) + EX(swc1 $f3, 24(a0)) + EX(swc1 $f4, 32(a0)) + EX(swc1 $f5, 40(a0)) + EX(swc1 $f6, 48(a0)) + EX(swc1 $f7, 56(a0)) + EX(swc1 $f8, 64(a0)) + EX(swc1 $f9, 72(a0)) + EX(swc1 $f10, 80(a0)) + EX(swc1 $f11, 88(a0)) + EX(swc1 $f12, 96(a0)) + EX(swc1 $f13, 104(a0)) + EX(swc1 $f14, 112(a0)) + EX(swc1 $f15, 120(a0)) + EX(swc1 $f16, 128(a0)) + EX(swc1 $f17, 136(a0)) + EX(swc1 $f18, 144(a0)) + EX(swc1 $f19, 152(a0)) + EX(swc1 $f20, 160(a0)) + EX(swc1 $f21, 168(a0)) + EX(swc1 $f22, 176(a0)) + EX(swc1 $f23, 184(a0)) + EX(swc1 $f24, 192(a0)) + EX(swc1 $f25, 200(a0)) + EX(swc1 $f26, 208(a0)) + EX(swc1 $f27, 216(a0)) + EX(swc1 $f28, 224(a0)) + EX(swc1 $f29, 232(a0)) + EX(swc1 $f30, 240(a0)) + EX(swc1 $f31, 248(a0)) jr ra - EX(sw t1,(SC_FPC_CSR)(a0)) + EX(sw t1, (a1)) .set pop END(_save_fp_context) -/* - * Restore FPU state: - * - fp gp registers - * - cp1 status/control register +/** + * _restore_fp_context() - restore FP context to the FPU + * @a0 - pointer to fpregs field of sigcontext + * @a1 - pointer to fpc_csr field of sigcontext * - * We base the decision which registers to restore from the signal stack - * frame on the current content of c0_status, not on the content of the - * stack frame which might have been changed by the user. + * Restore FP context, including the 32 FP data registers and the FP + * control & status register, from signal context to the FPU. */ LEAF(_restore_fp_context) .set push SET_HARDFLOAT li v0, 0 # assume success - EX(lw t0,(SC_FPC_CSR)(a0)) - EX(lwc1 $f0,(SC_FPREGS+0)(a0)) - EX(lwc1 $f1,(SC_FPREGS+8)(a0)) - EX(lwc1 $f2,(SC_FPREGS+16)(a0)) - EX(lwc1 $f3,(SC_FPREGS+24)(a0)) - EX(lwc1 $f4,(SC_FPREGS+32)(a0)) - EX(lwc1 $f5,(SC_FPREGS+40)(a0)) - EX(lwc1 $f6,(SC_FPREGS+48)(a0)) - EX(lwc1 $f7,(SC_FPREGS+56)(a0)) - EX(lwc1 $f8,(SC_FPREGS+64)(a0)) - EX(lwc1 $f9,(SC_FPREGS+72)(a0)) - EX(lwc1 $f10,(SC_FPREGS+80)(a0)) - EX(lwc1 $f11,(SC_FPREGS+88)(a0)) - EX(lwc1 $f12,(SC_FPREGS+96)(a0)) - EX(lwc1 $f13,(SC_FPREGS+104)(a0)) - EX(lwc1 $f14,(SC_FPREGS+112)(a0)) - EX(lwc1 $f15,(SC_FPREGS+120)(a0)) - EX(lwc1 $f16,(SC_FPREGS+128)(a0)) - EX(lwc1 $f17,(SC_FPREGS+136)(a0)) - EX(lwc1 $f18,(SC_FPREGS+144)(a0)) - EX(lwc1 $f19,(SC_FPREGS+152)(a0)) - EX(lwc1 $f20,(SC_FPREGS+160)(a0)) - EX(lwc1 $f21,(SC_FPREGS+168)(a0)) - EX(lwc1 $f22,(SC_FPREGS+176)(a0)) - EX(lwc1 $f23,(SC_FPREGS+184)(a0)) - EX(lwc1 $f24,(SC_FPREGS+192)(a0)) - EX(lwc1 $f25,(SC_FPREGS+200)(a0)) - EX(lwc1 $f26,(SC_FPREGS+208)(a0)) - EX(lwc1 $f27,(SC_FPREGS+216)(a0)) - EX(lwc1 $f28,(SC_FPREGS+224)(a0)) - EX(lwc1 $f29,(SC_FPREGS+232)(a0)) - EX(lwc1 $f30,(SC_FPREGS+240)(a0)) - EX(lwc1 $f31,(SC_FPREGS+248)(a0)) + EX(lw t0, (a1)) + EX(lwc1 $f0, 0(a0)) + EX(lwc1 $f1, 8(a0)) + EX(lwc1 $f2, 16(a0)) + EX(lwc1 $f3, 24(a0)) + EX(lwc1 $f4, 32(a0)) + EX(lwc1 $f5, 40(a0)) + EX(lwc1 $f6, 48(a0)) + EX(lwc1 $f7, 56(a0)) + EX(lwc1 $f8, 64(a0)) + EX(lwc1 $f9, 72(a0)) + EX(lwc1 $f10, 80(a0)) + EX(lwc1 $f11, 88(a0)) + EX(lwc1 $f12, 96(a0)) + EX(lwc1 $f13, 104(a0)) + EX(lwc1 $f14, 112(a0)) + EX(lwc1 $f15, 120(a0)) + EX(lwc1 $f16, 128(a0)) + EX(lwc1 $f17, 136(a0)) + EX(lwc1 $f18, 144(a0)) + EX(lwc1 $f19, 152(a0)) + EX(lwc1 $f20, 160(a0)) + EX(lwc1 $f21, 168(a0)) + EX(lwc1 $f22, 176(a0)) + EX(lwc1 $f23, 184(a0)) + EX(lwc1 $f24, 192(a0)) + EX(lwc1 $f25, 200(a0)) + EX(lwc1 $f26, 208(a0)) + EX(lwc1 $f27, 216(a0)) + EX(lwc1 $f28, 224(a0)) + EX(lwc1 $f29, 232(a0)) + EX(lwc1 $f30, 240(a0)) + EX(lwc1 $f31, 248(a0)) jr ra - ctc1 t0,fcr31 + ctc1 t0, fcr31 .set pop END(_restore_fp_context) .set reorder diff --git a/arch/mips/kernel/r6000_fpu.S b/arch/mips/kernel/r6000_fpu.S index 47077380c15c..9cc7bfab3419 100644 --- a/arch/mips/kernel/r6000_fpu.S +++ b/arch/mips/kernel/r6000_fpu.S @@ -21,7 +21,14 @@ .set push SET_HARDFLOAT - /* Save floating point context */ +/** + * _save_fp_context() - save FP context from the FPU + * @a0 - pointer to fpregs field of sigcontext + * @a1 - pointer to fpc_csr field of sigcontext + * + * Save FP context, including the 32 FP data registers and the FP + * control & status register, from the FPU to signal context. + */ LEAF(_save_fp_context) mfc0 t0,CP0_STATUS sll t0,t0,2 @@ -30,59 +37,59 @@ cfc1 t1,fcr31 /* Store the 16 double precision registers */ - sdc1 $f0,(SC_FPREGS+0)(a0) - sdc1 $f2,(SC_FPREGS+16)(a0) - sdc1 $f4,(SC_FPREGS+32)(a0) - sdc1 $f6,(SC_FPREGS+48)(a0) - sdc1 $f8,(SC_FPREGS+64)(a0) - sdc1 $f10,(SC_FPREGS+80)(a0) - sdc1 $f12,(SC_FPREGS+96)(a0) - sdc1 $f14,(SC_FPREGS+112)(a0) - sdc1 $f16,(SC_FPREGS+128)(a0) - sdc1 $f18,(SC_FPREGS+144)(a0) - sdc1 $f20,(SC_FPREGS+160)(a0) - sdc1 $f22,(SC_FPREGS+176)(a0) - sdc1 $f24,(SC_FPREGS+192)(a0) - sdc1 $f26,(SC_FPREGS+208)(a0) - sdc1 $f28,(SC_FPREGS+224)(a0) - sdc1 $f30,(SC_FPREGS+240)(a0) + sdc1 $f0,0(a0) + sdc1 $f2,16(a0) + sdc1 $f4,32(a0) + sdc1 $f6,48(a0) + sdc1 $f8,64(a0) + sdc1 $f10,80(a0) + sdc1 $f12,96(a0) + sdc1 $f14,112(a0) + sdc1 $f16,128(a0) + sdc1 $f18,144(a0) + sdc1 $f20,160(a0) + sdc1 $f22,176(a0) + sdc1 $f24,192(a0) + sdc1 $f26,208(a0) + sdc1 $f28,224(a0) + sdc1 $f30,240(a0) jr ra - sw t0,SC_FPC_CSR(a0) + sw t0,(a1) 1: jr ra nop END(_save_fp_context) -/* Restore FPU state: - * - fp gp registers - * - cp1 status/control register +/** + * _restore_fp_context() - restore FP context to the FPU + * @a0 - pointer to fpregs field of sigcontext + * @a1 - pointer to fpc_csr field of sigcontext * - * We base the decision which registers to restore from the signal stack - * frame on the current content of c0_status, not on the content of the - * stack frame which might have been changed by the user. + * Restore FP context, including the 32 FP data registers and the FP + * control & status register, from signal context to the FPU. */ LEAF(_restore_fp_context) mfc0 t0,CP0_STATUS sll t0,t0,2 bgez t0,1f - lw t0,SC_FPC_CSR(a0) + lw t0,(a1) /* Restore the 16 double precision registers */ - ldc1 $f0,(SC_FPREGS+0)(a0) - ldc1 $f2,(SC_FPREGS+16)(a0) - ldc1 $f4,(SC_FPREGS+32)(a0) - ldc1 $f6,(SC_FPREGS+48)(a0) - ldc1 $f8,(SC_FPREGS+64)(a0) - ldc1 $f10,(SC_FPREGS+80)(a0) - ldc1 $f12,(SC_FPREGS+96)(a0) - ldc1 $f14,(SC_FPREGS+112)(a0) - ldc1 $f16,(SC_FPREGS+128)(a0) - ldc1 $f18,(SC_FPREGS+144)(a0) - ldc1 $f20,(SC_FPREGS+160)(a0) - ldc1 $f22,(SC_FPREGS+176)(a0) - ldc1 $f24,(SC_FPREGS+192)(a0) - ldc1 $f26,(SC_FPREGS+208)(a0) - ldc1 $f28,(SC_FPREGS+224)(a0) - ldc1 $f30,(SC_FPREGS+240)(a0) + ldc1 $f0,0(a0) + ldc1 $f2,16(a0) + ldc1 $f4,32(a0) + ldc1 $f6,48(a0) + ldc1 $f8,64(a0) + ldc1 $f10,80(a0) + ldc1 $f12,96(a0) + ldc1 $f14,112(a0) + ldc1 $f16,128(a0) + ldc1 $f18,144(a0) + ldc1 $f20,160(a0) + ldc1 $f22,176(a0) + ldc1 $f24,192(a0) + ldc1 $f26,208(a0) + ldc1 $f28,224(a0) + ldc1 $f30,240(a0) jr ra ctc1 t0,fcr31 1: jr ra From f92722dc4545ebfa0f99a2f986fd88c112a22a42 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 31 Oct 2016 16:27:40 +0000 Subject: [PATCH 864/884] MIPS: Correct MIPS I FP sigcontext layout Complement commit 80cbfad79096 ("MIPS: Correct MIPS I FP context layout") and correct the way Floating Point General registers are stored in a signal context with MIPS I hardware. Use the S.D and L.D assembly macros to have pairs of SWC1 instructions and pairs of LWC1 instructions produced, respectively, in an arrangement which makes the memory representation of floating-point data passed compatible with that used by hardware SDC1 and LDC1 instructions, where available, regardless of the hardware endianness used. This matches the layout used by r4k_fpu.S, ensuring run-time compatibility for MIPS I software across all o32 hardware platforms. Define an EX2 macro to handle exceptions from both hardware instructions implicitly produced from S.D and L.D assembly macros. Signed-off-by: Maciej W. Rozycki Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/14477/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/r2300_fpu.S | 103 +++++++++++++---------------------- 1 file changed, 39 insertions(+), 64 deletions(-) diff --git a/arch/mips/kernel/r2300_fpu.S b/arch/mips/kernel/r2300_fpu.S index 70ca63752cfa..918f2f6d3861 100644 --- a/arch/mips/kernel/r2300_fpu.S +++ b/arch/mips/kernel/r2300_fpu.S @@ -24,6 +24,13 @@ PTR 9b,fault; \ .previous +#define EX2(a,b) \ +9: a,##b; \ + .section __ex_table,"a"; \ + PTR 9b,bad_stack; \ + PTR 9b+4,bad_stack; \ + .previous + .set noreorder .set mips1 @@ -40,38 +47,22 @@ LEAF(_save_fp_context) SET_HARDFLOAT li v0, 0 # assume success cfc1 t1, fcr31 - EX(swc1 $f0, 0(a0)) - EX(swc1 $f1, 8(a0)) - EX(swc1 $f2, 16(a0)) - EX(swc1 $f3, 24(a0)) - EX(swc1 $f4, 32(a0)) - EX(swc1 $f5, 40(a0)) - EX(swc1 $f6, 48(a0)) - EX(swc1 $f7, 56(a0)) - EX(swc1 $f8, 64(a0)) - EX(swc1 $f9, 72(a0)) - EX(swc1 $f10, 80(a0)) - EX(swc1 $f11, 88(a0)) - EX(swc1 $f12, 96(a0)) - EX(swc1 $f13, 104(a0)) - EX(swc1 $f14, 112(a0)) - EX(swc1 $f15, 120(a0)) - EX(swc1 $f16, 128(a0)) - EX(swc1 $f17, 136(a0)) - EX(swc1 $f18, 144(a0)) - EX(swc1 $f19, 152(a0)) - EX(swc1 $f20, 160(a0)) - EX(swc1 $f21, 168(a0)) - EX(swc1 $f22, 176(a0)) - EX(swc1 $f23, 184(a0)) - EX(swc1 $f24, 192(a0)) - EX(swc1 $f25, 200(a0)) - EX(swc1 $f26, 208(a0)) - EX(swc1 $f27, 216(a0)) - EX(swc1 $f28, 224(a0)) - EX(swc1 $f29, 232(a0)) - EX(swc1 $f30, 240(a0)) - EX(swc1 $f31, 248(a0)) + EX2(s.d $f0, 0(a0)) + EX2(s.d $f2, 16(a0)) + EX2(s.d $f4, 32(a0)) + EX2(s.d $f6, 48(a0)) + EX2(s.d $f8, 64(a0)) + EX2(s.d $f10, 80(a0)) + EX2(s.d $f12, 96(a0)) + EX2(s.d $f14, 112(a0)) + EX2(s.d $f16, 128(a0)) + EX2(s.d $f18, 144(a0)) + EX2(s.d $f20, 160(a0)) + EX2(s.d $f22, 176(a0)) + EX2(s.d $f24, 192(a0)) + EX2(s.d $f26, 208(a0)) + EX2(s.d $f28, 224(a0)) + EX2(s.d $f30, 240(a0)) jr ra EX(sw t1, (a1)) .set pop @@ -90,38 +81,22 @@ LEAF(_restore_fp_context) SET_HARDFLOAT li v0, 0 # assume success EX(lw t0, (a1)) - EX(lwc1 $f0, 0(a0)) - EX(lwc1 $f1, 8(a0)) - EX(lwc1 $f2, 16(a0)) - EX(lwc1 $f3, 24(a0)) - EX(lwc1 $f4, 32(a0)) - EX(lwc1 $f5, 40(a0)) - EX(lwc1 $f6, 48(a0)) - EX(lwc1 $f7, 56(a0)) - EX(lwc1 $f8, 64(a0)) - EX(lwc1 $f9, 72(a0)) - EX(lwc1 $f10, 80(a0)) - EX(lwc1 $f11, 88(a0)) - EX(lwc1 $f12, 96(a0)) - EX(lwc1 $f13, 104(a0)) - EX(lwc1 $f14, 112(a0)) - EX(lwc1 $f15, 120(a0)) - EX(lwc1 $f16, 128(a0)) - EX(lwc1 $f17, 136(a0)) - EX(lwc1 $f18, 144(a0)) - EX(lwc1 $f19, 152(a0)) - EX(lwc1 $f20, 160(a0)) - EX(lwc1 $f21, 168(a0)) - EX(lwc1 $f22, 176(a0)) - EX(lwc1 $f23, 184(a0)) - EX(lwc1 $f24, 192(a0)) - EX(lwc1 $f25, 200(a0)) - EX(lwc1 $f26, 208(a0)) - EX(lwc1 $f27, 216(a0)) - EX(lwc1 $f28, 224(a0)) - EX(lwc1 $f29, 232(a0)) - EX(lwc1 $f30, 240(a0)) - EX(lwc1 $f31, 248(a0)) + EX2(l.d $f0, 0(a0)) + EX2(l.d $f2, 16(a0)) + EX2(l.d $f4, 32(a0)) + EX2(l.d $f6, 48(a0)) + EX2(l.d $f8, 64(a0)) + EX2(l.d $f10, 80(a0)) + EX2(l.d $f12, 96(a0)) + EX2(l.d $f14, 112(a0)) + EX2(l.d $f16, 128(a0)) + EX2(l.d $f18, 144(a0)) + EX2(l.d $f20, 160(a0)) + EX2(l.d $f22, 176(a0)) + EX2(l.d $f24, 192(a0)) + EX2(l.d $f26, 208(a0)) + EX2(l.d $f28, 224(a0)) + EX2(l.d $f30, 240(a0)) jr ra ctc1 t0, fcr31 .set pop From 16a767ec63167ef70c056795782d6c9c76ba5a5c Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 1 Nov 2016 13:59:09 +0000 Subject: [PATCH 865/884] MIPS: Fix max_low_pfn with disabled highmem When low memory doesn't reach HIGHMEM_START (e.g. up to 256MB at PA=0 is common) and highmem is present above HIGHMEM_START (e.g. on Malta the RAM overlayed by the IO region is aliased at PA=0x90000000), max_low_pfn will be initially calculated very large and then clipped down to HIGHMEM_START. This causes crashes when reading /sys/kernel/mm/page_idle/bitmap (i.e. CONFIG_IDLE_PAGE_TRACKING=y) when highmem is disabled. pfn_valid() will compare against max_mapnr which is derived from max_low_pfn when there is no highend_pfn set up, and will return true for PFNs right up to HIGHMEM_START, even though they are beyond the end of low memory and no page structs will actually exist for these PFNs. This is fixed by skipping high memory regions when initially calculating max_low_pfn if highmem is disabled, so it doesn't get clipped too high. We also clip regions which overlap the highmem boundary when highmem is disabled, so that max_pfn doesn't extend into highmem either. Signed-off-by: James Hogan Cc: Paul Burton Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14490/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/setup.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 0d57909d9026..f66e5ce505b2 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -368,6 +368,19 @@ static void __init bootmem_init(void) end = PFN_DOWN(boot_mem_map.map[i].addr + boot_mem_map.map[i].size); +#ifndef CONFIG_HIGHMEM + /* + * Skip highmem here so we get an accurate max_low_pfn if low + * memory stops short of high memory. + * If the region overlaps HIGHMEM_START, end is clipped so + * max_pfn excludes the highmem portion. + */ + if (start >= PFN_DOWN(HIGHMEM_START)) + continue; + if (end > PFN_DOWN(HIGHMEM_START)) + end = PFN_DOWN(HIGHMEM_START); +#endif + if (end > max_low_pfn) max_low_pfn = end; if (start < min_low_pfn) From 147b36d5b70c083cc76770c47d60b347e8eaf231 Mon Sep 17 00:00:00 2001 From: Vladimir Zapolskiy Date: Mon, 31 Oct 2016 21:46:24 +0200 Subject: [PATCH 866/884] i2c: core: fix NULL pointer dereference under race condition Race condition between registering an I2C device driver and deregistering an I2C adapter device which is assumed to manage that I2C device may lead to a NULL pointer dereference due to the uninitialized list head of driver clients. The root cause of the issue is that the I2C bus may know about the registered device driver and thus it is matched by bus_for_each_drv(), but the list of clients is not initialized and commonly it is NULL, because I2C device drivers define struct i2c_driver as static and clients field is expected to be initialized by I2C core: i2c_register_driver() i2c_del_adapter() driver_register() ... bus_add_driver() ... ... bus_for_each_drv(..., __process_removed_adapter) ... i2c_do_del_adapter() ... list_for_each_entry_safe(..., &driver->clients, ...) INIT_LIST_HEAD(&driver->clients); To solve the problem it is sufficient to do clients list head initialization before calling driver_register(). The problem was found while using an I2C device driver with a sluggish registration routine on a bus provided by a physically detachable I2C master controller, but practically the oops may be reproduced under the race between arbitraty I2C device driver registration and managing I2C bus device removal e.g. by unbinding the latter over sysfs: % echo 21a4000.i2c > /sys/bus/platform/drivers/imx-i2c/unbind Unable to handle kernel NULL pointer dereference at virtual address 00000000 Internal error: Oops: 17 [#1] SMP ARM CPU: 2 PID: 533 Comm: sh Not tainted 4.9.0-rc3+ #61 Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree) task: e5ada400 task.stack: e4936000 PC is at i2c_do_del_adapter+0x20/0xcc LR is at __process_removed_adapter+0x14/0x1c Flags: NzCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment none Control: 10c5387d Table: 35bd004a DAC: 00000051 Process sh (pid: 533, stack limit = 0xe4936210) Stack: (0xe4937d28 to 0xe4938000) Backtrace: [] (i2c_do_del_adapter) from [] (__process_removed_adapter+0x14/0x1c) [] (__process_removed_adapter) from [] (bus_for_each_drv+0x6c/0xa0) [] (bus_for_each_drv) from [] (i2c_del_adapter+0xbc/0x284) [] (i2c_del_adapter) from [] (i2c_imx_remove+0x44/0x164 [i2c_imx]) [] (i2c_imx_remove [i2c_imx]) from [] (platform_drv_remove+0x2c/0x44) [] (platform_drv_remove) from [] (__device_release_driver+0x90/0x12c) [] (__device_release_driver) from [] (device_release_driver+0x28/0x34) [] (device_release_driver) from [] (unbind_store+0x80/0x104) [] (unbind_store) from [] (drv_attr_store+0x28/0x34) [] (drv_attr_store) from [] (sysfs_kf_write+0x50/0x54) [] (sysfs_kf_write) from [] (kernfs_fop_write+0x100/0x214) [] (kernfs_fop_write) from [] (__vfs_write+0x34/0x120) [] (__vfs_write) from [] (vfs_write+0xa8/0x170) [] (vfs_write) from [] (SyS_write+0x4c/0xa8) [] (SyS_write) from [] (ret_fast_syscall+0x0/0x1c) Signed-off-by: Vladimir Zapolskiy Signed-off-by: Wolfram Sang Cc: stable@kernel.org --- drivers/i2c/i2c-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index 1704fc84d647..b432b64e307a 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -2179,6 +2179,7 @@ int i2c_register_driver(struct module *owner, struct i2c_driver *driver) /* add the driver to the list of i2c drivers in the driver core */ driver->driver.owner = owner; driver->driver.bus = &i2c_bus_type; + INIT_LIST_HEAD(&driver->clients); /* When registration returns, the driver core * will have called probe() for all matching-but-unbound devices. @@ -2189,7 +2190,6 @@ int i2c_register_driver(struct module *owner, struct i2c_driver *driver) pr_debug("driver [%s] registered\n", driver->driver.name); - INIT_LIST_HEAD(&driver->clients); /* Walk the adapters that are already present */ i2c_for_each_dev(driver, __process_new_driver); From 416379f9ebded501eda882e6af0a7aafc1866700 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Fri, 14 Oct 2016 23:54:55 +0200 Subject: [PATCH 867/884] PCI: designware: Check for iATU unroll support after initializing host dw_pcie_iatu_unroll_enabled() reads a dbi_base register. Reading any dbi_base register before pp->ops->host_init has been called causes "imprecise external abort" on platforms like ARTPEC-6, where the PCIe module is disabled at boot and first enabled in pp->ops->host_init. Move dw_pcie_iatu_unroll_enabled() to dw_pcie_setup_rc(), since it is after pp->ops->host_init, but before pp->iatu_unroll_enabled is actually used. Fixes: a0601a470537 ("PCI: designware: Add iATU Unroll feature") Tested-by: James Le Cuirot Signed-off-by: Niklas Cassel Signed-off-by: Bjorn Helgaas Acked-by: Joao Pinto Acked-by: Olof Johansson --- drivers/pci/host/pcie-designware.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/pci/host/pcie-designware.c b/drivers/pci/host/pcie-designware.c index 035f50c03281..bed19994c1e9 100644 --- a/drivers/pci/host/pcie-designware.c +++ b/drivers/pci/host/pcie-designware.c @@ -637,8 +637,6 @@ int dw_pcie_host_init(struct pcie_port *pp) } } - pp->iatu_unroll_enabled = dw_pcie_iatu_unroll_enabled(pp); - if (pp->ops->host_init) pp->ops->host_init(pp); @@ -809,6 +807,11 @@ void dw_pcie_setup_rc(struct pcie_port *pp) { u32 val; + /* get iATU unroll support */ + pp->iatu_unroll_enabled = dw_pcie_iatu_unroll_enabled(pp); + dev_dbg(pp->dev, "iATU unroll: %s\n", + pp->iatu_unroll_enabled ? "enabled" : "disabled"); + /* set the number of lanes */ val = dw_pcie_readl_rc(pp, PCIE_PORT_LINK_CONTROL); val &= ~PORT_LINK_MODE_MASK; From bc33b0ca11e3df467777a4fa7639ba488c9d4911 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 5 Nov 2016 16:23:36 -0700 Subject: [PATCH 868/884] Linux 4.9-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a2650f9c6a25..f97f786de58d 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 9 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Psychotic Stoned Sheep # *DOCUMENTATION* From f5e886ef9b45a3dbfd42b054a13c755894ea8402 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 16 Sep 2016 13:23:25 -0700 Subject: [PATCH 869/884] x86/MCE: Do not look at panic_on_oops in the severity grading The MCE tolerance levels control whether we panic on a machine check or do something else like generating a signal and logging error information. This is controlled by the mce= command line parameter. However, if panic_on_oops is set, it will force a panic for such an MCE even though the user didn't want to. So don't check panic_on_oops in the severity grading anymore. One of the use cases for that is recovery from uncorrectable errors with mce=2. [ Boris: rewrite commit message. ] Signed-off-by: Yinghai Lu Acked-by: Tony Luck Cc: linux-edac Cc: x86-ml Link: http://lkml.kernel.org/r/20160916202325.4972-1-yinghai@kernel.org Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/mcheck/mce-severity.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c index 631356c8cca4..c7efbcfbeda6 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c @@ -311,7 +311,7 @@ static int mce_severity_intel(struct mce *m, int tolerant, char **msg, bool is_e *msg = s->msg; s->covered = 1; if (s->sev >= MCE_UC_SEVERITY && ctx == IN_KERNEL) { - if (panic_on_oops || tolerant < 1) + if (tolerant < 1) return MCE_PANIC_SEVERITY; } return s->sev; From 8c203dbb78ca7a9aed4e2570c866b0f43c752e41 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 1 Nov 2016 13:04:41 +0100 Subject: [PATCH 870/884] x86/RAS: Add TSC timestamp to the injected MCE The MCE injection code does not provide the time stamp information for the injected MCE. Add it. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/20161101120911.13163-3-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/ras/mce_amd_inj.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/ras/mce_amd_inj.c b/arch/x86/ras/mce_amd_inj.c index 1ac76479c266..8730c2882fff 100644 --- a/arch/x86/ras/mce_amd_inj.c +++ b/arch/x86/ras/mce_amd_inj.c @@ -275,6 +275,8 @@ static void do_inject(void) unsigned int cpu = i_mce.extcpu; u8 b = i_mce.bank; + rdtscll(i_mce.tsc); + if (i_mce.misc) i_mce.status |= MCI_STATUS_MISCV; From cd9c57cad3fe89ea949b9266cddc947c0838f7af Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 1 Nov 2016 12:52:27 +0100 Subject: [PATCH 871/884] x86/MCE: Dump MCE to dmesg if no consumers When there are no error record consumers registered with the kernel, the only thing that appears in dmesg is something like: [ 300.000326] mce: [Hardware Error]: Machine check events logged and the error records are gone. Which is seriously counterproductive. So let's dump them to dmesg instead, in such a case. Requested-by: Eric Morton Signed-off-by: Borislav Petkov Cc: Tony Luck Link: http://lkml.kernel.org/r/20161101120911.13163-4-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/mcheck/mce.c | 52 ++++++++++++++++++++++++++++---- 1 file changed, 46 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index a7fdf453d895..4ca00474804b 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -207,8 +207,12 @@ EXPORT_SYMBOL_GPL(mce_inject_log); static struct notifier_block mce_srao_nb; +static atomic_t num_notifiers; + void mce_register_decode_chain(struct notifier_block *nb) { + atomic_inc(&num_notifiers); + /* Ensure SRAO notifier has the highest priority in the decode chain. */ if (nb != &mce_srao_nb && nb->priority == INT_MAX) nb->priority -= 1; @@ -219,6 +223,8 @@ EXPORT_SYMBOL_GPL(mce_register_decode_chain); void mce_unregister_decode_chain(struct notifier_block *nb) { + atomic_dec(&num_notifiers); + atomic_notifier_chain_unregister(&x86_mce_decoder_chain, nb); } EXPORT_SYMBOL_GPL(mce_unregister_decode_chain); @@ -270,17 +276,17 @@ struct mca_msr_regs msr_ops = { .misc = misc_reg }; -static void print_mce(struct mce *m) +static void __print_mce(struct mce *m) { - int ret = 0; - - pr_emerg(HW_ERR "CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n", - m->extcpu, m->mcgstatus, m->bank, m->status); + pr_emerg(HW_ERR "CPU %d: Machine Check%s: %Lx Bank %d: %016Lx\n", + m->extcpu, + (m->mcgstatus & MCG_STATUS_MCIP ? " Exception" : ""), + m->mcgstatus, m->bank, m->status); if (m->ip) { pr_emerg(HW_ERR "RIP%s %02x:<%016Lx> ", !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", - m->cs, m->ip); + m->cs, m->ip); if (m->cs == __KERNEL_CS) print_symbol("{%s}", m->ip); @@ -308,6 +314,13 @@ static void print_mce(struct mce *m) pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x microcode %x\n", m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid, cpu_data(m->extcpu).microcode); +} + +static void print_mce(struct mce *m) +{ + int ret = 0; + + __print_mce(m); /* * Print out human-readable details about the MCE error, @@ -569,6 +582,32 @@ static struct notifier_block mce_srao_nb = { .priority = INT_MAX, }; +static int mce_default_notifier(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct mce *m = (struct mce *)data; + + if (!m) + return NOTIFY_DONE; + + /* + * Run the default notifier if we have only the SRAO + * notifier and us registered. + */ + if (atomic_read(&num_notifiers) > 2) + return NOTIFY_DONE; + + __print_mce(m); + + return NOTIFY_DONE; +} + +static struct notifier_block mce_default_nb = { + .notifier_call = mce_default_notifier, + /* lowest prio, we want it to run last. */ + .priority = 0, +}; + /* * Read ADDR and MISC registers. */ @@ -2138,6 +2177,7 @@ int __init mcheck_init(void) { mcheck_intel_therm_init(); mce_register_decode_chain(&mce_srao_nb); + mce_register_decode_chain(&mce_default_nb); mcheck_vendor_init_severity(); INIT_WORK(&mce_work, mce_process_work); From 79349f529ab1a629b9e43e81b4a5b2c22d1e9a65 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 1 Nov 2016 17:33:00 +0100 Subject: [PATCH 872/884] x86/RAS: Simplify SMCA bank descriptor struct Call the struct simply smca_bank, it's instance ID can be simply ->id. Makes the code much more readable. Signed-off-by: Borislav Petkov Tested-by: Yazen Ghannam Link: http://lkml.kernel.org/r/20161103125556.15482-1-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/mce.h | 7 ++++--- arch/x86/kernel/cpu/mcheck/mce_amd.c | 10 +++++----- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 9bd7ff5ffbcc..4d97875d9543 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -371,12 +371,13 @@ struct smca_hwid_mcatype { u32 xec_bitmap; /* Bitmap of valid ExtErrorCodes; current max is 21. */ }; -struct smca_bank_info { +struct smca_bank { struct smca_hwid_mcatype *type; - u32 type_instance; + /* Instance ID */ + u32 id; }; -extern struct smca_bank_info smca_banks[MAX_NR_BANKS]; +extern struct smca_bank smca_banks[MAX_NR_BANKS]; #endif diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 9b5403462936..ac2f4f2cad18 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -116,7 +116,7 @@ static struct smca_hwid_mcatype smca_hwid_mcatypes[] = { { SMCA_SMU, HWID_MCATYPE(0x01, 0x0), 0x1 }, }; -struct smca_bank_info smca_banks[MAX_NR_BANKS]; +struct smca_bank smca_banks[MAX_NR_BANKS]; EXPORT_SYMBOL_GPL(smca_banks); /* @@ -150,14 +150,14 @@ static void get_smca_bank_info(unsigned int bank) { unsigned int i, hwid_mcatype, cpu = smp_processor_id(); struct smca_hwid_mcatype *type; - u32 high, instanceId; + u32 high, instance_id; u16 hwid, mcatype; /* Collect bank_info using CPU 0 for now. */ if (cpu) return; - if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_IPID(bank), &instanceId, &high)) { + if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_IPID(bank), &instance_id, &high)) { pr_warn("Failed to read MCA_IPID for bank %d\n", bank); return; } @@ -170,7 +170,7 @@ static void get_smca_bank_info(unsigned int bank) type = &smca_hwid_mcatypes[i]; if (hwid_mcatype == type->hwid_mcatype) { smca_banks[bank].type = type; - smca_banks[bank].type_instance = instanceId; + smca_banks[bank].id = instance_id; break; } } @@ -839,7 +839,7 @@ static const char *get_name(unsigned int bank, struct threshold_block *b) snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, "%s_%x", smca_bank_names[bank_type].name, - smca_banks[bank].type_instance); + smca_banks[bank].id); return buf_mcatype; } From 1ce9cd7f9f0b71af7c496b816734bc2dc699363a Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 2 Nov 2016 12:48:01 +0100 Subject: [PATCH 873/884] x86/RAS: Simplify SMCA HWID descriptor struct Call it simply smca_hwid and call local variables "hwid". More readable. Signed-off-by: Borislav Petkov Tested-by: Yazen Ghannam Link: http://lkml.kernel.org/r/20161103125556.15482-2-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/mce.h | 4 ++-- arch/x86/kernel/cpu/mcheck/mce_amd.c | 24 +++++++++--------------- drivers/edac/mce_amd.c | 10 +++++----- 3 files changed, 16 insertions(+), 22 deletions(-) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 4d97875d9543..ccc801a0da0f 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -365,14 +365,14 @@ extern struct smca_bank_name smca_bank_names[N_SMCA_BANK_TYPES]; #define HWID_MCATYPE(hwid, mcatype) ((hwid << 16) | mcatype) -struct smca_hwid_mcatype { +struct smca_hwid { unsigned int bank_type; /* Use with smca_bank_types for easy indexing. */ u32 hwid_mcatype; /* (hwid,mcatype) tuple */ u32 xec_bitmap; /* Bitmap of valid ExtErrorCodes; current max is 21. */ }; struct smca_bank { - struct smca_hwid_mcatype *type; + struct smca_hwid *hwid; /* Instance ID */ u32 id; }; diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index ac2f4f2cad18..ff81667af2f2 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -86,7 +86,7 @@ struct smca_bank_name smca_bank_names[] = { }; EXPORT_SYMBOL_GPL(smca_bank_names); -static struct smca_hwid_mcatype smca_hwid_mcatypes[] = { +static struct smca_hwid smca_hwid_mcatypes[] = { /* { bank_type, hwid_mcatype, xec_bitmap } */ /* ZN Core (HWID=0xB0) MCA types */ @@ -142,16 +142,11 @@ static void default_deferred_error_interrupt(void) } void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt; -/* - * CPU Initialization - */ - static void get_smca_bank_info(unsigned int bank) { unsigned int i, hwid_mcatype, cpu = smp_processor_id(); - struct smca_hwid_mcatype *type; + struct smca_hwid *s_hwid; u32 high, instance_id; - u16 hwid, mcatype; /* Collect bank_info using CPU 0 for now. */ if (cpu) @@ -162,14 +157,13 @@ static void get_smca_bank_info(unsigned int bank) return; } - hwid = high & MCI_IPID_HWID; - mcatype = (high & MCI_IPID_MCATYPE) >> 16; - hwid_mcatype = HWID_MCATYPE(hwid, mcatype); + hwid_mcatype = HWID_MCATYPE(high & MCI_IPID_HWID, + (high & MCI_IPID_MCATYPE) >> 16); for (i = 0; i < ARRAY_SIZE(smca_hwid_mcatypes); i++) { - type = &smca_hwid_mcatypes[i]; - if (hwid_mcatype == type->hwid_mcatype) { - smca_banks[bank].type = type; + s_hwid = &smca_hwid_mcatypes[i]; + if (hwid_mcatype == s_hwid->hwid_mcatype) { + smca_banks[bank].hwid = s_hwid; smca_banks[bank].id = instance_id; break; } @@ -826,10 +820,10 @@ static const char *get_name(unsigned int bank, struct threshold_block *b) return th_names[bank]; } - if (!smca_banks[bank].type) + if (!smca_banks[bank].hwid) return NULL; - bank_type = smca_banks[bank].type->bank_type; + bank_type = smca_banks[bank].hwid->bank_type; if (b && bank_type == SMCA_UMC) { if (b->block < ARRAY_SIZE(smca_umc_block_names)) diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index daaac2c79ca7..8e96c6ddf272 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -851,7 +851,7 @@ static void decode_mc6_mce(struct mce *m) /* Decode errors according to Scalable MCA specification */ static void decode_smca_errors(struct mce *m) { - struct smca_hwid_mcatype *type; + struct smca_hwid *hwid; unsigned int bank_type; const char *ip_name; u8 xec = XEC(m->status, xec_mask); @@ -862,18 +862,18 @@ static void decode_smca_errors(struct mce *m) if (boot_cpu_data.x86 >= 0x17 && m->bank == 4) pr_emerg(HW_ERR "Bank 4 is reserved on Fam17h.\n"); - type = smca_banks[m->bank].type; - if (!type) + hwid = smca_banks[m->bank].hwid; + if (!hwid) return; - bank_type = type->bank_type; + bank_type = hwid->bank_type; ip_name = smca_bank_names[bank_type].long_name; pr_emerg(HW_ERR "%s Extended Error Code: %d\n", ip_name, xec); /* Only print the decode of valid error codes */ if (xec < smca_mce_descs[bank_type].num_descs && - (type->xec_bitmap & BIT_ULL(xec))) { + (hwid->xec_bitmap & BIT_ULL(xec))) { pr_emerg(HW_ERR "%s Error: ", ip_name); pr_cont("%s.\n", smca_mce_descs[bank_type].descs[xec]); } From a9a1c0ee04aa771e5523ae33e458c702261ab547 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 2 Nov 2016 13:24:47 +0100 Subject: [PATCH 874/884] x86/RAS: Rename smca_bank_names to smca_names Make it differ more from struct smca_bank_name for better readability. Signed-off-by: Borislav Petkov Tested-by: Yazen Ghannam Link: http://lkml.kernel.org/r/20161103125556.15482-3-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/mce.h | 2 +- arch/x86/kernel/cpu/mcheck/mce_amd.c | 6 +++--- drivers/edac/mce_amd.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index ccc801a0da0f..8ffd21596dd7 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -361,7 +361,7 @@ struct smca_bank_name { const char *long_name; /* Long name for pretty-printing */ }; -extern struct smca_bank_name smca_bank_names[N_SMCA_BANK_TYPES]; +extern struct smca_bank_name smca_names[N_SMCA_BANK_TYPES]; #define HWID_MCATYPE(hwid, mcatype) ((hwid << 16) | mcatype) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index ff81667af2f2..afeb02b87127 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -69,7 +69,7 @@ static const char * const smca_umc_block_names[] = { "misc_umc" }; -struct smca_bank_name smca_bank_names[] = { +struct smca_bank_name smca_names[] = { [SMCA_LS] = { "load_store", "Load Store Unit" }, [SMCA_IF] = { "insn_fetch", "Instruction Fetch Unit" }, [SMCA_L2_CACHE] = { "l2_cache", "L2 Cache" }, @@ -84,7 +84,7 @@ struct smca_bank_name smca_bank_names[] = { [SMCA_PSP] = { "psp", "Platform Security Processor" }, [SMCA_SMU] = { "smu", "System Management Unit" }, }; -EXPORT_SYMBOL_GPL(smca_bank_names); +EXPORT_SYMBOL_GPL(smca_names); static struct smca_hwid smca_hwid_mcatypes[] = { /* { bank_type, hwid_mcatype, xec_bitmap } */ @@ -832,7 +832,7 @@ static const char *get_name(unsigned int bank, struct threshold_block *b) } snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, - "%s_%x", smca_bank_names[bank_type].name, + "%s_%x", smca_names[bank_type].name, smca_banks[bank].id); return buf_mcatype; } diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index 8e96c6ddf272..3dee58583d25 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -867,7 +867,7 @@ static void decode_smca_errors(struct mce *m) return; bank_type = hwid->bank_type; - ip_name = smca_bank_names[bank_type].long_name; + ip_name = smca_names[bank_type].long_name; pr_emerg(HW_ERR "%s Extended Error Code: %d\n", ip_name, xec); From c09a8c40e0a0b4994925ac8eba91b85d76f440a3 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 3 Nov 2016 21:12:33 +0100 Subject: [PATCH 875/884] x86/RAS: Hide SMCA bank names Add accessor functions and hide the smca_names array. Also, add a sanity-check to bank HWID assignment in get_smca_bank_info(). Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/20161104152317.5r276t35df53qk76@pd.tnic Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/mce.h | 8 +------ arch/x86/kernel/cpu/mcheck/mce_amd.c | 32 +++++++++++++++++++++++++--- drivers/edac/mce_amd.c | 2 +- 3 files changed, 31 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 8ffd21596dd7..748b8da8e627 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -356,13 +356,6 @@ enum smca_bank_types { N_SMCA_BANK_TYPES }; -struct smca_bank_name { - const char *name; /* Short name for sysfs */ - const char *long_name; /* Long name for pretty-printing */ -}; - -extern struct smca_bank_name smca_names[N_SMCA_BANK_TYPES]; - #define HWID_MCATYPE(hwid, mcatype) ((hwid << 16) | mcatype) struct smca_hwid { @@ -379,6 +372,7 @@ struct smca_bank { extern struct smca_bank smca_banks[MAX_NR_BANKS]; +extern const char *smca_get_long_name(enum smca_bank_types t); #endif #endif /* _ASM_X86_MCE_H */ diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index afeb02b87127..e68a305ff666 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -69,7 +69,12 @@ static const char * const smca_umc_block_names[] = { "misc_umc" }; -struct smca_bank_name smca_names[] = { +struct smca_bank_name { + const char *name; /* Short name for sysfs */ + const char *long_name; /* Long name for pretty-printing */ +}; + +static struct smca_bank_name smca_names[] = { [SMCA_LS] = { "load_store", "Load Store Unit" }, [SMCA_IF] = { "insn_fetch", "Instruction Fetch Unit" }, [SMCA_L2_CACHE] = { "l2_cache", "L2 Cache" }, @@ -84,7 +89,23 @@ struct smca_bank_name smca_names[] = { [SMCA_PSP] = { "psp", "Platform Security Processor" }, [SMCA_SMU] = { "smu", "System Management Unit" }, }; -EXPORT_SYMBOL_GPL(smca_names); + +const char *smca_get_name(enum smca_bank_types t) +{ + if (t >= N_SMCA_BANK_TYPES) + return NULL; + + return smca_names[t].name; +} + +const char *smca_get_long_name(enum smca_bank_types t) +{ + if (t >= N_SMCA_BANK_TYPES) + return NULL; + + return smca_names[t].long_name; +} +EXPORT_SYMBOL_GPL(smca_get_long_name); static struct smca_hwid smca_hwid_mcatypes[] = { /* { bank_type, hwid_mcatype, xec_bitmap } */ @@ -163,6 +184,11 @@ static void get_smca_bank_info(unsigned int bank) for (i = 0; i < ARRAY_SIZE(smca_hwid_mcatypes); i++) { s_hwid = &smca_hwid_mcatypes[i]; if (hwid_mcatype == s_hwid->hwid_mcatype) { + + WARN(smca_banks[bank].hwid, + "Bank %s already initialized!\n", + smca_get_name(s_hwid->bank_type)); + smca_banks[bank].hwid = s_hwid; smca_banks[bank].id = instance_id; break; @@ -832,7 +858,7 @@ static const char *get_name(unsigned int bank, struct threshold_block *b) } snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, - "%s_%x", smca_names[bank_type].name, + "%s_%x", smca_get_name(bank_type), smca_banks[bank].id); return buf_mcatype; } diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index 3dee58583d25..80762acd8cc8 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -867,7 +867,7 @@ static void decode_smca_errors(struct mce *m) return; bank_type = hwid->bank_type; - ip_name = smca_names[bank_type].long_name; + ip_name = smca_get_long_name(bank_type); pr_emerg(HW_ERR "%s Extended Error Code: %d\n", ip_name, xec); From 54467353a96577f840cd2348981417c559b21b4b Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 10 Nov 2016 14:10:53 +0100 Subject: [PATCH 876/884] x86/MCE: Correct TSC timestamping of error records We did have logic in the MCE code which would TSC-timestamp an error record only when it is exact - i.e., when it wasn't detected by polling. This isn't the case anymore. So let's fix that: We have a valid TSC timestamp in the error record only when it has been a precise detection, i.e., either in the #MC handler or in one of the interrupt handlers (thresholding, deferred, ...). All other error records still have mce.time which contains the wall time in order to be able to place the error record in time at least approximately. Also, this fixes another bug where machine_check_poll() would clear mce.tsc unconditionally even if we requested precise MCP_TIMESTAMP logging. The proper fix would be to generate timestamp only when it has been requested and not always. But that would require a more thorough code audit of all mce_gather_info/mce_setup() users. Add a FIXME for now. Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Cc: Tony Luck Cc: kernel test robot Cc: linux-edac Cc: lkp@01.org Link: http://lkml.kernel.org/r/20161110131053.kybsijfs5venpjnf@pd.tnic Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce.c | 16 ++++++++++------ arch/x86/kernel/cpu/mcheck/mce_intel.c | 4 ++-- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 4ca00474804b..aab96f8d52b0 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -706,6 +706,15 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) mce_gather_info(&m, NULL); + /* + * m.tsc was set in mce_setup(). Clear it if not requested. + * + * FIXME: Propagate @flags to mce_gather_info/mce_setup() to avoid + * that dance. + */ + if (!(flags & MCP_TIMESTAMP)) + m.tsc = 0; + for (i = 0; i < mca_cfg.banks; i++) { if (!mce_banks[i].ctl || !test_bit(i, *b)) continue; @@ -713,14 +722,12 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) m.misc = 0; m.addr = 0; m.bank = i; - m.tsc = 0; barrier(); m.status = mce_rdmsrl(msr_ops.status(i)); if (!(m.status & MCI_STATUS_VAL)) continue; - /* * Uncorrected or signalled events are handled by the exception * handler when it is enabled, so don't process those here. @@ -735,9 +742,6 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) mce_read_aux(&m, i); - if (!(flags & MCP_TIMESTAMP)) - m.tsc = 0; - severity = mce_severity(&m, mca_cfg.tolerant, NULL, false); if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m)) @@ -1394,7 +1398,7 @@ static void mce_timer_fn(unsigned long data) iv = __this_cpu_read(mce_next_interval); if (mce_available(this_cpu_ptr(&cpu_info))) { - machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_poll_banks)); + machine_check_poll(0, this_cpu_ptr(&mce_poll_banks)); if (mce_intel_cmci_poll()) { iv = mce_adjust_timer(iv); diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 1defb8ea882c..be0b2fad47c5 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -130,7 +130,7 @@ bool mce_intel_cmci_poll(void) * Reset the counter if we've logged an error in the last poll * during the storm. */ - if (machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned))) + if (machine_check_poll(0, this_cpu_ptr(&mce_banks_owned))) this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL); else this_cpu_dec(cmci_backoff_cnt); @@ -342,7 +342,7 @@ void cmci_recheck(void) return; local_irq_save(flags); - machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned)); + machine_check_poll(0, this_cpu_ptr(&mce_banks_owned)); local_irq_restore(flags); } From 859af13a103fc99a9a62064ef8f05f7d9ee58609 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 10 Nov 2016 14:32:35 -0600 Subject: [PATCH 877/884] x86/mce/AMD: Fix HWID_MCATYPE calculation by grouping arguments The calculation of the hwid_mcatype value in get_smca_bank_info() became incorrect after applying the following commit: 1ce9cd7f9f0b ("x86/RAS: Simplify SMCA HWID descriptor struct") This causes the function to not match a bank to its type. Disassembly of hwid_mcatype calculation after change: db: 8b 45 e0 mov -0x20(%rbp),%eax de: 41 89 c4 mov %eax,%r12d e1: 25 00 00 ff 0f and $0xfff0000,%eax e6: 41 c1 ec 10 shr $0x10,%r12d ea: 41 09 c4 or %eax,%r12d Disassembly of hwid_mcatype calculation in original code: 286: 8b 45 d0 mov -0x30(%rbp),%eax 289: 41 89 c5 mov %eax,%r13d 28c: c1 e8 10 shr $0x10,%eax 28f: 41 81 e5 ff 0f 00 00 and $0xfff,%r13d 296: 41 c1 e5 10 shl $0x10,%r13d 29a: 41 09 c5 or %eax,%r13d Grouping the arguments to the HWID_MCATYPE() macro fixes the issue. ( Boris suggested adding parentheses in the macro. ) Signed-off-by: Yazen Ghannam Cc: Aravind Gopalakrishnan Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-edac@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mce.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 748b8da8e627..ecda6a93179f 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -356,7 +356,7 @@ enum smca_bank_types { N_SMCA_BANK_TYPES }; -#define HWID_MCATYPE(hwid, mcatype) ((hwid << 16) | mcatype) +#define HWID_MCATYPE(hwid, mcatype) (((hwid) << 16) | (mcatype)) struct smca_hwid { unsigned int bank_type; /* Use with smca_bank_types for easy indexing. */ From 18807ddb7f88d4ac3797302bafb18143d573e66f Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Tue, 15 Nov 2016 15:13:53 -0600 Subject: [PATCH 878/884] x86/mce/AMD: Reset Threshold Limit after logging error The error count field in MCA_MISC does not get reset by hardware when the threshold has been reached. Software is expected to reset it. Currently, the threshold limit only gets reset during init or when a user writes to sysfs. If the user is not monitoring threshold interrupts and resetting the limit then the user will only see 1 interrupt when the limit is first hit. So if, for example, the limit is set to 10 then only 1 interrupt will be recorded after 10 errors even if 100 errors have occurred. The user may then assume that only 10 errors have occurred. Signed-off-by: Yazen Ghannam Cc: Aravind Gopalakrishnan Cc: linux-edac Cc: x86-ml Link: http://lkml.kernel.org/r/1479244433-69267-1-git-send-email-Yazen.Ghannam@amd.com Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index e68a305ff666..5b33702ca86e 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -665,6 +665,7 @@ static void amd_threshold_interrupt(void) { u32 low = 0, high = 0, address = 0; unsigned int bank, block, cpu = smp_processor_id(); + struct thresh_restart tr; /* assume first bank caused it */ for (bank = 0; bank < mca_cfg.banks; ++bank) { @@ -701,6 +702,11 @@ static void amd_threshold_interrupt(void) log: __log_error(bank, false, true, ((u64)high << 32) | low); + + /* Reset threshold block after logging error. */ + memset(&tr, 0, sizeof(tr)); + tr.b = &per_cpu(threshold_banks, cpu)[bank]->blocks[block]; + threshold_restart_bank(&tr); } /* From c7993890e772c450764d39ba872444307bbdd460 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 10 Nov 2016 15:10:53 -0600 Subject: [PATCH 879/884] x86/amd_nb: Make amd_northbridges internal to amd_nb.c Hide amd_northbridges in amd_nb.c so that external callers will have to use the exported accessor functions. Also, fix some checkpatch.pl warnings. Signed-off-by: Yazen Ghannam Cc: linux-edac Cc: x86-ml Link: http://lkml.kernel.org/r/1478812257-5424-2-git-send-email-Yazen.Ghannam@amd.com Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/amd_nb.h | 18 +++--------------- arch/x86/kernel/amd_nb.c | 33 +++++++++++++++++++++++++-------- 2 files changed, 28 insertions(+), 23 deletions(-) diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index 5e828da2e18f..b442ad75b3b3 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -66,7 +66,6 @@ struct amd_northbridge_info { u64 flags; struct amd_northbridge *nb; }; -extern struct amd_northbridge_info amd_northbridges; #define AMD_NB_GART BIT(0) #define AMD_NB_L3_INDEX_DISABLE BIT(1) @@ -74,20 +73,9 @@ extern struct amd_northbridge_info amd_northbridges; #ifdef CONFIG_AMD_NB -static inline u16 amd_nb_num(void) -{ - return amd_northbridges.num; -} - -static inline bool amd_nb_has_feature(unsigned feature) -{ - return ((amd_northbridges.flags & feature) == feature); -} - -static inline struct amd_northbridge *node_to_amd_nb(int node) -{ - return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL; -} +u16 amd_nb_num(void); +bool amd_nb_has_feature(unsigned int feature); +struct amd_northbridge *node_to_amd_nb(int node); static inline u16 amd_pci_dev_to_node_id(struct pci_dev *pdev) { diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 4fdf6230d93c..015328901896 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -44,8 +44,25 @@ const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[] __initconst = { { } }; -struct amd_northbridge_info amd_northbridges; -EXPORT_SYMBOL(amd_northbridges); +static struct amd_northbridge_info amd_northbridges; + +u16 amd_nb_num(void) +{ + return amd_northbridges.num; +} +EXPORT_SYMBOL(amd_nb_num); + +bool amd_nb_has_feature(unsigned int feature) +{ + return ((amd_northbridges.flags & feature) == feature); +} +EXPORT_SYMBOL(amd_nb_has_feature); + +struct amd_northbridge *node_to_amd_nb(int node) +{ + return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL; +} +EXPORT_SYMBOL(node_to_amd_nb); static struct pci_dev *next_northbridge(struct pci_dev *dev, const struct pci_device_id *ids) @@ -64,7 +81,7 @@ int amd_cache_northbridges(void) struct amd_northbridge *nb; struct pci_dev *misc, *link; - if (amd_nb_num()) + if (amd_northbridges.num) return 0; misc = NULL; @@ -82,7 +99,7 @@ int amd_cache_northbridges(void) amd_northbridges.num = i; link = misc = NULL; - for (i = 0; i != amd_nb_num(); i++) { + for (i = 0; i != amd_northbridges.num; i++) { node_to_amd_nb(i)->misc = misc = next_northbridge(misc, amd_nb_misc_ids); node_to_amd_nb(i)->link = link = @@ -226,14 +243,14 @@ static void amd_cache_gart(void) if (!amd_nb_has_feature(AMD_NB_GART)) return; - flush_words = kmalloc(amd_nb_num() * sizeof(u32), GFP_KERNEL); + flush_words = kmalloc_array(amd_northbridges.num, sizeof(u32), GFP_KERNEL); if (!flush_words) { amd_northbridges.flags &= ~AMD_NB_GART; pr_notice("Cannot initialize GART flush words, GART support disabled\n"); return; } - for (i = 0; i != amd_nb_num(); i++) + for (i = 0; i != amd_northbridges.num; i++) pci_read_config_dword(node_to_amd_nb(i)->misc, 0x9c, &flush_words[i]); } @@ -252,12 +269,12 @@ void amd_flush_garts(void) that it doesn't matter to serialize more. -AK */ spin_lock_irqsave(&gart_lock, flags); flushed = 0; - for (i = 0; i < amd_nb_num(); i++) { + for (i = 0; i < amd_northbridges.num; i++) { pci_write_config_dword(node_to_amd_nb(i)->misc, 0x9c, flush_words[i] | 1); flushed++; } - for (i = 0; i < amd_nb_num(); i++) { + for (i = 0; i < amd_northbridges.num; i++) { u32 w; /* Make sure the hardware actually executed the flush*/ for (;;) { From de6bd0835ac148a0882528fe1fe5bcee709fe911 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 10 Nov 2016 15:10:54 -0600 Subject: [PATCH 880/884] x86/amd_nb: Make all exports EXPORT_SYMBOL_GPL Make all EXPORT_SYMBOL's into EXPORT_SYMBOL_GPL. While we're at it let's fix some checkpatch warnings. Signed-off-by: Yazen Ghannam Cc: linux-edac Cc: x86-ml Link: http://lkml.kernel.org/r/1478812257-5424-3-git-send-email-Yazen.Ghannam@amd.com Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner --- arch/x86/kernel/amd_nb.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 015328901896..3c1cb4595595 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -26,7 +26,7 @@ const struct pci_device_id amd_nb_misc_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F3) }, {} }; -EXPORT_SYMBOL(amd_nb_misc_ids); +EXPORT_SYMBOL_GPL(amd_nb_misc_ids); static const struct pci_device_id amd_nb_link_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F4) }, @@ -50,19 +50,19 @@ u16 amd_nb_num(void) { return amd_northbridges.num; } -EXPORT_SYMBOL(amd_nb_num); +EXPORT_SYMBOL_GPL(amd_nb_num); bool amd_nb_has_feature(unsigned int feature) { return ((amd_northbridges.flags & feature) == feature); } -EXPORT_SYMBOL(amd_nb_has_feature); +EXPORT_SYMBOL_GPL(amd_nb_has_feature); struct amd_northbridge *node_to_amd_nb(int node) { return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL; } -EXPORT_SYMBOL(node_to_amd_nb); +EXPORT_SYMBOL_GPL(node_to_amd_nb); static struct pci_dev *next_northbridge(struct pci_dev *dev, const struct pci_device_id *ids) @@ -91,7 +91,7 @@ int amd_cache_northbridges(void) if (!i) return -ENODEV; - nb = kzalloc(i * sizeof(struct amd_northbridge), GFP_KERNEL); + nb = kcalloc(i, sizeof(struct amd_northbridge), GFP_KERNEL); if (!nb) return -ENOMEM; @@ -156,13 +156,13 @@ struct resource *amd_get_mmconfig_range(struct resource *res) { u32 address; u64 base, msr; - unsigned segn_busn_bits; + unsigned int segn_busn_bits; if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) return NULL; /* assume all cpus from fam10h have mmconfig */ - if (boot_cpu_data.x86 < 0x10) + if (boot_cpu_data.x86 < 0x10) return NULL; address = MSR_FAM10H_MMIO_CONF_BASE; @@ -263,10 +263,12 @@ void amd_flush_garts(void) if (!amd_nb_has_feature(AMD_NB_GART)) return; - /* Avoid races between AGP and IOMMU. In theory it's not needed - but I'm not sure if the hardware won't lose flush requests - when another is pending. This whole thing is so expensive anyways - that it doesn't matter to serialize more. -AK */ + /* + * Avoid races between AGP and IOMMU. In theory it's not needed + * but I'm not sure if the hardware won't lose flush requests + * when another is pending. This whole thing is so expensive anyways + * that it doesn't matter to serialize more. -AK + */ spin_lock_irqsave(&gart_lock, flags); flushed = 0; for (i = 0; i < amd_northbridges.num; i++) { From b791c6b6a55c402367cc544f54921074253db061 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 10 Nov 2016 15:10:55 -0600 Subject: [PATCH 881/884] x86/amd_nb: Add Fam17h Data Fabric as "Northbridge" AMD Fam17h uses a Data Fabric component instead of a traditional Northbridge. However, the DF is similar to a NB in that there is one per die and it uses PCI config D18Fx registers. So let's reuse the existing AMD_NB infrastructure for Data Fabrics. Signed-off-by: Yazen Ghannam Cc: linux-edac Cc: x86-ml Link: http://lkml.kernel.org/r/1478812257-5424-4-git-send-email-Yazen.Ghannam@amd.com Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner --- arch/x86/kernel/amd_nb.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 3c1cb4595595..86c2cec9e6cc 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -13,6 +13,9 @@ #include #include +#define PCI_DEVICE_ID_AMD_17H_DF_F3 0x1463 +#define PCI_DEVICE_ID_AMD_17H_DF_F4 0x1464 + static u32 *flush_words; const struct pci_device_id amd_nb_misc_ids[] = { @@ -24,6 +27,7 @@ const struct pci_device_id amd_nb_misc_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M60H_NB_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F3) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F3) }, {} }; EXPORT_SYMBOL_GPL(amd_nb_misc_ids); @@ -34,6 +38,7 @@ static const struct pci_device_id amd_nb_link_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M60H_NB_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F4) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F4) }, {} }; From ddfe43cdc0da3189feac4bb9f0f818bef6d6e56e Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 10 Nov 2016 15:10:56 -0600 Subject: [PATCH 882/884] x86/amd_nb: Add SMN and Indirect Data Fabric access for AMD Fam17h Some devices on Fam17h can only be accessed through the System Management Network (SMN). The SMN is accessed by a pair of index/data registers in PCI config space. Add a pair of functions to read from and write to the SMN. The Data Fabric on Fam17h allows multiple devices to use the same register space. The registers of a specific device are accessed indirectly using the device's DF InstanceId. Currently, we only need to read from these devices, so only define a read function for now. Signed-off-by: Yazen Ghannam Cc: linux-edac Cc: x86-ml Link: http://lkml.kernel.org/r/1478812257-5424-5-git-send-email-Yazen.Ghannam@amd.com [ Boris: make __amd_smn_rw() even more compact. ] Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/amd_nb.h | 5 ++ arch/x86/kernel/amd_nb.c | 108 +++++++++++++++++++++++++++++++++- 2 files changed, 111 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index b442ad75b3b3..00c88a01301d 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -21,6 +21,10 @@ extern int amd_numa_init(void); extern int amd_get_subcaches(int); extern int amd_set_subcaches(int, unsigned long); +extern int amd_smn_read(u16 node, u32 address, u32 *value); +extern int amd_smn_write(u16 node, u32 address, u32 value); +extern int amd_df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo); + struct amd_l3_cache { unsigned indices; u8 subcaches[4]; @@ -55,6 +59,7 @@ struct threshold_bank { }; struct amd_northbridge { + struct pci_dev *root; struct pci_dev *misc; struct pci_dev *link; struct amd_l3_cache l3_cache; diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 86c2cec9e6cc..458da8509b75 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -13,11 +13,20 @@ #include #include +#define PCI_DEVICE_ID_AMD_17H_ROOT 0x1450 #define PCI_DEVICE_ID_AMD_17H_DF_F3 0x1463 #define PCI_DEVICE_ID_AMD_17H_DF_F4 0x1464 +/* Protect the PCI config register pairs used for SMN and DF indirect access. */ +static DEFINE_MUTEX(smn_mutex); + static u32 *flush_words; +static const struct pci_device_id amd_root_ids[] = { + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_ROOT) }, + {} +}; + const struct pci_device_id amd_nb_misc_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, @@ -80,11 +89,104 @@ static struct pci_dev *next_northbridge(struct pci_dev *dev, return dev; } +static int __amd_smn_rw(u16 node, u32 address, u32 *value, bool write) +{ + struct pci_dev *root; + int err = -ENODEV; + + if (node >= amd_northbridges.num) + goto out; + + root = node_to_amd_nb(node)->root; + if (!root) + goto out; + + mutex_lock(&smn_mutex); + + err = pci_write_config_dword(root, 0x60, address); + if (err) { + pr_warn("Error programming SMN address 0x%x.\n", address); + goto out_unlock; + } + + err = (write ? pci_write_config_dword(root, 0x64, *value) + : pci_read_config_dword(root, 0x64, value)); + if (err) + pr_warn("Error %s SMN address 0x%x.\n", + (write ? "writing to" : "reading from"), address); + +out_unlock: + mutex_unlock(&smn_mutex); + +out: + return err; +} + +int amd_smn_read(u16 node, u32 address, u32 *value) +{ + return __amd_smn_rw(node, address, value, false); +} +EXPORT_SYMBOL_GPL(amd_smn_read); + +int amd_smn_write(u16 node, u32 address, u32 value) +{ + return __amd_smn_rw(node, address, &value, true); +} +EXPORT_SYMBOL_GPL(amd_smn_write); + +/* + * Data Fabric Indirect Access uses FICAA/FICAD. + * + * Fabric Indirect Configuration Access Address (FICAA): Constructed based + * on the device's Instance Id and the PCI function and register offset of + * the desired register. + * + * Fabric Indirect Configuration Access Data (FICAD): There are FICAD LO + * and FICAD HI registers but so far we only need the LO register. + */ +int amd_df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo) +{ + struct pci_dev *F4; + u32 ficaa; + int err = -ENODEV; + + if (node >= amd_northbridges.num) + goto out; + + F4 = node_to_amd_nb(node)->link; + if (!F4) + goto out; + + ficaa = 1; + ficaa |= reg & 0x3FC; + ficaa |= (func & 0x7) << 11; + ficaa |= instance_id << 16; + + mutex_lock(&smn_mutex); + + err = pci_write_config_dword(F4, 0x5C, ficaa); + if (err) { + pr_warn("Error writing DF Indirect FICAA, FICAA=0x%x\n", ficaa); + goto out_unlock; + } + + err = pci_read_config_dword(F4, 0x98, lo); + if (err) + pr_warn("Error reading DF Indirect FICAD LO, FICAA=0x%x.\n", ficaa); + +out_unlock: + mutex_unlock(&smn_mutex); + +out: + return err; +} +EXPORT_SYMBOL_GPL(amd_df_indirect_read); + int amd_cache_northbridges(void) { u16 i = 0; struct amd_northbridge *nb; - struct pci_dev *misc, *link; + struct pci_dev *root, *misc, *link; if (amd_northbridges.num) return 0; @@ -103,8 +205,10 @@ int amd_cache_northbridges(void) amd_northbridges.nb = nb; amd_northbridges.num = i; - link = misc = NULL; + link = misc = root = NULL; for (i = 0; i != amd_northbridges.num; i++) { + node_to_amd_nb(i)->root = root = + next_northbridge(root, amd_root_ids); node_to_amd_nb(i)->misc = misc = next_northbridge(misc, amd_nb_misc_ids); node_to_amd_nb(i)->link = link = From f5382de9d4b1d67cc858c49fa6077720621576ea Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 17 Nov 2016 17:57:27 -0500 Subject: [PATCH 883/884] x86/mce/AMD: Add system physical address translation for AMD Fam17h The Unified Memory Controllers (UMCs) on Fam17h log a normalized address in their MCA_ADDR registers. We need to convert that normalized address to a system physical address in order to support a few facilities: 1) To offline poisoned pages in DRAM proactively in the deferred error handler. 2) To print sysaddr and page info for DRAM ECC errors in EDAC. [ Boris: fixes/cleanups ontop: * hi_addr_offset = 0 - no need for that branch. Stick it all under the HiAddrOffsetEn case. It confines hi_addr_offset's declaration too. * Move variables to the innermost scope they're used at so that we save on stack and not blow it up immediately on function entry. * Do not modify *sys_addr prematurely - we want to not exit early and have modified *sys_addr some, which callers get to see. We either convert to a sys_addr or we don't do anything. And we signal that with the retval of the function. * Rename label out -> out_err - because it is the error path. * No need to pr_err of the conversion failed case: imagine a sparsely-populated machine with UMCs which don't have DIMMs. Callers should look at the retval instead and issue a printk only when really necessary. No need for useless info in dmesg. * s/temp_reg/tmp/ and other variable names shortening => shorter code. * Use BIT() everywhere. * Make error messages more informative. * Small build fix for the !CONFIG_X86_MCE_AMD case. * ... and more minor cleanups. ] Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: Aravind Gopalakrishnan Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Tony Luck Cc: linux-edac Link: http://lkml.kernel.org/r/20161122111133.mjzpvzhf7o7yl2oa@pd.tnic [ Typo fixes. ] Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- arch/x86/include/asm/mce.h | 2 + arch/x86/kernel/cpu/mcheck/mce_amd.c | 200 +++++++++++++++++++++++++++ 3 files changed, 203 insertions(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index bada636d1065..5c81f3967a5d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1025,7 +1025,7 @@ config X86_MCE_INTEL config X86_MCE_AMD def_bool y prompt "AMD MCE features" - depends on X86_MCE && X86_LOCAL_APIC + depends on X86_MCE && X86_LOCAL_APIC && AMD_NB ---help--- Additional support for AMD specific MCE features such as the DRAM Error Threshold. diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index ecda6a93179f..caab413c91ad 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -252,8 +252,10 @@ static inline void cmci_recheck(void) {} #ifdef CONFIG_X86_MCE_AMD void mce_amd_feature_init(struct cpuinfo_x86 *c); +int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr); #else static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { } +static inline int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) { return -EINVAL; }; #endif int mce_available(struct cpuinfo_x86 *c); diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 5b33702ca86e..1a0b0a0e5e2b 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -553,6 +553,206 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) deferred_error_interrupt_enable(c); } +int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) +{ + u64 dram_base_addr, dram_limit_addr, dram_hole_base; + /* We start from the normalized address */ + u64 ret_addr = norm_addr; + + u32 tmp; + + u8 die_id_shift, die_id_mask, socket_id_shift, socket_id_mask; + u8 intlv_num_dies, intlv_num_chan, intlv_num_sockets; + u8 intlv_addr_sel, intlv_addr_bit; + u8 num_intlv_bits, hashed_bit; + u8 lgcy_mmio_hole_en, base = 0; + u8 cs_mask, cs_id = 0; + bool hash_enabled = false; + + /* Read D18F0x1B4 (DramOffset), check if base 1 is used. */ + if (amd_df_indirect_read(nid, 0, 0x1B4, umc, &tmp)) + goto out_err; + + /* Remove HiAddrOffset from normalized address, if enabled: */ + if (tmp & BIT(0)) { + u64 hi_addr_offset = (tmp & GENMASK_ULL(31, 20)) << 8; + + if (norm_addr >= hi_addr_offset) { + ret_addr -= hi_addr_offset; + base = 1; + } + } + + /* Read D18F0x110 (DramBaseAddress). */ + if (amd_df_indirect_read(nid, 0, 0x110 + (8 * base), umc, &tmp)) + goto out_err; + + /* Check if address range is valid. */ + if (!(tmp & BIT(0))) { + pr_err("%s: Invalid DramBaseAddress range: 0x%x.\n", + __func__, tmp); + goto out_err; + } + + lgcy_mmio_hole_en = tmp & BIT(1); + intlv_num_chan = (tmp >> 4) & 0xF; + intlv_addr_sel = (tmp >> 8) & 0x7; + dram_base_addr = (tmp & GENMASK_ULL(31, 12)) << 16; + + /* {0, 1, 2, 3} map to address bits {8, 9, 10, 11} respectively */ + if (intlv_addr_sel > 3) { + pr_err("%s: Invalid interleave address select %d.\n", + __func__, intlv_addr_sel); + goto out_err; + } + + /* Read D18F0x114 (DramLimitAddress). */ + if (amd_df_indirect_read(nid, 0, 0x114 + (8 * base), umc, &tmp)) + goto out_err; + + intlv_num_sockets = (tmp >> 8) & 0x1; + intlv_num_dies = (tmp >> 10) & 0x3; + dram_limit_addr = ((tmp & GENMASK_ULL(31, 12)) << 16) | GENMASK_ULL(27, 0); + + intlv_addr_bit = intlv_addr_sel + 8; + + /* Re-use intlv_num_chan by setting it equal to log2(#channels) */ + switch (intlv_num_chan) { + case 0: intlv_num_chan = 0; break; + case 1: intlv_num_chan = 1; break; + case 3: intlv_num_chan = 2; break; + case 5: intlv_num_chan = 3; break; + case 7: intlv_num_chan = 4; break; + + case 8: intlv_num_chan = 1; + hash_enabled = true; + break; + default: + pr_err("%s: Invalid number of interleaved channels %d.\n", + __func__, intlv_num_chan); + goto out_err; + } + + num_intlv_bits = intlv_num_chan; + + if (intlv_num_dies > 2) { + pr_err("%s: Invalid number of interleaved nodes/dies %d.\n", + __func__, intlv_num_dies); + goto out_err; + } + + num_intlv_bits += intlv_num_dies; + + /* Add a bit if sockets are interleaved. */ + num_intlv_bits += intlv_num_sockets; + + /* Assert num_intlv_bits <= 4 */ + if (num_intlv_bits > 4) { + pr_err("%s: Invalid interleave bits %d.\n", + __func__, num_intlv_bits); + goto out_err; + } + + if (num_intlv_bits > 0) { + u64 temp_addr_x, temp_addr_i, temp_addr_y; + u8 die_id_bit, sock_id_bit, cs_fabric_id; + + /* + * Read FabricBlockInstanceInformation3_CS[BlockFabricID]. + * This is the fabric id for this coherent slave. Use + * umc/channel# as instance id of the coherent slave + * for FICAA. + */ + if (amd_df_indirect_read(nid, 0, 0x50, umc, &tmp)) + goto out_err; + + cs_fabric_id = (tmp >> 8) & 0xFF; + die_id_bit = 0; + + /* If interleaved over more than 1 channel: */ + if (intlv_num_chan) { + die_id_bit = intlv_num_chan; + cs_mask = (1 << die_id_bit) - 1; + cs_id = cs_fabric_id & cs_mask; + } + + sock_id_bit = die_id_bit; + + /* Read D18F1x208 (SystemFabricIdMask). */ + if (intlv_num_dies || intlv_num_sockets) + if (amd_df_indirect_read(nid, 1, 0x208, umc, &tmp)) + goto out_err; + + /* If interleaved over more than 1 die. */ + if (intlv_num_dies) { + sock_id_bit = die_id_bit + intlv_num_dies; + die_id_shift = (tmp >> 24) & 0xF; + die_id_mask = (tmp >> 8) & 0xFF; + + cs_id |= ((cs_fabric_id & die_id_mask) >> die_id_shift) << die_id_bit; + } + + /* If interleaved over more than 1 socket. */ + if (intlv_num_sockets) { + socket_id_shift = (tmp >> 28) & 0xF; + socket_id_mask = (tmp >> 16) & 0xFF; + + cs_id |= ((cs_fabric_id & socket_id_mask) >> socket_id_shift) << sock_id_bit; + } + + /* + * The pre-interleaved address consists of XXXXXXIIIYYYYY + * where III is the ID for this CS, and XXXXXXYYYYY are the + * address bits from the post-interleaved address. + * "num_intlv_bits" has been calculated to tell us how many "I" + * bits there are. "intlv_addr_bit" tells us how many "Y" bits + * there are (where "I" starts). + */ + temp_addr_y = ret_addr & GENMASK_ULL(intlv_addr_bit-1, 0); + temp_addr_i = (cs_id << intlv_addr_bit); + temp_addr_x = (ret_addr & GENMASK_ULL(63, intlv_addr_bit)) << num_intlv_bits; + ret_addr = temp_addr_x | temp_addr_i | temp_addr_y; + } + + /* Add dram base address */ + ret_addr += dram_base_addr; + + /* If legacy MMIO hole enabled */ + if (lgcy_mmio_hole_en) { + if (amd_df_indirect_read(nid, 0, 0x104, umc, &tmp)) + goto out_err; + + dram_hole_base = tmp & GENMASK(31, 24); + if (ret_addr >= dram_hole_base) + ret_addr += (BIT_ULL(32) - dram_hole_base); + } + + if (hash_enabled) { + /* Save some parentheses and grab ls-bit at the end. */ + hashed_bit = (ret_addr >> 12) ^ + (ret_addr >> 18) ^ + (ret_addr >> 21) ^ + (ret_addr >> 30) ^ + cs_id; + + hashed_bit &= BIT(0); + + if (hashed_bit != ((ret_addr >> intlv_addr_bit) & BIT(0))) + ret_addr ^= BIT(intlv_addr_bit); + } + + /* Is calculated system address is above DRAM limit address? */ + if (ret_addr > dram_limit_addr) + goto out_err; + + *sys_addr = ret_addr; + return 0; + +out_err: + return -EINVAL; +} +EXPORT_SYMBOL_GPL(umc_normaddr_to_sysaddr); + static void __log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc) { From 3f5a7896a5096fd50030a04d4c3f28a7441e30a5 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 18 Nov 2016 09:48:36 -0800 Subject: [PATCH 884/884] x86/mce: Include the PPIN in MCE records when available Intel Xeons from Ivy Bridge onwards support a processor identification number set in the factory. To the user this is a handy unique number to identify a particular CPU. Intel can decode this to the fab/production run to track errors. On systems that have it, include it in the machine check record. I'm told that this would be helpful for users that run large data centers with multi-socket servers to keep track of which CPUs are seeing errors. Boris: * Add some clarifying comments and spacing. * Mask out [63:2] in the disabled-but-not-locked case * Call the MSR variable "val" for more readability. Signed-off-by: Tony Luck Cc: Ashok Raj Cc: linux-edac Cc: x86-ml Link: http://lkml.kernel.org/r/20161123114855.njguoaygp3qnbkia@pd.tnic Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/include/asm/msr-index.h | 4 +++ arch/x86/include/uapi/asm/mce.h | 1 + arch/x86/kernel/cpu/mcheck/mce.c | 4 +++ arch/x86/kernel/cpu/mcheck/mce_intel.c | 37 ++++++++++++++++++++++++++ 5 files changed, 47 insertions(+) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index a39629206864..d625b651e526 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -193,6 +193,7 @@ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ +#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 78f3760ca1f2..710273c617b8 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -37,6 +37,10 @@ #define EFER_FFXSR (1<<_EFER_FFXSR) /* Intel MSRs. Some also available on other CPUs */ + +#define MSR_PPIN_CTL 0x0000004e +#define MSR_PPIN 0x0000004f + #define MSR_IA32_PERFCTR0 0x000000c1 #define MSR_IA32_PERFCTR1 0x000000c2 #define MSR_FSB_FREQ 0x000000cd diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h index 69a6e07e3149..eb6247a7009b 100644 --- a/arch/x86/include/uapi/asm/mce.h +++ b/arch/x86/include/uapi/asm/mce.h @@ -28,6 +28,7 @@ struct mce { __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */ __u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */ __u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */ + __u64 ppin; /* Protected Processor Inventory Number */ }; #define MCE_GET_RECORD_LEN _IOR('M', 1, int) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index aab96f8d52b0..a3cb27af4f9b 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -43,6 +43,7 @@ #include #include +#include #include #include #include @@ -135,6 +136,9 @@ void mce_setup(struct mce *m) m->socketid = cpu_data(m->extcpu).phys_proc_id; m->apicid = cpu_data(m->extcpu).initial_apicid; rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap); + + if (this_cpu_has(X86_FEATURE_INTEL_PPIN)) + rdmsrl(MSR_PPIN, m->ppin); } DEFINE_PER_CPU(struct mce, injectm); diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index be0b2fad47c5..190b3e6cef4d 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include #include #include @@ -464,11 +466,46 @@ static void intel_clear_lmce(void) wrmsrl(MSR_IA32_MCG_EXT_CTL, val); } +static void intel_ppin_init(struct cpuinfo_x86 *c) +{ + unsigned long long val; + + /* + * Even if testing the presence of the MSR would be enough, we don't + * want to risk the situation where other models reuse this MSR for + * other purposes. + */ + switch (c->x86_model) { + case INTEL_FAM6_IVYBRIDGE_X: + case INTEL_FAM6_HASWELL_X: + case INTEL_FAM6_BROADWELL_XEON_D: + case INTEL_FAM6_BROADWELL_X: + case INTEL_FAM6_SKYLAKE_X: + if (rdmsrl_safe(MSR_PPIN_CTL, &val)) + return; + + if ((val & 3UL) == 1UL) { + /* PPIN available but disabled: */ + return; + } + + /* If PPIN is disabled, but not locked, try to enable: */ + if (!(val & 3UL)) { + wrmsrl_safe(MSR_PPIN_CTL, val | 2UL); + rdmsrl_safe(MSR_PPIN_CTL, &val); + } + + if ((val & 3UL) == 2UL) + set_cpu_cap(c, X86_FEATURE_INTEL_PPIN); + } +} + void mce_intel_feature_init(struct cpuinfo_x86 *c) { intel_init_thermal(c); intel_init_cmci(); intel_init_lmce(); + intel_ppin_init(c); } void mce_intel_feature_clear(struct cpuinfo_x86 *c)