From f0414087a1b9933adac40974c3ea50261227b5f0 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 22 Jul 2019 14:44:43 +0200
Subject: [PATCH 001/157] dmaengine: dw-edma: fix unnecessary stack usage

Putting large constant data on the stack causes unnecessary overhead
and stack usage:

drivers/dma/dw-edma/dw-edma-v0-debugfs.c:285:6: error: stack frame size of 1376 bytes in function 'dw_edma_v0_debugfs_on' [-Werror,-Wframe-larger-than=]

Mark the variable 'static const' in order for the compiler to move it
into the .rodata section where it does no such harm.

Fixes: 305aebeff879 ("dmaengine: Add Synopsys eDMA IP version 0 debugfs support")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Gustavo Pimentel <gustavo.pimentel@synopsys.com>
Link: https://lore.kernel.org/r/20190722124457.1093886-1-arnd@arndb.de
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/dw-edma/dw-edma-v0-debugfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma/dw-edma/dw-edma-v0-debugfs.c b/drivers/dma/dw-edma/dw-edma-v0-debugfs.c
index 3226f528cc11..5728b6fe938c 100644
--- a/drivers/dma/dw-edma/dw-edma-v0-debugfs.c
+++ b/drivers/dma/dw-edma/dw-edma-v0-debugfs.c
@@ -48,7 +48,7 @@ static struct {
 } lim[2][EDMA_V0_MAX_NR_CH];
 
 struct debugfs_entries {
-	char					name[24];
+	const char				*name;
 	dma_addr_t				*reg;
 };
 

From 756c3ef93492af382c541e039c1417b96a3d335e Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 22 Jul 2019 14:44:44 +0200
Subject: [PATCH 002/157] dmaengine: dw-edma: fix __iomem type confusion

The new driver mixes up dma_addr_t and __iomem pointers, which results
in warnings on some 32-bit architectures, like:

drivers/dma/dw-edma/dw-edma-v0-core.c: In function '__dw_regs':
drivers/dma/dw-edma/dw-edma-v0-core.c:28:9: error: cast to pointer from integer of different size [-Werror=int-to-pointer-cast]
  return (struct dw_edma_v0_regs __iomem *)dw->rg_region.vaddr;

Make it use __iomem pointers consistently here, and avoid using dma_addr_t
for __iomem tokens altogether.

A small complication here is the debugfs code, which passes an __iomem
token as the private data for debugfs files, requiring the use of
extra __force.

Fixes: 7e4b8a4fbe2c ("dmaengine: Add Synopsys eDMA IP version 0 support")
Link: https://lore.kernel.org/lkml/20190617131918.2518727-1-arnd@arndb.de/
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Link: https://lore.kernel.org/r/20190722124457.1093886-2-arnd@arndb.de
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/dw-edma/dw-edma-core.h       |  2 +-
 drivers/dma/dw-edma/dw-edma-pcie.c       | 18 ++++++++--------
 drivers/dma/dw-edma/dw-edma-v0-core.c    | 10 ++++-----
 drivers/dma/dw-edma/dw-edma-v0-debugfs.c | 27 ++++++++++++------------
 4 files changed, 29 insertions(+), 28 deletions(-)

diff --git a/drivers/dma/dw-edma/dw-edma-core.h b/drivers/dma/dw-edma/dw-edma-core.h
index b6cc90cbc9dc..4e5f9f6e901b 100644
--- a/drivers/dma/dw-edma/dw-edma-core.h
+++ b/drivers/dma/dw-edma/dw-edma-core.h
@@ -50,7 +50,7 @@ struct dw_edma_burst {
 
 struct dw_edma_region {
 	phys_addr_t			paddr;
-	dma_addr_t			vaddr;
+	void				__iomem *vaddr;
 	size_t				sz;
 };
 
diff --git a/drivers/dma/dw-edma/dw-edma-pcie.c b/drivers/dma/dw-edma/dw-edma-pcie.c
index 4c96e1c948f2..dc85f55e1bb8 100644
--- a/drivers/dma/dw-edma/dw-edma-pcie.c
+++ b/drivers/dma/dw-edma/dw-edma-pcie.c
@@ -130,19 +130,19 @@ static int dw_edma_pcie_probe(struct pci_dev *pdev,
 	chip->id = pdev->devfn;
 	chip->irq = pdev->irq;
 
-	dw->rg_region.vaddr = (dma_addr_t)pcim_iomap_table(pdev)[pdata->rg_bar];
+	dw->rg_region.vaddr = pcim_iomap_table(pdev)[pdata->rg_bar];
 	dw->rg_region.vaddr += pdata->rg_off;
 	dw->rg_region.paddr = pdev->resource[pdata->rg_bar].start;
 	dw->rg_region.paddr += pdata->rg_off;
 	dw->rg_region.sz = pdata->rg_sz;
 
-	dw->ll_region.vaddr = (dma_addr_t)pcim_iomap_table(pdev)[pdata->ll_bar];
+	dw->ll_region.vaddr = pcim_iomap_table(pdev)[pdata->ll_bar];
 	dw->ll_region.vaddr += pdata->ll_off;
 	dw->ll_region.paddr = pdev->resource[pdata->ll_bar].start;
 	dw->ll_region.paddr += pdata->ll_off;
 	dw->ll_region.sz = pdata->ll_sz;
 
-	dw->dt_region.vaddr = (dma_addr_t)pcim_iomap_table(pdev)[pdata->dt_bar];
+	dw->dt_region.vaddr = pcim_iomap_table(pdev)[pdata->dt_bar];
 	dw->dt_region.vaddr += pdata->dt_off;
 	dw->dt_region.paddr = pdev->resource[pdata->dt_bar].start;
 	dw->dt_region.paddr += pdata->dt_off;
@@ -158,17 +158,17 @@ static int dw_edma_pcie_probe(struct pci_dev *pdev,
 	pci_dbg(pdev, "Mode:\t%s\n",
 		dw->mode == EDMA_MODE_LEGACY ? "Legacy" : "Unroll");
 
-	pci_dbg(pdev, "Registers:\tBAR=%u, off=0x%.8lx, sz=0x%zx bytes, addr(v=%pa, p=%pa)\n",
+	pci_dbg(pdev, "Registers:\tBAR=%u, off=0x%.8lx, sz=0x%zx bytes, addr(v=%p, p=%pa)\n",
 		pdata->rg_bar, pdata->rg_off, pdata->rg_sz,
-		&dw->rg_region.vaddr, &dw->rg_region.paddr);
+		dw->rg_region.vaddr, &dw->rg_region.paddr);
 
-	pci_dbg(pdev, "L. List:\tBAR=%u, off=0x%.8lx, sz=0x%zx bytes, addr(v=%pa, p=%pa)\n",
+	pci_dbg(pdev, "L. List:\tBAR=%u, off=0x%.8lx, sz=0x%zx bytes, addr(v=%p, p=%pa)\n",
 		pdata->ll_bar, pdata->ll_off, pdata->ll_sz,
-		&dw->ll_region.vaddr, &dw->ll_region.paddr);
+		dw->ll_region.vaddr, &dw->ll_region.paddr);
 
-	pci_dbg(pdev, "Data:\tBAR=%u, off=0x%.8lx, sz=0x%zx bytes, addr(v=%pa, p=%pa)\n",
+	pci_dbg(pdev, "Data:\tBAR=%u, off=0x%.8lx, sz=0x%zx bytes, addr(v=%p, p=%pa)\n",
 		pdata->dt_bar, pdata->dt_off, pdata->dt_sz,
-		&dw->dt_region.vaddr, &dw->dt_region.paddr);
+		dw->dt_region.vaddr, &dw->dt_region.paddr);
 
 	pci_dbg(pdev, "Nr. IRQs:\t%u\n", dw->nr_irqs);
 
diff --git a/drivers/dma/dw-edma/dw-edma-v0-core.c b/drivers/dma/dw-edma/dw-edma-v0-core.c
index 8a3180ed49a6..97e3fd41c8a8 100644
--- a/drivers/dma/dw-edma/dw-edma-v0-core.c
+++ b/drivers/dma/dw-edma/dw-edma-v0-core.c
@@ -25,7 +25,7 @@ enum dw_edma_control {
 
 static inline struct dw_edma_v0_regs __iomem *__dw_regs(struct dw_edma *dw)
 {
-	return (struct dw_edma_v0_regs __iomem *)dw->rg_region.vaddr;
+	return dw->rg_region.vaddr;
 }
 
 #define SET(dw, name, value)				\
@@ -192,13 +192,13 @@ u32 dw_edma_v0_core_status_abort_int(struct dw_edma *dw, enum dw_edma_dir dir)
 static void dw_edma_v0_core_write_chunk(struct dw_edma_chunk *chunk)
 {
 	struct dw_edma_burst *child;
-	struct dw_edma_v0_lli *lli;
-	struct dw_edma_v0_llp *llp;
+	struct dw_edma_v0_lli __iomem *lli;
+	struct dw_edma_v0_llp __iomem *llp;
 	u32 control = 0, i = 0;
 	u64 sar, dar, addr;
 	int j;
 
-	lli = (struct dw_edma_v0_lli *)chunk->ll_region.vaddr;
+	lli = chunk->ll_region.vaddr;
 
 	if (chunk->cb)
 		control = DW_EDMA_V0_CB;
@@ -224,7 +224,7 @@ static void dw_edma_v0_core_write_chunk(struct dw_edma_chunk *chunk)
 		i++;
 	}
 
-	llp = (struct dw_edma_v0_llp *)&lli[i];
+	llp = (void __iomem *)&lli[i];
 	control = DW_EDMA_V0_LLP | DW_EDMA_V0_TCB;
 	if (!chunk->cb)
 		control |= DW_EDMA_V0_CB;
diff --git a/drivers/dma/dw-edma/dw-edma-v0-debugfs.c b/drivers/dma/dw-edma/dw-edma-v0-debugfs.c
index 5728b6fe938c..42739508c0d8 100644
--- a/drivers/dma/dw-edma/dw-edma-v0-debugfs.c
+++ b/drivers/dma/dw-edma/dw-edma-v0-debugfs.c
@@ -14,7 +14,7 @@
 #include "dw-edma-core.h"
 
 #define REGS_ADDR(name) \
-	((dma_addr_t *)&regs->name)
+	((void __force *)&regs->name)
 #define REGISTER(name) \
 	{ #name, REGS_ADDR(name) }
 
@@ -40,11 +40,11 @@
 
 static struct dentry				*base_dir;
 static struct dw_edma				*dw;
-static struct dw_edma_v0_regs			*regs;
+static struct dw_edma_v0_regs			__iomem *regs;
 
 static struct {
-	void					*start;
-	void					*end;
+	void					__iomem *start;
+	void					__iomem *end;
 } lim[2][EDMA_V0_MAX_NR_CH];
 
 struct debugfs_entries {
@@ -54,22 +54,23 @@ struct debugfs_entries {
 
 static int dw_edma_debugfs_u32_get(void *data, u64 *val)
 {
+	void __iomem *reg = (void __force __iomem *)data;
 	if (dw->mode == EDMA_MODE_LEGACY &&
-	    data >= (void *)&regs->type.legacy.ch) {
-		void *ptr = (void *)&regs->type.legacy.ch;
+	    reg >= (void __iomem *)&regs->type.legacy.ch) {
+		void __iomem *ptr = &regs->type.legacy.ch;
 		u32 viewport_sel = 0;
 		unsigned long flags;
 		u16 ch;
 
 		for (ch = 0; ch < dw->wr_ch_cnt; ch++)
-			if (lim[0][ch].start >= data && data < lim[0][ch].end) {
-				ptr += (data - lim[0][ch].start);
+			if (lim[0][ch].start >= reg && reg < lim[0][ch].end) {
+				ptr += (reg - lim[0][ch].start);
 				goto legacy_sel_wr;
 			}
 
 		for (ch = 0; ch < dw->rd_ch_cnt; ch++)
-			if (lim[1][ch].start >= data && data < lim[1][ch].end) {
-				ptr += (data - lim[1][ch].start);
+			if (lim[1][ch].start >= reg && reg < lim[1][ch].end) {
+				ptr += (reg - lim[1][ch].start);
 				goto legacy_sel_rd;
 			}
 
@@ -86,7 +87,7 @@ legacy_sel_wr:
 
 		raw_spin_unlock_irqrestore(&dw->lock, flags);
 	} else {
-		*val = readl(data);
+		*val = readl(reg);
 	}
 
 	return 0;
@@ -105,7 +106,7 @@ static void dw_edma_debugfs_create_x32(const struct debugfs_entries entries[],
 	}
 }
 
-static void dw_edma_debugfs_regs_ch(struct dw_edma_v0_ch_regs *regs,
+static void dw_edma_debugfs_regs_ch(struct dw_edma_v0_ch_regs __iomem *regs,
 				    struct dentry *dir)
 {
 	int nr_entries;
@@ -288,7 +289,7 @@ void dw_edma_v0_debugfs_on(struct dw_edma_chip *chip)
 	if (!dw)
 		return;
 
-	regs = (struct dw_edma_v0_regs *)dw->rg_region.vaddr;
+	regs = dw->rg_region.vaddr;
 	if (!regs)
 		return;
 

From 6f4722b1d1ebf274deb0459a36fad57a7d7fab31 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 22 Jul 2019 14:44:45 +0200
Subject: [PATCH 003/157] dmaengine: dw-edma: fix endianess confusion

When building with 'make C=1', sparse reports an endianess bug:

drivers/dma/dw-edma/dw-edma-v0-debugfs.c:60:30: warning: cast removes address space of expression
drivers/dma/dw-edma/dw-edma-v0-debugfs.c:86:24: warning: incorrect type in argument 1 (different address spaces)
drivers/dma/dw-edma/dw-edma-v0-debugfs.c:86:24:    expected void const volatile [noderef] <asn:2>*addr
drivers/dma/dw-edma/dw-edma-v0-debugfs.c:86:24:    got void *[assigned] ptr
drivers/dma/dw-edma/dw-edma-v0-debugfs.c:86:24: warning: incorrect type in argument 1 (different address spaces)
drivers/dma/dw-edma/dw-edma-v0-debugfs.c:86:24:    expected void const volatile [noderef] <asn:2>*addr
drivers/dma/dw-edma/dw-edma-v0-debugfs.c:86:24:    got void *[assigned] ptr
drivers/dma/dw-edma/dw-edma-v0-debugfs.c:86:24: warning: incorrect type in argument 1 (different address spaces)
drivers/dma/dw-edma/dw-edma-v0-debugfs.c:86:24:    expected void const volatile [noderef] <asn:2>*addr
drivers/dma/dw-edma/dw-edma-v0-debugfs.c:86:24:    got void *[assigned] ptr

The current code is clearly wrong, as it passes an endian-swapped word
into a register function where it gets swapped again. Just pass the variables
directly into lower_32_bits()/upper_32_bits().

Fixes: 7e4b8a4fbe2c ("dmaengine: Add Synopsys eDMA IP version 0 support")
Link: https://lore.kernel.org/lkml/20190617131820.2470686-1-arnd@arndb.de/
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Gustavo Pimentel <gustavo.pimentel@synopsys.com>
Link: https://lore.kernel.org/r/20190722124457.1093886-3-arnd@arndb.de
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/dw-edma/dw-edma-v0-core.c | 24 ++++++++++--------------
 1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/drivers/dma/dw-edma/dw-edma-v0-core.c b/drivers/dma/dw-edma/dw-edma-v0-core.c
index 97e3fd41c8a8..692de47b1670 100644
--- a/drivers/dma/dw-edma/dw-edma-v0-core.c
+++ b/drivers/dma/dw-edma/dw-edma-v0-core.c
@@ -195,7 +195,6 @@ static void dw_edma_v0_core_write_chunk(struct dw_edma_chunk *chunk)
 	struct dw_edma_v0_lli __iomem *lli;
 	struct dw_edma_v0_llp __iomem *llp;
 	u32 control = 0, i = 0;
-	u64 sar, dar, addr;
 	int j;
 
 	lli = chunk->ll_region.vaddr;
@@ -214,13 +213,11 @@ static void dw_edma_v0_core_write_chunk(struct dw_edma_chunk *chunk)
 		/* Transfer size */
 		SET_LL(&lli[i].transfer_size, child->sz);
 		/* SAR - low, high */
-		sar = cpu_to_le64(child->sar);
-		SET_LL(&lli[i].sar_low, lower_32_bits(sar));
-		SET_LL(&lli[i].sar_high, upper_32_bits(sar));
+		SET_LL(&lli[i].sar_low, lower_32_bits(child->sar));
+		SET_LL(&lli[i].sar_high, upper_32_bits(child->sar));
 		/* DAR - low, high */
-		dar = cpu_to_le64(child->dar);
-		SET_LL(&lli[i].dar_low, lower_32_bits(dar));
-		SET_LL(&lli[i].dar_high, upper_32_bits(dar));
+		SET_LL(&lli[i].dar_low, lower_32_bits(child->dar));
+		SET_LL(&lli[i].dar_high, upper_32_bits(child->dar));
 		i++;
 	}
 
@@ -232,9 +229,8 @@ static void dw_edma_v0_core_write_chunk(struct dw_edma_chunk *chunk)
 	/* Channel control */
 	SET_LL(&llp->control, control);
 	/* Linked list  - low, high */
-	addr = cpu_to_le64(chunk->ll_region.paddr);
-	SET_LL(&llp->llp_low, lower_32_bits(addr));
-	SET_LL(&llp->llp_high, upper_32_bits(addr));
+	SET_LL(&llp->llp_low, lower_32_bits(chunk->ll_region.paddr));
+	SET_LL(&llp->llp_high, upper_32_bits(chunk->ll_region.paddr));
 }
 
 void dw_edma_v0_core_start(struct dw_edma_chunk *chunk, bool first)
@@ -242,7 +238,6 @@ void dw_edma_v0_core_start(struct dw_edma_chunk *chunk, bool first)
 	struct dw_edma_chan *chan = chunk->chan;
 	struct dw_edma *dw = chan->chip->dw;
 	u32 tmp;
-	u64 llp;
 
 	dw_edma_v0_core_write_chunk(chunk);
 
@@ -262,9 +257,10 @@ void dw_edma_v0_core_start(struct dw_edma_chunk *chunk, bool first)
 		SET_CH(dw, chan->dir, chan->id, ch_control1,
 		       (DW_EDMA_V0_CCS | DW_EDMA_V0_LLE));
 		/* Linked list - low, high */
-		llp = cpu_to_le64(chunk->ll_region.paddr);
-		SET_CH(dw, chan->dir, chan->id, llp_low, lower_32_bits(llp));
-		SET_CH(dw, chan->dir, chan->id, llp_high, upper_32_bits(llp));
+		SET_CH(dw, chan->dir, chan->id, llp_low,
+		       lower_32_bits(chunk->ll_region.paddr));
+		SET_CH(dw, chan->dir, chan->id, llp_high,
+		       upper_32_bits(chunk->ll_region.paddr));
 	}
 	/* Doorbell */
 	SET_RW(dw, chan->dir, doorbell,

From 5d6fb560729a5d5554e23db8d00eb57cd0021083 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 12 Jul 2019 11:13:30 +0200
Subject: [PATCH 004/157] dmaengine: ste_dma40: fix unneeded variable warning

clang-9 points out that there are two variables that depending on the
configuration may only be used in an ARRAY_SIZE() expression but not
referenced:

drivers/dma/ste_dma40.c:145:12: error: variable 'd40_backup_regs' is not needed and will not be emitted [-Werror,-Wunneeded-internal-declaration]
static u32 d40_backup_regs[] = {
           ^
drivers/dma/ste_dma40.c:214:12: error: variable 'd40_backup_regs_chan' is not needed and will not be emitted [-Werror,-Wunneeded-internal-declaration]
static u32 d40_backup_regs_chan[] = {

Mark these __maybe_unused to shut up the warning.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Nathan Chancellor <natechancellor@gmail.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20190712091357.744515-1-arnd@arndb.de
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/ste_dma40.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c
index 89d710899010..de8bfd9a76e9 100644
--- a/drivers/dma/ste_dma40.c
+++ b/drivers/dma/ste_dma40.c
@@ -142,7 +142,7 @@ enum d40_events {
  * when the DMA hw is powered off.
  * TODO: Add save/restore of D40_DREG_GCC on dma40 v3 or later, if that works.
  */
-static u32 d40_backup_regs[] = {
+static __maybe_unused u32 d40_backup_regs[] = {
 	D40_DREG_LCPA,
 	D40_DREG_LCLA,
 	D40_DREG_PRMSE,
@@ -211,7 +211,7 @@ static u32 d40_backup_regs_v4b[] = {
 
 #define BACKUP_REGS_SZ_V4B ARRAY_SIZE(d40_backup_regs_v4b)
 
-static u32 d40_backup_regs_chan[] = {
+static __maybe_unused u32 d40_backup_regs_chan[] = {
 	D40_CHAN_REG_SSCFG,
 	D40_CHAN_REG_SSELT,
 	D40_CHAN_REG_SSPTR,

From d105ef8120dd522c3233a1395ee0b318ed0929e8 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Tue, 9 Jul 2019 16:32:58 +0800
Subject: [PATCH 005/157] dmaengine: tegra210-adma: Fix unused function
 warnings

If CONFIG_PM is not set, build warnings:

drivers/dma/tegra210-adma.c:747:12: warning: tegra_adma_runtime_resume defined but not used [-Wunused-function]
 static int tegra_adma_runtime_resume(struct device *dev)
drivers/dma/tegra210-adma.c:715:12: warning: tegra_adma_runtime_suspend defined but not used [-Wunused-function]
 static int tegra_adma_runtime_suspend(struct device *dev)

Mark the two function as __maybe_unused.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Fixes: 3145d73e69ba ("dmaengine: tegra210-adma: remove PM_CLK dependency")
Fixes: f46b195799b5 ("dmaengine: tegra-adma: Add support for Tegra210 ADMA")
Reported-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Jon Hunter <jonathanh@nvidia.com>
Link: https://lore.kernel.org/r/20190709083258.57112-1-yuehaibing@huawei.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/tegra210-adma.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/dma/tegra210-adma.c b/drivers/dma/tegra210-adma.c
index 2805853e963f..b33cf6e8ab8e 100644
--- a/drivers/dma/tegra210-adma.c
+++ b/drivers/dma/tegra210-adma.c
@@ -712,7 +712,7 @@ static struct dma_chan *tegra_dma_of_xlate(struct of_phandle_args *dma_spec,
 	return chan;
 }
 
-static int tegra_adma_runtime_suspend(struct device *dev)
+static int __maybe_unused tegra_adma_runtime_suspend(struct device *dev)
 {
 	struct tegra_adma *tdma = dev_get_drvdata(dev);
 	struct tegra_adma_chan_regs *ch_reg;
@@ -744,7 +744,7 @@ clk_disable:
 	return 0;
 }
 
-static int tegra_adma_runtime_resume(struct device *dev)
+static int __maybe_unused tegra_adma_runtime_resume(struct device *dev)
 {
 	struct tegra_adma *tdma = dev_get_drvdata(dev);
 	struct tegra_adma_chan_regs *ch_reg;

From fab7772bfbcfe8fb8e3e352a6a8fcaf044cded17 Mon Sep 17 00:00:00 2001
From: Anthony Iliopoulos <ailiopoulos@suse.com>
Date: Mon, 29 Jul 2019 14:40:40 +0200
Subject: [PATCH 006/157] nvme-multipath: revalidate nvme_ns_head gendisk in
 nvme_validate_ns

When CONFIG_NVME_MULTIPATH is set, only the hidden gendisk associated
with the per-controller ns is run through revalidate_disk when a
rescan is triggered, while the visible blockdev never gets its size
(bdev->bd_inode->i_size) updated to reflect any capacity changes that
may have occurred.

This prevents online resizing of nvme block devices and in extension of
any filesystems atop that will are unable to expand while mounted, as
userspace relies on the blockdev size for obtaining the disk capacity
(via BLKGETSIZE/64 ioctls).

Fix this by explicitly revalidating the actual namespace gendisk in
addition to the per-controller gendisk, when multipath is enabled.

Signed-off-by: Anthony Iliopoulos <ailiopoulos@suse.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 8f3fbe5ca937..80c7a7ee240b 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1715,6 +1715,7 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
 	if (ns->head->disk) {
 		nvme_update_disk_info(ns->head->disk, ns, id);
 		blk_queue_stack_limits(ns->head->disk->queue, ns->queue);
+		revalidate_disk(ns->head->disk);
 	}
 #endif
 }

From 2067b2b3f4846402a040286135f98f46f8919939 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 30 Jul 2019 14:38:51 +0100
Subject: [PATCH 007/157] afs: Fix the CB.ProbeUuid service handler to reply
 correctly

Fix the service handler function for the CB.ProbeUuid RPC call so that it
replies in the correct manner - that is an empty reply for success and an
abort of 1 for failure.

Putting 0 or 1 in an integer in the body of the reply should result in the
fileserver throwing an RX_PROTOCOL_ERROR abort and discarding its record of
the client; older servers, however, don't necessarily check that all the
data got consumed, and so might incorrectly think that they got a positive
response and associate the client with the wrong host record.

If the client is incorrectly associated, this will result in callbacks
intended for a different client being delivered to this one and then, when
the other client connects and responds positively, all of the callback
promises meant for the client that issued the improper response will be
lost and it won't receive any further change notifications.

Fixes: 9396d496d745 ("afs: support the CB.ProbeUuid RPC op")
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeffrey Altman <jaltman@auristor.com>
---
 fs/afs/cmservice.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 4f1b6f466ff5..b86195e4dc6c 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -505,18 +505,14 @@ static void SRXAFSCB_ProbeUuid(struct work_struct *work)
 	struct afs_call *call = container_of(work, struct afs_call, work);
 	struct afs_uuid *r = call->request;
 
-	struct {
-		__be32	match;
-	} reply;
-
 	_enter("");
 
 	if (memcmp(r, &call->net->uuid, sizeof(call->net->uuid)) == 0)
-		reply.match = htonl(0);
+		afs_send_empty_reply(call);
 	else
-		reply.match = htonl(1);
+		rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
+					1, 1, "K-1");
 
-	afs_send_simple_reply(call, &reply, sizeof(reply));
 	afs_put_call(call);
 	_leave("");
 }

From 4a46fdba449a5cd890271df5a9e23927d519ed00 Mon Sep 17 00:00:00 2001
From: Marc Dionne <marc.dionne@auristor.com>
Date: Tue, 30 Jul 2019 14:38:51 +0100
Subject: [PATCH 008/157] afs: Fix loop index mixup in
 afs_deliver_vl_get_entry_by_name_u()

afs_deliver_vl_get_entry_by_name_u() scans through the vl entry
received from the volume location server and builds a return list
containing the sites that are currently valid.  When assigning
values for the return list, the index into the vl entry (i) is used
rather than the one for the new list (entry->nr_server).  If all
sites are usable, this works out fine as the indices will match.
If some sites are not valid, for example if AFS_VLSF_DONTUSE is
set, fs_mask and the uuid will be set for the wrong return site.

Fix this by using entry->nr_server as the index into the arrays
being filled in rather than i.

This can lead to EDESTADDRREQ errors if none of the returned sites
have a valid fs_mask.

Fixes: d2ddc776a458 ("afs: Overhaul volume and server record caching and fileserver rotation")
Signed-off-by: Marc Dionne <marc.dionne@auristor.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeffrey Altman <jaltman@auristor.com>
---
 fs/afs/vlclient.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index d7e0fd3c00df..cfb0ac4bd039 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -56,23 +56,24 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call)
 		struct afs_uuid__xdr *xdr;
 		struct afs_uuid *uuid;
 		int j;
+		int n = entry->nr_servers;
 
 		tmp = ntohl(uvldb->serverFlags[i]);
 		if (tmp & AFS_VLSF_DONTUSE ||
 		    (new_only && !(tmp & AFS_VLSF_NEWREPSITE)))
 			continue;
 		if (tmp & AFS_VLSF_RWVOL) {
-			entry->fs_mask[i] |= AFS_VOL_VTM_RW;
+			entry->fs_mask[n] |= AFS_VOL_VTM_RW;
 			if (vlflags & AFS_VLF_BACKEXISTS)
-				entry->fs_mask[i] |= AFS_VOL_VTM_BAK;
+				entry->fs_mask[n] |= AFS_VOL_VTM_BAK;
 		}
 		if (tmp & AFS_VLSF_ROVOL)
-			entry->fs_mask[i] |= AFS_VOL_VTM_RO;
-		if (!entry->fs_mask[i])
+			entry->fs_mask[n] |= AFS_VOL_VTM_RO;
+		if (!entry->fs_mask[n])
 			continue;
 
 		xdr = &uvldb->serverNumber[i];
-		uuid = (struct afs_uuid *)&entry->fs_server[i];
+		uuid = (struct afs_uuid *)&entry->fs_server[n];
 		uuid->time_low			= xdr->time_low;
 		uuid->time_mid			= htons(ntohl(xdr->time_mid));
 		uuid->time_hi_and_version	= htons(ntohl(xdr->time_hi_and_version));

From a6eed4ab5dd4bfb696c1a3f49742b8d1846a66a0 Mon Sep 17 00:00:00 2001
From: Jia-Ju Bai <baijiaju1990@gmail.com>
Date: Tue, 30 Jul 2019 14:38:51 +0100
Subject: [PATCH 009/157] fs: afs: Fix a possible null-pointer dereference in
 afs_put_read()

In afs_read_dir(), there is an if statement on line 255 to check whether
req->pages is NULL:
	if (!req->pages)
		goto error;

If req->pages is NULL, afs_put_read() on line 337 is executed.
In afs_put_read(), req->pages[i] is used on line 195.
Thus, a possible null-pointer dereference may occur in this case.

To fix this possible bug, an if statement is added in afs_put_read() to
check req->pages.

This bug is found by a static analysis tool STCheck written by us.

Fixes: f3ddee8dc4e2 ("afs: Fix directory handling")
Signed-off-by: Jia-Ju Bai <baijiaju1990@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/file.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/fs/afs/file.c b/fs/afs/file.c
index 56b69576274d..dd3c55c9101c 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -191,11 +191,13 @@ void afs_put_read(struct afs_read *req)
 	int i;
 
 	if (refcount_dec_and_test(&req->usage)) {
-		for (i = 0; i < req->nr_pages; i++)
-			if (req->pages[i])
-				put_page(req->pages[i]);
-		if (req->pages != req->array)
-			kfree(req->pages);
+		if (req->pages) {
+			for (i = 0; i < req->nr_pages; i++)
+				if (req->pages[i])
+					put_page(req->pages[i]);
+			if (req->pages != req->array)
+				kfree(req->pages);
+		}
 		kfree(req);
 	}
 }

From 37c0bbb3326674940e657118306ac52364314523 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 30 Jul 2019 14:38:51 +0100
Subject: [PATCH 010/157] afs: Fix off-by-one in afs_rename() expected data
 version calculation

When afs_rename() calculates the expected data version of the target
directory in a cross-directory rename, it doesn't increment it as it
should, so it always thinks that the target inode is unexpectedly modified
on the server.

Fixes: a58823ac4589 ("afs: Fix application of status and callback to be under same lock")
Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/dir.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index e640d67274be..20aa18b38a49 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -1804,7 +1804,7 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
 				afs_end_vnode_operation(&fc);
 				goto error_rehash;
 			}
-			new_data_version = new_dvnode->status.data_version;
+			new_data_version = new_dvnode->status.data_version + 1;
 		} else {
 			new_data_version = orig_data_version;
 			new_scb = &scb[0];

From 5dc84855b0fc7e1db182b55c5564fd539d6eff92 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 30 Jul 2019 14:38:51 +0100
Subject: [PATCH 011/157] afs: Only update d_fsdata if different in
 afs_d_revalidate()

In the in-kernel afs filesystem, d_fsdata is set with the data version of
the parent directory.  afs_d_revalidate() will update this to the current
directory version, but it shouldn't do this if it the value it read from
d_fsdata is the same as no lock is held and cmpxchg() is not used.

Fix the code to only change the value if it is different from the current
directory version.

Fixes: 260a980317da ("[AFS]: Add "directory write" support.")
Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/dir.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 20aa18b38a49..618e26cea887 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -1017,7 +1017,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
 	dir_version = (long)dir->status.data_version;
 	de_version = (long)dentry->d_fsdata;
 	if (de_version == dir_version)
-		goto out_valid;
+		goto out_valid_noupdate;
 
 	dir_version = (long)dir->invalid_before;
 	if (de_version - dir_version >= 0)
@@ -1081,6 +1081,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
 
 out_valid:
 	dentry->d_fsdata = (void *)dir_version;
+out_valid_noupdate:
 	dput(parent);
 	key_put(key);
 	_leave(" = 1 [valid]");

From 9dd0b82ef530cdfe805c9f7079c99e104be59a14 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 30 Jul 2019 14:38:52 +0100
Subject: [PATCH 012/157] afs: Fix missing dentry data version updating

In the in-kernel afs filesystem, the d_fsdata dentry field is used to hold
the data version of the parent directory when it was created or when
d_revalidate() last caused it to be updated.  This is compared to the
->invalid_before field in the directory inode, rather than the actual data
version number, thereby allowing changes due to local edits to be ignored.
Only if the server data version gets bumped unexpectedly (eg. by a
competing client), do we need to revalidate stuff.

However, the d_fsdata field should also be updated if an rpc op is
performed that modifies that particular dentry.  Such ops return the
revised data version of the directory(ies) involved, so we should use that.

This is particularly problematic for rename, since a dentry from one
directory may be moved directly into another directory (ie. mv a/x b/x).
It would then be sporting the wrong data version - and if this is in the
future, for the destination directory, revalidations would be missed,
leading to foreign renames and hard-link deletion being missed.

Fix this by the following means:

 (1) Return the data version number from operations that read the directory
     contents - if they issue the read.  This starts in afs_dir_iterate()
     and is used, ignored or passed back by its callers.

 (2) In afs_lookup*(), set the dentry version to the version returned by
     (1) before d_splice_alias() is called and the dentry published.

 (3) In afs_d_revalidate(), set the dentry version to that returned from
     (1) if an rpc call was issued.  This means that if a parallel
     procedure, such as mkdir(), modifies the directory, we won't
     accidentally use the data version from that.

 (4) In afs_{mkdir,create,link,symlink}(), set the new dentry's version to
     the directory data version before d_instantiate() is called.

 (5) In afs_{rmdir,unlink}, update the target dentry's version to the
     directory data version as soon as we've updated the directory inode.

 (6) In afs_rename(), we need to unhash the old dentry before we start so
     that we don't get afs_d_revalidate() reverting the version change in
     cross-directory renames.

     We then need to set both the old and the new dentry versions the data
     version of the new directory before we call d_move() as d_move() will
     rehash them.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: David Howells <dhowells@redhat.com>
---
 fs/afs/dir.c | 84 +++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 70 insertions(+), 14 deletions(-)

diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 618e26cea887..81207dc3c997 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -440,7 +440,7 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode,
  * iterate through the data blob that lists the contents of an AFS directory
  */
 static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
-			   struct key *key)
+			   struct key *key, afs_dataversion_t *_dir_version)
 {
 	struct afs_vnode *dvnode = AFS_FS_I(dir);
 	struct afs_xdr_dir_page *dbuf;
@@ -460,6 +460,7 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
 	req = afs_read_dir(dvnode, key);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
+	*_dir_version = req->data_version;
 
 	/* round the file position up to the next entry boundary */
 	ctx->pos += sizeof(union afs_xdr_dirent) - 1;
@@ -514,7 +515,10 @@ out:
  */
 static int afs_readdir(struct file *file, struct dir_context *ctx)
 {
-	return afs_dir_iterate(file_inode(file), ctx, afs_file_key(file));
+	afs_dataversion_t dir_version;
+
+	return afs_dir_iterate(file_inode(file), ctx, afs_file_key(file),
+			       &dir_version);
 }
 
 /*
@@ -555,7 +559,8 @@ static int afs_lookup_one_filldir(struct dir_context *ctx, const char *name,
  * - just returns the FID the dentry name maps to if found
  */
 static int afs_do_lookup_one(struct inode *dir, struct dentry *dentry,
-			     struct afs_fid *fid, struct key *key)
+			     struct afs_fid *fid, struct key *key,
+			     afs_dataversion_t *_dir_version)
 {
 	struct afs_super_info *as = dir->i_sb->s_fs_info;
 	struct afs_lookup_one_cookie cookie = {
@@ -568,7 +573,7 @@ static int afs_do_lookup_one(struct inode *dir, struct dentry *dentry,
 	_enter("{%lu},%p{%pd},", dir->i_ino, dentry, dentry);
 
 	/* search the directory */
-	ret = afs_dir_iterate(dir, &cookie.ctx, key);
+	ret = afs_dir_iterate(dir, &cookie.ctx, key, _dir_version);
 	if (ret < 0) {
 		_leave(" = %d [iter]", ret);
 		return ret;
@@ -642,6 +647,7 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
 	struct afs_server *server;
 	struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode;
 	struct inode *inode = NULL, *ti;
+	afs_dataversion_t data_version = READ_ONCE(dvnode->status.data_version);
 	int ret, i;
 
 	_enter("{%lu},%p{%pd},", dir->i_ino, dentry, dentry);
@@ -669,12 +675,14 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
 		cookie->fids[i].vid = as->volume->vid;
 
 	/* search the directory */
-	ret = afs_dir_iterate(dir, &cookie->ctx, key);
+	ret = afs_dir_iterate(dir, &cookie->ctx, key, &data_version);
 	if (ret < 0) {
 		inode = ERR_PTR(ret);
 		goto out;
 	}
 
+	dentry->d_fsdata = (void *)(unsigned long)data_version;
+
 	inode = ERR_PTR(-ENOENT);
 	if (!cookie->found)
 		goto out;
@@ -968,7 +976,8 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
 	struct dentry *parent;
 	struct inode *inode;
 	struct key *key;
-	long dir_version, de_version;
+	afs_dataversion_t dir_version;
+	long de_version;
 	int ret;
 
 	if (flags & LOOKUP_RCU)
@@ -1014,20 +1023,20 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
 	 * on a 32-bit system, we only have 32 bits in the dentry to store the
 	 * version.
 	 */
-	dir_version = (long)dir->status.data_version;
+	dir_version = dir->status.data_version;
 	de_version = (long)dentry->d_fsdata;
-	if (de_version == dir_version)
+	if (de_version == (long)dir_version)
 		goto out_valid_noupdate;
 
-	dir_version = (long)dir->invalid_before;
-	if (de_version - dir_version >= 0)
+	dir_version = dir->invalid_before;
+	if (de_version - (long)dir_version >= 0)
 		goto out_valid;
 
 	_debug("dir modified");
 	afs_stat_v(dir, n_reval);
 
 	/* search the directory for this vnode */
-	ret = afs_do_lookup_one(&dir->vfs_inode, dentry, &fid, key);
+	ret = afs_do_lookup_one(&dir->vfs_inode, dentry, &fid, key, &dir_version);
 	switch (ret) {
 	case 0:
 		/* the filename maps to something */
@@ -1080,7 +1089,7 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
 	}
 
 out_valid:
-	dentry->d_fsdata = (void *)dir_version;
+	dentry->d_fsdata = (void *)(unsigned long)dir_version;
 out_valid_noupdate:
 	dput(parent);
 	key_put(key);
@@ -1186,6 +1195,20 @@ static void afs_prep_for_new_inode(struct afs_fs_cursor *fc,
 	iget_data->cb_s_break = fc->cbi->server->cb_s_break;
 }
 
+/*
+ * Note that a dentry got changed.  We need to set d_fsdata to the data version
+ * number derived from the result of the operation.  It doesn't matter if
+ * d_fsdata goes backwards as we'll just revalidate.
+ */
+static void afs_update_dentry_version(struct afs_fs_cursor *fc,
+				      struct dentry *dentry,
+				      struct afs_status_cb *scb)
+{
+	if (fc->ac.error == 0)
+		dentry->d_fsdata =
+			(void *)(unsigned long)scb->status.data_version;
+}
+
 /*
  * create a directory on an AFS filesystem
  */
@@ -1228,6 +1251,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 		afs_check_for_remote_deletion(&fc, dvnode);
 		afs_vnode_commit_status(&fc, dvnode, fc.cb_break,
 					&data_version, &scb[0]);
+		afs_update_dentry_version(&fc, dentry, &scb[0]);
 		afs_vnode_new_inode(&fc, dentry, &iget_data, &scb[1]);
 		ret = afs_end_vnode_operation(&fc);
 		if (ret < 0)
@@ -1320,6 +1344,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
 
 		afs_vnode_commit_status(&fc, dvnode, fc.cb_break,
 					&data_version, scb);
+		afs_update_dentry_version(&fc, dentry, scb);
 		ret = afs_end_vnode_operation(&fc);
 		if (ret == 0) {
 			afs_dir_remove_subdir(dentry);
@@ -1459,6 +1484,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
 					&data_version, &scb[0]);
 		afs_vnode_commit_status(&fc, vnode, fc.cb_break_2,
 					&data_version_2, &scb[1]);
+		afs_update_dentry_version(&fc, dentry, &scb[0]);
 		ret = afs_end_vnode_operation(&fc);
 		if (ret == 0 && !(scb[1].have_status || scb[1].have_error))
 			ret = afs_dir_remove_link(dvnode, dentry, key);
@@ -1527,6 +1553,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 		afs_check_for_remote_deletion(&fc, dvnode);
 		afs_vnode_commit_status(&fc, dvnode, fc.cb_break,
 					&data_version, &scb[0]);
+		afs_update_dentry_version(&fc, dentry, &scb[0]);
 		afs_vnode_new_inode(&fc, dentry, &iget_data, &scb[1]);
 		ret = afs_end_vnode_operation(&fc);
 		if (ret < 0)
@@ -1608,6 +1635,7 @@ static int afs_link(struct dentry *from, struct inode *dir,
 		afs_vnode_commit_status(&fc, vnode, fc.cb_break_2,
 					NULL, &scb[1]);
 		ihold(&vnode->vfs_inode);
+		afs_update_dentry_version(&fc, dentry, &scb[0]);
 		d_instantiate(dentry, &vnode->vfs_inode);
 
 		mutex_unlock(&vnode->io_lock);
@@ -1687,6 +1715,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry,
 		afs_check_for_remote_deletion(&fc, dvnode);
 		afs_vnode_commit_status(&fc, dvnode, fc.cb_break,
 					&data_version, &scb[0]);
+		afs_update_dentry_version(&fc, dentry, &scb[0]);
 		afs_vnode_new_inode(&fc, dentry, &iget_data, &scb[1]);
 		ret = afs_end_vnode_operation(&fc);
 		if (ret < 0)
@@ -1792,6 +1821,17 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		}
 	}
 
+	/* This bit is potentially nasty as there's a potential race with
+	 * afs_d_revalidate{,_rcu}().  We have to change d_fsdata on the dentry
+	 * to reflect it's new parent's new data_version after the op, but
+	 * d_revalidate may see old_dentry between the op having taken place
+	 * and the version being updated.
+	 *
+	 * So drop the old_dentry for now to make other threads go through
+	 * lookup instead - which we hold a lock against.
+	 */
+	d_drop(old_dentry);
+
 	ret = -ERESTARTSYS;
 	if (afs_begin_vnode_operation(&fc, orig_dvnode, key, true)) {
 		afs_dataversion_t orig_data_version;
@@ -1803,7 +1843,7 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		if (orig_dvnode != new_dvnode) {
 			if (mutex_lock_interruptible_nested(&new_dvnode->io_lock, 1) < 0) {
 				afs_end_vnode_operation(&fc);
-				goto error_rehash;
+				goto error_rehash_old;
 			}
 			new_data_version = new_dvnode->status.data_version + 1;
 		} else {
@@ -1828,7 +1868,7 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		}
 		ret = afs_end_vnode_operation(&fc);
 		if (ret < 0)
-			goto error_rehash;
+			goto error_rehash_old;
 	}
 
 	if (ret == 0) {
@@ -1854,10 +1894,26 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
 				drop_nlink(new_inode);
 			spin_unlock(&new_inode->i_lock);
 		}
+
+		/* Now we can update d_fsdata on the dentries to reflect their
+		 * new parent's data_version.
+		 *
+		 * Note that if we ever implement RENAME_EXCHANGE, we'll have
+		 * to update both dentries with opposing dir versions.
+		 */
+		if (new_dvnode != orig_dvnode) {
+			afs_update_dentry_version(&fc, old_dentry, &scb[1]);
+			afs_update_dentry_version(&fc, new_dentry, &scb[1]);
+		} else {
+			afs_update_dentry_version(&fc, old_dentry, &scb[0]);
+			afs_update_dentry_version(&fc, new_dentry, &scb[0]);
+		}
 		d_move(old_dentry, new_dentry);
 		goto error_tmp;
 	}
 
+error_rehash_old:
+	d_rehash(new_dentry);
 error_rehash:
 	if (rehash)
 		d_rehash(rehash);

From 2b089bf8d19c66f70ae3b2d2d101be1ae49bfe24 Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Mon, 22 Jul 2019 11:20:08 +0200
Subject: [PATCH 013/157] kernel/configs: Replace GPL boilerplate code with
 SPDX identifier

The FSF does not reside in "675 Mass Ave, Cambridge" anymore...
let's replace the old GPL boilerplate code with a proper SPDX
identifier instead.

Signed-off-by: Thomas Huth <thuth@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/configs.c | 16 +---------------
 1 file changed, 1 insertion(+), 15 deletions(-)

diff --git a/kernel/configs.c b/kernel/configs.c
index b062425ccf8d..c09ea4c995e1 100644
--- a/kernel/configs.c
+++ b/kernel/configs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * kernel/configs.c
  * Echo the kernel .config file used to build the kernel
@@ -6,21 +7,6 @@
  * Copyright (C) 2002 Randy Dunlap <rdunlap@xenotime.net>
  * Copyright (C) 2002 Al Stone <ahs3@fc.hp.com>
  * Copyright (C) 2002 Hewlett-Packard Company
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or (at
- * your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT.  See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #include <linux/kernel.h>

From ef349abd91c1b37f87639170bcd9d11fe3ac58ff Mon Sep 17 00:00:00 2001
From: Matthias Maennich <maennich@google.com>
Date: Thu, 25 Jul 2019 11:17:04 +0100
Subject: [PATCH 014/157] coccinelle: api/atomic_as_refcounter: add SPDX
 License Identifier

Add the missing GPLv2 SPDX license identifier.

It appears this single file was missing from 7f904d7e1f3e ("treewide:
Replace GPLv2 boilerplate/reference with SPDX - rule 505"), which
addressed all other files in scripts/coccinelle. Hence I added
GPL-2.0-only consitently with the mentioned patch.

Cc: linux-spdx@vger.kernel.org
Cc: Elena Reshetova <elena.reshetova@intel.com>
Signed-off-by: Matthias Maennich <maennich@google.com>
Acked-by: Julia Lawall <julia.lawall@lip6.fr>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 scripts/coccinelle/api/atomic_as_refcounter.cocci | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/coccinelle/api/atomic_as_refcounter.cocci b/scripts/coccinelle/api/atomic_as_refcounter.cocci
index 988120e0fd67..0f78d94abc35 100644
--- a/scripts/coccinelle/api/atomic_as_refcounter.cocci
+++ b/scripts/coccinelle/api/atomic_as_refcounter.cocci
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 // Check if refcount_t type and API should be used
 // instead of atomic_t type when dealing with refcounters
 //

From fac7b714c514fcc555541e1d6450c694b0a5f8d3 Mon Sep 17 00:00:00 2001
From: Nishad Kamdar <nishadkamdar@gmail.com>
Date: Fri, 26 Jul 2019 19:58:45 +0530
Subject: [PATCH 015/157] intel_th: Use the correct style for SPDX License
 Identifier

This patch corrects the SPDX License Identifier style
in header files related to Drivers for Intel(R) Trace Hub
controller.
For C header files Documentation/process/license-rules.rst
mandates C-like comments (opposed to C source files where
C++ style should be used)

Changes made by using a script provided by Joe Perches here:
https://lkml.org/lkml/2019/2/7/46

Suggested-by: Joe Perches <joe@perches.com>
Signed-off-by: Nishad Kamdar <nishadkamdar@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hwtracing/intel_th/msu.h | 2 +-
 drivers/hwtracing/intel_th/pti.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/hwtracing/intel_th/msu.h b/drivers/hwtracing/intel_th/msu.h
index 574c16004cb2..13d9b141daaa 100644
--- a/drivers/hwtracing/intel_th/msu.h
+++ b/drivers/hwtracing/intel_th/msu.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Intel(R) Trace Hub Memory Storage Unit (MSU) data structures
  *
diff --git a/drivers/hwtracing/intel_th/pti.h b/drivers/hwtracing/intel_th/pti.h
index e9381babc84c..7dfc0431333b 100644
--- a/drivers/hwtracing/intel_th/pti.h
+++ b/drivers/hwtracing/intel_th/pti.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Intel(R) Trace Hub PTI output data structures
  *

From 740ce365a4dccc1df4d05219cd5bf68f01359196 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Tue, 30 Jul 2019 08:49:34 -0600
Subject: [PATCH 016/157] dt-bindings: Fix generated example files getting
 added to schemas
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 837158b847a4 ("dt-bindings: Check the examples against the
schemas") started generating YAML encoded DT files to validate the
examples against the schema. When running 'make dt_binding_check' in
tree after the 1st time, the generated example .dt.yaml files are
mistakenly added to the list of schema files. Exclude *.example.dt.yaml
files from the search for schema files.

Fixes: 837158b847a4 ("dt-bindings: Check the examples against the schemas")
Reported-by: Guido Günther <agx@sigxcpu.org>
Tested-by: Guido Günther <agx@sigxcpu.org>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/Makefile | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/Makefile b/Documentation/devicetree/bindings/Makefile
index 6b0dfd5c17ba..5138a2f6232a 100644
--- a/Documentation/devicetree/bindings/Makefile
+++ b/Documentation/devicetree/bindings/Makefile
@@ -19,7 +19,9 @@ quiet_cmd_mk_schema = SCHEMA  $@
 
 DT_DOCS = $(shell \
 	cd $(srctree)/$(src) && \
-	find * \( -name '*.yaml' ! -name $(DT_TMP_SCHEMA) \) \
+	find * \( -name '*.yaml' ! \
+		-name $(DT_TMP_SCHEMA) ! \
+		-name '*.example.dt.yaml' \) \
 	)
 
 DT_SCHEMA_FILES ?= $(addprefix $(src)/,$(DT_DOCS))

From 3aed86731ee2b23e4dc4d2c6d943d33992cd551b Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Wed, 31 Jul 2019 17:35:31 -0600
Subject: [PATCH 017/157] nvmet: Fix use-after-free bug when a port is removed

When a port is removed through configfs, any connected controllers
are still active and can still send commands. This causes a
use-after-free bug which is detected by KASAN for any admin command
that dereferences req->port (like in nvmet_execute_identify_ctrl).

To fix this, disconnect all active controllers when a subsystem is
removed from a port. This ensures there are no active controllers
when the port is eventually removed.

Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
Reviewed-by : Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/target/configfs.c |  1 +
 drivers/nvme/target/core.c     | 12 ++++++++++++
 drivers/nvme/target/nvmet.h    |  3 +++
 3 files changed, 16 insertions(+)

diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index cd52b9f15376..98613a45bd3b 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -675,6 +675,7 @@ static void nvmet_port_subsys_drop_link(struct config_item *parent,
 
 found:
 	list_del(&p->entry);
+	nvmet_port_del_ctrls(port, subsys);
 	nvmet_port_disc_changed(port, subsys);
 
 	if (list_empty(&port->subsystems))
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index dad0243c7c96..b86a23aa9020 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -280,6 +280,18 @@ void nvmet_unregister_transport(const struct nvmet_fabrics_ops *ops)
 }
 EXPORT_SYMBOL_GPL(nvmet_unregister_transport);
 
+void nvmet_port_del_ctrls(struct nvmet_port *port, struct nvmet_subsys *subsys)
+{
+	struct nvmet_ctrl *ctrl;
+
+	mutex_lock(&subsys->lock);
+	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
+		if (ctrl->port == port)
+			ctrl->ops->delete_ctrl(ctrl);
+	}
+	mutex_unlock(&subsys->lock);
+}
+
 int nvmet_enable_port(struct nvmet_port *port)
 {
 	const struct nvmet_fabrics_ops *ops;
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 6ee66c610739..c51f8dd01dc4 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -418,6 +418,9 @@ void nvmet_port_send_ana_event(struct nvmet_port *port);
 int nvmet_register_transport(const struct nvmet_fabrics_ops *ops);
 void nvmet_unregister_transport(const struct nvmet_fabrics_ops *ops);
 
+void nvmet_port_del_ctrls(struct nvmet_port *port,
+			  struct nvmet_subsys *subsys);
+
 int nvmet_enable_port(struct nvmet_port *port);
 void nvmet_disable_port(struct nvmet_port *port);
 

From 86b9a63e595ff03f9d0a7b92b6acc231fecefc29 Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Wed, 31 Jul 2019 17:35:32 -0600
Subject: [PATCH 018/157] nvmet-loop: Flush nvme_delete_wq when removing the
 port

After calling nvme_loop_delete_ctrl(), the controllers will not
yet be deleted because nvme_delete_ctrl() only schedules work
to do the delete.

This means a race can occur if a port is removed but there
are still active controllers trying to access that memory.

To fix this, flush the nvme_delete_wq before returning from
nvme_loop_remove_port() so that any controllers that might
be in the process of being deleted won't access a freed port.

Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
Reviewed-by : Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/target/loop.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index b16dc3981c69..0940c5024a34 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -654,6 +654,14 @@ static void nvme_loop_remove_port(struct nvmet_port *port)
 	mutex_lock(&nvme_loop_ports_mutex);
 	list_del_init(&port->entry);
 	mutex_unlock(&nvme_loop_ports_mutex);
+
+	/*
+	 * Ensure any ctrls that are in the process of being
+	 * deleted are in fact deleted before we return
+	 * and free the port. This is to prevent active
+	 * ctrls from using a port after it's freed.
+	 */
+	flush_workqueue(nvme_delete_wq);
 }
 
 static const struct nvmet_fabrics_ops nvme_loop_ops = {

From cfc1a1af56200362d1508b82b9a3cc3acb2eae0c Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Wed, 31 Jul 2019 17:35:33 -0600
Subject: [PATCH 019/157] nvmet-file: fix nvmet_file_flush() always returning
 an error

Presently, nvmet_file_flush() always returns a call to
errno_to_nvme_status() but that helper doesn't take into account the
case when errno=0. So nvmet_file_flush() always returns an error code.

All other callers of errno_to_nvme_status() check for success before
calling it.

To fix this, ensure errno_to_nvme_status() returns success if the
errno is zero. This should prevent future mistakes like this from
happening.

Fixes: c6aa3542e010 ("nvmet: add error log support for file backend")
Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/target/core.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index b86a23aa9020..3a67e244e568 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -46,6 +46,9 @@ inline u16 errno_to_nvme_status(struct nvmet_req *req, int errno)
 	u16 status;
 
 	switch (errno) {
+	case 0:
+		status = NVME_SC_SUCCESS;
+		break;
 	case -ENOSPC:
 		req->error_loc = offsetof(struct nvme_rw_command, length);
 		status = NVME_SC_CAP_EXCEEDED | NVME_SC_DNR;

From 8c36e66fb407ce076535a7db98ab9f6d720b866a Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Wed, 31 Jul 2019 17:35:34 -0600
Subject: [PATCH 020/157] nvme-core: Fix extra device_put() call on error path

In the error path for nvme_init_subsystem(), nvme_put_subsystem()
will call device_put(), but it will get called again after the
mutex_unlock().

The device_put() only needs to be called if device_add() fails.

This bug caused a KASAN use-after-free error when adding and
removing subsytems in a loop:

  BUG: KASAN: use-after-free in device_del+0x8d9/0x9a0
  Read of size 8 at addr ffff8883cdaf7120 by task multipathd/329

  CPU: 0 PID: 329 Comm: multipathd Not tainted 5.2.0-rc6-vmlocalyes-00019-g70a2b39005fd-dirty #314
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014
  Call Trace:
   dump_stack+0x7b/0xb5
   print_address_description+0x6f/0x280
   ? device_del+0x8d9/0x9a0
   __kasan_report+0x148/0x199
   ? device_del+0x8d9/0x9a0
   ? class_release+0x100/0x130
   ? device_del+0x8d9/0x9a0
   kasan_report+0x12/0x20
   __asan_report_load8_noabort+0x14/0x20
   device_del+0x8d9/0x9a0
   ? device_platform_notify+0x70/0x70
   nvme_destroy_subsystem+0xf9/0x150
   nvme_free_ctrl+0x280/0x3a0
   device_release+0x72/0x1d0
   kobject_put+0x144/0x410
   put_device+0x13/0x20
   nvme_free_ns+0xc4/0x100
   nvme_release+0xb3/0xe0
   __blkdev_put+0x549/0x6e0
   ? kasan_check_write+0x14/0x20
   ? bd_set_size+0xb0/0xb0
   ? kasan_check_write+0x14/0x20
   ? mutex_lock+0x8f/0xe0
   ? __mutex_lock_slowpath+0x20/0x20
   ? locks_remove_file+0x239/0x370
   blkdev_put+0x72/0x2c0
   blkdev_close+0x8d/0xd0
   __fput+0x256/0x770
   ? _raw_read_lock_irq+0x40/0x40
   ____fput+0xe/0x10
   task_work_run+0x10c/0x180
   ? filp_close+0xf7/0x140
   exit_to_usermode_loop+0x151/0x170
   do_syscall_64+0x240/0x2e0
   ? prepare_exit_to_usermode+0xd5/0x190
   entry_SYSCALL_64_after_hwframe+0x44/0xa9
  RIP: 0033:0x7f5a79af05d7
  Code: 00 00 0f 05 48 3d 00 f0 ff ff 77 3f c3 66 0f 1f 44 00 00 53 89 fb 48 83 ec 10 e8 c4 fb ff ff 89 df 89 c2 b8 03 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 2b 89 d7 89 44 24 0c e8 06 fc ff ff 8b 44 24
  RSP: 002b:00007f5a7799c810 EFLAGS: 00000293 ORIG_RAX: 0000000000000003
  RAX: 0000000000000000 RBX: 0000000000000008 RCX: 00007f5a79af05d7
  RDX: 0000000000000000 RSI: 0000000000000001 RDI: 0000000000000008
  RBP: 00007f5a58000f98 R08: 0000000000000002 R09: 00007f5a7935ee80
  R10: 0000000000000000 R11: 0000000000000293 R12: 000055e432447240
  R13: 0000000000000000 R14: 0000000000000001 R15: 000055e4324a9cf0

  Allocated by task 1236:
   save_stack+0x21/0x80
   __kasan_kmalloc.constprop.6+0xab/0xe0
   kasan_kmalloc+0x9/0x10
   kmem_cache_alloc_trace+0x102/0x210
   nvme_init_identify+0x13c3/0x3820
   nvme_loop_configure_admin_queue+0x4fa/0x5e0
   nvme_loop_create_ctrl+0x469/0xf40
   nvmf_dev_write+0x19a3/0x21ab
   __vfs_write+0x66/0x120
   vfs_write+0x154/0x490
   ksys_write+0x104/0x240
   __x64_sys_write+0x73/0xb0
   do_syscall_64+0xa5/0x2e0
   entry_SYSCALL_64_after_hwframe+0x44/0xa9

  Freed by task 329:
   save_stack+0x21/0x80
   __kasan_slab_free+0x129/0x190
   kasan_slab_free+0xe/0x10
   kfree+0xa7/0x200
   nvme_release_subsystem+0x49/0x60
   device_release+0x72/0x1d0
   kobject_put+0x144/0x410
   put_device+0x13/0x20
   klist_class_dev_put+0x31/0x40
   klist_put+0x8f/0xf0
   klist_del+0xe/0x10
   device_del+0x3a7/0x9a0
   nvme_destroy_subsystem+0xf9/0x150
   nvme_free_ctrl+0x280/0x3a0
   device_release+0x72/0x1d0
   kobject_put+0x144/0x410
   put_device+0x13/0x20
   nvme_free_ns+0xc4/0x100
   nvme_release+0xb3/0xe0
   __blkdev_put+0x549/0x6e0
   blkdev_put+0x72/0x2c0
   blkdev_close+0x8d/0xd0
   __fput+0x256/0x770
   ____fput+0xe/0x10
   task_work_run+0x10c/0x180
   exit_to_usermode_loop+0x151/0x170
   do_syscall_64+0x240/0x2e0
   entry_SYSCALL_64_after_hwframe+0x44/0xa9

Fixes: 32fd90c40768 ("nvme: change locking for the per-subsystem controller list")
Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by : Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 80c7a7ee240b..e35f16b60fc9 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2488,6 +2488,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
 		if (ret) {
 			dev_err(ctrl->device,
 				"failed to register subsystem device.\n");
+			put_device(&subsys->dev);
 			goto out_unlock;
 		}
 		ida_init(&subsys->ns_ida);
@@ -2510,7 +2511,6 @@ out_put_subsystem:
 	nvme_put_subsystem(subsys);
 out_unlock:
 	mutex_unlock(&nvme_subsystems_lock);
-	put_device(&subsys->dev);
 	return ret;
 }
 

From b9156daeb1601d69007b7e50efcf89d69d72ec1d Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Wed, 31 Jul 2019 11:00:26 -0700
Subject: [PATCH 021/157] nvme: fix a possible deadlock when passthru commands
 sent to a multipath device

When the user issues a command with side effects, we will end up freezing
the namespace request queue when updating disk info (and the same for
the corresponding mpath disk node).

However, we are not freezing the mpath node request queue,
which means that mpath I/O can still come in and block on blk_queue_enter
(called from nvme_ns_head_make_request -> direct_make_request).

This is a deadlock, because blk_queue_enter will block until the inner
namespace request queue is unfroze, but that process is blocked because
the namespace revalidation is trying to update the mpath disk info
and freeze its request queue (which will never complete because
of the I/O that is blocked on blk_queue_enter).

Fix this by freezing all the subsystem nsheads request queues before
executing the passthru command. Given that these commands are infrequent
we should not worry about this temporary I/O freeze to keep things sane.

Here is the matching hang traces:
--
[ 374.465002] INFO: task systemd-udevd:17994 blocked for more than 122 seconds.
[ 374.472975] Not tainted 5.2.0-rc3-mpdebug+ #42
[ 374.478522] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 374.487274] systemd-udevd D 0 17994 1 0x00000000
[ 374.493407] Call Trace:
[ 374.496145] __schedule+0x2ef/0x620
[ 374.500047] schedule+0x38/0xa0
[ 374.503569] blk_queue_enter+0x139/0x220
[ 374.507959] ? remove_wait_queue+0x60/0x60
[ 374.512540] direct_make_request+0x60/0x130
[ 374.517219] nvme_ns_head_make_request+0x11d/0x420 [nvme_core]
[ 374.523740] ? generic_make_request_checks+0x307/0x6f0
[ 374.529484] generic_make_request+0x10d/0x2e0
[ 374.534356] submit_bio+0x75/0x140
[ 374.538163] ? guard_bio_eod+0x32/0xe0
[ 374.542361] submit_bh_wbc+0x171/0x1b0
[ 374.546553] block_read_full_page+0x1ed/0x330
[ 374.551426] ? check_disk_change+0x70/0x70
[ 374.556008] ? scan_shadow_nodes+0x30/0x30
[ 374.560588] blkdev_readpage+0x18/0x20
[ 374.564783] do_read_cache_page+0x301/0x860
[ 374.569463] ? blkdev_writepages+0x10/0x10
[ 374.574037] ? prep_new_page+0x88/0x130
[ 374.578329] ? get_page_from_freelist+0xa2f/0x1280
[ 374.583688] ? __alloc_pages_nodemask+0x179/0x320
[ 374.588947] read_cache_page+0x12/0x20
[ 374.593142] read_dev_sector+0x2d/0xd0
[ 374.597337] read_lba+0x104/0x1f0
[ 374.601046] find_valid_gpt+0xfa/0x720
[ 374.605243] ? string_nocheck+0x58/0x70
[ 374.609534] ? find_valid_gpt+0x720/0x720
[ 374.614016] efi_partition+0x89/0x430
[ 374.618113] ? string+0x48/0x60
[ 374.621632] ? snprintf+0x49/0x70
[ 374.625339] ? find_valid_gpt+0x720/0x720
[ 374.629828] check_partition+0x116/0x210
[ 374.634214] rescan_partitions+0xb6/0x360
[ 374.638699] __blkdev_reread_part+0x64/0x70
[ 374.643377] blkdev_reread_part+0x23/0x40
[ 374.647860] blkdev_ioctl+0x48c/0x990
[ 374.651956] block_ioctl+0x41/0x50
[ 374.655766] do_vfs_ioctl+0xa7/0x600
[ 374.659766] ? locks_lock_inode_wait+0xb1/0x150
[ 374.664832] ksys_ioctl+0x67/0x90
[ 374.668539] __x64_sys_ioctl+0x1a/0x20
[ 374.672732] do_syscall_64+0x5a/0x1c0
[ 374.676828] entry_SYSCALL_64_after_hwframe+0x44/0xa9

[ 374.738474] INFO: task nvmeadm:49141 blocked for more than 123 seconds.
[ 374.745871] Not tainted 5.2.0-rc3-mpdebug+ #42
[ 374.751419] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 374.760170] nvmeadm D 0 49141 36333 0x00004080
[ 374.766301] Call Trace:
[ 374.769038] __schedule+0x2ef/0x620
[ 374.772939] schedule+0x38/0xa0
[ 374.776452] blk_mq_freeze_queue_wait+0x59/0x100
[ 374.781614] ? remove_wait_queue+0x60/0x60
[ 374.786192] blk_mq_freeze_queue+0x1a/0x20
[ 374.790773] nvme_update_disk_info.isra.57+0x5f/0x350 [nvme_core]
[ 374.797582] ? nvme_identify_ns.isra.50+0x71/0xc0 [nvme_core]
[ 374.804006] __nvme_revalidate_disk+0xe5/0x110 [nvme_core]
[ 374.810139] nvme_revalidate_disk+0xa6/0x120 [nvme_core]
[ 374.816078] ? nvme_submit_user_cmd+0x11e/0x320 [nvme_core]
[ 374.822299] nvme_user_cmd+0x264/0x370 [nvme_core]
[ 374.827661] nvme_dev_ioctl+0x112/0x1d0 [nvme_core]
[ 374.833114] do_vfs_ioctl+0xa7/0x600
[ 374.837117] ? __audit_syscall_entry+0xdd/0x130
[ 374.842184] ksys_ioctl+0x67/0x90
[ 374.845891] __x64_sys_ioctl+0x1a/0x20
[ 374.850082] do_syscall_64+0x5a/0x1c0
[ 374.854178] entry_SYSCALL_64_after_hwframe+0x44/0xa9
--

Reported-by: James Puthukattukaran <james.puthukattukaran@oracle.com>
Tested-by: James Puthukattukaran <james.puthukattukaran@oracle.com>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/core.c      |  5 +++++
 drivers/nvme/host/multipath.c | 30 ++++++++++++++++++++++++++++++
 drivers/nvme/host/nvme.h      | 12 ++++++++++++
 3 files changed, 47 insertions(+)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index e35f16b60fc9..88b8dfd7928a 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1286,6 +1286,9 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 	 */
 	if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) {
 		mutex_lock(&ctrl->scan_lock);
+		mutex_lock(&ctrl->subsys->lock);
+		nvme_mpath_start_freeze(ctrl->subsys);
+		nvme_mpath_wait_freeze(ctrl->subsys);
 		nvme_start_freeze(ctrl);
 		nvme_wait_freeze(ctrl);
 	}
@@ -1316,6 +1319,8 @@ static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
 		nvme_update_formats(ctrl);
 	if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) {
 		nvme_unfreeze(ctrl);
+		nvme_mpath_unfreeze(ctrl->subsys);
+		mutex_unlock(&ctrl->subsys->lock);
 		mutex_unlock(&ctrl->scan_lock);
 	}
 	if (effects & NVME_CMD_EFFECTS_CCC)
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 4f0d0d12744e..b34dcb2288e7 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -12,6 +12,36 @@ module_param(multipath, bool, 0444);
 MODULE_PARM_DESC(multipath,
 	"turn on native support for multiple controllers per subsystem");
 
+void nvme_mpath_unfreeze(struct nvme_subsystem *subsys)
+{
+	struct nvme_ns_head *h;
+
+	lockdep_assert_held(&subsys->lock);
+	list_for_each_entry(h, &subsys->nsheads, entry)
+		if (h->disk)
+			blk_mq_unfreeze_queue(h->disk->queue);
+}
+
+void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys)
+{
+	struct nvme_ns_head *h;
+
+	lockdep_assert_held(&subsys->lock);
+	list_for_each_entry(h, &subsys->nsheads, entry)
+		if (h->disk)
+			blk_mq_freeze_queue_wait(h->disk->queue);
+}
+
+void nvme_mpath_start_freeze(struct nvme_subsystem *subsys)
+{
+	struct nvme_ns_head *h;
+
+	lockdep_assert_held(&subsys->lock);
+	list_for_each_entry(h, &subsys->nsheads, entry)
+		if (h->disk)
+			blk_freeze_queue_start(h->disk->queue);
+}
+
 /*
  * If multipathing is enabled we need to always use the subsystem instance
  * number for numbering our devices to avoid conflicts between subsystems that
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 26b563f9985b..6b4fb67124c6 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -490,6 +490,9 @@ static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
 	return ctrl->ana_log_buf != NULL;
 }
 
+void nvme_mpath_unfreeze(struct nvme_subsystem *subsys);
+void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys);
+void nvme_mpath_start_freeze(struct nvme_subsystem *subsys);
 void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
 			struct nvme_ctrl *ctrl, int *flags);
 void nvme_failover_req(struct request *req);
@@ -568,6 +571,15 @@ static inline void nvme_mpath_uninit(struct nvme_ctrl *ctrl)
 static inline void nvme_mpath_stop(struct nvme_ctrl *ctrl)
 {
 }
+static inline void nvme_mpath_unfreeze(struct nvme_subsystem *subsys)
+{
+}
+static inline void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys)
+{
+}
+static inline void nvme_mpath_start_freeze(struct nvme_subsystem *subsys)
+{
+}
 #endif /* CONFIG_NVME_MULTIPATH */
 
 #ifdef CONFIG_NVM

From d94211b8bad3787e0655a67284105f57db728cb1 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Fri, 26 Jul 2019 10:29:49 -0700
Subject: [PATCH 022/157] nvme-rdma: fix possible use-after-free in connect
 error flow

When start_queue fails, we need to make sure to drain the
queue cq before freeing the rdma resources because we might
still race with the completion path. Have start_queue() error
path safely stop the queue.

--
[30371.808111] nvme nvme1: Failed reconnect attempt 11
[30371.808113] nvme nvme1: Reconnecting in 10 seconds...
[...]
[30382.069315] nvme nvme1: creating 4 I/O queues.
[30382.257058] nvme nvme1: Connect Invalid SQE Parameter, qid 4
[30382.257061] nvme nvme1: failed to connect queue: 4 ret=386
[30382.305001] BUG: unable to handle kernel NULL pointer dereference at 0000000000000018
[30382.305022] IP: qedr_poll_cq+0x8a3/0x1170 [qedr]
[30382.305028] PGD 0 P4D 0
[30382.305037] Oops: 0000 [#1] SMP PTI
[...]
[30382.305153] Call Trace:
[30382.305166]  ? __switch_to_asm+0x34/0x70
[30382.305187]  __ib_process_cq+0x56/0xd0 [ib_core]
[30382.305201]  ib_poll_handler+0x26/0x70 [ib_core]
[30382.305213]  irq_poll_softirq+0x88/0x110
[30382.305223]  ? sort_range+0x20/0x20
[30382.305232]  __do_softirq+0xde/0x2c6
[30382.305241]  ? sort_range+0x20/0x20
[30382.305249]  run_ksoftirqd+0x1c/0x60
[30382.305258]  smpboot_thread_fn+0xef/0x160
[30382.305265]  kthread+0x113/0x130
[30382.305273]  ? kthread_create_worker_on_cpu+0x50/0x50
[30382.305281]  ret_from_fork+0x35/0x40
--

Reported-by: Nicolas Morey-Chaisemartin <NMoreyChaisemartin@suse.com>
Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/rdma.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index a249db528d54..1a6449bc547b 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -562,13 +562,17 @@ out_destroy_cm_id:
 	return ret;
 }
 
+static void __nvme_rdma_stop_queue(struct nvme_rdma_queue *queue)
+{
+	rdma_disconnect(queue->cm_id);
+	ib_drain_qp(queue->qp);
+}
+
 static void nvme_rdma_stop_queue(struct nvme_rdma_queue *queue)
 {
 	if (!test_and_clear_bit(NVME_RDMA_Q_LIVE, &queue->flags))
 		return;
-
-	rdma_disconnect(queue->cm_id);
-	ib_drain_qp(queue->qp);
+	__nvme_rdma_stop_queue(queue);
 }
 
 static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue)
@@ -607,11 +611,13 @@ static int nvme_rdma_start_queue(struct nvme_rdma_ctrl *ctrl, int idx)
 	else
 		ret = nvmf_connect_admin_queue(&ctrl->ctrl);
 
-	if (!ret)
+	if (!ret) {
 		set_bit(NVME_RDMA_Q_LIVE, &queue->flags);
-	else
+	} else {
+		__nvme_rdma_stop_queue(queue);
 		dev_info(ctrl->ctrl.device,
 			"failed to connect queue: %d ret=%d\n", idx, ret);
+	}
 	return ret;
 }
 

From 0157ec8dad3c8fc9bc9790f76e0831ffdaf2e7f0 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Thu, 25 Jul 2019 11:56:57 -0700
Subject: [PATCH 023/157] nvme: fix controller removal race with scan work

With multipath enabled, nvme_scan_work() can read from the device
(through nvme_mpath_add_disk()) and hang [1]. However, with fabrics,
once ctrl->state is set to NVME_CTRL_DELETING, the reads will hang
(see nvmf_check_ready()) and the mpath stack device make_request
will block if head->list is not empty. However, when the head->list
consistst of only DELETING/DEAD controllers, we should actually not
block, but rather fail immediately.

In addition, before we go ahead and remove the namespaces, make sure
to clear the current path and kick the requeue list so that the
request will fast fail upon requeuing.

[1]:
--
  INFO: task kworker/u4:3:166 blocked for more than 120 seconds.
        Not tainted 5.2.0-rc6-vmlocalyes-00005-g808c8c2dc0cf #316
  "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
  kworker/u4:3    D    0   166      2 0x80004000
  Workqueue: nvme-wq nvme_scan_work
  Call Trace:
   __schedule+0x851/0x1400
   schedule+0x99/0x210
   io_schedule+0x21/0x70
   do_read_cache_page+0xa57/0x1330
   read_cache_page+0x4a/0x70
   read_dev_sector+0xbf/0x380
   amiga_partition+0xc4/0x1230
   check_partition+0x30f/0x630
   rescan_partitions+0x19a/0x980
   __blkdev_get+0x85a/0x12f0
   blkdev_get+0x2a5/0x790
   __device_add_disk+0xe25/0x1250
   device_add_disk+0x13/0x20
   nvme_mpath_set_live+0x172/0x2b0
   nvme_update_ns_ana_state+0x130/0x180
   nvme_set_ns_ana_state+0x9a/0xb0
   nvme_parse_ana_log+0x1c3/0x4a0
   nvme_mpath_add_disk+0x157/0x290
   nvme_validate_ns+0x1017/0x1bd0
   nvme_scan_work+0x44d/0x6a0
   process_one_work+0x7d7/0x1240
   worker_thread+0x8e/0xff0
   kthread+0x2c3/0x3b0
   ret_from_fork+0x35/0x40

   INFO: task kworker/u4:1:1034 blocked for more than 120 seconds.
        Not tainted 5.2.0-rc6-vmlocalyes-00005-g808c8c2dc0cf #316
  "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
  kworker/u4:1    D    0  1034      2 0x80004000
  Workqueue: nvme-delete-wq nvme_delete_ctrl_work
  Call Trace:
   __schedule+0x851/0x1400
   schedule+0x99/0x210
   schedule_timeout+0x390/0x830
   wait_for_completion+0x1a7/0x310
   __flush_work+0x241/0x5d0
   flush_work+0x10/0x20
   nvme_remove_namespaces+0x85/0x3d0
   nvme_do_delete_ctrl+0xb4/0x1e0
   nvme_delete_ctrl_work+0x15/0x20
   process_one_work+0x7d7/0x1240
   worker_thread+0x8e/0xff0
   kthread+0x2c3/0x3b0
   ret_from_fork+0x35/0x40
--

Reported-by: Logan Gunthorpe <logang@deltatee.com>
Tested-by: Logan Gunthorpe <logang@deltatee.com>
Reviewed-by: Logan Gunthorpe <logang@deltatee.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/core.c      |  7 ++++++
 drivers/nvme/host/multipath.c | 46 ++++++++++++++++++++++++++++++-----
 drivers/nvme/host/nvme.h      |  9 +++++--
 3 files changed, 54 insertions(+), 8 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 88b8dfd7928a..c258a1ce4b28 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3577,6 +3577,13 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
 	struct nvme_ns *ns, *next;
 	LIST_HEAD(ns_list);
 
+	/*
+	 * make sure to requeue I/O to all namespaces as these
+	 * might result from the scan itself and must complete
+	 * for the scan_work to make progress
+	 */
+	nvme_mpath_clear_ctrl_paths(ctrl);
+
 	/* prevent racing with ns scanning */
 	flush_work(&ctrl->scan_work);
 
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index b34dcb2288e7..888d4543894e 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -134,18 +134,34 @@ static const char *nvme_ana_state_names[] = {
 	[NVME_ANA_CHANGE]		= "change",
 };
 
-void nvme_mpath_clear_current_path(struct nvme_ns *ns)
+bool nvme_mpath_clear_current_path(struct nvme_ns *ns)
 {
 	struct nvme_ns_head *head = ns->head;
+	bool changed = false;
 	int node;
 
 	if (!head)
-		return;
+		goto out;
 
 	for_each_node(node) {
-		if (ns == rcu_access_pointer(head->current_path[node]))
+		if (ns == rcu_access_pointer(head->current_path[node])) {
 			rcu_assign_pointer(head->current_path[node], NULL);
+			changed = true;
+		}
 	}
+out:
+	return changed;
+}
+
+void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
+{
+	struct nvme_ns *ns;
+
+	mutex_lock(&ctrl->scan_lock);
+	list_for_each_entry(ns, &ctrl->namespaces, list)
+		if (nvme_mpath_clear_current_path(ns))
+			kblockd_schedule_work(&ns->head->requeue_work);
+	mutex_unlock(&ctrl->scan_lock);
 }
 
 static bool nvme_path_is_disabled(struct nvme_ns *ns)
@@ -256,6 +272,24 @@ inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head)
 	return ns;
 }
 
+static bool nvme_available_path(struct nvme_ns_head *head)
+{
+	struct nvme_ns *ns;
+
+	list_for_each_entry_rcu(ns, &head->list, siblings) {
+		switch (ns->ctrl->state) {
+		case NVME_CTRL_LIVE:
+		case NVME_CTRL_RESETTING:
+		case NVME_CTRL_CONNECTING:
+			/* fallthru */
+			return true;
+		default:
+			break;
+		}
+	}
+	return false;
+}
+
 static blk_qc_t nvme_ns_head_make_request(struct request_queue *q,
 		struct bio *bio)
 {
@@ -282,14 +316,14 @@ static blk_qc_t nvme_ns_head_make_request(struct request_queue *q,
 				      disk_devt(ns->head->disk),
 				      bio->bi_iter.bi_sector);
 		ret = direct_make_request(bio);
-	} else if (!list_empty_careful(&head->list)) {
-		dev_warn_ratelimited(dev, "no path available - requeuing I/O\n");
+	} else if (nvme_available_path(head)) {
+		dev_warn_ratelimited(dev, "no usable path - requeuing I/O\n");
 
 		spin_lock_irq(&head->requeue_lock);
 		bio_list_add(&head->requeue_list, bio);
 		spin_unlock_irq(&head->requeue_lock);
 	} else {
-		dev_warn_ratelimited(dev, "no path - failing I/O\n");
+		dev_warn_ratelimited(dev, "no available path - failing I/O\n");
 
 		bio->bi_status = BLK_STS_IOERR;
 		bio_endio(bio);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 6b4fb67124c6..778b3a0b6adb 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -503,7 +503,8 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head);
 int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id);
 void nvme_mpath_uninit(struct nvme_ctrl *ctrl);
 void nvme_mpath_stop(struct nvme_ctrl *ctrl);
-void nvme_mpath_clear_current_path(struct nvme_ns *ns);
+bool nvme_mpath_clear_current_path(struct nvme_ns *ns);
+void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl);
 struct nvme_ns *nvme_find_path(struct nvme_ns_head *head);
 
 static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
@@ -551,7 +552,11 @@ static inline void nvme_mpath_add_disk(struct nvme_ns *ns,
 static inline void nvme_mpath_remove_disk(struct nvme_ns_head *head)
 {
 }
-static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns)
+static inline bool nvme_mpath_clear_current_path(struct nvme_ns *ns)
+{
+	return false;
+}
+static inline void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
 {
 }
 static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)

From bd46a90634302bfe791e93ad5496f98f165f7ae0 Mon Sep 17 00:00:00 2001
From: Keith Busch <kbusch@kernel.org>
Date: Mon, 29 Jul 2019 16:34:52 -0600
Subject: [PATCH 024/157] nvme-pci: Fix async probe remove race

Ensure the controller is not in the NEW state when nvme_probe() exits.
This will always allow a subsequent nvme_remove() to set the state to
DELETING, fixing a potential race between the initial asynchronous probe
and device removal.

Reported-by: Li Zhong <lizhongfs@gmail.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Keith Busch <kbusch@kernel.org>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
---
 drivers/nvme/host/pci.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index db160cee42ad..0c2c4b0c6655 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2695,7 +2695,7 @@ static void nvme_async_probe(void *data, async_cookie_t cookie)
 {
 	struct nvme_dev *dev = data;
 
-	nvme_reset_ctrl_sync(&dev->ctrl);
+	flush_work(&dev->ctrl.reset_work);
 	flush_work(&dev->ctrl.scan_work);
 	nvme_put_ctrl(&dev->ctrl);
 }
@@ -2761,6 +2761,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
 	dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
 
+	nvme_reset_ctrl(&dev->ctrl);
 	nvme_get_ctrl(&dev->ctrl);
 	async_schedule(nvme_async_probe, dev);
 

From b07dd9b400981f487940a4d84292d3a0e7cd9362 Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Thu, 25 Jul 2019 18:40:05 -0500
Subject: [PATCH 025/157] soundwire: cadence_master: fix register definition
 for SLAVE_STATE

wrong prefix and wrong macro.

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Link: https://lore.kernel.org/r/20190725234032.21152-14-pierre-louis.bossart@linux.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/soundwire/cadence_master.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/soundwire/cadence_master.c b/drivers/soundwire/cadence_master.c
index ff4badc9b3de..c2e3d93c4ddc 100644
--- a/drivers/soundwire/cadence_master.c
+++ b/drivers/soundwire/cadence_master.c
@@ -81,8 +81,8 @@
 
 #define CDNS_MCP_INTSET				0x4C
 
-#define CDNS_SDW_SLAVE_STAT			0x50
-#define CDNS_MCP_SLAVE_STAT_MASK		BIT(1, 0)
+#define CDNS_MCP_SLAVE_STAT			0x50
+#define CDNS_MCP_SLAVE_STAT_MASK		GENMASK(1, 0)
 
 #define CDNS_MCP_SLAVE_INTSTAT0			0x54
 #define CDNS_MCP_SLAVE_INTSTAT1			0x58

From 664b16589f882202b8fa8149d0074f3159bade76 Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Thu, 25 Jul 2019 18:40:06 -0500
Subject: [PATCH 026/157] soundwire: cadence_master: fix definitions for
 INTSTAT0/1

Two off-by-one errors: INTSTAT0 missed BIT(31) and INTSTAT1 is only
defined on first 16 bits.

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Link: https://lore.kernel.org/r/20190725234032.21152-15-pierre-louis.bossart@linux.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/soundwire/cadence_master.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/soundwire/cadence_master.c b/drivers/soundwire/cadence_master.c
index c2e3d93c4ddc..60e8bdee5c75 100644
--- a/drivers/soundwire/cadence_master.c
+++ b/drivers/soundwire/cadence_master.c
@@ -96,8 +96,8 @@
 #define CDNS_MCP_SLAVE_INTMASK0			0x5C
 #define CDNS_MCP_SLAVE_INTMASK1			0x60
 
-#define CDNS_MCP_SLAVE_INTMASK0_MASK		GENMASK(30, 0)
-#define CDNS_MCP_SLAVE_INTMASK1_MASK		GENMASK(16, 0)
+#define CDNS_MCP_SLAVE_INTMASK0_MASK		GENMASK(31, 0)
+#define CDNS_MCP_SLAVE_INTMASK1_MASK		GENMASK(15, 0)
 
 #define CDNS_MCP_PORT_INTSTAT			0x64
 #define CDNS_MCP_PDI_STAT			0x6C

From 294fc7a4c8ec42b3053b1d2e87b0dafef80a76b8 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Thu, 1 Aug 2019 13:47:07 -0700
Subject: [PATCH 027/157] fs: xfs: xfs_log: Don't use KM_MAYFAIL at
 xfs_log_reserve().

When the system is close-to-OOM, fsync() may fail due to -ENOMEM because
xfs_log_reserve() is using KM_MAYFAIL. It is a bad thing to fail writeback
operation due to user-triggerable OOM condition. Since we are not using
KM_MAYFAIL at xfs_trans_alloc() before calling xfs_log_reserve(), let's
use the same flags at xfs_log_reserve().

  oom-torture: page allocation failure: order:0, mode:0x46c40(GFP_NOFS|__GFP_NOWARN|__GFP_RETRY_MAYFAIL|__GFP_COMP), nodemask=(null)
  CPU: 7 PID: 1662 Comm: oom-torture Kdump: loaded Not tainted 5.3.0-rc2+ #925
  Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00
  Call Trace:
   dump_stack+0x67/0x95
   warn_alloc+0xa9/0x140
   __alloc_pages_slowpath+0x9a8/0xbce
   __alloc_pages_nodemask+0x372/0x3b0
   alloc_slab_page+0x3a/0x8d0
   new_slab+0x330/0x420
   ___slab_alloc.constprop.94+0x879/0xb00
   __slab_alloc.isra.89.constprop.93+0x43/0x6f
   kmem_cache_alloc+0x331/0x390
   kmem_zone_alloc+0x9f/0x110 [xfs]
   kmem_zone_alloc+0x9f/0x110 [xfs]
   xlog_ticket_alloc+0x33/0xd0 [xfs]
   xfs_log_reserve+0xb4/0x410 [xfs]
   xfs_trans_reserve+0x1d1/0x2b0 [xfs]
   xfs_trans_alloc+0xc9/0x250 [xfs]
   xfs_setfilesize_trans_alloc.isra.27+0x44/0xc0 [xfs]
   xfs_submit_ioend.isra.28+0xa5/0x180 [xfs]
   xfs_vm_writepages+0x76/0xa0 [xfs]
   do_writepages+0x17/0x80
   __filemap_fdatawrite_range+0xc1/0xf0
   file_write_and_wait_range+0x53/0xa0
   xfs_file_fsync+0x87/0x290 [xfs]
   vfs_fsync_range+0x37/0x80
   do_fsync+0x38/0x60
   __x64_sys_fsync+0xf/0x20
   do_syscall_64+0x4a/0x1c0
   entry_SYSCALL_64_after_hwframe+0x49/0xbe

Fixes: eb01c9cd87 ("[XFS] Remove the xlog_ticket allocator")
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 fs/xfs/xfs_log.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 00e9f5c388d3..7fc3c1ad36bc 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -429,10 +429,7 @@ xfs_log_reserve(
 
 	ASSERT(*ticp == NULL);
 	tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent,
-				KM_SLEEP | KM_MAYFAIL);
-	if (!tic)
-		return -ENOMEM;
-
+				KM_SLEEP);
 	*ticp = tic;
 
 	xlog_grant_push_ail(log, tic->t_cnt ? tic->t_unit_res * tic->t_cnt

From 7e7ae38bf928c5cfa6dd6e9a2cf8b42c84a27c92 Mon Sep 17 00:00:00 2001
From: Yoshiaki Okamoto <yokamoto@allied-telesis.co.jp>
Date: Sat, 20 Jul 2019 22:23:18 +0900
Subject: [PATCH 028/157] USB: serial: option: Add support for ZTE MF871A

This patch adds support for MF871A USB modem (aka Speed USB STICK U03)
to option driver. This modem is manufactured by ZTE corporation, and
sold by KDDI.

Interface layout:
0: AT
1: MODEM

usb-devices output:
T:  Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#=  9 Spd=480 MxCh= 0
D:  Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs=  1
P:  Vendor=19d2 ProdID=1481 Rev=52.87
S:  Manufacturer=ZTE,Incorporated
S:  Product=ZTE Technologies MSM
S:  SerialNumber=1234567890ABCDEF
C:  #Ifs= 2 Cfg#= 1 Atr=80 MxPwr=500mA
I:  If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option
I:  If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option

Co-developed-by: Hiroyuki Yamamoto <hyamamo@allied-telesis.co.jp>
Signed-off-by: Hiroyuki Yamamoto <hyamamo@allied-telesis.co.jp>
Signed-off-by: Yoshiaki Okamoto <yokamoto@allied-telesis.co.jp>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/option.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index c1582fbd1150..23d4a5f543af 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -1549,6 +1549,7 @@ static const struct usb_device_id option_ids[] = {
 	{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1428, 0xff, 0xff, 0xff),  /* Telewell TW-LTE 4G v2 */
 	  .driver_info = RSVD(2) },
 	{ USB_DEVICE_INTERFACE_CLASS(ZTE_VENDOR_ID, 0x1476, 0xff) },	/* GosunCn ZTE WeLink ME3630 (ECM/NCM mode) */
+	{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1481, 0xff, 0x00, 0x00) }, /* ZTE MF871A */
 	{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1533, 0xff, 0xff, 0xff) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1534, 0xff, 0xff, 0xff) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1535, 0xff, 0xff, 0xff) },

From 552573e42aab5f75aff9bab855a9677979d9a7d5 Mon Sep 17 00:00:00 2001
From: Rogan Dawes <rogan@dawes.za.net>
Date: Wed, 17 Jul 2019 11:11:34 +0200
Subject: [PATCH 029/157] USB: serial: option: add D-Link DWM-222 device ID

Add device id for D-Link DWM-222 A2.

MI_00 D-Link HS-USB Diagnostics
MI_01 D-Link HS-USB Modem
MI_02 D-Link HS-USB AT Port
MI_03 D-Link HS-USB NMEA
MI_04 D-Link HS-USB WWAN Adapter (qmi_wwan)
MI_05 USB Mass Storage Device

Cc: stable@vger.kernel.org
Signed-off-by: Rogan Dawes <rogan@dawes.za.net>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/option.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 23d4a5f543af..f2c19660ed16 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -1953,6 +1953,8 @@ static const struct usb_device_id option_ids[] = {
 	  .driver_info = RSVD(4) },
 	{ USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7e35, 0xff),			/* D-Link DWM-222 */
 	  .driver_info = RSVD(4) },
+	{ USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7e3d, 0xff),			/* D-Link DWM-222 A2 */
+	  .driver_info = RSVD(4) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e01, 0xff, 0xff, 0xff) },	/* D-Link DWM-152/C1 */
 	{ USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e02, 0xff, 0xff, 0xff) },	/* D-Link DWM-156/C1 */
 	{ USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x7e11, 0xff, 0xff, 0xff) },	/* D-Link DWM-156/A3 */

From 0dda5907b0fc60f72f67f479f224e02c95d06e21 Mon Sep 17 00:00:00 2001
From: Nishad Kamdar <nishadkamdar@gmail.com>
Date: Sat, 3 Aug 2019 19:43:35 +0530
Subject: [PATCH 030/157] i2c: stm32: Use the correct style for SPDX License
 Identifier

This patch corrects the SPDX License Identifier style
in header file related to STM32 Driver for I2C hardware
bus support.
For C header files Documentation/process/license-rules.rst
mandates C-like comments (opposed to C source files where
C++ style should be used)

Changes made by using a script provided by Joe Perches here:
https://lkml.org/lkml/2019/2/7/46

Suggested-by: Joe Perches <joe@perches.com>
Signed-off-by: Nishad Kamdar <nishadkamdar@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/i2c/busses/i2c-stm32.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-stm32.h b/drivers/i2c/busses/i2c-stm32.h
index 868755f82f88..2c21893905a3 100644
--- a/drivers/i2c/busses/i2c-stm32.h
+++ b/drivers/i2c/busses/i2c-stm32.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * i2c-stm32.h
  *

From 82a5008a341d301da3ab529ca888c64f529bd075 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nuno=20S=C3=A1?= <nuno.sa@analog.com>
Date: Mon, 5 Aug 2019 15:37:16 +0200
Subject: [PATCH 031/157] iio: frequency: adf4371: Fix output frequency setting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The fract1 word was not being properly programmed on the device leading
to wrong output frequencies.

Fixes: 7f699bd14913 (iio: frequency: adf4371: Add support for ADF4371 PLL)
Signed-off-by: Nuno Sá <nuno.sa@analog.com>
Reviewed-by: Stefan Popa <stefan.popa@analog.com>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/frequency/adf4371.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/iio/frequency/adf4371.c b/drivers/iio/frequency/adf4371.c
index e48f15cc9ab5..ff82863cbf42 100644
--- a/drivers/iio/frequency/adf4371.c
+++ b/drivers/iio/frequency/adf4371.c
@@ -276,11 +276,11 @@ static int adf4371_set_freq(struct adf4371_state *st, unsigned long long freq,
 	st->buf[0] = st->integer >> 8;
 	st->buf[1] = 0x40; /* REG12 default */
 	st->buf[2] = 0x00;
-	st->buf[3] = st->fract2 & 0xFF;
-	st->buf[4] = st->fract2 >> 7;
-	st->buf[5] = st->fract2 >> 15;
+	st->buf[3] = st->fract1 & 0xFF;
+	st->buf[4] = st->fract1 >> 8;
+	st->buf[5] = st->fract1 >> 16;
 	st->buf[6] = ADF4371_FRAC2WORD_L(st->fract2 & 0x7F) |
-		     ADF4371_FRAC1WORD(st->fract1 >> 23);
+		     ADF4371_FRAC1WORD(st->fract1 >> 24);
 	st->buf[7] = ADF4371_FRAC2WORD_H(st->fract2 >> 7);
 	st->buf[8] = st->mod2 & 0xFF;
 	st->buf[9] = ADF4371_MOD2WORD(st->mod2 >> 8);

From b9ddd5091160793ee9fac10da765cf3f53d2aaf0 Mon Sep 17 00:00:00 2001
From: Jacopo Mondi <jacopo+renesas@jmondi.org>
Date: Mon, 5 Aug 2019 17:55:15 +0200
Subject: [PATCH 032/157] iio: adc: max9611: Fix temperature reading in probe

The max9611 driver reads the die temperature at probe time to validate
the communication channel. Use the actual read value to perform the test
instead of the read function return value, which was mistakenly used so
far.

The temperature reading test was only successful because the 0 return
value is in the range of supported temperatures.

Fixes: 69780a3bbc0b ("iio: adc: Add Maxim max9611 ADC driver")
Signed-off-by: Jacopo Mondi <jacopo+renesas@jmondi.org>
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/max9611.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iio/adc/max9611.c b/drivers/iio/adc/max9611.c
index 0e3c6529fc4c..da073d72f649 100644
--- a/drivers/iio/adc/max9611.c
+++ b/drivers/iio/adc/max9611.c
@@ -480,7 +480,7 @@ static int max9611_init(struct max9611_dev *max9611)
 	if (ret)
 		return ret;
 
-	regval = ret & MAX9611_TEMP_MASK;
+	regval &= MAX9611_TEMP_MASK;
 
 	if ((regval > MAX9611_TEMP_MAX_POS &&
 	     regval < MAX9611_TEMP_MIN_NEG) ||

From b33d567560c1aadf3033290d74d4fd67af47aa61 Mon Sep 17 00:00:00 2001
From: zhengbin <zhengbin13@huawei.com>
Date: Mon, 8 Jul 2019 20:42:18 +0800
Subject: [PATCH 033/157] auxdisplay: panel: need to delete scan_timer when
 misc_register fails in panel_attach

In panel_attach, if misc_register fails, we need to delete scan_timer,
which was setup in keypad_init->init_scan_timer.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: zhengbin <zhengbin13@huawei.com>
Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
---
 drivers/auxdisplay/panel.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/auxdisplay/panel.c b/drivers/auxdisplay/panel.c
index e06de63497cf..e6bd727da503 100644
--- a/drivers/auxdisplay/panel.c
+++ b/drivers/auxdisplay/panel.c
@@ -1617,6 +1617,8 @@ static void panel_attach(struct parport *port)
 	return;
 
 err_lcd_unreg:
+	if (scan_timer.function)
+		del_timer_sync(&scan_timer);
 	if (lcd.enabled)
 		charlcd_unregister(lcd.charlcd);
 err_unreg_device:

From ba2c1340d7c8e1ba5957d814ec18592b315fd4de Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Thu, 4 Jul 2019 20:33:54 +0100
Subject: [PATCH 034/157] auxdisplay: charlcd: add help text for backlight
 initial state

While the individual CHARLCD_BL_xxx options have help texts, the
menu itself does not.  Fix this.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Mans Rullgard <mans@mansr.com>
[Added a bit more text to address Linus' suggestion]
Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
---
 drivers/auxdisplay/Kconfig | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/auxdisplay/Kconfig b/drivers/auxdisplay/Kconfig
index dd61fdd400f0..68489d1f00bb 100644
--- a/drivers/auxdisplay/Kconfig
+++ b/drivers/auxdisplay/Kconfig
@@ -448,6 +448,11 @@ config PANEL_BOOT_MESSAGE
 choice
 	prompt "Backlight initial state"
 	default CHARLCD_BL_FLASH
+	---help---
+	  Select the initial backlight state on boot or module load.
+
+	  Previously, there was no option for this: the backlight flashed
+	  briefly on init. Now you can also turn it off/on.
 
 	config CHARLCD_BL_OFF
 		bool "Off"

From 8af23fad626173eed7cc02733874d4124049bd5e Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Mon, 29 Jul 2019 16:32:38 +0100
Subject: [PATCH 035/157] iommu/dma: Handle MSI mappings separately

MSI pages must always be mapped into a device's *current* domain, which
*might* be the default DMA domain, but might instead be a VFIO domain
with its own MSI cookie. This subtlety got accidentally lost in the
streamlining of __iommu_dma_map(), but rather than reintroduce more
complexity and/or special-casing, it turns out neater to just split this
path out entirely.

Since iommu_dma_get_msi_page() already duplicates much of what
__iommu_dma_map() does, it can easily just make the allocation and
mapping calls directly as well. That way we can further streamline the
helper back to exclusively operating on DMA domains.

Fixes: b61d271e59d7 ("iommu/dma: Move domain lookup into __iommu_dma_{map,unmap}")
Reported-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Reported-by: Andre Przywara <andre.przywara@arm.com>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Tested-by: Marc Zyngier <maz@kernel.org>
Tested-by: Andre Przywara <andre.przywara@arm.com>
Tested-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/dma-iommu.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index a7f9c3edbcb2..6441197a75ea 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -459,13 +459,11 @@ static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
 {
 	struct iommu_domain *domain = iommu_get_dma_domain(dev);
 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
-	size_t iova_off = 0;
+	struct iova_domain *iovad = &cookie->iovad;
+	size_t iova_off = iova_offset(iovad, phys);
 	dma_addr_t iova;
 
-	if (cookie->type == IOMMU_DMA_IOVA_COOKIE) {
-		iova_off = iova_offset(&cookie->iovad, phys);
-		size = iova_align(&cookie->iovad, size + iova_off);
-	}
+	size = iova_align(iovad, size + iova_off);
 
 	iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev);
 	if (!iova)
@@ -1147,16 +1145,21 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
 	if (!msi_page)
 		return NULL;
 
-	iova = __iommu_dma_map(dev, msi_addr, size, prot);
-	if (iova == DMA_MAPPING_ERROR)
+	iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev);
+	if (!iova)
 		goto out_free_page;
 
+	if (iommu_map(domain, iova, msi_addr, size, prot))
+		goto out_free_iova;
+
 	INIT_LIST_HEAD(&msi_page->list);
 	msi_page->phys = msi_addr;
 	msi_page->iova = iova;
 	list_add(&msi_page->list, &cookie->msi_page_list);
 	return msi_page;
 
+out_free_iova:
+	iommu_dma_free_iova(cookie, iova, size);
 out_free_page:
 	kfree(msi_page);
 	return NULL;

From 11f4fe9ba3c85a4efac7ec25e75056a9b612d9da Mon Sep 17 00:00:00 2001
From: Anders Roxell <anders.roxell@linaro.org>
Date: Tue, 30 Jul 2019 17:20:11 +0200
Subject: [PATCH 036/157] iommu/arm-smmu: Mark expected switch fall-through
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now that -Wimplicit-fallthrough is passed to GCC by default, the
following warning shows up:

../drivers/iommu/arm-smmu-v3.c: In function ‘arm_smmu_write_strtab_ent’:
../drivers/iommu/arm-smmu-v3.c:1189:7: warning: this statement may fall
 through [-Wimplicit-fallthrough=]
    if (disable_bypass)
       ^
../drivers/iommu/arm-smmu-v3.c:1191:3: note: here
   default:
   ^~~~~~~

Rework so that the compiler doesn't warn about fall-through. Make it
clearer by calling 'BUG_ON()' when disable_bypass is set, and always
'break;'

Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
Acked-by: Will Deacon <will@kernel.org>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/arm-smmu-v3.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index a9a9fabd3968..c5c93e48b4db 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -1186,8 +1186,8 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
 			ste_live = true;
 			break;
 		case STRTAB_STE_0_CFG_ABORT:
-			if (disable_bypass)
-				break;
+			BUG_ON(!disable_bypass);
+			break;
 		default:
 			BUG(); /* STE corruption */
 		}

From 458b7c8e0dde12d140e3472b80919cbb9ae793f4 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Thu, 1 Aug 2019 11:14:58 +0800
Subject: [PATCH 037/157] iommu/vt-d: Detach domain when move device out of
 group

When removing a device from an iommu group, the domain should
be detached from the device. Otherwise, the stale domain info
will still be cached by the driver and the driver will refuse
to attach any domain to the device again.

Cc: Ashok Raj <ashok.raj@intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Kevin Tian <kevin.tian@intel.com>
Fixes: b7297783c2bb6 ("iommu/vt-d: Remove duplicated code for device hotplug")
Reported-and-tested-by: Vlad Buslov <vladbu@mellanox.com>
Suggested-by: Robin Murphy <robin.murphy@arm.com>
Link: https://lkml.org/lkml/2019/7/26/1133
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel-iommu.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index bdaed2da8a55..3e22fa6ae8c8 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -5316,6 +5316,8 @@ static void intel_iommu_remove_device(struct device *dev)
 	if (!iommu)
 		return;
 
+	dmar_remove_one_dev_info(dev);
+
 	iommu_group_remove_device(dev);
 
 	iommu_device_unlink(&iommu->iommu, dev);

From d7cd4dd907c19c0295829c947d79afa290b6fc24 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Wed, 7 Aug 2019 11:21:46 +0100
Subject: [PATCH 038/157] Btrfs: fix sysfs warning and missing raid sysfs
 directories

In the 5.3 merge window, commit 7c7e301406d0a9 ("btrfs: sysfs: Replace
default_attrs in ktypes with groups"), we started using the member
"defaults_groups" for the kobject type "btrfs_raid_ktype". That leads
to a series of warnings when running some test cases of fstests, such
as btrfs/027, btrfs/124 and btrfs/176. The traces produced by those
warnings are like the following:

  [116648.059212] kernfs: can not remove 'total_bytes', no directory
  [116648.060112] WARNING: CPU: 3 PID: 28500 at fs/kernfs/dir.c:1504 kernfs_remove_by_name_ns+0x75/0x80
  (...)
  [116648.066482] CPU: 3 PID: 28500 Comm: umount Tainted: G        W         5.3.0-rc3-btrfs-next-54 #1
  (...)
  [116648.069376] RIP: 0010:kernfs_remove_by_name_ns+0x75/0x80
  (...)
  [116648.072385] RSP: 0018:ffffabfd0090bd08 EFLAGS: 00010282
  [116648.073437] RAX: 0000000000000000 RBX: ffffffffc0c11998 RCX: 0000000000000000
  [116648.074201] RDX: ffff9fff603a7a00 RSI: ffff9fff603978a8 RDI: ffff9fff603978a8
  [116648.074956] RBP: ffffffffc0b9ca2f R08: 0000000000000000 R09: 0000000000000001
  [116648.075708] R10: ffff9ffe1f72e1c0 R11: 0000000000000000 R12: ffffffffc0b94120
  [116648.076434] R13: ffffffffb3d9b4e0 R14: 0000000000000000 R15: dead000000000100
  [116648.077143] FS:  00007f9cdc78a2c0(0000) GS:ffff9fff60380000(0000) knlGS:0000000000000000
  [116648.077852] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  [116648.078546] CR2: 00007f9fc4747ab4 CR3: 00000005c7832003 CR4: 00000000003606e0
  [116648.079235] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
  [116648.079907] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
  [116648.080585] Call Trace:
  [116648.081262]  remove_files+0x31/0x70
  [116648.081929]  sysfs_remove_group+0x38/0x80
  [116648.082596]  sysfs_remove_groups+0x34/0x70
  [116648.083258]  kobject_del+0x20/0x60
  [116648.083933]  btrfs_free_block_groups+0x405/0x430 [btrfs]
  [116648.084608]  close_ctree+0x19a/0x380 [btrfs]
  [116648.085278]  generic_shutdown_super+0x6c/0x110
  [116648.085951]  kill_anon_super+0xe/0x30
  [116648.086621]  btrfs_kill_super+0x12/0xa0 [btrfs]
  [116648.087289]  deactivate_locked_super+0x3a/0x70
  [116648.087956]  cleanup_mnt+0xb4/0x160
  [116648.088620]  task_work_run+0x7e/0xc0
  [116648.089285]  exit_to_usermode_loop+0xfa/0x100
  [116648.089933]  do_syscall_64+0x1cb/0x220
  [116648.090567]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
  [116648.091197] RIP: 0033:0x7f9cdc073b37
  (...)
  [116648.100046] ---[ end trace 22e24db328ccadf8 ]---
  [116648.100618] ------------[ cut here ]------------
  [116648.101175] kernfs: can not remove 'used_bytes', no directory
  [116648.101731] WARNING: CPU: 3 PID: 28500 at fs/kernfs/dir.c:1504 kernfs_remove_by_name_ns+0x75/0x80
  (...)
  [116648.105649] CPU: 3 PID: 28500 Comm: umount Tainted: G        W         5.3.0-rc3-btrfs-next-54 #1
  (...)
  [116648.107461] RIP: 0010:kernfs_remove_by_name_ns+0x75/0x80
  (...)
  [116648.109336] RSP: 0018:ffffabfd0090bd08 EFLAGS: 00010282
  [116648.109979] RAX: 0000000000000000 RBX: ffffffffc0c119a0 RCX: 0000000000000000
  [116648.110625] RDX: ffff9fff603a7a00 RSI: ffff9fff603978a8 RDI: ffff9fff603978a8
  [116648.111283] RBP: ffffffffc0b9ca41 R08: 0000000000000000 R09: 0000000000000001
  [116648.111940] R10: ffff9ffe1f72e1c0 R11: 0000000000000000 R12: ffffffffc0b94120
  [116648.112603] R13: ffffffffb3d9b4e0 R14: 0000000000000000 R15: dead000000000100
  [116648.113268] FS:  00007f9cdc78a2c0(0000) GS:ffff9fff60380000(0000) knlGS:0000000000000000
  [116648.113939] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  [116648.114607] CR2: 00007f9fc4747ab4 CR3: 00000005c7832003 CR4: 00000000003606e0
  [116648.115286] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
  [116648.115966] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
  [116648.116649] Call Trace:
  [116648.117326]  remove_files+0x31/0x70
  [116648.117997]  sysfs_remove_group+0x38/0x80
  [116648.118671]  sysfs_remove_groups+0x34/0x70
  [116648.119342]  kobject_del+0x20/0x60
  [116648.120022]  btrfs_free_block_groups+0x405/0x430 [btrfs]
  [116648.120707]  close_ctree+0x19a/0x380 [btrfs]
  [116648.121396]  generic_shutdown_super+0x6c/0x110
  [116648.122057]  kill_anon_super+0xe/0x30
  [116648.122702]  btrfs_kill_super+0x12/0xa0 [btrfs]
  [116648.123335]  deactivate_locked_super+0x3a/0x70
  [116648.123961]  cleanup_mnt+0xb4/0x160
  [116648.124586]  task_work_run+0x7e/0xc0
  [116648.125210]  exit_to_usermode_loop+0xfa/0x100
  [116648.125830]  do_syscall_64+0x1cb/0x220
  [116648.126463]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
  [116648.127080] RIP: 0033:0x7f9cdc073b37
  (...)
  [116648.135923] ---[ end trace 22e24db328ccadf9 ]---

These happen because, during the unmount path, we call kobject_del() for
raid kobjects that are not fully initialized, meaning that we set their
ktype (as btrfs_raid_ktype) through link_block_group() but we didn't set
their parent kobject, which is done through btrfs_add_raid_kobjects().

We have this split raid kobject setup since commit 75cb379d263521
("btrfs: defer adding raid type kobject until after chunk relocation") in
order to avoid triggering reclaim during contextes where we can not
(either we are holding a transaction handle or some lock required by
the transaction commit path), so that we do the calls to kobject_add(),
which triggers GFP_KERNEL allocations, through btrfs_add_raid_kobjects()
in contextes where it is safe to trigger reclaim. That change expected
that a new raid kobject can only be created either when mounting the
filesystem or after raid profile conversion through the relocation path.
However, we can have new raid kobject created in other two cases at least:

1) During device replace (or scrub) after adding a device a to the
   filesystem. The replace procedure (and scrub) do calls to
   btrfs_inc_block_group_ro() which can allocate a new block group
   with a new raid profile (because we now have more devices). This
   can be triggered by test cases btrfs/027 and btrfs/176.

2) During a degraded mount trough any write path. This can be triggered
   by test case btrfs/124.

Fixing this by adding extra calls to btrfs_add_raid_kobjects(), not only
makes things more complex and fragile, can also introduce deadlocks with
reclaim the following way:

1) Calling btrfs_add_raid_kobjects() at btrfs_inc_block_group_ro() or
   anywhere in the replace/scrub path will cause a deadlock with reclaim
   because if reclaim happens and a transaction commit is triggered,
   the transaction commit path will block at btrfs_scrub_pause().

2) During degraded mounts it is essentially impossible to figure out where
   to add extra calls to btrfs_add_raid_kobjects(), because allocation of
   a block group with a new raid profile can happen anywhere, which means
   we can't safely figure out which contextes are safe for reclaim, as
   we can either hold a transaction handle or some lock needed by the
   transaction commit path.

So it is too complex and error prone to have this split setup of raid
kobjects. So fix the issue by consolidating the setup of the kobjects in a
single place, at link_block_group(), and setup a nofs context there in
order to prevent reclaim being triggered by the memory allocations done
through the call chain of kobject_add().

Besides fixing the sysfs warnings during kobject_del(), this also ensures
the sysfs directories for the new raid profiles end up created and visible
to users (a bug that existed before the 5.3 commit 7c7e301406d0a9
("btrfs: sysfs: Replace default_attrs in ktypes with groups")).

Fixes: 75cb379d263521 ("btrfs: defer adding raid type kobject until after chunk relocation")
Fixes: 7c7e301406d0a9 ("btrfs: sysfs: Replace default_attrs in ktypes with groups")
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h       |  4 ---
 fs/btrfs/disk-io.c     |  2 --
 fs/btrfs/extent-tree.c | 57 ++++++++++++++++++------------------------
 fs/btrfs/volumes.c     | 13 ----------
 4 files changed, 24 insertions(+), 52 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 299e11e6c554..94660063a162 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -401,7 +401,6 @@ struct btrfs_dev_replace {
 struct raid_kobject {
 	u64 flags;
 	struct kobject kobj;
-	struct list_head list;
 };
 
 /*
@@ -915,8 +914,6 @@ struct btrfs_fs_info {
 	u32 thread_pool_size;
 
 	struct kobject *space_info_kobj;
-	struct list_head pending_raid_kobjs;
-	spinlock_t pending_raid_kobjs_lock; /* uncontended */
 
 	u64 total_pinned;
 
@@ -2698,7 +2695,6 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr);
 int btrfs_make_block_group(struct btrfs_trans_handle *trans,
 			   u64 bytes_used, u64 type, u64 chunk_offset,
 			   u64 size);
-void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info);
 struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
 				struct btrfs_fs_info *fs_info,
 				const u64 chunk_offset);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 5f7ee70b3d1a..97beb351a10c 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2683,8 +2683,6 @@ int open_ctree(struct super_block *sb,
 	INIT_LIST_HEAD(&fs_info->delayed_iputs);
 	INIT_LIST_HEAD(&fs_info->delalloc_roots);
 	INIT_LIST_HEAD(&fs_info->caching_block_groups);
-	INIT_LIST_HEAD(&fs_info->pending_raid_kobjs);
-	spin_lock_init(&fs_info->pending_raid_kobjs_lock);
 	spin_lock_init(&fs_info->delalloc_root_lock);
 	spin_lock_init(&fs_info->trans_lock);
 	spin_lock_init(&fs_info->fs_roots_radix_lock);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index d3b58e388535..25b2c9d7fa7f 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4,6 +4,7 @@
  */
 
 #include <linux/sched.h>
+#include <linux/sched/mm.h>
 #include <linux/sched/signal.h>
 #include <linux/pagemap.h>
 #include <linux/writeback.h>
@@ -7888,33 +7889,6 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
 	return 0;
 }
 
-/* link_block_group will queue up kobjects to add when we're reclaim-safe */
-void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info)
-{
-	struct btrfs_space_info *space_info;
-	struct raid_kobject *rkobj;
-	LIST_HEAD(list);
-	int ret = 0;
-
-	spin_lock(&fs_info->pending_raid_kobjs_lock);
-	list_splice_init(&fs_info->pending_raid_kobjs, &list);
-	spin_unlock(&fs_info->pending_raid_kobjs_lock);
-
-	list_for_each_entry(rkobj, &list, list) {
-		space_info = btrfs_find_space_info(fs_info, rkobj->flags);
-
-		ret = kobject_add(&rkobj->kobj, &space_info->kobj,
-				"%s", btrfs_bg_type_to_raid_name(rkobj->flags));
-		if (ret) {
-			kobject_put(&rkobj->kobj);
-			break;
-		}
-	}
-	if (ret)
-		btrfs_warn(fs_info,
-			   "failed to add kobject for block cache, ignoring");
-}
-
 static void link_block_group(struct btrfs_block_group_cache *cache)
 {
 	struct btrfs_space_info *space_info = cache->space_info;
@@ -7929,18 +7903,36 @@ static void link_block_group(struct btrfs_block_group_cache *cache)
 	up_write(&space_info->groups_sem);
 
 	if (first) {
-		struct raid_kobject *rkobj = kzalloc(sizeof(*rkobj), GFP_NOFS);
+		struct raid_kobject *rkobj;
+		unsigned int nofs_flag;
+		int ret;
+
+		/*
+		 * Setup a NOFS context because kobject_add(), deep in its call
+		 * chain, does GFP_KERNEL allocations, and we are often called
+		 * in a context where if reclaim is triggered we can deadlock
+		 * (we are either holding a transaction handle or some lock
+		 * required for a transaction commit).
+		 */
+		nofs_flag = memalloc_nofs_save();
+		rkobj = kzalloc(sizeof(*rkobj), GFP_KERNEL);
 		if (!rkobj) {
+			memalloc_nofs_restore(nofs_flag);
 			btrfs_warn(cache->fs_info,
 				"couldn't alloc memory for raid level kobject");
 			return;
 		}
 		rkobj->flags = cache->flags;
 		kobject_init(&rkobj->kobj, &btrfs_raid_ktype);
-
-		spin_lock(&fs_info->pending_raid_kobjs_lock);
-		list_add_tail(&rkobj->list, &fs_info->pending_raid_kobjs);
-		spin_unlock(&fs_info->pending_raid_kobjs_lock);
+		ret = kobject_add(&rkobj->kobj, &space_info->kobj, "%s",
+				  btrfs_bg_type_to_raid_name(rkobj->flags));
+		memalloc_nofs_restore(nofs_flag);
+		if (ret) {
+			kobject_put(&rkobj->kobj);
+			btrfs_warn(fs_info,
+			   "failed to add kobject for block cache, ignoring");
+			return;
+		}
 		space_info->block_group_kobjs[index] = &rkobj->kobj;
 	}
 }
@@ -8206,7 +8198,6 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
 			inc_block_group_ro(cache, 1);
 	}
 
-	btrfs_add_raid_kobjects(info);
 	btrfs_init_global_block_rsv(info);
 	ret = check_chunk_block_group_mappings(info);
 error:
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index d74b74ca07af..a447d3ec48d5 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -3087,16 +3087,6 @@ static int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
 	if (ret)
 		return ret;
 
-	/*
-	 * We add the kobjects here (and after forcing data chunk creation)
-	 * since relocation is the only place we'll create chunks of a new
-	 * type at runtime.  The only place where we'll remove the last
-	 * chunk of a type is the call immediately below this one.  Even
-	 * so, we're protected against races with the cleaner thread since
-	 * we're covered by the delete_unused_bgs_mutex.
-	 */
-	btrfs_add_raid_kobjects(fs_info);
-
 	trans = btrfs_start_trans_remove_block_group(root->fs_info,
 						     chunk_offset);
 	if (IS_ERR(trans)) {
@@ -3223,9 +3213,6 @@ static int btrfs_may_alloc_data_chunk(struct btrfs_fs_info *fs_info,
 			btrfs_end_transaction(trans);
 			if (ret < 0)
 				return ret;
-
-			btrfs_add_raid_kobjects(fs_info);
-
 			return 1;
 		}
 	}

From 07301df7d2fc220d3de5f7ad804dcb941400cb00 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Tue, 28 May 2019 16:21:54 +0800
Subject: [PATCH 039/157] btrfs: trim: Check the range passed into to prevent
 overflow

Normally the range->len is set to default value (U64_MAX), but when it's
not default value, we should check if the range overflows.

And if it overflows, return -EINVAL before doing anything.

Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Anand Jain <anand.jain@oracle.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 25b2c9d7fa7f..8b7eb22d508a 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -8966,6 +8966,7 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
 	struct btrfs_device *device;
 	struct list_head *devices;
 	u64 group_trimmed;
+	u64 range_end = U64_MAX;
 	u64 start;
 	u64 end;
 	u64 trimmed = 0;
@@ -8975,16 +8976,23 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
 	int dev_ret = 0;
 	int ret = 0;
 
+	/*
+	 * Check range overflow if range->len is set.
+	 * The default range->len is U64_MAX.
+	 */
+	if (range->len != U64_MAX &&
+	    check_add_overflow(range->start, range->len, &range_end))
+		return -EINVAL;
+
 	cache = btrfs_lookup_first_block_group(fs_info, range->start);
 	for (; cache; cache = next_block_group(cache)) {
-		if (cache->key.objectid >= (range->start + range->len)) {
+		if (cache->key.objectid >= range_end) {
 			btrfs_put_block_group(cache);
 			break;
 		}
 
 		start = max(range->start, cache->key.objectid);
-		end = min(range->start + range->len,
-				cache->key.objectid + cache->key.offset);
+		end = min(range_end, cache->key.objectid + cache->key.offset);
 
 		if (end - start >= range->minlen) {
 			if (!block_group_cache_done(cache)) {

From f591822c3cf314442819486f45ff7dc1f690e0c0 Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Mon, 5 Aug 2019 11:30:10 +0300
Subject: [PATCH 040/157] IB/mlx5: Fix implicit MR release flow

Once implicit MR is being called to be released by
ib_umem_notifier_release() its leaves were marked as "dying".

However, when dereg_mr()->mlx5_ib_free_implicit_mr()->mr_leaf_free() is
called, it skips running the mr_leaf_free_action (i.e. umem_odp->work)
when those leaves were marked as "dying".

As such ib_umem_release() for the leaves won't be called and their MRs
will be leaked as well.

When an application exits/killed without calling dereg_mr we might hit the
above flow.

This fatal scenario is reported by WARN_ON() upon
mlx5_ib_dealloc_ucontext() as ibcontext->per_mm_list is not empty, the
call trace can be seen below.

Originally the "dying" mark as part of ib_umem_notifier_release() was
introduced to prevent pagefault_mr() from returning a success response
once this happened. However, we already have today the completion
mechanism so no need for that in those flows any more.  Even in case a
success response will be returned the firmware will not find the pages and
an error will be returned in the following call as a released mm will
cause ib_umem_odp_map_dma_pages() to permanently fail mmget_not_zero().

Fix the above issue by dropping the "dying" from the above flows.  The
other flows that are using "dying" are still needed it for their
synchronization purposes.

   WARNING: CPU: 1 PID: 7218 at
   drivers/infiniband/hw/mlx5/main.c:2004
		  mlx5_ib_dealloc_ucontext+0x84/0x90 [mlx5_ib]
   CPU: 1 PID: 7218 Comm: ibv_rc_pingpong Tainted: G     E
	       5.2.0-rc6+ #13
   Call Trace:
   uverbs_destroy_ufile_hw+0xb5/0x120 [ib_uverbs]
   ib_uverbs_close+0x1f/0x80 [ib_uverbs]
   __fput+0xbe/0x250
   task_work_run+0x88/0xa0
   do_exit+0x2cb/0xc30
   ? __fput+0x14b/0x250
   do_group_exit+0x39/0xb0
   get_signal+0x191/0x920
   ? _raw_spin_unlock_bh+0xa/0x20
   ? inet_csk_accept+0x229/0x2f0
   do_signal+0x36/0x5e0
   ? put_unused_fd+0x5b/0x70
   ? __sys_accept4+0x1a6/0x1e0
   ? inet_hash+0x35/0x40
   ? release_sock+0x43/0x90
   ? _raw_spin_unlock_bh+0xa/0x20
   ? inet_listen+0x9f/0x120
   exit_to_usermode_loop+0x5c/0xc6
   do_syscall_64+0x182/0x1b0
   entry_SYSCALL_64_after_hwframe+0x44/0xa9

Fixes: 81713d3788d2 ("IB/mlx5: Add implicit MR support")
Link: https://lore.kernel.org/r/20190805083010.21777-1-leon@kernel.org
Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Reviewed-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/core/umem_odp.c |  4 ----
 drivers/infiniband/hw/mlx5/odp.c   | 22 ++++++++--------------
 2 files changed, 8 insertions(+), 18 deletions(-)

diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 2a75c6f8d827..c0e15db34680 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -112,10 +112,6 @@ static int ib_umem_notifier_release_trampoline(struct ib_umem_odp *umem_odp,
 	 * prevent any further fault handling on this MR.
 	 */
 	ib_umem_notifier_start_account(umem_odp);
-	umem_odp->dying = 1;
-	/* Make sure that the fact the umem is dying is out before we release
-	 * all pending page faults. */
-	smp_wmb();
 	complete_all(&umem_odp->notifier_completion);
 	umem_odp->umem.context->invalidate_range(
 		umem_odp, ib_umem_start(umem_odp), ib_umem_end(umem_odp));
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 81da82050d05..1d257d1b3b0d 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -579,7 +579,6 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
 			u32 flags)
 {
 	int npages = 0, current_seq, page_shift, ret, np;
-	bool implicit = false;
 	struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem);
 	bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE;
 	bool prefetch = flags & MLX5_PF_FLAGS_PREFETCH;
@@ -594,7 +593,6 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
 		if (IS_ERR(odp))
 			return PTR_ERR(odp);
 		mr = odp->private;
-		implicit = true;
 	} else {
 		odp = odp_mr;
 	}
@@ -682,19 +680,15 @@ next_mr:
 
 out:
 	if (ret == -EAGAIN) {
-		if (implicit || !odp->dying) {
-			unsigned long timeout =
-				msecs_to_jiffies(MMU_NOTIFIER_TIMEOUT);
+		unsigned long timeout = msecs_to_jiffies(MMU_NOTIFIER_TIMEOUT);
 
-			if (!wait_for_completion_timeout(
-					&odp->notifier_completion,
-					timeout)) {
-				mlx5_ib_warn(dev, "timeout waiting for mmu notifier. seq %d against %d. notifiers_count=%d\n",
-					     current_seq, odp->notifiers_seq, odp->notifiers_count);
-			}
-		} else {
-			/* The MR is being killed, kill the QP as well. */
-			ret = -EFAULT;
+		if (!wait_for_completion_timeout(&odp->notifier_completion,
+						 timeout)) {
+			mlx5_ib_warn(
+				dev,
+				"timeout waiting for mmu notifier. seq %d against %d. notifiers_count=%d\n",
+				current_seq, odp->notifiers_seq,
+				odp->notifiers_count);
 		}
 	}
 

From d97de8887a12c598abc4d2e4e57a54c1f030e112 Mon Sep 17 00:00:00 2001
From: Mark Zhang <markz@mellanox.com>
Date: Wed, 7 Aug 2019 13:18:19 +0300
Subject: [PATCH 041/157] RDMA/counter: Prevent QP counter binding if counters
 unsupported

In case of rdma_counter_init() fails, counter allocation and QP bind
should not be allowed.

Fixes: 413d3347503b ("RDMA/counter: Add set/clear per-port auto mode support")
Fixes: 1bd8e0a9d0fd ("RDMA/counter: Allow manual mode configuration support")
Signed-off-by: Mark Zhang <markz@mellanox.com>
Reviewed-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Link: https://lore.kernel.org/r/20190807101819.7581-1-leon@kernel.org
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/core/counters.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c
index 45d5164e9574..b79890739a2c 100644
--- a/drivers/infiniband/core/counters.c
+++ b/drivers/infiniband/core/counters.c
@@ -38,6 +38,9 @@ int rdma_counter_set_auto_mode(struct ib_device *dev, u8 port,
 	int ret;
 
 	port_counter = &dev->port_data[port].port_counter;
+	if (!port_counter->hstats)
+		return -EOPNOTSUPP;
+
 	mutex_lock(&port_counter->lock);
 	if (on) {
 		ret = __counter_set_mode(&port_counter->mode,
@@ -509,6 +512,9 @@ int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u8 port,
 	if (!rdma_is_port_valid(dev, port))
 		return -EINVAL;
 
+	if (!dev->port_data[port].port_counter.hstats)
+		return -EOPNOTSUPP;
+
 	qp = rdma_counter_get_qp(dev, qp_num);
 	if (!qp)
 		return -ENOENT;

From e7e6c6320c8c9ed923250cd019e5f9ca0f59b4b8 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 7 Aug 2019 15:32:36 +0300
Subject: [PATCH 042/157] IB/mlx5: Check the correct variable in error handling
 code

The code accidentally checks "event_sub" instead of "event_sub->eventfd".

Fixes: 759738537142 ("IB/mlx5: Enable subscription for device events over DEVX")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>
Acked-by: Leon Romanovsky <leonro@mellanox.com>
Link: https://lore.kernel.org/r/20190807123236.GA11452@mwanda
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/mlx5/devx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index ec4370f99381..2d1b3d9609d9 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -2026,7 +2026,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)(
 			event_sub->eventfd =
 				eventfd_ctx_fdget(redirect_fd);
 
-			if (IS_ERR(event_sub)) {
+			if (IS_ERR(event_sub->eventfd)) {
 				err = PTR_ERR(event_sub->eventfd);
 				event_sub->eventfd = NULL;
 				goto err;

From a86c71ba3022331f79662d7f12d1b25188c7e377 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Fri, 2 Aug 2019 13:26:12 -0700
Subject: [PATCH 043/157] scsi: lpfc: Fix crash when cpu count is 1 and null
 irq affinity mask

When a configurations runs with a single cpu (such as a kdump kernel),
which causes the driver to request a single vector, when the driver
subsequently requests an irq affinity mask, the mask comes back null.  The
driver currently does nothing in this scenario, which leaves mappings to
hardware queues incomplete and crashes the system.

Fix by recognizing the null mask and assigning the vector to the first cpu
in the system.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_init.c | 23 +++++++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index faf43b1d3dbe..a7549ae32542 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -10776,12 +10776,31 @@ lpfc_cpu_affinity_check(struct lpfc_hba *phba, int vectors)
 	/* This loop sets up all CPUs that are affinitized with a
 	 * irq vector assigned to the driver. All affinitized CPUs
 	 * will get a link to that vectors IRQ and EQ.
+	 *
+	 * NULL affinity mask handling:
+	 * If irq count is greater than one, log an error message.
+	 * If the null mask is received for the first irq, find the
+	 * first present cpu, and assign the eq index to ensure at
+	 * least one EQ is assigned.
 	 */
 	for (idx = 0; idx <  phba->cfg_irq_chann; idx++) {
 		/* Get a CPU mask for all CPUs affinitized to this vector */
 		maskp = pci_irq_get_affinity(phba->pcidev, idx);
-		if (!maskp)
-			continue;
+		if (!maskp) {
+			if (phba->cfg_irq_chann > 1)
+				lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+						"3329 No affinity mask found "
+						"for vector %d (%d)\n",
+						idx, phba->cfg_irq_chann);
+			if (!idx) {
+				cpu = cpumask_first(cpu_present_mask);
+				cpup = &phba->sli4_hba.cpu_map[cpu];
+				cpup->eq = idx;
+				cpup->irq = pci_irq_vector(phba->pcidev, idx);
+				cpup->flag |= LPFC_CPU_FIRST_IRQ;
+			}
+			break;
+		}
 
 		i = 0;
 		/* Loop through all CPUs associated with vector idx */

From 39c71a5b8212f4b502d9a630c6706ac723abd422 Mon Sep 17 00:00:00 2001
From: Jia-Ju Bai <baijiaju1990@gmail.com>
Date: Mon, 29 Jul 2019 10:08:49 +0800
Subject: [PATCH 044/157] dmaengine: stm32-mdma: Fix a possible null-pointer
 dereference in stm32_mdma_irq_handler()

In stm32_mdma_irq_handler(), chan is checked on line 1368.
When chan is NULL, it is still used on line 1369:
    dev_err(chan2dev(chan), "MDMA channel not initialized\n");

Thus, a possible null-pointer dereference may occur.

To fix this bug, "dev_dbg(mdma2dev(dmadev), ...)" is used instead.

Signed-off-by: Jia-Ju Bai <baijiaju1990@gmail.com>
Fixes: a4ffb13c8946 ("dmaengine: Add STM32 MDMA driver")
Link: https://lore.kernel.org/r/20190729020849.17971-1-baijiaju1990@gmail.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/stm32-mdma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c
index d6e919d3936a..1311de74bfdd 100644
--- a/drivers/dma/stm32-mdma.c
+++ b/drivers/dma/stm32-mdma.c
@@ -1366,7 +1366,7 @@ static irqreturn_t stm32_mdma_irq_handler(int irq, void *devid)
 
 	chan = &dmadev->chan[id];
 	if (!chan) {
-		dev_err(chan2dev(chan), "MDMA channel not initialized\n");
+		dev_dbg(mdma2dev(dmadev), "MDMA channel not initialized\n");
 		goto exit;
 	}
 

From 75354284cc3aa58f7e54d479d9bee69bd2ca828f Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 6 Aug 2019 16:14:44 +0900
Subject: [PATCH 045/157] auxdisplay: charlcd: move charlcd.h to
 drivers/auxdisplay

This header is included in drivers/auxdisplay/. Make it a local header.

Reviewed-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
---
 drivers/auxdisplay/charlcd.c                   | 2 +-
 {include/misc => drivers/auxdisplay}/charlcd.h | 0
 drivers/auxdisplay/hd44780.c                   | 3 +--
 drivers/auxdisplay/panel.c                     | 2 +-
 4 files changed, 3 insertions(+), 4 deletions(-)
 rename {include/misc => drivers/auxdisplay}/charlcd.h (100%)

diff --git a/drivers/auxdisplay/charlcd.c b/drivers/auxdisplay/charlcd.c
index 92745efefb54..bef6b85778b6 100644
--- a/drivers/auxdisplay/charlcd.c
+++ b/drivers/auxdisplay/charlcd.c
@@ -20,7 +20,7 @@
 
 #include <generated/utsrelease.h>
 
-#include <misc/charlcd.h>
+#include "charlcd.h"
 
 #define LCD_MINOR		156
 
diff --git a/include/misc/charlcd.h b/drivers/auxdisplay/charlcd.h
similarity index 100%
rename from include/misc/charlcd.h
rename to drivers/auxdisplay/charlcd.h
diff --git a/drivers/auxdisplay/hd44780.c b/drivers/auxdisplay/hd44780.c
index ab15b64707ad..bcbe13092327 100644
--- a/drivers/auxdisplay/hd44780.c
+++ b/drivers/auxdisplay/hd44780.c
@@ -14,8 +14,7 @@
 #include <linux/property.h>
 #include <linux/slab.h>
 
-#include <misc/charlcd.h>
-
+#include "charlcd.h"
 
 enum hd44780_pin {
 	/* Order does matter due to writing to GPIO array subsets! */
diff --git a/drivers/auxdisplay/panel.c b/drivers/auxdisplay/panel.c
index e6bd727da503..85965953683e 100644
--- a/drivers/auxdisplay/panel.c
+++ b/drivers/auxdisplay/panel.c
@@ -55,7 +55,7 @@
 #include <linux/io.h>
 #include <linux/uaccess.h>
 
-#include <misc/charlcd.h>
+#include "charlcd.h"
 
 #define KEYPAD_MINOR		185
 

From 390235c3e66036351e2a89b925843a741c8afd6c Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 6 Aug 2019 16:14:45 +0900
Subject: [PATCH 046/157] auxdisplay: charlcd: add include guard to charlcd.h

Add a header include guard just in case.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
---
 drivers/auxdisplay/charlcd.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/auxdisplay/charlcd.h b/drivers/auxdisplay/charlcd.h
index 8cf6c18b0adb..00911ad0f3de 100644
--- a/drivers/auxdisplay/charlcd.h
+++ b/drivers/auxdisplay/charlcd.h
@@ -6,6 +6,9 @@
  * Copyright (C) 2016-2017 Glider bvba
  */
 
+#ifndef _CHARLCD_H
+#define _CHARLCD_H
+
 struct charlcd {
 	const struct charlcd_ops *ops;
 	const unsigned char *char_conv;	/* Optional */
@@ -37,3 +40,5 @@ int charlcd_register(struct charlcd *lcd);
 int charlcd_unregister(struct charlcd *lcd);
 
 void charlcd_poke(struct charlcd *lcd);
+
+#endif /* CHARLCD_H */

From 6c4d6bc5486466e3a67cc47270001d0b4a26eed4 Mon Sep 17 00:00:00 2001
From: Masanari Iida <standby24x7@gmail.com>
Date: Tue, 6 Aug 2019 23:23:28 +0900
Subject: [PATCH 047/157] auxdisplay: Fix a typo in cfag12864b-example.c

This patch fix a spelling typo in cfag12864b-example.c

Signed-off-by: Masanari Iida <standby24x7@gmail.com>
Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
---
 samples/auxdisplay/cfag12864b-example.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/samples/auxdisplay/cfag12864b-example.c b/samples/auxdisplay/cfag12864b-example.c
index 85571e90191f..bfeab44f81d0 100644
--- a/samples/auxdisplay/cfag12864b-example.c
+++ b/samples/auxdisplay/cfag12864b-example.c
@@ -245,7 +245,7 @@ int main(int argc, char *argv[])
 
 	if (argc != 2) {
 		printf(
-			"Sintax:  %s fbdev\n"
+			"Syntax:  %s fbdev\n"
 			"Usually: /dev/fb0, /dev/fb1...\n", argv[0]);
 		return -1;
 	}

From 8676b3ca4673517650fd509d7fa586aff87b3c28 Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Thu, 18 Jul 2019 18:02:15 -0500
Subject: [PATCH 048/157] soundwire: fix regmap dependencies and align with
 other serial links

The existing code has a mixed select/depend usage which makes no sense.

config SOUNDWIRE_BUS
       tristate
       select REGMAP_SOUNDWIRE

config REGMAP_SOUNDWIRE
        tristate
        depends on SOUNDWIRE_BUS

Let's remove one layer of Kconfig definitions and align with the
solutions used by all other serial links.

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Link: https://lore.kernel.org/r/20190718230215.18675-1-pierre-louis.bossart@linux.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/base/regmap/Kconfig | 2 +-
 drivers/soundwire/Kconfig   | 7 +------
 drivers/soundwire/Makefile  | 2 +-
 3 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/drivers/base/regmap/Kconfig b/drivers/base/regmap/Kconfig
index a4984136c19d..0fd6f97ee523 100644
--- a/drivers/base/regmap/Kconfig
+++ b/drivers/base/regmap/Kconfig
@@ -44,7 +44,7 @@ config REGMAP_IRQ
 
 config REGMAP_SOUNDWIRE
 	tristate
-	depends on SOUNDWIRE_BUS
+	depends on SOUNDWIRE
 
 config REGMAP_SCCB
 	tristate
diff --git a/drivers/soundwire/Kconfig b/drivers/soundwire/Kconfig
index 3a01cfd70fdc..f518273cfbe3 100644
--- a/drivers/soundwire/Kconfig
+++ b/drivers/soundwire/Kconfig
@@ -4,7 +4,7 @@
 #
 
 menuconfig SOUNDWIRE
-	bool "SoundWire support"
+	tristate "SoundWire support"
 	help
 	  SoundWire is a 2-Pin interface with data and clock line ratified
 	  by the MIPI Alliance. SoundWire is used for transporting data
@@ -17,17 +17,12 @@ if SOUNDWIRE
 
 comment "SoundWire Devices"
 
-config SOUNDWIRE_BUS
-	tristate
-	select REGMAP_SOUNDWIRE
-
 config SOUNDWIRE_CADENCE
 	tristate
 
 config SOUNDWIRE_INTEL
 	tristate "Intel SoundWire Master driver"
 	select SOUNDWIRE_CADENCE
-	select SOUNDWIRE_BUS
 	depends on X86 && ACPI && SND_SOC
 	help
 	  SoundWire Intel Master driver.
diff --git a/drivers/soundwire/Makefile b/drivers/soundwire/Makefile
index fd99a831b92a..45b7e5001653 100644
--- a/drivers/soundwire/Makefile
+++ b/drivers/soundwire/Makefile
@@ -5,7 +5,7 @@
 
 #Bus Objs
 soundwire-bus-objs := bus_type.o bus.o slave.o mipi_disco.o stream.o
-obj-$(CONFIG_SOUNDWIRE_BUS) += soundwire-bus.o
+obj-$(CONFIG_SOUNDWIRE) += soundwire-bus.o
 
 #Cadence Objs
 soundwire-cadence-objs := cadence_master.o

From eac4471d0882da14be652ef0a55b914145fab15a Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 8 Aug 2019 13:32:36 +0300
Subject: [PATCH 049/157] drm/i915: Use after free in error path in
 intel_vgpu_create_workload()

We can't free "workload" until after the printk or it's a use after
free.

Fixes: 2089a76ade90 ("drm/i915/gvt: Checking workload's gma earlier")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/scheduler.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 9f3fd7d96a69..75baff657e43 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -1528,9 +1528,9 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id,
 			if (!intel_gvt_ggtt_validate_range(vgpu,
 				workload->wa_ctx.indirect_ctx.guest_gma,
 				workload->wa_ctx.indirect_ctx.size)) {
-				kmem_cache_free(s->workloads, workload);
 				gvt_vgpu_err("invalid wa_ctx at: 0x%lx\n",
 				    workload->wa_ctx.indirect_ctx.guest_gma);
+				kmem_cache_free(s->workloads, workload);
 				return ERR_PTR(-EINVAL);
 			}
 		}
@@ -1542,9 +1542,9 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id,
 			if (!intel_gvt_ggtt_validate_range(vgpu,
 				workload->wa_ctx.per_ctx.guest_gma,
 				CACHELINE_BYTES)) {
-				kmem_cache_free(s->workloads, workload);
 				gvt_vgpu_err("invalid per_ctx at: 0x%lx\n",
 					workload->wa_ctx.per_ctx.guest_gma);
+				kmem_cache_free(s->workloads, workload);
 				return ERR_PTR(-EINVAL);
 			}
 		}

From de768ce45466f3009809719eb7b1f6f5277d9373 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 9 Aug 2019 11:23:00 +0200
Subject: [PATCH 050/157] ALSA: hda - Apply workaround for another AMD chip
 1022:1487

MSI MPG X570 board is with another AMD HD-audio controller (PCI ID
1022:1487) and it requires the same workaround applied for X370, etc
(PCI ID 1022:1457).

BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=195303
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/hda_intel.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index a6d8c0d77b84..99fc0917339b 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -2508,6 +2508,9 @@ static const struct pci_device_id azx_ids[] = {
 	/* AMD, X370 & co */
 	{ PCI_DEVICE(0x1022, 0x1457),
 	  .driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_AMD_SB },
+	/* AMD, X570 & co */
+	{ PCI_DEVICE(0x1022, 0x1487),
+	  .driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_AMD_SB },
 	/* AMD Stoney */
 	{ PCI_DEVICE(0x1022, 0x157a),
 	  .driver_data = AZX_DRIVER_GENERIC | AZX_DCAPS_PRESET_ATI_SB |

From d555c34338cae844b207564c482e5a3fb089d25e Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Fri, 9 Aug 2019 10:32:40 +0200
Subject: [PATCH 051/157] omap-dma/omap_vout_vrfb: fix off-by-one fi value

The OMAP 4 TRM specifies that when using double-index addressing
the address increases by the ES plus the EI value minus 1 within
a frame. When a full frame is transferred, the address increases
by the ES plus the frame index (FI) value minus 1.

The omap-dma code didn't account for the 'minus 1' in the FI register.
To get correct addressing, add 1 to the src_icg value.

This was found when testing a hacked version of the media m2m-deinterlace.c
driver on a Pandaboard.

The only other source that uses this feature is omap_vout_vrfb.c,
and that adds a + 1 when setting the dst_icg. This is a workaround
for the broken omap-dma.c behavior. So remove the workaround at the
same time that we fix omap-dma.c.

I tested the omap_vout driver with a Beagle XM board to check that
the '+ 1' in omap_vout_vrfb.c was indeed a workaround for the omap-dma
bug.

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Acked-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Link: https://lore.kernel.org/r/952e7f51-f208-9333-6f58-b7ed20d2ea0b@xs4all.nl
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/ti/omap-dma.c                    | 4 ++--
 drivers/media/platform/omap/omap_vout_vrfb.c | 3 +--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/dma/ti/omap-dma.c b/drivers/dma/ti/omap-dma.c
index ba2489d4ea24..ba27802efcd0 100644
--- a/drivers/dma/ti/omap-dma.c
+++ b/drivers/dma/ti/omap-dma.c
@@ -1234,7 +1234,7 @@ static struct dma_async_tx_descriptor *omap_dma_prep_dma_interleaved(
 	if (src_icg) {
 		d->ccr |= CCR_SRC_AMODE_DBLIDX;
 		d->ei = 1;
-		d->fi = src_icg;
+		d->fi = src_icg + 1;
 	} else if (xt->src_inc) {
 		d->ccr |= CCR_SRC_AMODE_POSTINC;
 		d->fi = 0;
@@ -1249,7 +1249,7 @@ static struct dma_async_tx_descriptor *omap_dma_prep_dma_interleaved(
 	if (dst_icg) {
 		d->ccr |= CCR_DST_AMODE_DBLIDX;
 		sg->ei = 1;
-		sg->fi = dst_icg;
+		sg->fi = dst_icg + 1;
 	} else if (xt->dst_inc) {
 		d->ccr |= CCR_DST_AMODE_POSTINC;
 		sg->fi = 0;
diff --git a/drivers/media/platform/omap/omap_vout_vrfb.c b/drivers/media/platform/omap/omap_vout_vrfb.c
index 29e3f5da59c1..11ec048929e8 100644
--- a/drivers/media/platform/omap/omap_vout_vrfb.c
+++ b/drivers/media/platform/omap/omap_vout_vrfb.c
@@ -253,8 +253,7 @@ int omap_vout_prepare_vrfb(struct omap_vout_device *vout,
 	 */
 
 	pixsize = vout->bpp * vout->vrfb_bpp;
-	dst_icg = ((MAX_PIXELS_PER_LINE * pixsize) -
-		  (vout->pix.width * vout->bpp)) + 1;
+	dst_icg = MAX_PIXELS_PER_LINE * pixsize - vout->pix.width * vout->bpp;
 
 	xt->src_start = vout->buf_phy_addr[vb->i];
 	xt->dst_start = vout->vrfb_context[vb->i].paddr[0];

From bfeaec7f7d2f6b09764a1b48e88e9ca3d5076419 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Sat, 20 Jul 2019 10:01:26 +0800
Subject: [PATCH 052/157] iommu/vt-d: Correctly check format of page table in
 debugfs

PASID support and enable bit in the context entry isn't the right
indicator for the type of tables (legacy or scalable mode). Check
the DMA_RTADDR_SMT bit in the root context pointer instead.

Cc: Ashok Raj <ashok.raj@intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Sai Praneeth <sai.praneeth.prakhya@intel.com>
Fixes: dd5142ca5d24b ("iommu/vt-d: Add debugfs support to show scalable mode DMAR table internals")
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel-iommu-debugfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/intel-iommu-debugfs.c b/drivers/iommu/intel-iommu-debugfs.c
index 2b25d9c59336..471f05d452e0 100644
--- a/drivers/iommu/intel-iommu-debugfs.c
+++ b/drivers/iommu/intel-iommu-debugfs.c
@@ -235,7 +235,7 @@ static void ctx_tbl_walk(struct seq_file *m, struct intel_iommu *iommu, u16 bus)
 		tbl_wlk.ctx_entry = context;
 		m->private = &tbl_wlk;
 
-		if (pasid_supported(iommu) && is_pasid_enabled(context)) {
+		if (dmar_readq(iommu->reg + DMAR_RTADDR_REG) & DMA_RTADDR_SMT) {
 			pasid_dir_ptr = context->lo & VTD_PAGE_MASK;
 			pasid_dir_size = get_pasid_dir_size(context);
 			pasid_dir_walk(m, pasid_dir_ptr, pasid_dir_size);

From ab2cbeb0ed301a9f0460078e91b09f39958212ef Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Mon, 29 Jul 2019 17:46:00 +0100
Subject: [PATCH 053/157] iommu/dma: Handle SG length overflow better

Since scatterlist dimensions are all unsigned ints, in the relatively
rare cases where a device's max_segment_size is set to UINT_MAX, then
the "cur_len + s_length <= max_len" check in __finalise_sg() will always
return true. As a result, the corner case of such a device mapping an
excessively large scatterlist which is mergeable to or beyond a total
length of 4GB can lead to overflow and a bogus truncated dma_length in
the resulting segment.

As we already assume that any single segment must be no longer than
max_len to begin with, this can easily be addressed by reshuffling the
comparison.

Fixes: 809eac54cdd6 ("iommu/dma: Implement scatterlist segment merging")
Reported-by: Nicolin Chen <nicoleotsuka@gmail.com>
Tested-by: Nicolin Chen <nicoleotsuka@gmail.com>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/dma-iommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 6441197a75ea..4ea9cf02ba2d 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -762,7 +762,7 @@ static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents,
 		 * - and wouldn't make the resulting output segment too long
 		 */
 		if (cur_len && !s_iova_off && (dma_addr & seg_mask) &&
-		    (cur_len + s_length <= max_len)) {
+		    (max_len - cur_len >= s_length)) {
 			/* ...then concatenate it with the previous one */
 			cur_len += s_length;
 		} else {

From ae23bfb68f2896835e54a137688906713cb607e7 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Tue, 6 Aug 2019 08:14:08 +0800
Subject: [PATCH 054/157] iommu/vt-d: Detach domain before using a private one

When the default domain of a group doesn't work for a device,
the iommu driver will try to use a private domain. The domain
which was previously attached to the device must be detached.

Cc: Ashok Raj <ashok.raj@intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Alex Williamson <alex.williamson@redhat.com>
Fixes: 942067f1b6b97 ("iommu/vt-d: Identify default domains replaced with private")
Reported-by: Alex Williamson <alex.williamson@redhat.com>
Link: https://lkml.org/lkml/2019/8/2/1379
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Tested-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel-iommu.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 3e22fa6ae8c8..37259b7f95a7 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -3449,6 +3449,7 @@ static bool iommu_need_mapping(struct device *dev)
 				dmar_domain = to_dmar_domain(domain);
 				dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
 			}
+			dmar_remove_one_dev_info(dev);
 			get_private_domain_for_dev(dev);
 		}
 
@@ -4803,7 +4804,8 @@ static void dmar_remove_one_dev_info(struct device *dev)
 
 	spin_lock_irqsave(&device_domain_lock, flags);
 	info = dev->archdata.iommu;
-	__dmar_remove_one_dev_info(info);
+	if (info)
+		__dmar_remove_one_dev_info(info);
 	spin_unlock_irqrestore(&device_domain_lock, flags);
 }
 
@@ -5281,6 +5283,7 @@ static int intel_iommu_add_device(struct device *dev)
 		if (device_def_domain_type(dev) == IOMMU_DOMAIN_IDENTITY) {
 			ret = iommu_request_dm_for_dev(dev);
 			if (ret) {
+				dmar_remove_one_dev_info(dev);
 				dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
 				domain_add_dev_info(si_domain, dev);
 				dev_info(dev,
@@ -5291,6 +5294,7 @@ static int intel_iommu_add_device(struct device *dev)
 		if (device_def_domain_type(dev) == IOMMU_DOMAIN_DMA) {
 			ret = iommu_request_dma_domain_for_dev(dev);
 			if (ret) {
+				dmar_remove_one_dev_info(dev);
 				dmar_domain->flags |= DOMAIN_FLAG_LOSE_CHILDREN;
 				if (!get_private_domain_for_dev(dev)) {
 					dev_warn(dev,

From 3a18844dcf89e636b2d0cbf577e3963b0bcb6d23 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Tue, 6 Aug 2019 08:14:09 +0800
Subject: [PATCH 055/157] iommu/vt-d: Fix possible use-after-free of private
 domain

Multiple devices might share a private domain. One real example
is a pci bridge and all devices behind it. When remove a private
domain, make sure that it has been detached from all devices to
avoid use-after-free case.

Cc: Ashok Raj <ashok.raj@intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Alex Williamson <alex.williamson@redhat.com>
Fixes: 942067f1b6b97 ("iommu/vt-d: Identify default domains replaced with private")
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Tested-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel-iommu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 37259b7f95a7..12d094d08c0a 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -4791,7 +4791,8 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info)
 
 	/* free the private domain */
 	if (domain->flags & DOMAIN_FLAG_LOSE_CHILDREN &&
-	    !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY))
+	    !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
+	    list_empty(&domain->devices))
 		domain_exit(info->domain);
 
 	free_devinfo_mem(info);

From cfef67f016e4c00a2f423256fc678a6967a9fc09 Mon Sep 17 00:00:00 2001
From: Wenwen Wang <wenwen@cs.uga.edu>
Date: Fri, 9 Aug 2019 23:29:48 -0500
Subject: [PATCH 056/157] ALSA: hda - Fix a memory leak bug

In snd_hda_parse_generic_codec(), 'spec' is allocated through kzalloc().
Then, the pin widgets in 'codec' are parsed. However, if the parsing
process fails, 'spec' is not deallocated, leading to a memory leak.

To fix the above issue, free 'spec' before returning the error.

Fixes: 352f7f914ebb ("ALSA: hda - Merge Realtek parser code to generic parser")
Signed-off-by: Wenwen Wang <wenwen@cs.uga.edu>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/hda_generic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c
index 485edaba0037..8f2beb1f3ae4 100644
--- a/sound/pci/hda/hda_generic.c
+++ b/sound/pci/hda/hda_generic.c
@@ -6100,7 +6100,7 @@ static int snd_hda_parse_generic_codec(struct hda_codec *codec)
 
 	err = snd_hda_parse_pin_defcfg(codec, &spec->autocfg, NULL, 0);
 	if (err < 0)
-		return err;
+		goto error;
 
 	err = snd_hda_gen_parse_auto_config(codec, &spec->autocfg);
 	if (err < 0)

From e61a41256edf9f425039129757af4a80b5ed8162 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Fri, 9 Aug 2019 08:01:17 +0530
Subject: [PATCH 057/157] cpufreq: dev_pm_qos_update_request() can return 1 on
 success

dev_pm_qos_update_request() can return 1 on success, so don't treat
it as an error.

Fixes: 18c49926c4bf ("cpufreq: Add QoS requests for userspace constraints")
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 8dda62367816..c28ebf2810f1 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -2528,7 +2528,7 @@ static int cpufreq_boost_set_sw(int state)
 		}
 
 		ret = dev_pm_qos_update_request(policy->max_freq_req, policy->max);
-		if (ret)
+		if (ret < 0)
 			break;
 	}
 

From 600f5badb78c316146d062cfd7af4a2cfb655baa Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Wed, 7 Aug 2019 12:36:01 +0530
Subject: [PATCH 058/157] cpufreq: schedutil: Don't skip freq update when
 limits change

To avoid reducing the frequency of a CPU prematurely, we skip reducing
the frequency if the CPU had been busy recently.

This should not be done when the limits of the policy are changed, for
example due to thermal throttling. We should always get the frequency
within the new limits as soon as possible.

Trying to fix this by using only one flag, i.e. need_freq_update, can
lead to a race condition where the flag gets cleared without forcing us
to change the frequency at least once. And so this patch introduces
another flag to avoid that race condition.

Fixes: ecd288429126 ("cpufreq: schedutil: Don't set next_freq to UINT_MAX")
Cc: v4.18+ <stable@vger.kernel.org> # v4.18+
Reported-by: Doug Smythies <dsmythies@telus.net>
Tested-by: Doug Smythies <dsmythies@telus.net>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 kernel/sched/cpufreq_schedutil.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 636ca6f88c8e..867b4bb6d4be 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -40,6 +40,7 @@ struct sugov_policy {
 	struct task_struct	*thread;
 	bool			work_in_progress;
 
+	bool			limits_changed;
 	bool			need_freq_update;
 };
 
@@ -89,8 +90,11 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
 	    !cpufreq_this_cpu_can_update(sg_policy->policy))
 		return false;
 
-	if (unlikely(sg_policy->need_freq_update))
+	if (unlikely(sg_policy->limits_changed)) {
+		sg_policy->limits_changed = false;
+		sg_policy->need_freq_update = true;
 		return true;
+	}
 
 	delta_ns = time - sg_policy->last_freq_update_time;
 
@@ -437,7 +441,7 @@ static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; }
 static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu, struct sugov_policy *sg_policy)
 {
 	if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_dl)
-		sg_policy->need_freq_update = true;
+		sg_policy->limits_changed = true;
 }
 
 static void sugov_update_single(struct update_util_data *hook, u64 time,
@@ -457,7 +461,8 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
 	if (!sugov_should_update_freq(sg_policy, time))
 		return;
 
-	busy = sugov_cpu_is_busy(sg_cpu);
+	/* Limits may have changed, don't skip frequency update */
+	busy = !sg_policy->need_freq_update && sugov_cpu_is_busy(sg_cpu);
 
 	util = sugov_get_util(sg_cpu);
 	max = sg_cpu->max;
@@ -831,6 +836,7 @@ static int sugov_start(struct cpufreq_policy *policy)
 	sg_policy->last_freq_update_time	= 0;
 	sg_policy->next_freq			= 0;
 	sg_policy->work_in_progress		= false;
+	sg_policy->limits_changed		= false;
 	sg_policy->need_freq_update		= false;
 	sg_policy->cached_raw_freq		= 0;
 
@@ -879,7 +885,7 @@ static void sugov_limits(struct cpufreq_policy *policy)
 		mutex_unlock(&sg_policy->work_lock);
 	}
 
-	sg_policy->need_freq_update = true;
+	sg_policy->limits_changed = true;
 }
 
 struct cpufreq_governor schedutil_gov = {

From cf14be0b41c659ede89abef3f7ec0e98e6cfea5b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 6 Aug 2019 14:33:23 +0300
Subject: [PATCH 059/157] dma-direct: fix DMA_ATTR_NO_KERNEL_MAPPING

The new DMA_ATTR_NO_KERNEL_MAPPING needs to actually assign
a dma_addr to work.  Also skip it if the architecture needs
forced decryption handling, as that needs a kernel virtual
address.

Fixes: d98849aff879 (dma-direct: handle DMA_ATTR_NO_KERNEL_MAPPING in common code)
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Lucas Stach <l.stach@pengutronix.de>
---
 kernel/dma/direct.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 59bdceea3737..974e96a1de44 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -130,10 +130,12 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
 	if (!page)
 		return NULL;
 
-	if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) {
+	if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) &&
+	    !force_dma_unencrypted(dev)) {
 		/* remove any dirty cache lines on the kernel alias */
 		if (!PageHighMem(page))
 			arch_dma_prep_coherent(page, size);
+		*dma_handle = phys_to_dma(dev, page_to_phys(page));
 		/* return the page pointer as the opaque cookie */
 		return page;
 	}
@@ -178,7 +180,8 @@ void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
 {
 	unsigned int page_order = get_order(size);
 
-	if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) {
+	if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) &&
+	    !force_dma_unencrypted(dev)) {
 		/* cpu_addr is a struct page cookie, not a kernel address */
 		__dma_direct_free_pages(dev, size, cpu_addr);
 		return;

From d8ad55538abe443919e20e0bb996561bca9cad84 Mon Sep 17 00:00:00 2001
From: Lucas Stach <l.stach@pengutronix.de>
Date: Mon, 5 Aug 2019 17:51:53 +0200
Subject: [PATCH 060/157] dma-direct: don't truncate dma_required_mask to bus
 addressing capabilities

The dma required_mask needs to reflect the actual addressing capabilities
needed to handle the whole system RAM. When truncated down to the bus
addressing capabilities dma_addressing_limited() will incorrectly signal
no limitations for devices which are restricted by the bus_dma_mask.

Fixes: b4ebe6063204 (dma-direct: implement complete bus_dma_mask handling)
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
Tested-by: Atish Patra <atish.patra@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 kernel/dma/direct.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 974e96a1de44..795c9b095d75 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -47,9 +47,6 @@ u64 dma_direct_get_required_mask(struct device *dev)
 {
 	u64 max_dma = phys_to_dma_direct(dev, (max_pfn - 1) << PAGE_SHIFT);
 
-	if (dev->bus_dma_mask && dev->bus_dma_mask < max_dma)
-		max_dma = dev->bus_dma_mask;
-
 	return (1ULL << (fls64(max_dma) - 1)) * 2 - 1;
 }
 

From 33dcb37cef741294b481f4d889a465b8091f11bf Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 26 Jul 2019 09:26:40 +0200
Subject: [PATCH 061/157] dma-mapping: fix page attributes for dma_mmap_*

All the way back to introducing dma_common_mmap we've defaulted to mark
the pages as uncached.  But this is wrong for DMA coherent devices.
Later on DMA_ATTR_WRITE_COMBINE also got incorrect treatment as that
flag is only treated special on the alloc side for non-coherent devices.

Introduce a new dma_pgprot helper that deals with the check for coherent
devices so that only the remapping cases ever reach arch_dma_mmap_pgprot
and we thus ensure no aliasing of page attributes happens, which makes
the powerpc version of arch_dma_mmap_pgprot obsolete and simplifies the
remaining ones.

Note that this means arch_dma_mmap_pgprot is a bit misnamed now, but
we'll phase it out soon.

Fixes: 64ccc9c033c6 ("common: dma-mapping: add support for generic dma_mmap_* calls")
Reported-by: Shawn Anastasio <shawn@anastas.io>
Reported-by: Gavin Li <git@thegavinli.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Catalin Marinas <catalin.marinas@arm.com> # arm64
---
 arch/arm/mm/dma-mapping.c        |  4 +---
 arch/arm64/mm/dma-mapping.c      |  4 +---
 arch/powerpc/Kconfig             |  1 -
 arch/powerpc/kernel/Makefile     |  3 +--
 arch/powerpc/kernel/dma-common.c | 17 -----------------
 drivers/iommu/dma-iommu.c        |  6 +++---
 include/linux/dma-noncoherent.h  | 13 +++++++++----
 kernel/dma/mapping.c             | 19 ++++++++++++++++++-
 kernel/dma/remap.c               |  2 +-
 9 files changed, 34 insertions(+), 35 deletions(-)
 delete mode 100644 arch/powerpc/kernel/dma-common.c

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 6774b03aa405..d42557ee69c2 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -2405,9 +2405,7 @@ long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
 pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
 		unsigned long attrs)
 {
-	if (!dev_is_dma_coherent(dev))
-		return __get_dma_pgprot(attrs, prot);
-	return prot;
+	return __get_dma_pgprot(attrs, prot);
 }
 
 void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 1d3f0b5a9940..bd2b039f43a6 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -14,9 +14,7 @@
 pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
 		unsigned long attrs)
 {
-	if (!dev_is_dma_coherent(dev) || (attrs & DMA_ATTR_WRITE_COMBINE))
-		return pgprot_writecombine(prot);
-	return prot;
+	return pgprot_writecombine(prot);
 }
 
 void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 77f6ebf97113..d8dcd8820369 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -121,7 +121,6 @@ config PPC
 	select ARCH_32BIT_OFF_T if PPC32
 	select ARCH_HAS_DEBUG_VIRTUAL
 	select ARCH_HAS_DEVMEM_IS_ALLOWED
-	select ARCH_HAS_DMA_MMAP_PGPROT
 	select ARCH_HAS_ELF_RANDOMIZE
 	select ARCH_HAS_FORTIFY_SOURCE
 	select ARCH_HAS_GCOV_PROFILE_ALL
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index ea0c69236789..56dfa7a2a6f2 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -49,8 +49,7 @@ obj-y				:= cputable.o ptrace.o syscalls.o \
 				   signal.o sysfs.o cacheinfo.o time.o \
 				   prom.o traps.o setup-common.o \
 				   udbg.o misc.o io.o misc_$(BITS).o \
-				   of_platform.o prom_parse.o \
-				   dma-common.o
+				   of_platform.o prom_parse.o
 obj-$(CONFIG_PPC64)		+= setup_64.o sys_ppc32.o \
 				   signal_64.o ptrace32.o \
 				   paca.o nvram_64.o firmware.o
diff --git a/arch/powerpc/kernel/dma-common.c b/arch/powerpc/kernel/dma-common.c
deleted file mode 100644
index dc7ef6b17b69..000000000000
--- a/arch/powerpc/kernel/dma-common.c
+++ /dev/null
@@ -1,17 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Contains common dma routines for all powerpc platforms.
- *
- * Copyright (C) 2019 Shawn Anastasio.
- */
-
-#include <linux/mm.h>
-#include <linux/dma-noncoherent.h>
-
-pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
-		unsigned long attrs)
-{
-	if (!dev_is_dma_coherent(dev))
-		return pgprot_noncached(prot);
-	return prot;
-}
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index a7f9c3edbcb2..0015fe610b23 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -574,7 +574,7 @@ static void *iommu_dma_alloc_remap(struct device *dev, size_t size,
 	struct iova_domain *iovad = &cookie->iovad;
 	bool coherent = dev_is_dma_coherent(dev);
 	int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs);
-	pgprot_t prot = arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs);
+	pgprot_t prot = dma_pgprot(dev, PAGE_KERNEL, attrs);
 	unsigned int count, min_size, alloc_sizes = domain->pgsize_bitmap;
 	struct page **pages;
 	struct sg_table sgt;
@@ -975,7 +975,7 @@ static void *iommu_dma_alloc_pages(struct device *dev, size_t size,
 		return NULL;
 
 	if (IS_ENABLED(CONFIG_DMA_REMAP) && (!coherent || PageHighMem(page))) {
-		pgprot_t prot = arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs);
+		pgprot_t prot = dma_pgprot(dev, PAGE_KERNEL, attrs);
 
 		cpu_addr = dma_common_contiguous_remap(page, alloc_size,
 				VM_USERMAP, prot, __builtin_return_address(0));
@@ -1035,7 +1035,7 @@ static int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma,
 	unsigned long pfn, off = vma->vm_pgoff;
 	int ret;
 
-	vma->vm_page_prot = arch_dma_mmap_pgprot(dev, vma->vm_page_prot, attrs);
+	vma->vm_page_prot = dma_pgprot(dev, vma->vm_page_prot, attrs);
 
 	if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
 		return ret;
diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h
index 3813211a9aad..0bff3d7fac92 100644
--- a/include/linux/dma-noncoherent.h
+++ b/include/linux/dma-noncoherent.h
@@ -42,13 +42,18 @@ void arch_dma_free(struct device *dev, size_t size, void *cpu_addr,
 		dma_addr_t dma_addr, unsigned long attrs);
 long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
 		dma_addr_t dma_addr);
-
-#ifdef CONFIG_ARCH_HAS_DMA_MMAP_PGPROT
 pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
 		unsigned long attrs);
+
+#ifdef CONFIG_MMU
+pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, unsigned long attrs);
 #else
-# define arch_dma_mmap_pgprot(dev, prot, attrs)	pgprot_noncached(prot)
-#endif
+static inline pgprot_t dma_pgprot(struct device *dev, pgprot_t prot,
+		unsigned long attrs)
+{
+	return prot;	/* no protection bits supported without page tables */
+}
+#endif /* CONFIG_MMU */
 
 #ifdef CONFIG_DMA_NONCOHERENT_CACHE_SYNC
 void arch_dma_cache_sync(struct device *dev, void *vaddr, size_t size,
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index b945239621d8..b0038ca3aa92 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -150,6 +150,23 @@ int dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt,
 }
 EXPORT_SYMBOL(dma_get_sgtable_attrs);
 
+#ifdef CONFIG_MMU
+/*
+ * Return the page attributes used for mapping dma_alloc_* memory, either in
+ * kernel space if remapping is needed, or to userspace through dma_mmap_*.
+ */
+pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, unsigned long attrs)
+{
+	if (dev_is_dma_coherent(dev) ||
+	    (IS_ENABLED(CONFIG_DMA_NONCOHERENT_CACHE_SYNC) &&
+             (attrs & DMA_ATTR_NON_CONSISTENT)))
+		return prot;
+	if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_MMAP_PGPROT))
+		return arch_dma_mmap_pgprot(dev, prot, attrs);
+	return pgprot_noncached(prot);
+}
+#endif /* CONFIG_MMU */
+
 /*
  * Create userspace mapping for the DMA-coherent memory.
  */
@@ -164,7 +181,7 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
 	unsigned long pfn;
 	int ret = -ENXIO;
 
-	vma->vm_page_prot = arch_dma_mmap_pgprot(dev, vma->vm_page_prot, attrs);
+	vma->vm_page_prot = dma_pgprot(dev, vma->vm_page_prot, attrs);
 
 	if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
 		return ret;
diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
index a594aec07882..ffe78f0b2fe4 100644
--- a/kernel/dma/remap.c
+++ b/kernel/dma/remap.c
@@ -218,7 +218,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 
 	/* create a coherent mapping */
 	ret = dma_common_contiguous_remap(page, size, VM_USERMAP,
-			arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs),
+			dma_pgprot(dev, PAGE_KERNEL, attrs),
 			__builtin_return_address(0));
 	if (!ret) {
 		__dma_direct_free_pages(dev, size, page);

From 54b13723f765122f3f5f49d07f3e48fa9fcf8c7c Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Sat, 10 Aug 2019 00:19:11 -0500
Subject: [PATCH 062/157] sh: kernel: disassemble: Mark expected switch
 fall-throughs

Remove logically dead code and mark switch cases where we are expecting
to fall through.

Fix the following warnings (Building: defconfig sh):

arch/sh/kernel/disassemble.c:478:8: warning: this statement may fall
through [-Wimplicit-fallthrough=]
arch/sh/kernel/disassemble.c:487:8: warning: this statement may fall
through [-Wimplicit-fallthrough=]
arch/sh/kernel/disassemble.c:496:8: warning: this statement may fall
through [-Wimplicit-fallthrough=]

Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Tested-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
---
 arch/sh/kernel/disassemble.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/sh/kernel/disassemble.c b/arch/sh/kernel/disassemble.c
index defebf1a9c8a..845543780cc5 100644
--- a/arch/sh/kernel/disassemble.c
+++ b/arch/sh/kernel/disassemble.c
@@ -475,8 +475,6 @@ static void print_sh_insn(u32 memaddr, u16 insn)
 				printk("dbr");
 				break;
 			case FD_REG_N:
-				if (0)
-					goto d_reg_n;
 			case F_REG_N:
 				printk("fr%d", rn);
 				break;
@@ -488,7 +486,7 @@ static void print_sh_insn(u32 memaddr, u16 insn)
 					printk("xd%d", rn & ~1);
 					break;
 				}
-			d_reg_n:
+				/* else, fall through */
 			case D_REG_N:
 				printk("dr%d", rn);
 				break;
@@ -497,6 +495,7 @@ static void print_sh_insn(u32 memaddr, u16 insn)
 					printk("xd%d", rm & ~1);
 					break;
 				}
+				/* else, fall through */
 			case D_REG_M:
 				printk("dr%d", rm);
 				break;

From 1ee1119d184bb06af921b48c3021d921bbd85bac Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Fri, 9 Aug 2019 23:43:56 -0500
Subject: [PATCH 063/157] sh: kernel: hw_breakpoint: Fix missing break in
 switch statement

Add missing break statement in order to prevent the code from falling
through to case SH_BREAKPOINT_WRITE.

Fixes: 09a072947791 ("sh: hw-breakpoints: Add preliminary support for SH-4A UBC.")
Cc: stable@vger.kernel.org
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Tested-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
---
 arch/sh/kernel/hw_breakpoint.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/sh/kernel/hw_breakpoint.c b/arch/sh/kernel/hw_breakpoint.c
index 3bd010b4c55f..f10d64311127 100644
--- a/arch/sh/kernel/hw_breakpoint.c
+++ b/arch/sh/kernel/hw_breakpoint.c
@@ -157,6 +157,7 @@ int arch_bp_generic_fields(int sh_len, int sh_type,
 	switch (sh_type) {
 	case SH_BREAKPOINT_READ:
 		*gen_type = HW_BREAKPOINT_R;
+		break;
 	case SH_BREAKPOINT_WRITE:
 		*gen_type = HW_BREAKPOINT_W;
 		break;

From 5dac665cf403967bb79a7aeb8c182a621fe617ff Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Date: Wed, 31 Jul 2019 19:15:43 +0900
Subject: [PATCH 064/157] usb: gadget: udc: renesas_usb3: Fix sysfs interface
 of "role"

Since the role_store() uses strncmp(), it's possible to refer
out-of-memory if the sysfs data size is smaller than strlen("host").
This patch fixes it by using sysfs_streq() instead of strncmp().

Fixes: cc995c9ec118 ("usb: gadget: udc: renesas_usb3: add support for usb role swap")
Cc: <stable@vger.kernel.org> # v4.12+
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/gadget/udc/renesas_usb3.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c
index 87062d22134d..1f4c3fbd1df8 100644
--- a/drivers/usb/gadget/udc/renesas_usb3.c
+++ b/drivers/usb/gadget/udc/renesas_usb3.c
@@ -19,6 +19,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/sizes.h>
 #include <linux/slab.h>
+#include <linux/string.h>
 #include <linux/sys_soc.h>
 #include <linux/uaccess.h>
 #include <linux/usb/ch9.h>
@@ -2450,9 +2451,9 @@ static ssize_t role_store(struct device *dev, struct device_attribute *attr,
 	if (usb3->forced_b_device)
 		return -EBUSY;
 
-	if (!strncmp(buf, "host", strlen("host")))
+	if (sysfs_streq(buf, "host"))
 		new_mode_is_host = true;
-	else if (!strncmp(buf, "peripheral", strlen("peripheral")))
+	else if (sysfs_streq(buf, "peripheral"))
 		new_mode_is_host = false;
 	else
 		return -EINVAL;

From 602fda17c7356bb7ae98467d93549057481d11dd Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Fri, 26 Jul 2019 14:59:03 +1000
Subject: [PATCH 065/157] usb: gadget: composite: Clear "suspended" on
 reset/disconnect

In some cases, one can get out of suspend with a reset or
a disconnect followed by a reconnect. Previously we would
leave a stale suspended flag set.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/gadget/composite.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
index 9118b42c70b6..76883ff4f5bb 100644
--- a/drivers/usb/gadget/composite.c
+++ b/drivers/usb/gadget/composite.c
@@ -1976,6 +1976,7 @@ void composite_disconnect(struct usb_gadget *gadget)
 	 * disconnect callbacks?
 	 */
 	spin_lock_irqsave(&cdev->lock, flags);
+	cdev->suspended = 0;
 	if (cdev->config)
 		reset_config(cdev);
 	if (cdev->driver->disconnect)

From 4a56a478a525d6427be90753451c40e1327caa1a Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Fri, 26 Jul 2019 14:59:04 +1000
Subject: [PATCH 066/157] usb: gadget: mass_storage: Fix races between
 fsg_disable and fsg_set_alt

If fsg_disable() and fsg_set_alt() are called too closely to each
other (for example due to a quick reset/reconnect), what can happen
is that fsg_set_alt sets common->new_fsg from an interrupt while
handle_exception is trying to process the config change caused by
fsg_disable():

	fsg_disable()
	...
	handle_exception()
		sets state back to FSG_STATE_NORMAL
		hasn't yet called do_set_interface()
		or is inside it.

 ---> interrupt
	fsg_set_alt
		sets common->new_fsg
		queues a new FSG_STATE_CONFIG_CHANGE
 <---

Now, the first handle_exception can "see" the updated
new_fsg, treats it as if it was a fsg_set_alt() response,
call usb_composite_setup_continue() etc...

But then, the thread sees the second FSG_STATE_CONFIG_CHANGE,
and goes back down the same path, wipes and reattaches a now
active fsg, and .. calls usb_composite_setup_continue() which
at this point is wrong.

Not only we get a backtrace, but I suspect the second set_interface
wrecks some state causing the host to get upset in my case.

This fixes it by replacing "new_fsg" by a "state argument" (same
principle) which is set in the same lock section as the state
update, and retrieved similarly.

That way, there is never any discrepancy between the dequeued
state and the observed value of it. We keep the ability to have
the latest reconfig operation take precedence, but we guarantee
that once "dequeued" the argument (new_fsg) will not be clobbered
by any new event.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
---
 drivers/usb/gadget/function/f_mass_storage.c | 28 +++++++++++++-------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c
index 29cc5693e05c..7c96c4665178 100644
--- a/drivers/usb/gadget/function/f_mass_storage.c
+++ b/drivers/usb/gadget/function/f_mass_storage.c
@@ -261,7 +261,7 @@ struct fsg_common;
 struct fsg_common {
 	struct usb_gadget	*gadget;
 	struct usb_composite_dev *cdev;
-	struct fsg_dev		*fsg, *new_fsg;
+	struct fsg_dev		*fsg;
 	wait_queue_head_t	io_wait;
 	wait_queue_head_t	fsg_wait;
 
@@ -290,6 +290,7 @@ struct fsg_common {
 	unsigned int		bulk_out_maxpacket;
 	enum fsg_state		state;		/* For exception handling */
 	unsigned int		exception_req_tag;
+	void			*exception_arg;
 
 	enum data_direction	data_dir;
 	u32			data_size;
@@ -391,7 +392,8 @@ static int fsg_set_halt(struct fsg_dev *fsg, struct usb_ep *ep)
 
 /* These routines may be called in process context or in_irq */
 
-static void raise_exception(struct fsg_common *common, enum fsg_state new_state)
+static void __raise_exception(struct fsg_common *common, enum fsg_state new_state,
+			      void *arg)
 {
 	unsigned long		flags;
 
@@ -404,6 +406,7 @@ static void raise_exception(struct fsg_common *common, enum fsg_state new_state)
 	if (common->state <= new_state) {
 		common->exception_req_tag = common->ep0_req_tag;
 		common->state = new_state;
+		common->exception_arg = arg;
 		if (common->thread_task)
 			send_sig_info(SIGUSR1, SEND_SIG_PRIV,
 				      common->thread_task);
@@ -411,6 +414,10 @@ static void raise_exception(struct fsg_common *common, enum fsg_state new_state)
 	spin_unlock_irqrestore(&common->lock, flags);
 }
 
+static void raise_exception(struct fsg_common *common, enum fsg_state new_state)
+{
+	__raise_exception(common, new_state, NULL);
+}
 
 /*-------------------------------------------------------------------------*/
 
@@ -2285,16 +2292,16 @@ reset:
 static int fsg_set_alt(struct usb_function *f, unsigned intf, unsigned alt)
 {
 	struct fsg_dev *fsg = fsg_from_func(f);
-	fsg->common->new_fsg = fsg;
-	raise_exception(fsg->common, FSG_STATE_CONFIG_CHANGE);
+
+	__raise_exception(fsg->common, FSG_STATE_CONFIG_CHANGE, fsg);
 	return USB_GADGET_DELAYED_STATUS;
 }
 
 static void fsg_disable(struct usb_function *f)
 {
 	struct fsg_dev *fsg = fsg_from_func(f);
-	fsg->common->new_fsg = NULL;
-	raise_exception(fsg->common, FSG_STATE_CONFIG_CHANGE);
+
+	__raise_exception(fsg->common, FSG_STATE_CONFIG_CHANGE, NULL);
 }
 
 
@@ -2307,6 +2314,7 @@ static void handle_exception(struct fsg_common *common)
 	enum fsg_state		old_state;
 	struct fsg_lun		*curlun;
 	unsigned int		exception_req_tag;
+	struct fsg_dev		*new_fsg;
 
 	/*
 	 * Clear the existing signals.  Anything but SIGUSR1 is converted
@@ -2360,6 +2368,7 @@ static void handle_exception(struct fsg_common *common)
 	common->next_buffhd_to_fill = &common->buffhds[0];
 	common->next_buffhd_to_drain = &common->buffhds[0];
 	exception_req_tag = common->exception_req_tag;
+	new_fsg = common->exception_arg;
 	old_state = common->state;
 	common->state = FSG_STATE_NORMAL;
 
@@ -2413,8 +2422,8 @@ static void handle_exception(struct fsg_common *common)
 		break;
 
 	case FSG_STATE_CONFIG_CHANGE:
-		do_set_interface(common, common->new_fsg);
-		if (common->new_fsg)
+		do_set_interface(common, new_fsg);
+		if (new_fsg)
 			usb_composite_setup_continue(common->cdev);
 		break;
 
@@ -2989,8 +2998,7 @@ static void fsg_unbind(struct usb_configuration *c, struct usb_function *f)
 
 	DBG(fsg, "unbind\n");
 	if (fsg->common->fsg == fsg) {
-		fsg->common->new_fsg = NULL;
-		raise_exception(fsg->common, FSG_STATE_CONFIG_CHANGE);
+		__raise_exception(fsg->common, FSG_STATE_CONFIG_CHANGE, NULL);
 		/* FIXME: make interruptible or killable somehow? */
 		wait_event(common->fsg_wait, common->fsg != fsg);
 	}

From 508c5849c62d009e03f37ad0f556071fac5112f0 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Thu, 1 Aug 2019 13:57:36 +0000
Subject: [PATCH 067/157] habanalabs: Avoid double free in error flow

In case kernel context init fails during device initialization, both
hl_ctx_put() and kfree() are called, ending with a double free of the
kernel context.
Calling kfree() is needed only when a failure happens between the
allocation of the kernel context and its initialization, so move it to
there and remove it from the error flow.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/misc/habanalabs/device.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c
index 0c4894dd9c02..7a8f9d0b71b5 100644
--- a/drivers/misc/habanalabs/device.c
+++ b/drivers/misc/habanalabs/device.c
@@ -970,7 +970,8 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
 	rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
 	if (rc) {
 		dev_err(hdev->dev, "failed to initialize kernel context\n");
-		goto free_ctx;
+		kfree(hdev->kernel_ctx);
+		goto mmu_fini;
 	}
 
 	rc = hl_cb_pool_init(hdev);
@@ -1053,8 +1054,6 @@ release_ctx:
 	if (hl_ctx_put(hdev->kernel_ctx) != 1)
 		dev_err(hdev->dev,
 			"kernel ctx is still alive on initialization failure\n");
-free_ctx:
-	kfree(hdev->kernel_ctx);
 mmu_fini:
 	hl_mmu_fini(hdev);
 eq_fini:

From c8113756ba27298d6e95403c087dc5881b419a99 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Sun, 4 Aug 2019 07:03:41 +0000
Subject: [PATCH 068/157] habanalabs: fix DRAM usage accounting on context tear
 down

The patch fix the DRAM usage accounting by adding a missing update of
the DRAM memory consumption, when a context is being torn down without an
organized release of the allocated memory.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/misc/habanalabs/memory.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/misc/habanalabs/memory.c b/drivers/misc/habanalabs/memory.c
index 42d237cae1dc..365fb0cb8dff 100644
--- a/drivers/misc/habanalabs/memory.c
+++ b/drivers/misc/habanalabs/memory.c
@@ -1629,6 +1629,8 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
 			dev_dbg(hdev->dev,
 				"page list 0x%p of asid %d is still alive\n",
 				phys_pg_list, ctx->asid);
+			atomic64_sub(phys_pg_list->total_size,
+					&hdev->dram_used_mem);
 			free_phys_pg_pack(hdev, phys_pg_list);
 			idr_remove(&vm->phys_pg_pack_handles, i);
 		}

From 213ad5ad016a0da975b35f54e8cd236c3b04724b Mon Sep 17 00:00:00 2001
From: Ben Segal <bpsegal20@gmail.com>
Date: Thu, 1 Aug 2019 23:20:32 +0000
Subject: [PATCH 069/157] habanalabs: fix endianness handling for packets from
 user

Packets that arrive from the user and need to be parsed by the driver are
assumed to be in LE format.

This patch fix all the places where the code handles these packets and use
the correct endianness macros to handle them, as the driver handles the
packets in CPU format (LE or BE depending on the arch).

Signed-off-by: Ben Segal <bpsegal20@gmail.com>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/misc/habanalabs/goya/goya.c           | 32 +++++++++++--------
 .../habanalabs/include/goya/goya_packets.h    | 13 ++++++++
 2 files changed, 32 insertions(+), 13 deletions(-)

diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index a0e181714891..e8b1142910e0 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -3428,12 +3428,13 @@ static int goya_validate_cb(struct hl_device *hdev,
 	while (cb_parsed_length < parser->user_cb_size) {
 		enum packet_id pkt_id;
 		u16 pkt_size;
-		void *user_pkt;
+		struct goya_packet *user_pkt;
 
-		user_pkt = (void *) (uintptr_t)
+		user_pkt = (struct goya_packet *) (uintptr_t)
 			(parser->user_cb->kernel_address + cb_parsed_length);
 
-		pkt_id = (enum packet_id) (((*(u64 *) user_pkt) &
+		pkt_id = (enum packet_id) (
+				(le64_to_cpu(user_pkt->header) &
 				PACKET_HEADER_PACKET_ID_MASK) >>
 					PACKET_HEADER_PACKET_ID_SHIFT);
 
@@ -3453,7 +3454,8 @@ static int goya_validate_cb(struct hl_device *hdev,
 			 * need to validate here as well because patch_cb() is
 			 * not called in MMU path while this function is called
 			 */
-			rc = goya_validate_wreg32(hdev, parser, user_pkt);
+			rc = goya_validate_wreg32(hdev,
+				parser, (struct packet_wreg32 *) user_pkt);
 			break;
 
 		case PACKET_WREG_BULK:
@@ -3481,10 +3483,10 @@ static int goya_validate_cb(struct hl_device *hdev,
 		case PACKET_LIN_DMA:
 			if (is_mmu)
 				rc = goya_validate_dma_pkt_mmu(hdev, parser,
-						user_pkt);
+					(struct packet_lin_dma *) user_pkt);
 			else
 				rc = goya_validate_dma_pkt_no_mmu(hdev, parser,
-						user_pkt);
+					(struct packet_lin_dma *) user_pkt);
 			break;
 
 		case PACKET_MSG_LONG:
@@ -3657,15 +3659,16 @@ static int goya_patch_cb(struct hl_device *hdev,
 		enum packet_id pkt_id;
 		u16 pkt_size;
 		u32 new_pkt_size = 0;
-		void *user_pkt, *kernel_pkt;
+		struct goya_packet *user_pkt, *kernel_pkt;
 
-		user_pkt = (void *) (uintptr_t)
+		user_pkt = (struct goya_packet *) (uintptr_t)
 			(parser->user_cb->kernel_address + cb_parsed_length);
-		kernel_pkt = (void *) (uintptr_t)
+		kernel_pkt = (struct goya_packet *) (uintptr_t)
 			(parser->patched_cb->kernel_address +
 					cb_patched_cur_length);
 
-		pkt_id = (enum packet_id) (((*(u64 *) user_pkt) &
+		pkt_id = (enum packet_id) (
+				(le64_to_cpu(user_pkt->header) &
 				PACKET_HEADER_PACKET_ID_MASK) >>
 					PACKET_HEADER_PACKET_ID_SHIFT);
 
@@ -3680,15 +3683,18 @@ static int goya_patch_cb(struct hl_device *hdev,
 
 		switch (pkt_id) {
 		case PACKET_LIN_DMA:
-			rc = goya_patch_dma_packet(hdev, parser, user_pkt,
-						kernel_pkt, &new_pkt_size);
+			rc = goya_patch_dma_packet(hdev, parser,
+					(struct packet_lin_dma *) user_pkt,
+					(struct packet_lin_dma *) kernel_pkt,
+					&new_pkt_size);
 			cb_patched_cur_length += new_pkt_size;
 			break;
 
 		case PACKET_WREG_32:
 			memcpy(kernel_pkt, user_pkt, pkt_size);
 			cb_patched_cur_length += pkt_size;
-			rc = goya_validate_wreg32(hdev, parser, kernel_pkt);
+			rc = goya_validate_wreg32(hdev, parser,
+					(struct packet_wreg32 *) kernel_pkt);
 			break;
 
 		case PACKET_WREG_BULK:
diff --git a/drivers/misc/habanalabs/include/goya/goya_packets.h b/drivers/misc/habanalabs/include/goya/goya_packets.h
index a14407b975e4..ef54bad20509 100644
--- a/drivers/misc/habanalabs/include/goya/goya_packets.h
+++ b/drivers/misc/habanalabs/include/goya/goya_packets.h
@@ -52,6 +52,19 @@ enum goya_dma_direction {
 #define GOYA_PKT_CTL_MB_SHIFT		31
 #define GOYA_PKT_CTL_MB_MASK		0x80000000
 
+/* All packets have, at least, an 8-byte header, which contains
+ * the packet type. The kernel driver uses the packet header for packet
+ * validation and to perform any necessary required preparation before
+ * sending them off to the hardware.
+ */
+struct goya_packet {
+	__le64 header;
+	/* The rest of the packet data follows. Use the corresponding
+	 * packet_XXX struct to deference the data, based on packet type
+	 */
+	u8 contents[0];
+};
+
 struct packet_nop {
 	__le32 reserved;
 	__le32 ctl;

From 4e87334a0ef43663019dbaf3638ad10fd8c3320c Mon Sep 17 00:00:00 2001
From: Ben Segal <bpsegal20@gmail.com>
Date: Thu, 1 Aug 2019 23:22:20 +0000
Subject: [PATCH 070/157] habanalabs: fix completion queue handling when host
 is BE

This patch fix the CQ irq handler to work in hosts with BE architecture.
It adds the correct endian-swapping macros around the relevant memory
accesses.

Signed-off-by: Ben Segal <bpsegal20@gmail.com>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/misc/habanalabs/irq.c | 27 +++++++++++++--------------
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/drivers/misc/habanalabs/irq.c b/drivers/misc/habanalabs/irq.c
index ea9f72ff456c..199791b57caf 100644
--- a/drivers/misc/habanalabs/irq.c
+++ b/drivers/misc/habanalabs/irq.c
@@ -80,8 +80,7 @@ irqreturn_t hl_irq_handler_cq(int irq, void *arg)
 	struct hl_cs_job *job;
 	bool shadow_index_valid;
 	u16 shadow_index;
-	u32 *cq_entry;
-	u32 *cq_base;
+	struct hl_cq_entry *cq_entry, *cq_base;
 
 	if (hdev->disabled) {
 		dev_dbg(hdev->dev,
@@ -90,29 +89,29 @@ irqreturn_t hl_irq_handler_cq(int irq, void *arg)
 		return IRQ_HANDLED;
 	}
 
-	cq_base = (u32 *) (uintptr_t) cq->kernel_address;
+	cq_base = (struct hl_cq_entry *) (uintptr_t) cq->kernel_address;
 
 	while (1) {
-		bool entry_ready = ((cq_base[cq->ci] & CQ_ENTRY_READY_MASK)
+		bool entry_ready = ((le32_to_cpu(cq_base[cq->ci].data) &
+					CQ_ENTRY_READY_MASK)
 						>> CQ_ENTRY_READY_SHIFT);
 
 		if (!entry_ready)
 			break;
 
-		cq_entry = (u32 *) &cq_base[cq->ci];
+		cq_entry = (struct hl_cq_entry *) &cq_base[cq->ci];
 
-		/*
-		 * Make sure we read CQ entry contents after we've
+		/* Make sure we read CQ entry contents after we've
 		 * checked the ownership bit.
 		 */
 		dma_rmb();
 
-		shadow_index_valid =
-			((*cq_entry & CQ_ENTRY_SHADOW_INDEX_VALID_MASK)
+		shadow_index_valid = ((le32_to_cpu(cq_entry->data) &
+					CQ_ENTRY_SHADOW_INDEX_VALID_MASK)
 					>> CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT);
 
-		shadow_index = (u16)
-			((*cq_entry & CQ_ENTRY_SHADOW_INDEX_MASK)
+		shadow_index = (u16) ((le32_to_cpu(cq_entry->data) &
+					CQ_ENTRY_SHADOW_INDEX_MASK)
 					>> CQ_ENTRY_SHADOW_INDEX_SHIFT);
 
 		queue = &hdev->kernel_queues[cq->hw_queue_id];
@@ -122,8 +121,7 @@ irqreturn_t hl_irq_handler_cq(int irq, void *arg)
 			queue_work(hdev->cq_wq, &job->finish_work);
 		}
 
-		/*
-		 * Update ci of the context's queue. There is no
+		/* Update ci of the context's queue. There is no
 		 * need to protect it with spinlock because this update is
 		 * done only inside IRQ and there is a different IRQ per
 		 * queue
@@ -131,7 +129,8 @@ irqreturn_t hl_irq_handler_cq(int irq, void *arg)
 		queue->ci = hl_queue_inc_ptr(queue->ci);
 
 		/* Clear CQ entry ready bit */
-		cq_base[cq->ci] &= ~CQ_ENTRY_READY_MASK;
+		cq_entry->data = cpu_to_le32(le32_to_cpu(cq_entry->data) &
+						~CQ_ENTRY_READY_MASK);
 
 		cq->ci = hl_cq_inc_ptr(cq->ci);
 

From b9040c99414ba5b85090595a61abc686a5dbb388 Mon Sep 17 00:00:00 2001
From: Oded Gabbay <oded.gabbay@gmail.com>
Date: Thu, 8 Aug 2019 15:45:58 +0300
Subject: [PATCH 071/157] habanalabs: fix endianness handling for internal QMAN
 submission

The PQs of internal H/W queues (QMANs) can be located in different memory
areas for different ASICs. Therefore, when writing PQEs, we need to use
the correct function according to the location of the PQ. e.g. if the PQ
is located in the device's memory (SRAM or DRAM), we need to use
memcpy_toio() so it would work in architectures that have separate
address ranges for IO memory.

This patch makes the code that writes the PQE to be ASIC-specific so we
can handle this properly per ASIC.

Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Tested-by: Ben Segal <bpsegal20@gmail.com>
---
 drivers/misc/habanalabs/goya/goya.c  |  7 ++++---
 drivers/misc/habanalabs/goya/goyaP.h |  2 +-
 drivers/misc/habanalabs/habanalabs.h |  9 +++++++--
 drivers/misc/habanalabs/hw_queue.c   | 14 +++++---------
 4 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index e8b1142910e0..b39b9c98fe1d 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -2729,9 +2729,10 @@ void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
 				GOYA_ASYNC_EVENT_ID_PI_UPDATE);
 }
 
-void goya_flush_pq_write(struct hl_device *hdev, u64 *pq, u64 exp_val)
+void goya_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd)
 {
-	/* Not needed in Goya */
+	/* The QMANs are on the SRAM so need to copy to IO space */
+	memcpy_toio((void __iomem *) pqe, bd, sizeof(struct hl_bd));
 }
 
 static void *goya_dma_alloc_coherent(struct hl_device *hdev, size_t size,
@@ -5048,7 +5049,7 @@ static const struct hl_asic_funcs goya_funcs = {
 	.resume = goya_resume,
 	.cb_mmap = goya_cb_mmap,
 	.ring_doorbell = goya_ring_doorbell,
-	.flush_pq_write = goya_flush_pq_write,
+	.pqe_write = goya_pqe_write,
 	.asic_dma_alloc_coherent = goya_dma_alloc_coherent,
 	.asic_dma_free_coherent = goya_dma_free_coherent,
 	.get_int_queue_base = goya_get_int_queue_base,
diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h
index f8c611883dc1..d7f48c9c41cd 100644
--- a/drivers/misc/habanalabs/goya/goyaP.h
+++ b/drivers/misc/habanalabs/goya/goyaP.h
@@ -177,7 +177,7 @@ int goya_late_init(struct hl_device *hdev);
 void goya_late_fini(struct hl_device *hdev);
 
 void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi);
-void goya_flush_pq_write(struct hl_device *hdev, u64 *pq, u64 exp_val);
+void goya_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd);
 void goya_update_eq_ci(struct hl_device *hdev, u32 val);
 void goya_restore_phase_topology(struct hl_device *hdev);
 int goya_context_switch(struct hl_device *hdev, u32 asid);
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index 6a4c64b97f38..ce83adafcf2d 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -441,7 +441,11 @@ enum hl_pll_frequency {
  * @resume: handles IP specific H/W or SW changes for resume.
  * @cb_mmap: maps a CB.
  * @ring_doorbell: increment PI on a given QMAN.
- * @flush_pq_write: flush PQ entry write if necessary, WARN if flushing failed.
+ * @pqe_write: Write the PQ entry to the PQ. This is ASIC-specific
+ *             function because the PQs are located in different memory areas
+ *             per ASIC (SRAM, DRAM, Host memory) and therefore, the method of
+ *             writing the PQE must match the destination memory area
+ *             properties.
  * @asic_dma_alloc_coherent: Allocate coherent DMA memory by calling
  *                           dma_alloc_coherent(). This is ASIC function because
  *                           its implementation is not trivial when the driver
@@ -510,7 +514,8 @@ struct hl_asic_funcs {
 	int (*cb_mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
 			u64 kaddress, phys_addr_t paddress, u32 size);
 	void (*ring_doorbell)(struct hl_device *hdev, u32 hw_queue_id, u32 pi);
-	void (*flush_pq_write)(struct hl_device *hdev, u64 *pq, u64 exp_val);
+	void (*pqe_write)(struct hl_device *hdev, __le64 *pqe,
+			struct hl_bd *bd);
 	void* (*asic_dma_alloc_coherent)(struct hl_device *hdev, size_t size,
 					dma_addr_t *dma_handle, gfp_t flag);
 	void (*asic_dma_free_coherent)(struct hl_device *hdev, size_t size,
diff --git a/drivers/misc/habanalabs/hw_queue.c b/drivers/misc/habanalabs/hw_queue.c
index e3b5517897ea..5f5673b74985 100644
--- a/drivers/misc/habanalabs/hw_queue.c
+++ b/drivers/misc/habanalabs/hw_queue.c
@@ -290,23 +290,19 @@ static void int_hw_queue_schedule_job(struct hl_cs_job *job)
 	struct hl_device *hdev = job->cs->ctx->hdev;
 	struct hl_hw_queue *q = &hdev->kernel_queues[job->hw_queue_id];
 	struct hl_bd bd;
-	u64 *pi, *pbd = (u64 *) &bd;
+	__le64 *pi;
 
 	bd.ctl = 0;
-	bd.len = __cpu_to_le32(job->job_cb_size);
-	bd.ptr = __cpu_to_le64((u64) (uintptr_t) job->user_cb);
+	bd.len = cpu_to_le32(job->job_cb_size);
+	bd.ptr = cpu_to_le64((u64) (uintptr_t) job->user_cb);
 
-	pi = (u64 *) (uintptr_t) (q->kernel_address +
+	pi = (__le64 *) (uintptr_t) (q->kernel_address +
 		((q->pi & (q->int_queue_len - 1)) * sizeof(bd)));
 
-	pi[0] = pbd[0];
-	pi[1] = pbd[1];
-
 	q->pi++;
 	q->pi &= ((q->int_queue_len << 1) - 1);
 
-	/* Flush PQ entry write. Relevant only for specific ASICs */
-	hdev->asic_funcs->flush_pq_write(hdev, pi, pbd[0]);
+	hdev->asic_funcs->pqe_write(hdev, pi, &bd);
 
 	hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi);
 }

From b421d83a3947369fd5718824aecfaebe1efbf7ed Mon Sep 17 00:00:00 2001
From: Ben Segal <bpsegal20@gmail.com>
Date: Wed, 7 Aug 2019 13:54:54 +0000
Subject: [PATCH 072/157] habanalabs: fix device IRQ unmasking for BE host

When unmasking IRQs inside the ASIC, the driver passes an array of all the
IRQ to unmask. The ASIC's CPU is working in LE so when running in a BE
host, the driver needs to do the proper endianness swapping when preparing
this array.

In addition, this patch also fixes the endianness of a couple of kernel log
debug messages that print values of packets

Signed-off-by: Ben Segal <bpsegal20@gmail.com>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/misc/habanalabs/goya/goya.c | 33 +++++++++++++++++++++--------
 1 file changed, 24 insertions(+), 9 deletions(-)

diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index b39b9c98fe1d..271c5c8f53b4 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -3314,9 +3314,11 @@ static int goya_validate_dma_pkt_no_mmu(struct hl_device *hdev,
 	int rc;
 
 	dev_dbg(hdev->dev, "DMA packet details:\n");
-	dev_dbg(hdev->dev, "source == 0x%llx\n", user_dma_pkt->src_addr);
-	dev_dbg(hdev->dev, "destination == 0x%llx\n", user_dma_pkt->dst_addr);
-	dev_dbg(hdev->dev, "size == %u\n", user_dma_pkt->tsize);
+	dev_dbg(hdev->dev, "source == 0x%llx\n",
+		le64_to_cpu(user_dma_pkt->src_addr));
+	dev_dbg(hdev->dev, "destination == 0x%llx\n",
+		le64_to_cpu(user_dma_pkt->dst_addr));
+	dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
 
 	ctl = le32_to_cpu(user_dma_pkt->ctl);
 	user_dir = (ctl & GOYA_PKT_LIN_DMA_CTL_DMA_DIR_MASK) >>
@@ -3345,9 +3347,11 @@ static int goya_validate_dma_pkt_mmu(struct hl_device *hdev,
 				struct packet_lin_dma *user_dma_pkt)
 {
 	dev_dbg(hdev->dev, "DMA packet details:\n");
-	dev_dbg(hdev->dev, "source == 0x%llx\n", user_dma_pkt->src_addr);
-	dev_dbg(hdev->dev, "destination == 0x%llx\n", user_dma_pkt->dst_addr);
-	dev_dbg(hdev->dev, "size == %u\n", user_dma_pkt->tsize);
+	dev_dbg(hdev->dev, "source == 0x%llx\n",
+		le64_to_cpu(user_dma_pkt->src_addr));
+	dev_dbg(hdev->dev, "destination == 0x%llx\n",
+		le64_to_cpu(user_dma_pkt->dst_addr));
+	dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
 
 	/*
 	 * WA for HW-23.
@@ -3387,7 +3391,8 @@ static int goya_validate_wreg32(struct hl_device *hdev,
 
 	dev_dbg(hdev->dev, "WREG32 packet details:\n");
 	dev_dbg(hdev->dev, "reg_offset == 0x%x\n", reg_offset);
-	dev_dbg(hdev->dev, "value      == 0x%x\n", wreg_pkt->value);
+	dev_dbg(hdev->dev, "value      == 0x%x\n",
+		le32_to_cpu(wreg_pkt->value));
 
 	if (reg_offset != (mmDMA_CH_0_WR_COMP_ADDR_LO & 0x1FFF)) {
 		dev_err(hdev->dev, "WREG32 packet with illegal address 0x%x\n",
@@ -4359,6 +4364,8 @@ static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr,
 	size_t total_pkt_size;
 	long result;
 	int rc;
+	int irq_num_entries, irq_arr_index;
+	__le32 *goya_irq_arr;
 
 	total_pkt_size = sizeof(struct armcp_unmask_irq_arr_packet) +
 			irq_arr_size;
@@ -4376,8 +4383,16 @@ static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr,
 	if (!pkt)
 		return -ENOMEM;
 
-	pkt->length = cpu_to_le32(irq_arr_size / sizeof(irq_arr[0]));
-	memcpy(&pkt->irqs, irq_arr, irq_arr_size);
+	irq_num_entries = irq_arr_size / sizeof(irq_arr[0]);
+	pkt->length = cpu_to_le32(irq_num_entries);
+
+	/* We must perform any necessary endianness conversation on the irq
+	 * array being passed to the goya hardware
+	 */
+	for (irq_arr_index = 0, goya_irq_arr = (__le32 *) &pkt->irqs;
+			irq_arr_index < irq_num_entries ; irq_arr_index++)
+		goya_irq_arr[irq_arr_index] =
+				cpu_to_le32(irq_arr[irq_arr_index]);
 
 	pkt->armcp_pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
 						ARMCP_PKT_CTL_OPCODE_SHIFT);

From accd2dd72c8f087441d725dd916688171519e4e6 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Fri, 9 Aug 2019 10:23:57 +0200
Subject: [PATCH 073/157] PCI/ASPM: Add pcie_aspm_enabled()

Add a function checking whether or not PCIe ASPM has been enabled for
a given device.

It will be used by the NVMe driver to decide how to handle the
device during system suspend.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/pcie/aspm.c | 20 ++++++++++++++++++++
 include/linux/pci.h     |  2 ++
 2 files changed, 22 insertions(+)

diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index e44af7f4d37f..464f8f92653f 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -1170,6 +1170,26 @@ static int pcie_aspm_get_policy(char *buffer, const struct kernel_param *kp)
 module_param_call(policy, pcie_aspm_set_policy, pcie_aspm_get_policy,
 	NULL, 0644);
 
+/**
+ * pcie_aspm_enabled - Check if PCIe ASPM has been enabled for a device.
+ * @pdev: Target device.
+ */
+bool pcie_aspm_enabled(struct pci_dev *pdev)
+{
+	struct pci_dev *bridge = pci_upstream_bridge(pdev);
+	bool ret;
+
+	if (!bridge)
+		return false;
+
+	mutex_lock(&aspm_lock);
+	ret = bridge->link_state ? !!bridge->link_state->aspm_enabled : false;
+	mutex_unlock(&aspm_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pcie_aspm_enabled);
+
 #ifdef CONFIG_PCIEASPM_DEBUG
 static ssize_t link_state_show(struct device *dev,
 		struct device_attribute *attr,
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 9e700d9f9f28..82e4cd1b7ac3 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1567,8 +1567,10 @@ extern bool pcie_ports_native;
 
 #ifdef CONFIG_PCIEASPM
 bool pcie_aspm_support_enabled(void);
+bool pcie_aspm_enabled(struct pci_dev *pdev);
 #else
 static inline bool pcie_aspm_support_enabled(void) { return false; }
+static inline bool pcie_aspm_enabled(struct pci_dev *pdev) { return false; }
 #endif
 
 #ifdef CONFIG_PCIEAER

From 4eaefe8c621c6195c91044396ed8060c179f7aae Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Thu, 8 Aug 2019 23:58:38 +0200
Subject: [PATCH 074/157] nvme-pci: Allow PCI bus-level PM to be used if ASPM
 is disabled

One of the modifications made by commit d916b1be94b6 ("nvme-pci: use
host managed power state for suspend") was adding a pci_save_state()
call to nvme_suspend() so as to instruct the PCI bus type to leave
devices handled by the nvme driver in D0 during suspend-to-idle.
That was done with the assumption that ASPM would transition the
device's PCIe link into a low-power state when the device became
inactive.  However, if ASPM is disabled for the device, its PCIe
link will stay in L0 and in that case commit d916b1be94b6 is likely
to cause the energy used by the system while suspended to increase.

Namely, if the device in question works in accordance with the PCIe
specification, putting it into D3hot causes its PCIe link to go to
L1 or L2/L3 Ready, which is lower-power than L0.  Since the energy
used by the system while suspended depends on the state of its PCIe
link (as a general rule, the lower-power the state of the link, the
less energy the system will use), putting the device into D3hot
during suspend-to-idle should be more energy-efficient that leaving
it in D0 with disabled ASPM.

For this reason, avoid leaving NVMe devices with disabled ASPM in D0
during suspend-to-idle.  Instead, shut them down entirely and let
the PCI bus type put them into D3.

Fixes: d916b1be94b6 ("nvme-pci: use host managed power state for suspend")
Link: https://lore.kernel.org/linux-pm/2763495.NmdaWeg79L@kreacher/T/#t
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Keith Busch <keith.busch@intel.com>
---
 drivers/nvme/host/pci.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index db160cee42ad..108e109e99f1 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2846,7 +2846,7 @@ static int nvme_resume(struct device *dev)
 	struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev));
 	struct nvme_ctrl *ctrl = &ndev->ctrl;
 
-	if (pm_resume_via_firmware() || !ctrl->npss ||
+	if (ndev->last_ps == U32_MAX ||
 	    nvme_set_power_state(ctrl, ndev->last_ps) != 0)
 		nvme_reset_ctrl(ctrl);
 	return 0;
@@ -2859,6 +2859,8 @@ static int nvme_suspend(struct device *dev)
 	struct nvme_ctrl *ctrl = &ndev->ctrl;
 	int ret = -EBUSY;
 
+	ndev->last_ps = U32_MAX;
+
 	/*
 	 * The platform does not remove power for a kernel managed suspend so
 	 * use host managed nvme power settings for lowest idle power if
@@ -2866,8 +2868,14 @@ static int nvme_suspend(struct device *dev)
 	 * shutdown.  But if the firmware is involved after the suspend or the
 	 * device does not support any non-default power states, shut down the
 	 * device fully.
+	 *
+	 * If ASPM is not enabled for the device, shut down the device and allow
+	 * the PCI bus layer to put it into D3 in order to take the PCIe link
+	 * down, so as to allow the platform to achieve its minimum low-power
+	 * state (which may not be possible if the link is up).
 	 */
-	if (pm_suspend_via_firmware() || !ctrl->npss) {
+	if (pm_suspend_via_firmware() || !ctrl->npss ||
+	    !pcie_aspm_enabled(pdev)) {
 		nvme_dev_disable(ndev, true);
 		return 0;
 	}
@@ -2880,7 +2888,6 @@ static int nvme_suspend(struct device *dev)
 	    ctrl->state != NVME_CTRL_ADMIN_ONLY)
 		goto unfreeze;
 
-	ndev->last_ps = 0;
 	ret = nvme_get_power_state(ctrl, &ndev->last_ps);
 	if (ret < 0)
 		goto unfreeze;

From b61fbc887af7a13a1c90c84c1feaeb4c9780e1e2 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 7 Aug 2019 23:59:03 +0200
Subject: [PATCH 075/157] efi-stub: Fix get_efi_config_table on mixed-mode
 setups

Fix get_efi_config_table using the wrong structs when booting a
64 bit kernel on 32 bit firmware.

Fixes: 82d736ac56d7 ("Abstract out support for locating an EFI config table")
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Acked-By: Matthew Garrett <mjg59@google.com>
Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 .../firmware/efi/libstub/efi-stub-helper.c    | 38 +++++++++++++------
 1 file changed, 27 insertions(+), 11 deletions(-)

diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c
index 1db780c0f07b..3caae7f2cf56 100644
--- a/drivers/firmware/efi/libstub/efi-stub-helper.c
+++ b/drivers/firmware/efi/libstub/efi-stub-helper.c
@@ -927,17 +927,33 @@ fail:
 	return status;
 }
 
+#define GET_EFI_CONFIG_TABLE(bits)					\
+static void *get_efi_config_table##bits(efi_system_table_t *_sys_table,	\
+					efi_guid_t guid)		\
+{									\
+	efi_system_table_##bits##_t *sys_table;				\
+	efi_config_table_##bits##_t *tables;				\
+	int i;								\
+									\
+	sys_table = (typeof(sys_table))_sys_table;			\
+	tables = (typeof(tables))(unsigned long)sys_table->tables;	\
+									\
+	for (i = 0; i < sys_table->nr_tables; i++) {			\
+		if (efi_guidcmp(tables[i].guid, guid) != 0)		\
+			continue;					\
+									\
+		return (void *)(unsigned long)tables[i].table;		\
+	}								\
+									\
+	return NULL;							\
+}
+GET_EFI_CONFIG_TABLE(32)
+GET_EFI_CONFIG_TABLE(64)
+
 void *get_efi_config_table(efi_system_table_t *sys_table, efi_guid_t guid)
 {
-	efi_config_table_t *tables = (efi_config_table_t *)sys_table->tables;
-	int i;
-
-	for (i = 0; i < sys_table->nr_tables; i++) {
-		if (efi_guidcmp(tables[i].guid, guid) != 0)
-			continue;
-
-		return (void *)tables[i].table;
-	}
-
-	return NULL;
+	if (efi_is_64bit())
+		return get_efi_config_table64(sys_table, guid);
+	else
+		return get_efi_config_table32(sys_table, guid);
 }

From e7409258845a0f64967f8377e99294d438137537 Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Fri, 9 Aug 2019 18:40:37 -0700
Subject: [PATCH 076/157] x86/umwait: Fix error handling in umwait_init()

Currently, failure of cpuhp_setup_state() is ignored and the syscore ops
and the control interfaces can still be added even after the failure. But,
this error handling will cause a few issues:

1. The CPUs may have different values in the IA32_UMWAIT_CONTROL
   MSR because there is no way to roll back the control MSR on
   the CPUs which already set the MSR before the failure.

2. If the sysfs interface is added successfully, there will be a mismatch
   between the global control value and the control MSR:
   - The interface shows the default global control value. But,
     the control MSR is not set to the value because the CPU online
     function, which is supposed to set the MSR to the value,
     is not installed.
   - If the sysadmin changes the global control value through
     the interface, the control MSR on all current online CPUs is
     set to the new value. But, the control MSR on newly onlined CPUs
     after the value change will not be set to the new value due to
     lack of the CPU online function.

3. On resume from suspend/hibernation, the boot CPU restores the control
   MSR to the global control value through the syscore ops. But, the
   control MSR on all APs is not set due to lake of the CPU online
   function.

To solve the issues and enforce consistent behavior on the failure
of the CPU hotplug setup, make the following changes:

1. Cache the original control MSR value which is configured by
   hardware or BIOS before kernel boot. This value is likely to
   be 0. But it could be a different number as well. Cache the
   control MSR only once before the MSR is changed.
2. Add the CPU offline function so that the MSR is restored to the
   original control value on all CPUs on the failure.
3. On the failure, exit from cpumait_init() so that the syscore ops
   and the control interfaces are not added.

Reported-by: Valdis Kletnieks <valdis.kletnieks@vt.edu>
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lkml.kernel.org/r/1565401237-60936-1-git-send-email-fenghua.yu@intel.com
---
 arch/x86/kernel/cpu/umwait.c | 39 +++++++++++++++++++++++++++++++++++-
 1 file changed, 38 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/umwait.c b/arch/x86/kernel/cpu/umwait.c
index 6a204e7336c1..32b4dc9030aa 100644
--- a/arch/x86/kernel/cpu/umwait.c
+++ b/arch/x86/kernel/cpu/umwait.c
@@ -17,6 +17,12 @@
  */
 static u32 umwait_control_cached = UMWAIT_CTRL_VAL(100000, UMWAIT_C02_ENABLE);
 
+/*
+ * Cache the original IA32_UMWAIT_CONTROL MSR value which is configured by
+ * hardware or BIOS before kernel boot.
+ */
+static u32 orig_umwait_control_cached __ro_after_init;
+
 /*
  * Serialize access to umwait_control_cached and IA32_UMWAIT_CONTROL MSR in
  * the sysfs write functions.
@@ -52,6 +58,23 @@ static int umwait_cpu_online(unsigned int cpu)
 	return 0;
 }
 
+/*
+ * The CPU hotplug callback sets the control MSR to the original control
+ * value.
+ */
+static int umwait_cpu_offline(unsigned int cpu)
+{
+	/*
+	 * This code is protected by the CPU hotplug already and
+	 * orig_umwait_control_cached is never changed after it caches
+	 * the original control MSR value in umwait_init(). So there
+	 * is no race condition here.
+	 */
+	wrmsr(MSR_IA32_UMWAIT_CONTROL, orig_umwait_control_cached, 0);
+
+	return 0;
+}
+
 /*
  * On resume, restore IA32_UMWAIT_CONTROL MSR on the boot processor which
  * is the only active CPU at this time. The MSR is set up on the APs via the
@@ -185,8 +208,22 @@ static int __init umwait_init(void)
 	if (!boot_cpu_has(X86_FEATURE_WAITPKG))
 		return -ENODEV;
 
+	/*
+	 * Cache the original control MSR value before the control MSR is
+	 * changed. This is the only place where orig_umwait_control_cached
+	 * is modified.
+	 */
+	rdmsrl(MSR_IA32_UMWAIT_CONTROL, orig_umwait_control_cached);
+
 	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "umwait:online",
-				umwait_cpu_online, NULL);
+				umwait_cpu_online, umwait_cpu_offline);
+	if (ret < 0) {
+		/*
+		 * On failure, the control MSR on all CPUs has the
+		 * original control value.
+		 */
+		return ret;
+	}
 
 	register_syscore_ops(&umwait_syscore_ops);
 

From e26cc08265dda37d2acc8394604f220ef412299d Mon Sep 17 00:00:00 2001
From: zhengbin <zhengbin13@huawei.com>
Date: Mon, 12 Aug 2019 20:36:55 +0800
Subject: [PATCH 077/157] blk-mq: move cancel of requeue_work to the front of
 blk_exit_queue

blk_exit_queue will free elevator_data, while blk_mq_requeue_work
will access it. Move cancel of requeue_work to the front of
blk_exit_queue to avoid use-after-free.

blk_exit_queue                blk_mq_requeue_work
  __elevator_exit               blk_mq_run_hw_queues
    blk_mq_exit_sched             blk_mq_run_hw_queue
      dd_exit_queue                 blk_mq_hctx_has_pending
        kfree(elevator_data)          blk_mq_sched_has_work
                                        dd_has_work

Fixes: fbc2a15e3433 ("blk-mq: move cancel of requeue_work into blk_mq_release")
Cc: stable@vger.kernel.org
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: zhengbin <zhengbin13@huawei.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c    | 2 --
 block/blk-sysfs.c | 3 +++
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index f78d3287dd82..a8e6a58f5f28 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2666,8 +2666,6 @@ void blk_mq_release(struct request_queue *q)
 	struct blk_mq_hw_ctx *hctx, *next;
 	int i;
 
-	cancel_delayed_work_sync(&q->requeue_work);
-
 	queue_for_each_hw_ctx(q, hctx, i)
 		WARN_ON_ONCE(hctx && list_empty(&hctx->hctx_list));
 
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 977c659dcd18..9bfa3ea4ed63 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -892,6 +892,9 @@ static void __blk_release_queue(struct work_struct *work)
 
 	blk_free_queue_stats(q->stats);
 
+	if (queue_is_mq(q))
+		cancel_delayed_work_sync(&q->requeue_work);
+
 	blk_exit_queue(q);
 
 	blk_queue_free_zone_bitmaps(q);

From ae78ca3cf3d9e9f914bfcd0bc5c389ff18b9c2e0 Mon Sep 17 00:00:00 2001
From: Wenwen Wang <wenwen@cs.uga.edu>
Date: Sun, 11 Aug 2019 12:23:22 -0500
Subject: [PATCH 078/157] xen/blkback: fix memory leaks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In read_per_ring_refs(), after 'req' and related memory regions are
allocated, xen_blkif_map() is invoked to map the shared frame, irq, and
etc. However, if this mapping process fails, no cleanup is performed,
leading to memory leaks. To fix this issue, invoke the cleanup before
returning the error.

Acked-by: Roger Pau Monné <roger.pau@citrix.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Wenwen Wang <wenwen@cs.uga.edu>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/xen-blkback/xenbus.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 3ac6a5d18071..b90dbcd99c03 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -965,6 +965,7 @@ static int read_per_ring_refs(struct xen_blkif_ring *ring, const char *dir)
 		}
 	}
 
+	err = -ENOMEM;
 	for (i = 0; i < nr_grefs * XEN_BLKIF_REQS_PER_PAGE; i++) {
 		req = kzalloc(sizeof(*req), GFP_KERNEL);
 		if (!req)
@@ -987,7 +988,7 @@ static int read_per_ring_refs(struct xen_blkif_ring *ring, const char *dir)
 	err = xen_blkif_map(ring, ring_ref, nr_grefs, evtchn);
 	if (err) {
 		xenbus_dev_fatal(dev, err, "mapping ring-ref port %u", evtchn);
-		return err;
+		goto fail;
 	}
 
 	return 0;
@@ -1007,8 +1008,7 @@ fail:
 		}
 		kfree(req);
 	}
-	return -ENOMEM;
-
+	return err;
 }
 
 static int connect_ring(struct backend_info *be)

From b4d98bc3fc93ec3a58459948a2c0e0c9b501cd88 Mon Sep 17 00:00:00 2001
From: Ian Abbott <abbotti@mev.co.uk>
Date: Mon, 12 Aug 2019 12:15:17 +0100
Subject: [PATCH 079/157] staging: comedi: dt3000: Fix signed integer overflow
 'divider * base'

In `dt3k_ns_to_timer()` the following lines near the end of the function
result in a signed integer overflow:

	prescale = 15;
	base = timer_base * (1 << prescale);
	divider = 65535;
	*nanosec = divider * base;

(`divider`, `base` and `prescale` are type `int`, `timer_base` and
`*nanosec` are type `unsigned int`.  The value of `timer_base` will be
either 50 or 100.)

The main reason for the overflow is that the calculation for `base` is
completely wrong.  It should be:

	base = timer_base * (prescale + 1);

which matches an earlier instance of this calculation in the same
function.

Reported-by: David Binderman <dcb314@hotmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Ian Abbott <abbotti@mev.co.uk>
Link: https://lore.kernel.org/r/20190812111517.26803-1-abbotti@mev.co.uk
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/comedi/drivers/dt3000.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/comedi/drivers/dt3000.c b/drivers/staging/comedi/drivers/dt3000.c
index 2edf3ee91300..4ad176fc14ad 100644
--- a/drivers/staging/comedi/drivers/dt3000.c
+++ b/drivers/staging/comedi/drivers/dt3000.c
@@ -368,7 +368,7 @@ static int dt3k_ns_to_timer(unsigned int timer_base, unsigned int *nanosec,
 	}
 
 	prescale = 15;
-	base = timer_base * (1 << prescale);
+	base = timer_base * (prescale + 1);
 	divider = 65535;
 	*nanosec = divider * base;
 	return (prescale << 16) | (divider);

From 8e2a589a3fc36ce858d42e767c3bcd8fc62a512b Mon Sep 17 00:00:00 2001
From: Ian Abbott <abbotti@mev.co.uk>
Date: Mon, 12 Aug 2019 13:08:14 +0100
Subject: [PATCH 080/157] staging: comedi: dt3000: Fix rounding up of timer
 divisor

`dt3k_ns_to_timer()` determines the prescaler and divisor to use to
produce a desired timing period.  It is influenced by a rounding mode
and can round the divisor up, down, or to the nearest value.  However,
the code for rounding up currently does the same as rounding down!  Fix
ir by using the `DIV_ROUND_UP()` macro to calculate the divisor when
rounding up.

Also, change the types of the `divider`, `base` and `prescale` variables
from `int` to `unsigned int` to avoid mixing signed and unsigned types
in the calculations.

Also fix a typo in a nearby comment: "improvment" => "improvement".

Signed-off-by: Ian Abbott <abbotti@mev.co.uk>
Cc: stable <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20190812120814.21188-1-abbotti@mev.co.uk
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/comedi/drivers/dt3000.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/comedi/drivers/dt3000.c b/drivers/staging/comedi/drivers/dt3000.c
index 4ad176fc14ad..caf4d4df4bd3 100644
--- a/drivers/staging/comedi/drivers/dt3000.c
+++ b/drivers/staging/comedi/drivers/dt3000.c
@@ -342,9 +342,9 @@ static irqreturn_t dt3k_interrupt(int irq, void *d)
 static int dt3k_ns_to_timer(unsigned int timer_base, unsigned int *nanosec,
 			    unsigned int flags)
 {
-	int divider, base, prescale;
+	unsigned int divider, base, prescale;
 
-	/* This function needs improvment */
+	/* This function needs improvement */
 	/* Don't know if divider==0 works. */
 
 	for (prescale = 0; prescale < 16; prescale++) {
@@ -358,7 +358,7 @@ static int dt3k_ns_to_timer(unsigned int timer_base, unsigned int *nanosec,
 			divider = (*nanosec) / base;
 			break;
 		case CMDF_ROUND_UP:
-			divider = (*nanosec) / base;
+			divider = DIV_ROUND_UP(*nanosec, base);
 			break;
 		}
 		if (divider < 65536) {

From e9eec6a55c95fb918036bfe29c26a535dca1ad49 Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Thu, 8 Aug 2019 11:15:38 +0300
Subject: [PATCH 081/157] IB/mlx5: Fix use-after-free error while accessing
 ev_file pointer

Call to uverbs_close_fd() releases file pointer to 'ev_file' and
mlx5_ib_dev is going to be inaccessible. Cache pointer prior cleaning
resources to solve the KASAN warning below.

BUG: KASAN: use-after-free in devx_async_event_close+0x391/0x480 [mlx5_ib]
Read of size 8 at addr ffff888301e3cec0 by task devx_direct_tes/4631
CPU: 1 PID: 4631 Comm: devx_direct_tes Tainted: G OE 5.3.0-rc1-for-upstream-dbg-2019-07-26_01-19-56-93 #1
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu2 04/01/2014
Call Trace:
dump_stack+0x9a/0xeb
print_address_description+0x1e2/0x400
? devx_async_event_close+0x391/0x480 [mlx5_ib]
__kasan_report+0x15c/0x1df
? devx_async_event_close+0x391/0x480 [mlx5_ib]
kasan_report+0xe/0x20
devx_async_event_close+0x391/0x480 [mlx5_ib]
__fput+0x26a/0x7b0
task_work_run+0x10d/0x180
exit_to_usermode_loop+0x137/0x160
do_syscall_64+0x3c7/0x490
entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x7f5df907d664
Code: 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f
80 00 00 00 00 8b 05 6a cd 20 00 48 63 ff 85 c0 75 13 b8
03 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 44 f3 c3 66 90
48 83 ec 18 48 89 7c 24 08 e8
RSP: 002b:00007ffd353cb958 EFLAGS: 00000246 ORIG_RAX: 0000000000000003
RAX: 0000000000000000 RBX: 000056017a88c348 RCX: 00007f5df907d664
RDX: 00007f5df969d400 RSI: 00007f5de8f1ec90 RDI: 0000000000000006
RBP: 00007f5df9681dc0 R08: 00007f5de8736410 R09: 000056017a9d2dd0
R10: 000000000000000b R11: 0000000000000246 R12: 00007f5de899d7d0
R13: 00007f5df96c4248 R14: 00007f5de8f1ecb0 R15: 000056017ae41308

Allocated by task 4631:
save_stack+0x19/0x80
kasan_kmalloc.constprop.3+0xa0/0xd0
alloc_uobj+0x71/0x230 [ib_uverbs]
alloc_begin_fd_uobject+0x2e/0xc0 [ib_uverbs]
rdma_alloc_begin_uobject+0x96/0x140 [ib_uverbs]
ib_uverbs_run_method+0xdf0/0x1940 [ib_uverbs]
ib_uverbs_cmd_verbs+0x57e/0xdb0 [ib_uverbs]
ib_uverbs_ioctl+0x177/0x260 [ib_uverbs]
do_vfs_ioctl+0x18f/0x1010
ksys_ioctl+0x70/0x80
__x64_sys_ioctl+0x6f/0xb0
do_syscall_64+0x95/0x490
entry_SYSCALL_64_after_hwframe+0x49/0xbe

Freed by task 4631:
save_stack+0x19/0x80
__kasan_slab_free+0x11d/0x160
slab_free_freelist_hook+0x67/0x1a0
kfree+0xb9/0x2a0
uverbs_close_fd+0x118/0x1c0 [ib_uverbs]
devx_async_event_close+0x28a/0x480 [mlx5_ib]
__fput+0x26a/0x7b0
task_work_run+0x10d/0x180
exit_to_usermode_loop+0x137/0x160
do_syscall_64+0x3c7/0x490
entry_SYSCALL_64_after_hwframe+0x49/0xbe

The buggy address belongs to the object at ffff888301e3cda8
which belongs to the cache kmalloc-512 of size 512
The buggy address is located 280 bytes inside of 512-byte region
[ffff888301e3cda8, ffff888301e3cfa8)
The buggy address belongs to the page:
page:ffffea000c078e00 refcount:1 mapcount:0
mapping:ffff888352811300 index:0x0 compound_mapcount: 0
flags: 0x2fffff80010200(slab|head)
raw: 002fffff80010200 ffffea000d152608 ffffea000c077808 ffff888352811300
raw: 0000000000000000 0000000000250025 00000001ffffffff 0000000000000000
page dumped because: kasan: bad access detected
Memory state around the buggy address:
ffff888301e3cd80: fc fc fc fc fc fb fb fb fb fb fb fb fb fb fb fb
ffff888301e3ce00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
ffff888301e3ce80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
ffff888301e3cf00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
ffff888301e3cf80: fb fb fb fb fb fc fc fc fc fc fc fc fc fc fc fc
Disabling lock debugging due to kernel taint

Cc: <stable@vger.kernel.org> # 5.2
Fixes: 759738537142 ("IB/mlx5: Enable subscription for device events over DEVX")
Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>
Link: https://lore.kernel.org/r/20190808081538.28772-1-leon@kernel.org
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/mlx5/devx.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 2d1b3d9609d9..af5bbb35c058 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -2644,12 +2644,13 @@ static int devx_async_event_close(struct inode *inode, struct file *filp)
 	struct devx_async_event_file *ev_file = filp->private_data;
 	struct devx_event_subscription *event_sub, *event_sub_tmp;
 	struct devx_async_event_data *entry, *tmp;
+	struct mlx5_ib_dev *dev = ev_file->dev;
 
-	mutex_lock(&ev_file->dev->devx_event_table.event_xa_lock);
+	mutex_lock(&dev->devx_event_table.event_xa_lock);
 	/* delete the subscriptions which are related to this FD */
 	list_for_each_entry_safe(event_sub, event_sub_tmp,
 				 &ev_file->subscribed_events_list, file_list) {
-		devx_cleanup_subscription(ev_file->dev, event_sub);
+		devx_cleanup_subscription(dev, event_sub);
 		if (event_sub->eventfd)
 			eventfd_ctx_put(event_sub->eventfd);
 
@@ -2658,7 +2659,7 @@ static int devx_async_event_close(struct inode *inode, struct file *filp)
 		kfree_rcu(event_sub, rcu);
 	}
 
-	mutex_unlock(&ev_file->dev->devx_event_table.event_xa_lock);
+	mutex_unlock(&dev->devx_event_table.event_xa_lock);
 
 	/* free the pending events allocation */
 	if (!ev_file->omit_data) {
@@ -2670,7 +2671,7 @@ static int devx_async_event_close(struct inode *inode, struct file *filp)
 	}
 
 	uverbs_close_fd(filp);
-	put_device(&ev_file->dev->ib_dev.dev);
+	put_device(&dev->ib_dev.dev);
 	return 0;
 }
 

From 17c19287ecf54fb55f155902dcd39c62a9547c4e Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 9 Aug 2019 17:09:04 +0300
Subject: [PATCH 082/157] RDMA/siw: Fix a memory leak in siw_init_cpulist()

The error handling code doesn't free siw_cpu_info.tx_valid_cpus[0].  The
first iteration through the loop is a no-op so this is sort of an off
by one bug.  Also Bernard pointed out that we can remove the NULL
assignment and simplify the code a bit.

Fixes: bdcf26bf9b3a ("rdma/siw: network and RDMA core interface")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Bernard Metzler <bmt@zurich.ibm.com>
Reviewed-by: Bernard Metzler <bmt@zurich.ibm.com>
Link: https://lore.kernel.org/r/20190809140904.GB3552@mwanda
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/sw/siw/siw_main.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c
index d0f140daf659..05a92f997f60 100644
--- a/drivers/infiniband/sw/siw/siw_main.c
+++ b/drivers/infiniband/sw/siw/siw_main.c
@@ -160,10 +160,8 @@ static int siw_init_cpulist(void)
 
 out_err:
 	siw_cpu_info.num_nodes = 0;
-	while (i) {
+	while (--i >= 0)
 		kfree(siw_cpu_info.tx_valid_cpus[i]);
-		siw_cpu_info.tx_valid_cpus[i--] = NULL;
-	}
 	kfree(siw_cpu_info.tx_valid_cpus);
 	siw_cpu_info.tx_valid_cpus = NULL;
 

From 932727c55653c1d7838d0ecd0cdce4393be156e0 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 9 Aug 2019 13:13:19 +0300
Subject: [PATCH 083/157] RDMA/core: Fix error code in stat_get_doit_qp()

We need to set the error codes on these paths.  Currently the only
possible error code is -EMSGSIZE so that's what the patch uses.

Fixes: 83c2c1fcbd08 ("RDMA/nldev: Allow get counter mode through RDMA netlink")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
Link: https://lore.kernel.org/r/20190809101311.GA17867@mwanda
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/core/nldev.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 783e465e7c41..87d40d1ecdde 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -1952,12 +1952,16 @@ static int stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	if (fill_nldev_handle(msg, device) ||
 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
-	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, mode))
+	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, mode)) {
+		ret = -EMSGSIZE;
 		goto err_msg;
+	}
 
 	if ((mode == RDMA_COUNTER_MODE_AUTO) &&
-	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK, mask))
+	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK, mask)) {
+		ret = -EMSGSIZE;
 		goto err_msg;
+	}
 
 	nlmsg_end(msg, nlh);
 	ib_device_put(device);

From 858b44dc62a1018312fd9cbd15db24237a897e0e Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Sun, 11 Aug 2019 15:52:26 -0700
Subject: [PATCH 084/157] xfs: remove more ondisk directory corruption asserts

Continue our game of replacing ASSERTs for corrupt ondisk metadata with
EFSCORRUPTED returns.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Bill O'Donnell <billodo@redhat.com>
---
 fs/xfs/libxfs/xfs_da_btree.c  | 19 ++++++++++++-------
 fs/xfs/libxfs/xfs_dir2_node.c |  3 ++-
 2 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index d1c77fd0815d..0bf56e94bfe9 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -487,10 +487,8 @@ xfs_da3_split(
 	ASSERT(state->path.active == 0);
 	oldblk = &state->path.blk[0];
 	error = xfs_da3_root_split(state, oldblk, addblk);
-	if (error) {
-		addblk->bp = NULL;
-		return error;	/* GROT: dir is inconsistent */
-	}
+	if (error)
+		goto out;
 
 	/*
 	 * Update pointers to the node which used to be block 0 and just got
@@ -505,7 +503,10 @@ xfs_da3_split(
 	 */
 	node = oldblk->bp->b_addr;
 	if (node->hdr.info.forw) {
-		ASSERT(be32_to_cpu(node->hdr.info.forw) == addblk->blkno);
+		if (be32_to_cpu(node->hdr.info.forw) != addblk->blkno) {
+			error = -EFSCORRUPTED;
+			goto out;
+		}
 		node = addblk->bp->b_addr;
 		node->hdr.info.back = cpu_to_be32(oldblk->blkno);
 		xfs_trans_log_buf(state->args->trans, addblk->bp,
@@ -514,15 +515,19 @@ xfs_da3_split(
 	}
 	node = oldblk->bp->b_addr;
 	if (node->hdr.info.back) {
-		ASSERT(be32_to_cpu(node->hdr.info.back) == addblk->blkno);
+		if (be32_to_cpu(node->hdr.info.back) != addblk->blkno) {
+			error = -EFSCORRUPTED;
+			goto out;
+		}
 		node = addblk->bp->b_addr;
 		node->hdr.info.forw = cpu_to_be32(oldblk->blkno);
 		xfs_trans_log_buf(state->args->trans, addblk->bp,
 				  XFS_DA_LOGRANGE(node, &node->hdr.info,
 				  sizeof(node->hdr.info)));
 	}
+out:
 	addblk->bp = NULL;
-	return 0;
+	return error;
 }
 
 /*
diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index afcc6642690a..1fc44efc344d 100644
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -741,7 +741,8 @@ xfs_dir2_leafn_lookup_for_entry(
 	ents = dp->d_ops->leaf_ents_p(leaf);
 
 	xfs_dir3_leaf_check(dp, bp);
-	ASSERT(leafhdr.count > 0);
+	if (leafhdr.count <= 0)
+		return -EFSCORRUPTED;
 
 	/*
 	 * Look up the hash value in the leaf entries.

From 8612de3f7ba6e900465e340516b8313806d27b2d Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Sun, 11 Aug 2019 15:52:27 -0700
Subject: [PATCH 085/157] xfs: don't crash on null attr fork xfs_bmapi_read

Zorro Lang reported a crash in generic/475 if we try to inactivate a
corrupt inode with a NULL attr fork (stack trace shortened somewhat):

RIP: 0010:xfs_bmapi_read+0x311/0xb00 [xfs]
RSP: 0018:ffff888047f9ed68 EFLAGS: 00010202
RAX: dffffc0000000000 RBX: ffff888047f9f038 RCX: 1ffffffff5f99f51
RDX: 0000000000000002 RSI: 0000000000000008 RDI: 0000000000000012
RBP: ffff888002a41f00 R08: ffffed10005483f0 R09: ffffed10005483ef
R10: ffffed10005483ef R11: ffff888002a41f7f R12: 0000000000000004
R13: ffffe8fff53b5768 R14: 0000000000000005 R15: 0000000000000001
FS:  00007f11d44b5b80(0000) GS:ffff888114200000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000ef6000 CR3: 000000002e176003 CR4: 00000000001606e0
Call Trace:
 xfs_dabuf_map.constprop.18+0x696/0xe50 [xfs]
 xfs_da_read_buf+0xf5/0x2c0 [xfs]
 xfs_da3_node_read+0x1d/0x230 [xfs]
 xfs_attr_inactive+0x3cc/0x5e0 [xfs]
 xfs_inactive+0x4c8/0x5b0 [xfs]
 xfs_fs_destroy_inode+0x31b/0x8e0 [xfs]
 destroy_inode+0xbc/0x190
 xfs_bulkstat_one_int+0xa8c/0x1200 [xfs]
 xfs_bulkstat_one+0x16/0x20 [xfs]
 xfs_bulkstat+0x6fa/0xf20 [xfs]
 xfs_ioc_bulkstat+0x182/0x2b0 [xfs]
 xfs_file_ioctl+0xee0/0x12a0 [xfs]
 do_vfs_ioctl+0x193/0x1000
 ksys_ioctl+0x60/0x90
 __x64_sys_ioctl+0x6f/0xb0
 do_syscall_64+0x9f/0x4d0
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x7f11d39a3e5b

The "obvious" cause is that the attr ifork is null despite the inode
claiming an attr fork having at least one extent, but it's not so
obvious why we ended up with an inode in that state.

Reported-by: Zorro Lang <zlang@redhat.com>
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=204031
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Bill O'Donnell <billodo@redhat.com>
---
 fs/xfs/libxfs/xfs_bmap.c | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index baf0b72c0a37..07aad70f3931 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -3835,15 +3835,28 @@ xfs_bmapi_read(
 	XFS_STATS_INC(mp, xs_blk_mapr);
 
 	ifp = XFS_IFORK_PTR(ip, whichfork);
+	if (!ifp) {
+		/* No CoW fork?  Return a hole. */
+		if (whichfork == XFS_COW_FORK) {
+			mval->br_startoff = bno;
+			mval->br_startblock = HOLESTARTBLOCK;
+			mval->br_blockcount = len;
+			mval->br_state = XFS_EXT_NORM;
+			*nmap = 1;
+			return 0;
+		}
 
-	/* No CoW fork?  Return a hole. */
-	if (whichfork == XFS_COW_FORK && !ifp) {
-		mval->br_startoff = bno;
-		mval->br_startblock = HOLESTARTBLOCK;
-		mval->br_blockcount = len;
-		mval->br_state = XFS_EXT_NORM;
-		*nmap = 1;
-		return 0;
+		/*
+		 * A missing attr ifork implies that the inode says we're in
+		 * extents or btree format but failed to pass the inode fork
+		 * verifier while trying to load it.  Treat that as a file
+		 * corruption too.
+		 */
+#ifdef DEBUG
+		xfs_alert(mp, "%s: inode %llu missing fork %d",
+				__func__, ip->i_ino, whichfork);
+#endif /* DEBUG */
+		return -EFSCORRUPTED;
 	}
 
 	if (!(ifp->if_flags & XFS_IFEXTENTS)) {

From 5785675dfef4f9edcee66edef7b3af21618d2707 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Sun, 11 Aug 2019 17:40:36 +0200
Subject: [PATCH 086/157] x86/apic/32: Fix yet another implicit fallthrough
 warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix

  arch/x86/kernel/apic/probe_32.c: In function ‘default_setup_apic_routing’:
  arch/x86/kernel/apic/probe_32.c:146:7: warning: this statement may fall through [-Wimplicit-fallthrough=]
      if (!APIC_XAPIC(version)) {
         ^
  arch/x86/kernel/apic/probe_32.c:151:3: note: here
   case X86_VENDOR_HYGON:
   ^~~~

for 32-bit builds.

Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lkml.kernel.org/r/20190811154036.29805-1-bp@alien8.de
---
 arch/x86/kernel/apic/probe_32.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 1492799b8f43..ee2d91e382f1 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -184,7 +184,8 @@ void __init default_setup_apic_routing(void)
 				def_to_bigsmp = 0;
 				break;
 			}
-			/* If P4 and above fall through */
+			/* P4 and above */
+			/* fall through */
 		case X86_VENDOR_HYGON:
 		case X86_VENDOR_AMD:
 			def_to_bigsmp = 1;

From 91be2587e82a0f16348fd8f12a57e4c328baffc7 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 12 Aug 2019 20:16:17 +0200
Subject: [PATCH 087/157] x86/fpu/math-emu: Address fallthrough warnings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

/home/tglx/work/kernel/linus/linux/arch/x86/math-emu/errors.c: In function ‘FPU_printall’:
/home/tglx/work/kernel/linus/linux/arch/x86/math-emu/errors.c:187:9: warning: this statement may fall through [-Wimplicit-fallthrough=]
    tagi = FPU_Special(r);
    ~~~~~^~~~~~~~~~~~~~~~
/home/tglx/work/kernel/linus/linux/arch/x86/math-emu/errors.c:188:3: note: here
   case TAG_Valid:
   ^~~~
/home/tglx/work/kernel/linus/linux/arch/x86/math-emu/fpu_trig.c: In function ‘fyl2xp1’:
/home/tglx/work/kernel/linus/linux/arch/x86/math-emu/fpu_trig.c:1353:7: warning: this statement may fall through [-Wimplicit-fallthrough=]
    if (denormal_operand() < 0)
       ^
/home/tglx/work/kernel/linus/linux/arch/x86/math-emu/fpu_trig.c:1356:3: note: here
   case TAG_Zero:

Remove the pointless 'break;' after 'continue;' while at it.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/math-emu/errors.c   | 5 +++--
 arch/x86/math-emu/fpu_trig.c | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/x86/math-emu/errors.c b/arch/x86/math-emu/errors.c
index 6b468517ab71..73dc66d887f3 100644
--- a/arch/x86/math-emu/errors.c
+++ b/arch/x86/math-emu/errors.c
@@ -178,13 +178,15 @@ void FPU_printall(void)
 	for (i = 0; i < 8; i++) {
 		FPU_REG *r = &st(i);
 		u_char tagi = FPU_gettagi(i);
+
 		switch (tagi) {
 		case TAG_Empty:
 			continue;
-			break;
 		case TAG_Zero:
 		case TAG_Special:
+			/* Update tagi for the printk below */
 			tagi = FPU_Special(r);
+			/* fall through */
 		case TAG_Valid:
 			printk("st(%d)  %c .%04lx %04lx %04lx %04lx e%+-6d ", i,
 			       getsign(r) ? '-' : '+',
@@ -198,7 +200,6 @@ void FPU_printall(void)
 			printk("Whoops! Error in errors.c: tag%d is %d ", i,
 			       tagi);
 			continue;
-			break;
 		}
 		printk("%s\n", tag_desc[(int)(unsigned)tagi]);
 	}
diff --git a/arch/x86/math-emu/fpu_trig.c b/arch/x86/math-emu/fpu_trig.c
index 783c509f957a..127ea54122d7 100644
--- a/arch/x86/math-emu/fpu_trig.c
+++ b/arch/x86/math-emu/fpu_trig.c
@@ -1352,7 +1352,7 @@ static void fyl2xp1(FPU_REG *st0_ptr, u_char st0_tag)
 		case TW_Denormal:
 			if (denormal_operand() < 0)
 				return;
-
+			/* fall through */
 		case TAG_Zero:
 		case TAG_Valid:
 			setsign(st0_ptr, getsign(st0_ptr) ^ getsign(st1_ptr));

From 303911cfc5b95d33687d9046133ff184cf5043ff Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Mon, 12 Aug 2019 16:11:07 -0400
Subject: [PATCH 088/157] USB: core: Fix races in character device registration
 and deregistraion

The syzbot fuzzer has found two (!) races in the USB character device
registration and deregistration routines.  This patch fixes the races.

The first race results from the fact that usb_deregister_dev() sets
usb_minors[intf->minor] to NULL before calling device_destroy() on the
class device.  This leaves a window during which another thread can
allocate the same minor number but will encounter a duplicate name
error when it tries to register its own class device.  A typical error
message in the system log would look like:

    sysfs: cannot create duplicate filename '/class/usbmisc/ldusb0'

The patch fixes this race by destroying the class device first.

The second race is in usb_register_dev().  When that routine runs, it
first allocates a minor number, then drops minor_rwsem, and then
creates the class device.  If the device creation fails, the minor
number is deallocated and the whole routine returns an error.  But
during the time while minor_rwsem was dropped, there is a window in
which the minor number is allocated and so another thread can
successfully open the device file.  Typically this results in
use-after-free errors or invalid accesses when the other thread closes
its open file reference, because the kernel then tries to release
resources that were already deallocated when usb_register_dev()
failed.  The patch fixes this race by keeping minor_rwsem locked
throughout the entire routine.

Reported-and-tested-by: syzbot+30cf45ebfe0b0c4847a1@syzkaller.appspotmail.com
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
CC: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/Pine.LNX.4.44L0.1908121607590.1659-100000@iolanthe.rowland.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/file.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/usb/core/file.c b/drivers/usb/core/file.c
index 65de6f73b672..558890ada0e5 100644
--- a/drivers/usb/core/file.c
+++ b/drivers/usb/core/file.c
@@ -193,9 +193,10 @@ int usb_register_dev(struct usb_interface *intf,
 		intf->minor = minor;
 		break;
 	}
-	up_write(&minor_rwsem);
-	if (intf->minor < 0)
+	if (intf->minor < 0) {
+		up_write(&minor_rwsem);
 		return -EXFULL;
+	}
 
 	/* create a usb class device for this usb interface */
 	snprintf(name, sizeof(name), class_driver->name, minor - minor_base);
@@ -203,12 +204,11 @@ int usb_register_dev(struct usb_interface *intf,
 				      MKDEV(USB_MAJOR, minor), class_driver,
 				      "%s", kbasename(name));
 	if (IS_ERR(intf->usb_dev)) {
-		down_write(&minor_rwsem);
 		usb_minors[minor] = NULL;
 		intf->minor = -1;
-		up_write(&minor_rwsem);
 		retval = PTR_ERR(intf->usb_dev);
 	}
+	up_write(&minor_rwsem);
 	return retval;
 }
 EXPORT_SYMBOL_GPL(usb_register_dev);
@@ -234,12 +234,12 @@ void usb_deregister_dev(struct usb_interface *intf,
 		return;
 
 	dev_dbg(&intf->dev, "removing %d minor\n", intf->minor);
+	device_destroy(usb_class->class, MKDEV(USB_MAJOR, intf->minor));
 
 	down_write(&minor_rwsem);
 	usb_minors[intf->minor] = NULL;
 	up_write(&minor_rwsem);
 
-	device_destroy(usb_class->class, MKDEV(USB_MAJOR, intf->minor));
 	intf->usb_dev = NULL;
 	intf->minor = -1;
 	destroy_usb_class();

From cd8869f4cb257f22b89495ca40f5281e58ba359c Mon Sep 17 00:00:00 2001
From: Max Filippov <jcmvbkbc@gmail.com>
Date: Mon, 12 Aug 2019 15:01:30 -0700
Subject: [PATCH 089/157] xtensa: add missing isync to the cpu_reset TLB code

ITLB entry modifications must be followed by the isync instruction
before the new entries are possibly used. cpu_reset lacks one isync
between ITLB way 6 initialization and jump to the identity mapping.
Add missing isync to xtensa cpu_reset.

Cc: stable@vger.kernel.org
Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
---
 arch/xtensa/kernel/setup.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c
index 5cb8a62e091c..7c3106093c75 100644
--- a/arch/xtensa/kernel/setup.c
+++ b/arch/xtensa/kernel/setup.c
@@ -511,6 +511,7 @@ void cpu_reset(void)
 				      "add	%2, %2, %7\n\t"
 				      "addi	%0, %0, -1\n\t"
 				      "bnez	%0, 1b\n\t"
+				      "isync\n\t"
 				      /* Jump to identity mapping */
 				      "jx	%3\n"
 				      "2:\n\t"

From 60d437bbff358748fcfc3bce5f08da9a6b3761da Mon Sep 17 00:00:00 2001
From: Nishka Dasgupta <nishkadg.linux@gmail.com>
Date: Tue, 16 Jul 2019 11:13:30 +0530
Subject: [PATCH 090/157] of: resolver: Add of_node_put() before return and
 break

Each iteration of for_each_child_of_node puts the previous node, but in
the case of a return or break from the middle of the loop, there is no
put, thus causing a memory leak. Hence add an of_node_put before the
return or break in three places.
Issue found with Coccinelle.

Signed-off-by: Nishka Dasgupta <nishkadg.linux@gmail.com>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 drivers/of/resolver.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/of/resolver.c b/drivers/of/resolver.c
index c1b67dd7cd6e..83c766233181 100644
--- a/drivers/of/resolver.c
+++ b/drivers/of/resolver.c
@@ -206,16 +206,22 @@ static int adjust_local_phandle_references(struct device_node *local_fixups,
 	for_each_child_of_node(local_fixups, child) {
 
 		for_each_child_of_node(overlay, overlay_child)
-			if (!node_name_cmp(child, overlay_child))
+			if (!node_name_cmp(child, overlay_child)) {
+				of_node_put(overlay_child);
 				break;
+			}
 
-		if (!overlay_child)
+		if (!overlay_child) {
+			of_node_put(child);
 			return -EINVAL;
+		}
 
 		err = adjust_local_phandle_references(child, overlay_child,
 				phandle_delta);
-		if (err)
+		if (err) {
+			of_node_put(child);
 			return err;
+		}
 	}
 
 	return 0;

From fd295733e702a63c189fea8294c12e04b5e5ecc0 Mon Sep 17 00:00:00 2001
From: Sven Van Asbroeck <thesven73@gmail.com>
Date: Thu, 18 Jul 2019 16:14:53 -0400
Subject: [PATCH 091/157] dt-bindings: fec: explicitly mark deprecated
 properties

fec's gpio phy reset properties have been deprecated.
Update the dt-bindings documentation to explicitly mark
them as such, and provide a short description of the
recommended alternative.

Signed-off-by: Sven Van Asbroeck <TheSven73@gmail.com>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 .../devicetree/bindings/net/fsl-fec.txt       | 30 +++++++++++--------
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/fsl-fec.txt b/Documentation/devicetree/bindings/net/fsl-fec.txt
index 2d41fb96ce0a..5b88fae0307d 100644
--- a/Documentation/devicetree/bindings/net/fsl-fec.txt
+++ b/Documentation/devicetree/bindings/net/fsl-fec.txt
@@ -7,18 +7,6 @@ Required properties:
 - phy-mode : See ethernet.txt file in the same directory
 
 Optional properties:
-- phy-reset-gpios : Should specify the gpio for phy reset
-- phy-reset-duration : Reset duration in milliseconds.  Should present
-  only if property "phy-reset-gpios" is available.  Missing the property
-  will have the duration be 1 millisecond.  Numbers greater than 1000 are
-  invalid and 1 millisecond will be used instead.
-- phy-reset-active-high : If present then the reset sequence using the GPIO
-  specified in the "phy-reset-gpios" property is reversed (H=reset state,
-  L=operation state).
-- phy-reset-post-delay : Post reset delay in milliseconds. If present then
-  a delay of phy-reset-post-delay milliseconds will be observed after the
-  phy-reset-gpios has been toggled. Can be omitted thus no delay is
-  observed. Delay is in range of 1ms to 1000ms. Other delays are invalid.
 - phy-supply : regulator that powers the Ethernet PHY.
 - phy-handle : phandle to the PHY device connected to this device.
 - fixed-link : Assume a fixed link. See fixed-link.txt in the same directory.
@@ -47,11 +35,27 @@ Optional properties:
   For imx6sx, "int0" handles all 3 queues and ENET_MII. "pps" is for the pulse
   per second interrupt associated with 1588 precision time protocol(PTP).
 
-
 Optional subnodes:
 - mdio : specifies the mdio bus in the FEC, used as a container for phy nodes
   according to phy.txt in the same directory
 
+Deprecated optional properties:
+	To avoid these, create a phy node according to phy.txt in the same
+	directory, and point the fec's "phy-handle" property to it. Then use
+	the phy's reset binding, again described by phy.txt.
+- phy-reset-gpios : Should specify the gpio for phy reset
+- phy-reset-duration : Reset duration in milliseconds.  Should present
+  only if property "phy-reset-gpios" is available.  Missing the property
+  will have the duration be 1 millisecond.  Numbers greater than 1000 are
+  invalid and 1 millisecond will be used instead.
+- phy-reset-active-high : If present then the reset sequence using the GPIO
+  specified in the "phy-reset-gpios" property is reversed (H=reset state,
+  L=operation state).
+- phy-reset-post-delay : Post reset delay in milliseconds. If present then
+  a delay of phy-reset-post-delay milliseconds will be observed after the
+  phy-reset-gpios has been toggled. Can be omitted thus no delay is
+  observed. Delay is in range of 1ms to 1000ms. Other delays are invalid.
+
 Example:
 
 ethernet@83fec000 {

From 17b6d2d528542bc60ad400add35728b2259b3cc1 Mon Sep 17 00:00:00 2001
From: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Date: Tue, 6 Aug 2019 18:27:26 +0200
Subject: [PATCH 092/157] drm/amdgpu: fix gfx9 soft recovery
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The SOC15_REG_OFFSET() macro wasn't used, making the soft recovery fail.

v2: use WREG32_SOC15 instead of WREG32 + SOC15_REG_OFFSET

Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 1cf639a51178..04b8ac4432c7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -4869,7 +4869,7 @@ static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
 	value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
 	value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
 	value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
-	WREG32(mmSQ_CMD, value);
+	WREG32_SOC15(GC, 0, mmSQ_CMD, value);
 }
 
 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,

From 31ec0dd38085aaeb75ab04211bf3b51ad917c9b4 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Thu, 8 Aug 2019 00:29:23 -0500
Subject: [PATCH 093/157] drm/amd/display: use kvmalloc for dc_state (v2)

It's large and doesn't need contiguous memory.  Fixes
allocation failures in some cases.

v2: kvfree the memory.

Reviewed-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/core/dc.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index fa20201eef3a..cbc480a33376 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -23,6 +23,7 @@
  */
 
 #include <linux/slab.h>
+#include <linux/mm.h>
 
 #include "dm_services.h"
 
@@ -1171,8 +1172,8 @@ bool dc_post_update_surfaces_to_stream(struct dc *dc)
 
 struct dc_state *dc_create_state(struct dc *dc)
 {
-	struct dc_state *context = kzalloc(sizeof(struct dc_state),
-					   GFP_KERNEL);
+	struct dc_state *context = kvzalloc(sizeof(struct dc_state),
+					    GFP_KERNEL);
 
 	if (!context)
 		return NULL;
@@ -1192,11 +1193,11 @@ struct dc_state *dc_create_state(struct dc *dc)
 struct dc_state *dc_copy_state(struct dc_state *src_ctx)
 {
 	int i, j;
-	struct dc_state *new_ctx = kmemdup(src_ctx,
-			sizeof(struct dc_state), GFP_KERNEL);
+	struct dc_state *new_ctx = kvmalloc(sizeof(struct dc_state), GFP_KERNEL);
 
 	if (!new_ctx)
 		return NULL;
+	memcpy(new_ctx, src_ctx, sizeof(struct dc_state));
 
 	for (i = 0; i < MAX_PIPES; i++) {
 			struct pipe_ctx *cur_pipe = &new_ctx->res_ctx.pipe_ctx[i];
@@ -1230,7 +1231,7 @@ static void dc_state_free(struct kref *kref)
 {
 	struct dc_state *context = container_of(kref, struct dc_state, refcount);
 	dc_resource_state_destruct(context);
-	kfree(context);
+	kvfree(context);
 }
 
 void dc_release_state(struct dc_state *context)

From 5717fe5ab38f9ccb32718bcb03bea68409c9cce4 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Mon, 12 Aug 2019 16:02:25 +0100
Subject: [PATCH 094/157] arm64: cpufeature: Don't treat granule sizes as
 strict

If a CPU doesn't support the page size for which the kernel is
configured, then we will complain and refuse to bring it online. For
secondary CPUs (and the boot CPU on a system booting with EFI), we will
also print an error identifying the mismatch.

Consequently, the only time that the cpufeature code can detect a
granule size mismatch is for a granule other than the one that is
currently being used. Although we would rather such systems didn't
exist, we've unfortunately lost that battle and Kevin reports that
on his amlogic S922X (odroid-n2 board) we end up warning and taining
with defconfig because 16k pages are not supported by all of the CPUs.

In such a situation, we don't actually care about the feature mismatch,
particularly now that KVM only exposes the sanitised view of the CPU
registers (commit 93390c0a1b20 - "arm64: KVM: Hide unsupported AArch64
CPU features from guests"). Treat the granule fields as non-strict and
let Kevin run without a tainted kernel.

Cc: Marc Zyngier <maz@kernel.org>
Reported-by: Kevin Hilman <khilman@baylibre.com>
Tested-by: Kevin Hilman <khilman@baylibre.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Will Deacon <will@kernel.org>
[catalin.marinas@arm.com: changelog updated with KVM sanitised regs commit]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/cpufeature.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index d19d14ba9ae4..b1fdc486aed8 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -184,9 +184,17 @@ static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = {
 };
 
 static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
-	S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI),
-	S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI),
-	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN16_SHIFT, 4, ID_AA64MMFR0_TGRAN16_NI),
+	/*
+	 * We already refuse to boot CPUs that don't support our configured
+	 * page size, so we can only detect mismatches for a page size other
+	 * than the one we're currently using. Unfortunately, SoCs like this
+	 * exist in the wild so, even though we don't like it, we'll have to go
+	 * along with it and treat them as non-strict.
+	 */
+	S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI),
+	S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI),
+	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN16_SHIFT, 4, ID_AA64MMFR0_TGRAN16_NI),
+
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_BIGENDEL0_SHIFT, 4, 0),
 	/* Linux shouldn't care about secure memory */
 	ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_SNSMEM_SHIFT, 4, 0),

From 834de5c1aa768eb3d233d6544ea7153826c4b206 Mon Sep 17 00:00:00 2001
From: Tudor Ambarus <tudor.ambarus@microchip.com>
Date: Wed, 31 Jul 2019 08:46:16 +0000
Subject: [PATCH 095/157] mtd: spi-nor: Fix the disabling of write protection
 at init

spi_nor_spansion_clear_sr_bp() depends on spansion_quad_enable().
While spansion_quad_enable() is selected as default when
initializing the flash parameters, the nor->quad_enable() method
can be overwritten later on when parsing BFPT.

Select the write protection disable mechanism at spi_nor_init() time,
when the nor->quad_enable() method is already known.

Fixes: 191f5c2ed4b6faba ("mtd: spi-nor: use 16-bit WRR command when QE is set on spansion flashes")
Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Reviewed-by: Vignesh Raghavendra <vigneshr@ti.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 drivers/mtd/spi-nor/spi-nor.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index 03cc788511d5..654bdc41fc99 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -3780,8 +3780,6 @@ static int spi_nor_init_params(struct spi_nor *nor,
 		default:
 			/* Kept only for backward compatibility purpose. */
 			params->quad_enable = spansion_quad_enable;
-			if (nor->clear_sr_bp)
-				nor->clear_sr_bp = spi_nor_spansion_clear_sr_bp;
 			break;
 		}
 
@@ -4035,6 +4033,9 @@ static int spi_nor_init(struct spi_nor *nor)
 	int err;
 
 	if (nor->clear_sr_bp) {
+		if (nor->quad_enable == spansion_quad_enable)
+			nor->clear_sr_bp = spi_nor_spansion_clear_sr_bp;
+
 		err = nor->clear_sr_bp(nor);
 		if (err) {
 			dev_err(nor->dev,

From 9a67b72552f8d019948453e56ca7db8c7e5a94ba Mon Sep 17 00:00:00 2001
From: Denis Efremov <efremov@linux.com>
Date: Tue, 13 Aug 2019 08:15:07 -0700
Subject: [PATCH 096/157] MAINTAINERS: iomap: Remove fs/iomap.c record

Update MAINTAINERS to reflect that fs/iomap.c file
was splitted into separate files in fs/iomap/

Cc: Darrick J. Wong <darrick.wong@oracle.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: linux-fsdevel@vger.kernel.org
Fixes: cb7181ff4b1c ("iomap: move the main iteration code into a separate file")
Signed-off-by: Denis Efremov <efremov@linux.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
---
 MAINTAINERS | 1 -
 1 file changed, 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 6426db5198f0..5a35c0dd1d35 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8416,7 +8416,6 @@ L:	linux-xfs@vger.kernel.org
 L:	linux-fsdevel@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git
 S:	Supported
-F:	fs/iomap.c
 F:	fs/iomap/
 F:	include/linux/iomap.h
 

From 190d03814eb3b49d4f87ff38fef26d36f3568a60 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 13 Aug 2019 17:39:56 +0200
Subject: [PATCH 097/157] ALSA: hda/realtek - Add quirk for HP Envy x360

HP Envy x360 (AMD Ryzen-based model) with 103c:8497 needs the same
quirk like HP Spectre x360 for enabling the mute LED over Mic3 pin.

BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=204373
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index de224cbea7a0..8aaf1d9c55cf 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -6987,6 +6987,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x103c, 0x82bf, "HP G3 mini", ALC221_FIXUP_HP_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x103c, 0x82c0, "HP G3 mini premium", ALC221_FIXUP_HP_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x103c, 0x83b9, "HP Spectre x360", ALC269_FIXUP_HP_MUTE_LED_MIC3),
+	SND_PCI_QUIRK(0x103c, 0x8497, "HP Envy x360", ALC269_FIXUP_HP_MUTE_LED_MIC3),
 	SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),
 	SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300),
 	SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),

From 2c8ccb37b08fe364f02a9914daca474d43151453 Mon Sep 17 00:00:00 2001
From: Bernard Metzler <bmt@zurich.ibm.com>
Date: Fri, 9 Aug 2019 17:18:16 +0200
Subject: [PATCH 098/157] RDMA/siw: Change CQ flags from 64->32 bits

This patch changes the driver/user shared (mmapped) CQ notification
flags field from unsigned 64-bits size to unsigned 32-bits size. This
enables building siw on 32-bit architectures.

This patch changes the siw-abi, but as siw was only just merged in
this merge window cycle, there are no released kernels with the prior
abi.  We are making no attempt to be binary compatible with siw user
space libraries prior to the merge of siw into the upstream kernel,
only moving forward with upstream kernels and upstream rdma-core
provided siw libraries are we guaranteeing compatibility.

Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com>
Link: https://lore.kernel.org/r/20190809151816.13018-1-bmt@zurich.ibm.com
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/sw/siw/Kconfig     |  2 +-
 drivers/infiniband/sw/siw/siw.h       |  2 +-
 drivers/infiniband/sw/siw/siw_qp.c    | 14 ++++++++++----
 drivers/infiniband/sw/siw/siw_verbs.c | 16 +++++++++++-----
 include/uapi/rdma/siw-abi.h           |  3 ++-
 5 files changed, 25 insertions(+), 12 deletions(-)

diff --git a/drivers/infiniband/sw/siw/Kconfig b/drivers/infiniband/sw/siw/Kconfig
index dace276aea14..b622fc62f2cd 100644
--- a/drivers/infiniband/sw/siw/Kconfig
+++ b/drivers/infiniband/sw/siw/Kconfig
@@ -1,6 +1,6 @@
 config RDMA_SIW
 	tristate "Software RDMA over TCP/IP (iWARP) driver"
-	depends on INET && INFINIBAND && LIBCRC32C && 64BIT
+	depends on INET && INFINIBAND && LIBCRC32C
 	select DMA_VIRT_OPS
 	help
 	This driver implements the iWARP RDMA transport over
diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h
index 03fd7b2f595f..77b1aabf6ff3 100644
--- a/drivers/infiniband/sw/siw/siw.h
+++ b/drivers/infiniband/sw/siw/siw.h
@@ -214,7 +214,7 @@ struct siw_wqe {
 struct siw_cq {
 	struct ib_cq base_cq;
 	spinlock_t lock;
-	u64 *notify;
+	struct siw_cq_ctrl *notify;
 	struct siw_cqe *queue;
 	u32 cq_put;
 	u32 cq_get;
diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c
index e27bd5b35b96..0990307c5d2c 100644
--- a/drivers/infiniband/sw/siw/siw_qp.c
+++ b/drivers/infiniband/sw/siw/siw_qp.c
@@ -1013,18 +1013,24 @@ out:
  */
 static bool siw_cq_notify_now(struct siw_cq *cq, u32 flags)
 {
-	u64 cq_notify;
+	u32 cq_notify;
 
 	if (!cq->base_cq.comp_handler)
 		return false;
 
-	cq_notify = READ_ONCE(*cq->notify);
+	/* Read application shared notification state */
+	cq_notify = READ_ONCE(cq->notify->flags);
 
 	if ((cq_notify & SIW_NOTIFY_NEXT_COMPLETION) ||
 	    ((cq_notify & SIW_NOTIFY_SOLICITED) &&
 	     (flags & SIW_WQE_SOLICITED))) {
-		/* dis-arm CQ */
-		smp_store_mb(*cq->notify, SIW_NOTIFY_NOT);
+		/*
+		 * CQ notification is one-shot: Since the
+		 * current CQE causes user notification,
+		 * the CQ gets dis-aremd and must be re-aremd
+		 * by the user for a new notification.
+		 */
+		WRITE_ONCE(cq->notify->flags, SIW_NOTIFY_NOT);
 
 		return true;
 	}
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index 32dc79d0e898..e7f3a2379d9d 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -1049,7 +1049,7 @@ int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr,
 
 	spin_lock_init(&cq->lock);
 
-	cq->notify = &((struct siw_cq_ctrl *)&cq->queue[size])->notify;
+	cq->notify = (struct siw_cq_ctrl *)&cq->queue[size];
 
 	if (udata) {
 		struct siw_uresp_create_cq uresp = {};
@@ -1141,11 +1141,17 @@ int siw_req_notify_cq(struct ib_cq *base_cq, enum ib_cq_notify_flags flags)
 	siw_dbg_cq(cq, "flags: 0x%02x\n", flags);
 
 	if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED)
-		/* CQ event for next solicited completion */
-		smp_store_mb(*cq->notify, SIW_NOTIFY_SOLICITED);
+		/*
+		 * Enable CQ event for next solicited completion.
+		 * and make it visible to all associated producers.
+		 */
+		smp_store_mb(cq->notify->flags, SIW_NOTIFY_SOLICITED);
 	else
-		/* CQ event for any signalled completion */
-		smp_store_mb(*cq->notify, SIW_NOTIFY_ALL);
+		/*
+		 * Enable CQ event for any signalled completion.
+		 * and make it visible to all associated producers.
+		 */
+		smp_store_mb(cq->notify->flags, SIW_NOTIFY_ALL);
 
 	if (flags & IB_CQ_REPORT_MISSED_EVENTS)
 		return cq->cq_put - cq->cq_get;
diff --git a/include/uapi/rdma/siw-abi.h b/include/uapi/rdma/siw-abi.h
index 7de68f1dc707..af735f55b291 100644
--- a/include/uapi/rdma/siw-abi.h
+++ b/include/uapi/rdma/siw-abi.h
@@ -180,6 +180,7 @@ struct siw_cqe {
  * to control CQ arming.
  */
 struct siw_cq_ctrl {
-	__aligned_u64 notify;
+	__u32 flags;
+	__u32 pad;
 };
 #endif

From 2d6c25215ab26bb009de3575faab7b685f138e92 Mon Sep 17 00:00:00 2001
From: Roberto Sassu <roberto.sassu@huawei.com>
Date: Mon, 5 Aug 2019 18:44:27 +0200
Subject: [PATCH 099/157] KEYS: trusted: allow module init if TPM is inactive
 or deactivated

Commit c78719203fc6 ("KEYS: trusted: allow trusted.ko to initialize w/o a
TPM") allows the trusted module to be loaded even if a TPM is not found, to
avoid module dependency problems.

However, trusted module initialization can still fail if the TPM is
inactive or deactivated. tpm_get_random() returns an error.

This patch removes the call to tpm_get_random() and instead extends the PCR
specified by the user with zeros. The security of this alternative is
equivalent to the previous one, as either option prevents with a PCR update
unsealing and misuse of sealed data by a user space process.

Even if a PCR is extended with zeros, instead of random data, it is still
computationally infeasible to find a value as input for a new PCR extend
operation, to obtain again the PCR value that would allow unsealing.

Cc: stable@vger.kernel.org
Fixes: 240730437deb ("KEYS: trusted: explicitly use tpm_chip structure...")
Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Reviewed-by: Tyler Hicks <tyhicks@canonical.com>
Suggested-by: Mimi Zohar <zohar@linux.ibm.com>
Reviewed-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
---
 security/keys/trusted.c | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/security/keys/trusted.c b/security/keys/trusted.c
index 9a94672e7adc..ade699131065 100644
--- a/security/keys/trusted.c
+++ b/security/keys/trusted.c
@@ -1228,24 +1228,11 @@ hashalg_fail:
 
 static int __init init_digests(void)
 {
-	u8 digest[TPM_MAX_DIGEST_SIZE];
-	int ret;
-	int i;
-
-	ret = tpm_get_random(chip, digest, TPM_MAX_DIGEST_SIZE);
-	if (ret < 0)
-		return ret;
-	if (ret < TPM_MAX_DIGEST_SIZE)
-		return -EFAULT;
-
 	digests = kcalloc(chip->nr_allocated_banks, sizeof(*digests),
 			  GFP_KERNEL);
 	if (!digests)
 		return -ENOMEM;
 
-	for (i = 0; i < chip->nr_allocated_banks; i++)
-		memcpy(digests[i].digest, digest, TPM_MAX_DIGEST_SIZE);
-
 	return 0;
 }
 

From eb93685847a9055283d05951c1b205e737f38533 Mon Sep 17 00:00:00 2001
From: Paul Walmsley <paul.walmsley@sifive.com>
Date: Wed, 7 Aug 2019 19:07:34 -0700
Subject: [PATCH 100/157] riscv: fix flush_tlb_range() end address for
 flush_tlb_page()

The RISC-V kernel implementation of flush_tlb_page() when CONFIG_SMP
is set is wrong.  It passes zero to flush_tlb_range() as the final
address to flush, but it should be at least 'addr'.

Some other Linux architecture ports use the beginning address to
flush, plus PAGE_SIZE, as the final address to flush.  This might
flush slightly more than what's needed, but it seems unlikely that
being more clever would improve anything.  So let's just take that
implementation for now.

While here, convert the macro into a static inline function, primarily
to avoid unintentional multiple evaluations of 'addr'.

This second version of the patch fixes a coding style issue found by
Christoph Hellwig <hch@lst.de>.

Reported-by: Andreas Schwab <schwab@suse.de>
Signed-off-by: Paul Walmsley <paul.walmsley@sifive.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 arch/riscv/include/asm/tlbflush.h | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
index 687dd19735a7..4d9bbe8438bf 100644
--- a/arch/riscv/include/asm/tlbflush.h
+++ b/arch/riscv/include/asm/tlbflush.h
@@ -53,10 +53,17 @@ static inline void remote_sfence_vma(struct cpumask *cmask, unsigned long start,
 }
 
 #define flush_tlb_all() sbi_remote_sfence_vma(NULL, 0, -1)
-#define flush_tlb_page(vma, addr) flush_tlb_range(vma, addr, 0)
+
 #define flush_tlb_range(vma, start, end) \
 	remote_sfence_vma(mm_cpumask((vma)->vm_mm), start, (end) - (start))
-#define flush_tlb_mm(mm) \
+
+static inline void flush_tlb_page(struct vm_area_struct *vma,
+				  unsigned long addr)
+{
+	flush_tlb_range(vma, addr, addr + PAGE_SIZE);
+}
+
+#define flush_tlb_mm(mm)				\
 	remote_sfence_vma(mm_cpumask(mm), 0, -1)
 
 #endif /* CONFIG_SMP */

From 76470ccd62f18bfa0954bec10f2329339f793914 Mon Sep 17 00:00:00 2001
From: Ralph Campbell <rcampbell@nvidia.com>
Date: Tue, 13 Aug 2019 15:37:04 -0700
Subject: [PATCH 101/157] mm: document zone device struct page field usage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Patch series "mm/hmm: fixes for device private page migration", v3.

Testing the latest linux git tree turned up a few bugs with page
migration to and from ZONE_DEVICE private and anonymous pages.
Hopefully it clarifies how ZONE_DEVICE private struct page uses the same
mapping and index fields from the source anonymous page mapping.

This patch (of 3):

Struct page for ZONE_DEVICE private pages uses the page->mapping and and
page->index fields while the source anonymous pages are migrated to
device private memory.  This is so rmap_walk() can find the page when
migrating the ZONE_DEVICE private page back to system memory.
ZONE_DEVICE pmem backed fsdax pages also use the page->mapping and
page->index fields when files are mapped into a process address space.

Add comments to struct page and remove the unused "_zd_pad_1" field to
make this more clear.

Link: http://lkml.kernel.org/r/20190724232700.23327-2-rcampbell@nvidia.com
Signed-off-by: Ralph Campbell <rcampbell@nvidia.com>
Reviewed-by: John Hubbard <jhubbard@nvidia.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Christoph Lameter <cl@linux.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Jérôme Glisse <jglisse@redhat.com>
Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Lai Jiangshan <jiangshanlai@gmail.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Jason Gunthorpe <jgg@mellanox.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_types.h | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 3a37a89eb7a7..6a7a1083b6fb 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -159,7 +159,16 @@ struct page {
 			/** @pgmap: Points to the hosting device page map. */
 			struct dev_pagemap *pgmap;
 			void *zone_device_data;
-			unsigned long _zd_pad_1;	/* uses mapping */
+			/*
+			 * ZONE_DEVICE private pages are counted as being
+			 * mapped so the next 3 words hold the mapping, index,
+			 * and private fields from the source anonymous or
+			 * page cache page while the page is migrated to device
+			 * private memory.
+			 * ZONE_DEVICE MEMORY_DEVICE_FS_DAX pages also
+			 * use the mapping, index, and private fields when
+			 * pmem backed DAX files are mapped.
+			 */
 		};
 
 		/** @rcu_head: You can use this to free a page by RCU. */

From 7ab0ad0e74f82db1a7e0810828785e44305cff20 Mon Sep 17 00:00:00 2001
From: Ralph Campbell <rcampbell@nvidia.com>
Date: Tue, 13 Aug 2019 15:37:07 -0700
Subject: [PATCH 102/157] mm/hmm: fix ZONE_DEVICE anon page mapping reuse
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When a ZONE_DEVICE private page is freed, the page->mapping field can be
set.  If this page is reused as an anonymous page, the previous value
can prevent the page from being inserted into the CPU's anon rmap table.
For example, when migrating a pte_none() page to device memory:

  migrate_vma(ops, vma, start, end, src, dst, private)
    migrate_vma_collect()
      src[] = MIGRATE_PFN_MIGRATE
    migrate_vma_prepare()
      /* no page to lock or isolate so OK */
    migrate_vma_unmap()
      /* no page to unmap so OK */
    ops->alloc_and_copy()
      /* driver allocates ZONE_DEVICE page for dst[] */
    migrate_vma_pages()
      migrate_vma_insert_page()
        page_add_new_anon_rmap()
          __page_set_anon_rmap()
            /* This check sees the page's stale mapping field */
            if (PageAnon(page))
              return
            /* page->mapping is not updated */

The result is that the migration appears to succeed but a subsequent CPU
fault will be unable to migrate the page back to system memory or worse.

Clear the page->mapping field when freeing the ZONE_DEVICE page so stale
pointer data doesn't affect future page use.

Link: http://lkml.kernel.org/r/20190719192955.30462-3-rcampbell@nvidia.com
Fixes: b7a523109fb5c9d2d6dd ("mm: don't clear ->mapping in hmm_devmem_free")
Signed-off-by: Ralph Campbell <rcampbell@nvidia.com>
Reviewed-by: John Hubbard <jhubbard@nvidia.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Jason Gunthorpe <jgg@mellanox.com>
Cc: Logan Gunthorpe <logang@deltatee.com>
Cc: Ira Weiny <ira.weiny@intel.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Jan Kara <jack@suse.cz>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: "Jérôme Glisse" <jglisse@redhat.com>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Lai Jiangshan <jiangshanlai@gmail.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memremap.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/mm/memremap.c b/mm/memremap.c
index 86432650f829..ed70c4e8e52a 100644
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -403,6 +403,30 @@ void __put_devmap_managed_page(struct page *page)
 
 		mem_cgroup_uncharge(page);
 
+		/*
+		 * When a device_private page is freed, the page->mapping field
+		 * may still contain a (stale) mapping value. For example, the
+		 * lower bits of page->mapping may still identify the page as
+		 * an anonymous page. Ultimately, this entire field is just
+		 * stale and wrong, and it will cause errors if not cleared.
+		 * One example is:
+		 *
+		 *  migrate_vma_pages()
+		 *    migrate_vma_insert_page()
+		 *      page_add_new_anon_rmap()
+		 *        __page_set_anon_rmap()
+		 *          ...checks page->mapping, via PageAnon(page) call,
+		 *            and incorrectly concludes that the page is an
+		 *            anonymous page. Therefore, it incorrectly,
+		 *            silently fails to set up the new anon rmap.
+		 *
+		 * For other types of ZONE_DEVICE pages, migration is either
+		 * handled differently or not done at all, so there is no need
+		 * to clear page->mapping.
+		 */
+		if (is_device_private_page(page))
+			page->mapping = NULL;
+
 		page->pgmap->ops->page_free(page);
 	} else if (!count)
 		__put_page(page);

From 1de13ee59225dfc98d483f8cce7d83f97c0b31de Mon Sep 17 00:00:00 2001
From: Ralph Campbell <rcampbell@nvidia.com>
Date: Tue, 13 Aug 2019 15:37:11 -0700
Subject: [PATCH 103/157] mm/hmm: fix bad subpage pointer in try_to_unmap_one
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When migrating an anonymous private page to a ZONE_DEVICE private page,
the source page->mapping and page->index fields are copied to the
destination ZONE_DEVICE struct page and the page_mapcount() is
increased.  This is so rmap_walk() can be used to unmap and migrate the
page back to system memory.

However, try_to_unmap_one() computes the subpage pointer from a swap pte
which computes an invalid page pointer and a kernel panic results such
as:

  BUG: unable to handle page fault for address: ffffea1fffffffc8

Currently, only single pages can be migrated to device private memory so
no subpage computation is needed and it can be set to "page".

[rcampbell@nvidia.com: add comment]
  Link: http://lkml.kernel.org/r/20190724232700.23327-4-rcampbell@nvidia.com
Link: http://lkml.kernel.org/r/20190719192955.30462-4-rcampbell@nvidia.com
Fixes: a5430dda8a3a1c ("mm/migrate: support un-addressable ZONE_DEVICE page in migration")
Signed-off-by: Ralph Campbell <rcampbell@nvidia.com>
Cc: "Jérôme Glisse" <jglisse@redhat.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Jason Gunthorpe <jgg@mellanox.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Ira Weiny <ira.weiny@intel.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Lai Jiangshan <jiangshanlai@gmail.com>
Cc: Logan Gunthorpe <logang@deltatee.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/rmap.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/mm/rmap.c b/mm/rmap.c
index e5dfe2ae6b0d..003377e24232 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1475,7 +1475,15 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			/*
 			 * No need to invalidate here it will synchronize on
 			 * against the special swap migration pte.
+			 *
+			 * The assignment to subpage above was computed from a
+			 * swap PTE which results in an invalid pointer.
+			 * Since only PAGE_SIZE pages can currently be
+			 * migrated, just set it to page. This will need to be
+			 * changed when hugepage migrations to device private
+			 * memory are supported.
 			 */
+			subpage = page;
 			goto discard;
 		}
 

From d883544515aae54842c21730b880172e7894fde9 Mon Sep 17 00:00:00 2001
From: Yang Shi <yang.shi@linux.alibaba.com>
Date: Tue, 13 Aug 2019 15:37:15 -0700
Subject: [PATCH 104/157] mm: mempolicy: make the behavior consistent when
 MPOL_MF_MOVE* and MPOL_MF_STRICT were specified

When both MPOL_MF_MOVE* and MPOL_MF_STRICT was specified, mbind() should
try best to migrate misplaced pages, if some of the pages could not be
migrated, then return -EIO.

There are three different sub-cases:
 1. vma is not migratable
 2. vma is migratable, but there are unmovable pages
 3. vma is migratable, pages are movable, but migrate_pages() fails

If #1 happens, kernel would just abort immediately, then return -EIO,
after a7f40cfe3b7a ("mm: mempolicy: make mbind() return -EIO when
MPOL_MF_STRICT is specified").

If #3 happens, kernel would set policy and migrate pages with
best-effort, but won't rollback the migrated pages and reset the policy
back.

Before that commit, they behaves in the same way.  It'd better to keep
their behavior consistent.  But, rolling back the migrated pages and
resetting the policy back sounds not feasible, so just make #1 behave as
same as #3.

Userspace will know that not everything was successfully migrated (via
-EIO), and can take whatever steps it deems necessary - attempt
rollback, determine which exact page(s) are violating the policy, etc.

Make queue_pages_range() return 1 to indicate there are unmovable pages
or vma is not migratable.

The #2 is not handled correctly in the current kernel, the following
patch will fix it.

[yang.shi@linux.alibaba.com: fix review comments from Vlastimil]
  Link: http://lkml.kernel.org/r/1563556862-54056-2-git-send-email-yang.shi@linux.alibaba.com
Link: http://lkml.kernel.org/r/1561162809-59140-2-git-send-email-yang.shi@linux.alibaba.com
Signed-off-by: Yang Shi <yang.shi@linux.alibaba.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/mempolicy.c | 68 +++++++++++++++++++++++++++++++++++---------------
 1 file changed, 48 insertions(+), 20 deletions(-)

diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index f48693f75b37..932c26845e3e 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -429,11 +429,14 @@ static inline bool queue_pages_required(struct page *page,
 }
 
 /*
- * queue_pages_pmd() has three possible return values:
- * 1 - pages are placed on the right node or queued successfully.
- * 0 - THP was split.
- * -EIO - is migration entry or MPOL_MF_STRICT was specified and an existing
- *        page was already on a node that does not follow the policy.
+ * queue_pages_pmd() has four possible return values:
+ * 0 - pages are placed on the right node or queued successfully.
+ * 1 - there is unmovable page, and MPOL_MF_MOVE* & MPOL_MF_STRICT were
+ *     specified.
+ * 2 - THP was split.
+ * -EIO - is migration entry or only MPOL_MF_STRICT was specified and an
+ *        existing page was already on a node that does not follow the
+ *        policy.
  */
 static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
 				unsigned long end, struct mm_walk *walk)
@@ -451,19 +454,17 @@ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
 	if (is_huge_zero_page(page)) {
 		spin_unlock(ptl);
 		__split_huge_pmd(walk->vma, pmd, addr, false, NULL);
+		ret = 2;
 		goto out;
 	}
-	if (!queue_pages_required(page, qp)) {
-		ret = 1;
+	if (!queue_pages_required(page, qp))
 		goto unlock;
-	}
 
-	ret = 1;
 	flags = qp->flags;
 	/* go to thp migration */
 	if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
 		if (!vma_migratable(walk->vma)) {
-			ret = -EIO;
+			ret = 1;
 			goto unlock;
 		}
 
@@ -479,6 +480,13 @@ out:
 /*
  * Scan through pages checking if pages follow certain conditions,
  * and move them to the pagelist if they do.
+ *
+ * queue_pages_pte_range() has three possible return values:
+ * 0 - pages are placed on the right node or queued successfully.
+ * 1 - there is unmovable page, and MPOL_MF_MOVE* & MPOL_MF_STRICT were
+ *     specified.
+ * -EIO - only MPOL_MF_STRICT was specified and an existing page was already
+ *        on a node that does not follow the policy.
  */
 static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
 			unsigned long end, struct mm_walk *walk)
@@ -488,17 +496,17 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
 	struct queue_pages *qp = walk->private;
 	unsigned long flags = qp->flags;
 	int ret;
+	bool has_unmovable = false;
 	pte_t *pte;
 	spinlock_t *ptl;
 
 	ptl = pmd_trans_huge_lock(pmd, vma);
 	if (ptl) {
 		ret = queue_pages_pmd(pmd, ptl, addr, end, walk);
-		if (ret > 0)
-			return 0;
-		else if (ret < 0)
+		if (ret != 2)
 			return ret;
 	}
+	/* THP was split, fall through to pte walk */
 
 	if (pmd_trans_unstable(pmd))
 		return 0;
@@ -519,14 +527,21 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
 		if (!queue_pages_required(page, qp))
 			continue;
 		if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
-			if (!vma_migratable(vma))
+			/* MPOL_MF_STRICT must be specified if we get here */
+			if (!vma_migratable(vma)) {
+				has_unmovable = true;
 				break;
+			}
 			migrate_page_add(page, qp->pagelist, flags);
 		} else
 			break;
 	}
 	pte_unmap_unlock(pte - 1, ptl);
 	cond_resched();
+
+	if (has_unmovable)
+		return 1;
+
 	return addr != end ? -EIO : 0;
 }
 
@@ -639,7 +654,13 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,
  *
  * If pages found in a given range are on a set of nodes (determined by
  * @nodes and @flags,) it's isolated and queued to the pagelist which is
- * passed via @private.)
+ * passed via @private.
+ *
+ * queue_pages_range() has three possible return values:
+ * 1 - there is unmovable page, but MPOL_MF_MOVE* & MPOL_MF_STRICT were
+ *     specified.
+ * 0 - queue pages successfully or no misplaced page.
+ * -EIO - there is misplaced page and only MPOL_MF_STRICT was specified.
  */
 static int
 queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
@@ -1182,6 +1203,7 @@ static long do_mbind(unsigned long start, unsigned long len,
 	struct mempolicy *new;
 	unsigned long end;
 	int err;
+	int ret;
 	LIST_HEAD(pagelist);
 
 	if (flags & ~(unsigned long)MPOL_MF_VALID)
@@ -1243,10 +1265,15 @@ static long do_mbind(unsigned long start, unsigned long len,
 	if (err)
 		goto mpol_out;
 
-	err = queue_pages_range(mm, start, end, nmask,
+	ret = queue_pages_range(mm, start, end, nmask,
 			  flags | MPOL_MF_INVERT, &pagelist);
-	if (!err)
-		err = mbind_range(mm, start, end, new);
+
+	if (ret < 0) {
+		err = -EIO;
+		goto up_out;
+	}
+
+	err = mbind_range(mm, start, end, new);
 
 	if (!err) {
 		int nr_failed = 0;
@@ -1259,13 +1286,14 @@ static long do_mbind(unsigned long start, unsigned long len,
 				putback_movable_pages(&pagelist);
 		}
 
-		if (nr_failed && (flags & MPOL_MF_STRICT))
+		if ((ret > 0) || (nr_failed && (flags & MPOL_MF_STRICT)))
 			err = -EIO;
 	} else
 		putback_movable_pages(&pagelist);
 
+up_out:
 	up_write(&mm->mmap_sem);
- mpol_out:
+mpol_out:
 	mpol_put(new);
 	return err;
 }

From a53190a4aaa36494f4d7209fd1fcc6f2ee08e0e0 Mon Sep 17 00:00:00 2001
From: Yang Shi <yang.shi@linux.alibaba.com>
Date: Tue, 13 Aug 2019 15:37:18 -0700
Subject: [PATCH 105/157] mm: mempolicy: handle vma with unmovable pages mapped
 correctly in mbind

When running syzkaller internally, we ran into the below bug on 4.9.x
kernel:

  kernel BUG at mm/huge_memory.c:2124!
  invalid opcode: 0000 [#1] SMP KASAN
  CPU: 0 PID: 1518 Comm: syz-executor107 Not tainted 4.9.168+ #2
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.5.1 01/01/2011
  task: ffff880067b34900 task.stack: ffff880068998000
  RIP: split_huge_page_to_list+0x8fb/0x1030 mm/huge_memory.c:2124
  Call Trace:
    split_huge_page include/linux/huge_mm.h:100 [inline]
    queue_pages_pte_range+0x7e1/0x1480 mm/mempolicy.c:538
    walk_pmd_range mm/pagewalk.c:50 [inline]
    walk_pud_range mm/pagewalk.c:90 [inline]
    walk_pgd_range mm/pagewalk.c:116 [inline]
    __walk_page_range+0x44a/0xdb0 mm/pagewalk.c:208
    walk_page_range+0x154/0x370 mm/pagewalk.c:285
    queue_pages_range+0x115/0x150 mm/mempolicy.c:694
    do_mbind mm/mempolicy.c:1241 [inline]
    SYSC_mbind+0x3c3/0x1030 mm/mempolicy.c:1370
    SyS_mbind+0x46/0x60 mm/mempolicy.c:1352
    do_syscall_64+0x1d2/0x600 arch/x86/entry/common.c:282
    entry_SYSCALL_64_after_swapgs+0x5d/0xdb
  Code: c7 80 1c 02 00 e8 26 0a 76 01 <0f> 0b 48 c7 c7 40 46 45 84 e8 4c
  RIP  [<ffffffff81895d6b>] split_huge_page_to_list+0x8fb/0x1030 mm/huge_memory.c:2124
   RSP <ffff88006899f980>

with the below test:

  uint64_t r[1] = {0xffffffffffffffff};

  int main(void)
  {
        syscall(__NR_mmap, 0x20000000, 0x1000000, 3, 0x32, -1, 0);
                                intptr_t res = 0;
        res = syscall(__NR_socket, 0x11, 3, 0x300);
        if (res != -1)
                r[0] = res;
        *(uint32_t*)0x20000040 = 0x10000;
        *(uint32_t*)0x20000044 = 1;
        *(uint32_t*)0x20000048 = 0xc520;
        *(uint32_t*)0x2000004c = 1;
        syscall(__NR_setsockopt, r[0], 0x107, 0xd, 0x20000040, 0x10);
        syscall(__NR_mmap, 0x20fed000, 0x10000, 0, 0x8811, r[0], 0);
        *(uint64_t*)0x20000340 = 2;
        syscall(__NR_mbind, 0x20ff9000, 0x4000, 0x4002, 0x20000340, 0x45d4, 3);
        return 0;
  }

Actually the test does:

  mmap(0x20000000, 16777216, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x20000000
  socket(AF_PACKET, SOCK_RAW, 768)        = 3
  setsockopt(3, SOL_PACKET, PACKET_TX_RING, {block_size=65536, block_nr=1, frame_size=50464, frame_nr=1}, 16) = 0
  mmap(0x20fed000, 65536, PROT_NONE, MAP_SHARED|MAP_FIXED|MAP_POPULATE|MAP_DENYWRITE, 3, 0) = 0x20fed000
  mbind(..., MPOL_MF_STRICT|MPOL_MF_MOVE) = 0

The setsockopt() would allocate compound pages (16 pages in this test)
for packet tx ring, then the mmap() would call packet_mmap() to map the
pages into the user address space specified by the mmap() call.

When calling mbind(), it would scan the vma to queue the pages for
migration to the new node.  It would split any huge page since 4.9
doesn't support THP migration, however, the packet tx ring compound
pages are not THP and even not movable.  So, the above bug is triggered.

However, the later kernel is not hit by this issue due to commit
d44d363f6578 ("mm: don't assume anonymous pages have SwapBacked flag"),
which just removes the PageSwapBacked check for a different reason.

But, there is a deeper issue.  According to the semantic of mbind(), it
should return -EIO if MPOL_MF_MOVE or MPOL_MF_MOVE_ALL was specified and
MPOL_MF_STRICT was also specified, but the kernel was unable to move all
existing pages in the range.  The tx ring of the packet socket is
definitely not movable, however, mbind() returns success for this case.

Although the most socket file associates with non-movable pages, but XDP
may have movable pages from gup.  So, it sounds not fine to just check
the underlying file type of vma in vma_migratable().

Change migrate_page_add() to check if the page is movable or not, if it
is unmovable, just return -EIO.  But do not abort pte walk immediately,
since there may be pages off LRU temporarily.  We should migrate other
pages if MPOL_MF_MOVE* is specified.  Set has_unmovable flag if some
paged could not be not moved, then return -EIO for mbind() eventually.

With this change the above test would return -EIO as expected.

[yang.shi@linux.alibaba.com: fix review comments from Vlastimil]
  Link: http://lkml.kernel.org/r/1563556862-54056-3-git-send-email-yang.shi@linux.alibaba.com
Link: http://lkml.kernel.org/r/1561162809-59140-3-git-send-email-yang.shi@linux.alibaba.com
Signed-off-by: Yang Shi <yang.shi@linux.alibaba.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/mempolicy.c | 32 +++++++++++++++++++++++++-------
 1 file changed, 25 insertions(+), 7 deletions(-)

diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 932c26845e3e..547cd403ed02 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -403,7 +403,7 @@ static const struct mempolicy_operations mpol_ops[MPOL_MAX] = {
 	},
 };
 
-static void migrate_page_add(struct page *page, struct list_head *pagelist,
+static int migrate_page_add(struct page *page, struct list_head *pagelist,
 				unsigned long flags);
 
 struct queue_pages {
@@ -463,12 +463,11 @@ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr,
 	flags = qp->flags;
 	/* go to thp migration */
 	if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
-		if (!vma_migratable(walk->vma)) {
+		if (!vma_migratable(walk->vma) ||
+		    migrate_page_add(page, qp->pagelist, flags)) {
 			ret = 1;
 			goto unlock;
 		}
-
-		migrate_page_add(page, qp->pagelist, flags);
 	} else
 		ret = -EIO;
 unlock:
@@ -532,7 +531,14 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
 				has_unmovable = true;
 				break;
 			}
-			migrate_page_add(page, qp->pagelist, flags);
+
+			/*
+			 * Do not abort immediately since there may be
+			 * temporary off LRU pages in the range.  Still
+			 * need migrate other LRU pages.
+			 */
+			if (migrate_page_add(page, qp->pagelist, flags))
+				has_unmovable = true;
 		} else
 			break;
 	}
@@ -961,7 +967,7 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
 /*
  * page migration, thp tail pages can be passed.
  */
-static void migrate_page_add(struct page *page, struct list_head *pagelist,
+static int migrate_page_add(struct page *page, struct list_head *pagelist,
 				unsigned long flags)
 {
 	struct page *head = compound_head(page);
@@ -974,8 +980,19 @@ static void migrate_page_add(struct page *page, struct list_head *pagelist,
 			mod_node_page_state(page_pgdat(head),
 				NR_ISOLATED_ANON + page_is_file_cache(head),
 				hpage_nr_pages(head));
+		} else if (flags & MPOL_MF_STRICT) {
+			/*
+			 * Non-movable page may reach here.  And, there may be
+			 * temporary off LRU pages or non-LRU movable pages.
+			 * Treat them as unmovable pages since they can't be
+			 * isolated, so they can't be moved at the moment.  It
+			 * should return -EIO for this case too.
+			 */
+			return -EIO;
 		}
 	}
+
+	return 0;
 }
 
 /* page allocation callback for NUMA node migration */
@@ -1178,9 +1195,10 @@ static struct page *new_page(struct page *page, unsigned long start)
 }
 #else
 
-static void migrate_page_add(struct page *page, struct list_head *pagelist,
+static int migrate_page_add(struct page *page, struct list_head *pagelist,
 				unsigned long flags)
 {
+	return -EIO;
 }
 
 int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,

From 6051d3bd3b91e96c59e62b8be2dba1cc2b19ee40 Mon Sep 17 00:00:00 2001
From: Henry Burns <henryburns@google.com>
Date: Tue, 13 Aug 2019 15:37:21 -0700
Subject: [PATCH 106/157] mm/z3fold.c: fix z3fold_destroy_pool() ordering

The constraint from the zpool use of z3fold_destroy_pool() is there are
no outstanding handles to memory (so no active allocations), but it is
possible for there to be outstanding work on either of the two wqs in
the pool.

If there is work queued on pool->compact_workqueue when it is called,
z3fold_destroy_pool() will do:

   z3fold_destroy_pool()
     destroy_workqueue(pool->release_wq)
     destroy_workqueue(pool->compact_wq)
       drain_workqueue(pool->compact_wq)
         do_compact_page(zhdr)
           kref_put(&zhdr->refcount)
             __release_z3fold_page(zhdr, ...)
               queue_work_on(pool->release_wq, &pool->work) *BOOM*

So compact_wq needs to be destroyed before release_wq.

Link: http://lkml.kernel.org/r/20190726224810.79660-1-henryburns@google.com
Fixes: 5d03a6613957 ("mm/z3fold.c: use kref to prevent page free/compact race")
Signed-off-by: Henry Burns <henryburns@google.com>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Reviewed-by: Jonathan Adams <jwadams@google.com>
Cc: Vitaly Vul <vitaly.vul@sony.com>
Cc: Vitaly Wool <vitalywool@gmail.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Al Viro <viro@zeniv.linux.org.uk
Cc: Henry Burns <henrywolfeburns@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/z3fold.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/mm/z3fold.c b/mm/z3fold.c
index 1a029a7432ee..43de92f52961 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -818,8 +818,15 @@ static void z3fold_destroy_pool(struct z3fold_pool *pool)
 {
 	kmem_cache_destroy(pool->c_handle);
 	z3fold_unregister_migration(pool);
-	destroy_workqueue(pool->release_wq);
+
+	/*
+	 * We need to destroy pool->compact_wq before pool->release_wq,
+	 * as any pending work on pool->compact_wq will call
+	 * queue_work(pool->release_wq, &pool->work).
+	 */
+
 	destroy_workqueue(pool->compact_wq);
+	destroy_workqueue(pool->release_wq);
 	kfree(pool);
 }
 

From b997052bc3ac444a0bceab1093aff7ae71ed419e Mon Sep 17 00:00:00 2001
From: Henry Burns <henryburns@google.com>
Date: Tue, 13 Aug 2019 15:37:25 -0700
Subject: [PATCH 107/157] mm/z3fold.c: fix z3fold_destroy_pool() race condition

The constraint from the zpool use of z3fold_destroy_pool() is there are
no outstanding handles to memory (so no active allocations), but it is
possible for there to be outstanding work on either of the two wqs in
the pool.

Calling z3fold_deregister_migration() before the workqueues are drained
means that there can be allocated pages referencing a freed inode,
causing any thread in compaction to be able to trip over the bad pointer
in PageMovable().

Link: http://lkml.kernel.org/r/20190726224810.79660-2-henryburns@google.com
Fixes: 1f862989b04a ("mm/z3fold.c: support page migration")
Signed-off-by: Henry Burns <henryburns@google.com>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Reviewed-by: Jonathan Adams <jwadams@google.com>
Cc: Vitaly Vul <vitaly.vul@sony.com>
Cc: Vitaly Wool <vitalywool@gmail.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Henry Burns <henrywolfeburns@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/z3fold.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/mm/z3fold.c b/mm/z3fold.c
index 43de92f52961..ed19d98c9dcd 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -817,16 +817,19 @@ out:
 static void z3fold_destroy_pool(struct z3fold_pool *pool)
 {
 	kmem_cache_destroy(pool->c_handle);
-	z3fold_unregister_migration(pool);
 
 	/*
 	 * We need to destroy pool->compact_wq before pool->release_wq,
 	 * as any pending work on pool->compact_wq will call
 	 * queue_work(pool->release_wq, &pool->work).
+	 *
+	 * There are still outstanding pages until both workqueues are drained,
+	 * so we cannot unregister migration until then.
 	 */
 
 	destroy_workqueue(pool->compact_wq);
 	destroy_workqueue(pool->release_wq);
+	z3fold_unregister_migration(pool);
 	kfree(pool);
 }
 

From 54a83d6bcbf8f4700013766b974bf9190d40b689 Mon Sep 17 00:00:00 2001
From: Miles Chen <miles.chen@mediatek.com>
Date: Tue, 13 Aug 2019 15:37:28 -0700
Subject: [PATCH 108/157] mm/memcontrol.c: fix use after free in
 mem_cgroup_iter()

This patch is sent to report an use after free in mem_cgroup_iter()
after merging commit be2657752e9e ("mm: memcg: fix use after free in
mem_cgroup_iter()").

I work with android kernel tree (4.9 & 4.14), and commit be2657752e9e
("mm: memcg: fix use after free in mem_cgroup_iter()") has been merged
to the trees.  However, I can still observe use after free issues
addressed in the commit be2657752e9e.  (on low-end devices, a few times
this month)

backtrace:
        css_tryget <- crash here
        mem_cgroup_iter
        shrink_node
        shrink_zones
        do_try_to_free_pages
        try_to_free_pages
        __perform_reclaim
        __alloc_pages_direct_reclaim
        __alloc_pages_slowpath
        __alloc_pages_nodemask

To debug, I poisoned mem_cgroup before freeing it:

  static void __mem_cgroup_free(struct mem_cgroup *memcg)
        for_each_node(node)
        free_mem_cgroup_per_node_info(memcg, node);
        free_percpu(memcg->stat);
  +     /* poison memcg before freeing it */
  +     memset(memcg, 0x78, sizeof(struct mem_cgroup));
        kfree(memcg);
  }

The coredump shows the position=0xdbbc2a00 is freed.

  (gdb) p/x ((struct mem_cgroup_per_node *)0xe5009e00)->iter[8]
  $13 = {position = 0xdbbc2a00, generation = 0x2efd}

  0xdbbc2a00:     0xdbbc2e00      0x00000000      0xdbbc2800      0x00000100
  0xdbbc2a10:     0x00000200      0x78787878      0x00026218      0x00000000
  0xdbbc2a20:     0xdcad6000      0x00000001      0x78787800      0x00000000
  0xdbbc2a30:     0x78780000      0x00000000      0x0068fb84      0x78787878
  0xdbbc2a40:     0x78787878      0x78787878      0x78787878      0xe3fa5cc0
  0xdbbc2a50:     0x78787878      0x78787878      0x00000000      0x00000000
  0xdbbc2a60:     0x00000000      0x00000000      0x00000000      0x00000000
  0xdbbc2a70:     0x00000000      0x00000000      0x00000000      0x00000000
  0xdbbc2a80:     0x00000000      0x00000000      0x00000000      0x00000000
  0xdbbc2a90:     0x00000001      0x00000000      0x00000000      0x00100000
  0xdbbc2aa0:     0x00000001      0xdbbc2ac8      0x00000000      0x00000000
  0xdbbc2ab0:     0x00000000      0x00000000      0x00000000      0x00000000
  0xdbbc2ac0:     0x00000000      0x00000000      0xe5b02618      0x00001000
  0xdbbc2ad0:     0x00000000      0x78787878      0x78787878      0x78787878
  0xdbbc2ae0:     0x78787878      0x78787878      0x78787878      0x78787878
  0xdbbc2af0:     0x78787878      0x78787878      0x78787878      0x78787878
  0xdbbc2b00:     0x78787878      0x78787878      0x78787878      0x78787878
  0xdbbc2b10:     0x78787878      0x78787878      0x78787878      0x78787878
  0xdbbc2b20:     0x78787878      0x78787878      0x78787878      0x78787878
  0xdbbc2b30:     0x78787878      0x78787878      0x78787878      0x78787878
  0xdbbc2b40:     0x78787878      0x78787878      0x78787878      0x78787878
  0xdbbc2b50:     0x78787878      0x78787878      0x78787878      0x78787878
  0xdbbc2b60:     0x78787878      0x78787878      0x78787878      0x78787878
  0xdbbc2b70:     0x78787878      0x78787878      0x78787878      0x78787878
  0xdbbc2b80:     0x78787878      0x78787878      0x00000000      0x78787878
  0xdbbc2b90:     0x78787878      0x78787878      0x78787878      0x78787878
  0xdbbc2ba0:     0x78787878      0x78787878      0x78787878      0x78787878

In the reclaim path, try_to_free_pages() does not setup
sc.target_mem_cgroup and sc is passed to do_try_to_free_pages(), ...,
shrink_node().

In mem_cgroup_iter(), root is set to root_mem_cgroup because
sc->target_mem_cgroup is NULL.  It is possible to assign a memcg to
root_mem_cgroup.nodeinfo.iter in mem_cgroup_iter().

        try_to_free_pages
        	struct scan_control sc = {...}, target_mem_cgroup is 0x0;
        do_try_to_free_pages
        shrink_zones
        shrink_node
        	 mem_cgroup *root = sc->target_mem_cgroup;
        	 memcg = mem_cgroup_iter(root, NULL, &reclaim);
        mem_cgroup_iter()
        	if (!root)
        		root = root_mem_cgroup;
        	...

        	css = css_next_descendant_pre(css, &root->css);
        	memcg = mem_cgroup_from_css(css);
        	cmpxchg(&iter->position, pos, memcg);

My device uses memcg non-hierarchical mode.  When we release a memcg:
invalidate_reclaim_iterators() reaches only dead_memcg and its parents.
If non-hierarchical mode is used, invalidate_reclaim_iterators() never
reaches root_mem_cgroup.

  static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg)
  {
        struct mem_cgroup *memcg = dead_memcg;

        for (; memcg; memcg = parent_mem_cgroup(memcg)
        ...
  }

So the use after free scenario looks like:

  CPU1						CPU2

  try_to_free_pages
  do_try_to_free_pages
  shrink_zones
  shrink_node
  mem_cgroup_iter()
      if (!root)
      	root = root_mem_cgroup;
      ...
      css = css_next_descendant_pre(css, &root->css);
      memcg = mem_cgroup_from_css(css);
      cmpxchg(&iter->position, pos, memcg);

        				invalidate_reclaim_iterators(memcg);
        				...
        				__mem_cgroup_free()
        					kfree(memcg);

  try_to_free_pages
  do_try_to_free_pages
  shrink_zones
  shrink_node
  mem_cgroup_iter()
      if (!root)
      	root = root_mem_cgroup;
      ...
      mz = mem_cgroup_nodeinfo(root, reclaim->pgdat->node_id);
      iter = &mz->iter[reclaim->priority];
      pos = READ_ONCE(iter->position);
      css_tryget(&pos->css) <- use after free

To avoid this, we should also invalidate root_mem_cgroup.nodeinfo.iter
in invalidate_reclaim_iterators().

[cai@lca.pw: fix -Wparentheses compilation warning]
  Link: http://lkml.kernel.org/r/1564580753-17531-1-git-send-email-cai@lca.pw
Link: http://lkml.kernel.org/r/20190730015729.4406-1-miles.chen@mediatek.com
Fixes: 5ac8fb31ad2e ("mm: memcontrol: convert reclaim iterator to simple css refcounting")
Signed-off-by: Miles Chen <miles.chen@mediatek.com>
Signed-off-by: Qian Cai <cai@lca.pw>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memcontrol.c | 39 +++++++++++++++++++++++++++++----------
 1 file changed, 29 insertions(+), 10 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index cdbb7a84cb6e..2e405e058eda 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1130,26 +1130,45 @@ void mem_cgroup_iter_break(struct mem_cgroup *root,
 		css_put(&prev->css);
 }
 
-static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg)
+static void __invalidate_reclaim_iterators(struct mem_cgroup *from,
+					struct mem_cgroup *dead_memcg)
 {
-	struct mem_cgroup *memcg = dead_memcg;
 	struct mem_cgroup_reclaim_iter *iter;
 	struct mem_cgroup_per_node *mz;
 	int nid;
 	int i;
 
-	for (; memcg; memcg = parent_mem_cgroup(memcg)) {
-		for_each_node(nid) {
-			mz = mem_cgroup_nodeinfo(memcg, nid);
-			for (i = 0; i <= DEF_PRIORITY; i++) {
-				iter = &mz->iter[i];
-				cmpxchg(&iter->position,
-					dead_memcg, NULL);
-			}
+	for_each_node(nid) {
+		mz = mem_cgroup_nodeinfo(from, nid);
+		for (i = 0; i <= DEF_PRIORITY; i++) {
+			iter = &mz->iter[i];
+			cmpxchg(&iter->position,
+				dead_memcg, NULL);
 		}
 	}
 }
 
+static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg)
+{
+	struct mem_cgroup *memcg = dead_memcg;
+	struct mem_cgroup *last;
+
+	do {
+		__invalidate_reclaim_iterators(memcg, dead_memcg);
+		last = memcg;
+	} while ((memcg = parent_mem_cgroup(memcg)));
+
+	/*
+	 * When cgruop1 non-hierarchy mode is used,
+	 * parent_mem_cgroup() does not walk all the way up to the
+	 * cgroup root (root_mem_cgroup). So we have to handle
+	 * dead_memcg from cgroup root separately.
+	 */
+	if (last != root_mem_cgroup)
+		__invalidate_reclaim_iterators(root_mem_cgroup,
+						dead_memcg);
+}
+
 /**
  * mem_cgroup_scan_tasks - iterate over tasks of a memory cgroup hierarchy
  * @memcg: hierarchy root

From 5336e52c9e46afa69b7a85a0a091f0e4daa23d6e Mon Sep 17 00:00:00 2001
From: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Date: Tue, 13 Aug 2019 15:37:31 -0700
Subject: [PATCH 109/157] mm/vmalloc.c: fix percpu free VM area search criteria

Recent changes to the vmalloc code by commit 68ad4a330433
("mm/vmalloc.c: keep track of free blocks for vmap allocation") can
cause spurious percpu allocation failures.  These, in turn, can result
in panic()s in the slub code.  One such possible panic was reported by
Dave Hansen in following link https://lkml.org/lkml/2019/6/19/939.
Another related panic observed is,

 RIP: 0033:0x7f46f7441b9b
 Call Trace:
  dump_stack+0x61/0x80
  pcpu_alloc.cold.30+0x22/0x4f
  mem_cgroup_css_alloc+0x110/0x650
  cgroup_apply_control_enable+0x133/0x330
  cgroup_mkdir+0x41b/0x500
  kernfs_iop_mkdir+0x5a/0x90
  vfs_mkdir+0x102/0x1b0
  do_mkdirat+0x7d/0xf0
  do_syscall_64+0x5b/0x180
  entry_SYSCALL_64_after_hwframe+0x44/0xa9

VMALLOC memory manager divides the entire VMALLOC space (VMALLOC_START
to VMALLOC_END) into multiple VM areas (struct vm_areas), and it mainly
uses two lists (vmap_area_list & free_vmap_area_list) to track the used
and free VM areas in VMALLOC space.  And pcpu_get_vm_areas(offsets[],
sizes[], nr_vms, align) function is used for allocating congruent VM
areas for percpu memory allocator.  In order to not conflict with
VMALLOC users, pcpu_get_vm_areas allocates VM areas near the end of the
VMALLOC space.  So the search for free vm_area for the given requirement
starts near VMALLOC_END and moves upwards towards VMALLOC_START.

Prior to commit 68ad4a330433, the search for free vm_area in
pcpu_get_vm_areas() involves following two main steps.

Step 1:
    Find a aligned "base" adress near VMALLOC_END.
    va = free vm area near VMALLOC_END
Step 2:
    Loop through number of requested vm_areas and check,
        Step 2.1:
           if (base < VMALLOC_START)
              1. fail with error
        Step 2.2:
           // end is offsets[area] + sizes[area]
           if (base + end > va->vm_end)
               1. Move the base downwards and repeat Step 2
        Step 2.3:
           if (base + start < va->vm_start)
              1. Move to previous free vm_area node, find aligned
                 base address and repeat Step 2

But Commit 68ad4a330433 removed Step 2.2 and modified Step 2.3 as below:

        Step 2.3:
           if (base + start < va->vm_start || base + end > va->vm_end)
              1. Move to previous free vm_area node, find aligned
                 base address and repeat Step 2

Above change is the root cause of spurious percpu memory allocation
failures.  For example, consider a case where a relatively large vm_area
(~ 30 TB) was ignored in free vm_area search because it did not pass the
base + end < vm->vm_end boundary check.  Ignoring such large free
vm_area's would lead to not finding free vm_area within boundary of
VMALLOC_start to VMALLOC_END which in turn leads to allocation failures.

So modify the search algorithm to include Step 2.2.

Link: http://lkml.kernel.org/r/20190729232139.91131-1-sathyanarayanan.kuppuswamy@linux.intel.com
Fixes: 68ad4a330433 ("mm/vmalloc.c: keep track of free blocks for vmap allocation")
Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Reported-by: Dave Hansen <dave.hansen@intel.com>
Acked-by: Dennis Zhou <dennis@kernel.org>
Reviewed-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: sathyanarayanan kuppuswamy <sathyanarayanan.kuppuswamy@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/vmalloc.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index e0fc963acc41..7ba11e12a11f 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -3278,10 +3278,20 @@ retry:
 		if (va == NULL)
 			goto overflow;
 
+		/*
+		 * If required width exeeds current VA block, move
+		 * base downwards and then recheck.
+		 */
+		if (base + end > va->va_end) {
+			base = pvm_determine_end_from_reverse(&va, align) - end;
+			term_area = area;
+			continue;
+		}
+
 		/*
 		 * If this VA does not fit, move base downwards and recheck.
 		 */
-		if (base + start < va->va_start || base + end > va->va_end) {
+		if (base + start < va->va_start) {
 			va = node_to_va(rb_prev(&va->rb_node));
 			base = pvm_determine_end_from_reverse(&va, align) - end;
 			term_area = area;

From fcf3a5b62f431ce9feeac73afbe128b7b6395dbc Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Tue, 13 Aug 2019 15:37:34 -0700
Subject: [PATCH 110/157] mm: kmemleak: disable early logging in case of error

If an error occurs during kmemleak_init() (e.g.  kmem cache cannot be
created), kmemleak is disabled but kmemleak_early_log remains enabled.
Subsequently, when the .init.text section is freed, the log_early()
function no longer exists.  To avoid a page fault in such scenario,
ensure that kmemleak_disable() also disables early logging.

Link: http://lkml.kernel.org/r/20190731152302.42073-1-catalin.marinas@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Reported-by: Qian Cai <cai@lca.pw>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/kmemleak.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 6e9e8cca663e..f6e602918dac 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -1966,6 +1966,7 @@ static void kmemleak_disable(void)
 
 	/* stop any memory operation tracing */
 	kmemleak_enabled = 0;
+	kmemleak_early_log = 0;
 
 	/* check whether it is too early for a kernel thread */
 	if (kmemleak_initialized)
@@ -2009,7 +2010,6 @@ void __init kmemleak_init(void)
 
 #ifdef CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF
 	if (!kmemleak_skip_disable) {
-		kmemleak_early_log = 0;
 		kmemleak_disable();
 		return;
 	}

From 951531691c4bcaa59f56a316e018bc2ff1ddf855 Mon Sep 17 00:00:00 2001
From: "Isaac J. Manjarres" <isaacm@codeaurora.org>
Date: Tue, 13 Aug 2019 15:37:37 -0700
Subject: [PATCH 111/157] mm/usercopy: use memory range to be accessed for
 wraparound check

Currently, when checking to see if accessing n bytes starting at address
"ptr" will cause a wraparound in the memory addresses, the check in
check_bogus_address() adds an extra byte, which is incorrect, as the
range of addresses that will be accessed is [ptr, ptr + (n - 1)].

This can lead to incorrectly detecting a wraparound in the memory
address, when trying to read 4 KB from memory that is mapped to the the
last possible page in the virtual address space, when in fact, accessing
that range of memory would not cause a wraparound to occur.

Use the memory range that will actually be accessed when considering if
accessing a certain amount of bytes will cause the memory address to
wrap around.

Link: http://lkml.kernel.org/r/1564509253-23287-1-git-send-email-isaacm@codeaurora.org
Fixes: f5509cc18daa ("mm: Hardened usercopy")
Signed-off-by: Prasad Sodagudi <psodagud@codeaurora.org>
Signed-off-by: Isaac J. Manjarres <isaacm@codeaurora.org>
Co-developed-by: Prasad Sodagudi <psodagud@codeaurora.org>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Trilok Soni <tsoni@codeaurora.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/usercopy.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/usercopy.c b/mm/usercopy.c
index 2a09796edef8..98e924864554 100644
--- a/mm/usercopy.c
+++ b/mm/usercopy.c
@@ -147,7 +147,7 @@ static inline void check_bogus_address(const unsigned long ptr, unsigned long n,
 				       bool to_user)
 {
 	/* Reject if object wraps past end of memory. */
-	if (ptr + n < ptr)
+	if (ptr + (n - 1) < ptr)
 		usercopy_abort("wrapped address", NULL, to_user, 0, ptr + n);
 
 	/* Reject if NULL or ZERO-allocation. */

From ec9f02384f6053f2a5417e82b65078edc5364a8d Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Tue, 13 Aug 2019 15:37:41 -0700
Subject: [PATCH 112/157] mm: workingset: fix vmstat counters for shadow nodes

Memcg counters for shadow nodes are broken because the memcg pointer is
obtained in a wrong way. The following approach is used:
        virt_to_page(xa_node)->mem_cgroup

Since commit 4d96ba353075 ("mm: memcg/slab: stop setting
page->mem_cgroup pointer for slab pages") page->mem_cgroup pointer isn't
set for slab pages, so memcg_from_slab_page() should be used instead.

Also I doubt that it ever worked correctly: virt_to_head_page() should
be used instead of virt_to_page().  Otherwise objects residing on tail
pages are not accounted, because only the head page contains a valid
mem_cgroup pointer.  That was a case since the introduction of these
counters by the commit 68d48e6a2df5 ("mm: workingset: add vmstat counter
for shadow nodes").

Link: http://lkml.kernel.org/r/20190801233532.138743-1-guro@fb.com
Fixes: 4d96ba353075 ("mm: memcg/slab: stop setting page->mem_cgroup pointer for slab pages")
Signed-off-by: Roman Gushchin <guro@fb.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 19 +++++++++++++++++++
 mm/memcontrol.c            | 20 ++++++++++++++++++++
 mm/workingset.c            | 10 ++++------
 3 files changed, 43 insertions(+), 6 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 44c41462be33..2cd4359cb38c 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -668,6 +668,7 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
 
 void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
 			int val);
+void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, int val);
 
 static inline void mod_lruvec_state(struct lruvec *lruvec,
 				    enum node_stat_item idx, int val)
@@ -1072,6 +1073,14 @@ static inline void mod_lruvec_page_state(struct page *page,
 	mod_node_page_state(page_pgdat(page), idx, val);
 }
 
+static inline void __mod_lruvec_slab_state(void *p, enum node_stat_item idx,
+					   int val)
+{
+	struct page *page = virt_to_head_page(p);
+
+	__mod_node_page_state(page_pgdat(page), idx, val);
+}
+
 static inline
 unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
 					    gfp_t gfp_mask,
@@ -1159,6 +1168,16 @@ static inline void __dec_lruvec_page_state(struct page *page,
 	__mod_lruvec_page_state(page, idx, -1);
 }
 
+static inline void __inc_lruvec_slab_state(void *p, enum node_stat_item idx)
+{
+	__mod_lruvec_slab_state(p, idx, 1);
+}
+
+static inline void __dec_lruvec_slab_state(void *p, enum node_stat_item idx)
+{
+	__mod_lruvec_slab_state(p, idx, -1);
+}
+
 /* idx can be of type enum memcg_stat_item or node_stat_item */
 static inline void inc_memcg_state(struct mem_cgroup *memcg,
 				   int idx)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2e405e058eda..6f5c0c517c49 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -768,6 +768,26 @@ void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
 	__this_cpu_write(pn->lruvec_stat_cpu->count[idx], x);
 }
 
+void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, int val)
+{
+	struct page *page = virt_to_head_page(p);
+	pg_data_t *pgdat = page_pgdat(page);
+	struct mem_cgroup *memcg;
+	struct lruvec *lruvec;
+
+	rcu_read_lock();
+	memcg = memcg_from_slab_page(page);
+
+	/* Untracked pages have no memcg, no lruvec. Update only the node */
+	if (!memcg || memcg == root_mem_cgroup) {
+		__mod_node_page_state(pgdat, idx, val);
+	} else {
+		lruvec = mem_cgroup_lruvec(pgdat, memcg);
+		__mod_lruvec_state(lruvec, idx, val);
+	}
+	rcu_read_unlock();
+}
+
 /**
  * __count_memcg_events - account VM events in a cgroup
  * @memcg: the memory cgroup
diff --git a/mm/workingset.c b/mm/workingset.c
index e0b4edcb88c8..c963831d354f 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -380,14 +380,12 @@ void workingset_update_node(struct xa_node *node)
 	if (node->count && node->count == node->nr_values) {
 		if (list_empty(&node->private_list)) {
 			list_lru_add(&shadow_nodes, &node->private_list);
-			__inc_lruvec_page_state(virt_to_page(node),
-						WORKINGSET_NODES);
+			__inc_lruvec_slab_state(node, WORKINGSET_NODES);
 		}
 	} else {
 		if (!list_empty(&node->private_list)) {
 			list_lru_del(&shadow_nodes, &node->private_list);
-			__dec_lruvec_page_state(virt_to_page(node),
-						WORKINGSET_NODES);
+			__dec_lruvec_slab_state(node, WORKINGSET_NODES);
 		}
 	}
 }
@@ -480,7 +478,7 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
 	}
 
 	list_lru_isolate(lru, item);
-	__dec_lruvec_page_state(virt_to_page(node), WORKINGSET_NODES);
+	__dec_lruvec_slab_state(node, WORKINGSET_NODES);
 
 	spin_unlock(lru_lock);
 
@@ -503,7 +501,7 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
 	 * shadow entries we were tracking ...
 	 */
 	xas_store(&xas, NULL);
-	__inc_lruvec_page_state(virt_to_page(node), WORKINGSET_NODERECLAIM);
+	__inc_lruvec_slab_state(node, WORKINGSET_NODERECLAIM);
 
 out_invalid:
 	xa_unlock_irq(&mapping->i_pages);

From 6a2aeab59e97101b4001bac84388fc49a992f87e Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Tue, 13 Aug 2019 15:37:44 -0700
Subject: [PATCH 113/157] seq_file: fix problem when seeking mid-record

If you use lseek or similar (e.g.  pread) to access a location in a
seq_file file that is within a record, rather than at a record boundary,
then the first read will return the remainder of the record, and the
second read will return the whole of that same record (instead of the
next record).  When seeking to a record boundary, the next record is
correctly returned.

This bug was introduced by a recent patch (identified below).  Before
that patch, seq_read() would increment m->index when the last of the
buffer was returned (m->count == 0).  After that patch, we rely on
->next to increment m->index after filling the buffer - but there was
one place where that didn't happen.

Link: https://lkml.kernel.org/lkml/877e7xl029.fsf@notabene.neil.brown.name/
Fixes: 1f4aace60b0e ("fs/seq_file.c: simplify seq_file iteration code and interface")
Signed-off-by: NeilBrown <neilb@suse.com>
Reported-by: Sergei Turchanov <turchanov@farpost.com>
Tested-by: Sergei Turchanov <turchanov@farpost.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Markus Elfring <Markus.Elfring@web.de>
Cc: <stable@vger.kernel.org>	[4.19+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/seq_file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/seq_file.c b/fs/seq_file.c
index 04f09689cd6d..1600034a929b 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -119,6 +119,7 @@ static int traverse(struct seq_file *m, loff_t offset)
 		}
 		if (seq_has_overflowed(m))
 			goto Eoverflow;
+		p = m->op->next(m, p, &m->index);
 		if (pos + m->count > offset) {
 			m->from = offset - pos;
 			m->count -= m->from;
@@ -126,7 +127,6 @@ static int traverse(struct seq_file *m, loff_t offset)
 		}
 		pos += m->count;
 		m->count = 0;
-		p = m->op->next(m, p, &m->index);
 		if (pos == offset)
 			break;
 	}

From 0cfaee2af3a04c0be5f056cebe5f804dedc59a43 Mon Sep 17 00:00:00 2001
From: Qian Cai <cai@lca.pw>
Date: Tue, 13 Aug 2019 15:37:47 -0700
Subject: [PATCH 114/157] include/asm-generic/5level-fixup.h: fix variable
 'p4d' set but not used

A compiler throws a warning on an arm64 system since commit 9849a5697d3d
("arch, mm: convert all architectures to use 5level-fixup.h"),

  mm/kasan/init.c: In function 'kasan_free_p4d':
  mm/kasan/init.c:344:9: warning: variable 'p4d' set but not used [-Wunused-but-set-variable]
   p4d_t *p4d;
          ^~~

because p4d_none() in "5level-fixup.h" is compiled away while it is a
static inline function in "pgtable-nopud.h".

However, if converted p4d_none() to a static inline there, powerpc would
be unhappy as it reads those in assembler language in
"arch/powerpc/include/asm/book3s/64/pgtable.h", so it needs to skip
assembly include for the static inline C function.

While at it, converted a few similar functions to be consistent with the
ones in "pgtable-nopud.h".

Link: http://lkml.kernel.org/r/20190806232917.881-1-cai@lca.pw
Signed-off-by: Qian Cai <cai@lca.pw>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/5level-fixup.h | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/include/asm-generic/5level-fixup.h b/include/asm-generic/5level-fixup.h
index bb6cb347018c..f6947da70d71 100644
--- a/include/asm-generic/5level-fixup.h
+++ b/include/asm-generic/5level-fixup.h
@@ -19,9 +19,24 @@
 
 #define p4d_alloc(mm, pgd, address)	(pgd)
 #define p4d_offset(pgd, start)		(pgd)
-#define p4d_none(p4d)			0
-#define p4d_bad(p4d)			0
-#define p4d_present(p4d)		1
+
+#ifndef __ASSEMBLY__
+static inline int p4d_none(p4d_t p4d)
+{
+	return 0;
+}
+
+static inline int p4d_bad(p4d_t p4d)
+{
+	return 0;
+}
+
+static inline int p4d_present(p4d_t p4d)
+{
+	return 1;
+}
+#endif
+
 #define p4d_ERROR(p4d)			do { } while (0)
 #define p4d_clear(p4d)			pgd_clear(p4d)
 #define p4d_val(p4d)			pgd_val(p4d)

From 92717d429b38e4f9f934eed7e605cc42858f1839 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Tue, 13 Aug 2019 15:37:50 -0700
Subject: [PATCH 115/157] Revert "Revert "mm, thp: consolidate THP gfp handling
 into alloc_hugepage_direct_gfpmask""

Patch series "reapply: relax __GFP_THISNODE for MADV_HUGEPAGE mappings".

The fixes for what was originally reported as "pathological THP
behavior" we rightfully reverted to be sure not to introduced
regressions at end of a merge window after a severe regression report
from the kernel bot.  We can safely re-apply them now that we had time
to analyze the problem.

The mm process worked fine, because the good fixes were eventually
committed upstream without excessive delay.

The regression reported by the kernel bot however forced us to revert
the good fixes to be sure not to introduce regressions and to give us
the time to analyze the issue further.  The silver lining is that this
extra time allowed to think more at this issue and also plan for a
future direction to improve things further in terms of THP NUMA
locality.

This patch (of 2):

This reverts commit 356ff8a9a78fb35d ("Revert "mm, thp: consolidate THP
gfp handling into alloc_hugepage_direct_gfpmask").  So it reapplies
89c83fb539f954 ("mm, thp: consolidate THP gfp handling into
alloc_hugepage_direct_gfpmask").

Consolidation of the THP allocation flags at the same place was meant to
be a clean up to easier handle otherwise scattered code which is
imposing a maintenance burden.  There were no real problems observed
with the gfp mask consolidation but the reversion was rushed through
without a larger consensus regardless.

This patch brings the consolidation back because this should make the
long term maintainability easier as well as it should allow future
changes to be less error prone.

[mhocko@kernel.org: changelog additions]
Link: http://lkml.kernel.org/r/20190503223146.2312-2-aarcange@redhat.com
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: David Rientjes <rientjes@google.com>
Cc: Zi Yan <zi.yan@cs.rutgers.edu>
Cc: Stefan Priebe - Profihost AG <s.priebe@profihost.ag>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h | 12 ++++--------
 mm/huge_memory.c    | 27 ++++++++++++++-------------
 mm/mempolicy.c      | 32 +++-----------------------------
 mm/shmem.c          |  2 +-
 4 files changed, 22 insertions(+), 51 deletions(-)

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index fb07b503dc45..f33881688f42 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -510,22 +510,18 @@ alloc_pages(gfp_t gfp_mask, unsigned int order)
 }
 extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
 			struct vm_area_struct *vma, unsigned long addr,
-			int node, bool hugepage);
-#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
-	alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true)
+			int node);
 #else
 #define alloc_pages(gfp_mask, order) \
 		alloc_pages_node(numa_node_id(), gfp_mask, order)
-#define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\
-	alloc_pages(gfp_mask, order)
-#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
+#define alloc_pages_vma(gfp_mask, order, vma, addr, node)\
 	alloc_pages(gfp_mask, order)
 #endif
 #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
 #define alloc_page_vma(gfp_mask, vma, addr)			\
-	alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false)
+	alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id())
 #define alloc_page_vma_node(gfp_mask, vma, addr, node)		\
-	alloc_pages_vma(gfp_mask, 0, vma, addr, node, false)
+	alloc_pages_vma(gfp_mask, 0, vma, addr, node)
 
 extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
 extern unsigned long get_zeroed_page(gfp_t gfp_mask);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 1334ede667a8..f7e388b8662d 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -644,30 +644,30 @@ release:
  *	    available
  * never: never stall for any thp allocation
  */
-static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma)
+static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma, unsigned long addr)
 {
 	const bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE);
+	const gfp_t gfp_mask = GFP_TRANSHUGE_LIGHT | __GFP_THISNODE;
 
 	/* Always do synchronous compaction */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags))
-		return GFP_TRANSHUGE | (vma_madvised ? 0 : __GFP_NORETRY);
+		return GFP_TRANSHUGE | __GFP_THISNODE |
+		       (vma_madvised ? 0 : __GFP_NORETRY);
 
 	/* Kick kcompactd and fail quickly */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags))
-		return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM;
+		return gfp_mask | __GFP_KSWAPD_RECLAIM;
 
 	/* Synchronous compaction if madvised, otherwise kick kcompactd */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags))
-		return GFP_TRANSHUGE_LIGHT |
-			(vma_madvised ? __GFP_DIRECT_RECLAIM :
-					__GFP_KSWAPD_RECLAIM);
+		return gfp_mask | (vma_madvised ? __GFP_DIRECT_RECLAIM :
+						  __GFP_KSWAPD_RECLAIM);
 
 	/* Only do synchronous compaction if madvised */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags))
-		return GFP_TRANSHUGE_LIGHT |
-		       (vma_madvised ? __GFP_DIRECT_RECLAIM : 0);
+		return gfp_mask | (vma_madvised ? __GFP_DIRECT_RECLAIM : 0);
 
-	return GFP_TRANSHUGE_LIGHT;
+	return gfp_mask;
 }
 
 /* Caller must hold page table lock. */
@@ -739,8 +739,8 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
 			pte_free(vma->vm_mm, pgtable);
 		return ret;
 	}
-	gfp = alloc_hugepage_direct_gfpmask(vma);
-	page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
+	gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
+	page = alloc_pages_vma(gfp, HPAGE_PMD_ORDER, vma, haddr, numa_node_id());
 	if (unlikely(!page)) {
 		count_vm_event(THP_FAULT_FALLBACK);
 		return VM_FAULT_FALLBACK;
@@ -1347,8 +1347,9 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
 alloc:
 	if (__transparent_hugepage_enabled(vma) &&
 	    !transparent_hugepage_debug_cow()) {
-		huge_gfp = alloc_hugepage_direct_gfpmask(vma);
-		new_page = alloc_hugepage_vma(huge_gfp, vma, haddr, HPAGE_PMD_ORDER);
+		huge_gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
+		new_page = alloc_pages_vma(huge_gfp, HPAGE_PMD_ORDER, vma,
+				haddr, numa_node_id());
 	} else
 		new_page = NULL;
 
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 547cd403ed02..9c9877a43d58 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1180,8 +1180,8 @@ static struct page *new_page(struct page *page, unsigned long start)
 	} else if (PageTransHuge(page)) {
 		struct page *thp;
 
-		thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, address,
-					 HPAGE_PMD_ORDER);
+		thp = alloc_pages_vma(GFP_TRANSHUGE, HPAGE_PMD_ORDER, vma,
+				address, numa_node_id());
 		if (!thp)
 			return NULL;
 		prep_transhuge_page(thp);
@@ -2083,7 +2083,6 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
  * 	@vma:  Pointer to VMA or NULL if not available.
  *	@addr: Virtual Address of the allocation. Must be inside the VMA.
  *	@node: Which node to prefer for allocation (modulo policy).
- *	@hugepage: for hugepages try only the preferred node if possible
  *
  * 	This function allocates a page from the kernel page pool and applies
  *	a NUMA policy associated with the VMA or the current process.
@@ -2094,7 +2093,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
  */
 struct page *
 alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
-		unsigned long addr, int node, bool hugepage)
+		unsigned long addr, int node)
 {
 	struct mempolicy *pol;
 	struct page *page;
@@ -2112,31 +2111,6 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
 		goto out;
 	}
 
-	if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage)) {
-		int hpage_node = node;
-
-		/*
-		 * For hugepage allocation and non-interleave policy which
-		 * allows the current node (or other explicitly preferred
-		 * node) we only try to allocate from the current/preferred
-		 * node and don't fall back to other nodes, as the cost of
-		 * remote accesses would likely offset THP benefits.
-		 *
-		 * If the policy is interleave, or does not allow the current
-		 * node in its nodemask, we allocate the standard way.
-		 */
-		if (pol->mode == MPOL_PREFERRED && !(pol->flags & MPOL_F_LOCAL))
-			hpage_node = pol->v.preferred_node;
-
-		nmask = policy_nodemask(gfp, pol);
-		if (!nmask || node_isset(hpage_node, *nmask)) {
-			mpol_cond_put(pol);
-			page = __alloc_pages_node(hpage_node,
-						gfp | __GFP_THISNODE, order);
-			goto out;
-		}
-	}
-
 	nmask = policy_nodemask(gfp, pol);
 	preferred_nid = policy_node(gfp, pol, node);
 	page = __alloc_pages_nodemask(gfp, order, preferred_nid, nmask);
diff --git a/mm/shmem.c b/mm/shmem.c
index 626d8c74b973..2bed4761f279 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1466,7 +1466,7 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp,
 
 	shmem_pseudo_vma_init(&pvma, info, hindex);
 	page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN,
-			HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(), true);
+			HPAGE_PMD_ORDER, &pvma, 0, numa_node_id());
 	shmem_pseudo_vma_destroy(&pvma);
 	if (page)
 		prep_transhuge_page(page);

From a8282608c88e08b1782141026eab61204c1e533f Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Tue, 13 Aug 2019 15:37:53 -0700
Subject: [PATCH 116/157] Revert "mm, thp: restore node-local hugepage
 allocations"

This reverts commit 2f0799a0ffc033b ("mm, thp: restore node-local
hugepage allocations").

commit 2f0799a0ffc033b was rightfully applied to avoid the risk of a
severe regression that was reported by the kernel test robot at the end
of the merge window.  Now we understood the regression was a false
positive and was caused by a significant increase in fairness during a
swap trashing benchmark.  So it's safe to re-apply the fix and continue
improving the code from there.  The benchmark that reported the
regression is very useful, but it provides a meaningful result only when
there is no significant alteration in fairness during the workload.  The
removal of __GFP_THISNODE increased fairness.

__GFP_THISNODE cannot be used in the generic page faults path for new
memory allocations under the MPOL_DEFAULT mempolicy, or the allocation
behavior significantly deviates from what the MPOL_DEFAULT semantics are
supposed to be for THP and 4k allocations alike.

Setting THP defrag to "always" or using MADV_HUGEPAGE (with THP defrag
set to "madvise") has never meant to provide an implicit MPOL_BIND on
the "current" node the task is running on, causing swap storms and
providing a much more aggressive behavior than even zone_reclaim_node =
3.

Any workload who could have benefited from __GFP_THISNODE has now to
enable zone_reclaim_mode=1||2||3.  __GFP_THISNODE implicitly provided
the zone_reclaim_mode behavior, but it only did so if THP was enabled:
if THP was disabled, there would have been no chance to get any 4k page
from the current node if the current node was full of pagecache, which
further shows how this __GFP_THISNODE was misplaced in MADV_HUGEPAGE.
MADV_HUGEPAGE has never been intended to provide any zone_reclaim_mode
semantics, in fact the two are orthogonal, zone_reclaim_mode = 1|2|3
must work exactly the same with MADV_HUGEPAGE set or not.

The performance characteristic of memory depends on the hardware
details.  The numbers below are obtained on Naples/EPYC architecture and
the N/A projection extends them to show what we should aim for in the
future as a good THP NUMA locality default.  The benchmark used
exercises random memory seeks (note: the cost of the page faults is not
part of the measurement).

  D0 THP | D0 4k | D1 THP | D1 4k | D2 THP | D2 4k | D3 THP | D3 4k | ...
  0%     | +43%  | +45%   | +106% | +131%  | +224% | N/A    | N/A

D0 means distance zero (i.e.  local memory), D1 means distance one (i.e.
intra socket memory), D2 means distance two (i.e.  inter socket memory),
etc...

For the guest physical memory allocated by qemu and for guest mode
kernel the performance characteristic of RAM is more complex and an
ideal default could be:

  D0 THP | D1 THP | D0 4k | D2 THP | D1 4k | D3 THP | D2 4k | D3 4k | ...
  0%     | +58%   | +101% | N/A    | +222% | N/A    | N/A   | N/A

NOTE: the N/A are projections and haven't been measured yet, the
measurement in this case is done on a 1950x with only two NUMA nodes.
The THP case here means THP was used both in the host and in the guest.

After applying this commit the THP NUMA locality order that we'll get
out of MADV_HUGEPAGE is this:

  D0 THP | D1 THP | D2 THP | D3 THP | ... | D0 4k | D1 4k | D2 4k | D3 4k | ...

Before this commit it was:

  D0 THP | D0 4k | D1 4k | D2 4k | D3 4k | ...

Even if we ignore the breakage of large workloads that can't fit in a
single node that the __GFP_THISNODE implicit "current node" mbind
caused, the THP NUMA locality order provided by __GFP_THISNODE was still
not the one we shall aim for in the long term (i.e.  the first one at
the top).

After this commit is applied, we can introduce a new allocator multi
order API and to replace those two alloc_pages_vmas calls in the page
fault path, with a single multi order call:

        unsigned int order = (1 << HPAGE_PMD_ORDER) | (1 << 0);
        page = alloc_pages_multi_order(..., &order);
        if (!page)
        	goto out;
        if (!(order & (1 << 0))) {
        	VM_WARN_ON(order != 1 << HPAGE_PMD_ORDER);
        	/* THP fault */
        } else {
        	VM_WARN_ON(order != 1 << 0);
        	/* 4k fallback */
        }

The page allocator logic has to be altered so that when it fails on any
zone with order 9, it has to try again with a order 0 before falling
back to the next zone in the zonelist.

After that we need to do more measurements and evaluate if adding an
opt-in feature for guest mode is worth it, to swap "DN 4k | DN+1 THP"
with "DN+1 THP | DN 4k" at every NUMA distance crossing.

Link: http://lkml.kernel.org/r/20190503223146.2312-3-aarcange@redhat.com
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: David Rientjes <rientjes@google.com>
Cc: Zi Yan <zi.yan@cs.rutgers.edu>
Cc: Stefan Priebe - Profihost AG <s.priebe@profihost.ag>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mempolicy.h |  2 ++
 mm/huge_memory.c          | 42 ++++++++++++++++++++++++---------------
 mm/mempolicy.c            |  2 +-
 3 files changed, 29 insertions(+), 17 deletions(-)

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 5228c62af416..bac395f1d00a 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -139,6 +139,8 @@ struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp,
 struct mempolicy *get_task_policy(struct task_struct *p);
 struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
 		unsigned long addr);
+struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
+						unsigned long addr);
 bool vma_policy_mof(struct vm_area_struct *vma);
 
 extern void numa_default_policy(void);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index f7e388b8662d..738065f765ab 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -647,27 +647,37 @@ release:
 static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma, unsigned long addr)
 {
 	const bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE);
-	const gfp_t gfp_mask = GFP_TRANSHUGE_LIGHT | __GFP_THISNODE;
+	gfp_t this_node = 0;
+
+#ifdef CONFIG_NUMA
+	struct mempolicy *pol;
+	/*
+	 * __GFP_THISNODE is used only when __GFP_DIRECT_RECLAIM is not
+	 * specified, to express a general desire to stay on the current
+	 * node for optimistic allocation attempts. If the defrag mode
+	 * and/or madvise hint requires the direct reclaim then we prefer
+	 * to fallback to other node rather than node reclaim because that
+	 * can lead to excessive reclaim even though there is free memory
+	 * on other nodes. We expect that NUMA preferences are specified
+	 * by memory policies.
+	 */
+	pol = get_vma_policy(vma, addr);
+	if (pol->mode != MPOL_BIND)
+		this_node = __GFP_THISNODE;
+	mpol_cond_put(pol);
+#endif
 
-	/* Always do synchronous compaction */
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags))
-		return GFP_TRANSHUGE | __GFP_THISNODE |
-		       (vma_madvised ? 0 : __GFP_NORETRY);
-
-	/* Kick kcompactd and fail quickly */
+		return GFP_TRANSHUGE | (vma_madvised ? 0 : __GFP_NORETRY);
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags))
-		return gfp_mask | __GFP_KSWAPD_RECLAIM;
-
-	/* Synchronous compaction if madvised, otherwise kick kcompactd */
+		return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM | this_node;
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags))
-		return gfp_mask | (vma_madvised ? __GFP_DIRECT_RECLAIM :
-						  __GFP_KSWAPD_RECLAIM);
-
-	/* Only do synchronous compaction if madvised */
+		return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM :
+							     __GFP_KSWAPD_RECLAIM | this_node);
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags))
-		return gfp_mask | (vma_madvised ? __GFP_DIRECT_RECLAIM : 0);
-
-	return gfp_mask;
+		return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM :
+							     this_node);
+	return GFP_TRANSHUGE_LIGHT | this_node;
 }
 
 /* Caller must hold page table lock. */
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 9c9877a43d58..65e0874fce17 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1734,7 +1734,7 @@ struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
  * freeing by another task.  It is the caller's responsibility to free the
  * extra reference for shared policies.
  */
-static struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
+struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
 						unsigned long addr)
 {
 	struct mempolicy *pol = __get_vma_policy(vma, addr);

From 28360f398778d7623a5ff8a8e90958c0d925e120 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Tue, 13 Aug 2019 15:37:57 -0700
Subject: [PATCH 117/157] mm, vmscan: do not special-case slab reclaim when
 watermarks are boosted

Dave Chinner reported a problem pointing a finger at commit 1c30844d2dfe
("mm: reclaim small amounts of memory when an external fragmentation
event occurs").

The report is extensive:

  https://lore.kernel.org/linux-mm/20190807091858.2857-1-david@fromorbit.com/

and it's worth recording the most relevant parts (colorful language and
typos included).

	When running a simple, steady state 4kB file creation test to
	simulate extracting tarballs larger than memory full of small
	files into the filesystem, I noticed that once memory fills up
	the cache balance goes to hell.

	The workload is creating one dirty cached inode for every dirty
	page, both of which should require a single IO each to clean and
	reclaim, and creation of inodes is throttled by the rate at which
	dirty writeback runs at (via balance dirty pages). Hence the ingest
	rate of new cached inodes and page cache pages is identical and
	steady. As a result, memory reclaim should quickly find a steady
	balance between page cache and inode caches.

	The moment memory fills, the page cache is reclaimed at a much
	faster rate than the inode cache, and evidence suggests that
	the inode cache shrinker is not being called when large batches
	of pages are being reclaimed. In roughly the same time period
	that it takes to fill memory with 50% pages and 50% slab caches,
	memory reclaim reduces the page cache down to just dirty pages
	and slab caches fill the entirety of memory.

	The LRU is largely full of dirty pages, and we're getting spikes
	of random writeback from memory reclaim so it's all going to shit.
	Behaviour never recovers, the page cache remains pinned at just
	dirty pages, and nothing I could tune would make any difference.
	vfs_cache_pressure makes no difference - I would set it so high
	it should trim the entire inode caches in a single pass, yet it
	didn't do anything. It was clear from tracing and live telemetry
	that the shrinkers were pretty much not running except when
	there was absolutely no memory free at all, and then they did
	the minimum necessary to free memory to make progress.

	So I went looking at the code, trying to find places where pages
	got reclaimed and the shrinkers weren't called. There's only one
	- kswapd doing boosted reclaim as per commit 1c30844d2dfe ("mm:
	reclaim small amounts of memory when an external fragmentation
	event occurs").

The watermark boosting introduced by the commit is triggered in response
to an allocation "fragmentation event".  The boosting was not intended
to target THP specifically and triggers even if THP is disabled.
However, with Dave's perfectly reasonable workload, fragmentation events
can be very common given the ratio of slab to page cache allocations so
boosting remains active for long periods of time.

As high-order allocations might use compaction and compaction cannot
move slab pages the decision was made in the commit to special-case
kswapd when watermarks are boosted -- kswapd avoids reclaiming slab as
reclaiming slab does not directly help compaction.

As Dave notes, this decision means that slab can be artificially
protected for long periods of time and messes up the balance with slab
and page caches.

Removing the special casing can still indirectly help avoid
fragmentation by avoiding fragmentation-causing events due to slab
allocation as pages from a slab pageblock will have some slab objects
freed.  Furthermore, with the special casing, reclaim behaviour is
unpredictable as kswapd sometimes examines slab and sometimes does not
in a manner that is tricky to tune or analyse.

This patch removes the special casing.  The downside is that this is not
a universal performance win.  Some benchmarks that depend on the
residency of data when rereading metadata may see a regression when slab
reclaim is restored to its original behaviour.  Similarly, some
benchmarks that only read-once or write-once may perform better when
page reclaim is too aggressive.  The primary upside is that slab
shrinker is less surprising (arguably more sane but that's a matter of
opinion), behaves consistently regardless of the fragmentation state of
the system and properly obeys VM sysctls.

A fsmark benchmark configuration was constructed similar to what Dave
reported and is codified by the mmtest configuration
config-io-fsmark-small-file-stream.  It was evaluated on a 1-socket
machine to avoid dealing with NUMA-related issues and the timing of
reclaim.  The storage was an SSD Samsung Evo and a fresh trimmed XFS
filesystem was used for the test data.

This is not an exact replication of Dave's setup.  The configuration
scales its parameters depending on the memory size of the SUT to behave
similarly across machines.  The parameters mean the first sample
reported by fs_mark is using 50% of RAM which will barely be throttled
and look like a big outlier.  Dave used fake NUMA to have multiple
kswapd instances which I didn't replicate.  Finally, the number of
iterations differ from Dave's test as the target disk was not large
enough.  While not identical, it should be representative.

  fsmark
                                     5.3.0-rc3              5.3.0-rc3
                                       vanilla          shrinker-v1r1
  Min       1-files/sec     4444.80 (   0.00%)     4765.60 (   7.22%)
  1st-qrtle 1-files/sec     5005.10 (   0.00%)     5091.70 (   1.73%)
  2nd-qrtle 1-files/sec     4917.80 (   0.00%)     4855.60 (  -1.26%)
  3rd-qrtle 1-files/sec     4667.40 (   0.00%)     4831.20 (   3.51%)
  Max-1     1-files/sec    11421.50 (   0.00%)     9999.30 ( -12.45%)
  Max-5     1-files/sec    11421.50 (   0.00%)     9999.30 ( -12.45%)
  Max-10    1-files/sec    11421.50 (   0.00%)     9999.30 ( -12.45%)
  Max-90    1-files/sec     4649.60 (   0.00%)     4780.70 (   2.82%)
  Max-95    1-files/sec     4491.00 (   0.00%)     4768.20 (   6.17%)
  Max-99    1-files/sec     4491.00 (   0.00%)     4768.20 (   6.17%)
  Max       1-files/sec    11421.50 (   0.00%)     9999.30 ( -12.45%)
  Hmean     1-files/sec     5004.75 (   0.00%)     5075.96 (   1.42%)
  Stddev    1-files/sec     1778.70 (   0.00%)     1369.66 (  23.00%)
  CoeffVar  1-files/sec       33.70 (   0.00%)       26.05 (  22.71%)
  BHmean-99 1-files/sec     5053.72 (   0.00%)     5101.52 (   0.95%)
  BHmean-95 1-files/sec     5053.72 (   0.00%)     5101.52 (   0.95%)
  BHmean-90 1-files/sec     5107.05 (   0.00%)     5131.41 (   0.48%)
  BHmean-75 1-files/sec     5208.45 (   0.00%)     5206.68 (  -0.03%)
  BHmean-50 1-files/sec     5405.53 (   0.00%)     5381.62 (  -0.44%)
  BHmean-25 1-files/sec     6179.75 (   0.00%)     6095.14 (  -1.37%)

                     5.3.0-rc3   5.3.0-rc3
                       vanillashrinker-v1r1
  Duration User         501.82      497.29
  Duration System      4401.44     4424.08
  Duration Elapsed     8124.76     8358.05

This is showing a slight skew for the max result representing a large
outlier for the 1st, 2nd and 3rd quartile are similar indicating that
the bulk of the results show little difference.  Note that an earlier
version of the fsmark configuration showed a regression but that
included more samples taken while memory was still filling.

Note that the elapsed time is higher.  Part of this is that the
configuration included time to delete all the test files when the test
completes -- the test automation handles the possibility of testing
fsmark with multiple thread counts.  Without the patch, many of these
objects would be memory resident which is part of what the patch is
addressing.

There are other important observations that justify the patch.

1. With the vanilla kernel, the number of dirty pages in the system is
   very low for much of the test. With this patch, dirty pages is
   generally kept at 10% which matches vm.dirty_background_ratio which
   is normal expected historical behaviour.

2. With the vanilla kernel, the ratio of Slab/Pagecache is close to
   0.95 for much of the test i.e. Slab is being left alone and
   dominating memory consumption. With the patch applied, the ratio
   varies between 0.35 and 0.45 with the bulk of the measured ratios
   roughly half way between those values. This is a different balance to
   what Dave reported but it was at least consistent.

3. Slabs are scanned throughout the entire test with the patch applied.
   The vanille kernel has periods with no scan activity and then
   relatively massive spikes.

4. Without the patch, kswapd scan rates are very variable. With the
   patch, the scan rates remain quite steady.

4. Overall vmstats are closer to normal expectations

	                                5.3.0-rc3      5.3.0-rc3
	                                  vanilla  shrinker-v1r1
    Ops Direct pages scanned             99388.00      328410.00
    Ops Kswapd pages scanned          45382917.00    33451026.00
    Ops Kswapd pages reclaimed        30869570.00    25239655.00
    Ops Direct pages reclaimed           74131.00        5830.00
    Ops Kswapd efficiency %                 68.02          75.45
    Ops Kswapd velocity                   5585.75        4002.25
    Ops Page reclaim immediate         1179721.00      430927.00
    Ops Slabs scanned                 62367361.00    73581394.00
    Ops Direct inode steals               2103.00        1002.00
    Ops Kswapd inode steals             570180.00     5183206.00

	o Vanilla kernel is hitting direct reclaim more frequently,
	  not very much in absolute terms but the fact the patch
	  reduces it is interesting
	o "Page reclaim immediate" in the vanilla kernel indicates
	  dirty pages are being encountered at the tail of the LRU.
	  This is generally bad and means in this case that the LRU
	  is not long enough for dirty pages to be cleaned by the
	  background flush in time. This is much reduced by the
	  patch.
	o With the patch, kswapd is reclaiming 10 times more slab
	  pages than with the vanilla kernel. This is indicative
	  of the watermark boosting over-protecting slab

A more complete set of tests were run that were part of the basis for
introducing boosting and while there are some differences, they are well
within tolerances.

Bottom line, the special casing kswapd to avoid slab behaviour is
unpredictable and can lead to abnormal results for normal workloads.

This patch restores the expected behaviour that slab and page cache is
balanced consistently for a workload with a steady allocation ratio of
slab/pagecache pages.  It also means that if there are workloads that
favour the preservation of slab over pagecache that it can be tuned via
vm.vfs_cache_pressure where as the vanilla kernel effectively ignores
the parameter when boosting is active.

Link: http://lkml.kernel.org/r/20190808182946.GM2739@techsingularity.net
Fixes: 1c30844d2dfe ("mm: reclaim small amounts of memory when an external fragmentation event occurs")
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: <stable@vger.kernel.org>	[5.0+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/vmscan.c | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index dbdc46a84f63..c77d1e3761a7 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -88,9 +88,6 @@ struct scan_control {
 	/* Can pages be swapped as part of reclaim? */
 	unsigned int may_swap:1;
 
-	/* e.g. boosted watermark reclaim leaves slabs alone */
-	unsigned int may_shrinkslab:1;
-
 	/*
 	 * Cgroups are not reclaimed below their configured memory.low,
 	 * unless we threaten to OOM. If any cgroups are skipped due to
@@ -2714,10 +2711,8 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
 			shrink_node_memcg(pgdat, memcg, sc, &lru_pages);
 			node_lru_pages += lru_pages;
 
-			if (sc->may_shrinkslab) {
-				shrink_slab(sc->gfp_mask, pgdat->node_id,
-				    memcg, sc->priority);
-			}
+			shrink_slab(sc->gfp_mask, pgdat->node_id, memcg,
+					sc->priority);
 
 			/* Record the group's reclaim efficiency */
 			vmpressure(sc->gfp_mask, memcg, false,
@@ -3194,7 +3189,6 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 		.may_writepage = !laptop_mode,
 		.may_unmap = 1,
 		.may_swap = 1,
-		.may_shrinkslab = 1,
 	};
 
 	/*
@@ -3238,7 +3232,6 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
 		.may_unmap = 1,
 		.reclaim_idx = MAX_NR_ZONES - 1,
 		.may_swap = !noswap,
-		.may_shrinkslab = 1,
 	};
 	unsigned long lru_pages;
 
@@ -3286,7 +3279,6 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
 		.may_writepage = !laptop_mode,
 		.may_unmap = 1,
 		.may_swap = may_swap,
-		.may_shrinkslab = 1,
 	};
 
 	set_task_reclaim_state(current, &sc.reclaim_state);
@@ -3598,7 +3590,6 @@ restart:
 		 */
 		sc.may_writepage = !laptop_mode && !nr_boost_reclaim;
 		sc.may_swap = !nr_boost_reclaim;
-		sc.may_shrinkslab = !nr_boost_reclaim;
 
 		/*
 		 * Do some background aging of the anon list, to give

From 4643d67e8cb0b3536ef0ab5cddd1cedc73fa14ad Mon Sep 17 00:00:00 2001
From: Mike Kravetz <mike.kravetz@oracle.com>
Date: Tue, 13 Aug 2019 15:38:00 -0700
Subject: [PATCH 118/157] hugetlbfs: fix hugetlb page migration/fault race
 causing SIGBUS

Li Wang discovered that LTP/move_page12 V2 sometimes triggers SIGBUS in
the kernel-v5.2.3 testing.  This is caused by a race between hugetlb
page migration and page fault.

If a hugetlb page can not be allocated to satisfy a page fault, the task
is sent SIGBUS.  This is normal hugetlbfs behavior.  A hugetlb fault
mutex exists to prevent two tasks from trying to instantiate the same
page.  This protects against the situation where there is only one
hugetlb page, and both tasks would try to allocate.  Without the mutex,
one would fail and SIGBUS even though the other fault would be
successful.

There is a similar race between hugetlb page migration and fault.
Migration code will allocate a page for the target of the migration.  It
will then unmap the original page from all page tables.  It does this
unmap by first clearing the pte and then writing a migration entry.  The
page table lock is held for the duration of this clear and write
operation.  However, the beginnings of the hugetlb page fault code
optimistically checks the pte without taking the page table lock.  If
clear (as it can be during the migration unmap operation), a hugetlb
page allocation is attempted to satisfy the fault.  Note that the page
which will eventually satisfy this fault was already allocated by the
migration code.  However, the allocation within the fault path could
fail which would result in the task incorrectly being sent SIGBUS.

Ideally, we could take the hugetlb fault mutex in the migration code
when modifying the page tables.  However, locks must be taken in the
order of hugetlb fault mutex, page lock, page table lock.  This would
require significant rework of the migration code.  Instead, the issue is
addressed in the hugetlb fault code.  After failing to allocate a huge
page, take the page table lock and check for huge_pte_none before
returning an error.  This is the same check that must be made further in
the code even if page allocation is successful.

Link: http://lkml.kernel.org/r/20190808000533.7701-1-mike.kravetz@oracle.com
Fixes: 290408d4a250 ("hugetlb: hugepage migration core")
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Reported-by: Li Wang <liwang@redhat.com>
Tested-by: Li Wang <liwang@redhat.com>
Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Cyril Hrubis <chrubis@suse.cz>
Cc: Xishi Qiu <xishi.qiuxishi@alibaba-inc.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/hugetlb.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index ede7e7f5d1ab..6d7296dd11b8 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3856,6 +3856,25 @@ retry:
 
 		page = alloc_huge_page(vma, haddr, 0);
 		if (IS_ERR(page)) {
+			/*
+			 * Returning error will result in faulting task being
+			 * sent SIGBUS.  The hugetlb fault mutex prevents two
+			 * tasks from racing to fault in the same page which
+			 * could result in false unable to allocate errors.
+			 * Page migration does not take the fault mutex, but
+			 * does a clear then write of pte's under page table
+			 * lock.  Page fault code could race with migration,
+			 * notice the clear pte and try to allocate a page
+			 * here.  Before returning error, get ptl and make
+			 * sure there really is no pte entry.
+			 */
+			ptl = huge_pte_lock(h, mm, ptep);
+			if (!huge_pte_none(huge_ptep_get(ptep))) {
+				ret = 0;
+				spin_unlock(ptl);
+				goto out;
+			}
+			spin_unlock(ptl);
 			ret = vmf_error(PTR_ERR(page));
 			goto out;
 		}

From 500bc2c1f48aa6efad9474fdbe0c7cd8d600484c Mon Sep 17 00:00:00 2001
From: Alistair Francis <alistair.francis@wdc.com>
Date: Tue, 13 Aug 2019 16:32:29 -0700
Subject: [PATCH 119/157] riscv: rv32_defconfig: Update the defconfig

Update the rv32_defconfig:
 - Add 'CONFIG_DEVTMPFS_MOUNT=y' to match the RISC-V defconfig
 - Add CONFIG_HW_RANDOM=y and CONFIG_HW_RANDOM_VIRTIO=y to enable
   VirtIORNG when running on QEMU

Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
Signed-off-by: Paul Walmsley <paul.walmsley@sifive.com>
---
 arch/riscv/configs/rv32_defconfig | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/riscv/configs/rv32_defconfig b/arch/riscv/configs/rv32_defconfig
index d5449ef805a3..7da93e494445 100644
--- a/arch/riscv/configs/rv32_defconfig
+++ b/arch/riscv/configs/rv32_defconfig
@@ -34,6 +34,7 @@ CONFIG_PCIEPORTBUS=y
 CONFIG_PCI_HOST_GENERIC=y
 CONFIG_PCIE_XILINX=y
 CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_VIRTIO_BLK=y
 CONFIG_BLK_DEV_SD=y
@@ -53,6 +54,8 @@ CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_SERIAL_EARLYCON_RISCV_SBI=y
 CONFIG_HVC_RISCV_SBI=y
+CONFIG_HW_RANDOM=y
+CONFIG_HW_RANDOM_VIRTIO=y
 # CONFIG_PTP_1588_CLOCK is not set
 CONFIG_DRM=y
 CONFIG_DRM_RADEON=y

From d568cb3f935123b4fb42b9029ebcfebc4e4533a2 Mon Sep 17 00:00:00 2001
From: Alistair Francis <alistair.francis@wdc.com>
Date: Tue, 13 Aug 2019 16:32:30 -0700
Subject: [PATCH 120/157] riscv: defconfig: Update the defconfig

Update the defconfig:
 - Add CONFIG_HW_RANDOM=y and CONFIG_HW_RANDOM_VIRTIO=y to enable
   VirtIORNG when running on QEMU

Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
Signed-off-by: Paul Walmsley <paul.walmsley@sifive.com>
---
 arch/riscv/configs/defconfig | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index 93205c0bf71d..3efff552a261 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -54,6 +54,8 @@ CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_SERIAL_EARLYCON_RISCV_SBI=y
 CONFIG_HVC_RISCV_SBI=y
+CONFIG_HW_RANDOM=y
+CONFIG_HW_RANDOM_VIRTIO=y
 CONFIG_SPI=y
 CONFIG_SPI_SIFIVE=y
 # CONFIG_PTP_1588_CLOCK is not set

From 401714d9534aad8c24196b32600da683116bbe09 Mon Sep 17 00:00:00 2001
From: Hui Wang <hui.wang@canonical.com>
Date: Wed, 14 Aug 2019 12:09:07 +0800
Subject: [PATCH 121/157] ALSA: hda - Let all conexant codec enter D3 when
 rebooting

We have 3 new lenovo laptops which have conexant codec 0x14f11f86,
these 3 laptops also have the noise issue when rebooting, after
letting the codec enter D3 before rebooting or poweroff, the noise
disappers.

Instead of adding a new ID again in the reboot_notify(), let us make
this function apply to all conexant codec. In theory make codec enter
D3 before rebooting or poweroff is harmless, and I tested this change
on a couple of other Lenovo laptops which have different conexant
codecs, there is no side effect so far.

Cc: stable@vger.kernel.org
Signed-off-by: Hui Wang <hui.wang@canonical.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_conexant.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index f299f137eaea..93a303676aea 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -163,15 +163,6 @@ static void cx_auto_reboot_notify(struct hda_codec *codec)
 {
 	struct conexant_spec *spec = codec->spec;
 
-	switch (codec->core.vendor_id) {
-	case 0x14f12008: /* CX8200 */
-	case 0x14f150f2: /* CX20722 */
-	case 0x14f150f4: /* CX20724 */
-		break;
-	default:
-		return;
-	}
-
 	/* Turn the problematic codec into D3 to avoid spurious noises
 	   from the internal speaker during (and after) reboot */
 	cx_auto_turn_eapd(codec, spec->num_eapds, spec->eapds, false);

From 871b9066027702e6e6589da0e1edd3b7dede7205 Mon Sep 17 00:00:00 2001
From: Hui Wang <hui.wang@canonical.com>
Date: Wed, 14 Aug 2019 12:09:08 +0800
Subject: [PATCH 122/157] ALSA: hda - Add a generic reboot_notify

Make codec enter D3 before rebooting or poweroff can fix the noise
issue on some laptops. And in theory it is harmless for all codecs
to enter D3 before rebooting or poweroff, let us add a generic
reboot_notify, then realtek and conexant drivers can call this
function.

Cc: stable@vger.kernel.org
Signed-off-by: Hui Wang <hui.wang@canonical.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/hda_generic.c    | 19 +++++++++++++++++++
 sound/pci/hda/hda_generic.h    |  1 +
 sound/pci/hda/patch_conexant.c |  6 +-----
 sound/pci/hda/patch_realtek.c  | 11 +----------
 4 files changed, 22 insertions(+), 15 deletions(-)

diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c
index 8f2beb1f3ae4..5bf24fb819d2 100644
--- a/sound/pci/hda/hda_generic.c
+++ b/sound/pci/hda/hda_generic.c
@@ -6051,6 +6051,24 @@ void snd_hda_gen_free(struct hda_codec *codec)
 }
 EXPORT_SYMBOL_GPL(snd_hda_gen_free);
 
+/**
+ * snd_hda_gen_reboot_notify - Make codec enter D3 before rebooting
+ * @codec: the HDA codec
+ *
+ * This can be put as patch_ops reboot_notify function.
+ */
+void snd_hda_gen_reboot_notify(struct hda_codec *codec)
+{
+	/* Make the codec enter D3 to avoid spurious noises from the internal
+	 * speaker during (and after) reboot
+	 */
+	snd_hda_codec_set_power_to_all(codec, codec->core.afg, AC_PWRST_D3);
+	snd_hda_codec_write(codec, codec->core.afg, 0,
+			    AC_VERB_SET_POWER_STATE, AC_PWRST_D3);
+	msleep(10);
+}
+EXPORT_SYMBOL_GPL(snd_hda_gen_reboot_notify);
+
 #ifdef CONFIG_PM
 /**
  * snd_hda_gen_check_power_status - check the loopback power save state
@@ -6078,6 +6096,7 @@ static const struct hda_codec_ops generic_patch_ops = {
 	.init = snd_hda_gen_init,
 	.free = snd_hda_gen_free,
 	.unsol_event = snd_hda_jack_unsol_event,
+	.reboot_notify = snd_hda_gen_reboot_notify,
 #ifdef CONFIG_PM
 	.check_power_status = snd_hda_gen_check_power_status,
 #endif
diff --git a/sound/pci/hda/hda_generic.h b/sound/pci/hda/hda_generic.h
index 35a670a71c42..5f199dcb0d18 100644
--- a/sound/pci/hda/hda_generic.h
+++ b/sound/pci/hda/hda_generic.h
@@ -332,6 +332,7 @@ int snd_hda_gen_parse_auto_config(struct hda_codec *codec,
 				  struct auto_pin_cfg *cfg);
 int snd_hda_gen_build_controls(struct hda_codec *codec);
 int snd_hda_gen_build_pcms(struct hda_codec *codec);
+void snd_hda_gen_reboot_notify(struct hda_codec *codec);
 
 /* standard jack event callbacks */
 void snd_hda_gen_hp_automute(struct hda_codec *codec,
diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index 93a303676aea..14298ef45b21 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -166,11 +166,7 @@ static void cx_auto_reboot_notify(struct hda_codec *codec)
 	/* Turn the problematic codec into D3 to avoid spurious noises
 	   from the internal speaker during (and after) reboot */
 	cx_auto_turn_eapd(codec, spec->num_eapds, spec->eapds, false);
-
-	snd_hda_codec_set_power_to_all(codec, codec->core.afg, AC_PWRST_D3);
-	snd_hda_codec_write(codec, codec->core.afg, 0,
-			    AC_VERB_SET_POWER_STATE, AC_PWRST_D3);
-	msleep(10);
+	snd_hda_gen_reboot_notify(codec);
 }
 
 static void cx_auto_free(struct hda_codec *codec)
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 8aaf1d9c55cf..e333b3e30e31 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -869,15 +869,6 @@ static void alc_reboot_notify(struct hda_codec *codec)
 		alc_shutup(codec);
 }
 
-/* power down codec to D3 at reboot/shutdown; set as reboot_notify ops */
-static void alc_d3_at_reboot(struct hda_codec *codec)
-{
-	snd_hda_codec_set_power_to_all(codec, codec->core.afg, AC_PWRST_D3);
-	snd_hda_codec_write(codec, codec->core.afg, 0,
-			    AC_VERB_SET_POWER_STATE, AC_PWRST_D3);
-	msleep(10);
-}
-
 #define alc_free	snd_hda_gen_free
 
 #ifdef CONFIG_PM
@@ -5152,7 +5143,7 @@ static void alc_fixup_tpt440_dock(struct hda_codec *codec,
 	struct alc_spec *spec = codec->spec;
 
 	if (action == HDA_FIXUP_ACT_PRE_PROBE) {
-		spec->reboot_notify = alc_d3_at_reboot; /* reduce noise */
+		spec->reboot_notify = snd_hda_gen_reboot_notify; /* reduce noise */
 		spec->parse_flags = HDA_PINCFG_NO_HP_FIXUP;
 		codec->power_save_node = 0; /* avoid click noises */
 		snd_hda_apply_pincfgs(codec, pincfgs);

From e8c220fac415d9f4a994b0c2871b835feac1eb4e Mon Sep 17 00:00:00 2001
From: Fabio Estevam <festevam@gmail.com>
Date: Thu, 8 Aug 2019 18:01:36 -0300
Subject: [PATCH 123/157] Revert "i2c: imx: improve the error handling in
 i2c_imx_dma_request()"

Since commit e1ab9a468e3b ("i2c: imx: improve the error handling in
i2c_imx_dma_request()") when booting with the DMA driver as module (such
as CONFIG_FSL_EDMA=m) the following endless clk warnings are seen:

[  153.077831] ------------[ cut here ]------------
[  153.082528] WARNING: CPU: 0 PID: 15 at drivers/clk/clk.c:924 clk_core_disable_lock+0x18/0x24
[  153.093077] i2c0 already disabled
[  153.096416] Modules linked in:
[  153.099521] CPU: 0 PID: 15 Comm: kworker/0:1 Tainted: G        W         5.2.0+ #321
[  153.107290] Hardware name: Freescale Vybrid VF5xx/VF6xx (Device Tree)
[  153.113772] Workqueue: events deferred_probe_work_func
[  153.118979] [<c0019560>] (unwind_backtrace) from [<c0014734>] (show_stack+0x10/0x14)
[  153.126778] [<c0014734>] (show_stack) from [<c083f8dc>] (dump_stack+0x9c/0xd4)
[  153.134051] [<c083f8dc>] (dump_stack) from [<c0031154>] (__warn+0xf8/0x124)
[  153.141056] [<c0031154>] (__warn) from [<c0031248>] (warn_slowpath_fmt+0x38/0x48)
[  153.148580] [<c0031248>] (warn_slowpath_fmt) from [<c040fde0>] (clk_core_disable_lock+0x18/0x24)
[  153.157413] [<c040fde0>] (clk_core_disable_lock) from [<c058f520>] (i2c_imx_probe+0x554/0x6ec)
[  153.166076] [<c058f520>] (i2c_imx_probe) from [<c04b9178>] (platform_drv_probe+0x48/0x98)
[  153.174297] [<c04b9178>] (platform_drv_probe) from [<c04b7298>] (really_probe+0x1d8/0x2c0)
[  153.182605] [<c04b7298>] (really_probe) from [<c04b7554>] (driver_probe_device+0x5c/0x174)
[  153.190909] [<c04b7554>] (driver_probe_device) from [<c04b58c8>] (bus_for_each_drv+0x44/0x8c)
[  153.199480] [<c04b58c8>] (bus_for_each_drv) from [<c04b746c>] (__device_attach+0xa0/0x108)
[  153.207782] [<c04b746c>] (__device_attach) from [<c04b65a4>] (bus_probe_device+0x88/0x90)
[  153.215999] [<c04b65a4>] (bus_probe_device) from [<c04b6a04>] (deferred_probe_work_func+0x60/0x90)
[  153.225003] [<c04b6a04>] (deferred_probe_work_func) from [<c004f190>] (process_one_work+0x204/0x634)
[  153.234178] [<c004f190>] (process_one_work) from [<c004f618>] (worker_thread+0x20/0x484)
[  153.242315] [<c004f618>] (worker_thread) from [<c0055c2c>] (kthread+0x118/0x150)
[  153.249758] [<c0055c2c>] (kthread) from [<c00090b4>] (ret_from_fork+0x14/0x20)
[  153.257006] Exception stack(0xdde43fb0 to 0xdde43ff8)
[  153.262095] 3fa0:                                     00000000 00000000 00000000 00000000
[  153.270306] 3fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
[  153.278520] 3fe0: 00000000 00000000 00000000 00000000 00000013 00000000
[  153.285159] irq event stamp: 3323022
[  153.288787] hardirqs last  enabled at (3323021): [<c0861c4c>] _raw_spin_unlock_irq+0x24/0x2c
[  153.297261] hardirqs last disabled at (3323022): [<c040d7a0>] clk_enable_lock+0x10/0x124
[  153.305392] softirqs last  enabled at (3322092): [<c000a504>] __do_softirq+0x344/0x540
[  153.313352] softirqs last disabled at (3322081): [<c00385c0>] irq_exit+0x10c/0x128
[  153.320946] ---[ end trace a506731ccd9bd703 ]---

This endless clk warnings behaviour is well explained by Andrey Smirnov:

"Allocating DMA after registering I2C adapter can lead to infinite
probing loop, for example, consider the following scenario:

    1. i2c_imx_probe() is called and successfully registers an I2C
       adapter via i2c_add_numbered_adapter()

    2. As a part of i2c_add_numbered_adapter() new I2C slave devices
       are added from DT which results in a call to
       driver_deferred_probe_trigger()

    3. i2c_imx_probe() continues and calls i2c_imx_dma_request() which
       due to lack of proper DMA driver returns -EPROBE_DEFER

    4. i2c_imx_probe() fails, removes I2C adapter and returns
       -EPROBE_DEFER, which places it into deferred probe list

    5. Deferred probe work triggered in #2 above kicks in and calls
       i2c_imx_probe() again thus bringing us to step #1"

So revert commit e1ab9a468e3b ("i2c: imx: improve the error handling in
i2c_imx_dma_request()") and restore the old behaviour, in order to
avoid regressions on existing setups.

Cc: <stable@vger.kernel.org>
Reported-by: Andrey Smirnov <andrew.smirnov@gmail.com>
Reported-by: Russell King <linux@armlinux.org.uk>
Fixes: e1ab9a468e3b ("i2c: imx: improve the error handling in i2c_imx_dma_request()")
Signed-off-by: Fabio Estevam <festevam@gmail.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-imx.c | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c
index b1b8b938d7f4..15f6cde6452f 100644
--- a/drivers/i2c/busses/i2c-imx.c
+++ b/drivers/i2c/busses/i2c-imx.c
@@ -273,8 +273,8 @@ static inline unsigned char imx_i2c_read_reg(struct imx_i2c_struct *i2c_imx,
 }
 
 /* Functions for DMA support */
-static int i2c_imx_dma_request(struct imx_i2c_struct *i2c_imx,
-			       dma_addr_t phy_addr)
+static void i2c_imx_dma_request(struct imx_i2c_struct *i2c_imx,
+						dma_addr_t phy_addr)
 {
 	struct imx_i2c_dma *dma;
 	struct dma_slave_config dma_sconfig;
@@ -283,7 +283,7 @@ static int i2c_imx_dma_request(struct imx_i2c_struct *i2c_imx,
 
 	dma = devm_kzalloc(dev, sizeof(*dma), GFP_KERNEL);
 	if (!dma)
-		return -ENOMEM;
+		return;
 
 	dma->chan_tx = dma_request_chan(dev, "tx");
 	if (IS_ERR(dma->chan_tx)) {
@@ -328,7 +328,7 @@ static int i2c_imx_dma_request(struct imx_i2c_struct *i2c_imx,
 	dev_info(dev, "using %s (tx) and %s (rx) for DMA transfers\n",
 		dma_chan_name(dma->chan_tx), dma_chan_name(dma->chan_rx));
 
-	return 0;
+	return;
 
 fail_rx:
 	dma_release_channel(dma->chan_rx);
@@ -336,8 +336,6 @@ fail_tx:
 	dma_release_channel(dma->chan_tx);
 fail_al:
 	devm_kfree(dev, dma);
-	/* return successfully if there is no dma support */
-	return ret == -ENODEV ? 0 : ret;
 }
 
 static void i2c_imx_dma_callback(void *arg)
@@ -1165,17 +1163,13 @@ static int i2c_imx_probe(struct platform_device *pdev)
 	dev_dbg(&i2c_imx->adapter.dev, "device resources: %pR\n", res);
 	dev_dbg(&i2c_imx->adapter.dev, "adapter name: \"%s\"\n",
 		i2c_imx->adapter.name);
+	dev_info(&i2c_imx->adapter.dev, "IMX I2C adapter registered\n");
 
 	/* Init DMA config if supported */
-	ret = i2c_imx_dma_request(i2c_imx, phy_addr);
-	if (ret < 0)
-		goto del_adapter;
+	i2c_imx_dma_request(i2c_imx, phy_addr);
 
-	dev_info(&i2c_imx->adapter.dev, "IMX I2C adapter registered\n");
 	return 0;   /* Return OK */
 
-del_adapter:
-	i2c_del_adapter(&i2c_imx->adapter);
 clk_notifier_unregister:
 	clk_notifier_unregister(i2c_imx->clk, &i2c_imx->clk_change_nb);
 rpm_disable:

From 8fc3ae3b109d17157102f0abf9c8fb1e16bf810a Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Mon, 12 Aug 2019 07:08:17 +0200
Subject: [PATCH 124/157] MAINTAINERS: i2c-imx: take over maintainership

I would like to maintain the i2c-imx driver. Since I work with
different i.MX variants and have access to the hardware, I can spend
some time on the reviewing of this driver.

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 MAINTAINERS | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index e81e60bd7c26..ddc1cf5cb0b9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6441,6 +6441,14 @@ S:	Maintained
 F:	drivers/perf/fsl_imx8_ddr_perf.c
 F:	Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt
 
+FREESCALE IMX I2C DRIVER
+M:	Oleksij Rempel <o.rempel@pengutronix.de>
+R:	Pengutronix Kernel Team <kernel@pengutronix.de>
+L:	linux-i2c@vger.kernel.org
+S:	Maintained
+F:	drivers/i2c/busses/i2c-imx.c
+F:	Documentation/devicetree/bindings/i2c/i2c-imx.txt
+
 FREESCALE IMX LPI2C DRIVER
 M:	Dong Aisheng <aisheng.dong@nxp.com>
 L:	linux-i2c@vger.kernel.org

From 7b814d852af6944657c2961039f404c4490771c0 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Thu, 8 Aug 2019 21:39:10 +0200
Subject: [PATCH 125/157] i2c: rcar: avoid race when unregistering slave client

After we disabled interrupts, there might still be an active one
running. Sync before clearing the pointer to the slave device.

Fixes: de20d1857dd6 ("i2c: rcar: add slave support")
Reported-by: Krzysztof Adamski <krzysztof.adamski@nokia.com>
Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Reviewed-by: Krzysztof Adamski <krzysztof.adamski@nokia.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-rcar.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c
index d39a4606f72d..531c01100b56 100644
--- a/drivers/i2c/busses/i2c-rcar.c
+++ b/drivers/i2c/busses/i2c-rcar.c
@@ -139,6 +139,7 @@ struct rcar_i2c_priv {
 	enum dma_data_direction dma_direction;
 
 	struct reset_control *rstc;
+	int irq;
 };
 
 #define rcar_i2c_priv_to_dev(p)		((p)->adap.dev.parent)
@@ -861,9 +862,11 @@ static int rcar_unreg_slave(struct i2c_client *slave)
 
 	WARN_ON(!priv->slave);
 
+	/* disable irqs and ensure none is running before clearing ptr */
 	rcar_i2c_write(priv, ICSIER, 0);
 	rcar_i2c_write(priv, ICSCR, 0);
 
+	synchronize_irq(priv->irq);
 	priv->slave = NULL;
 
 	pm_runtime_put(rcar_i2c_priv_to_dev(priv));
@@ -918,7 +921,7 @@ static int rcar_i2c_probe(struct platform_device *pdev)
 	struct i2c_adapter *adap;
 	struct device *dev = &pdev->dev;
 	struct i2c_timings i2c_t;
-	int irq, ret;
+	int ret;
 
 	/* Otherwise logic will break because some bytes must always use PIO */
 	BUILD_BUG_ON_MSG(RCAR_MIN_DMA_LEN < 3, "Invalid min DMA length");
@@ -984,10 +987,10 @@ static int rcar_i2c_probe(struct platform_device *pdev)
 		pm_runtime_put(dev);
 
 
-	irq = platform_get_irq(pdev, 0);
-	ret = devm_request_irq(dev, irq, rcar_i2c_irq, 0, dev_name(dev), priv);
+	priv->irq = platform_get_irq(pdev, 0);
+	ret = devm_request_irq(dev, priv->irq, rcar_i2c_irq, 0, dev_name(dev), priv);
 	if (ret < 0) {
-		dev_err(dev, "cannot get irq %d\n", irq);
+		dev_err(dev, "cannot get irq %d\n", priv->irq);
 		goto out_pm_disable;
 	}
 

From d7437fc0d8291181debe032671a289b6bd93f46f Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Thu, 8 Aug 2019 21:54:17 +0200
Subject: [PATCH 126/157] i2c: emev2: avoid race when unregistering slave
 client

After we disabled interrupts, there might still be an active one
running. Sync before clearing the pointer to the slave device.

Fixes: c31d0a00021d ("i2c: emev2: add slave support")
Reported-by: Krzysztof Adamski <krzysztof.adamski@nokia.com>
Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Reviewed-by: Krzysztof Adamski <krzysztof.adamski@nokia.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-emev2.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/i2c/busses/i2c-emev2.c b/drivers/i2c/busses/i2c-emev2.c
index 35b302d983e0..959d4912ec0d 100644
--- a/drivers/i2c/busses/i2c-emev2.c
+++ b/drivers/i2c/busses/i2c-emev2.c
@@ -69,6 +69,7 @@ struct em_i2c_device {
 	struct completion msg_done;
 	struct clk *sclk;
 	struct i2c_client *slave;
+	int irq;
 };
 
 static inline void em_clear_set_bit(struct em_i2c_device *priv, u8 clear, u8 set, u8 reg)
@@ -339,6 +340,12 @@ static int em_i2c_unreg_slave(struct i2c_client *slave)
 
 	writeb(0, priv->base + I2C_OFS_SVA0);
 
+	/*
+	 * Wait for interrupt to finish. New slave irqs cannot happen because we
+	 * cleared the slave address and, thus, only extension codes will be
+	 * detected which do not use the slave ptr.
+	 */
+	synchronize_irq(priv->irq);
 	priv->slave = NULL;
 
 	return 0;
@@ -355,7 +362,7 @@ static int em_i2c_probe(struct platform_device *pdev)
 {
 	struct em_i2c_device *priv;
 	struct resource *r;
-	int irq, ret;
+	int ret;
 
 	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
 	if (!priv)
@@ -390,8 +397,8 @@ static int em_i2c_probe(struct platform_device *pdev)
 
 	em_i2c_reset(&priv->adap);
 
-	irq = platform_get_irq(pdev, 0);
-	ret = devm_request_irq(&pdev->dev, irq, em_i2c_irq_handler, 0,
+	priv->irq = platform_get_irq(pdev, 0);
+	ret = devm_request_irq(&pdev->dev, priv->irq, em_i2c_irq_handler, 0,
 				"em_i2c", priv);
 	if (ret)
 		goto err_clk;
@@ -401,7 +408,8 @@ static int em_i2c_probe(struct platform_device *pdev)
 	if (ret)
 		goto err_clk;
 
-	dev_info(&pdev->dev, "Added i2c controller %d, irq %d\n", priv->adap.nr, irq);
+	dev_info(&pdev->dev, "Added i2c controller %d, irq %d\n", priv->adap.nr,
+		 priv->irq);
 
 	return 0;
 

From 90865a3dc597bd8463efacb749561095ba70b0aa Mon Sep 17 00:00:00 2001
From: Nishad Kamdar <nishadkamdar@gmail.com>
Date: Sat, 3 Aug 2019 19:43:35 +0530
Subject: [PATCH 127/157] i2c: stm32: Use the correct style for SPDX License
 Identifier

This patch corrects the SPDX License Identifier style
in header file related to STM32 Driver for I2C hardware
bus support.
For C header files Documentation/process/license-rules.rst
mandates C-like comments (opposed to C source files where
C++ style should be used)

Changes made by using a script provided by Joe Perches here:
https://lkml.org/lkml/2019/2/7/46

Suggested-by: Joe Perches <joe@perches.com>
Signed-off-by: Nishad Kamdar <nishadkamdar@gmail.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-stm32.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-stm32.h b/drivers/i2c/busses/i2c-stm32.h
index 868755f82f88..2c21893905a3 100644
--- a/drivers/i2c/busses/i2c-stm32.h
+++ b/drivers/i2c/busses/i2c-stm32.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * i2c-stm32.h
  *

From daac07156b330b18eb5071aec4b3ddca1c377f2c Mon Sep 17 00:00:00 2001
From: Hui Peng <benquike@gmail.com>
Date: Tue, 13 Aug 2019 22:34:04 -0400
Subject: [PATCH 128/157] ALSA: usb-audio: Fix an OOB bug in
 parse_audio_mixer_unit

The `uac_mixer_unit_descriptor` shown as below is read from the
device side. In `parse_audio_mixer_unit`, `baSourceID` field is
accessed from index 0 to `bNrInPins` - 1, the current implementation
assumes that descriptor is always valid (the length  of descriptor
is no shorter than 5 + `bNrInPins`). If a descriptor read from
the device side is invalid, it may trigger out-of-bound memory
access.

```
struct uac_mixer_unit_descriptor {
	__u8 bLength;
	__u8 bDescriptorType;
	__u8 bDescriptorSubtype;
	__u8 bUnitID;
	__u8 bNrInPins;
	__u8 baSourceID[];
}
```

This patch fixes the bug by add a sanity check on the length of
the descriptor.

Reported-by: Hui Peng <benquike@gmail.com>
Reported-by: Mathias Payer <mathias.payer@nebelwelt.net>
Cc: <stable@vger.kernel.org>
Signed-off-by: Hui Peng <benquike@gmail.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/usb/mixer.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c
index 7498b5191b68..ea487378be17 100644
--- a/sound/usb/mixer.c
+++ b/sound/usb/mixer.c
@@ -744,6 +744,8 @@ static int uac_mixer_unit_get_channels(struct mixer_build *state,
 		return -EINVAL;
 	if (!desc->bNrInPins)
 		return -EINVAL;
+	if (desc->bLength < sizeof(*desc) + desc->bNrInPins)
+		return -EINVAL;
 
 	switch (state->mixer->protocol) {
 	case UAC_VERSION_1:

From 8ac71d7e46b94a4fc8ffc6f1c88004cdf24459e8 Mon Sep 17 00:00:00 2001
From: Vincent Chen <vincent.chen@sifive.com>
Date: Wed, 14 Aug 2019 16:23:52 +0800
Subject: [PATCH 129/157] riscv: Correct the initialized flow of FP register

  The following two reasons cause FP registers are sometimes not
initialized before starting the user program.
1. Currently, the FP context is initialized in flush_thread() function
   and we expect these initial values to be restored to FP register when
   doing FP context switch. However, the FP context switch only occurs in
   switch_to function. Hence, if this process does not be scheduled out
   and scheduled in before entering the user space, the FP registers
   have no chance to initialize.
2. In flush_thread(), the state of reg->sstatus.FS inherits from the
   parent. Hence, the state of reg->sstatus.FS may be dirty. If this
   process is scheduled out during flush_thread() and initializing the
   FP register, the fstate_save() in switch_to will corrupt the FP context
   which has been initialized until flush_thread().

  To solve the 1st case, the initialization of the FP register will be
completed in start_thread(). It makes sure all FP registers are initialized
before starting the user program. For the 2nd case, the state of
reg->sstatus.FS in start_thread will be set to SR_FS_OFF to prevent this
process from corrupting FP context in doing context save. The FP state is
set to SR_FS_INITIAL in start_trhead().

Signed-off-by: Vincent Chen <vincent.chen@sifive.com>
Reviewed-by: Anup Patel <anup@brainfault.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Fixes: 7db91e57a0acd ("RISC-V: Task implementation")
Cc: stable@vger.kernel.org
[paul.walmsley@sifive.com: fixed brace alignment issue reported by
 checkpatch]
Signed-off-by: Paul Walmsley <paul.walmsley@sifive.com>
---
 arch/riscv/include/asm/switch_to.h |  6 ++++++
 arch/riscv/kernel/process.c        | 11 +++++++++--
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h
index 853b65ef656d..949d9cd91dec 100644
--- a/arch/riscv/include/asm/switch_to.h
+++ b/arch/riscv/include/asm/switch_to.h
@@ -19,6 +19,12 @@ static inline void __fstate_clean(struct pt_regs *regs)
 	regs->sstatus |= (regs->sstatus & ~(SR_FS)) | SR_FS_CLEAN;
 }
 
+static inline void fstate_off(struct task_struct *task,
+			      struct pt_regs *regs)
+{
+	regs->sstatus = (regs->sstatus & ~SR_FS) | SR_FS_OFF;
+}
+
 static inline void fstate_save(struct task_struct *task,
 			       struct pt_regs *regs)
 {
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index f23794bd1e90..fb3a082362eb 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -64,8 +64,14 @@ void start_thread(struct pt_regs *regs, unsigned long pc,
 	unsigned long sp)
 {
 	regs->sstatus = SR_SPIE;
-	if (has_fpu)
+	if (has_fpu) {
 		regs->sstatus |= SR_FS_INITIAL;
+		/*
+		 * Restore the initial value to the FP register
+		 * before starting the user program.
+		 */
+		fstate_restore(current, regs);
+	}
 	regs->sepc = pc;
 	regs->sp = sp;
 	set_fs(USER_DS);
@@ -75,10 +81,11 @@ void flush_thread(void)
 {
 #ifdef CONFIG_FPU
 	/*
-	 * Reset FPU context
+	 * Reset FPU state and context
 	 *	frm: round to nearest, ties to even (IEEE default)
 	 *	fflags: accrued exceptions cleared
 	 */
+	fstate_off(current, task_pt_regs(current));
 	memset(&current->thread.fstate, 0, sizeof(current->thread.fstate));
 #endif
 }

From 69703eb9a8ae28a46cd5bce7d69ceeef6273a104 Mon Sep 17 00:00:00 2001
From: Vincent Chen <vincent.chen@sifive.com>
Date: Wed, 14 Aug 2019 16:23:53 +0800
Subject: [PATCH 130/157] riscv: Make __fstate_clean() work correctly.

Make the __fstate_clean() function correctly set the
state of sstatus.FS in pt_regs to SR_FS_CLEAN.

Fixes: 7db91e57a0acd ("RISC-V: Task implementation")
Cc: linux-stable <stable@vger.kernel.org>
Signed-off-by: Vincent Chen <vincent.chen@sifive.com>
Reviewed-by: Anup Patel <anup@brainfault.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
[paul.walmsley@sifive.com: expanded "Fixes" commit ID]
Signed-off-by: Paul Walmsley <paul.walmsley@sifive.com>
---
 arch/riscv/include/asm/switch_to.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h
index 949d9cd91dec..f0227bdce0f0 100644
--- a/arch/riscv/include/asm/switch_to.h
+++ b/arch/riscv/include/asm/switch_to.h
@@ -16,7 +16,7 @@ extern void __fstate_restore(struct task_struct *restore_from);
 
 static inline void __fstate_clean(struct pt_regs *regs)
 {
-	regs->sstatus |= (regs->sstatus & ~(SR_FS)) | SR_FS_CLEAN;
+	regs->sstatus = (regs->sstatus & ~SR_FS) | SR_FS_CLEAN;
 }
 
 static inline void fstate_off(struct task_struct *task,

From e1b4ce25dbc93ab0cb8ed0f236a3b9ff7b03802c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
Date: Fri, 9 Aug 2019 17:27:21 +0200
Subject: [PATCH 131/157] drm/scheduler: use job count instead of peek
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The spsc_queue_peek function is accessing queue->head which belongs to
the consumer thread and shouldn't be accessed by the producer

This is fixing a rare race condition when destroying entities.

Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Reviewed-by: Monk.liu@amd.com
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/scheduler/sched_entity.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
index 35ddbec1375a..671c90f34ede 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -95,7 +95,7 @@ static bool drm_sched_entity_is_idle(struct drm_sched_entity *entity)
 	rmb(); /* for list_empty to work without lock */
 
 	if (list_empty(&entity->list) ||
-	    spsc_queue_peek(&entity->job_queue) == NULL)
+	    spsc_queue_count(&entity->job_queue) == 0)
 		return true;
 
 	return false;
@@ -281,7 +281,7 @@ void drm_sched_entity_fini(struct drm_sched_entity *entity)
 	/* Consumption of existing IBs wasn't completed. Forcefully
 	 * remove them here.
 	 */
-	if (spsc_queue_peek(&entity->job_queue)) {
+	if (spsc_queue_count(&entity->job_queue)) {
 		if (sched) {
 			/* Park the kernel for a moment to make sure it isn't processing
 			 * our enity.

From 626633425c079742c04f4f2d81c16962b084605c Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Tue, 13 Aug 2019 14:47:54 -0600
Subject: [PATCH 132/157] dt-bindings: pinctrl: stm32: Fix 'st,syscfg' schema

The proper way to add additional contraints to an existing json-schema
is using 'allOf' to reference the base schema. Using just '$ref' doesn't
work. Fix this for the 'st,syscfg' property.

Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: Alexandre Torgue <alexandre.torgue@st.com>
Cc: linux-gpio@vger.kernel.org
Cc: linux-stm32@st-md-mailman.stormreply.com
Cc: linux-arm-kernel@lists.infradead.org
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 .../devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml          | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml
index 91d3e78b3395..400df2da018a 100644
--- a/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml
+++ b/Documentation/devicetree/bindings/pinctrl/st,stm32-pinctrl.yaml
@@ -37,7 +37,8 @@ properties:
   hwlocks: true
 
   st,syscfg:
-    $ref: "/schemas/types.yaml#/definitions/phandle-array"
+    allOf:
+      - $ref: "/schemas/types.yaml#/definitions/phandle-array"
     description: Should be phandle/offset/mask
     items:
       - description: Phandle to the syscon node which includes IRQ mux selection.

From 83f82d7a42583e93d0f0dde3d61ed10f75c0f4d8 Mon Sep 17 00:00:00 2001
From: Lubomir Rintel <lkundrak@v3.sk>
Date: Wed, 7 Aug 2019 15:22:31 +0200
Subject: [PATCH 133/157] of: irq: fix a trivial typo in a doc comment

Diverged from what the code does with commit 530210c7814e ("of/irq: Replace
of_irq with of_phandle_args").

Signed-off-by: Lubomir Rintel <lkundrak@v3.sk>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 drivers/of/irq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/of/irq.c b/drivers/of/irq.c
index 7f84bb4903ca..a296eaf52a5b 100644
--- a/drivers/of/irq.c
+++ b/drivers/of/irq.c
@@ -277,7 +277,7 @@ EXPORT_SYMBOL_GPL(of_irq_parse_raw);
  * of_irq_parse_one - Resolve an interrupt for a device
  * @device: the device whose interrupt is to be resolved
  * @index: index of the interrupt to resolve
- * @out_irq: structure of_irq filled by this function
+ * @out_irq: structure of_phandle_args filled by this function
  *
  * This function resolves an interrupt for a node by walking the interrupt tree,
  * finding which interrupt controller node it is attached to, and returning the

From 05b439711f6ff8700e8660f97a1179650778b9cb Mon Sep 17 00:00:00 2001
From: "Y.C. Chen" <yc_chen@aspeedtech.com>
Date: Wed, 11 Apr 2018 09:27:39 +0800
Subject: [PATCH 134/157] drm/ast: Fixed reboot test may cause system hanged

There is another thread still access standard VGA I/O while loading drm driver.
Disable standard VGA I/O decode to avoid this issue.

Signed-off-by: Y.C. Chen <yc_chen@aspeedtech.com>
Reviewed-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/1523410059-18415-1-git-send-email-yc_chen@aspeedtech.com
---
 drivers/gpu/drm/ast/ast_main.c | 5 ++++-
 drivers/gpu/drm/ast/ast_mode.c | 2 +-
 drivers/gpu/drm/ast/ast_post.c | 2 +-
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c
index 4c7e31cb45ff..a5d1494a3dc4 100644
--- a/drivers/gpu/drm/ast/ast_main.c
+++ b/drivers/gpu/drm/ast/ast_main.c
@@ -131,8 +131,8 @@ static int ast_detect_chip(struct drm_device *dev, bool *need_post)
 
 
 	/* Enable extended register access */
-	ast_enable_mmio(dev);
 	ast_open_key(ast);
+	ast_enable_mmio(dev);
 
 	/* Find out whether P2A works or whether to use device-tree */
 	ast_detect_config_mode(dev, &scu_rev);
@@ -576,6 +576,9 @@ void ast_driver_unload(struct drm_device *dev)
 {
 	struct ast_private *ast = dev->dev_private;
 
+	/* enable standard VGA decode */
+	ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0xa1, 0x04);
+
 	ast_release_firmware(dev);
 	kfree(ast->dp501_fw_addr);
 	ast_mode_fini(dev);
diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c
index ffccbef962a4..a1cb020e07e5 100644
--- a/drivers/gpu/drm/ast/ast_mode.c
+++ b/drivers/gpu/drm/ast/ast_mode.c
@@ -604,7 +604,7 @@ static int ast_crtc_mode_set(struct drm_crtc *crtc,
 		return -EINVAL;
 	ast_open_key(ast);
 
-	ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xa1, 0xff, 0x04);
+	ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0xa1, 0x06);
 
 	ast_set_std_reg(crtc, adjusted_mode, &vbios_mode);
 	ast_set_crtc_reg(crtc, adjusted_mode, &vbios_mode);
diff --git a/drivers/gpu/drm/ast/ast_post.c b/drivers/gpu/drm/ast/ast_post.c
index f7d421359d56..c1d1ac51d1c2 100644
--- a/drivers/gpu/drm/ast/ast_post.c
+++ b/drivers/gpu/drm/ast/ast_post.c
@@ -46,7 +46,7 @@ void ast_enable_mmio(struct drm_device *dev)
 {
 	struct ast_private *ast = dev->dev_private;
 
-	ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xa1, 0xff, 0x04);
+	ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0xa1, 0x06);
 }
 
 

From db1231ddc04682f60d56ff42447f13099c6c4a4c Mon Sep 17 00:00:00 2001
From: Lyude Paul <lyude@redhat.com>
Date: Thu, 8 Aug 2019 20:53:05 -0400
Subject: [PATCH 135/157] drm/nouveau: Only recalculate PBN/VCPI on
 mode/connector changes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I -thought- I had fixed this entirely, but it looks like that I didn't
test this thoroughly enough as we apparently still make one big mistake
with nv50_msto_atomic_check() - we don't handle the following scenario:

* CRTC #1 has n VCPI allocated to it, is attached to connector DP-4
  which is attached to encoder #1. enabled=y active=n
* CRTC #1 is changed from DP-4 to DP-5, causing:
  * DP-4 crtc=#1→NULL (VCPI n→0)
  * DP-5 crtc=NULL→#1
  * CRTC #1 steals encoder #1 back from DP-4 and gives it to DP-5
  * CRTC #1 maintains the same mode as before, just with a different
    connector
* mode_changed=n connectors_changed=y
  (we _SHOULD_ do VCPI 0→n here, but don't)

Once the above scenario is repeated once, we'll attempt freeing VCPI
from the connector that we didn't allocate due to the connectors
changing, but the mode staying the same. Sigh.

Since nv50_msto_atomic_check() has broken a few times now, let's rethink
things a bit to be more careful: limit both VCPI/PBN allocations to
mode_changed || connectors_changed, since neither VCPI or PBN should
ever need to change outside of routing and mode changes.

Changes since v1:
* Fix accidental reversal of clock and bpp arguments in
  drm_dp_calc_pbn_mode() - William Lewis

Signed-off-by: Lyude Paul <lyude@redhat.com>
Reported-by: Bohdan Milar <bmilar@redhat.com>
Tested-by: Bohdan Milar <bmilar@redhat.com>
Fixes: 232c9eec417a ("drm/nouveau: Use atomic VCPI helpers for MST")
References: 412e85b60531 ("drm/nouveau: Only release VCPI slots on mode changes")
Cc: Lyude Paul <lyude@redhat.com>
Cc: Ben Skeggs <bskeggs@redhat.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: David Airlie <airlied@redhat.com>
Cc: Jerry Zuo <Jerry.Zuo@amd.com>
Cc: Harry Wentland <harry.wentland@amd.com>
Cc: Juston Li <juston.li@intel.com>
Cc: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Cc: Karol Herbst <karolherbst@gmail.com>
Cc: Ilia Mirkin <imirkin@alum.mit.edu>
Cc: <stable@vger.kernel.org> # v5.1+
Acked-by: Ben Skeggs <bskeggs@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190809005307.18391-1-lyude@redhat.com
---
 drivers/gpu/drm/nouveau/dispnv50/disp.c | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c
index 126703816794..5c36c75232e6 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/disp.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c
@@ -771,16 +771,20 @@ nv50_msto_atomic_check(struct drm_encoder *encoder,
 	struct nv50_head_atom *asyh = nv50_head_atom(crtc_state);
 	int slots;
 
-	/* When restoring duplicated states, we need to make sure that the
-	 * bw remains the same and avoid recalculating it, as the connector's
-	 * bpc may have changed after the state was duplicated
-	 */
-	if (!state->duplicated)
-		asyh->dp.pbn =
-			drm_dp_calc_pbn_mode(crtc_state->adjusted_mode.clock,
-					     connector->display_info.bpc * 3);
+	if (crtc_state->mode_changed || crtc_state->connectors_changed) {
+		/*
+		 * When restoring duplicated states, we need to make sure that
+		 * the bw remains the same and avoid recalculating it, as the
+		 * connector's bpc may have changed after the state was
+		 * duplicated
+		 */
+		if (!state->duplicated) {
+			const int bpp = connector->display_info.bpc * 3;
+			const int clock = crtc_state->adjusted_mode.clock;
+
+			asyh->dp.pbn = drm_dp_calc_pbn_mode(clock, bpp);
+		}
 
-	if (crtc_state->mode_changed) {
 		slots = drm_dp_atomic_find_vcpi_slots(state, &mstm->mgr,
 						      mstc->port,
 						      asyh->dp.pbn);

From 5ed1c835ed8b522ce25071cc2d56a9a09bd5b59e Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Wed, 14 Aug 2019 16:40:30 -0700
Subject: [PATCH 136/157] MAINTAINERS, x86/CPU: Tony Luck will maintain
 asm/intel-family.h

There are a few different subsystems in the kernel that depend on model
specific behaviour (perf, EDAC, power, ...). Easier for just one person
to have the task to get new model numbers included instead of having
these groups trip over each other to do it.

 [ bp: s/Cpu/CPU/ and add x86@kernel.org so that it gets CCed too as
   FYI. ]

Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: x86-ml <x86@kernel.org>
Link: https://lkml.kernel.org/r/20190814234030.30817-1-tony.luck@intel.com
---
 MAINTAINERS | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index e81e60bd7c26..f3a78403b47f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8064,6 +8064,13 @@ T:	git git://git.code.sf.net/p/intel-sas/isci
 S:	Supported
 F:	drivers/scsi/isci/
 
+INTEL CPU family model numbers
+M:	Tony Luck <tony.luck@intel.com>
+M:	x86@kernel.org
+L:	linux-kernel@vger.kernel.org
+S:	Supported
+F:	arch/x86/include/asm/intel-family.h
+
 INTEL DRM DRIVERS (excluding Poulsbo, Moorestown and derivative chipsets)
 M:	Jani Nikula <jani.nikula@linux.intel.com>
 M:	Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

From 6caf0be40a707689e8ff8824fdb96ef77685b1ba Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Thu, 15 Aug 2019 01:26:02 -0700
Subject: [PATCH 137/157] USB: serial: option: Add Motorola modem UARTs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On Motorola Mapphone devices such as Droid 4 there are five USB ports
that do not use the same layout as Gobi 1K/2K/etc devices listed in
qcserial.c. So we should use qcaux.c or option.c as noted by
Dan Williams <dan.j.williams@intel.com>.

As the Motorola USB serial ports have an interrupt endpoint as shown
with lsusb -v, we should use option.c instead of qcaux.c as pointed out
by Johan Hovold <johan@kernel.org>.

The ff/ff/ff interfaces seem to always be UARTs on Motorola devices.
For the other interfaces, class 0x0a (CDC Data) should not in general
be added as they are typically part of a multi-interface function as
noted earlier by Bjørn Mork <bjorn@mork.no>.

However, looking at the Motorola mapphone kernel code, the mdm6600 0x0a
class is only used for flashing the modem firmware, and there are no
other interfaces. So I've added that too with more details below as it
works just fine.

The ttyUSB ports on Droid 4 are:

ttyUSB0 DIAG, CQDM-capable
ttyUSB1 MUX or NMEA, no response
ttyUSB2 MUX or NMEA, no response
ttyUSB3 TCMD
ttyUSB4 AT-capable

The ttyUSB0 is detected as QCDM capable by ModemManager. I think
it's only used for debugging with ModemManager --debug for sending
custom AT commands though. ModemManager already can manage data
connection using the USB QMI ports that are already handled by the
qmi_wwan.c driver.

To enable the MUX or NMEA ports, it seems that something needs to be
done additionally to enable them, maybe via the DIAG or TCMD port.
It might be just a NVRAM setting somewhere, but I have no idea what
NVRAM settings may need changing for that.

The TCMD port seems to be a Motorola custom protocol for testing
the modem and to configure it's NVRAM and seems to work just fine
based on a quick test with a minimal tcmdrw tool I wrote.

The voice modem AT-capable port seems to provide only partial
support, and no PM support compared to the TS 27.010 based UART
wired directly to the modem.

The UARTs added with this change are the same product IDs as the
Motorola Mapphone Android Linux kernel mdm6600_id_table. I don't
have any mdm9600 based devices, so I have only tested these on
mdm6600 based droid 4.

Then for the class 0x0a (CDC Data) mode, the Motorola Mapphone Android
Linux kernel driver moto_flashqsc.c just seems to change the
port->bulk_out_size to 8K from the default. And is only used for
flashing the modem firmware it seems.

I've verified that flashing the modem with signed firmware works just
fine with the option driver after manually toggling the GPIO pins, so
I've added droid 4 modem flashing mode to the option driver. I've not
added the other devices listed in moto_flashqsc.c in case they really
need different port->bulk_out_size. Those can be added as they get
tested to work for flashing the modem.

After this patch the output of /sys/kernel/debug/usb/devices has
the following for normal 22b8:2a70 mode including the related qmi_wwan
interfaces:

T:  Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#=  2 Spd=12   MxCh= 0
D:  Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs=  1
P:  Vendor=22b8 ProdID=2a70 Rev= 0.00
S:  Manufacturer=Motorola, Incorporated
S:  Product=Flash MZ600
C:* #Ifs= 9 Cfg#= 1 Atr=e0 MxPwr=500mA
I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option
E:  Ad=81(I) Atr=02(Bulk) MxPS=  64 Ivl=0ms
E:  Ad=01(O) Atr=02(Bulk) MxPS=  64 Ivl=0ms
I:* If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option
E:  Ad=82(I) Atr=02(Bulk) MxPS=  64 Ivl=0ms
E:  Ad=02(O) Atr=02(Bulk) MxPS=  64 Ivl=0ms
I:* If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option
E:  Ad=83(I) Atr=02(Bulk) MxPS=  64 Ivl=0ms
E:  Ad=03(O) Atr=02(Bulk) MxPS=  64 Ivl=0ms
I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option
E:  Ad=84(I) Atr=02(Bulk) MxPS=  64 Ivl=0ms
E:  Ad=04(O) Atr=02(Bulk) MxPS=  64 Ivl=0ms
I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option
E:  Ad=85(I) Atr=03(Int.) MxPS=  64 Ivl=5ms
E:  Ad=86(I) Atr=02(Bulk) MxPS=  64 Ivl=0ms
E:  Ad=05(O) Atr=02(Bulk) MxPS=  64 Ivl=0ms
I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=fb Prot=ff Driver=qmi_wwan
E:  Ad=87(I) Atr=03(Int.) MxPS=  64 Ivl=5ms
E:  Ad=88(I) Atr=02(Bulk) MxPS=  64 Ivl=0ms
E:  Ad=06(O) Atr=02(Bulk) MxPS=  64 Ivl=0ms
I:* If#= 6 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=fb Prot=ff Driver=qmi_wwan
E:  Ad=89(I) Atr=03(Int.) MxPS=  64 Ivl=5ms
E:  Ad=8a(I) Atr=02(Bulk) MxPS=  64 Ivl=0ms
E:  Ad=07(O) Atr=02(Bulk) MxPS=  64 Ivl=0ms
I:* If#= 7 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=fb Prot=ff Driver=qmi_wwan
E:  Ad=8b(I) Atr=03(Int.) MxPS=  64 Ivl=5ms
E:  Ad=8c(I) Atr=02(Bulk) MxPS=  64 Ivl=0ms
E:  Ad=08(O) Atr=02(Bulk) MxPS=  64 Ivl=0ms
I:* If#= 8 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=fb Prot=ff Driver=qmi_wwan
E:  Ad=8d(I) Atr=03(Int.) MxPS=  64 Ivl=5ms
E:  Ad=8e(I) Atr=02(Bulk) MxPS=  64 Ivl=0ms
E:  Ad=09(O) Atr=02(Bulk) MxPS=  64 Ivl=0ms

In 22b8:900e "qc_dload" mode the device shows up as:

T:  Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#=  2 Spd=12   MxCh= 0
D:  Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs=  1
P:  Vendor=22b8 ProdID=900e Rev= 0.00
S:  Manufacturer=Motorola, Incorporated
S:  Product=Flash MZ600
C:* #Ifs= 1 Cfg#= 1 Atr=e0 MxPwr=500mA
I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option
E:  Ad=81(I) Atr=02(Bulk) MxPS=  64 Ivl=0ms
E:  Ad=01(O) Atr=02(Bulk) MxPS=  64 Ivl=0ms

And in 22b8:4281 "ram_downloader" mode the device shows up as:

T:  Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#=  2 Spd=12   MxCh= 0
D:  Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs=  1
P:  Vendor=22b8 ProdID=4281 Rev= 0.00
S:  Manufacturer=Motorola, Incorporated
S:  Product=Flash MZ600
C:* #Ifs= 1 Cfg#= 1 Atr=e0 MxPwr=500mA
I:* If#= 0 Alt= 0 #EPs= 2 Cls=0a(data ) Sub=00 Prot=fc Driver=option
E:  Ad=81(I) Atr=02(Bulk) MxPS=  64 Ivl=0ms
E:  Ad=01(O) Atr=02(Bulk) MxPS=  64 Ivl=0ms

Cc: Bjørn Mork <bjorn@mork.no>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Lars Melin <larsm17@gmail.com>
Cc: Marcel Partap <mpartap@gmx.net>
Cc: Merlijn Wajer <merlijn@wizzup.org>
Cc: Michael Scott <hashcode0f@gmail.com>
Cc: NeKit <nekit1000@gmail.com>
Cc: Pavel Machek <pavel@ucw.cz>
Cc: Sebastian Reichel <sre@kernel.org>
Tested-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/option.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index f2c19660ed16..546d7f610a24 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -968,6 +968,11 @@ static const struct usb_device_id option_ids[] = {
 	{ USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x7B) },
 	{ USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, 0xff, 0x06, 0x7C) },
 
+	/* Motorola devices */
+	{ USB_DEVICE_AND_INTERFACE_INFO(0x22b8, 0x2a70, 0xff, 0xff, 0xff) },	/* mdm6600 */
+	{ USB_DEVICE_AND_INTERFACE_INFO(0x22b8, 0x2e0a, 0xff, 0xff, 0xff) },	/* mdm9600 */
+	{ USB_DEVICE_AND_INTERFACE_INFO(0x22b8, 0x4281, 0x0a, 0x00, 0xfc) },	/* mdm ram dl */
+	{ USB_DEVICE_AND_INTERFACE_INFO(0x22b8, 0x900e, 0xff, 0xff, 0xff) },	/* mdm qc dl */
 
 	{ USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_V640) },
 	{ USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_V620) },

From e5d8badf37e6b547842f2fcde10361b29e08bd36 Mon Sep 17 00:00:00 2001
From: Bob Ham <bob.ham@puri.sm>
Date: Wed, 24 Jul 2019 07:52:26 -0700
Subject: [PATCH 138/157] USB: serial: option: add the BroadMobi BM818 card

Add a VID:PID for the BroadMobi BM818 M.2 card

T:  Bus=01 Lev=03 Prnt=40 Port=03 Cnt=01 Dev#= 44 Spd=480 MxCh= 0
D:  Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs=  1
P:  Vendor=2020 ProdID=2060 Rev=00.00
S:  Manufacturer=Qualcomm, Incorporated
S:  Product=Qualcomm CDMA Technologies MSM
C:  #Ifs= 5 Cfg#= 1 Atr=e0 MxPwr=500mA
I:  If#=0x0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none)
I:  If#=0x1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none)
I:  If#=0x2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none)
I:  If#=0x3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=fe Prot=ff Driver=(none)
I:  If#=0x4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none)

Signed-off-by: Bob Ham <bob.ham@puri.sm>
Signed-off-by: Angus Ainslie (Purism) <angus@akkea.ca>
Cc: stable <stable@vger.kernel.org>
[ johan: use USB_DEVICE_INTERFACE_CLASS() ]
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/option.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 546d7f610a24..38e920ac7f82 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -1965,6 +1965,8 @@ static const struct usb_device_id option_ids[] = {
 	{ USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x7e11, 0xff, 0xff, 0xff) },	/* D-Link DWM-156/A3 */
 	{ USB_DEVICE_INTERFACE_CLASS(0x2020, 0x2031, 0xff),			/* Olicard 600 */
 	  .driver_info = RSVD(4) },
+	{ USB_DEVICE_INTERFACE_CLASS(0x2020, 0x2060, 0xff),			/* BroadMobi BM818 */
+	  .driver_info = RSVD(4) },
 	{ USB_DEVICE_INTERFACE_CLASS(0x2020, 0x4000, 0xff) },			/* OLICARD300 - MT6225 */
 	{ USB_DEVICE(INOVIA_VENDOR_ID, INOVIA_SEW858) },
 	{ USB_DEVICE(VIATELECOM_VENDOR_ID, VIATELECOM_PRODUCT_CDS7) },

From c52873e5a1ef72f845526d9f6a50704433f9c625 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oneukum@suse.com>
Date: Thu, 8 Aug 2019 16:21:19 +0200
Subject: [PATCH 139/157] usb: cdc-acm: make sure a refcount is taken early
 enough

destroy() will decrement the refcount on the interface, so that
it needs to be taken so early that it never undercounts.

Fixes: 7fb57a019f94e ("USB: cdc-acm: Fix potential deadlock (lockdep warning)")
Cc: stable <stable@vger.kernel.org>
Reported-and-tested-by: syzbot+1b2449b7b5dc240d107a@syzkaller.appspotmail.com
Signed-off-by: Oliver Neukum <oneukum@suse.com>
Link: https://lore.kernel.org/r/20190808142119.7998-1-oneukum@suse.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/class/cdc-acm.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index 183b41753c98..62f4fb9b362f 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -1301,10 +1301,6 @@ made_compressed_probe:
 	tty_port_init(&acm->port);
 	acm->port.ops = &acm_port_ops;
 
-	minor = acm_alloc_minor(acm);
-	if (minor < 0)
-		goto alloc_fail1;
-
 	ctrlsize = usb_endpoint_maxp(epctrl);
 	readsize = usb_endpoint_maxp(epread) *
 				(quirks == SINGLE_RX_URB ? 1 : 2);
@@ -1312,6 +1308,13 @@ made_compressed_probe:
 	acm->writesize = usb_endpoint_maxp(epwrite) * 20;
 	acm->control = control_interface;
 	acm->data = data_interface;
+
+	usb_get_intf(acm->control); /* undone in destruct() */
+
+	minor = acm_alloc_minor(acm);
+	if (minor < 0)
+		goto alloc_fail1;
+
 	acm->minor = minor;
 	acm->dev = usb_dev;
 	if (h.usb_cdc_acm_descriptor)
@@ -1458,7 +1461,6 @@ skip_countries:
 	usb_driver_claim_interface(&acm_driver, data_interface, acm);
 	usb_set_intfdata(data_interface, acm);
 
-	usb_get_intf(control_interface);
 	tty_dev = tty_port_register_device(&acm->port, acm_tty_driver, minor,
 			&control_interface->dev);
 	if (IS_ERR(tty_dev)) {

From 54364278fb3cabdea51d6398b07c87415065b3fc Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oneukum@suse.com>
Date: Tue, 13 Aug 2019 11:35:41 +0200
Subject: [PATCH 140/157] USB: CDC: fix sanity checks in CDC union parser

A few checks checked for the size of the pointer to a structure
instead of the structure itself. Copy & paste issue presumably.

Fixes: e4c6fb7794982 ("usbnet: move the CDC parser into USB core")
Cc: stable <stable@vger.kernel.org>
Reported-by: syzbot+45a53506b65321c1fe91@syzkaller.appspotmail.com
Signed-off-by: Oliver Neukum <oneukum@suse.com>
Link: https://lore.kernel.org/r/20190813093541.18889-1-oneukum@suse.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/message.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c
index e844bb7b5676..5adf489428aa 100644
--- a/drivers/usb/core/message.c
+++ b/drivers/usb/core/message.c
@@ -2218,14 +2218,14 @@ int cdc_parse_cdc_header(struct usb_cdc_parsed_header *hdr,
 				(struct usb_cdc_dmm_desc *)buffer;
 			break;
 		case USB_CDC_MDLM_TYPE:
-			if (elength < sizeof(struct usb_cdc_mdlm_desc *))
+			if (elength < sizeof(struct usb_cdc_mdlm_desc))
 				goto next_desc;
 			if (desc)
 				return -EINVAL;
 			desc = (struct usb_cdc_mdlm_desc *)buffer;
 			break;
 		case USB_CDC_MDLM_DETAIL_TYPE:
-			if (elength < sizeof(struct usb_cdc_mdlm_detail_desc *))
+			if (elength < sizeof(struct usb_cdc_mdlm_detail_desc))
 				goto next_desc;
 			if (detail)
 				return -EINVAL;

From 777758888ffe59ef754cc39ab2f275dc277732f4 Mon Sep 17 00:00:00 2001
From: Hans Ulli Kroll <ulli.kroll@googlemail.com>
Date: Sat, 10 Aug 2019 17:04:58 +0200
Subject: [PATCH 141/157] usb: host: fotg2: restart hcd after port reset

On the Gemini SoC the FOTG2 stalls after port reset
so restart the HCD after each port reset.

Signed-off-by: Hans Ulli Kroll <ulli.kroll@googlemail.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20190810150458.817-1-linus.walleij@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/fotg210-hcd.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/usb/host/fotg210-hcd.c b/drivers/usb/host/fotg210-hcd.c
index 77cc36efae95..0dbfa5c10703 100644
--- a/drivers/usb/host/fotg210-hcd.c
+++ b/drivers/usb/host/fotg210-hcd.c
@@ -1629,6 +1629,10 @@ static int fotg210_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
 			/* see what we found out */
 			temp = check_reset_complete(fotg210, wIndex, status_reg,
 					fotg210_readl(fotg210, status_reg));
+
+			/* restart schedule */
+			fotg210->command |= CMD_RUN;
+			fotg210_writel(fotg210, fotg210->command, &fotg210->regs->command);
 		}
 
 		if (!(temp & (PORT_RESUME|PORT_RESET))) {

From 141822aa3f79efc8a2ec3ed464f2fd2c93ccd803 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Draszik?= <git@andred.net>
Date: Sat, 10 Aug 2019 16:07:58 +0100
Subject: [PATCH 142/157] usb: chipidea: imx: fix EPROBE_DEFER support during
 driver probe
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If driver probe needs to be deferred, e.g. because ci_hdrc_add_device()
isn't ready yet, this driver currently misbehaves badly:
    a) success is still reported to the driver core (meaning a 2nd
       probe attempt will never be done), leaving the driver in
       a dysfunctional state and the hardware unusable

    b) driver remove / shutdown OOPSes:
    [  206.786916] Unable to handle kernel paging request at virtual address fffffdff
    [  206.794148] pgd = 880b9f82
    [  206.796890] [fffffdff] *pgd=abf5e861, *pte=00000000, *ppte=00000000
    [  206.803179] Internal error: Oops: 37 [#1] PREEMPT SMP ARM
    [  206.808581] Modules linked in: wl18xx evbug
    [  206.813308] CPU: 1 PID: 1 Comm: systemd-shutdow Not tainted 4.19.35+gf345c93b4195 #1
    [  206.821053] Hardware name: Freescale i.MX7 Dual (Device Tree)
    [  206.826813] PC is at ci_hdrc_remove_device+0x4/0x20
    [  206.831699] LR is at ci_hdrc_imx_remove+0x20/0xe8
    [  206.836407] pc : [<805cd4b0>]    lr : [<805d62cc>]    psr: 20000013
    [  206.842678] sp : a806be40  ip : 00000001  fp : 80adbd3c
    [  206.847906] r10: 80b1b794  r9 : 80d5dfe0  r8 : a8192c44
    [  206.853136] r7 : 80db93a0  r6 : a8192c10  r5 : a8192c00  r4 : a93a4a00
    [  206.859668] r3 : 00000000  r2 : a8192ce4  r1 : ffffffff  r0 : fffffdfb
    [  206.866201] Flags: nzCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
    [  206.873341] Control: 10c5387d  Table: a9e0c06a  DAC: 00000051
    [  206.879092] Process systemd-shutdow (pid: 1, stack limit = 0xb271353c)
    [  206.885624] Stack: (0xa806be40 to 0xa806c000)
    [  206.889992] be40: a93a4a00 805d62cc a8192c1c a8170e10 a8192c10 8049a490 80d04d08 00000000
    [  206.898179] be60: 00000000 80d0da2c fee1dead 00000000 a806a000 00000058 00000000 80148b08
    [  206.906366] be80: 01234567 80148d8c a9858600 00000000 00000000 00000000 00000000 80d04d08
    [  206.914553] bea0: 00000000 00000000 a82741e0 a9858600 00000024 00000002 a9858608 00000005
    [  206.922740] bec0: 0000001e 8022c058 00000000 00000000 a806bf14 a9858600 00000000 a806befc
    [  206.930927] bee0: a806bf78 00000000 7ee12c30 8022c18c a806bef8 a806befc 00000000 00000001
    [  206.939115] bf00: 00000000 00000024 a806bf14 00000005 7ee13b34 7ee12c68 00000004 7ee13f20
    [  206.947302] bf20: 00000010 7ee12c7c 00000005 7ee12d04 0000000a 76e7dc00 00000001 80d0f140
    [  206.955490] bf40: ab637880 a974de40 60000013 80d0f140 ab6378a0 80d04d08 a8080470 a9858600
    [  206.963677] bf60: a9858600 00000000 00000000 8022c24c 00000000 80144310 00000000 00000000
    [  206.971864] bf80: 80101204 80d04d08 00000000 80d04d08 00000000 00000000 00000003 00000058
    [  206.980051] bfa0: 80101204 80101000 00000000 00000000 fee1dead 28121969 01234567 00000000
    [  206.988237] bfc0: 00000000 00000000 00000003 00000058 00000000 00000000 00000000 00000000
    [  206.996425] bfe0: 0049ffb0 7ee13d58 0048a84b 76f245a6 60000030 fee1dead 00000000 00000000
    [  207.004622] [<805cd4b0>] (ci_hdrc_remove_device) from [<805d62cc>] (ci_hdrc_imx_remove+0x20/0xe8)
    [  207.013509] [<805d62cc>] (ci_hdrc_imx_remove) from [<8049a490>] (device_shutdown+0x16c/0x218)
    [  207.022050] [<8049a490>] (device_shutdown) from [<80148b08>] (kernel_restart+0xc/0x50)
    [  207.029980] [<80148b08>] (kernel_restart) from [<80148d8c>] (sys_reboot+0xf4/0x1f0)
    [  207.037648] [<80148d8c>] (sys_reboot) from [<80101000>] (ret_fast_syscall+0x0/0x54)
    [  207.045308] Exception stack(0xa806bfa8 to 0xa806bff0)
    [  207.050368] bfa0:                   00000000 00000000 fee1dead 28121969 01234567 00000000
    [  207.058554] bfc0: 00000000 00000000 00000003 00000058 00000000 00000000 00000000 00000000
    [  207.066737] bfe0: 0049ffb0 7ee13d58 0048a84b 76f245a6
    [  207.071799] Code: ebffffa8 e3a00000 e8bd8010 e92d4010 (e5904004)
    [  207.078021] ---[ end trace be47424e3fd46e9f ]---
    [  207.082647] Kernel panic - not syncing: Fatal exception
    [  207.087894] ---[ end Kernel panic - not syncing: Fatal exception ]---

    c) the error path in combination with driver removal causes
       imbalanced calls to the clk_*() and pm_()* APIs

a) happens because the original intended return value is
   overwritten (with 0) by the return code of
   regulator_disable() in ci_hdrc_imx_probe()'s error path
b) happens because ci_pdev is -EPROBE_DEFER, which causes
   ci_hdrc_remove_device() to OOPS

Fix a) by being more careful in ci_hdrc_imx_probe()'s error
path and not overwriting the real error code

Fix b) by calling the respective cleanup functions during
remove only when needed (when ci_pdev != NULL, i.e. when
everything was initialised correctly). This also has the
side effect of not causing imbalanced clk_*() and pm_*()
API calls as part of the error code path.

Fixes: 7c8e8909417e ("usb: chipidea: imx: add HSIC support")
Signed-off-by: André Draszik <git@andred.net>
Cc: stable <stable@vger.kernel.org>
CC: Peter Chen <Peter.Chen@nxp.com>
CC: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
CC: Shawn Guo <shawnguo@kernel.org>
CC: Sascha Hauer <s.hauer@pengutronix.de>
CC: Pengutronix Kernel Team <kernel@pengutronix.de>
CC: Fabio Estevam <festevam@gmail.com>
CC: NXP Linux Team <linux-imx@nxp.com>
CC: linux-usb@vger.kernel.org
CC: linux-arm-kernel@lists.infradead.org
CC: linux-kernel@vger.kernel.org
Link: https://lore.kernel.org/r/20190810150758.17694-1-git@andred.net
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/chipidea/ci_hdrc_imx.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/drivers/usb/chipidea/ci_hdrc_imx.c b/drivers/usb/chipidea/ci_hdrc_imx.c
index b5abfe89190c..df8812c30640 100644
--- a/drivers/usb/chipidea/ci_hdrc_imx.c
+++ b/drivers/usb/chipidea/ci_hdrc_imx.c
@@ -454,9 +454,11 @@ err_clk:
 	imx_disable_unprepare_clks(dev);
 disable_hsic_regulator:
 	if (data->hsic_pad_regulator)
-		ret = regulator_disable(data->hsic_pad_regulator);
+		/* don't overwrite original ret (cf. EPROBE_DEFER) */
+		regulator_disable(data->hsic_pad_regulator);
 	if (pdata.flags & CI_HDRC_PMQOS)
 		pm_qos_remove_request(&data->pm_qos_req);
+	data->ci_pdev = NULL;
 	return ret;
 }
 
@@ -469,14 +471,17 @@ static int ci_hdrc_imx_remove(struct platform_device *pdev)
 		pm_runtime_disable(&pdev->dev);
 		pm_runtime_put_noidle(&pdev->dev);
 	}
-	ci_hdrc_remove_device(data->ci_pdev);
+	if (data->ci_pdev)
+		ci_hdrc_remove_device(data->ci_pdev);
 	if (data->override_phy_control)
 		usb_phy_shutdown(data->phy);
-	imx_disable_unprepare_clks(&pdev->dev);
-	if (data->plat_data->flags & CI_HDRC_PMQOS)
-		pm_qos_remove_request(&data->pm_qos_req);
-	if (data->hsic_pad_regulator)
-		regulator_disable(data->hsic_pad_regulator);
+	if (data->ci_pdev) {
+		imx_disable_unprepare_clks(&pdev->dev);
+		if (data->plat_data->flags & CI_HDRC_PMQOS)
+			pm_qos_remove_request(&data->pm_qos_req);
+		if (data->hsic_pad_regulator)
+			regulator_disable(data->hsic_pad_regulator);
+	}
 
 	return 0;
 }

From dd3ecf17ba70a70d2c9ef9ba725281b84f8eef12 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sun, 11 Aug 2019 10:05:15 +0200
Subject: [PATCH 143/157] usb: don't create dma pools for HCDs with a
 localmem_pool

If the HCD provides a localmem pool we will never use the DMA pools, so
don't create them.

Fixes: b0310c2f09bb ("USB: use genalloc for USB HCs with local memory")
Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20190811080520.21712-2-hch@lst.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/buffer.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/core/buffer.c b/drivers/usb/core/buffer.c
index 1359b78a624e..1a5b3dcae930 100644
--- a/drivers/usb/core/buffer.c
+++ b/drivers/usb/core/buffer.c
@@ -66,9 +66,9 @@ int hcd_buffer_create(struct usb_hcd *hcd)
 	char		name[16];
 	int		i, size;
 
-	if (!IS_ENABLED(CONFIG_HAS_DMA) ||
-	    (!is_device_dma_capable(hcd->self.sysdev) &&
-	     !hcd->localmem_pool))
+	if (hcd->localmem_pool ||
+	    !IS_ENABLED(CONFIG_HAS_DMA) ||
+	    !is_device_dma_capable(hcd->self.sysdev))
 		return 0;
 
 	for (i = 0; i < HCD_BUFFER_POOLS; i++) {

From edfbcb321faf07ca970e4191abe061deeb7d3788 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sun, 11 Aug 2019 10:05:16 +0200
Subject: [PATCH 144/157] usb: add a hcd_uses_dma helper

The USB buffer allocation code is the only place in the usb core (and in
fact the whole kernel) that uses is_device_dma_capable, while the URB
mapping code uses the uses_dma flag in struct usb_bus.  Switch the buffer
allocation to use the uses_dma flag used by the rest of the USB code,
and create a helper in hcd.h that checks this flag as well as the
CONFIG_HAS_DMA to simplify the caller a bit.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20190811080520.21712-3-hch@lst.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/buffer.c | 10 +++-------
 drivers/usb/core/hcd.c    |  4 ++--
 drivers/usb/dwc2/hcd.c    |  2 +-
 include/linux/usb.h       |  2 +-
 include/linux/usb/hcd.h   |  3 +++
 5 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/drivers/usb/core/buffer.c b/drivers/usb/core/buffer.c
index 1a5b3dcae930..6cf22c27f2d2 100644
--- a/drivers/usb/core/buffer.c
+++ b/drivers/usb/core/buffer.c
@@ -66,9 +66,7 @@ int hcd_buffer_create(struct usb_hcd *hcd)
 	char		name[16];
 	int		i, size;
 
-	if (hcd->localmem_pool ||
-	    !IS_ENABLED(CONFIG_HAS_DMA) ||
-	    !is_device_dma_capable(hcd->self.sysdev))
+	if (hcd->localmem_pool || !hcd_uses_dma(hcd))
 		return 0;
 
 	for (i = 0; i < HCD_BUFFER_POOLS; i++) {
@@ -129,8 +127,7 @@ void *hcd_buffer_alloc(
 		return gen_pool_dma_alloc(hcd->localmem_pool, size, dma);
 
 	/* some USB hosts just use PIO */
-	if (!IS_ENABLED(CONFIG_HAS_DMA) ||
-	    !is_device_dma_capable(bus->sysdev)) {
+	if (!hcd_uses_dma(hcd)) {
 		*dma = ~(dma_addr_t) 0;
 		return kmalloc(size, mem_flags);
 	}
@@ -160,8 +157,7 @@ void hcd_buffer_free(
 		return;
 	}
 
-	if (!IS_ENABLED(CONFIG_HAS_DMA) ||
-	    !is_device_dma_capable(bus->sysdev)) {
+	if (!hcd_uses_dma(hcd)) {
 		kfree(addr);
 		return;
 	}
diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 2ccbc2f83570..8592c0344fe8 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -1412,7 +1412,7 @@ int usb_hcd_map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb,
 	if (usb_endpoint_xfer_control(&urb->ep->desc)) {
 		if (hcd->self.uses_pio_for_control)
 			return ret;
-		if (IS_ENABLED(CONFIG_HAS_DMA) && hcd->self.uses_dma) {
+		if (hcd_uses_dma(hcd)) {
 			if (is_vmalloc_addr(urb->setup_packet)) {
 				WARN_ONCE(1, "setup packet is not dma capable\n");
 				return -EAGAIN;
@@ -1446,7 +1446,7 @@ int usb_hcd_map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb,
 	dir = usb_urb_dir_in(urb) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
 	if (urb->transfer_buffer_length != 0
 	    && !(urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP)) {
-		if (IS_ENABLED(CONFIG_HAS_DMA) && hcd->self.uses_dma) {
+		if (hcd_uses_dma(hcd)) {
 			if (urb->num_sgs) {
 				int n;
 
diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c
index ee144ff8af5b..111787a137ee 100644
--- a/drivers/usb/dwc2/hcd.c
+++ b/drivers/usb/dwc2/hcd.c
@@ -4608,7 +4608,7 @@ static int _dwc2_hcd_urb_enqueue(struct usb_hcd *hcd, struct urb *urb,
 
 	buf = urb->transfer_buffer;
 
-	if (hcd->self.uses_dma) {
+	if (hcd_uses_dma(hcd)) {
 		if (!buf && (urb->transfer_dma & 3)) {
 			dev_err(hsotg->dev,
 				"%s: unaligned transfer with no transfer_buffer",
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 83d35d993e8c..e87826e23d59 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1457,7 +1457,7 @@ typedef void (*usb_complete_t)(struct urb *);
  * field rather than determining a dma address themselves.
  *
  * Note that transfer_buffer must still be set if the controller
- * does not support DMA (as indicated by bus.uses_dma) and when talking
+ * does not support DMA (as indicated by hcd_uses_dma()) and when talking
  * to root hub. If you have to trasfer between highmem zone and the device
  * on such controller, create a bounce buffer or bail out with an error.
  * If transfer_buffer cannot be set (is in highmem) and the controller is DMA
diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
index bab27ccc8ff5..a20e7815d814 100644
--- a/include/linux/usb/hcd.h
+++ b/include/linux/usb/hcd.h
@@ -422,6 +422,9 @@ static inline bool hcd_periodic_completion_in_progress(struct usb_hcd *hcd,
 	return hcd->high_prio_bh.completing_ep == ep;
 }
 
+#define hcd_uses_dma(hcd) \
+	(IS_ENABLED(CONFIG_HAS_DMA) && (hcd)->self.uses_dma)
+
 extern int usb_hcd_link_urb_to_ep(struct usb_hcd *hcd, struct urb *urb);
 extern int usb_hcd_check_unlink_urb(struct usb_hcd *hcd, struct urb *urb,
 		int status);

From 01fd150f4aff5bee03db5bda3b94c1bdbe9c6375 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Tue, 13 Aug 2019 16:01:20 -0700
Subject: [PATCH 145/157] misc: xilinx-sdfec: fix dependency and build error

lib/devres.c, which implements devm_ioremap_resource(), is only built
when CONFIG_HAS_IOMEM is set/enabled, so XILINX_SDFEC should depend
on HAS_IOMEM.  Fixes this build error (as seen on UML builds):

ERROR: "devm_ioremap_resource" [drivers/misc/xilinx_sdfec.ko] undefined!

Fixes: 76d83e1c3233 ("misc: xilinx-sdfec: add core driver")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Derek Kiernan <derek.kiernan@xilinx.com>
Cc: Dragan Cvetic <dragan.cvetic@xilinx.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Link: https://lore.kernel.org/r/f9004be5-9925-327b-3ec2-6506e46fe565@infradead.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 6abfc8e92fcc..16900357afc2 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -465,6 +465,7 @@ config PCI_ENDPOINT_TEST
 
 config XILINX_SDFEC
 	tristate "Xilinx SDFEC 16"
+	depends on HAS_IOMEM
 	help
 	  This option enables support for the Xilinx SDFEC (Soft Decision
 	  Forward Error Correction) driver. This enables a char driver

From 99c79f6692ccdc42e04deea8a36e22bb48168a62 Mon Sep 17 00:00:00 2001
From: Aleix Roca Nonell <aleix.rocanonell@bsc.es>
Date: Thu, 15 Aug 2019 14:03:22 +0200
Subject: [PATCH 146/157] io_uring: fix manual setup of iov_iter for fixed
 buffers

Commit bd11b3a391e3 ("io_uring: don't use iov_iter_advance() for fixed
buffers") introduced an optimization to avoid using the slow
iov_iter_advance by manually populating the iov_iter iterator in some
cases.

However, the computation of the iterator count field was erroneous: The
first bvec was always accounted for an extent of page size even if the
bvec length was smaller.

In consequence, some I/O operations on fixed buffers were unable to
operate on the full extent of the buffer, consistently skipping some
bytes at the end of it.

Fixes: bd11b3a391e3 ("io_uring: don't use iov_iter_advance() for fixed buffers")
Cc: stable@vger.kernel.org
Signed-off-by: Aleix Roca Nonell <aleix.rocanonell@bsc.es>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index d542f1cf4428..aa25b5bbd4ae 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1097,10 +1097,8 @@ static int io_import_fixed(struct io_ring_ctx *ctx, int rw,
 
 			iter->bvec = bvec + seg_skip;
 			iter->nr_segs -= seg_skip;
-			iter->count -= (seg_skip << PAGE_SHIFT);
+			iter->count -= bvec->bv_len + offset;
 			iter->iov_offset = offset & ~PAGE_MASK;
-			if (iter->iov_offset)
-				iter->count -= iter->iov_offset;
 		}
 	}
 

From 7b6620d7db566a46f49b4b9deab9fa061fd4b59b Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Thu, 15 Aug 2019 11:09:16 -0600
Subject: [PATCH 147/157] block: remove REQ_NOWAIT_INLINE

We had a few issues with this code, and there's still a problem around
how we deal with error handling for chained/split bios. For now, just
revert the code and we'll try again with a thoroug solution. This
reverts commits:

e15c2ffa1091 ("block: fix O_DIRECT error handling for bio fragments")
0eb6ddfb865c ("block: Fix __blkdev_direct_IO() for bio fragments")
6a43074e2f46 ("block: properly handle IOCB_NOWAIT for async O_DIRECT IO")
893a1c97205a ("blk-mq: allow REQ_NOWAIT to return an error inline")

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c            |  8 ++-----
 fs/block_dev.c            | 49 ++++-----------------------------------
 include/linux/blk_types.h |  5 +---
 3 files changed, 8 insertions(+), 54 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index a8e6a58f5f28..0835f4d8d42e 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1958,13 +1958,9 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 	rq = blk_mq_get_request(q, bio, &data);
 	if (unlikely(!rq)) {
 		rq_qos_cleanup(q, bio);
-
-		cookie = BLK_QC_T_NONE;
-		if (bio->bi_opf & REQ_NOWAIT_INLINE)
-			cookie = BLK_QC_T_EAGAIN;
-		else if (bio->bi_opf & REQ_NOWAIT)
+		if (bio->bi_opf & REQ_NOWAIT)
 			bio_wouldblock_error(bio);
-		return cookie;
+		return BLK_QC_T_NONE;
 	}
 
 	trace_block_getrq(q, bio, bio->bi_opf);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index eb657ab94060..677cb364d33f 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -345,24 +345,15 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 	struct bio *bio;
 	bool is_poll = (iocb->ki_flags & IOCB_HIPRI) != 0;
 	bool is_read = (iov_iter_rw(iter) == READ), is_sync;
-	bool nowait = (iocb->ki_flags & IOCB_NOWAIT) != 0;
 	loff_t pos = iocb->ki_pos;
 	blk_qc_t qc = BLK_QC_T_NONE;
-	gfp_t gfp;
-	int ret;
+	int ret = 0;
 
 	if ((pos | iov_iter_alignment(iter)) &
 	    (bdev_logical_block_size(bdev) - 1))
 		return -EINVAL;
 
-	if (nowait)
-		gfp = GFP_NOWAIT;
-	else
-		gfp = GFP_KERNEL;
-
-	bio = bio_alloc_bioset(gfp, nr_pages, &blkdev_dio_pool);
-	if (!bio)
-		return -EAGAIN;
+	bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, &blkdev_dio_pool);
 
 	dio = container_of(bio, struct blkdev_dio, bio);
 	dio->is_sync = is_sync = is_sync_kiocb(iocb);
@@ -384,7 +375,6 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 	if (!is_poll)
 		blk_start_plug(&plug);
 
-	ret = 0;
 	for (;;) {
 		bio_set_dev(bio, bdev);
 		bio->bi_iter.bi_sector = pos >> 9;
@@ -409,14 +399,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 			task_io_account_write(bio->bi_iter.bi_size);
 		}
 
-		/*
-		 * Tell underlying layer to not block for resource shortage.
-		 * And if we would have blocked, return error inline instead
-		 * of through the bio->bi_end_io() callback.
-		 */
-		if (nowait)
-			bio->bi_opf |= (REQ_NOWAIT | REQ_NOWAIT_INLINE);
-
+		dio->size += bio->bi_iter.bi_size;
 		pos += bio->bi_iter.bi_size;
 
 		nr_pages = iov_iter_npages(iter, BIO_MAX_PAGES);
@@ -428,13 +411,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 				polled = true;
 			}
 
-			dio->size += bio->bi_iter.bi_size;
 			qc = submit_bio(bio);
-			if (qc == BLK_QC_T_EAGAIN) {
-				dio->size -= bio->bi_iter.bi_size;
-				ret = -EAGAIN;
-				goto error;
-			}
 
 			if (polled)
 				WRITE_ONCE(iocb->ki_cookie, qc);
@@ -455,19 +432,8 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 			atomic_inc(&dio->ref);
 		}
 
-		dio->size += bio->bi_iter.bi_size;
-		qc = submit_bio(bio);
-		if (qc == BLK_QC_T_EAGAIN) {
-			dio->size -= bio->bi_iter.bi_size;
-			ret = -EAGAIN;
-			goto error;
-		}
-
-		bio = bio_alloc(gfp, nr_pages);
-		if (!bio) {
-			ret = -EAGAIN;
-			goto error;
-		}
+		submit_bio(bio);
+		bio = bio_alloc(GFP_KERNEL, nr_pages);
 	}
 
 	if (!is_poll)
@@ -487,7 +453,6 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 	}
 	__set_current_state(TASK_RUNNING);
 
-out:
 	if (!ret)
 		ret = blk_status_to_errno(dio->bio.bi_status);
 	if (likely(!ret))
@@ -495,10 +460,6 @@ out:
 
 	bio_put(&dio->bio);
 	return ret;
-error:
-	if (!is_poll)
-		blk_finish_plug(&plug);
-	goto out;
 }
 
 static ssize_t
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 1b1fa1557e68..feff3fe4467e 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -311,7 +311,6 @@ enum req_flag_bits {
 	__REQ_RAHEAD,		/* read ahead, can fail anytime */
 	__REQ_BACKGROUND,	/* background IO */
 	__REQ_NOWAIT,           /* Don't wait if request will block */
-	__REQ_NOWAIT_INLINE,	/* Return would-block error inline */
 	/*
 	 * When a shared kthread needs to issue a bio for a cgroup, doing
 	 * so synchronously can lead to priority inversions as the kthread
@@ -346,7 +345,6 @@ enum req_flag_bits {
 #define REQ_RAHEAD		(1ULL << __REQ_RAHEAD)
 #define REQ_BACKGROUND		(1ULL << __REQ_BACKGROUND)
 #define REQ_NOWAIT		(1ULL << __REQ_NOWAIT)
-#define REQ_NOWAIT_INLINE	(1ULL << __REQ_NOWAIT_INLINE)
 #define REQ_CGROUP_PUNT		(1ULL << __REQ_CGROUP_PUNT)
 
 #define REQ_NOUNMAP		(1ULL << __REQ_NOUNMAP)
@@ -420,13 +418,12 @@ static inline int op_stat_group(unsigned int op)
 
 typedef unsigned int blk_qc_t;
 #define BLK_QC_T_NONE		-1U
-#define BLK_QC_T_EAGAIN		-2U
 #define BLK_QC_T_SHIFT		16
 #define BLK_QC_T_INTERNAL	(1U << 31)
 
 static inline bool blk_qc_t_valid(blk_qc_t cookie)
 {
-	return cookie != BLK_QC_T_NONE && cookie != BLK_QC_T_EAGAIN;
+	return cookie != BLK_QC_T_NONE;
 }
 
 static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie)

From a982eeb09b6030e567b8b815277c8c9197168040 Mon Sep 17 00:00:00 2001
From: Jackie Liu <liuyun01@kylinos.cn>
Date: Wed, 14 Aug 2019 17:35:22 +0800
Subject: [PATCH 148/157] io_uring: fix an issue when IOSQE_IO_LINK is inserted
 into defer list

This patch may fix two issues:

First, when IOSQE_IO_DRAIN set, the next IOs need to be inserted into
defer list to delay execution, but link io will be actively scheduled to
run by calling io_queue_sqe.

Second, when multiple LINK_IOs are inserted together with defer_list,
the LINK_IO is no longer keep order.

   |-------------|
   |   LINK_IO   |      ----> insert to defer_list  -----------
   |-------------|                                            |
   |   LINK_IO   |      ----> insert to defer_list  ----------|
   |-------------|                                            |
   |   LINK_IO   |      ----> insert to defer_list  ----------|
   |-------------|                                            |
   |   NORMAL_IO |      ----> insert to defer_list  ----------|
   |-------------|                                            |
                                                              |
                              queue_work at same time   <-----|

Fixes: 9e645e1105c ("io_uring: add support for sqe links")
Signed-off-by: Jackie Liu <liuyun01@kylinos.cn>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index aa25b5bbd4ae..24bbe3cb7ad4 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2023,6 +2023,15 @@ static int io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
 {
 	int ret;
 
+	ret = io_req_defer(ctx, req, s->sqe);
+	if (ret) {
+		if (ret != -EIOCBQUEUED) {
+			io_free_req(req);
+			io_cqring_add_event(ctx, s->sqe->user_data, ret);
+		}
+		return 0;
+	}
+
 	ret = __io_submit_sqe(ctx, req, s, true);
 	if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) {
 		struct io_uring_sqe *sqe_copy;
@@ -2095,13 +2104,6 @@ err:
 		return;
 	}
 
-	ret = io_req_defer(ctx, req, s->sqe);
-	if (ret) {
-		if (ret != -EIOCBQUEUED)
-			goto err_req;
-		return;
-	}
-
 	/*
 	 * If we already have a head request, queue this one for async
 	 * submittal once the head completes. If we don't have a head but

From 19bce474c45be69a284ecee660aa12d8f1e88f18 Mon Sep 17 00:00:00 2001
From: Hui Peng <benquike@gmail.com>
Date: Thu, 15 Aug 2019 00:31:34 -0400
Subject: [PATCH 149/157] ALSA: usb-audio: Fix a stack buffer overflow bug in
 check_input_term

`check_input_term` recursively calls itself with input from
device side (e.g., uac_input_terminal_descriptor.bCSourceID)
as argument (id). In `check_input_term`, if `check_input_term`
is called with the same `id` argument as the caller, it triggers
endless recursive call, resulting kernel space stack overflow.

This patch fixes the bug by adding a bitmap to `struct mixer_build`
to keep track of the checked ids and stop the execution if some id
has been checked (similar to how parse_audio_unit handles unitid
argument).

Reported-by: Hui Peng <benquike@gmail.com>
Reported-by: Mathias Payer <mathias.payer@nebelwelt.net>
Signed-off-by: Hui Peng <benquike@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/usb/mixer.c | 35 +++++++++++++++++++++++++++--------
 1 file changed, 27 insertions(+), 8 deletions(-)

diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c
index ea487378be17..b5927c3d5bc0 100644
--- a/sound/usb/mixer.c
+++ b/sound/usb/mixer.c
@@ -68,6 +68,7 @@ struct mixer_build {
 	unsigned char *buffer;
 	unsigned int buflen;
 	DECLARE_BITMAP(unitbitmap, MAX_ID_ELEMS);
+	DECLARE_BITMAP(termbitmap, MAX_ID_ELEMS);
 	struct usb_audio_term oterm;
 	const struct usbmix_name_map *map;
 	const struct usbmix_selector_map *selector_map;
@@ -775,16 +776,25 @@ static int uac_mixer_unit_get_channels(struct mixer_build *state,
  * parse the source unit recursively until it reaches to a terminal
  * or a branched unit.
  */
-static int check_input_term(struct mixer_build *state, int id,
+static int __check_input_term(struct mixer_build *state, int id,
 			    struct usb_audio_term *term)
 {
 	int protocol = state->mixer->protocol;
 	int err;
 	void *p1;
+	unsigned char *hdr;
 
 	memset(term, 0, sizeof(*term));
-	while ((p1 = find_audio_control_unit(state, id)) != NULL) {
-		unsigned char *hdr = p1;
+	for (;;) {
+		/* a loop in the terminal chain? */
+		if (test_and_set_bit(id, state->termbitmap))
+			return -EINVAL;
+
+		p1 = find_audio_control_unit(state, id);
+		if (!p1)
+			break;
+
+		hdr = p1;
 		term->id = id;
 
 		if (protocol == UAC_VERSION_1 || protocol == UAC_VERSION_2) {
@@ -802,7 +812,7 @@ static int check_input_term(struct mixer_build *state, int id,
 
 					/* call recursively to verify that the
 					 * referenced clock entity is valid */
-					err = check_input_term(state, d->bCSourceID, term);
+					err = __check_input_term(state, d->bCSourceID, term);
 					if (err < 0)
 						return err;
 
@@ -836,7 +846,7 @@ static int check_input_term(struct mixer_build *state, int id,
 			case UAC2_CLOCK_SELECTOR: {
 				struct uac_selector_unit_descriptor *d = p1;
 				/* call recursively to retrieve the channel info */
-				err = check_input_term(state, d->baSourceID[0], term);
+				err = __check_input_term(state, d->baSourceID[0], term);
 				if (err < 0)
 					return err;
 				term->type = UAC3_SELECTOR_UNIT << 16; /* virtual type */
@@ -899,7 +909,7 @@ static int check_input_term(struct mixer_build *state, int id,
 
 				/* call recursively to verify that the
 				 * referenced clock entity is valid */
-				err = check_input_term(state, d->bCSourceID, term);
+				err = __check_input_term(state, d->bCSourceID, term);
 				if (err < 0)
 					return err;
 
@@ -950,7 +960,7 @@ static int check_input_term(struct mixer_build *state, int id,
 			case UAC3_CLOCK_SELECTOR: {
 				struct uac_selector_unit_descriptor *d = p1;
 				/* call recursively to retrieve the channel info */
-				err = check_input_term(state, d->baSourceID[0], term);
+				err = __check_input_term(state, d->baSourceID[0], term);
 				if (err < 0)
 					return err;
 				term->type = UAC3_SELECTOR_UNIT << 16; /* virtual type */
@@ -966,7 +976,7 @@ static int check_input_term(struct mixer_build *state, int id,
 					return -EINVAL;
 
 				/* call recursively to retrieve the channel info */
-				err = check_input_term(state, d->baSourceID[0], term);
+				err = __check_input_term(state, d->baSourceID[0], term);
 				if (err < 0)
 					return err;
 
@@ -984,6 +994,15 @@ static int check_input_term(struct mixer_build *state, int id,
 	return -ENODEV;
 }
 
+
+static int check_input_term(struct mixer_build *state, int id,
+			    struct usb_audio_term *term)
+{
+	memset(term, 0, sizeof(*term));
+	memset(state->termbitmap, 0, sizeof(state->termbitmap));
+	return __check_input_term(state, id, term);
+}
+
 /*
  * Feature Unit
  */

From a90118c445cc7f07781de26a9684d4ec58bfcfd1 Mon Sep 17 00:00:00 2001
From: John Hubbard <jhubbard@nvidia.com>
Date: Tue, 30 Jul 2019 22:46:27 -0700
Subject: [PATCH 150/157] x86/boot: Save fields explicitly, zero out everything
 else

Recent gcc compilers (gcc 9.1) generate warnings about an out of bounds
memset, if the memset goes accross several fields of a struct. This
generated a couple of warnings on x86_64 builds in sanitize_boot_params().

Fix this by explicitly saving the fields in struct boot_params
that are intended to be preserved, and zeroing all the rest.

[ tglx: Tagged for stable as it breaks the warning free build there as well ]

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Suggested-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20190731054627.5627-2-jhubbard@nvidia.com
---
 arch/x86/include/asm/bootparam_utils.h | 63 ++++++++++++++++++++------
 1 file changed, 48 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h
index 101eb944f13c..f5e90a849bca 100644
--- a/arch/x86/include/asm/bootparam_utils.h
+++ b/arch/x86/include/asm/bootparam_utils.h
@@ -18,6 +18,20 @@
  * Note: efi_info is commonly left uninitialized, but that field has a
  * private magic, so it is better to leave it unchanged.
  */
+
+#define sizeof_mbr(type, member) ({ sizeof(((type *)0)->member); })
+
+#define BOOT_PARAM_PRESERVE(struct_member)				\
+	{								\
+		.start = offsetof(struct boot_params, struct_member),	\
+		.len   = sizeof_mbr(struct boot_params, struct_member),	\
+	}
+
+struct boot_params_to_save {
+	unsigned int start;
+	unsigned int len;
+};
+
 static void sanitize_boot_params(struct boot_params *boot_params)
 {
 	/* 
@@ -35,21 +49,40 @@ static void sanitize_boot_params(struct boot_params *boot_params)
 	 * problems again.
 	 */
 	if (boot_params->sentinel) {
-		/* fields in boot_params are left uninitialized, clear them */
-		boot_params->acpi_rsdp_addr = 0;
-		memset(&boot_params->ext_ramdisk_image, 0,
-		       (char *)&boot_params->efi_info -
-			(char *)&boot_params->ext_ramdisk_image);
-		memset(&boot_params->kbd_status, 0,
-		       (char *)&boot_params->hdr -
-		       (char *)&boot_params->kbd_status);
-		memset(&boot_params->_pad7[0], 0,
-		       (char *)&boot_params->edd_mbr_sig_buffer[0] -
-			(char *)&boot_params->_pad7[0]);
-		memset(&boot_params->_pad8[0], 0,
-		       (char *)&boot_params->eddbuf[0] -
-			(char *)&boot_params->_pad8[0]);
-		memset(&boot_params->_pad9[0], 0, sizeof(boot_params->_pad9));
+		static struct boot_params scratch;
+		char *bp_base = (char *)boot_params;
+		char *save_base = (char *)&scratch;
+		int i;
+
+		const struct boot_params_to_save to_save[] = {
+			BOOT_PARAM_PRESERVE(screen_info),
+			BOOT_PARAM_PRESERVE(apm_bios_info),
+			BOOT_PARAM_PRESERVE(tboot_addr),
+			BOOT_PARAM_PRESERVE(ist_info),
+			BOOT_PARAM_PRESERVE(acpi_rsdp_addr),
+			BOOT_PARAM_PRESERVE(hd0_info),
+			BOOT_PARAM_PRESERVE(hd1_info),
+			BOOT_PARAM_PRESERVE(sys_desc_table),
+			BOOT_PARAM_PRESERVE(olpc_ofw_header),
+			BOOT_PARAM_PRESERVE(efi_info),
+			BOOT_PARAM_PRESERVE(alt_mem_k),
+			BOOT_PARAM_PRESERVE(scratch),
+			BOOT_PARAM_PRESERVE(e820_entries),
+			BOOT_PARAM_PRESERVE(eddbuf_entries),
+			BOOT_PARAM_PRESERVE(edd_mbr_sig_buf_entries),
+			BOOT_PARAM_PRESERVE(edd_mbr_sig_buffer),
+			BOOT_PARAM_PRESERVE(e820_table),
+			BOOT_PARAM_PRESERVE(eddbuf),
+		};
+
+		memset(&scratch, 0, sizeof(scratch));
+
+		for (i = 0; i < ARRAY_SIZE(to_save); i++) {
+			memcpy(save_base + to_save[i].start,
+			       bp_base + to_save[i].start, to_save[i].len);
+		}
+
+		memcpy(boot_params, save_base, sizeof(*boot_params));
 	}
 }
 

From b6143d10d23ebb4a77af311e8b8b7f019d0163e6 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Fri, 16 Aug 2019 14:57:43 +0100
Subject: [PATCH 151/157] arm64: ftrace: Ensure module ftrace trampoline is
 coherent with I-side

The initial support for dynamic ftrace trampolines in modules made use
of an indirect branch which loaded its target from the beginning of
a special section (e71a4e1bebaf7 ("arm64: ftrace: add support for far
branches to dynamic ftrace")). Since no instructions were being patched,
no cache maintenance was needed. However, later in be0f272bfc83 ("arm64:
ftrace: emit ftrace-mod.o contents through code") this code was reworked
to output the trampoline instructions directly into the PLT entry but,
unfortunately, the necessary cache maintenance was overlooked.

Add a call to __flush_icache_range() after writing the new trampoline
instructions but before patching in the branch to the trampoline.

Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: James Morse <james.morse@arm.com>
Cc: <stable@vger.kernel.org>
Fixes: be0f272bfc83 ("arm64: ftrace: emit ftrace-mod.o contents through code")
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/ftrace.c | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index 1285c7b2947f..171773257974 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -73,7 +73,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 
 	if (offset < -SZ_128M || offset >= SZ_128M) {
 #ifdef CONFIG_ARM64_MODULE_PLTS
-		struct plt_entry trampoline;
+		struct plt_entry trampoline, *dst;
 		struct module *mod;
 
 		/*
@@ -106,23 +106,27 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 		 * to check if the actual opcodes are in fact identical,
 		 * regardless of the offset in memory so use memcmp() instead.
 		 */
-		trampoline = get_plt_entry(addr, mod->arch.ftrace_trampoline);
-		if (memcmp(mod->arch.ftrace_trampoline, &trampoline,
-			   sizeof(trampoline))) {
-			if (plt_entry_is_initialized(mod->arch.ftrace_trampoline)) {
+		dst = mod->arch.ftrace_trampoline;
+		trampoline = get_plt_entry(addr, dst);
+		if (memcmp(dst, &trampoline, sizeof(trampoline))) {
+			if (plt_entry_is_initialized(dst)) {
 				pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n");
 				return -EINVAL;
 			}
 
 			/* point the trampoline to our ftrace entry point */
 			module_disable_ro(mod);
-			*mod->arch.ftrace_trampoline = trampoline;
+			*dst = trampoline;
 			module_enable_ro(mod, true);
 
-			/* update trampoline before patching in the branch */
-			smp_wmb();
+			/*
+			 * Ensure updated trampoline is visible to instruction
+			 * fetch before we patch in the branch.
+			 */
+			__flush_icache_range((unsigned long)&dst[0],
+					     (unsigned long)&dst[1]);
 		}
-		addr = (unsigned long)(void *)mod->arch.ftrace_trampoline;
+		addr = (unsigned long)dst;
 #else /* CONFIG_ARM64_MODULE_PLTS */
 		return -EINVAL;
 #endif /* CONFIG_ARM64_MODULE_PLTS */

From 5912e791f3018de0a007c8cfa9cb38c97d3e5f5c Mon Sep 17 00:00:00 2001
From: Adrian Vladu <avladu@cloudbasesolutions.com>
Date: Mon, 6 May 2019 17:27:37 +0000
Subject: [PATCH 152/157] tools: hv: fixed Python pep8/flake8 warnings for
 lsvmbus

Fixed pep8/flake8 python style code for lsvmbus tool.

The TAB indentation was on purpose ignored (pep8 rule W191) to make
sure the code is complying with the Linux code guideline.
The following command doe not show any warnings now:
pep8 --ignore=W191 lsvmbus
flake8 --ignore=W191 lsvmbus

Signed-off-by: Adrian Vladu <avladu@cloudbasesolutions.com>

Cc: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Cc: Sasha Levin <sashal@kernel.org>
Cc: Dexuan Cui <decui@microsoft.com>
Cc: Alessandro Pilotti <apilotti@cloudbasesolutions.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 tools/hv/lsvmbus | 75 +++++++++++++++++++++++++++---------------------
 1 file changed, 42 insertions(+), 33 deletions(-)

diff --git a/tools/hv/lsvmbus b/tools/hv/lsvmbus
index 55e7374bade0..099f2c44dbed 100644
--- a/tools/hv/lsvmbus
+++ b/tools/hv/lsvmbus
@@ -4,10 +4,10 @@
 import os
 from optparse import OptionParser
 
+help_msg = "print verbose messages. Try -vv, -vvv for  more verbose messages"
 parser = OptionParser()
-parser.add_option("-v", "--verbose", dest="verbose",
-		   help="print verbose messages. Try -vv, -vvv for \
-			more verbose messages", action="count")
+parser.add_option(
+	"-v", "--verbose", dest="verbose", help=help_msg, action="count")
 
 (options, args) = parser.parse_args()
 
@@ -21,27 +21,28 @@ if not os.path.isdir(vmbus_sys_path):
 	exit(-1)
 
 vmbus_dev_dict = {
-	'{0e0b6031-5213-4934-818b-38d90ced39db}' : '[Operating system shutdown]',
-	'{9527e630-d0ae-497b-adce-e80ab0175caf}' : '[Time Synchronization]',
-	'{57164f39-9115-4e78-ab55-382f3bd5422d}' : '[Heartbeat]',
-	'{a9a0f4e7-5a45-4d96-b827-8a841e8c03e6}' : '[Data Exchange]',
-	'{35fa2e29-ea23-4236-96ae-3a6ebacba440}' : '[Backup (volume checkpoint)]',
-	'{34d14be3-dee4-41c8-9ae7-6b174977c192}' : '[Guest services]',
-	'{525074dc-8985-46e2-8057-a307dc18a502}' : '[Dynamic Memory]',
-	'{cfa8b69e-5b4a-4cc0-b98b-8ba1a1f3f95a}' : 'Synthetic mouse',
-	'{f912ad6d-2b17-48ea-bd65-f927a61c7684}' : 'Synthetic keyboard',
-	'{da0a7802-e377-4aac-8e77-0558eb1073f8}' : 'Synthetic framebuffer adapter',
-	'{f8615163-df3e-46c5-913f-f2d2f965ed0e}' : 'Synthetic network adapter',
-	'{32412632-86cb-44a2-9b5c-50d1417354f5}' : 'Synthetic IDE Controller',
-	'{ba6163d9-04a1-4d29-b605-72e2ffb1dc7f}' : 'Synthetic SCSI Controller',
-	'{2f9bcc4a-0069-4af3-b76b-6fd0be528cda}' : 'Synthetic fiber channel adapter',
-	'{8c2eaf3d-32a7-4b09-ab99-bd1f1c86b501}' : 'Synthetic RDMA adapter',
-	'{44c4f61d-4444-4400-9d52-802e27ede19f}' : 'PCI Express pass-through',
-	'{276aacf4-ac15-426c-98dd-7521ad3f01fe}' : '[Reserved system device]',
-	'{f8e65716-3cb3-4a06-9a60-1889c5cccab5}' : '[Reserved system device]',
-	'{3375baf4-9e15-4b30-b765-67acb10d607b}' : '[Reserved system device]',
+	'{0e0b6031-5213-4934-818b-38d90ced39db}': '[Operating system shutdown]',
+	'{9527e630-d0ae-497b-adce-e80ab0175caf}': '[Time Synchronization]',
+	'{57164f39-9115-4e78-ab55-382f3bd5422d}': '[Heartbeat]',
+	'{a9a0f4e7-5a45-4d96-b827-8a841e8c03e6}': '[Data Exchange]',
+	'{35fa2e29-ea23-4236-96ae-3a6ebacba440}': '[Backup (volume checkpoint)]',
+	'{34d14be3-dee4-41c8-9ae7-6b174977c192}': '[Guest services]',
+	'{525074dc-8985-46e2-8057-a307dc18a502}': '[Dynamic Memory]',
+	'{cfa8b69e-5b4a-4cc0-b98b-8ba1a1f3f95a}': 'Synthetic mouse',
+	'{f912ad6d-2b17-48ea-bd65-f927a61c7684}': 'Synthetic keyboard',
+	'{da0a7802-e377-4aac-8e77-0558eb1073f8}': 'Synthetic framebuffer adapter',
+	'{f8615163-df3e-46c5-913f-f2d2f965ed0e}': 'Synthetic network adapter',
+	'{32412632-86cb-44a2-9b5c-50d1417354f5}': 'Synthetic IDE Controller',
+	'{ba6163d9-04a1-4d29-b605-72e2ffb1dc7f}': 'Synthetic SCSI Controller',
+	'{2f9bcc4a-0069-4af3-b76b-6fd0be528cda}': 'Synthetic fiber channel adapter',
+	'{8c2eaf3d-32a7-4b09-ab99-bd1f1c86b501}': 'Synthetic RDMA adapter',
+	'{44c4f61d-4444-4400-9d52-802e27ede19f}': 'PCI Express pass-through',
+	'{276aacf4-ac15-426c-98dd-7521ad3f01fe}': '[Reserved system device]',
+	'{f8e65716-3cb3-4a06-9a60-1889c5cccab5}': '[Reserved system device]',
+	'{3375baf4-9e15-4b30-b765-67acb10d607b}': '[Reserved system device]',
 }
 
+
 def get_vmbus_dev_attr(dev_name, attr):
 	try:
 		f = open('%s/%s/%s' % (vmbus_sys_path, dev_name, attr), 'r')
@@ -52,6 +53,7 @@ def get_vmbus_dev_attr(dev_name, attr):
 
 	return lines
 
+
 class VMBus_Dev:
 	pass
 
@@ -66,12 +68,13 @@ for f in os.listdir(vmbus_sys_path):
 
 	chn_vp_mapping = get_vmbus_dev_attr(f, 'channel_vp_mapping')
 	chn_vp_mapping = [c.strip() for c in chn_vp_mapping]
-	chn_vp_mapping = sorted(chn_vp_mapping,
-		key = lambda c : int(c.split(':')[0]))
+	chn_vp_mapping = sorted(
+		chn_vp_mapping, key=lambda c: int(c.split(':')[0]))
 
-	chn_vp_mapping = ['\tRel_ID=%s, target_cpu=%s' %
-				(c.split(':')[0], c.split(':')[1])
-					for c in chn_vp_mapping]
+	chn_vp_mapping = [
+		'\tRel_ID=%s, target_cpu=%s' %
+		(c.split(':')[0], c.split(':')[1]) for c in chn_vp_mapping
+	]
 	d = VMBus_Dev()
 	d.sysfs_path = '%s/%s' % (vmbus_sys_path, f)
 	d.vmbus_id = vmbus_id
@@ -85,7 +88,7 @@ for f in os.listdir(vmbus_sys_path):
 	vmbus_dev_list.append(d)
 
 
-vmbus_dev_list  = sorted(vmbus_dev_list, key = lambda d : int(d.vmbus_id))
+vmbus_dev_list = sorted(vmbus_dev_list, key=lambda d: int(d.vmbus_id))
 
 format0 = '%2s: %s'
 format1 = '%2s: Class_ID = %s - %s\n%s'
@@ -95,9 +98,15 @@ for d in vmbus_dev_list:
 	if verbose == 0:
 		print(('VMBUS ID ' + format0) % (d.vmbus_id, d.dev_desc))
 	elif verbose == 1:
-		print (('VMBUS ID ' + format1) %	\
-			(d.vmbus_id, d.class_id, d.dev_desc, d.chn_vp_mapping))
+		print(
+			('VMBUS ID ' + format1) %
+			(d.vmbus_id, d.class_id, d.dev_desc, d.chn_vp_mapping)
+		)
 	else:
-		print (('VMBUS ID ' + format2) % \
-			(d.vmbus_id, d.class_id, d.dev_desc, \
-			d.device_id, d.sysfs_path, d.chn_vp_mapping))
+		print(
+			('VMBUS ID ' + format2) %
+			(
+				d.vmbus_id, d.class_id, d.dev_desc,
+				d.device_id, d.sysfs_path, d.chn_vp_mapping
+			)
+		)

From b0995156071b0ff29a5902964a9dc8cfad6f81c0 Mon Sep 17 00:00:00 2001
From: Adrian Vladu <avladu@cloudbasesolutions.com>
Date: Mon, 6 May 2019 16:50:58 +0000
Subject: [PATCH 153/157] tools: hv: fix KVP and VSS daemons exit code

HyperV KVP and VSS daemons should exit with 0 when the '--help'
or '-h' flags are used.

Signed-off-by: Adrian Vladu <avladu@cloudbasesolutions.com>

Cc: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Cc: Sasha Levin <sashal@kernel.org>
Cc: Alessandro Pilotti <apilotti@cloudbasesolutions.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 tools/hv/hv_kvp_daemon.c | 2 ++
 tools/hv/hv_vss_daemon.c | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c
index d7e06fe0270e..0ce50c319cfd 100644
--- a/tools/hv/hv_kvp_daemon.c
+++ b/tools/hv/hv_kvp_daemon.c
@@ -1386,6 +1386,8 @@ int main(int argc, char *argv[])
 			daemonize = 0;
 			break;
 		case 'h':
+			print_usage(argv);
+			exit(0);
 		default:
 			print_usage(argv);
 			exit(EXIT_FAILURE);
diff --git a/tools/hv/hv_vss_daemon.c b/tools/hv/hv_vss_daemon.c
index efe1e34dd91b..8f813f5233d4 100644
--- a/tools/hv/hv_vss_daemon.c
+++ b/tools/hv/hv_vss_daemon.c
@@ -218,6 +218,8 @@ int main(int argc, char *argv[])
 			daemonize = 0;
 			break;
 		case 'h':
+			print_usage(argv);
+			exit(0);
 		default:
 			print_usage(argv);
 			exit(EXIT_FAILURE);

From 2d35c66036b2494c329a32468c85405493370e75 Mon Sep 17 00:00:00 2001
From: Adrian Vladu <avladu@cloudbasesolutions.com>
Date: Mon, 6 May 2019 16:51:24 +0000
Subject: [PATCH 154/157] tools: hv: fix typos in toolchain

Fix typos in the HyperV toolchain.

Signed-off-by: Adrian Vladu <avladu@cloudbasesolutions.com>

Cc: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Cc: Sasha Levin <sashal@kernel.org>
Cc: Alessandro Pilotti <apilotti@cloudbasesolutions.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 tools/hv/hv_get_dhcp_info.sh | 2 +-
 tools/hv/hv_kvp_daemon.c     | 6 +++---
 tools/hv/hv_set_ifconfig.sh  | 2 +-
 tools/hv/hv_vss_daemon.c     | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/tools/hv/hv_get_dhcp_info.sh b/tools/hv/hv_get_dhcp_info.sh
index c38686c44656..2f2a3c7df3de 100755
--- a/tools/hv/hv_get_dhcp_info.sh
+++ b/tools/hv/hv_get_dhcp_info.sh
@@ -13,7 +13,7 @@
 #	the script prints the string "Disabled" to stdout.
 #
 # Each Distro is expected to implement this script in a distro specific
-# fashion. For instance on Distros that ship with Network Manager enabled,
+# fashion. For instance, on Distros that ship with Network Manager enabled,
 # this script can be based on the Network Manager APIs for retrieving DHCP
 # information.
 
diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c
index 0ce50c319cfd..f5597503c771 100644
--- a/tools/hv/hv_kvp_daemon.c
+++ b/tools/hv/hv_kvp_daemon.c
@@ -700,7 +700,7 @@ static void kvp_get_ipconfig_info(char *if_name,
 
 
 	/*
-	 * Gather the DNS  state.
+	 * Gather the DNS state.
 	 * Since there is no standard way to get this information
 	 * across various distributions of interest; we just invoke
 	 * an external script that needs to be ported across distros
@@ -1051,7 +1051,7 @@ static int parse_ip_val_buffer(char *in_buf, int *offset,
 	char *start;
 
 	/*
-	 * in_buf has sequence of characters that are seperated by
+	 * in_buf has sequence of characters that are separated by
 	 * the character ';'. The last sequence does not have the
 	 * terminating ";" character.
 	 */
@@ -1492,7 +1492,7 @@ int main(int argc, char *argv[])
 		case KVP_OP_GET_IP_INFO:
 			kvp_ip_val = &hv_msg->body.kvp_ip_val;
 
-			error =  kvp_mac_to_ip(kvp_ip_val);
+			error = kvp_mac_to_ip(kvp_ip_val);
 
 			if (error)
 				hv_msg->error = error;
diff --git a/tools/hv/hv_set_ifconfig.sh b/tools/hv/hv_set_ifconfig.sh
index 7ed9f85ef908..d10fe35b7f25 100755
--- a/tools/hv/hv_set_ifconfig.sh
+++ b/tools/hv/hv_set_ifconfig.sh
@@ -12,7 +12,7 @@
 # be used to configure the interface.
 #
 # Each Distro is expected to implement this script in a distro specific
-# fashion. For instance on Distros that ship with Network Manager enabled,
+# fashion. For instance, on Distros that ship with Network Manager enabled,
 # this script can be based on the Network Manager APIs for configuring the
 # interface.
 #
diff --git a/tools/hv/hv_vss_daemon.c b/tools/hv/hv_vss_daemon.c
index 8f813f5233d4..92902a88f671 100644
--- a/tools/hv/hv_vss_daemon.c
+++ b/tools/hv/hv_vss_daemon.c
@@ -42,7 +42,7 @@ static int vss_do_freeze(char *dir, unsigned int cmd)
 	 * If a partition is mounted more than once, only the first
 	 * FREEZE/THAW can succeed and the later ones will get
 	 * EBUSY/EINVAL respectively: there could be 2 cases:
-	 * 1) a user may mount the same partition to differnt directories
+	 * 1) a user may mount the same partition to different directories
 	 *  by mistake or on purpose;
 	 * 2) The subvolume of btrfs appears to have the same partition
 	 * mounted more than once.

From 61e0f39105b7926a41bc03158eccf5ed13207ebd Mon Sep 17 00:00:00 2001
From: Nishad Kamdar <nishadkamdar@gmail.com>
Date: Mon, 22 Jul 2019 19:01:17 +0530
Subject: [PATCH 155/157] tools: hv: Use the correct style for SPDX License
 Identifier

This patch corrects the SPDX License Identifier style
in the trace header file related to Microsoft Hyper-V
client drivers.
For C header files Documentation/process/license-rules.rst
mandates C-like comments (opposed to C source files where
C++ style should be used)

Changes made by using a script provided by Joe Perches here:
https://lkml.org/lkml/2019/2/7/46

Suggested-by: Joe Perches <joe@perches.com>
Signed-off-by: Nishad Kamdar <nishadkamdar@gmail.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/hv/hv_trace.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hv/hv_trace.h b/drivers/hv/hv_trace.h
index 999f80a63bff..e70783e33680 100644
--- a/drivers/hv/hv_trace.h
+++ b/drivers/hv/hv_trace.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
 
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM hyperv

From bafe1e79e05de725e26b3f60c90b49e635b686b9 Mon Sep 17 00:00:00 2001
From: Lan Tianyu <Tianyu.Lan@microsoft.com>
Date: Tue, 26 Mar 2019 14:28:21 +0800
Subject: [PATCH 156/157] MAINTAINERS: Fix Hyperv vIOMMU driver file name

The Hyperv vIOMMU file name should be "hyperv-iommu.c" rather
than "hyperv_iommu.c". This patch is to fix it.

Signed-off-by: Lan Tianyu <Tianyu.Lan@microsoft.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 783569e3c4b4..b03179510e26 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7454,7 +7454,7 @@ F:	drivers/net/hyperv/
 F:	drivers/scsi/storvsc_drv.c
 F:	drivers/uio/uio_hv_generic.c
 F:	drivers/video/fbdev/hyperv_fb.c
-F:	drivers/iommu/hyperv_iommu.c
+F:	drivers/iommu/hyperv-iommu.c
 F:	net/vmw_vsock/hyperv_transport.c
 F:	include/clocksource/hyperv_timer.h
 F:	include/linux/hyperv.h

From d1abaeb3be7b5fa6d7a1fbbd2e14e3310005c4c1 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 18 Aug 2019 14:31:08 -0700
Subject: [PATCH 157/157] Linux 5.3-rc5

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 1b23f95db176..9fa18613566f 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 5
 PATCHLEVEL = 3
 SUBLEVEL = 0
-EXTRAVERSION = -rc4
+EXTRAVERSION = -rc5
 NAME = Bobtail Squid
 
 # *DOCUMENTATION*