From 61ee4137b5744d9c555cd4674aae353a45f9cd2e Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Fri, 31 Jul 2020 14:49:52 +0800
Subject: [PATCH 001/322] ip_vti: Fix unused variable warning

If CONFIG_INET_XFRM_TUNNEL is set but CONFIG_IPV6 is n,

net/ipv4/ip_vti.c:493:27: warning: 'vti_ipip6_handler' defined but not used [-Wunused-variable]

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/ipv4/ip_vti.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 49daaed89764..f687abb069fa 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -490,6 +490,7 @@ static struct xfrm_tunnel vti_ipip_handler __read_mostly = {
 	.priority	=	0,
 };
 
+#if IS_ENABLED(CONFIG_IPV6)
 static struct xfrm_tunnel vti_ipip6_handler __read_mostly = {
 	.handler	=	vti_rcv_tunnel,
 	.cb_handler	=	vti_rcv_cb,
@@ -497,6 +498,7 @@ static struct xfrm_tunnel vti_ipip6_handler __read_mostly = {
 	.priority	=	0,
 };
 #endif
+#endif
 
 static int __net_init vti_init_net(struct net *net)
 {

From 4eb2e13415757a2bce5bb0d580d22bbeef1f5346 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Thu, 13 Aug 2020 16:24:04 +0200
Subject: [PATCH 002/322] espintcp: restore IP CB before handing the packet to
 xfrm

Xiumei reported a bug with espintcp over IPv6 in transport mode,
because xfrm6_transport_finish expects to find IP6CB data (struct
inet6_skb_cb). Currently, espintcp zeroes the CB, but the relevant
part is actually preserved by previous layers (first set up by tcp,
then strparser only zeroes a small part of tcp_skb_tb), so we can just
relocate it to the start of skb->cb.

Fixes: e27cca96cd68 ("xfrm: add espintcp (RFC 8229)")
Reported-by: Xiumei Mu <xmu@redhat.com>
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/espintcp.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c
index 827ccdf2db57..1f08ebf7d80c 100644
--- a/net/xfrm/espintcp.c
+++ b/net/xfrm/espintcp.c
@@ -29,8 +29,12 @@ static void handle_nonesp(struct espintcp_ctx *ctx, struct sk_buff *skb,
 
 static void handle_esp(struct sk_buff *skb, struct sock *sk)
 {
+	struct tcp_skb_cb *tcp_cb = (struct tcp_skb_cb *)skb->cb;
+
 	skb_reset_transport_header(skb);
-	memset(skb->cb, 0, sizeof(skb->cb));
+
+	/* restore IP CB, we need at least IP6CB->nhoff */
+	memmove(skb->cb, &tcp_cb->header, sizeof(tcp_cb->header));
 
 	rcu_read_lock();
 	skb->dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif);

From d3f99f9183068efb5b931e50e298bad40285f938 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Wed, 19 Aug 2020 12:24:45 +0300
Subject: [PATCH 003/322] gpio: omap: Fix warnings if PM is disabled

Fix warnings for omap_gpio_resume and omap_gpio_suspend
defined but not used when PM is disabled as noticed when
doing make randconfig builds.

Fixes: f02a03985d06 ("gpio: omap: Add missing PM ops for suspend")
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Acked-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
---
 drivers/gpio/gpio-omap.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c
index 7fbe0c9e1fc1..0ea640fb636c 100644
--- a/drivers/gpio/gpio-omap.c
+++ b/drivers/gpio/gpio-omap.c
@@ -1516,7 +1516,7 @@ static int __maybe_unused omap_gpio_runtime_resume(struct device *dev)
 	return 0;
 }
 
-static int omap_gpio_suspend(struct device *dev)
+static int __maybe_unused omap_gpio_suspend(struct device *dev)
 {
 	struct gpio_bank *bank = dev_get_drvdata(dev);
 
@@ -1528,7 +1528,7 @@ static int omap_gpio_suspend(struct device *dev)
 	return omap_gpio_runtime_suspend(dev);
 }
 
-static int omap_gpio_resume(struct device *dev)
+static int __maybe_unused omap_gpio_resume(struct device *dev)
 {
 	struct gpio_bank *bank = dev_get_drvdata(dev);
 

From 400d033f5a599120089b5f0c54d14d198499af5a Mon Sep 17 00:00:00 2001
From: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Date: Sun, 2 Aug 2020 19:15:41 +0800
Subject: [PATCH 004/322] clocksource/drivers/h8300_timer8: Fix wrong return
 value in h8300_8timer_init()

In the init function, if the call to of_iomap() fails, the return
value is ENXIO instead of -ENXIO.

Change to the right negative errno.

Fixes: 691f8f878290f ("clocksource/drivers/h8300_timer8: Convert init function to return error")
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: https://lore.kernel.org/r/20200802111541.5429-1-tianjia.zhang@linux.alibaba.com
---
 drivers/clocksource/h8300_timer8.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clocksource/h8300_timer8.c b/drivers/clocksource/h8300_timer8.c
index 1d740a8c42ab..47114c2a7cb5 100644
--- a/drivers/clocksource/h8300_timer8.c
+++ b/drivers/clocksource/h8300_timer8.c
@@ -169,7 +169,7 @@ static int __init h8300_8timer_init(struct device_node *node)
 		return PTR_ERR(clk);
 	}
 
-	ret = ENXIO;
+	ret = -ENXIO;
 	base = of_iomap(node, 0);
 	if (!base) {
 		pr_err("failed to map registers for clockevent\n");

From 164805157f3c6834670afbaff563353c773131f1 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Mon, 17 Aug 2020 12:24:28 +0300
Subject: [PATCH 005/322] clocksource/drivers/timer-ti-dm: Do reset before
 enable

Commit 6cfcd5563b4f ("clocksource/drivers/timer-ti-dm: Fix suspend and
resume for am3 and am4") exposed a new issue for type2 dual mode timers
on at least omap5 where the clockevent will stop when the SoC starts
entering idle states during the boot.

Turns out we are wrongly first enabling the system timer and then
resetting it, while we must also re-enable it after reset. The current
sequence leaves the timer module in a partially initialized state. This
issue went unnoticed earlier with ti-sysc driver reconfiguring the timer
module until we fixed the issue of ti-sysc reconfiguring system timers.

Let's fix the issue by calling dmtimer_systimer_enable() from reset for
both type1 and type2 timers, and switch the order of reset and enable in
dmtimer_systimer_setup(). Let's also move dmtimer_systimer_enable() and
dmtimer_systimer_disable() to do this without adding forward declarations.

Fixes: 6cfcd5563b4f ("clocksource/drivers/timer-ti-dm: Fix suspend and resume for am3 and am4")
Reported-by: H. Nikolaus Schaller" <hns@goldelico.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: https://lore.kernel.org/r/20200817092428.6176-1-tony@atomide.com
---
 drivers/clocksource/timer-ti-dm-systimer.c | 44 +++++++++++-----------
 1 file changed, 23 insertions(+), 21 deletions(-)

diff --git a/drivers/clocksource/timer-ti-dm-systimer.c b/drivers/clocksource/timer-ti-dm-systimer.c
index f6fd1c1cc527..33b3e8aa2cc5 100644
--- a/drivers/clocksource/timer-ti-dm-systimer.c
+++ b/drivers/clocksource/timer-ti-dm-systimer.c
@@ -69,12 +69,33 @@ static bool dmtimer_systimer_revision1(struct dmtimer_systimer *t)
 	return !(tidr >> 16);
 }
 
+static void dmtimer_systimer_enable(struct dmtimer_systimer *t)
+{
+	u32 val;
+
+	if (dmtimer_systimer_revision1(t))
+		val = DMTIMER_TYPE1_ENABLE;
+	else
+		val = DMTIMER_TYPE2_ENABLE;
+
+	writel_relaxed(val, t->base + t->sysc);
+}
+
+static void dmtimer_systimer_disable(struct dmtimer_systimer *t)
+{
+	if (!dmtimer_systimer_revision1(t))
+		return;
+
+	writel_relaxed(DMTIMER_TYPE1_DISABLE, t->base + t->sysc);
+}
+
 static int __init dmtimer_systimer_type1_reset(struct dmtimer_systimer *t)
 {
 	void __iomem *syss = t->base + OMAP_TIMER_V1_SYS_STAT_OFFSET;
 	int ret;
 	u32 l;
 
+	dmtimer_systimer_enable(t);
 	writel_relaxed(BIT(1) | BIT(2), t->base + t->ifctrl);
 	ret = readl_poll_timeout_atomic(syss, l, l & BIT(0), 100,
 					DMTIMER_RESET_WAIT);
@@ -88,6 +109,7 @@ static int __init dmtimer_systimer_type2_reset(struct dmtimer_systimer *t)
 	void __iomem *sysc = t->base + t->sysc;
 	u32 l;
 
+	dmtimer_systimer_enable(t);
 	l = readl_relaxed(sysc);
 	l |= BIT(0);
 	writel_relaxed(l, sysc);
@@ -336,26 +358,6 @@ static int __init dmtimer_systimer_init_clock(struct dmtimer_systimer *t,
 	return 0;
 }
 
-static void dmtimer_systimer_enable(struct dmtimer_systimer *t)
-{
-	u32 val;
-
-	if (dmtimer_systimer_revision1(t))
-		val = DMTIMER_TYPE1_ENABLE;
-	else
-		val = DMTIMER_TYPE2_ENABLE;
-
-	writel_relaxed(val, t->base + t->sysc);
-}
-
-static void dmtimer_systimer_disable(struct dmtimer_systimer *t)
-{
-	if (!dmtimer_systimer_revision1(t))
-		return;
-
-	writel_relaxed(DMTIMER_TYPE1_DISABLE, t->base + t->sysc);
-}
-
 static int __init dmtimer_systimer_setup(struct device_node *np,
 					 struct dmtimer_systimer *t)
 {
@@ -409,8 +411,8 @@ static int __init dmtimer_systimer_setup(struct device_node *np,
 	t->wakeup = regbase + _OMAP_TIMER_WAKEUP_EN_OFFSET;
 	t->ifctrl = regbase + _OMAP_TIMER_IF_CTRL_OFFSET;
 
-	dmtimer_systimer_enable(t);
 	dmtimer_systimer_reset(t);
+	dmtimer_systimer_enable(t);
 	pr_debug("dmtimer rev %08x sysc %08x\n", readl_relaxed(t->base),
 		 readl_relaxed(t->base + t->sysc));
 

From bc6717d55d07110d8f3c6d31ec2af50c11b07091 Mon Sep 17 00:00:00 2001
From: Guo Ren <guoren@linux.alibaba.com>
Date: Tue, 18 Aug 2020 07:31:17 +0000
Subject: [PATCH 006/322] clocksource/drivers/timer-gx6605s: Fixup counter
 reload

When the timer counts to the upper limit, an overflow interrupt is
generated, and the count is reset with the value in the TIME_INI
register. But the software expects to start counting from 0 when
the count overflows, so it forces TIME_INI to 0 to solve the
potential interrupt storm problem.

Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
Tested-by: Xu Kai <xukai@nationalchip.com>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: https://lore.kernel.org/r/1597735877-71115-1-git-send-email-guoren@kernel.org
---
 drivers/clocksource/timer-gx6605s.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/clocksource/timer-gx6605s.c b/drivers/clocksource/timer-gx6605s.c
index 80d0939d040b..8d386adbe800 100644
--- a/drivers/clocksource/timer-gx6605s.c
+++ b/drivers/clocksource/timer-gx6605s.c
@@ -28,6 +28,7 @@ static irqreturn_t gx6605s_timer_interrupt(int irq, void *dev)
 	void __iomem *base = timer_of_base(to_timer_of(ce));
 
 	writel_relaxed(GX6605S_STATUS_CLR, base + TIMER_STATUS);
+	writel_relaxed(0, base + TIMER_INI);
 
 	ce->event_handler(ce);
 

From 45a36a18d01907710bad5258d81f76c18882ad88 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Tue, 4 Aug 2020 11:37:29 +0200
Subject: [PATCH 007/322] xfrmi: drop ignore_df check before updating pmtu

xfrm interfaces currently test for !skb->ignore_df when deciding
whether to update the pmtu on the skb's dst. Because of this, no pmtu
exception is created when we do something like:

    ping -s 1438 <dest>

By dropping this check, the pmtu exception will be created and the
next ping attempt will work.

Fixes: f203b76d7809 ("xfrm: Add virtual xfrm interfaces")
Reported-by: Xiumei Mu <xmu@redhat.com>
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_interface.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c
index eb8181987620..a8f66112c52b 100644
--- a/net/xfrm/xfrm_interface.c
+++ b/net/xfrm/xfrm_interface.c
@@ -303,7 +303,7 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
 	}
 
 	mtu = dst_mtu(dst);
-	if (!skb->ignore_df && skb->len > mtu) {
+	if (skb->len > mtu) {
 		skb_dst_update_pmtu_no_confirm(skb, mtu);
 
 		if (skb->protocol == htons(ETH_P_IPV6)) {

From 5fcface659aab7eac4bd65dd116d98b8f7bb88d5 Mon Sep 17 00:00:00 2001
From: Taiping Lai <taiping.lai@unisoc.com>
Date: Mon, 31 Aug 2020 17:09:47 +0800
Subject: [PATCH 008/322] gpio: sprd: Clear interrupt when setting the type as
 edge

The raw interrupt status of GPIO maybe set before the interrupt is enabled,
which would trigger the interrupt event once enabled it from user side.
This is the case for edge interrupts only. Adding a clear operation when
setting interrupt type can avoid that.

There're a few considerations for the solution:
1) This issue is for edge interrupt only; The interrupts requested by users
   are IRQ_TYPE_LEVEL_HIGH as default, so clearing interrupt when request
   is useless.
2) The interrupt type can be set to edge when request and following up
   with clearing it though, but the problem is still there once users set
   the interrupt type to level trggier.
3) We can add a clear operation after each time of setting interrupt
   enable bit, but it is redundant for level trigger interrupt.

Therefore, the solution is this patch seems the best for now.

Fixes: 9a3821c2bb47 ("gpio: Add GPIO driver for Spreadtrum SC9860 platform")
Signed-off-by: Taiping Lai <taiping.lai@unisoc.com>
Signed-off-by: Chunyan Zhang <chunyan.zhang@unisoc.com>
Reviewed-by: Baolin Wang <baolin.wang7@gmail.com>
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
---
 drivers/gpio/gpio-sprd.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpio/gpio-sprd.c b/drivers/gpio/gpio-sprd.c
index d7314d39ab65..36ea8a3bd451 100644
--- a/drivers/gpio/gpio-sprd.c
+++ b/drivers/gpio/gpio-sprd.c
@@ -149,17 +149,20 @@ static int sprd_gpio_irq_set_type(struct irq_data *data,
 		sprd_gpio_update(chip, offset, SPRD_GPIO_IS, 0);
 		sprd_gpio_update(chip, offset, SPRD_GPIO_IBE, 0);
 		sprd_gpio_update(chip, offset, SPRD_GPIO_IEV, 1);
+		sprd_gpio_update(chip, offset, SPRD_GPIO_IC, 1);
 		irq_set_handler_locked(data, handle_edge_irq);
 		break;
 	case IRQ_TYPE_EDGE_FALLING:
 		sprd_gpio_update(chip, offset, SPRD_GPIO_IS, 0);
 		sprd_gpio_update(chip, offset, SPRD_GPIO_IBE, 0);
 		sprd_gpio_update(chip, offset, SPRD_GPIO_IEV, 0);
+		sprd_gpio_update(chip, offset, SPRD_GPIO_IC, 1);
 		irq_set_handler_locked(data, handle_edge_irq);
 		break;
 	case IRQ_TYPE_EDGE_BOTH:
 		sprd_gpio_update(chip, offset, SPRD_GPIO_IS, 0);
 		sprd_gpio_update(chip, offset, SPRD_GPIO_IBE, 1);
+		sprd_gpio_update(chip, offset, SPRD_GPIO_IC, 1);
 		irq_set_handler_locked(data, handle_edge_irq);
 		break;
 	case IRQ_TYPE_LEVEL_HIGH:

From 545e5c571662b1cd79d9588f9d3b6e36985b8007 Mon Sep 17 00:00:00 2001
From: Antony Antony <antony.antony@secunet.com>
Date: Fri, 4 Sep 2020 08:49:38 +0200
Subject: [PATCH 009/322] xfrm: clone XFRMA_SET_MARK in xfrm_do_migrate

XFRMA_SET_MARK and XFRMA_SET_MARK_MASK was not cloned from the old
to the new. Migrate these two attributes during XFRMA_MSG_MIGRATE

Fixes: 9b42c1f179a6 ("xfrm: Extend the output_mark to support input direction and masking.")
Signed-off-by: Antony Antony <antony.antony@secunet.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_state.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 69520ad3d83b..3a000f289dcd 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1510,6 +1510,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
 	}
 
 	memcpy(&x->mark, &orig->mark, sizeof(x->mark));
+	memcpy(&x->props.smark, &orig->props.smark, sizeof(x->props.smark));
 
 	if (xfrm_init_state(x) < 0)
 		goto error;

From 91a46c6d1b4fcbfa4773df9421b8ad3e58088101 Mon Sep 17 00:00:00 2001
From: Antony Antony <antony.antony@secunet.com>
Date: Fri, 4 Sep 2020 08:49:55 +0200
Subject: [PATCH 010/322] xfrm: clone XFRMA_REPLAY_ESN_VAL in xfrm_do_migrate

XFRMA_REPLAY_ESN_VAL was not cloned completely from the old to the new.
Migrate this attribute during XFRMA_MSG_MIGRATE

v1->v2:
 - move curleft cloning to a separate patch

Fixes: af2f464e326e ("xfrm: Assign esn pointers when cloning a state")
Signed-off-by: Antony Antony <antony.antony@secunet.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 2737d24ec244..9e806c781025 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1773,21 +1773,17 @@ static inline unsigned int xfrm_replay_state_esn_len(struct xfrm_replay_state_es
 static inline int xfrm_replay_clone(struct xfrm_state *x,
 				     struct xfrm_state *orig)
 {
-	x->replay_esn = kzalloc(xfrm_replay_state_esn_len(orig->replay_esn),
+
+	x->replay_esn = kmemdup(orig->replay_esn,
+				xfrm_replay_state_esn_len(orig->replay_esn),
 				GFP_KERNEL);
 	if (!x->replay_esn)
 		return -ENOMEM;
-
-	x->replay_esn->bmp_len = orig->replay_esn->bmp_len;
-	x->replay_esn->replay_window = orig->replay_esn->replay_window;
-
-	x->preplay_esn = kmemdup(x->replay_esn,
-				 xfrm_replay_state_esn_len(x->replay_esn),
+	x->preplay_esn = kmemdup(orig->preplay_esn,
+				 xfrm_replay_state_esn_len(orig->preplay_esn),
 				 GFP_KERNEL);
-	if (!x->preplay_esn) {
-		kfree(x->replay_esn);
+	if (!x->preplay_esn)
 		return -ENOMEM;
-	}
 
 	return 0;
 }

From 7aa05d304785204703a67a6aa7f1db402889a172 Mon Sep 17 00:00:00 2001
From: Antony Antony <antony.antony@secunet.com>
Date: Fri, 4 Sep 2020 08:50:11 +0200
Subject: [PATCH 011/322] xfrm: clone XFRMA_SEC_CTX in xfrm_do_migrate

XFRMA_SEC_CTX was not cloned from the old to the new.
Migrate this attribute during XFRMA_MSG_MIGRATE

v1->v2:
 - return -ENOMEM on error
v2->v3:
 - fix return type to int

Fixes: 80c9abaabf42 ("[XFRM]: Extension for dynamic update of endpoint address(es)")
Signed-off-by: Antony Antony <antony.antony@secunet.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_state.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 3a000f289dcd..5e5ed8108498 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1441,6 +1441,30 @@ out:
 EXPORT_SYMBOL(xfrm_state_add);
 
 #ifdef CONFIG_XFRM_MIGRATE
+static inline int clone_security(struct xfrm_state *x, struct xfrm_sec_ctx *security)
+{
+	struct xfrm_user_sec_ctx *uctx;
+	int size = sizeof(*uctx) + security->ctx_len;
+	int err;
+
+	uctx = kmalloc(size, GFP_KERNEL);
+	if (!uctx)
+		return -ENOMEM;
+
+	uctx->exttype = XFRMA_SEC_CTX;
+	uctx->len = size;
+	uctx->ctx_doi = security->ctx_doi;
+	uctx->ctx_alg = security->ctx_alg;
+	uctx->ctx_len = security->ctx_len;
+	memcpy(uctx + 1, security->ctx_str, security->ctx_len);
+	err = security_xfrm_state_alloc(x, uctx);
+	kfree(uctx);
+	if (err)
+		return err;
+
+	return 0;
+}
+
 static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
 					   struct xfrm_encap_tmpl *encap)
 {
@@ -1497,6 +1521,10 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
 			goto error;
 	}
 
+	if (orig->security)
+		if (clone_security(x, orig->security))
+			goto error;
+
 	if (orig->coaddr) {
 		x->coaddr = kmemdup(orig->coaddr, sizeof(*x->coaddr),
 				    GFP_KERNEL);

From 8366685b2883e523f91e9816d7be371eb1144749 Mon Sep 17 00:00:00 2001
From: Antony Antony <antony.antony@secunet.com>
Date: Fri, 4 Sep 2020 08:50:29 +0200
Subject: [PATCH 012/322] xfrm: clone whole liftime_cur structure in
 xfrm_do_migrate

When we clone state only add_time was cloned. It missed values like
bytes, packets.  Now clone the all members of the structure.

v1->v3:
 - use memcpy to copy the entire structure

Fixes: 80c9abaabf42 ("[XFRM]: Extension for dynamic update of endpoint address(es)")
Signed-off-by: Antony Antony <antony.antony@secunet.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_state.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 5e5ed8108498..5ff392e6f3c1 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1550,7 +1550,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
 	x->tfcpad = orig->tfcpad;
 	x->replay_maxdiff = orig->replay_maxdiff;
 	x->replay_maxage = orig->replay_maxage;
-	x->curlft.add_time = orig->curlft.add_time;
+	memcpy(&x->curlft, &orig->curlft, sizeof(x->curlft));
 	x->km.state = orig->km.state;
 	x->km.seq = orig->km.seq;
 	x->replay = orig->replay;

From 850280156f6421a404f2351bee07a0e7bedfd4c6 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 5 Sep 2020 15:46:48 +0300
Subject: [PATCH 013/322] phy: ti: am654: Fix a leak in serdes_am654_probe()

If devm_phy_create() fails then we need to call of_clk_del_provider(node)
to undo the call to of_clk_add_provider().

Fixes: 71e2f5c5c224 ("phy: ti: Add a new SERDES driver for TI's AM654x SoC")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Link: https://lore.kernel.org/r/20200905124648.GA183976@mwanda
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/ti/phy-am654-serdes.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/phy/ti/phy-am654-serdes.c b/drivers/phy/ti/phy-am654-serdes.c
index a174b3c3f010..819c49af169a 100644
--- a/drivers/phy/ti/phy-am654-serdes.c
+++ b/drivers/phy/ti/phy-am654-serdes.c
@@ -725,8 +725,10 @@ static int serdes_am654_probe(struct platform_device *pdev)
 	pm_runtime_enable(dev);
 
 	phy = devm_phy_create(dev, NULL, &ops);
-	if (IS_ERR(phy))
-		return PTR_ERR(phy);
+	if (IS_ERR(phy)) {
+		ret = PTR_ERR(phy);
+		goto clk_err;
+	}
 
 	phy_set_drvdata(phy, am654_phy);
 	phy_provider = devm_of_phy_provider_register(dev, serdes_am654_xlate);

From e338eecf3fe79054e8a31b8c39a1234b5acfdabe Mon Sep 17 00:00:00 2001
From: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Date: Fri, 4 Sep 2020 15:09:04 +0100
Subject: [PATCH 014/322] PCI: rockchip: Fix bus checks in
 rockchip_pcie_valid_device()

The root bus checks rework in d84c572de1a3 ("PCI: rockchip: Use
pci_is_root_bus() to check if bus is root bus") caused a regression whereby
in rockchip_pcie_valid_device() if the bus parameter is the root bus and
the dev value == 0, the function should return 1 (ie true) without checking
if the bus->parent pointer is a root bus because that triggers a NULL
pointer dereference.

Fix this by streamlining the root bus detection.

Fixes: d84c572de1a3 ("PCI: rockchip: Use pci_is_root_bus() to check if bus is root bus")
Link: https://lore.kernel.org/r/20200904140904.944-1-lorenzo.pieralisi@arm.com
Reported-by: Samuel Dionne-Riel <samuel@dionne-riel.com>
Tested-by: Samuel Dionne-Riel <samuel@dionne-riel.com>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Cc: Shawn Lin <shawn.lin@rock-chips.com>
---
 drivers/pci/controller/pcie-rockchip-host.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/drivers/pci/controller/pcie-rockchip-host.c b/drivers/pci/controller/pcie-rockchip-host.c
index 0bb2fb3e8a0b..9705059523a6 100644
--- a/drivers/pci/controller/pcie-rockchip-host.c
+++ b/drivers/pci/controller/pcie-rockchip-host.c
@@ -71,16 +71,13 @@ static void rockchip_pcie_update_txcredit_mui(struct rockchip_pcie *rockchip)
 static int rockchip_pcie_valid_device(struct rockchip_pcie *rockchip,
 				      struct pci_bus *bus, int dev)
 {
-	/* access only one slot on each root port */
-	if (pci_is_root_bus(bus) && dev > 0)
-		return 0;
-
 	/*
-	 * do not read more than one device on the bus directly attached
+	 * Access only one slot on each root port.
+	 * Do not read more than one device on the bus directly attached
 	 * to RC's downstream side.
 	 */
-	if (pci_is_root_bus(bus->parent) && dev > 0)
-		return 0;
+	if (pci_is_root_bus(bus) || pci_is_root_bus(bus->parent))
+		return dev == 0;
 
 	return 1;
 }

From d89a80ddbfd86d04d0901e576ca2016d324bcb85 Mon Sep 17 00:00:00 2001
From: Tero Kristo <t-kristo@ti.com>
Date: Tue, 25 Aug 2020 16:31:05 +0300
Subject: [PATCH 015/322] dt-bindings: crypto: sa2ul: fix a DT binding check
 warning

DT binding check produces a warning about bad cell size:

Documentation/devicetree/bindings/crypto/ti,sa2ul.example.dt.yaml: example-0: crypto@4e00000:reg:0: [0, 81788928, 0, 4608] is too long
	From schema: python3.6/site-packages/dtschema/schemas/reg.yaml

Fix this by reducing the address sizes for the example to 1 cell from
current 2.

Fixes: 2ce9a7299bf6 ("dt-bindings: crypto: Add TI SA2UL crypto accelerator documentation")
Reported-by: Rob Herring <robh@kernel.org>
Cc: Rob Herring <robh@kernel.org>
Cc: devicetree@vger.kernel.org
Signed-off-by: Tero Kristo <t-kristo@ti.com>
Link: https://lore.kernel.org/r/20200825133106.21542-2-t-kristo@ti.com
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/crypto/ti,sa2ul.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/crypto/ti,sa2ul.yaml b/Documentation/devicetree/bindings/crypto/ti,sa2ul.yaml
index 85ef69ffebed..1465c9ebaf93 100644
--- a/Documentation/devicetree/bindings/crypto/ti,sa2ul.yaml
+++ b/Documentation/devicetree/bindings/crypto/ti,sa2ul.yaml
@@ -67,7 +67,7 @@ examples:
 
     main_crypto: crypto@4e00000 {
         compatible = "ti,j721-sa2ul";
-        reg = <0x0 0x4e00000 0x0 0x1200>;
+        reg = <0x4e00000 0x1200>;
         power-domains = <&k3_pds 264 TI_SCI_PD_EXCLUSIVE>;
         dmas = <&main_udmap 0xc000>, <&main_udmap 0x4000>,
                <&main_udmap 0x4001>;

From 214b0e1ad01abf4c1f6d8d28fa096bf167e47cef Mon Sep 17 00:00:00 2001
From: dillon min <dillon.minfei@gmail.com>
Date: Thu, 3 Sep 2020 15:30:21 +0800
Subject: [PATCH 016/322] gpio: tc35894: fix up tc35894 interrupt configuration

The offset of regmap is incorrect, j * 8 is move to the
wrong register.

for example:

asume i = 0, j = 1. we want to set KPY5 as interrupt
falling edge mode, regmap[0][1] should be TC3589x_GPIOIBE1 0xcd
but, regmap[i] + j * 8 = TC3589x_GPIOIBE0 + 8 ,point to 0xd4,
this is TC3589x_GPIOIE2 not TC3589x_GPIOIBE1.

Fixes: d88b25be3584 ("gpio: Add TC35892 GPIO driver")
Cc: Cc: stable@vger.kernel.org
Signed-off-by: dillon min <dillon.minfei@gmail.com>
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
---
 drivers/gpio/gpio-tc3589x.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpio/gpio-tc3589x.c b/drivers/gpio/gpio-tc3589x.c
index 58b0da9eb76f..ea3f68a28fea 100644
--- a/drivers/gpio/gpio-tc3589x.c
+++ b/drivers/gpio/gpio-tc3589x.c
@@ -212,7 +212,7 @@ static void tc3589x_gpio_irq_sync_unlock(struct irq_data *d)
 				continue;
 
 			tc3589x_gpio->oldregs[i][j] = new;
-			tc3589x_reg_write(tc3589x, regmap[i] + j * 8, new);
+			tc3589x_reg_write(tc3589x, regmap[i] + j, new);
 		}
 	}
 

From 45ccf6556720293323c20cda717756014ff63007 Mon Sep 17 00:00:00 2001
From: Ahmad Fatoum <a.fatoum@pengutronix.de>
Date: Mon, 7 Sep 2020 17:31:35 +0200
Subject: [PATCH 017/322] gpio: siox: explicitly support only threaded irqs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The gpio-siox driver uses handle_nested_irq() to implement its
interrupt support. This is only capable of handling threaded irq
actions. For a hardirq action it triggers a NULL pointer oops.
(It calls action->thread_fn which is NULL then.)

Prevent registration of a hardirq action by setting
gpio_irq_chip::threaded to true.

Cc: u.kleine-koenig@pengutronix.de
Fixes: be8c8facc707 ("gpio: new driver to work with a 8x12 siox")
Cc: stable@vger.kernel.org
Signed-off-by: Ahmad Fatoum <a.fatoum@pengutronix.de>
Acked-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
---
 drivers/gpio/gpio-siox.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpio/gpio-siox.c b/drivers/gpio/gpio-siox.c
index 26e1fe092304..f8c5e9fc4bac 100644
--- a/drivers/gpio/gpio-siox.c
+++ b/drivers/gpio/gpio-siox.c
@@ -245,6 +245,7 @@ static int gpio_siox_probe(struct siox_device *sdevice)
 	girq->chip = &ddata->ichip;
 	girq->default_type = IRQ_TYPE_NONE;
 	girq->handler = handle_level_irq;
+	girq->threaded = true;
 
 	ret = devm_gpiochip_add_data(dev, &ddata->gchip, NULL);
 	if (ret)

From 1b02d9e770cd7087f34c743f85ccf5ea8372b047 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Tue, 8 Sep 2020 15:07:49 +0200
Subject: [PATCH 018/322] gpio: mockup: fix resource leak in error path

If the module init function fails after creating the debugs directory,
it's never removed. Add proper cleanup calls to avoid this resource
leak.

Fixes: 9202ba2397d1 ("gpio: mockup: implement event injecting over debugfs")
Cc: <stable@vger.kernel.org>
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/gpio/gpio-mockup.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpio/gpio-mockup.c b/drivers/gpio/gpio-mockup.c
index bc345185db26..1652897fdf90 100644
--- a/drivers/gpio/gpio-mockup.c
+++ b/drivers/gpio/gpio-mockup.c
@@ -552,6 +552,7 @@ static int __init gpio_mockup_init(void)
 	err = platform_driver_register(&gpio_mockup_driver);
 	if (err) {
 		gpio_mockup_err("error registering platform driver\n");
+		debugfs_remove_recursive(gpio_mockup_dbg_dir);
 		return err;
 	}
 
@@ -582,6 +583,7 @@ static int __init gpio_mockup_init(void)
 			gpio_mockup_err("error registering device");
 			platform_driver_unregister(&gpio_mockup_driver);
 			gpio_mockup_unregister_pdevs();
+			debugfs_remove_recursive(gpio_mockup_dbg_dir);
 			return PTR_ERR(pdev);
 		}
 

From ec653df2a0cbc306a4bfcb0e3484d318fa779002 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Wed, 1 Jul 2020 09:39:49 +0200
Subject: [PATCH 019/322] drm/vc4/vc4_hdmi: fill ASoC card owner

card->owner is a required property and since commit 81033c6b584b ("ALSA:
core: Warn on empty module") a warning is issued if it is empty. Fix lack
of it. This fixes following warning observed on RaspberryPi 3B board
with ARM 32bit kernel and multi_v7_defconfig:

------------[ cut here ]------------
WARNING: CPU: 1 PID: 210 at sound/core/init.c:207 snd_card_new+0x378/0x398 [snd]
Modules linked in: vc4(+) snd_soc_core ac97_bus snd_pcm_dmaengine bluetooth snd_pcm snd_timer crc32_arm_ce raspberrypi_hwmon snd soundcore ecdh_generic ecc bcm2835_thermal phy_generic
CPU: 1 PID: 210 Comm: systemd-udevd Not tainted 5.8.0-rc1-00027-g81033c6b584b #1087
Hardware name: BCM2835
[<c03113c0>] (unwind_backtrace) from [<c030bcb4>] (show_stack+0x10/0x14)
[<c030bcb4>] (show_stack) from [<c071cef8>] (dump_stack+0xd4/0xe8)
[<c071cef8>] (dump_stack) from [<c0345bfc>] (__warn+0xdc/0xf4)
[<c0345bfc>] (__warn) from [<c0345cc4>] (warn_slowpath_fmt+0xb0/0xb8)
[<c0345cc4>] (warn_slowpath_fmt) from [<bf02ff74>] (snd_card_new+0x378/0x398 [snd])
[<bf02ff74>] (snd_card_new [snd]) from [<bf11f0b4>] (snd_soc_bind_card+0x280/0x99c [snd_soc_core])
[<bf11f0b4>] (snd_soc_bind_card [snd_soc_core]) from [<bf12f000>] (devm_snd_soc_register_card+0x34/0x6c [snd_soc_core])
[<bf12f000>] (devm_snd_soc_register_card [snd_soc_core]) from [<bf165654>] (vc4_hdmi_bind+0x43c/0x5f4 [vc4])
[<bf165654>] (vc4_hdmi_bind [vc4]) from [<c09d660c>] (component_bind_all+0xec/0x24c)
[<c09d660c>] (component_bind_all) from [<bf15c44c>] (vc4_drm_bind+0xd4/0x174 [vc4])
[<bf15c44c>] (vc4_drm_bind [vc4]) from [<c09d6ac0>] (try_to_bring_up_master+0x160/0x1b0)
[<c09d6ac0>] (try_to_bring_up_master) from [<c09d6f38>] (component_master_add_with_match+0xd0/0x104)
[<c09d6f38>] (component_master_add_with_match) from [<bf15c588>] (vc4_platform_drm_probe+0x9c/0xbc [vc4])
[<bf15c588>] (vc4_platform_drm_probe [vc4]) from [<c09df740>] (platform_drv_probe+0x6c/0xa4)
[<c09df740>] (platform_drv_probe) from [<c09dd6f0>] (really_probe+0x210/0x350)
[<c09dd6f0>] (really_probe) from [<c09dd940>] (driver_probe_device+0x5c/0xb4)
[<c09dd940>] (driver_probe_device) from [<c09ddb38>] (device_driver_attach+0x58/0x60)
[<c09ddb38>] (device_driver_attach) from [<c09ddbc0>] (__driver_attach+0x80/0xbc)
[<c09ddbc0>] (__driver_attach) from [<c09db820>] (bus_for_each_dev+0x68/0xb4)
[<c09db820>] (bus_for_each_dev) from [<c09dc9f8>] (bus_add_driver+0x130/0x1e8)
[<c09dc9f8>] (bus_add_driver) from [<c09de648>] (driver_register+0x78/0x110)
[<c09de648>] (driver_register) from [<c0302038>] (do_one_initcall+0x50/0x220)
[<c0302038>] (do_one_initcall) from [<c03db544>] (do_init_module+0x60/0x210)
[<c03db544>] (do_init_module) from [<c03da4f8>] (load_module+0x1e34/0x2338)
[<c03da4f8>] (load_module) from [<c03dac00>] (sys_finit_module+0xac/0xbc)
[<c03dac00>] (sys_finit_module) from [<c03000c0>] (ret_fast_syscall+0x0/0x54)
Exception stack(0xeded9fa8 to 0xeded9ff0)
...
---[ end trace 6414689569c2bc08 ]---

Fixes: bb7d78568814 ("drm/vc4: Add HDMI audio support")
Suggested-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Tested-by: Stefan Wahren <stefan.wahren@i2se.com>
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Link: https://patchwork.freedesktop.org/patch/msgid/20200701073949.28941-1-m.szyprowski@samsung.com
---
 drivers/gpu/drm/vc4/vc4_hdmi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c
index 15a11cd4de25..6339c6f0f571 100644
--- a/drivers/gpu/drm/vc4/vc4_hdmi.c
+++ b/drivers/gpu/drm/vc4/vc4_hdmi.c
@@ -1117,6 +1117,7 @@ static int vc4_hdmi_audio_init(struct vc4_hdmi *hdmi)
 	card->num_links = 1;
 	card->name = "vc4-hdmi";
 	card->dev = dev;
+	card->owner = THIS_MODULE;
 
 	/*
 	 * Be careful, snd_soc_register_card() calls dev_set_drvdata() and

From f8d4f44df056c5b504b0d49683fb7279218fd207 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 9 Sep 2020 22:25:06 -0400
Subject: [PATCH 020/322] epoll: do not insert into poll queues until all
 sanity checks are done

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/eventpoll.c | 37 ++++++++++++++++++-------------------
 1 file changed, 18 insertions(+), 19 deletions(-)

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 8107e06d7f6f..5207dfc85b78 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1522,6 +1522,22 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
 		RCU_INIT_POINTER(epi->ws, NULL);
 	}
 
+	/* Add the current item to the list of active epoll hook for this file */
+	spin_lock(&tfile->f_lock);
+	list_add_tail_rcu(&epi->fllink, &tfile->f_ep_links);
+	spin_unlock(&tfile->f_lock);
+
+	/*
+	 * Add the current item to the RB tree. All RB tree operations are
+	 * protected by "mtx", and ep_insert() is called with "mtx" held.
+	 */
+	ep_rbtree_insert(ep, epi);
+
+	/* now check if we've created too many backpaths */
+	error = -EINVAL;
+	if (full_check && reverse_path_check())
+		goto error_remove_epi;
+
 	/* Initialize the poll table using the queue callback */
 	epq.epi = epi;
 	init_poll_funcptr(&epq.pt, ep_ptable_queue_proc);
@@ -1544,22 +1560,6 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
 	if (epi->nwait < 0)
 		goto error_unregister;
 
-	/* Add the current item to the list of active epoll hook for this file */
-	spin_lock(&tfile->f_lock);
-	list_add_tail_rcu(&epi->fllink, &tfile->f_ep_links);
-	spin_unlock(&tfile->f_lock);
-
-	/*
-	 * Add the current item to the RB tree. All RB tree operations are
-	 * protected by "mtx", and ep_insert() is called with "mtx" held.
-	 */
-	ep_rbtree_insert(ep, epi);
-
-	/* now check if we've created too many backpaths */
-	error = -EINVAL;
-	if (full_check && reverse_path_check())
-		goto error_remove_epi;
-
 	/* We have to drop the new item inside our item list to keep track of it */
 	write_lock_irq(&ep->lock);
 
@@ -1588,6 +1588,8 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
 
 	return 0;
 
+error_unregister:
+	ep_unregister_pollwait(ep, epi);
 error_remove_epi:
 	spin_lock(&tfile->f_lock);
 	list_del_rcu(&epi->fllink);
@@ -1595,9 +1597,6 @@ error_remove_epi:
 
 	rb_erase_cached(&epi->rbn, &ep->rbr);
 
-error_unregister:
-	ep_unregister_pollwait(ep, epi);
-
 	/*
 	 * We need to do this because an event could have been arrived on some
 	 * allocated wait queue. Note that we don't care about the ep->ovflist

From cab4c03b4ba54c8d9378298cacb8bc0fd74ceece Mon Sep 17 00:00:00 2001
From: Martin Cerveny <m.cerveny@computer.org>
Date: Sun, 6 Sep 2020 18:21:39 +0200
Subject: [PATCH 021/322] drm/sun4i: sun8i-csc: Secondary CSC register
 correction

"Allwinner V3s" has secondary video layer (VI).
Decoded video is displayed in wrong colors until
secondary CSC registers are programmed correctly.

Fixes: 883029390550 ("drm/sun4i: Add DE2 CSC library")
Signed-off-by: Martin Cerveny <m.cerveny@computer.org>
Reviewed-by: Jernej Skrabec <jernej.skrabec@siol.net>
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Link: https://patchwork.freedesktop.org/patch/msgid/20200906162140.5584-2-m.cerveny@computer.org
---
 drivers/gpu/drm/sun4i/sun8i_csc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/sun4i/sun8i_csc.h b/drivers/gpu/drm/sun4i/sun8i_csc.h
index f42441b1b14d..a55a38ad849c 100644
--- a/drivers/gpu/drm/sun4i/sun8i_csc.h
+++ b/drivers/gpu/drm/sun4i/sun8i_csc.h
@@ -12,7 +12,7 @@ struct sun8i_mixer;
 
 /* VI channel CSC units offsets */
 #define CCSC00_OFFSET 0xAA050
-#define CCSC01_OFFSET 0xFA000
+#define CCSC01_OFFSET 0xFA050
 #define CCSC10_OFFSET 0xA0000
 #define CCSC11_OFFSET 0xF0000
 

From 74ea06164cda81dc80e97790164ca533fd7e3087 Mon Sep 17 00:00:00 2001
From: Martin Cerveny <m.cerveny@computer.org>
Date: Sun, 6 Sep 2020 18:21:40 +0200
Subject: [PATCH 022/322] drm/sun4i: mixer: Extend regmap max_register

Better guess. Secondary CSC registers are from 0xF0000.

Signed-off-by: Martin Cerveny <m.cerveny@computer.org>
Reviewed-by: Jernej Skrabec <jernej.skrabec@siol.net>
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Link: https://patchwork.freedesktop.org/patch/msgid/20200906162140.5584-3-m.cerveny@computer.org
---
 drivers/gpu/drm/sun4i/sun8i_mixer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/sun4i/sun8i_mixer.c b/drivers/gpu/drm/sun4i/sun8i_mixer.c
index cc4fb916318f..c3304028e3dc 100644
--- a/drivers/gpu/drm/sun4i/sun8i_mixer.c
+++ b/drivers/gpu/drm/sun4i/sun8i_mixer.c
@@ -307,7 +307,7 @@ static struct regmap_config sun8i_mixer_regmap_config = {
 	.reg_bits	= 32,
 	.val_bits	= 32,
 	.reg_stride	= 4,
-	.max_register	= 0xbfffc, /* guessed */
+	.max_register	= 0xffffc, /* guessed */
 };
 
 static int sun8i_mixer_of_get_id(struct device_node *node)

From 288eceb0858323d66bff03cf386630a797b248ad Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Date: Tue, 8 Sep 2020 12:02:53 +0200
Subject: [PATCH 023/322] media: cec-adap.c: don't use flush_scheduled_work()

For some inexplicable reason I decided to call flush_scheduled_work()
instead of cancel_delayed_work_sync(). The problem with that is that
flush_scheduled_work() waits for *all* queued scheduled work to be
completed instead of just the work itself.

This can cause a deadlock if a CEC driver also schedules work that
takes the same lock. See the comments for flush_scheduled_work() in
linux/workqueue.h.

This is exactly what has been observed a few times.

This patch simply replaces flush_scheduled_work() by
cancel_delayed_work_sync().

Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Cc: <stable@vger.kernel.org>      # for v5.8 and up
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/media/cec/core/cec-adap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/media/cec/core/cec-adap.c b/drivers/media/cec/core/cec-adap.c
index 4efe8014445e..926d65db6d3e 100644
--- a/drivers/media/cec/core/cec-adap.c
+++ b/drivers/media/cec/core/cec-adap.c
@@ -1199,7 +1199,7 @@ void cec_received_msg_ts(struct cec_adapter *adap,
 			/* Cancel the pending timeout work */
 			if (!cancel_delayed_work(&data->work)) {
 				mutex_unlock(&adap->lock);
-				flush_scheduled_work();
+				cancel_delayed_work_sync(&data->work);
 				mutex_lock(&adap->lock);
 			}
 			/*

From 18306c404abe18a0972587a6266830583c60c928 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 10 Sep 2020 08:30:05 -0400
Subject: [PATCH 024/322] epoll: replace ->visited/visited_list with generation
 count

removes the need to clear it, along with the races.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/eventpoll.c | 27 ++++++++-------------------
 1 file changed, 8 insertions(+), 19 deletions(-)

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 5207dfc85b78..82ab9a25f12f 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -218,8 +218,7 @@ struct eventpoll {
 	struct file *file;
 
 	/* used to optimize loop detection check */
-	struct list_head visited_list_link;
-	int visited;
+	u64 gen;
 
 #ifdef CONFIG_NET_RX_BUSY_POLL
 	/* used to track busy poll napi_id */
@@ -274,6 +273,8 @@ static long max_user_watches __read_mostly;
  */
 static DEFINE_MUTEX(epmutex);
 
+static u64 loop_check_gen = 0;
+
 /* Used to check for epoll file descriptor inclusion loops */
 static struct nested_calls poll_loop_ncalls;
 
@@ -283,9 +284,6 @@ static struct kmem_cache *epi_cache __read_mostly;
 /* Slab cache used to allocate "struct eppoll_entry" */
 static struct kmem_cache *pwq_cache __read_mostly;
 
-/* Visited nodes during ep_loop_check(), so we can unset them when we finish */
-static LIST_HEAD(visited_list);
-
 /*
  * List of files with newly added links, where we may need to limit the number
  * of emanating paths. Protected by the epmutex.
@@ -1971,13 +1969,12 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
 	struct epitem *epi;
 
 	mutex_lock_nested(&ep->mtx, call_nests + 1);
-	ep->visited = 1;
-	list_add(&ep->visited_list_link, &visited_list);
+	ep->gen = loop_check_gen;
 	for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) {
 		epi = rb_entry(rbp, struct epitem, rbn);
 		if (unlikely(is_file_epoll(epi->ffd.file))) {
 			ep_tovisit = epi->ffd.file->private_data;
-			if (ep_tovisit->visited)
+			if (ep_tovisit->gen == loop_check_gen)
 				continue;
 			error = ep_call_nested(&poll_loop_ncalls,
 					ep_loop_check_proc, epi->ffd.file,
@@ -2018,18 +2015,8 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
  */
 static int ep_loop_check(struct eventpoll *ep, struct file *file)
 {
-	int ret;
-	struct eventpoll *ep_cur, *ep_next;
-
-	ret = ep_call_nested(&poll_loop_ncalls,
+	return ep_call_nested(&poll_loop_ncalls,
 			      ep_loop_check_proc, file, ep, current);
-	/* clear visited list */
-	list_for_each_entry_safe(ep_cur, ep_next, &visited_list,
-							visited_list_link) {
-		ep_cur->visited = 0;
-		list_del(&ep_cur->visited_list_link);
-	}
-	return ret;
 }
 
 static void clear_tfile_check_list(void)
@@ -2199,6 +2186,7 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
 			error = epoll_mutex_lock(&epmutex, 0, nonblock);
 			if (error)
 				goto error_tgt_fput;
+			loop_check_gen++;
 			full_check = 1;
 			if (is_file_epoll(tf.file)) {
 				error = -ELOOP;
@@ -2262,6 +2250,7 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
 error_tgt_fput:
 	if (full_check) {
 		clear_tfile_check_list();
+		loop_check_gen++;
 		mutex_unlock(&epmutex);
 	}
 

From fe0a916c1eae8e17e86c3753d13919177d63ed7e Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 10 Sep 2020 08:33:27 -0400
Subject: [PATCH 025/322] epoll: EPOLL_CTL_ADD: close the race in decision to
 take fast path

Checking for the lack of epitems refering to the epoll we want to insert into
is not enough; we might have an insertion of that epoll into another one that
has already collected the set of files to recheck for excessive reverse paths,
but hasn't gotten to creating/inserting the epitem for it.

However, any such insertion in progress can be detected - it will update the
generation count in our epoll when it's done looking through it for files
to check.  That gets done under ->mtx of our epoll and that allows us to
detect that safely.

We are *not* holding epmutex here, so the generation count is not stable.
However, since both the update of ep->gen by loop check and (later)
insertion into ->f_ep_link are done with ep->mtx held, we are fine -
the sequence is
	grab epmutex
	bump loop_check_gen
	...
	grab tep->mtx		// 1
	tep->gen = loop_check_gen
	...
	drop tep->mtx		// 2
	...
	grab tep->mtx		// 3
	...
	insert into ->f_ep_link
	...
	drop tep->mtx		// 4
	bump loop_check_gen
	drop epmutex
and if the fastpath check in another thread happens for that
eventpoll, it can come
	* before (1) - in that case fastpath is just fine
	* after (4) - we'll see non-empty ->f_ep_link, slow path
taken
	* between (2) and (3) - loop_check_gen is stable,
with ->mtx providing barriers and we end up taking slow path.

Note that ->f_ep_link emptiness check is slightly racy - we are protected
against insertions into that list, but removals can happen right under us.
Not a problem - in the worst case we'll end up taking a slow path for
no good reason.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/eventpoll.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 82ab9a25f12f..16313180e4c1 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -2181,6 +2181,7 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds,
 		goto error_tgt_fput;
 	if (op == EPOLL_CTL_ADD) {
 		if (!list_empty(&f.file->f_ep_links) ||
+				ep->gen == loop_check_gen ||
 						is_file_epoll(tf.file)) {
 			mutex_unlock(&ep->mtx);
 			error = epoll_mutex_lock(&epmutex, 0, nonblock);

From 505623a2be48b36de533951ced130876a76a2d55 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Thu, 10 Sep 2020 15:15:32 +0300
Subject: [PATCH 026/322] spi: spi-fsl-dspi: use XSPI mode instead of DMA for
 DPAA2 SoCs

The arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi device tree lacks DMA
channels for DSPI, so naturally, the driver fails to probe:

[ 2.945302] fsl-dspi 2100000.spi: rx dma channel not available
[ 2.951134] fsl-dspi 2100000.spi: can't get dma channels

In retrospect, this should have been obvious, because LS2080A, LS2085A
LS2088A and LX2160A don't appear to have an eDMA module at all. Looking
again at their datasheets, the CTARE register (which is specific to XSPI
functionality) seems to be documented, so switch them to XSPI mode
instead.

Fixes: 0feaf8f5afe0 ("spi: spi-fsl-dspi: Convert the instantiations that support it to DMA")
Reported-by: Qiang Zhao <qiang.zhao@nxp.com>
Tested-by: Qiang Zhao <qiang.zhao@nxp.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Link: https://lore.kernel.org/r/20200910121532.1138596-1-olteanv@gmail.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-fsl-dspi.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c
index 91c6affe139c..283f2468a2f4 100644
--- a/drivers/spi/spi-fsl-dspi.c
+++ b/drivers/spi/spi-fsl-dspi.c
@@ -174,17 +174,17 @@ static const struct fsl_dspi_devtype_data devtype_data[] = {
 		.fifo_size		= 16,
 	},
 	[LS2080A] = {
-		.trans_mode		= DSPI_DMA_MODE,
+		.trans_mode		= DSPI_XSPI_MODE,
 		.max_clock_factor	= 8,
 		.fifo_size		= 4,
 	},
 	[LS2085A] = {
-		.trans_mode		= DSPI_DMA_MODE,
+		.trans_mode		= DSPI_XSPI_MODE,
 		.max_clock_factor	= 8,
 		.fifo_size		= 4,
 	},
 	[LX2160A] = {
-		.trans_mode		= DSPI_DMA_MODE,
+		.trans_mode		= DSPI_XSPI_MODE,
 		.max_clock_factor	= 8,
 		.fifo_size		= 4,
 	},

From 63c3212e7a37d68c89a13bdaebce869f4e064e67 Mon Sep 17 00:00:00 2001
From: Chris Packham <chris.packham@alliedtelesis.co.nz>
Date: Tue, 8 Sep 2020 09:17:10 +1200
Subject: [PATCH 027/322] pinctrl: mvebu: Fix i2c sda definition for 98DX3236

Per the datasheet the i2c functions use MPP_Sel=0x1. They are documented
as using MPP_Sel=0x4 as well but mixing 0x1 and 0x4 is clearly wrong. On
the board tested 0x4 resulted in a non-functioning i2c bus so stick with
0x1 which works.

Fixes: d7ae8f8dee7f ("pinctrl: mvebu: pinctrl driver for 98DX3236 SoC")
Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Link: https://lore.kernel.org/r/20200907211712.9697-2-chris.packham@alliedtelesis.co.nz
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/mvebu/pinctrl-armada-xp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-xp.c b/drivers/pinctrl/mvebu/pinctrl-armada-xp.c
index a767a05fa3a0..48e2a6c56a83 100644
--- a/drivers/pinctrl/mvebu/pinctrl-armada-xp.c
+++ b/drivers/pinctrl/mvebu/pinctrl-armada-xp.c
@@ -414,7 +414,7 @@ static struct mvebu_mpp_mode mv98dx3236_mpp_modes[] = {
 		 MPP_VAR_FUNCTION(0x1, "i2c0", "sck",        V_98DX3236_PLUS)),
 	MPP_MODE(15,
 		 MPP_VAR_FUNCTION(0x0, "gpio", NULL,         V_98DX3236_PLUS),
-		 MPP_VAR_FUNCTION(0x4, "i2c0", "sda",        V_98DX3236_PLUS)),
+		 MPP_VAR_FUNCTION(0x1, "i2c0", "sda",        V_98DX3236_PLUS)),
 	MPP_MODE(16,
 		 MPP_VAR_FUNCTION(0x0, "gpo", NULL,          V_98DX3236_PLUS),
 		 MPP_VAR_FUNCTION(0x4, "dev", "oe",          V_98DX3236_PLUS)),

From 28284943ac94014767ecc2f7b3c5747c4a5617a0 Mon Sep 17 00:00:00 2001
From: Zhenyu Wang <zhenyuw@linux.intel.com>
Date: Mon, 14 Sep 2020 11:03:02 +0800
Subject: [PATCH 028/322] drm/i915/gvt: Fix port number for BDW on EDID region
 setup

Current BDW virtual display port is initialized as PORT_B, so need
to use same port for VFIO EDID region, otherwise invalid EDID blob
pointer is assigned which caused kernel null pointer reference. We
might evaluate actual display hotplug for BDW to make this function
work as expected, anyway this is always required to be fixed first.

Reported-by: Alejandro Sior <aho@sior.be>
Cc: Alejandro Sior <aho@sior.be>
Fixes: 0178f4ce3c3b ("drm/i915/gvt: Enable vfio edid for all GVT supported platform")
Reviewed-by: Hang Yuan <hang.yuan@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20200914030302.2775505-1-zhenyuw@linux.intel.com
---
 drivers/gpu/drm/i915/gvt/vgpu.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c
index 7d361623ff67..041f601b07d0 100644
--- a/drivers/gpu/drm/i915/gvt/vgpu.c
+++ b/drivers/gpu/drm/i915/gvt/vgpu.c
@@ -367,6 +367,7 @@ void intel_gvt_destroy_idle_vgpu(struct intel_vgpu *vgpu)
 static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt,
 		struct intel_vgpu_creation_params *param)
 {
+	struct drm_i915_private *dev_priv = gvt->gt->i915;
 	struct intel_vgpu *vgpu;
 	int ret;
 
@@ -434,7 +435,10 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt,
 	if (ret)
 		goto out_clean_sched_policy;
 
-	ret = intel_gvt_hypervisor_set_edid(vgpu, PORT_D);
+	if (IS_BROADWELL(dev_priv))
+		ret = intel_gvt_hypervisor_set_edid(vgpu, PORT_B);
+	else
+		ret = intel_gvt_hypervisor_set_edid(vgpu, PORT_D);
 	if (ret)
 		goto out_clean_sched_policy;
 

From 14801c624066a55139c2c57963eb1b859d0a316a Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Mon, 14 Sep 2020 11:42:43 +0200
Subject: [PATCH 029/322] mmc: mmc_spi: Fix mmc_spi_dma_alloc() return type for
 !HAS_DMA
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If CONFIG_NO_DMA=y (e.g. Sun-3 allmodconfig):

    drivers/mmc/host/mmc_spi.c:1323:15: warning: return type defaults to ‘int’ [-Wreturn-type]
     static inline mmc_spi_dma_alloc(struct mmc_spi_host *host) { return 0; }
		   ^~~~~~~~~~~~~~~~~

Fix this by adding the missing return type.

Fixes: a395acf0f6dc6409 ("mmc: mmc_spi: Allow the driver to be built when CONFIG_HAS_DMA is unset")
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Link: https://lore.kernel.org/r/20200914094243.3912-1-geert@linux-m68k.org
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/mmc_spi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c
index 5055a7eb134a..18a850f37ddc 100644
--- a/drivers/mmc/host/mmc_spi.c
+++ b/drivers/mmc/host/mmc_spi.c
@@ -1320,7 +1320,7 @@ static void mmc_spi_dma_free(struct mmc_spi_host *host)
 			 DMA_BIDIRECTIONAL);
 }
 #else
-static inline mmc_spi_dma_alloc(struct mmc_spi_host *host) { return 0; }
+static inline int mmc_spi_dma_alloc(struct mmc_spi_host *host) { return 0; }
 static inline void mmc_spi_dma_free(struct mmc_spi_host *host) {}
 #endif
 

From 129134e5415d46f38b9978b3809af94ed649b57d Mon Sep 17 00:00:00 2001
From: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Date: Fri, 11 Sep 2020 05:07:58 +0200
Subject: [PATCH 030/322] media: media/v4l2: remove
 V4L2_FLAG_MEMORY_NON_CONSISTENT flag

The patch partially reverts some of the UAPI bits of the buffer
cache management hints. Namely, the queue consistency (memory
coherency) user-space hint because, as it turned out, the kernel
implementation of this feature was misusing DMA_ATTR_NON_CONSISTENT.

The patch reverts both kernel and user space parts: removes the
DMA consistency attr functions, rolls back changes to v4l2_requestbuffers,
v4l2_create_buffers structures and corresponding UAPI functions
(plus compat32 layer) and cleans up the documentation.

[hverkuil: fixed a few typos in the commit log]
[hverkuil: fixed vb2_core_reqbufs call in drivers/media/dvb-core/dvb_vb2.c]
[mchehab: fixed a typo in the commit log: revers->reverts]

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../userspace-api/media/v4l/buffer.rst        | 17 -------
 .../media/v4l/vidioc-create-bufs.rst          |  6 +--
 .../media/v4l/vidioc-reqbufs.rst              | 12 +----
 .../media/common/videobuf2/videobuf2-core.c   | 46 +++----------------
 .../common/videobuf2/videobuf2-dma-contig.c   | 19 --------
 .../media/common/videobuf2/videobuf2-dma-sg.c |  3 +-
 .../media/common/videobuf2/videobuf2-v4l2.c   | 18 +-------
 drivers/media/dvb-core/dvb_vb2.c              |  2 +-
 drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 10 +---
 drivers/media/v4l2-core/v4l2-ioctl.c          |  5 +-
 include/media/videobuf2-core.h                |  7 +--
 include/uapi/linux/videodev2.h                | 13 +-----
 12 files changed, 23 insertions(+), 135 deletions(-)

diff --git a/Documentation/userspace-api/media/v4l/buffer.rst b/Documentation/userspace-api/media/v4l/buffer.rst
index 57e752aaf414..2044ed13cd9d 100644
--- a/Documentation/userspace-api/media/v4l/buffer.rst
+++ b/Documentation/userspace-api/media/v4l/buffer.rst
@@ -701,23 +701,6 @@ Memory Consistency Flags
     :stub-columns: 0
     :widths:       3 1 4
 
-    * .. _`V4L2-FLAG-MEMORY-NON-CONSISTENT`:
-
-      - ``V4L2_FLAG_MEMORY_NON_CONSISTENT``
-      - 0x00000001
-      - A buffer is allocated either in consistent (it will be automatically
-	coherent between the CPU and the bus) or non-consistent memory. The
-	latter can provide performance gains, for instance the CPU cache
-	sync/flush operations can be avoided if the buffer is accessed by the
-	corresponding device only and the CPU does not read/write to/from that
-	buffer. However, this requires extra care from the driver -- it must
-	guarantee memory consistency by issuing a cache flush/sync when
-	consistency is needed. If this flag is set V4L2 will attempt to
-	allocate the buffer in non-consistent memory. The flag takes effect
-	only if the buffer is used for :ref:`memory mapping <mmap>` I/O and the
-	queue reports the :ref:`V4L2_BUF_CAP_SUPPORTS_MMAP_CACHE_HINTS
-	<V4L2-BUF-CAP-SUPPORTS-MMAP-CACHE-HINTS>` capability.
-
 .. c:type:: v4l2_memory
 
 enum v4l2_memory
diff --git a/Documentation/userspace-api/media/v4l/vidioc-create-bufs.rst b/Documentation/userspace-api/media/v4l/vidioc-create-bufs.rst
index f2a702870fad..12cf6b44f414 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-create-bufs.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-create-bufs.rst
@@ -120,13 +120,9 @@ than the number requested.
 	If you want to just query the capabilities without making any
 	other changes, then set ``count`` to 0, ``memory`` to
 	``V4L2_MEMORY_MMAP`` and ``format.type`` to the buffer type.
-    * - __u32
-      - ``flags``
-      - Specifies additional buffer management attributes.
-	See :ref:`memory-flags`.
 
     * - __u32
-      - ``reserved``\ [6]
+      - ``reserved``\ [7]
       - A place holder for future extensions. Drivers and applications
 	must set the array to zero.
 
diff --git a/Documentation/userspace-api/media/v4l/vidioc-reqbufs.rst b/Documentation/userspace-api/media/v4l/vidioc-reqbufs.rst
index 75d894d9c36c..0e3e2fde65e8 100644
--- a/Documentation/userspace-api/media/v4l/vidioc-reqbufs.rst
+++ b/Documentation/userspace-api/media/v4l/vidioc-reqbufs.rst
@@ -112,17 +112,10 @@ aborting or finishing any DMA in progress, an implicit
 	``V4L2_MEMORY_MMAP`` and ``type`` set to the buffer type. This will
 	free any previously allocated buffers, so this is typically something
 	that will be done at the start of the application.
-    * - union {
-      - (anonymous)
-    * - __u32
-      - ``flags``
-      - Specifies additional buffer management attributes.
-	See :ref:`memory-flags`.
     * - __u32
       - ``reserved``\ [1]
-      - Kept for backwards compatibility. Use ``flags`` instead.
-    * - }
-      -
+      - A place holder for future extensions. Drivers and applications
+	must set the array to zero.
 
 .. tabularcolumns:: |p{6.1cm}|p{2.2cm}|p{8.7cm}|
 
@@ -169,7 +162,6 @@ aborting or finishing any DMA in progress, an implicit
       - This capability is set by the driver to indicate that the queue supports
         cache and memory management hints. However, it's only valid when the
         queue is used for :ref:`memory mapping <mmap>` streaming I/O. See
-        :ref:`V4L2_FLAG_MEMORY_NON_CONSISTENT <V4L2-FLAG-MEMORY-NON-CONSISTENT>`,
         :ref:`V4L2_BUF_FLAG_NO_CACHE_INVALIDATE <V4L2-BUF-FLAG-NO-CACHE-INVALIDATE>` and
         :ref:`V4L2_BUF_FLAG_NO_CACHE_CLEAN <V4L2-BUF-FLAG-NO-CACHE-CLEAN>`.
 
diff --git a/drivers/media/common/videobuf2/videobuf2-core.c b/drivers/media/common/videobuf2/videobuf2-core.c
index f544d3393e9d..4eab6d81cce1 100644
--- a/drivers/media/common/videobuf2/videobuf2-core.c
+++ b/drivers/media/common/videobuf2/videobuf2-core.c
@@ -721,39 +721,14 @@ int vb2_verify_memory_type(struct vb2_queue *q,
 }
 EXPORT_SYMBOL(vb2_verify_memory_type);
 
-static void set_queue_consistency(struct vb2_queue *q, bool consistent_mem)
-{
-	q->dma_attrs &= ~DMA_ATTR_NON_CONSISTENT;
-
-	if (!vb2_queue_allows_cache_hints(q))
-		return;
-	if (!consistent_mem)
-		q->dma_attrs |= DMA_ATTR_NON_CONSISTENT;
-}
-
-static bool verify_consistency_attr(struct vb2_queue *q, bool consistent_mem)
-{
-	bool queue_is_consistent = !(q->dma_attrs & DMA_ATTR_NON_CONSISTENT);
-
-	if (consistent_mem != queue_is_consistent) {
-		dprintk(q, 1, "memory consistency model mismatch\n");
-		return false;
-	}
-	return true;
-}
-
 int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory,
-		     unsigned int flags, unsigned int *count)
+		     unsigned int *count)
 {
 	unsigned int num_buffers, allocated_buffers, num_planes = 0;
 	unsigned plane_sizes[VB2_MAX_PLANES] = { };
-	bool consistent_mem = true;
 	unsigned int i;
 	int ret;
 
-	if (flags & V4L2_FLAG_MEMORY_NON_CONSISTENT)
-		consistent_mem = false;
-
 	if (q->streaming) {
 		dprintk(q, 1, "streaming active\n");
 		return -EBUSY;
@@ -765,8 +740,7 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory,
 	}
 
 	if (*count == 0 || q->num_buffers != 0 ||
-	    (q->memory != VB2_MEMORY_UNKNOWN && q->memory != memory) ||
-	    !verify_consistency_attr(q, consistent_mem)) {
+	    (q->memory != VB2_MEMORY_UNKNOWN && q->memory != memory)) {
 		/*
 		 * We already have buffers allocated, so first check if they
 		 * are not in use and can be freed.
@@ -803,7 +777,6 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory,
 	num_buffers = min_t(unsigned int, num_buffers, VB2_MAX_FRAME);
 	memset(q->alloc_devs, 0, sizeof(q->alloc_devs));
 	q->memory = memory;
-	set_queue_consistency(q, consistent_mem);
 
 	/*
 	 * Ask the driver how many buffers and planes per buffer it requires.
@@ -888,18 +861,14 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory,
 EXPORT_SYMBOL_GPL(vb2_core_reqbufs);
 
 int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory,
-			 unsigned int flags, unsigned int *count,
+			 unsigned int *count,
 			 unsigned int requested_planes,
 			 const unsigned int requested_sizes[])
 {
 	unsigned int num_planes = 0, num_buffers, allocated_buffers;
 	unsigned plane_sizes[VB2_MAX_PLANES] = { };
-	bool consistent_mem = true;
 	int ret;
 
-	if (flags & V4L2_FLAG_MEMORY_NON_CONSISTENT)
-		consistent_mem = false;
-
 	if (q->num_buffers == VB2_MAX_FRAME) {
 		dprintk(q, 1, "maximum number of buffers already allocated\n");
 		return -ENOBUFS;
@@ -912,15 +881,12 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory,
 		}
 		memset(q->alloc_devs, 0, sizeof(q->alloc_devs));
 		q->memory = memory;
-		set_queue_consistency(q, consistent_mem);
 		q->waiting_for_buffers = !q->is_output;
 	} else {
 		if (q->memory != memory) {
 			dprintk(q, 1, "memory model mismatch\n");
 			return -EINVAL;
 		}
-		if (!verify_consistency_attr(q, consistent_mem))
-			return -EINVAL;
 	}
 
 	num_buffers = min(*count, VB2_MAX_FRAME - q->num_buffers);
@@ -2581,7 +2547,7 @@ static int __vb2_init_fileio(struct vb2_queue *q, int read)
 	fileio->memory = VB2_MEMORY_MMAP;
 	fileio->type = q->type;
 	q->fileio = fileio;
-	ret = vb2_core_reqbufs(q, fileio->memory, 0, &fileio->count);
+	ret = vb2_core_reqbufs(q, fileio->memory, &fileio->count);
 	if (ret)
 		goto err_kfree;
 
@@ -2638,7 +2604,7 @@ static int __vb2_init_fileio(struct vb2_queue *q, int read)
 
 err_reqbufs:
 	fileio->count = 0;
-	vb2_core_reqbufs(q, fileio->memory, 0, &fileio->count);
+	vb2_core_reqbufs(q, fileio->memory, &fileio->count);
 
 err_kfree:
 	q->fileio = NULL;
@@ -2658,7 +2624,7 @@ static int __vb2_cleanup_fileio(struct vb2_queue *q)
 		vb2_core_streamoff(q, q->type);
 		q->fileio = NULL;
 		fileio->count = 0;
-		vb2_core_reqbufs(q, fileio->memory, 0, &fileio->count);
+		vb2_core_reqbufs(q, fileio->memory, &fileio->count);
 		kfree(fileio);
 		dprintk(q, 3, "file io emulator closed\n");
 	}
diff --git a/drivers/media/common/videobuf2/videobuf2-dma-contig.c b/drivers/media/common/videobuf2/videobuf2-dma-contig.c
index ec3446cc45b8..7b1b86ec942d 100644
--- a/drivers/media/common/videobuf2/videobuf2-dma-contig.c
+++ b/drivers/media/common/videobuf2/videobuf2-dma-contig.c
@@ -42,11 +42,6 @@ struct vb2_dc_buf {
 	struct dma_buf_attachment	*db_attach;
 };
 
-static inline bool vb2_dc_buffer_consistent(unsigned long attr)
-{
-	return !(attr & DMA_ATTR_NON_CONSISTENT);
-}
-
 /*********************************************/
 /*        scatterlist table functions        */
 /*********************************************/
@@ -341,13 +336,6 @@ static int
 vb2_dc_dmabuf_ops_begin_cpu_access(struct dma_buf *dbuf,
 				   enum dma_data_direction direction)
 {
-	struct vb2_dc_buf *buf = dbuf->priv;
-	struct sg_table *sgt = buf->dma_sgt;
-
-	if (vb2_dc_buffer_consistent(buf->attrs))
-		return 0;
-
-	dma_sync_sg_for_cpu(buf->dev, sgt->sgl, sgt->nents, buf->dma_dir);
 	return 0;
 }
 
@@ -355,13 +343,6 @@ static int
 vb2_dc_dmabuf_ops_end_cpu_access(struct dma_buf *dbuf,
 				 enum dma_data_direction direction)
 {
-	struct vb2_dc_buf *buf = dbuf->priv;
-	struct sg_table *sgt = buf->dma_sgt;
-
-	if (vb2_dc_buffer_consistent(buf->attrs))
-		return 0;
-
-	dma_sync_sg_for_device(buf->dev, sgt->sgl, sgt->nents, buf->dma_dir);
 	return 0;
 }
 
diff --git a/drivers/media/common/videobuf2/videobuf2-dma-sg.c b/drivers/media/common/videobuf2/videobuf2-dma-sg.c
index 0a40e00f0d7e..a86fce5d8ea8 100644
--- a/drivers/media/common/videobuf2/videobuf2-dma-sg.c
+++ b/drivers/media/common/videobuf2/videobuf2-dma-sg.c
@@ -123,8 +123,7 @@ static void *vb2_dma_sg_alloc(struct device *dev, unsigned long dma_attrs,
 	/*
 	 * NOTE: dma-sg allocates memory using the page allocator directly, so
 	 * there is no memory consistency guarantee, hence dma-sg ignores DMA
-	 * attributes passed from the upper layer. That means that
-	 * V4L2_FLAG_MEMORY_NON_CONSISTENT has no effect on dma-sg buffers.
+	 * attributes passed from the upper layer.
 	 */
 	buf->pages = kvmalloc_array(buf->num_pages, sizeof(struct page *),
 				    GFP_KERNEL | __GFP_ZERO);
diff --git a/drivers/media/common/videobuf2/videobuf2-v4l2.c b/drivers/media/common/videobuf2/videobuf2-v4l2.c
index 30caad27281e..cfe197df970d 100644
--- a/drivers/media/common/videobuf2/videobuf2-v4l2.c
+++ b/drivers/media/common/videobuf2/videobuf2-v4l2.c
@@ -722,22 +722,12 @@ static void fill_buf_caps(struct vb2_queue *q, u32 *caps)
 #endif
 }
 
-static void clear_consistency_attr(struct vb2_queue *q,
-				   int memory,
-				   unsigned int *flags)
-{
-	if (!q->allow_cache_hints || memory != V4L2_MEMORY_MMAP)
-		*flags &= ~V4L2_FLAG_MEMORY_NON_CONSISTENT;
-}
-
 int vb2_reqbufs(struct vb2_queue *q, struct v4l2_requestbuffers *req)
 {
 	int ret = vb2_verify_memory_type(q, req->memory, req->type);
 
 	fill_buf_caps(q, &req->capabilities);
-	clear_consistency_attr(q, req->memory, &req->flags);
-	return ret ? ret : vb2_core_reqbufs(q, req->memory,
-					    req->flags, &req->count);
+	return ret ? ret : vb2_core_reqbufs(q, req->memory, &req->count);
 }
 EXPORT_SYMBOL_GPL(vb2_reqbufs);
 
@@ -769,7 +759,6 @@ int vb2_create_bufs(struct vb2_queue *q, struct v4l2_create_buffers *create)
 	unsigned i;
 
 	fill_buf_caps(q, &create->capabilities);
-	clear_consistency_attr(q, create->memory, &create->flags);
 	create->index = q->num_buffers;
 	if (create->count == 0)
 		return ret != -EBUSY ? ret : 0;
@@ -813,7 +802,6 @@ int vb2_create_bufs(struct vb2_queue *q, struct v4l2_create_buffers *create)
 		if (requested_sizes[i] == 0)
 			return -EINVAL;
 	return ret ? ret : vb2_core_create_bufs(q, create->memory,
-						create->flags,
 						&create->count,
 						requested_planes,
 						requested_sizes);
@@ -998,12 +986,11 @@ int vb2_ioctl_reqbufs(struct file *file, void *priv,
 	int res = vb2_verify_memory_type(vdev->queue, p->memory, p->type);
 
 	fill_buf_caps(vdev->queue, &p->capabilities);
-	clear_consistency_attr(vdev->queue, p->memory, &p->flags);
 	if (res)
 		return res;
 	if (vb2_queue_is_busy(vdev, file))
 		return -EBUSY;
-	res = vb2_core_reqbufs(vdev->queue, p->memory, p->flags, &p->count);
+	res = vb2_core_reqbufs(vdev->queue, p->memory, &p->count);
 	/* If count == 0, then the owner has released all buffers and he
 	   is no longer owner of the queue. Otherwise we have a new owner. */
 	if (res == 0)
@@ -1021,7 +1008,6 @@ int vb2_ioctl_create_bufs(struct file *file, void *priv,
 
 	p->index = vdev->queue->num_buffers;
 	fill_buf_caps(vdev->queue, &p->capabilities);
-	clear_consistency_attr(vdev->queue, p->memory, &p->flags);
 	/*
 	 * If count == 0, then just check if memory and type are valid.
 	 * Any -EBUSY result from vb2_verify_memory_type can be mapped to 0.
diff --git a/drivers/media/dvb-core/dvb_vb2.c b/drivers/media/dvb-core/dvb_vb2.c
index 959d110407a4..6974f1731529 100644
--- a/drivers/media/dvb-core/dvb_vb2.c
+++ b/drivers/media/dvb-core/dvb_vb2.c
@@ -342,7 +342,7 @@ int dvb_vb2_reqbufs(struct dvb_vb2_ctx *ctx, struct dmx_requestbuffers *req)
 
 	ctx->buf_siz = req->size;
 	ctx->buf_cnt = req->count;
-	ret = vb2_core_reqbufs(&ctx->vb_q, VB2_MEMORY_MMAP, 0, &req->count);
+	ret = vb2_core_reqbufs(&ctx->vb_q, VB2_MEMORY_MMAP, &req->count);
 	if (ret) {
 		ctx->state = DVB_VB2_STATE_NONE;
 		dprintk(1, "[%s] count=%d size=%d errno=%d\n", ctx->name,
diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c
index 593bcf6c3735..a99e82ec9ab6 100644
--- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c
+++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c
@@ -246,9 +246,6 @@ struct v4l2_format32 {
  * @memory:	buffer memory type
  * @format:	frame format, for which buffers are requested
  * @capabilities: capabilities of this buffer type.
- * @flags:	additional buffer management attributes (ignored unless the
- *		queue has V4L2_BUF_CAP_SUPPORTS_MMAP_CACHE_HINTS capability and
- *		configured for MMAP streaming I/O).
  * @reserved:	future extensions
  */
 struct v4l2_create_buffers32 {
@@ -257,8 +254,7 @@ struct v4l2_create_buffers32 {
 	__u32			memory;	/* enum v4l2_memory */
 	struct v4l2_format32	format;
 	__u32			capabilities;
-	__u32			flags;
-	__u32			reserved[6];
+	__u32			reserved[7];
 };
 
 static int __bufsize_v4l2_format(struct v4l2_format32 __user *p32, u32 *size)
@@ -359,8 +355,7 @@ static int get_v4l2_create32(struct v4l2_create_buffers __user *p64,
 {
 	if (!access_ok(p32, sizeof(*p32)) ||
 	    copy_in_user(p64, p32,
-			 offsetof(struct v4l2_create_buffers32, format)) ||
-	    assign_in_user(&p64->flags, &p32->flags))
+			 offsetof(struct v4l2_create_buffers32, format)))
 		return -EFAULT;
 	return __get_v4l2_format32(&p64->format, &p32->format,
 				   aux_buf, aux_space);
@@ -422,7 +417,6 @@ static int put_v4l2_create32(struct v4l2_create_buffers __user *p64,
 	    copy_in_user(p32, p64,
 			 offsetof(struct v4l2_create_buffers32, format)) ||
 	    assign_in_user(&p32->capabilities, &p64->capabilities) ||
-	    assign_in_user(&p32->flags, &p64->flags) ||
 	    copy_in_user(p32->reserved, p64->reserved, sizeof(p64->reserved)))
 		return -EFAULT;
 	return __put_v4l2_format32(&p64->format, &p32->format);
diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c
index e3a25ea913ac..ccf947632a3b 100644
--- a/drivers/media/v4l2-core/v4l2-ioctl.c
+++ b/drivers/media/v4l2-core/v4l2-ioctl.c
@@ -2044,6 +2044,9 @@ static int v4l_reqbufs(const struct v4l2_ioctl_ops *ops,
 
 	if (ret)
 		return ret;
+
+	CLEAR_AFTER_FIELD(p, capabilities);
+
 	return ops->vidioc_reqbufs(file, fh, p);
 }
 
@@ -2083,7 +2086,7 @@ static int v4l_create_bufs(const struct v4l2_ioctl_ops *ops,
 	if (ret)
 		return ret;
 
-	CLEAR_AFTER_FIELD(create, flags);
+	CLEAR_AFTER_FIELD(create, capabilities);
 
 	v4l_sanitize_format(&create->format);
 
diff --git a/include/media/videobuf2-core.h b/include/media/videobuf2-core.h
index 52ef92049073..bbb3f26fbde9 100644
--- a/include/media/videobuf2-core.h
+++ b/include/media/videobuf2-core.h
@@ -744,8 +744,6 @@ void vb2_core_querybuf(struct vb2_queue *q, unsigned int index, void *pb);
  * vb2_core_reqbufs() - Initiate streaming.
  * @q:		pointer to &struct vb2_queue with videobuf2 queue.
  * @memory:	memory type, as defined by &enum vb2_memory.
- * @flags:	auxiliary queue/buffer management flags. Currently, the only
- *		used flag is %V4L2_FLAG_MEMORY_NON_CONSISTENT.
  * @count:	requested buffer count.
  *
  * Videobuf2 core helper to implement VIDIOC_REQBUF() operation. It is called
@@ -770,13 +768,12 @@ void vb2_core_querybuf(struct vb2_queue *q, unsigned int index, void *pb);
  * Return: returns zero on success; an error code otherwise.
  */
 int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory,
-		    unsigned int flags, unsigned int *count);
+		    unsigned int *count);
 
 /**
  * vb2_core_create_bufs() - Allocate buffers and any required auxiliary structs
  * @q: pointer to &struct vb2_queue with videobuf2 queue.
  * @memory: memory type, as defined by &enum vb2_memory.
- * @flags: auxiliary queue/buffer management flags.
  * @count: requested buffer count.
  * @requested_planes: number of planes requested.
  * @requested_sizes: array with the size of the planes.
@@ -794,7 +791,7 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory,
  * Return: returns zero on success; an error code otherwise.
  */
 int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory,
-			 unsigned int flags, unsigned int *count,
+			 unsigned int *count,
 			 unsigned int requested_planes,
 			 const unsigned int requested_sizes[]);
 
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index c7b70ff53bc1..235db7754606 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -191,8 +191,6 @@ enum v4l2_memory {
 	V4L2_MEMORY_DMABUF           = 4,
 };
 
-#define V4L2_FLAG_MEMORY_NON_CONSISTENT		(1 << 0)
-
 /* see also http://vektor.theorem.ca/graphics/ycbcr/ */
 enum v4l2_colorspace {
 	/*
@@ -949,10 +947,7 @@ struct v4l2_requestbuffers {
 	__u32			type;		/* enum v4l2_buf_type */
 	__u32			memory;		/* enum v4l2_memory */
 	__u32			capabilities;
-	union {
-		__u32		flags;
-		__u32		reserved[1];
-	};
+	__u32			reserved[1];
 };
 
 /* capabilities for struct v4l2_requestbuffers and v4l2_create_buffers */
@@ -2456,9 +2451,6 @@ struct v4l2_dbg_chip_info {
  * @memory:	enum v4l2_memory; buffer memory type
  * @format:	frame format, for which buffers are requested
  * @capabilities: capabilities of this buffer type.
- * @flags:	additional buffer management attributes (ignored unless the
- *		queue has V4L2_BUF_CAP_SUPPORTS_MMAP_CACHE_HINTS capability
- *		and configured for MMAP streaming I/O).
  * @reserved:	future extensions
  */
 struct v4l2_create_buffers {
@@ -2467,8 +2459,7 @@ struct v4l2_create_buffers {
 	__u32			memory;
 	struct v4l2_format	format;
 	__u32			capabilities;
-	__u32			flags;
-	__u32			reserved[6];
+	__u32			reserved[7];
 };
 
 /*

From 4aa1615268a8ac2b20096211d3f9ac53874067d7 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Mon, 14 Sep 2020 15:20:18 -0300
Subject: [PATCH 031/322] RDMA/core: Fix ordering of CQ pool destruction

rxe will hold a refcount on the IB device as long as CQ objects exist,
this causes destruction of a rxe device to hang if the CQ pool has any
cached CQs since they are being destroyed after the refcount must go to
zero.

Treat the CQ pool like a client and create/destroy it before/after all
other clients. No users of CQ pool can exist past a client remove call.

Link: https://lore.kernel.org/r/e8a240aa-9e9b-3dca-062f-9130b787f29b@acm.org
Fixes: c7ff819aefea ("RDMA/core: Introduce shared CQ pool API")
Tested-by: Bart Van Assche <bvanassche@acm.org>
Tested-by: Yi Zhang <yi.zhang@redhat.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/core/device.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index c36b4d2b61e0..23ee65a9185f 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -1285,6 +1285,8 @@ static void disable_device(struct ib_device *device)
 		remove_client_context(device, cid);
 	}
 
+	ib_cq_pool_destroy(device);
+
 	/* Pairs with refcount_set in enable_device */
 	ib_device_put(device);
 	wait_for_completion(&device->unreg_completion);
@@ -1328,6 +1330,8 @@ static int enable_device_and_get(struct ib_device *device)
 			goto out;
 	}
 
+	ib_cq_pool_init(device);
+
 	down_read(&clients_rwsem);
 	xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED) {
 		ret = add_client_context(device, client);
@@ -1400,7 +1404,6 @@ int ib_register_device(struct ib_device *device, const char *name)
 		goto dev_cleanup;
 	}
 
-	ib_cq_pool_init(device);
 	ret = enable_device_and_get(device);
 	dev_set_uevent_suppress(&device->dev, false);
 	/* Mark for userspace that device is ready */
@@ -1455,7 +1458,6 @@ static void __ib_unregister_device(struct ib_device *ib_dev)
 		goto out;
 
 	disable_device(ib_dev);
-	ib_cq_pool_destroy(ib_dev);
 
 	/* Expedite removing unregistered pointers from the hash table */
 	free_netdevs(ib_dev);

From 0212a0483b0a36cc94cfab882b3edbb41fcfe1cd Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Fri, 7 Aug 2020 15:31:43 +0200
Subject: [PATCH 032/322] clk: samsung: Keep top BPLL mux on Exynos542x enabled

BPLL clock must not be disabled because it is needed for proper DRAM
operation. This is normally handled by respective memory devfreq driver,
but when that driver is not yet probed or its probe has been deferred
the clock might get disabled what causes board hang. Fix this by calling
clk_prepare_enable() directly from the clock provider driver.

Cc: stable@vger.kernel.org
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>
Tested-by: Lukasz Luba <lukasz.luba@arm.com>
Acked-by: Krzysztof Kozlowski <krzk@kernel.org>
Link: https://lore.kernel.org/r/20200807133143.22748-1-m.szyprowski@samsung.com
Fixes: 6e7674c3c6df ("memory: Add DMC driver for Exynos5422")
Signed-off-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
---
 drivers/clk/samsung/clk-exynos5420.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/clk/samsung/clk-exynos5420.c b/drivers/clk/samsung/clk-exynos5420.c
index fea33399a632..bd620876544d 100644
--- a/drivers/clk/samsung/clk-exynos5420.c
+++ b/drivers/clk/samsung/clk-exynos5420.c
@@ -1655,6 +1655,11 @@ static void __init exynos5x_clk_init(struct device_node *np,
 	 * main G3D clock enablement status.
 	 */
 	clk_prepare_enable(__clk_lookup("mout_sw_aclk_g3d"));
+	/*
+	 * Keep top BPLL mux enabled permanently to ensure that DRAM operates
+	 * properly.
+	 */
+	clk_prepare_enable(__clk_lookup("mout_bpll"));
 
 	samsung_clk_of_add_provider(np, ctx);
 }

From 50851f5033a34fb9f2ecefa57fc9efd862634416 Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Sat, 12 Sep 2020 15:22:11 +0800
Subject: [PATCH 033/322] spi: bcm2835: Make polling_limit_us static

This eliminates the following sparse warning:

drivers/spi/spi-bcm2835.c:78:14: warning: symbol 'polling_limit_us' was
not declared. Should it be static?

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Jason Yan <yanaijie@huawei.com>
Link: https://lore.kernel.org/r/20200912072211.602735-1-yanaijie@huawei.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-bcm2835.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi-bcm2835.c b/drivers/spi/spi-bcm2835.c
index c45d76c848c8..41986ac0fbfb 100644
--- a/drivers/spi/spi-bcm2835.c
+++ b/drivers/spi/spi-bcm2835.c
@@ -75,7 +75,7 @@
 #define DRV_NAME	"spi-bcm2835"
 
 /* define polling limits */
-unsigned int polling_limit_us = 30;
+static unsigned int polling_limit_us = 30;
 module_param(polling_limit_us, uint, 0664);
 MODULE_PARM_DESC(polling_limit_us,
 		 "time in us to run a transfer in polling mode\n");

From 7f04839ec4483563f38062b4dd90253e45447198 Mon Sep 17 00:00:00 2001
From: James Smart <james.smart@broadcom.com>
Date: Fri, 11 Sep 2020 13:01:47 -0700
Subject: [PATCH 034/322] scsi: lpfc: Fix initial FLOGI failure due to BBSCN
 not supported

Initial FLOGIs are failing with the following message:

 lpfc 0000:13:00.1: 1:(0):0820 FLOGI Failed (x300). BBCredit Not Supported

In a prior patch, the READ_SPARAM command was re-ordered to post after
CONFIG_LINK as the driver is expected to update the driver's copy of the
service parameters for the FLOGI payload. If the bb-credit recovery feature
is enabled, this is fine. But on adapters were bb-credit recovery isn't
enabled, it would cause the FLOGI to fail.

Fix by restoring the original command order (READ_SPARAM before
CONFIG_LINK), and after issuing CONFIG_LINK, detect bb-credit recovery
support and reissuing READ_SPARAM to obtain the updated service parameters
(effectively adding in the fix command order).

[mkp: corrected SHA]

Link: https://lore.kernel.org/r/20200911200147.110826-1-james.smart@broadcom.com
Fixes: 835214f5d5f5 ("scsi: lpfc: Fix broken Credit Recovery after driver load")
CC: <stable@vger.kernel.org> # v5.7+
Co-developed-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_hbadisc.c | 76 ++++++++++++++++++++++----------
 1 file changed, 52 insertions(+), 24 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index 142a02114479..bf06d8549bd3 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -71,6 +71,7 @@ static void lpfc_disc_timeout_handler(struct lpfc_vport *);
 static void lpfc_disc_flush_list(struct lpfc_vport *vport);
 static void lpfc_unregister_fcfi_cmpl(struct lpfc_hba *, LPFC_MBOXQ_t *);
 static int lpfc_fcf_inuse(struct lpfc_hba *);
+static void lpfc_mbx_cmpl_read_sparam(struct lpfc_hba *, LPFC_MBOXQ_t *);
 
 void
 lpfc_terminate_rport_io(struct fc_rport *rport)
@@ -1138,11 +1139,13 @@ out:
 	return;
 }
 
-
 void
 lpfc_mbx_cmpl_local_config_link(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 {
 	struct lpfc_vport *vport = pmb->vport;
+	LPFC_MBOXQ_t *sparam_mb;
+	struct lpfc_dmabuf *sparam_mp;
+	int rc;
 
 	if (pmb->u.mb.mbxStatus)
 		goto out;
@@ -1167,12 +1170,42 @@ lpfc_mbx_cmpl_local_config_link(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
 	}
 
 	/* Start discovery by sending a FLOGI. port_state is identically
-	 * LPFC_FLOGI while waiting for FLOGI cmpl. Check if sending
-	 * the FLOGI is being deferred till after MBX_READ_SPARAM completes.
+	 * LPFC_FLOGI while waiting for FLOGI cmpl.
 	 */
 	if (vport->port_state != LPFC_FLOGI) {
-		if (!(phba->hba_flag & HBA_DEFER_FLOGI))
+		/* Issue MBX_READ_SPARAM to update CSPs before FLOGI if
+		 * bb-credit recovery is in place.
+		 */
+		if (phba->bbcredit_support && phba->cfg_enable_bbcr &&
+		    !(phba->link_flag & LS_LOOPBACK_MODE)) {
+			sparam_mb = mempool_alloc(phba->mbox_mem_pool,
+						  GFP_KERNEL);
+			if (!sparam_mb)
+				goto sparam_out;
+
+			rc = lpfc_read_sparam(phba, sparam_mb, 0);
+			if (rc) {
+				mempool_free(sparam_mb, phba->mbox_mem_pool);
+				goto sparam_out;
+			}
+			sparam_mb->vport = vport;
+			sparam_mb->mbox_cmpl = lpfc_mbx_cmpl_read_sparam;
+			rc = lpfc_sli_issue_mbox(phba, sparam_mb, MBX_NOWAIT);
+			if (rc == MBX_NOT_FINISHED) {
+				sparam_mp = (struct lpfc_dmabuf *)
+						sparam_mb->ctx_buf;
+				lpfc_mbuf_free(phba, sparam_mp->virt,
+					       sparam_mp->phys);
+				kfree(sparam_mp);
+				sparam_mb->ctx_buf = NULL;
+				mempool_free(sparam_mb, phba->mbox_mem_pool);
+				goto sparam_out;
+			}
+
+			phba->hba_flag |= HBA_DEFER_FLOGI;
+		}  else {
 			lpfc_initial_flogi(vport);
+		}
 	} else {
 		if (vport->fc_flag & FC_PT2PT)
 			lpfc_disc_start(vport);
@@ -1184,6 +1217,7 @@ out:
 			 "0306 CONFIG_LINK mbxStatus error x%x "
 			 "HBA state x%x\n",
 			 pmb->u.mb.mbxStatus, vport->port_state);
+sparam_out:
 	mempool_free(pmb, phba->mbox_mem_pool);
 
 	lpfc_linkdown(phba);
@@ -3239,21 +3273,6 @@ lpfc_mbx_process_link_up(struct lpfc_hba *phba, struct lpfc_mbx_read_top *la)
 	lpfc_linkup(phba);
 	sparam_mbox = NULL;
 
-	if (!(phba->hba_flag & HBA_FCOE_MODE)) {
-		cfglink_mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
-		if (!cfglink_mbox)
-			goto out;
-		vport->port_state = LPFC_LOCAL_CFG_LINK;
-		lpfc_config_link(phba, cfglink_mbox);
-		cfglink_mbox->vport = vport;
-		cfglink_mbox->mbox_cmpl = lpfc_mbx_cmpl_local_config_link;
-		rc = lpfc_sli_issue_mbox(phba, cfglink_mbox, MBX_NOWAIT);
-		if (rc == MBX_NOT_FINISHED) {
-			mempool_free(cfglink_mbox, phba->mbox_mem_pool);
-			goto out;
-		}
-	}
-
 	sparam_mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
 	if (!sparam_mbox)
 		goto out;
@@ -3274,7 +3293,20 @@ lpfc_mbx_process_link_up(struct lpfc_hba *phba, struct lpfc_mbx_read_top *la)
 		goto out;
 	}
 
-	if (phba->hba_flag & HBA_FCOE_MODE) {
+	if (!(phba->hba_flag & HBA_FCOE_MODE)) {
+		cfglink_mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+		if (!cfglink_mbox)
+			goto out;
+		vport->port_state = LPFC_LOCAL_CFG_LINK;
+		lpfc_config_link(phba, cfglink_mbox);
+		cfglink_mbox->vport = vport;
+		cfglink_mbox->mbox_cmpl = lpfc_mbx_cmpl_local_config_link;
+		rc = lpfc_sli_issue_mbox(phba, cfglink_mbox, MBX_NOWAIT);
+		if (rc == MBX_NOT_FINISHED) {
+			mempool_free(cfglink_mbox, phba->mbox_mem_pool);
+			goto out;
+		}
+	} else {
 		vport->port_state = LPFC_VPORT_UNKNOWN;
 		/*
 		 * Add the driver's default FCF record at FCF index 0 now. This
@@ -3331,10 +3363,6 @@ lpfc_mbx_process_link_up(struct lpfc_hba *phba, struct lpfc_mbx_read_top *la)
 		}
 		/* Reset FCF roundrobin bmask for new discovery */
 		lpfc_sli4_clear_fcf_rr_bmask(phba);
-	} else {
-		if (phba->bbcredit_support && phba->cfg_enable_bbcr &&
-		    !(phba->link_flag & LS_LOOPBACK_MODE))
-			phba->hba_flag |= HBA_DEFER_FLOGI;
 	}
 
 	/* Prepare for LINK up registrations */

From 27ba3e8ff3ab86449e63d38a8d623053591e65fa Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Tue, 15 Sep 2020 16:33:46 +0900
Subject: [PATCH 035/322] scsi: sd: sd_zbc: Fix handling of host-aware ZBC
 disks

When CONFIG_BLK_DEV_ZONED is disabled, allow using host-aware ZBC disks as
regular disks. In this case, ensure that command completion is correctly
executed by changing sd_zbc_complete() to return good_bytes instead of 0
and causing a hang during device probe (endless retries).

When CONFIG_BLK_DEV_ZONED is enabled and a host-aware disk is detected to
have partitions, it will be used as a regular disk. In this case, make sure
to not do anything in sd_zbc_revalidate_zones() as that triggers warnings.

Since all these different cases result in subtle settings of the disk queue
zoned model, introduce the block layer helper function
blk_queue_set_zoned() to generically implement setting up the effective
zoned model according to the disk type, the presence of partitions on the
disk and CONFIG_BLK_DEV_ZONED configuration.

Link: https://lore.kernel.org/r/20200915073347.832424-2-damien.lemoal@wdc.com
Fixes: b72053072c0b ("block: allow partitions on host aware zone devices")
Cc: <stable@vger.kernel.org>
Reported-by: Borislav Petkov <bp@alien8.de>
Suggested-by: Christoph Hellwig <hch@infradead.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 block/blk-settings.c   | 46 ++++++++++++++++++++++++++++++++++++++++++
 drivers/scsi/sd.c      | 30 ++++++++++++++++-----------
 drivers/scsi/sd.h      |  2 +-
 drivers/scsi/sd_zbc.c  |  6 +++++-
 include/linux/blkdev.h |  2 ++
 5 files changed, 72 insertions(+), 14 deletions(-)

diff --git a/block/blk-settings.c b/block/blk-settings.c
index 76a7e03bcd6c..34b721a2743a 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -801,6 +801,52 @@ bool blk_queue_can_use_dma_map_merging(struct request_queue *q,
 }
 EXPORT_SYMBOL_GPL(blk_queue_can_use_dma_map_merging);
 
+/**
+ * blk_queue_set_zoned - configure a disk queue zoned model.
+ * @disk:	the gendisk of the queue to configure
+ * @model:	the zoned model to set
+ *
+ * Set the zoned model of the request queue of @disk according to @model.
+ * When @model is BLK_ZONED_HM (host managed), this should be called only
+ * if zoned block device support is enabled (CONFIG_BLK_DEV_ZONED option).
+ * If @model specifies BLK_ZONED_HA (host aware), the effective model used
+ * depends on CONFIG_BLK_DEV_ZONED settings and on the existence of partitions
+ * on the disk.
+ */
+void blk_queue_set_zoned(struct gendisk *disk, enum blk_zoned_model model)
+{
+	switch (model) {
+	case BLK_ZONED_HM:
+		/*
+		 * Host managed devices are supported only if
+		 * CONFIG_BLK_DEV_ZONED is enabled.
+		 */
+		WARN_ON_ONCE(!IS_ENABLED(CONFIG_BLK_DEV_ZONED));
+		break;
+	case BLK_ZONED_HA:
+		/*
+		 * Host aware devices can be treated either as regular block
+		 * devices (similar to drive managed devices) or as zoned block
+		 * devices to take advantage of the zone command set, similarly
+		 * to host managed devices. We try the latter if there are no
+		 * partitions and zoned block device support is enabled, else
+		 * we do nothing special as far as the block layer is concerned.
+		 */
+		if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED) ||
+		    disk_has_partitions(disk))
+			model = BLK_ZONED_NONE;
+		break;
+	case BLK_ZONED_NONE:
+	default:
+		if (WARN_ON_ONCE(model != BLK_ZONED_NONE))
+			model = BLK_ZONED_NONE;
+		break;
+	}
+
+	disk->queue->limits.zoned = model;
+}
+EXPORT_SYMBOL_GPL(blk_queue_set_zoned);
+
 static int __init blk_settings_init(void)
 {
 	blk_max_low_pfn = max_low_pfn - 1;
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 95018e650f2d..06286b6aeaec 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -2964,26 +2964,32 @@ static void sd_read_block_characteristics(struct scsi_disk *sdkp)
 
 	if (sdkp->device->type == TYPE_ZBC) {
 		/* Host-managed */
-		q->limits.zoned = BLK_ZONED_HM;
+		blk_queue_set_zoned(sdkp->disk, BLK_ZONED_HM);
 	} else {
 		sdkp->zoned = (buffer[8] >> 4) & 3;
-		if (sdkp->zoned == 1 && !disk_has_partitions(sdkp->disk)) {
+		if (sdkp->zoned == 1) {
 			/* Host-aware */
-			q->limits.zoned = BLK_ZONED_HA;
+			blk_queue_set_zoned(sdkp->disk, BLK_ZONED_HA);
 		} else {
-			/*
-			 * Treat drive-managed devices and host-aware devices
-			 * with partitions as regular block devices.
-			 */
-			q->limits.zoned = BLK_ZONED_NONE;
-			if (sdkp->zoned == 2 && sdkp->first_scan)
-				sd_printk(KERN_NOTICE, sdkp,
-					  "Drive-managed SMR disk\n");
+			/* Regular disk or drive managed disk */
+			blk_queue_set_zoned(sdkp->disk, BLK_ZONED_NONE);
 		}
 	}
-	if (blk_queue_is_zoned(q) && sdkp->first_scan)
+
+	if (!sdkp->first_scan)
+		goto out;
+
+	if (blk_queue_is_zoned(q)) {
 		sd_printk(KERN_NOTICE, sdkp, "Host-%s zoned block device\n",
 		      q->limits.zoned == BLK_ZONED_HM ? "managed" : "aware");
+	} else {
+		if (sdkp->zoned == 1)
+			sd_printk(KERN_NOTICE, sdkp,
+				  "Host-aware SMR disk used as regular disk\n");
+		else if (sdkp->zoned == 2)
+			sd_printk(KERN_NOTICE, sdkp,
+				  "Drive-managed SMR disk\n");
+	}
 
  out:
 	kfree(buffer);
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 4933e7daf17d..7251434100e6 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -259,7 +259,7 @@ static inline blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd,
 static inline unsigned int sd_zbc_complete(struct scsi_cmnd *cmd,
 			unsigned int good_bytes, struct scsi_sense_hdr *sshdr)
 {
-	return 0;
+	return good_bytes;
 }
 
 static inline blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd,
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index 0e94ff056bff..a739456dea02 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -667,7 +667,11 @@ int sd_zbc_revalidate_zones(struct scsi_disk *sdkp)
 	u32 max_append;
 	int ret = 0;
 
-	if (!sd_is_zoned(sdkp))
+	/*
+	 * There is nothing to do for regular disks, including host-aware disks
+	 * that have partitions.
+	 */
+	if (!blk_queue_is_zoned(q))
 		return 0;
 
 	/*
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index bb5636cc17b9..868e11face00 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -352,6 +352,8 @@ struct queue_limits {
 typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx,
 			       void *data);
 
+void blk_queue_set_zoned(struct gendisk *disk, enum blk_zoned_model model);
+
 #ifdef CONFIG_BLK_DEV_ZONED
 
 #define BLK_ALL_ZONES  ((unsigned int)-1)

From 6c5dee18756b4721ac8518c69b22ee8ac0c9c442 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Tue, 15 Sep 2020 16:33:47 +0900
Subject: [PATCH 036/322] scsi: sd: sd_zbc: Fix ZBC disk initialization

Make sure to call sd_zbc_init_disk() when the sdkp->zoned field is known,
that is, once sd_read_block_characteristics() is executed in
sd_revalidate_disk(), so that host-aware disks also get initialized.  To do
so, move sd_zbc_init_disk() call in sd_zbc_revalidate_zones() and make sure
to execute it for all zoned disks, including for host-aware disks used as
regular disks as these disk zoned model may be changed back to BLK_ZONED_HA
when partitions are deleted.

Link: https://lore.kernel.org/r/20200915073347.832424-3-damien.lemoal@wdc.com
Fixes: 5795eb443060 ("scsi: sd_zbc: emulate ZONE_APPEND commands")
Cc: <stable@vger.kernel.org> # v5.8+
Reported-by: Borislav Petkov <bp@alien8.de>
Tested-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/sd.c     |  4 ---
 drivers/scsi/sd.h     |  6 -----
 drivers/scsi/sd_zbc.c | 60 +++++++++++++++++++++++++------------------
 3 files changed, 35 insertions(+), 35 deletions(-)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 06286b6aeaec..16503e22691e 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -3410,10 +3410,6 @@ static int sd_probe(struct device *dev)
 	sdkp->first_scan = 1;
 	sdkp->max_medium_access_timeouts = SD_MAX_MEDIUM_TIMEOUTS;
 
-	error = sd_zbc_init_disk(sdkp);
-	if (error)
-		goto out_free_index;
-
 	sd_revalidate_disk(gd);
 
 	gd->flags = GENHD_FL_EXT_DEVT;
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 7251434100e6..a3aad608bc38 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -215,7 +215,6 @@ static inline int sd_is_zoned(struct scsi_disk *sdkp)
 
 #ifdef CONFIG_BLK_DEV_ZONED
 
-int sd_zbc_init_disk(struct scsi_disk *sdkp);
 void sd_zbc_release_disk(struct scsi_disk *sdkp);
 int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buffer);
 int sd_zbc_revalidate_zones(struct scsi_disk *sdkp);
@@ -231,11 +230,6 @@ blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd, sector_t *lba,
 
 #else /* CONFIG_BLK_DEV_ZONED */
 
-static inline int sd_zbc_init_disk(struct scsi_disk *sdkp)
-{
-	return 0;
-}
-
 static inline void sd_zbc_release_disk(struct scsi_disk *sdkp) {}
 
 static inline int sd_zbc_read_zones(struct scsi_disk *sdkp,
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index a739456dea02..cf07b7f93579 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -651,6 +651,28 @@ static void sd_zbc_print_zones(struct scsi_disk *sdkp)
 			  sdkp->zone_blocks);
 }
 
+static int sd_zbc_init_disk(struct scsi_disk *sdkp)
+{
+	sdkp->zones_wp_offset = NULL;
+	spin_lock_init(&sdkp->zones_wp_offset_lock);
+	sdkp->rev_wp_offset = NULL;
+	mutex_init(&sdkp->rev_mutex);
+	INIT_WORK(&sdkp->zone_wp_offset_work, sd_zbc_update_wp_offset_workfn);
+	sdkp->zone_wp_update_buf = kzalloc(SD_BUF_SIZE, GFP_KERNEL);
+	if (!sdkp->zone_wp_update_buf)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void sd_zbc_release_disk(struct scsi_disk *sdkp)
+{
+	kvfree(sdkp->zones_wp_offset);
+	sdkp->zones_wp_offset = NULL;
+	kfree(sdkp->zone_wp_update_buf);
+	sdkp->zone_wp_update_buf = NULL;
+}
+
 static void sd_zbc_revalidate_zones_cb(struct gendisk *disk)
 {
 	struct scsi_disk *sdkp = scsi_disk(disk);
@@ -667,6 +689,19 @@ int sd_zbc_revalidate_zones(struct scsi_disk *sdkp)
 	u32 max_append;
 	int ret = 0;
 
+	/*
+	 * For all zoned disks, initialize zone append emulation data if not
+	 * already done. This is necessary also for host-aware disks used as
+	 * regular disks due to the presence of partitions as these partitions
+	 * may be deleted and the disk zoned model changed back from
+	 * BLK_ZONED_NONE to BLK_ZONED_HA.
+	 */
+	if (sd_is_zoned(sdkp) && !sdkp->zone_wp_update_buf) {
+		ret = sd_zbc_init_disk(sdkp);
+		if (ret)
+			return ret;
+	}
+
 	/*
 	 * There is nothing to do for regular disks, including host-aware disks
 	 * that have partitions.
@@ -768,28 +803,3 @@ err:
 
 	return ret;
 }
-
-int sd_zbc_init_disk(struct scsi_disk *sdkp)
-{
-	if (!sd_is_zoned(sdkp))
-		return 0;
-
-	sdkp->zones_wp_offset = NULL;
-	spin_lock_init(&sdkp->zones_wp_offset_lock);
-	sdkp->rev_wp_offset = NULL;
-	mutex_init(&sdkp->rev_mutex);
-	INIT_WORK(&sdkp->zone_wp_offset_work, sd_zbc_update_wp_offset_workfn);
-	sdkp->zone_wp_update_buf = kzalloc(SD_BUF_SIZE, GFP_KERNEL);
-	if (!sdkp->zone_wp_update_buf)
-		return -ENOMEM;
-
-	return 0;
-}
-
-void sd_zbc_release_disk(struct scsi_disk *sdkp)
-{
-	kvfree(sdkp->zones_wp_offset);
-	sdkp->zones_wp_offset = NULL;
-	kfree(sdkp->zone_wp_update_buf);
-	sdkp->zone_wp_update_buf = NULL;
-}

From e5467b672bd99f0c1904900013e778e3a5ce30b7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Beh=C3=BAn?= <marek.behun@nic.cz>
Date: Tue, 15 Sep 2020 02:54:26 +0200
Subject: [PATCH 037/322] dt-bindings: leds: cznic,turris-omnia-leds: fix error
 in binding
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is a bug in the device tree binding for cznic,turris-omnia-leds
which causes make dt_binding_check to complain.

The reason is that the multi-led property binding's regular expression
does not contain the `@` character, while the example nodes do.

Fix this, and also adjust the maximum address to 'b' as there are 12
LEDs.

Cc: Rob Herring <robh+dt@kernel.org>
Cc: devicetree@vger.kernel.org
Cc: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Marek Behún <marek.behun@nic.cz>
Link: https://lore.kernel.org/r/20200915005426.15957-1-marek.behun@nic.cz
Signed-off-by: Rob Herring <robh@kernel.org>
---
 .../devicetree/bindings/leds/cznic,turris-omnia-leds.yaml       | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/leds/cznic,turris-omnia-leds.yaml b/Documentation/devicetree/bindings/leds/cznic,turris-omnia-leds.yaml
index 24ad1446445e..fe7fa25877fd 100644
--- a/Documentation/devicetree/bindings/leds/cznic,turris-omnia-leds.yaml
+++ b/Documentation/devicetree/bindings/leds/cznic,turris-omnia-leds.yaml
@@ -30,7 +30,7 @@ properties:
     const: 0
 
 patternProperties:
-  "^multi-led[0-9a-f]$":
+  "^multi-led@[0-9a-b]$":
     type: object
     allOf:
       - $ref: leds-class-multicolor.yaml#

From d33030e2ee3508d65db5644551435310df86010e Mon Sep 17 00:00:00 2001
From: Jeffrey Mitchell <jeffrey.mitchell@starlab.io>
Date: Tue, 15 Sep 2020 16:42:52 -0500
Subject: [PATCH 038/322] nfs: Fix security label length not being reset

nfs_readdir_page_filler() iterates over entries in a directory, reusing
the same security label buffer, but does not reset the buffer's length.
This causes decode_attr_security_label() to return -ERANGE if an entry's
security label is longer than the previous one's. This error, in
nfs4_decode_dirent(), only gets passed up as -EAGAIN, which causes another
failed attempt to copy into the buffer. The second error is ignored and
the remaining entries do not show up in ls, specifically the getdents64()
syscall.

Reproduce by creating multiple files in NFS and giving one of the later
files a longer security label. ls will not see that file nor any that are
added afterwards, though they will exist on the backend.

In nfs_readdir_page_filler(), reset security label buffer length before
every reuse

Signed-off-by: Jeffrey Mitchell <jeffrey.mitchell@starlab.io>
Fixes: b4487b935452 ("nfs: Fix getxattr kernel panic and memory overflow")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/dir.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index e732580fe47b..cb52db9a0cfb 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -579,6 +579,9 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
 	xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
 
 	do {
+		if (entry->label)
+			entry->label->len = NFS4_MAXLABELLEN;
+
 		status = xdr_decode(desc, entry, &stream);
 		if (status != 0) {
 			if (status == -EAGAIN)

From 16abd2a0c124a6c3543c88ca4c53c997c9fb4114 Mon Sep 17 00:00:00 2001
From: Olga Kornievskaia <kolga@netapp.com>
Date: Wed, 16 Sep 2020 09:07:54 -0400
Subject: [PATCH 039/322] NFSv4.2: fix client's attribute cache management for
 copy_file_range

After client is done with the COPY operation, it needs to invalidate
its pagecache (as it did no reading or writing of the data locally)
and it needs to invalidate it's attributes just like it would have
for a read on the source file and write on the destination file.

Once the linux server started giving out read delegations to
read+write opens, the destination file of the copy_file range
started having delegations and not doing syncup on close of the
file leading to xfstest failures for generic/430,431,432,433,565.

v2: changing cache_validity needs to be protected by the i_lock.

Reported-by: Murphy Zhou <jencce.kernel@gmail.com>
Fixes: 2e72448b07dc ("NFS: Add COPY nfs operation")
Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/nfs42proc.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 142225f0af59..2b2211d1234e 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -356,7 +356,15 @@ static ssize_t _nfs42_proc_copy(struct file *src,
 
 	truncate_pagecache_range(dst_inode, pos_dst,
 				 pos_dst + res->write_res.count);
-
+	spin_lock(&dst_inode->i_lock);
+	NFS_I(dst_inode)->cache_validity |= (NFS_INO_REVAL_PAGECACHE |
+			NFS_INO_REVAL_FORCED | NFS_INO_INVALID_SIZE |
+			NFS_INO_INVALID_ATTR | NFS_INO_INVALID_DATA);
+	spin_unlock(&dst_inode->i_lock);
+	spin_lock(&src_inode->i_lock);
+	NFS_I(src_inode)->cache_validity |= (NFS_INO_REVAL_PAGECACHE |
+			NFS_INO_REVAL_FORCED | NFS_INO_INVALID_ATIME);
+	spin_unlock(&src_inode->i_lock);
 	status = res->write_res.count;
 out:
 	if (args->sync)

From 642e450b6b5955f2059d0ae372183f7c6323f951 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Wed, 16 Sep 2020 14:00:25 +0200
Subject: [PATCH 040/322] xsk: Do not discard packet when NETDEV_TX_BUSY

In the skb Tx path, transmission of a packet is performed with
dev_direct_xmit(). When NETDEV_TX_BUSY is set in the drivers, it
signifies that it was not possible to send the packet right now,
please try later. Unfortunately, the xsk transmit code discarded the
packet and returned EBUSY to the application. Fix this unnecessary
packet loss, by not discarding the packet in the Tx ring and return
EAGAIN. As EAGAIN is returned to the application, it can then retry
the send operation later and the packet will then likely be sent as
the driver will then likely have space/resources to send the packet.

In summary, EAGAIN tells the application that the packet was not
discarded from the Tx ring and that it needs to call send()
again. EBUSY, on the other hand, signifies that the packet was not
sent and discarded from the Tx ring. The application needs to put
the packet on the Tx ring again if it wants it to be sent.

Fixes: 35fcde7f8deb ("xsk: support for Tx")
Reported-by: Arkadiusz Zema <A.Zema@falconvsystems.com>
Suggested-by: Arkadiusz Zema <A.Zema@falconvsystems.com>
Suggested-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Link: https://lore.kernel.org/bpf/1600257625-2353-1-git-send-email-magnus.karlsson@gmail.com
---
 net/xdp/xsk.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index c3231620d210..6c5e09e7440a 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -377,15 +377,30 @@ static int xsk_generic_xmit(struct sock *sk)
 		skb_shinfo(skb)->destructor_arg = (void *)(long)desc.addr;
 		skb->destructor = xsk_destruct_skb;
 
+		/* Hinder dev_direct_xmit from freeing the packet and
+		 * therefore completing it in the destructor
+		 */
+		refcount_inc(&skb->users);
 		err = dev_direct_xmit(skb, xs->queue_id);
+		if  (err == NETDEV_TX_BUSY) {
+			/* Tell user-space to retry the send */
+			skb->destructor = sock_wfree;
+			/* Free skb without triggering the perf drop trace */
+			consume_skb(skb);
+			err = -EAGAIN;
+			goto out;
+		}
+
 		xskq_cons_release(xs->tx);
 		/* Ignore NET_XMIT_CN as packet might have been sent */
-		if (err == NET_XMIT_DROP || err == NETDEV_TX_BUSY) {
+		if (err == NET_XMIT_DROP) {
 			/* SKB completed but not sent */
+			kfree_skb(skb);
 			err = -EBUSY;
 			goto out;
 		}
 
+		consume_skb(skb);
 		sent_frame = true;
 	}
 

From 5ad284ab3a01e2d6a89be2a8663ae76f4e617549 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 15 Sep 2020 15:58:16 +0300
Subject: [PATCH 041/322] gpiolib: Fix line event handling in syscall
 compatible mode

The introduced line event handling ABI in the commit

  61f922db7221 ("gpio: userspace ABI for reading GPIO line events")

missed the fact that 64-bit kernel may serve for 32-bit applications.
In such case the very first check in the lineevent_read() will fail
due to alignment differences.

To workaround this introduce lineevent_get_size() helper which returns actual
size of the structure in user space.

Fixes: 61f922db7221 ("gpio: userspace ABI for reading GPIO line events")
Suggested-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Tested-by: Kent Gibson <warthog618@gmail.com>
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
---
 drivers/gpio/gpiolib-cdev.c | 34 ++++++++++++++++++++++++++++++----
 1 file changed, 30 insertions(+), 4 deletions(-)

diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c
index e6c9b78adfc2..76c36b05aef6 100644
--- a/drivers/gpio/gpiolib-cdev.c
+++ b/drivers/gpio/gpiolib-cdev.c
@@ -423,6 +423,21 @@ static __poll_t lineevent_poll(struct file *file,
 	return events;
 }
 
+static ssize_t lineevent_get_size(void)
+{
+#ifdef __x86_64__
+	/* i386 has no padding after 'id' */
+	if (in_ia32_syscall()) {
+		struct compat_gpioeevent_data {
+			compat_u64	timestamp;
+			u32		id;
+		};
+
+		return sizeof(struct compat_gpioeevent_data);
+	}
+#endif
+	return sizeof(struct gpioevent_data);
+}
 
 static ssize_t lineevent_read(struct file *file,
 			      char __user *buf,
@@ -432,9 +447,20 @@ static ssize_t lineevent_read(struct file *file,
 	struct lineevent_state *le = file->private_data;
 	struct gpioevent_data ge;
 	ssize_t bytes_read = 0;
+	ssize_t ge_size;
 	int ret;
 
-	if (count < sizeof(ge))
+	/*
+	 * When compatible system call is being used the struct gpioevent_data,
+	 * in case of at least ia32, has different size due to the alignment
+	 * differences. Because we have first member 64 bits followed by one of
+	 * 32 bits there is no gap between them. The only difference is the
+	 * padding at the end of the data structure. Hence, we calculate the
+	 * actual sizeof() and pass this as an argument to copy_to_user() to
+	 * drop unneeded bytes from the output.
+	 */
+	ge_size = lineevent_get_size();
+	if (count < ge_size)
 		return -EINVAL;
 
 	do {
@@ -470,10 +496,10 @@ static ssize_t lineevent_read(struct file *file,
 			break;
 		}
 
-		if (copy_to_user(buf + bytes_read, &ge, sizeof(ge)))
+		if (copy_to_user(buf + bytes_read, &ge, ge_size))
 			return -EFAULT;
-		bytes_read += sizeof(ge);
-	} while (count >= bytes_read + sizeof(ge));
+		bytes_read += ge_size;
+	} while (count >= bytes_read + ge_size);
 
 	return bytes_read;
 }

From 94cc89eb8fa5039fcb6e3e3d50f929ddcccee095 Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.cirrus.com>
Date: Thu, 17 Sep 2020 13:08:28 +0100
Subject: [PATCH 042/322] regmap: debugfs: Fix handling of name string for
 debugfs init delays

In regmap_debugfs_init the initialisation of the debugfs is delayed
if the root node isn't ready yet. Most callers of regmap_debugfs_init
pass the name from the regmap_config, which is considered temporary
ie. may be unallocated after the regmap_init call returns. This leads
to a potential use after free, where config->name has been freed by
the time it is used in regmap_debugfs_initcall.

This situation can be seen on Zynq, where the architecture init_irq
callback registers a syscon device, using a local variable for the
regmap_config. As init_irq is very early in the platform bring up the
regmap debugfs root isn't ready yet. Although this doesn't crash it
does result in the debugfs entry not having the correct name.

Regmap already sets map->name from config->name on the regmap_init
path and the fact that a separate field is used to pass the name
to regmap_debugfs_init appears to be an artifact of the debugfs
name being added before the map name. As such this patch updates
regmap_debugfs_init to use map->name, which is already duplicated from
the config avoiding the issue.

This does however leave two lose ends, both regmap_attach_dev and
regmap_reinit_cache can be called after a regmap is registered and
would have had the effect of applying a new name to the debugfs
entries. In both of these cases it was chosen to update the map
name. In the case of regmap_attach_dev there are 3 users that
currently use this function to update the name, thus doing so avoids
changes for those users and it seems reasonable that attaching
a device would want to set the name of the map. In the case of
regmap_reinit_cache the primary use-case appears to be devices that
need some register access to identify the device (for example devices
in the same family) and then update the cache to match the exact
hardware. Whilst no users do currently update the name here, given the
use-case it seemed reasonable the name might want to be updated once
the device is better identified.

Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Link: https://lore.kernel.org/r/20200917120828.12987-1-ckeepax@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/regmap/internal.h       |  4 +--
 drivers/base/regmap/regmap-debugfs.c |  7 ++---
 drivers/base/regmap/regmap.c         | 44 +++++++++++++++++++++-------
 3 files changed, 38 insertions(+), 17 deletions(-)

diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h
index 3d80c4b43f72..e0ff8e90ebdc 100644
--- a/drivers/base/regmap/internal.h
+++ b/drivers/base/regmap/internal.h
@@ -217,7 +217,7 @@ struct regmap_field {
 
 #ifdef CONFIG_DEBUG_FS
 extern void regmap_debugfs_initcall(void);
-extern void regmap_debugfs_init(struct regmap *map, const char *name);
+extern void regmap_debugfs_init(struct regmap *map);
 extern void regmap_debugfs_exit(struct regmap *map);
 
 static inline void regmap_debugfs_disable(struct regmap *map)
@@ -227,7 +227,7 @@ static inline void regmap_debugfs_disable(struct regmap *map)
 
 #else
 static inline void regmap_debugfs_initcall(void) { }
-static inline void regmap_debugfs_init(struct regmap *map, const char *name) { }
+static inline void regmap_debugfs_init(struct regmap *map) { }
 static inline void regmap_debugfs_exit(struct regmap *map) { }
 static inline void regmap_debugfs_disable(struct regmap *map) { }
 #endif
diff --git a/drivers/base/regmap/regmap-debugfs.c b/drivers/base/regmap/regmap-debugfs.c
index f58baff2be0a..b6d63ef16b44 100644
--- a/drivers/base/regmap/regmap-debugfs.c
+++ b/drivers/base/regmap/regmap-debugfs.c
@@ -17,7 +17,6 @@
 
 struct regmap_debugfs_node {
 	struct regmap *map;
-	const char *name;
 	struct list_head link;
 };
 
@@ -544,11 +543,12 @@ static const struct file_operations regmap_cache_bypass_fops = {
 	.write = regmap_cache_bypass_write_file,
 };
 
-void regmap_debugfs_init(struct regmap *map, const char *name)
+void regmap_debugfs_init(struct regmap *map)
 {
 	struct rb_node *next;
 	struct regmap_range_node *range_node;
 	const char *devname = "dummy";
+	const char *name = map->name;
 
 	/*
 	 * Userspace can initiate reads from the hardware over debugfs.
@@ -569,7 +569,6 @@ void regmap_debugfs_init(struct regmap *map, const char *name)
 		if (!node)
 			return;
 		node->map = map;
-		node->name = name;
 		mutex_lock(&regmap_debugfs_early_lock);
 		list_add(&node->link, &regmap_debugfs_early_list);
 		mutex_unlock(&regmap_debugfs_early_lock);
@@ -679,7 +678,7 @@ void regmap_debugfs_initcall(void)
 
 	mutex_lock(&regmap_debugfs_early_lock);
 	list_for_each_entry_safe(node, tmp, &regmap_debugfs_early_list, link) {
-		regmap_debugfs_init(node->map, node->name);
+		regmap_debugfs_init(node->map);
 		list_del(&node->link);
 		kfree(node);
 	}
diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index e93700af7e6e..e22c21affebd 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -581,14 +581,34 @@ static void regmap_range_exit(struct regmap *map)
 	kfree(map->selector_work_buf);
 }
 
+static int regmap_set_name(struct regmap *map, const struct regmap_config *config)
+{
+	if (config->name) {
+		const char *name = kstrdup_const(config->name, GFP_KERNEL);
+
+		if (!name)
+			return -ENOMEM;
+
+		kfree_const(map->name);
+		map->name = name;
+	}
+
+	return 0;
+}
+
 int regmap_attach_dev(struct device *dev, struct regmap *map,
 		      const struct regmap_config *config)
 {
 	struct regmap **m;
+	int ret;
 
 	map->dev = dev;
 
-	regmap_debugfs_init(map, config->name);
+	ret = regmap_set_name(map, config);
+	if (ret)
+		return ret;
+
+	regmap_debugfs_init(map);
 
 	/* Add a devres resource for dev_get_regmap() */
 	m = devres_alloc(dev_get_regmap_release, sizeof(*m), GFP_KERNEL);
@@ -674,9 +694,9 @@ struct regmap *__regmap_init(struct device *dev,
 			     const char *lock_name)
 {
 	struct regmap *map;
-	int ret = -EINVAL;
 	enum regmap_endian reg_endian, val_endian;
 	int i, j;
+	int ret;
 
 	if (!config)
 		goto err;
@@ -687,13 +707,9 @@ struct regmap *__regmap_init(struct device *dev,
 		goto err;
 	}
 
-	if (config->name) {
-		map->name = kstrdup_const(config->name, GFP_KERNEL);
-		if (!map->name) {
-			ret = -ENOMEM;
-			goto err_map;
-		}
-	}
+	ret = regmap_set_name(map, config);
+	if (ret)
+		goto err_map;
 
 	if (config->disable_locking) {
 		map->lock = map->unlock = regmap_lock_unlock_none;
@@ -1137,7 +1153,7 @@ skip_format_initialization:
 		if (ret != 0)
 			goto err_regcache;
 	} else {
-		regmap_debugfs_init(map, config->name);
+		regmap_debugfs_init(map);
 	}
 
 	return map;
@@ -1297,6 +1313,8 @@ EXPORT_SYMBOL_GPL(regmap_field_free);
  */
 int regmap_reinit_cache(struct regmap *map, const struct regmap_config *config)
 {
+	int ret;
+
 	regcache_exit(map);
 	regmap_debugfs_exit(map);
 
@@ -1309,7 +1327,11 @@ int regmap_reinit_cache(struct regmap *map, const struct regmap_config *config)
 	map->readable_noinc_reg = config->readable_noinc_reg;
 	map->cache_type = config->cache_type;
 
-	regmap_debugfs_init(map, config->name);
+	ret = regmap_set_name(map, config);
+	if (ret)
+		return ret;
+
+	regmap_debugfs_init(map);
 
 	map->cache_bypass = false;
 	map->cache_only = false;

From 1d0e16ac1a9e800598dcfa5b6bc53b704a103390 Mon Sep 17 00:00:00 2001
From: Philip Yang <Philip.Yang@amd.com>
Date: Tue, 15 Sep 2020 17:07:35 -0400
Subject: [PATCH 043/322] drm/amdgpu: prevent double kfree ttm->sg
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Set ttm->sg to NULL after kfree, to avoid memory corruption backtrace:

[  420.932812] kernel BUG at
/build/linux-do9eLF/linux-4.15.0/mm/slub.c:295!
[  420.934182] invalid opcode: 0000 [#1] SMP NOPTI
[  420.935445] Modules linked in: xt_conntrack ipt_MASQUERADE
[  420.951332] Hardware name: Dell Inc. PowerEdge R7525/0PYVT1, BIOS
1.5.4 07/09/2020
[  420.952887] RIP: 0010:__slab_free+0x180/0x2d0
[  420.954419] RSP: 0018:ffffbe426291fa60 EFLAGS: 00010246
[  420.955963] RAX: ffff9e29263e9c30 RBX: ffff9e29263e9c30 RCX:
000000018100004b
[  420.957512] RDX: ffff9e29263e9c30 RSI: fffff3d33e98fa40 RDI:
ffff9e297e407a80
[  420.959055] RBP: ffffbe426291fb00 R08: 0000000000000001 R09:
ffffffffc0d39ade
[  420.960587] R10: ffffbe426291fb20 R11: ffff9e49ffdd4000 R12:
ffff9e297e407a80
[  420.962105] R13: fffff3d33e98fa40 R14: ffff9e29263e9c30 R15:
ffff9e2954464fd8
[  420.963611] FS:  00007fa2ea097780(0000) GS:ffff9e297e840000(0000)
knlGS:0000000000000000
[  420.965144] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  420.966663] CR2: 00007f16bfffefb8 CR3: 0000001ff0c62000 CR4:
0000000000340ee0
[  420.968193] Call Trace:
[  420.969703]  ? __page_cache_release+0x3c/0x220
[  420.971294]  ? amdgpu_ttm_tt_unpopulate+0x5e/0x80 [amdgpu]
[  420.972789]  kfree+0x168/0x180
[  420.974353]  ? amdgpu_ttm_tt_set_user_pages+0x64/0xc0 [amdgpu]
[  420.975850]  ? kfree+0x168/0x180
[  420.977403]  amdgpu_ttm_tt_unpopulate+0x5e/0x80 [amdgpu]
[  420.978888]  ttm_tt_unpopulate.part.10+0x53/0x60 [amdttm]
[  420.980357]  ttm_tt_destroy.part.11+0x4f/0x60 [amdttm]
[  420.981814]  ttm_tt_destroy+0x13/0x20 [amdttm]
[  420.983273]  ttm_bo_cleanup_memtype_use+0x36/0x80 [amdttm]
[  420.984725]  ttm_bo_release+0x1c9/0x360 [amdttm]
[  420.986167]  amdttm_bo_put+0x24/0x30 [amdttm]
[  420.987663]  amdgpu_bo_unref+0x1e/0x30 [amdgpu]
[  420.989165]  amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu+0x9ca/0xb10
[amdgpu]
[  420.990666]  kfd_ioctl_alloc_memory_of_gpu+0xef/0x2c0 [amdgpu]

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index e11c5d69843d..978bae731398 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1076,6 +1076,7 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
 
 release_sg:
 	kfree(ttm->sg);
+	ttm->sg = NULL;
 	return r;
 }
 

From 5d5b71e8a0f2f5649991d651d005181dc087981c Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Fri, 1 May 2020 16:42:56 -0400
Subject: [PATCH 044/322] drm/amdgpu: add the GC 10.3 VRS registers

Add the VRS registers.

Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../include/asic_reg/gc/gc_10_3_0_default.h   |  2 +
 .../include/asic_reg/gc/gc_10_3_0_offset.h    |  4 ++
 .../include/asic_reg/gc/gc_10_3_0_sh_mask.h   | 50 +++++++++++++++++++
 3 files changed, 56 insertions(+)

diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_default.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_default.h
index 1116779252e6..e245e912535e 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_default.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_default.h
@@ -2727,6 +2727,7 @@
 #define mmDB_STENCIL_WRITE_BASE_DEFAULT                                          0x00000000
 #define mmDB_RESERVED_REG_1_DEFAULT                                              0x00000000
 #define mmDB_RESERVED_REG_3_DEFAULT                                              0x00000000
+#define mmDB_VRS_OVERRIDE_CNTL_DEFAULT                                           0x00000000
 #define mmDB_Z_READ_BASE_HI_DEFAULT                                              0x00000000
 #define mmDB_STENCIL_READ_BASE_HI_DEFAULT                                        0x00000000
 #define mmDB_Z_WRITE_BASE_HI_DEFAULT                                             0x00000000
@@ -3062,6 +3063,7 @@
 #define mmPA_SU_OVER_RASTERIZATION_CNTL_DEFAULT                                  0x00000000
 #define mmPA_STEREO_CNTL_DEFAULT                                                 0x00000000
 #define mmPA_STATE_STEREO_X_DEFAULT                                              0x00000000
+#define mmPA_CL_VRS_CNTL_DEFAULT                                                 0x00000000
 #define mmPA_SU_POINT_SIZE_DEFAULT                                               0x00000000
 #define mmPA_SU_POINT_MINMAX_DEFAULT                                             0x00000000
 #define mmPA_SU_LINE_CNTL_DEFAULT                                                0x00000000
diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h
index 05d1b0a5f6d2..644a9fa71bb2 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h
@@ -5379,6 +5379,8 @@
 #define mmDB_RESERVED_REG_1_BASE_IDX                                                                   1
 #define mmDB_RESERVED_REG_3                                                                            0x0017
 #define mmDB_RESERVED_REG_3_BASE_IDX                                                                   1
+#define mmDB_VRS_OVERRIDE_CNTL                                                                         0x0019
+#define mmDB_VRS_OVERRIDE_CNTL_BASE_IDX                                                                1
 #define mmDB_Z_READ_BASE_HI                                                                            0x001a
 #define mmDB_Z_READ_BASE_HI_BASE_IDX                                                                   1
 #define mmDB_STENCIL_READ_BASE_HI                                                                      0x001b
@@ -6049,6 +6051,8 @@
 #define mmPA_STEREO_CNTL_BASE_IDX                                                                      1
 #define mmPA_STATE_STEREO_X                                                                            0x0211
 #define mmPA_STATE_STEREO_X_BASE_IDX                                                                   1
+#define mmPA_CL_VRS_CNTL                                                                               0x0212
+#define mmPA_CL_VRS_CNTL_BASE_IDX                                                                      1
 #define mmPA_SU_POINT_SIZE                                                                             0x0280
 #define mmPA_SU_POINT_SIZE_BASE_IDX                                                                    1
 #define mmPA_SU_POINT_MINMAX                                                                           0x0281
diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_sh_mask.h
index aac57f714cf1..2e449fcff893 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_sh_mask.h
@@ -9777,6 +9777,7 @@
 #define DB_EXCEPTION_CONTROL__AUTO_FLUSH_HTILE__SHIFT                                                         0x3
 #define DB_EXCEPTION_CONTROL__AUTO_FLUSH_QUAD__SHIFT                                                          0x4
 #define DB_EXCEPTION_CONTROL__FORCE_SUMMARIZE__SHIFT                                                          0x8
+#define DB_EXCEPTION_CONTROL__FORCE_VRS_RATE_FINE__SHIFT                                                      0x10
 #define DB_EXCEPTION_CONTROL__DTAG_WATERMARK__SHIFT                                                           0x18
 #define DB_EXCEPTION_CONTROL__EARLY_Z_PANIC_DISABLE_MASK                                                      0x00000001L
 #define DB_EXCEPTION_CONTROL__LATE_Z_PANIC_DISABLE_MASK                                                       0x00000002L
@@ -9784,6 +9785,7 @@
 #define DB_EXCEPTION_CONTROL__AUTO_FLUSH_HTILE_MASK                                                           0x00000008L
 #define DB_EXCEPTION_CONTROL__AUTO_FLUSH_QUAD_MASK                                                            0x00000010L
 #define DB_EXCEPTION_CONTROL__FORCE_SUMMARIZE_MASK                                                            0x00000F00L
+#define DB_EXCEPTION_CONTROL__FORCE_VRS_RATE_FINE_MASK                                                        0x00FF0000L
 #define DB_EXCEPTION_CONTROL__DTAG_WATERMARK_MASK                                                             0x7F000000L
 //DB_DFSM_CONFIG
 #define DB_DFSM_CONFIG__BYPASS_DFSM__SHIFT                                                                    0x0
@@ -10076,6 +10078,7 @@
 #define CB_HW_CONTROL_3__DISABLE_NACK_PROCESSING_CM__SHIFT                                                    0x18
 #define CB_HW_CONTROL_3__DISABLE_NACK_COLOR_RD_WR_OPT__SHIFT                                                  0x19
 #define CB_HW_CONTROL_3__DISABLE_BLENDER_CLOCK_GATING__SHIFT                                                  0x1a
+#define CB_HW_CONTROL_3__DISABLE_DCC_VRS_OPT__SHIFT                                                           0x1c
 #define CB_HW_CONTROL_3__DISABLE_FMASK_NOFETCH_OPT__SHIFT                                                     0x1e
 #define CB_HW_CONTROL_3__DISABLE_FMASK_NOFETCH_OPT_BC__SHIFT                                                  0x1f
 #define CB_HW_CONTROL_3__DISABLE_SLOW_MODE_EMPTY_HALF_QUAD_KILL_MASK                                          0x00000001L
@@ -10103,12 +10106,15 @@
 #define CB_HW_CONTROL_3__DISABLE_NACK_PROCESSING_CM_MASK                                                      0x01000000L
 #define CB_HW_CONTROL_3__DISABLE_NACK_COLOR_RD_WR_OPT_MASK                                                    0x02000000L
 #define CB_HW_CONTROL_3__DISABLE_BLENDER_CLOCK_GATING_MASK                                                    0x04000000L
+#define CB_HW_CONTROL_3__DISABLE_DCC_VRS_OPT_MASK                                                             0x10000000L
 #define CB_HW_CONTROL_3__DISABLE_FMASK_NOFETCH_OPT_MASK                                                       0x40000000L
 #define CB_HW_CONTROL_3__DISABLE_FMASK_NOFETCH_OPT_BC_MASK                                                    0x80000000L
 //CB_HW_CONTROL
 #define CB_HW_CONTROL__ALLOW_MRT_WITH_DUAL_SOURCE__SHIFT                                                      0x0
+#define CB_HW_CONTROL__DISABLE_VRS_FILLRATE_OPTIMIZATION__SHIFT                                               0x1
 #define CB_HW_CONTROL__DISABLE_FILLRATE_OPT_FIX_WITH_CFC__SHIFT                                               0x3
 #define CB_HW_CONTROL__DISABLE_POST_DCC_WITH_CFC_FIX__SHIFT                                                   0x4
+#define CB_HW_CONTROL__DISABLE_COMPRESS_1FRAG_WHEN_VRS_RATE_HINT_EN__SHIFT                                    0x5
 #define CB_HW_CONTROL__RMI_CREDITS__SHIFT                                                                     0x6
 #define CB_HW_CONTROL__CHICKEN_BITS__SHIFT                                                                    0xc
 #define CB_HW_CONTROL__DISABLE_FMASK_MULTI_MGCG_DOMAINS__SHIFT                                                0xf
@@ -10129,8 +10135,10 @@
 #define CB_HW_CONTROL__DISABLE_CC_IB_SERIALIZER_STATE_OPT__SHIFT                                              0x1e
 #define CB_HW_CONTROL__DISABLE_PIXEL_IN_QUAD_FIX_FOR_LINEAR_SURFACE__SHIFT                                    0x1f
 #define CB_HW_CONTROL__ALLOW_MRT_WITH_DUAL_SOURCE_MASK                                                        0x00000001L
+#define CB_HW_CONTROL__DISABLE_VRS_FILLRATE_OPTIMIZATION_MASK                                                 0x00000002L
 #define CB_HW_CONTROL__DISABLE_FILLRATE_OPT_FIX_WITH_CFC_MASK                                                 0x00000008L
 #define CB_HW_CONTROL__DISABLE_POST_DCC_WITH_CFC_FIX_MASK                                                     0x00000010L
+#define CB_HW_CONTROL__DISABLE_COMPRESS_1FRAG_WHEN_VRS_RATE_HINT_EN_MASK                                      0x00000020L
 #define CB_HW_CONTROL__RMI_CREDITS_MASK                                                                       0x00000FC0L
 #define CB_HW_CONTROL__CHICKEN_BITS_MASK                                                                      0x00007000L
 #define CB_HW_CONTROL__DISABLE_FMASK_MULTI_MGCG_DOMAINS_MASK                                                  0x00008000L
@@ -19881,6 +19889,7 @@
 #define DB_RENDER_OVERRIDE2__PRESERVE_SRESULTS__SHIFT                                                         0x16
 #define DB_RENDER_OVERRIDE2__DISABLE_FAST_PASS__SHIFT                                                         0x17
 #define DB_RENDER_OVERRIDE2__ALLOW_PARTIAL_RES_HIER_KILL__SHIFT                                               0x19
+#define DB_RENDER_OVERRIDE2__FORCE_VRS_RATE_FINE__SHIFT                                                       0x1a
 #define DB_RENDER_OVERRIDE2__CENTROID_COMPUTATION_MODE__SHIFT                                                 0x1b
 #define DB_RENDER_OVERRIDE2__PARTIAL_SQUAD_LAUNCH_CONTROL_MASK                                                0x00000003L
 #define DB_RENDER_OVERRIDE2__PARTIAL_SQUAD_LAUNCH_COUNTDOWN_MASK                                              0x0000001CL
@@ -19898,6 +19907,7 @@
 #define DB_RENDER_OVERRIDE2__PRESERVE_SRESULTS_MASK                                                           0x00400000L
 #define DB_RENDER_OVERRIDE2__DISABLE_FAST_PASS_MASK                                                           0x00800000L
 #define DB_RENDER_OVERRIDE2__ALLOW_PARTIAL_RES_HIER_KILL_MASK                                                 0x02000000L
+#define DB_RENDER_OVERRIDE2__FORCE_VRS_RATE_FINE_MASK                                                         0x04000000L
 #define DB_RENDER_OVERRIDE2__CENTROID_COMPUTATION_MODE_MASK                                                   0x18000000L
 //DB_HTILE_DATA_BASE
 #define DB_HTILE_DATA_BASE__BASE_256B__SHIFT                                                                  0x0
@@ -20021,6 +20031,13 @@
 //DB_RESERVED_REG_3
 #define DB_RESERVED_REG_3__FIELD_1__SHIFT                                                                     0x0
 #define DB_RESERVED_REG_3__FIELD_1_MASK                                                                       0x003FFFFFL
+//DB_VRS_OVERRIDE_CNTL
+#define DB_VRS_OVERRIDE_CNTL__VRS_OVERRIDE_RATE_COMBINER_MODE__SHIFT                                          0x0
+#define DB_VRS_OVERRIDE_CNTL__VRS_OVERRIDE_RATE_X__SHIFT                                                      0x4
+#define DB_VRS_OVERRIDE_CNTL__VRS_OVERRIDE_RATE_Y__SHIFT                                                      0x6
+#define DB_VRS_OVERRIDE_CNTL__VRS_OVERRIDE_RATE_COMBINER_MODE_MASK                                            0x00000007L
+#define DB_VRS_OVERRIDE_CNTL__VRS_OVERRIDE_RATE_X_MASK                                                        0x00000030L
+#define DB_VRS_OVERRIDE_CNTL__VRS_OVERRIDE_RATE_Y_MASK                                                        0x000000C0L
 //DB_Z_READ_BASE_HI
 #define DB_Z_READ_BASE_HI__BASE_HI__SHIFT                                                                     0x0
 #define DB_Z_READ_BASE_HI__BASE_HI_MASK                                                                       0x000000FFL
@@ -22598,6 +22615,7 @@
 #define PA_CL_VS_OUT_CNTL__VS_OUT_MISC_SIDE_BUS_ENA__SHIFT                                                    0x18
 #define PA_CL_VS_OUT_CNTL__USE_VTX_GS_CUT_FLAG__SHIFT                                                         0x19
 #define PA_CL_VS_OUT_CNTL__USE_VTX_LINE_WIDTH__SHIFT                                                          0x1b
+#define PA_CL_VS_OUT_CNTL__USE_VTX_VRS_RATE__SHIFT                                                            0x1c
 #define PA_CL_VS_OUT_CNTL__BYPASS_VTX_RATE_COMBINER__SHIFT                                                    0x1d
 #define PA_CL_VS_OUT_CNTL__BYPASS_PRIM_RATE_COMBINER__SHIFT                                                   0x1e
 #define PA_CL_VS_OUT_CNTL__CLIP_DIST_ENA_0_MASK                                                               0x00000001L
@@ -22627,6 +22645,7 @@
 #define PA_CL_VS_OUT_CNTL__VS_OUT_MISC_SIDE_BUS_ENA_MASK                                                      0x01000000L
 #define PA_CL_VS_OUT_CNTL__USE_VTX_GS_CUT_FLAG_MASK                                                           0x02000000L
 #define PA_CL_VS_OUT_CNTL__USE_VTX_LINE_WIDTH_MASK                                                            0x08000000L
+#define PA_CL_VS_OUT_CNTL__USE_VTX_VRS_RATE_MASK                                                              0x10000000L
 #define PA_CL_VS_OUT_CNTL__BYPASS_VTX_RATE_COMBINER_MASK                                                      0x20000000L
 #define PA_CL_VS_OUT_CNTL__BYPASS_PRIM_RATE_COMBINER_MASK                                                     0x40000000L
 //PA_CL_NANINF_CNTL
@@ -22740,6 +22759,19 @@
 //PA_STATE_STEREO_X
 #define PA_STATE_STEREO_X__STEREO_X_OFFSET__SHIFT                                                             0x0
 #define PA_STATE_STEREO_X__STEREO_X_OFFSET_MASK                                                               0xFFFFFFFFL
+//PA_CL_VRS_CNTL
+#define PA_CL_VRS_CNTL__VERTEX_RATE_COMBINER_MODE__SHIFT                                                      0x0
+#define PA_CL_VRS_CNTL__PRIMITIVE_RATE_COMBINER_MODE__SHIFT                                                   0x3
+#define PA_CL_VRS_CNTL__HTILE_RATE_COMBINER_MODE__SHIFT                                                       0x6
+#define PA_CL_VRS_CNTL__SAMPLE_ITER_COMBINER_MODE__SHIFT                                                      0x9
+#define PA_CL_VRS_CNTL__EXPOSE_VRS_PIXELS_MASK__SHIFT                                                         0xd
+#define PA_CL_VRS_CNTL__CMASK_RATE_HINT_FORCE_ZERO__SHIFT                                                     0xe
+#define PA_CL_VRS_CNTL__VERTEX_RATE_COMBINER_MODE_MASK                                                        0x00000007L
+#define PA_CL_VRS_CNTL__PRIMITIVE_RATE_COMBINER_MODE_MASK                                                     0x00000038L
+#define PA_CL_VRS_CNTL__HTILE_RATE_COMBINER_MODE_MASK                                                         0x000001C0L
+#define PA_CL_VRS_CNTL__SAMPLE_ITER_COMBINER_MODE_MASK                                                        0x00000E00L
+#define PA_CL_VRS_CNTL__EXPOSE_VRS_PIXELS_MASK_MASK                                                           0x00002000L
+#define PA_CL_VRS_CNTL__CMASK_RATE_HINT_FORCE_ZERO_MASK                                                       0x00004000L
 //PA_SU_POINT_SIZE
 #define PA_SU_POINT_SIZE__HEIGHT__SHIFT                                                                       0x0
 #define PA_SU_POINT_SIZE__WIDTH__SHIFT                                                                        0x10
@@ -23088,6 +23120,7 @@
 #define DB_HTILE_SURFACE__DST_OUTSIDE_ZERO_TO_ONE__SHIFT                                                      0x10
 #define DB_HTILE_SURFACE__RESERVED_FIELD_6__SHIFT                                                             0x11
 #define DB_HTILE_SURFACE__PIPE_ALIGNED__SHIFT                                                                 0x12
+#define DB_HTILE_SURFACE__VRS_HTILE_ENCODING__SHIFT                                                           0x13
 #define DB_HTILE_SURFACE__RESERVED_FIELD_1_MASK                                                               0x00000001L
 #define DB_HTILE_SURFACE__FULL_CACHE_MASK                                                                     0x00000002L
 #define DB_HTILE_SURFACE__RESERVED_FIELD_2_MASK                                                               0x00000004L
@@ -23097,6 +23130,7 @@
 #define DB_HTILE_SURFACE__DST_OUTSIDE_ZERO_TO_ONE_MASK                                                        0x00010000L
 #define DB_HTILE_SURFACE__RESERVED_FIELD_6_MASK                                                               0x00020000L
 #define DB_HTILE_SURFACE__PIPE_ALIGNED_MASK                                                                   0x00040000L
+#define DB_HTILE_SURFACE__VRS_HTILE_ENCODING_MASK                                                             0x00180000L
 //DB_SRESULTS_COMPARE_STATE0
 #define DB_SRESULTS_COMPARE_STATE0__COMPAREFUNC0__SHIFT                                                       0x0
 #define DB_SRESULTS_COMPARE_STATE0__COMPAREVALUE0__SHIFT                                                      0x4
@@ -24954,6 +24988,7 @@
 #define CB_COLOR0_ATTRIB3__CMASK_PIPE_ALIGNED__SHIFT                                                          0x1a
 #define CB_COLOR0_ATTRIB3__RESOURCE_LEVEL__SHIFT                                                              0x1b
 #define CB_COLOR0_ATTRIB3__DCC_PIPE_ALIGNED__SHIFT                                                            0x1e
+#define CB_COLOR0_ATTRIB3__VRS_RATE_HINT_ENABLE__SHIFT                                                        0x1f
 #define CB_COLOR0_ATTRIB3__MIP0_DEPTH_MASK                                                                    0x00001FFFL
 #define CB_COLOR0_ATTRIB3__META_LINEAR_MASK                                                                   0x00002000L
 #define CB_COLOR0_ATTRIB3__COLOR_SW_MODE_MASK                                                                 0x0007C000L
@@ -24962,6 +24997,7 @@
 #define CB_COLOR0_ATTRIB3__CMASK_PIPE_ALIGNED_MASK                                                            0x04000000L
 #define CB_COLOR0_ATTRIB3__RESOURCE_LEVEL_MASK                                                                0x38000000L
 #define CB_COLOR0_ATTRIB3__DCC_PIPE_ALIGNED_MASK                                                              0x40000000L
+#define CB_COLOR0_ATTRIB3__VRS_RATE_HINT_ENABLE_MASK                                                          0x80000000L
 //CB_COLOR1_ATTRIB3
 #define CB_COLOR1_ATTRIB3__MIP0_DEPTH__SHIFT                                                                  0x0
 #define CB_COLOR1_ATTRIB3__META_LINEAR__SHIFT                                                                 0xd
@@ -24971,6 +25007,7 @@
 #define CB_COLOR1_ATTRIB3__CMASK_PIPE_ALIGNED__SHIFT                                                          0x1a
 #define CB_COLOR1_ATTRIB3__RESOURCE_LEVEL__SHIFT                                                              0x1b
 #define CB_COLOR1_ATTRIB3__DCC_PIPE_ALIGNED__SHIFT                                                            0x1e
+#define CB_COLOR1_ATTRIB3__VRS_RATE_HINT_ENABLE__SHIFT                                                        0x1f
 #define CB_COLOR1_ATTRIB3__MIP0_DEPTH_MASK                                                                    0x00001FFFL
 #define CB_COLOR1_ATTRIB3__META_LINEAR_MASK                                                                   0x00002000L
 #define CB_COLOR1_ATTRIB3__COLOR_SW_MODE_MASK                                                                 0x0007C000L
@@ -24979,6 +25016,7 @@
 #define CB_COLOR1_ATTRIB3__CMASK_PIPE_ALIGNED_MASK                                                            0x04000000L
 #define CB_COLOR1_ATTRIB3__RESOURCE_LEVEL_MASK                                                                0x38000000L
 #define CB_COLOR1_ATTRIB3__DCC_PIPE_ALIGNED_MASK                                                              0x40000000L
+#define CB_COLOR1_ATTRIB3__VRS_RATE_HINT_ENABLE_MASK                                                          0x80000000L
 //CB_COLOR2_ATTRIB3
 #define CB_COLOR2_ATTRIB3__MIP0_DEPTH__SHIFT                                                                  0x0
 #define CB_COLOR2_ATTRIB3__META_LINEAR__SHIFT                                                                 0xd
@@ -24988,6 +25026,7 @@
 #define CB_COLOR2_ATTRIB3__CMASK_PIPE_ALIGNED__SHIFT                                                          0x1a
 #define CB_COLOR2_ATTRIB3__RESOURCE_LEVEL__SHIFT                                                              0x1b
 #define CB_COLOR2_ATTRIB3__DCC_PIPE_ALIGNED__SHIFT                                                            0x1e
+#define CB_COLOR2_ATTRIB3__VRS_RATE_HINT_ENABLE__SHIFT                                                        0x1f
 #define CB_COLOR2_ATTRIB3__MIP0_DEPTH_MASK                                                                    0x00001FFFL
 #define CB_COLOR2_ATTRIB3__META_LINEAR_MASK                                                                   0x00002000L
 #define CB_COLOR2_ATTRIB3__COLOR_SW_MODE_MASK                                                                 0x0007C000L
@@ -24996,6 +25035,7 @@
 #define CB_COLOR2_ATTRIB3__CMASK_PIPE_ALIGNED_MASK                                                            0x04000000L
 #define CB_COLOR2_ATTRIB3__RESOURCE_LEVEL_MASK                                                                0x38000000L
 #define CB_COLOR2_ATTRIB3__DCC_PIPE_ALIGNED_MASK                                                              0x40000000L
+#define CB_COLOR2_ATTRIB3__VRS_RATE_HINT_ENABLE_MASK                                                          0x80000000L
 //CB_COLOR3_ATTRIB3
 #define CB_COLOR3_ATTRIB3__MIP0_DEPTH__SHIFT                                                                  0x0
 #define CB_COLOR3_ATTRIB3__META_LINEAR__SHIFT                                                                 0xd
@@ -25005,6 +25045,7 @@
 #define CB_COLOR3_ATTRIB3__CMASK_PIPE_ALIGNED__SHIFT                                                          0x1a
 #define CB_COLOR3_ATTRIB3__RESOURCE_LEVEL__SHIFT                                                              0x1b
 #define CB_COLOR3_ATTRIB3__DCC_PIPE_ALIGNED__SHIFT                                                            0x1e
+#define CB_COLOR3_ATTRIB3__VRS_RATE_HINT_ENABLE__SHIFT                                                        0x1f
 #define CB_COLOR3_ATTRIB3__MIP0_DEPTH_MASK                                                                    0x00001FFFL
 #define CB_COLOR3_ATTRIB3__META_LINEAR_MASK                                                                   0x00002000L
 #define CB_COLOR3_ATTRIB3__COLOR_SW_MODE_MASK                                                                 0x0007C000L
@@ -25013,6 +25054,7 @@
 #define CB_COLOR3_ATTRIB3__CMASK_PIPE_ALIGNED_MASK                                                            0x04000000L
 #define CB_COLOR3_ATTRIB3__RESOURCE_LEVEL_MASK                                                                0x38000000L
 #define CB_COLOR3_ATTRIB3__DCC_PIPE_ALIGNED_MASK                                                              0x40000000L
+#define CB_COLOR3_ATTRIB3__VRS_RATE_HINT_ENABLE_MASK                                                          0x80000000L
 //CB_COLOR4_ATTRIB3
 #define CB_COLOR4_ATTRIB3__MIP0_DEPTH__SHIFT                                                                  0x0
 #define CB_COLOR4_ATTRIB3__META_LINEAR__SHIFT                                                                 0xd
@@ -25022,6 +25064,7 @@
 #define CB_COLOR4_ATTRIB3__CMASK_PIPE_ALIGNED__SHIFT                                                          0x1a
 #define CB_COLOR4_ATTRIB3__RESOURCE_LEVEL__SHIFT                                                              0x1b
 #define CB_COLOR4_ATTRIB3__DCC_PIPE_ALIGNED__SHIFT                                                            0x1e
+#define CB_COLOR4_ATTRIB3__VRS_RATE_HINT_ENABLE__SHIFT                                                        0x1f
 #define CB_COLOR4_ATTRIB3__MIP0_DEPTH_MASK                                                                    0x00001FFFL
 #define CB_COLOR4_ATTRIB3__META_LINEAR_MASK                                                                   0x00002000L
 #define CB_COLOR4_ATTRIB3__COLOR_SW_MODE_MASK                                                                 0x0007C000L
@@ -25030,6 +25073,7 @@
 #define CB_COLOR4_ATTRIB3__CMASK_PIPE_ALIGNED_MASK                                                            0x04000000L
 #define CB_COLOR4_ATTRIB3__RESOURCE_LEVEL_MASK                                                                0x38000000L
 #define CB_COLOR4_ATTRIB3__DCC_PIPE_ALIGNED_MASK                                                              0x40000000L
+#define CB_COLOR4_ATTRIB3__VRS_RATE_HINT_ENABLE_MASK                                                          0x80000000L
 //CB_COLOR5_ATTRIB3
 #define CB_COLOR5_ATTRIB3__MIP0_DEPTH__SHIFT                                                                  0x0
 #define CB_COLOR5_ATTRIB3__META_LINEAR__SHIFT                                                                 0xd
@@ -25039,6 +25083,7 @@
 #define CB_COLOR5_ATTRIB3__CMASK_PIPE_ALIGNED__SHIFT                                                          0x1a
 #define CB_COLOR5_ATTRIB3__RESOURCE_LEVEL__SHIFT                                                              0x1b
 #define CB_COLOR5_ATTRIB3__DCC_PIPE_ALIGNED__SHIFT                                                            0x1e
+#define CB_COLOR5_ATTRIB3__VRS_RATE_HINT_ENABLE__SHIFT                                                        0x1f
 #define CB_COLOR5_ATTRIB3__MIP0_DEPTH_MASK                                                                    0x00001FFFL
 #define CB_COLOR5_ATTRIB3__META_LINEAR_MASK                                                                   0x00002000L
 #define CB_COLOR5_ATTRIB3__COLOR_SW_MODE_MASK                                                                 0x0007C000L
@@ -25047,6 +25092,7 @@
 #define CB_COLOR5_ATTRIB3__CMASK_PIPE_ALIGNED_MASK                                                            0x04000000L
 #define CB_COLOR5_ATTRIB3__RESOURCE_LEVEL_MASK                                                                0x38000000L
 #define CB_COLOR5_ATTRIB3__DCC_PIPE_ALIGNED_MASK                                                              0x40000000L
+#define CB_COLOR5_ATTRIB3__VRS_RATE_HINT_ENABLE_MASK                                                          0x80000000L
 //CB_COLOR6_ATTRIB3
 #define CB_COLOR6_ATTRIB3__MIP0_DEPTH__SHIFT                                                                  0x0
 #define CB_COLOR6_ATTRIB3__META_LINEAR__SHIFT                                                                 0xd
@@ -25056,6 +25102,7 @@
 #define CB_COLOR6_ATTRIB3__CMASK_PIPE_ALIGNED__SHIFT                                                          0x1a
 #define CB_COLOR6_ATTRIB3__RESOURCE_LEVEL__SHIFT                                                              0x1b
 #define CB_COLOR6_ATTRIB3__DCC_PIPE_ALIGNED__SHIFT                                                            0x1e
+#define CB_COLOR6_ATTRIB3__VRS_RATE_HINT_ENABLE__SHIFT                                                        0x1f
 #define CB_COLOR6_ATTRIB3__MIP0_DEPTH_MASK                                                                    0x00001FFFL
 #define CB_COLOR6_ATTRIB3__META_LINEAR_MASK                                                                   0x00002000L
 #define CB_COLOR6_ATTRIB3__COLOR_SW_MODE_MASK                                                                 0x0007C000L
@@ -25064,6 +25111,7 @@
 #define CB_COLOR6_ATTRIB3__CMASK_PIPE_ALIGNED_MASK                                                            0x04000000L
 #define CB_COLOR6_ATTRIB3__RESOURCE_LEVEL_MASK                                                                0x38000000L
 #define CB_COLOR6_ATTRIB3__DCC_PIPE_ALIGNED_MASK                                                              0x40000000L
+#define CB_COLOR6_ATTRIB3__VRS_RATE_HINT_ENABLE_MASK                                                          0x80000000L
 //CB_COLOR7_ATTRIB3
 #define CB_COLOR7_ATTRIB3__MIP0_DEPTH__SHIFT                                                                  0x0
 #define CB_COLOR7_ATTRIB3__META_LINEAR__SHIFT                                                                 0xd
@@ -25073,6 +25121,7 @@
 #define CB_COLOR7_ATTRIB3__CMASK_PIPE_ALIGNED__SHIFT                                                          0x1a
 #define CB_COLOR7_ATTRIB3__RESOURCE_LEVEL__SHIFT                                                              0x1b
 #define CB_COLOR7_ATTRIB3__DCC_PIPE_ALIGNED__SHIFT                                                            0x1e
+#define CB_COLOR7_ATTRIB3__VRS_RATE_HINT_ENABLE__SHIFT                                                        0x1f
 #define CB_COLOR7_ATTRIB3__MIP0_DEPTH_MASK                                                                    0x00001FFFL
 #define CB_COLOR7_ATTRIB3__META_LINEAR_MASK                                                                   0x00002000L
 #define CB_COLOR7_ATTRIB3__COLOR_SW_MODE_MASK                                                                 0x0007C000L
@@ -25081,6 +25130,7 @@
 #define CB_COLOR7_ATTRIB3__CMASK_PIPE_ALIGNED_MASK                                                            0x04000000L
 #define CB_COLOR7_ATTRIB3__RESOURCE_LEVEL_MASK                                                                0x38000000L
 #define CB_COLOR7_ATTRIB3__DCC_PIPE_ALIGNED_MASK                                                              0x40000000L
+#define CB_COLOR7_ATTRIB3__VRS_RATE_HINT_ENABLE_MASK                                                          0x80000000L
 
 
 // addressBlock: gc_gfxudec

From 1b51916b9734acd609c4f552e582cdc59ac254d6 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Fri, 1 May 2020 16:45:09 -0400
Subject: [PATCH 045/322] drm/amdgpu: add VCN 3.0 AV1 registers

This adds the AV1 registers.

Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../include/asic_reg/vcn/vcn_3_0_0_sh_mask.h  | 34 +++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_3_0_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_3_0_0_sh_mask.h
index c0efd90808f2..58cf7adb9d54 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_3_0_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_3_0_0_sh_mask.h
@@ -2393,6 +2393,7 @@
 #define VCN_FEATURES__HAS_MJPEG2_IDCT_DEC__SHIFT                                                              0x7
 #define VCN_FEATURES__HAS_SCLR_DEC__SHIFT                                                                     0x8
 #define VCN_FEATURES__HAS_VP9_DEC__SHIFT                                                                      0x9
+#define VCN_FEATURES__HAS_AV1_DEC__SHIFT                                                                      0xa
 #define VCN_FEATURES__HAS_EFC_ENC__SHIFT                                                                      0xb
 #define VCN_FEATURES__HAS_EFC_HDR2SDR_ENC__SHIFT                                                              0xc
 #define VCN_FEATURES__HAS_DUAL_MJPEG_DEC__SHIFT                                                               0xd
@@ -2407,6 +2408,7 @@
 #define VCN_FEATURES__HAS_MJPEG2_IDCT_DEC_MASK                                                                0x00000080L
 #define VCN_FEATURES__HAS_SCLR_DEC_MASK                                                                       0x00000100L
 #define VCN_FEATURES__HAS_VP9_DEC_MASK                                                                        0x00000200L
+#define VCN_FEATURES__HAS_AV1_DEC_MASK                                                                        0x00000400L
 #define VCN_FEATURES__HAS_EFC_ENC_MASK                                                                        0x00000800L
 #define VCN_FEATURES__HAS_EFC_HDR2SDR_ENC_MASK                                                                0x00001000L
 #define VCN_FEATURES__HAS_DUAL_MJPEG_DEC_MASK                                                                 0x00002000L
@@ -2809,8 +2811,10 @@
 #define UVD_SUVD_CGC_GATE__IME_HEVC__SHIFT                                                                    0x18
 #define UVD_SUVD_CGC_GATE__EFC__SHIFT                                                                         0x19
 #define UVD_SUVD_CGC_GATE__SAOE__SHIFT                                                                        0x1a
+#define UVD_SUVD_CGC_GATE__SRE_AV1__SHIFT                                                                     0x1b
 #define UVD_SUVD_CGC_GATE__FBC_PCLK__SHIFT                                                                    0x1c
 #define UVD_SUVD_CGC_GATE__FBC_CCLK__SHIFT                                                                    0x1d
+#define UVD_SUVD_CGC_GATE__SCM_AV1__SHIFT                                                                     0x1e
 #define UVD_SUVD_CGC_GATE__SMPA__SHIFT                                                                        0x1f
 #define UVD_SUVD_CGC_GATE__SRE_MASK                                                                           0x00000001L
 #define UVD_SUVD_CGC_GATE__SIT_MASK                                                                           0x00000002L
@@ -2839,8 +2843,10 @@
 #define UVD_SUVD_CGC_GATE__IME_HEVC_MASK                                                                      0x01000000L
 #define UVD_SUVD_CGC_GATE__EFC_MASK                                                                           0x02000000L
 #define UVD_SUVD_CGC_GATE__SAOE_MASK                                                                          0x04000000L
+#define UVD_SUVD_CGC_GATE__SRE_AV1_MASK                                                                       0x08000000L
 #define UVD_SUVD_CGC_GATE__FBC_PCLK_MASK                                                                      0x10000000L
 #define UVD_SUVD_CGC_GATE__FBC_CCLK_MASK                                                                      0x20000000L
+#define UVD_SUVD_CGC_GATE__SCM_AV1_MASK                                                                       0x40000000L
 #define UVD_SUVD_CGC_GATE__SMPA_MASK                                                                          0x80000000L
 //UVD_SUVD_CGC_STATUS
 #define UVD_SUVD_CGC_STATUS__SRE_VCLK__SHIFT                                                                  0x0
@@ -2873,6 +2879,8 @@
 #define UVD_SUVD_CGC_STATUS__IME_HEVC_DCLK__SHIFT                                                             0x1b
 #define UVD_SUVD_CGC_STATUS__EFC_DCLK__SHIFT                                                                  0x1c
 #define UVD_SUVD_CGC_STATUS__SAOE_DCLK__SHIFT                                                                 0x1d
+#define UVD_SUVD_CGC_STATUS__SRE_AV1_VCLK__SHIFT                                                              0x1e
+#define UVD_SUVD_CGC_STATUS__SCM_AV1_DCLK__SHIFT                                                              0x1f
 #define UVD_SUVD_CGC_STATUS__SRE_VCLK_MASK                                                                    0x00000001L
 #define UVD_SUVD_CGC_STATUS__SRE_DCLK_MASK                                                                    0x00000002L
 #define UVD_SUVD_CGC_STATUS__SIT_DCLK_MASK                                                                    0x00000004L
@@ -2903,6 +2911,8 @@
 #define UVD_SUVD_CGC_STATUS__IME_HEVC_DCLK_MASK                                                               0x08000000L
 #define UVD_SUVD_CGC_STATUS__EFC_DCLK_MASK                                                                    0x10000000L
 #define UVD_SUVD_CGC_STATUS__SAOE_DCLK_MASK                                                                   0x20000000L
+#define UVD_SUVD_CGC_STATUS__SRE_AV1_VCLK_MASK                                                                0x40000000L
+#define UVD_SUVD_CGC_STATUS__SCM_AV1_DCLK_MASK                                                                0x80000000L
 //UVD_SUVD_CGC_CTRL
 #define UVD_SUVD_CGC_CTRL__SRE_MODE__SHIFT                                                                    0x0
 #define UVD_SUVD_CGC_CTRL__SIT_MODE__SHIFT                                                                    0x1
@@ -2919,6 +2929,8 @@
 #define UVD_SUVD_CGC_CTRL__SMPA_MODE__SHIFT                                                                   0xc
 #define UVD_SUVD_CGC_CTRL__MPBE0_MODE__SHIFT                                                                  0xd
 #define UVD_SUVD_CGC_CTRL__MPBE1_MODE__SHIFT                                                                  0xe
+#define UVD_SUVD_CGC_CTRL__SIT_AV1_MODE__SHIFT                                                                0xf
+#define UVD_SUVD_CGC_CTRL__SDB_AV1_MODE__SHIFT                                                                0x10
 #define UVD_SUVD_CGC_CTRL__MPC1_MODE__SHIFT                                                                   0x11
 #define UVD_SUVD_CGC_CTRL__FBC_PCLK__SHIFT                                                                    0x1c
 #define UVD_SUVD_CGC_CTRL__FBC_CCLK__SHIFT                                                                    0x1d
@@ -2937,6 +2949,8 @@
 #define UVD_SUVD_CGC_CTRL__SMPA_MODE_MASK                                                                     0x00001000L
 #define UVD_SUVD_CGC_CTRL__MPBE0_MODE_MASK                                                                    0x00002000L
 #define UVD_SUVD_CGC_CTRL__MPBE1_MODE_MASK                                                                    0x00004000L
+#define UVD_SUVD_CGC_CTRL__SIT_AV1_MODE_MASK                                                                  0x00008000L
+#define UVD_SUVD_CGC_CTRL__SDB_AV1_MODE_MASK                                                                  0x00010000L
 #define UVD_SUVD_CGC_CTRL__MPC1_MODE_MASK                                                                     0x00020000L
 #define UVD_SUVD_CGC_CTRL__FBC_PCLK_MASK                                                                      0x10000000L
 #define UVD_SUVD_CGC_CTRL__FBC_CCLK_MASK                                                                      0x20000000L
@@ -3658,6 +3672,8 @@
 #define UVD_SUVD_CGC_STATUS2__SMPA_VCLK__SHIFT                                                                0x0
 #define UVD_SUVD_CGC_STATUS2__SMPA_DCLK__SHIFT                                                                0x1
 #define UVD_SUVD_CGC_STATUS2__MPBE1_DCLK__SHIFT                                                               0x3
+#define UVD_SUVD_CGC_STATUS2__SIT_AV1_DCLK__SHIFT                                                             0x4
+#define UVD_SUVD_CGC_STATUS2__SDB_AV1_DCLK__SHIFT                                                             0x5
 #define UVD_SUVD_CGC_STATUS2__MPC1_DCLK__SHIFT                                                                0x6
 #define UVD_SUVD_CGC_STATUS2__MPC1_SCLK__SHIFT                                                                0x7
 #define UVD_SUVD_CGC_STATUS2__MPC1_VCLK__SHIFT                                                                0x8
@@ -3666,6 +3682,8 @@
 #define UVD_SUVD_CGC_STATUS2__SMPA_VCLK_MASK                                                                  0x00000001L
 #define UVD_SUVD_CGC_STATUS2__SMPA_DCLK_MASK                                                                  0x00000002L
 #define UVD_SUVD_CGC_STATUS2__MPBE1_DCLK_MASK                                                                 0x00000008L
+#define UVD_SUVD_CGC_STATUS2__SIT_AV1_DCLK_MASK                                                               0x00000010L
+#define UVD_SUVD_CGC_STATUS2__SDB_AV1_DCLK_MASK                                                               0x00000020L
 #define UVD_SUVD_CGC_STATUS2__MPC1_DCLK_MASK                                                                  0x00000040L
 #define UVD_SUVD_CGC_STATUS2__MPC1_SCLK_MASK                                                                  0x00000080L
 #define UVD_SUVD_CGC_STATUS2__MPC1_VCLK_MASK                                                                  0x00000100L
@@ -3674,25 +3692,41 @@
 //UVD_SUVD_CGC_GATE2
 #define UVD_SUVD_CGC_GATE2__MPBE0__SHIFT                                                                      0x0
 #define UVD_SUVD_CGC_GATE2__MPBE1__SHIFT                                                                      0x1
+#define UVD_SUVD_CGC_GATE2__SIT_AV1__SHIFT                                                                    0x2
+#define UVD_SUVD_CGC_GATE2__SDB_AV1__SHIFT                                                                    0x3
 #define UVD_SUVD_CGC_GATE2__MPC1__SHIFT                                                                       0x4
 #define UVD_SUVD_CGC_GATE2__MPBE0_MASK                                                                        0x00000001L
 #define UVD_SUVD_CGC_GATE2__MPBE1_MASK                                                                        0x00000002L
+#define UVD_SUVD_CGC_GATE2__SIT_AV1_MASK                                                                      0x00000004L
+#define UVD_SUVD_CGC_GATE2__SDB_AV1_MASK                                                                      0x00000008L
 #define UVD_SUVD_CGC_GATE2__MPC1_MASK                                                                         0x00000010L
 //UVD_SUVD_INT_STATUS2
 #define UVD_SUVD_INT_STATUS2__SMPA_FUNC_INT__SHIFT                                                            0x0
 #define UVD_SUVD_INT_STATUS2__SMPA_ERR_INT__SHIFT                                                             0x5
+#define UVD_SUVD_INT_STATUS2__SDB_AV1_FUNC_INT__SHIFT                                                         0x6
+#define UVD_SUVD_INT_STATUS2__SDB_AV1_ERR_INT__SHIFT                                                          0xb
 #define UVD_SUVD_INT_STATUS2__SMPA_FUNC_INT_MASK                                                              0x0000001FL
 #define UVD_SUVD_INT_STATUS2__SMPA_ERR_INT_MASK                                                               0x00000020L
+#define UVD_SUVD_INT_STATUS2__SDB_AV1_FUNC_INT_MASK                                                           0x000007C0L
+#define UVD_SUVD_INT_STATUS2__SDB_AV1_ERR_INT_MASK                                                            0x00000800L
 //UVD_SUVD_INT_EN2
 #define UVD_SUVD_INT_EN2__SMPA_FUNC_INT_EN__SHIFT                                                             0x0
 #define UVD_SUVD_INT_EN2__SMPA_ERR_INT_EN__SHIFT                                                              0x5
+#define UVD_SUVD_INT_EN2__SDB_AV1_FUNC_INT_EN__SHIFT                                                          0x6
+#define UVD_SUVD_INT_EN2__SDB_AV1_ERR_INT_EN__SHIFT                                                           0xb
 #define UVD_SUVD_INT_EN2__SMPA_FUNC_INT_EN_MASK                                                               0x0000001FL
 #define UVD_SUVD_INT_EN2__SMPA_ERR_INT_EN_MASK                                                                0x00000020L
+#define UVD_SUVD_INT_EN2__SDB_AV1_FUNC_INT_EN_MASK                                                            0x000007C0L
+#define UVD_SUVD_INT_EN2__SDB_AV1_ERR_INT_EN_MASK                                                             0x00000800L
 //UVD_SUVD_INT_ACK2
 #define UVD_SUVD_INT_ACK2__SMPA_FUNC_INT_ACK__SHIFT                                                           0x0
 #define UVD_SUVD_INT_ACK2__SMPA_ERR_INT_ACK__SHIFT                                                            0x5
+#define UVD_SUVD_INT_ACK2__SDB_AV1_FUNC_INT_ACK__SHIFT                                                        0x6
+#define UVD_SUVD_INT_ACK2__SDB_AV1_ERR_INT_ACK__SHIFT                                                         0xb
 #define UVD_SUVD_INT_ACK2__SMPA_FUNC_INT_ACK_MASK                                                             0x0000001FL
 #define UVD_SUVD_INT_ACK2__SMPA_ERR_INT_ACK_MASK                                                              0x00000020L
+#define UVD_SUVD_INT_ACK2__SDB_AV1_FUNC_INT_ACK_MASK                                                          0x000007C0L
+#define UVD_SUVD_INT_ACK2__SDB_AV1_ERR_INT_ACK_MASK                                                           0x00000800L
 
 
 // addressBlock: uvd0_ecpudec

From 8a410da6aaf6e9a1903aeefc6353f62dfcf0065a Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Fri, 1 May 2020 16:46:11 -0400
Subject: [PATCH 046/322] drm/amdgpu: use the AV1 defines for VCN 3.0

Switch from magic numbers to defines for AV1 clockgating.

Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
index 63e5547cfb16..3a805eaf6f11 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
@@ -746,18 +746,18 @@ static void vcn_v3_0_disable_clock_gating(struct amdgpu_device *adev, int inst)
 		| UVD_SUVD_CGC_GATE__IME_HEVC_MASK
 		| UVD_SUVD_CGC_GATE__EFC_MASK
 		| UVD_SUVD_CGC_GATE__SAOE_MASK
-		| 0x08000000
+		| UVD_SUVD_CGC_GATE__SRE_AV1_MASK
 		| UVD_SUVD_CGC_GATE__FBC_PCLK_MASK
 		| UVD_SUVD_CGC_GATE__FBC_CCLK_MASK
-		| 0x40000000
+		| UVD_SUVD_CGC_GATE__SCM_AV1_MASK
 		| UVD_SUVD_CGC_GATE__SMPA_MASK);
 	WREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_GATE, data);
 
 	data = RREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_GATE2);
 	data |= (UVD_SUVD_CGC_GATE2__MPBE0_MASK
 		| UVD_SUVD_CGC_GATE2__MPBE1_MASK
-		| 0x00000004
-		| 0x00000008
+		| UVD_SUVD_CGC_GATE2__SIT_AV1_MASK
+		| UVD_SUVD_CGC_GATE2__SDB_AV1_MASK
 		| UVD_SUVD_CGC_GATE2__MPC1_MASK);
 	WREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_GATE2, data);
 
@@ -776,8 +776,8 @@ static void vcn_v3_0_disable_clock_gating(struct amdgpu_device *adev, int inst)
 		| UVD_SUVD_CGC_CTRL__SMPA_MODE_MASK
 		| UVD_SUVD_CGC_CTRL__MPBE0_MODE_MASK
 		| UVD_SUVD_CGC_CTRL__MPBE1_MODE_MASK
-		| 0x00008000
-		| 0x00010000
+		| UVD_SUVD_CGC_CTRL__SIT_AV1_MODE_MASK
+		| UVD_SUVD_CGC_CTRL__SDB_AV1_MODE_MASK
 		| UVD_SUVD_CGC_CTRL__MPC1_MODE_MASK
 		| UVD_SUVD_CGC_CTRL__FBC_PCLK_MASK
 		| UVD_SUVD_CGC_CTRL__FBC_CCLK_MASK);
@@ -892,8 +892,8 @@ static void vcn_v3_0_enable_clock_gating(struct amdgpu_device *adev, int inst)
 		| UVD_SUVD_CGC_CTRL__SMPA_MODE_MASK
 		| UVD_SUVD_CGC_CTRL__MPBE0_MODE_MASK
 		| UVD_SUVD_CGC_CTRL__MPBE1_MODE_MASK
-		| 0x00008000
-		| 0x00010000
+		| UVD_SUVD_CGC_CTRL__SIT_AV1_MODE_MASK
+		| UVD_SUVD_CGC_CTRL__SDB_AV1_MODE_MASK
 		| UVD_SUVD_CGC_CTRL__MPC1_MODE_MASK
 		| UVD_SUVD_CGC_CTRL__FBC_PCLK_MASK
 		| UVD_SUVD_CGC_CTRL__FBC_CCLK_MASK);

From fc08ce66c0f3776b185b099b8093df2fd7f81151 Mon Sep 17 00:00:00 2001
From: Likun Gao <Likun.Gao@amd.com>
Date: Thu, 16 Jan 2020 10:45:38 +0800
Subject: [PATCH 047/322] drm/amdgpu: add device ID for sienna_cichlid (v2)

Add device ID for sienna_cichlid.

v2: squash in additional device ids.

Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Likun Gao <Likun.Gao@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 26127c7d2f32..eb11149c3fd2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1047,6 +1047,14 @@ static const struct pci_device_id pciidlist[] = {
 	{0x1002, 0x7360, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12|AMD_EXP_HW_SUPPORT},
 	{0x1002, 0x7362, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12|AMD_EXP_HW_SUPPORT},
 
+	/* Sienna_Cichlid */
+	{0x1002, 0x73A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+	{0x1002, 0x73A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+	{0x1002, 0x73A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+	{0x1002, 0x73AB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+	{0x1002, 0x73AE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+	{0x1002, 0x73BF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+
 	{0, 0, 0}
 };
 

From b4ebd0827fd024d0d23c1c9ee33be39c57cf7ad0 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Tue, 15 Sep 2020 13:36:20 -0400
Subject: [PATCH 048/322] drm/amdgpu: remove experimental flag from navi12

Navi12 has worked fine for a while now.

Reviewed-by: Evan Quan <evan.quan@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index eb11149c3fd2..321032d3a51a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -1044,8 +1044,8 @@ static const struct pci_device_id pciidlist[] = {
 	{0x1002, 0x1636, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU},
 
 	/* Navi12 */
-	{0x1002, 0x7360, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12|AMD_EXP_HW_SUPPORT},
-	{0x1002, 0x7362, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12|AMD_EXP_HW_SUPPORT},
+	{0x1002, 0x7360, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12},
+	{0x1002, 0x7362, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12},
 
 	/* Sienna_Cichlid */
 	{0x1002, 0x73A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},

From d36cb0205f034e943aa29e35b59c6a441f0056b5 Mon Sep 17 00:00:00 2001
From: Charles Keepax <ckeepax@opensource.cirrus.com>
Date: Fri, 18 Sep 2020 12:20:02 +0100
Subject: [PATCH 049/322] regmap: debugfs: Add back in erroneously removed
 initialisation of ret

Fixes: 94cc89eb8fa5 ("regmap: debugfs: Fix handling of name string for debugfs init delays")
Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Charles Keepax <ckeepax@opensource.cirrus.com>
Link: https://lore.kernel.org/r/20200918112002.15216-1-ckeepax@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/regmap/regmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index e22c21affebd..388ff8a6816c 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -694,9 +694,9 @@ struct regmap *__regmap_init(struct device *dev,
 			     const char *lock_name)
 {
 	struct regmap *map;
+	int ret = -EINVAL;
 	enum regmap_endian reg_endian, val_endian;
 	int i, j;
-	int ret;
 
 	if (!config)
 		goto err;

From ee15c7b53e52fb04583f734461244c4dcca828fa Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Fri, 18 Sep 2020 08:58:58 -0400
Subject: [PATCH 050/322] pNFS/flexfiles: Ensure we initialise the mirror
 bsizes correctly on read

While it is true that reading from an unmirrored source always uses
index 0, that is no longer true for mirrored sources when we fail over.

Fixes: 563c53e73b8b ("NFS: Fix flexfiles read failover")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/flexfilelayout/flexfilelayout.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index ff8965d1a4d4..1edeebd51937 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -838,6 +838,7 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
 	struct nfs4_ff_layout_mirror *mirror;
 	struct nfs4_pnfs_ds *ds;
 	int ds_idx;
+	u32 i;
 
 retry:
 	ff_layout_pg_check_layout(pgio, req);
@@ -863,14 +864,14 @@ retry:
 		goto retry;
 	}
 
-	mirror = FF_LAYOUT_COMP(pgio->pg_lseg, ds_idx);
+	for (i = 0; i < pgio->pg_mirror_count; i++) {
+		mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i);
+		pgm = &pgio->pg_mirrors[i];
+		pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize;
+	}
 
 	pgio->pg_mirror_idx = ds_idx;
 
-	/* read always uses only one mirror - idx 0 for pgio layer */
-	pgm = &pgio->pg_mirrors[0];
-	pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize;
-
 	if (NFS_SERVER(pgio->pg_inode)->flags &
 			(NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR))
 		pgio->pg_maxretrans = io_maxretrans;

From b9df46d08a8d098ea2124cb9e3b84458a474b4d4 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Fri, 18 Sep 2020 09:19:43 -0400
Subject: [PATCH 051/322] pNFS/flexfiles: Be consistent about mirror index
 types

A mirror index is always of type u32.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/flexfilelayout/flexfilelayout.c | 34 +++++++++++++-------------
 include/linux/nfs_xdr.h                |  4 +--
 2 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 1edeebd51937..a163533446fa 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -715,7 +715,7 @@ nfs4_ff_layout_stat_io_end_write(struct rpc_task *task,
 }
 
 static void
-ff_layout_mark_ds_unreachable(struct pnfs_layout_segment *lseg, int idx)
+ff_layout_mark_ds_unreachable(struct pnfs_layout_segment *lseg, u32 idx)
 {
 	struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
 
@@ -724,7 +724,7 @@ ff_layout_mark_ds_unreachable(struct pnfs_layout_segment *lseg, int idx)
 }
 
 static void
-ff_layout_mark_ds_reachable(struct pnfs_layout_segment *lseg, int idx)
+ff_layout_mark_ds_reachable(struct pnfs_layout_segment *lseg, u32 idx)
 {
 	struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
 
@@ -734,14 +734,14 @@ ff_layout_mark_ds_reachable(struct pnfs_layout_segment *lseg, int idx)
 
 static struct nfs4_pnfs_ds *
 ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg,
-			     int start_idx, int *best_idx,
+			     u32 start_idx, u32 *best_idx,
 			     bool check_device)
 {
 	struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);
 	struct nfs4_ff_layout_mirror *mirror;
 	struct nfs4_pnfs_ds *ds;
 	bool fail_return = false;
-	int idx;
+	u32 idx;
 
 	/* mirrors are initially sorted by efficiency */
 	for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) {
@@ -766,21 +766,21 @@ ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg,
 
 static struct nfs4_pnfs_ds *
 ff_layout_choose_any_ds_for_read(struct pnfs_layout_segment *lseg,
-				 int start_idx, int *best_idx)
+				 u32 start_idx, u32 *best_idx)
 {
 	return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, false);
 }
 
 static struct nfs4_pnfs_ds *
 ff_layout_choose_valid_ds_for_read(struct pnfs_layout_segment *lseg,
-				   int start_idx, int *best_idx)
+				   u32 start_idx, u32 *best_idx)
 {
 	return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, true);
 }
 
 static struct nfs4_pnfs_ds *
 ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg,
-				  int start_idx, int *best_idx)
+				  u32 start_idx, u32 *best_idx)
 {
 	struct nfs4_pnfs_ds *ds;
 
@@ -791,7 +791,8 @@ ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg,
 }
 
 static struct nfs4_pnfs_ds *
-ff_layout_get_ds_for_read(struct nfs_pageio_descriptor *pgio, int *best_idx)
+ff_layout_get_ds_for_read(struct nfs_pageio_descriptor *pgio,
+			  u32 *best_idx)
 {
 	struct pnfs_layout_segment *lseg = pgio->pg_lseg;
 	struct nfs4_pnfs_ds *ds;
@@ -837,8 +838,7 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
 	struct nfs_pgio_mirror *pgm;
 	struct nfs4_ff_layout_mirror *mirror;
 	struct nfs4_pnfs_ds *ds;
-	int ds_idx;
-	u32 i;
+	u32 ds_idx, i;
 
 retry:
 	ff_layout_pg_check_layout(pgio, req);
@@ -895,7 +895,7 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,
 	struct nfs4_ff_layout_mirror *mirror;
 	struct nfs_pgio_mirror *pgm;
 	struct nfs4_pnfs_ds *ds;
-	int i;
+	u32 i;
 
 retry:
 	ff_layout_pg_check_layout(pgio, req);
@@ -1039,7 +1039,7 @@ static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs)
 static void ff_layout_resend_pnfs_read(struct nfs_pgio_header *hdr)
 {
 	u32 idx = hdr->pgio_mirror_idx + 1;
-	int new_idx = 0;
+	u32 new_idx = 0;
 
 	if (ff_layout_choose_any_ds_for_read(hdr->lseg, idx + 1, &new_idx))
 		ff_layout_send_layouterror(hdr->lseg);
@@ -1076,7 +1076,7 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
 					   struct nfs4_state *state,
 					   struct nfs_client *clp,
 					   struct pnfs_layout_segment *lseg,
-					   int idx)
+					   u32 idx)
 {
 	struct pnfs_layout_hdr *lo = lseg->pls_layout;
 	struct inode *inode = lo->plh_inode;
@@ -1150,7 +1150,7 @@ reset:
 /* Retry all errors through either pNFS or MDS except for -EJUKEBOX */
 static int ff_layout_async_handle_error_v3(struct rpc_task *task,
 					   struct pnfs_layout_segment *lseg,
-					   int idx)
+					   u32 idx)
 {
 	struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
 
@@ -1185,7 +1185,7 @@ static int ff_layout_async_handle_error(struct rpc_task *task,
 					struct nfs4_state *state,
 					struct nfs_client *clp,
 					struct pnfs_layout_segment *lseg,
-					int idx)
+					u32 idx)
 {
 	int vers = clp->cl_nfs_mod->rpc_vers->number;
 
@@ -1212,7 +1212,7 @@ static int ff_layout_async_handle_error(struct rpc_task *task,
 }
 
 static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
-					int idx, u64 offset, u64 length,
+					u32 idx, u64 offset, u64 length,
 					u32 *op_status, int opnum, int error)
 {
 	struct nfs4_ff_layout_mirror *mirror;
@@ -1810,7 +1810,7 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
 	loff_t offset = hdr->args.offset;
 	int vers;
 	struct nfs_fh *fh;
-	int idx = hdr->pgio_mirror_idx;
+	u32 idx = hdr->pgio_mirror_idx;
 
 	mirror = FF_LAYOUT_COMP(lseg, idx);
 	ds = nfs4_ff_layout_prepare_ds(lseg, mirror, true);
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 9408f3252c8e..69cb46f7b8d2 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1611,8 +1611,8 @@ struct nfs_pgio_header {
 	__u64			mds_offset;	/* Filelayout dense stripe */
 	struct nfs_page_array	page_array;
 	struct nfs_client	*ds_clp;	/* pNFS data server */
-	int			ds_commit_idx;	/* ds index if ds_clp is set */
-	int			pgio_mirror_idx;/* mirror index in pgio layer */
+	u32			ds_commit_idx;	/* ds index if ds_clp is set */
+	u32			pgio_mirror_idx;/* mirror index in pgio layer */
 };
 
 struct nfs_mds_commit_info {

From ba2fd563b7408b8dd9bbd72fcae8bcb3c14e8ec8 Mon Sep 17 00:00:00 2001
From: Tony Ambardar <tony.ambardar@gmail.com>
Date: Thu, 17 Sep 2020 04:58:33 -0700
Subject: [PATCH 052/322] tools/bpftool: Support passing BPFTOOL_VERSION to
 make

This change facilitates out-of-tree builds, packaging, and versioning for
test and debug purposes. Defining BPFTOOL_VERSION allows self-contained
builds within the tools tree, since it avoids use of the 'kernelversion'
target in the top-level makefile, which would otherwise pull in several
other includes from outside the tools tree.

Signed-off-by: Tony Ambardar <Tony.Ambardar@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Quentin Monnet <quentin@isovalent.com>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20200917115833.1235518-1-Tony.Ambardar@gmail.com
---
 tools/bpf/bpftool/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 8462690a039b..4828913703b6 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -25,7 +25,7 @@ endif
 
 LIBBPF = $(LIBBPF_PATH)libbpf.a
 
-BPFTOOL_VERSION := $(shell make -rR --no-print-directory -sC ../../.. kernelversion)
+BPFTOOL_VERSION ?= $(shell make -rR --no-print-directory -sC ../../.. kernelversion)
 
 $(LIBBPF): FORCE
 	$(if $(LIBBPF_OUTPUT),@mkdir -p $(LIBBPF_OUTPUT))

From 62fe3e42ba4fa8156ac6f9096f796dd721489e0a Mon Sep 17 00:00:00 2001
From: Mircea Caprioru <mircea.caprioru@analog.com>
Date: Wed, 2 Sep 2020 16:42:22 +0300
Subject: [PATCH 053/322] iio: adc: ad7124: Fix typo in device name

This patch fixes the device name typo.

Fixes: 951ad4700313 ("iio: adc: ad7124: move chip ID & name on the chip_info table")
Signed-off-by: Mircea Caprioru <mircea.caprioru@analog.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Link: https://lore.kernel.org/r/20200902134222.28357-1-mircea.caprioru@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/ad7124.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/iio/adc/ad7124.c b/drivers/iio/adc/ad7124.c
index 8dce06e9e69c..766c73333604 100644
--- a/drivers/iio/adc/ad7124.c
+++ b/drivers/iio/adc/ad7124.c
@@ -177,12 +177,12 @@ static const struct iio_chan_spec ad7124_channel_template = {
 
 static struct ad7124_chip_info ad7124_chip_info_tbl[] = {
 	[ID_AD7124_4] = {
-		.name = "ad7127-4",
+		.name = "ad7124-4",
 		.chip_id = CHIPID_AD7124_4,
 		.num_inputs = 8,
 	},
 	[ID_AD7124_8] = {
-		.name = "ad7127-8",
+		.name = "ad7124-8",
 		.chip_id = CHIPID_AD7124_8,
 		.num_inputs = 16,
 	},

From 472eb39103e885f302fd8fd6eff104fcf5503f1b Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Sun, 13 Sep 2020 09:52:30 -0700
Subject: [PATCH 054/322] ALSA: asihpi: fix iounmap in error handler

clang static analysis flags this problem
hpioctl.c:513:7: warning: Branch condition evaluates to
  a garbage value
                if (pci.ap_mem_base[idx]) {
                    ^~~~~~~~~~~~~~~~~~~~

If there is a failure in the middle of the memory space loop,
only some of the memory spaces need to be cleaned up.

At the error handler, idx holds the number of successful
memory spaces mapped.  So rework the handler loop to use the
old idx.

There is a second problem, the memory space loop conditionally
iomaps()/sets the mem_base so it is necessay to initize pci.

Fixes: 719f82d3987a ("ALSA: Add support of AudioScience ASI boards")
Signed-off-by: Tom Rix <trix@redhat.com>
Link: https://lore.kernel.org/r/20200913165230.17166-1-trix@redhat.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/asihpi/hpioctl.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sound/pci/asihpi/hpioctl.c b/sound/pci/asihpi/hpioctl.c
index 496dcde9715d..9790f5108a16 100644
--- a/sound/pci/asihpi/hpioctl.c
+++ b/sound/pci/asihpi/hpioctl.c
@@ -343,7 +343,7 @@ int asihpi_adapter_probe(struct pci_dev *pci_dev,
 	struct hpi_message hm;
 	struct hpi_response hr;
 	struct hpi_adapter adapter;
-	struct hpi_pci pci;
+	struct hpi_pci pci = { 0 };
 
 	memset(&adapter, 0, sizeof(adapter));
 
@@ -499,7 +499,7 @@ int asihpi_adapter_probe(struct pci_dev *pci_dev,
 	return 0;
 
 err:
-	for (idx = 0; idx < HPI_MAX_ADAPTER_MEM_SPACES; idx++) {
+	while (--idx >= 0) {
 		if (pci.ap_mem_base[idx]) {
 			iounmap(pci.ap_mem_base[idx]);
 			pci.ap_mem_base[idx] = NULL;

From 3f74249057827c5f6676c41c18f6be12ce1469ce Mon Sep 17 00:00:00 2001
From: Hui Wang <hui.wang@canonical.com>
Date: Mon, 14 Sep 2020 14:51:18 +0800
Subject: [PATCH 055/322] ALSA: hda/realtek - Couldn't detect Mic if booting
 with headset plugged

We found a Mic detection issue on many Lenovo laptops, those laptops
belong to differnt models and they have different audio design like
internal mic connects to the codec or PCH, they all have this problem,
the problem is if plugging a headset before powerup/reboot the
machine, after booting up, the headphone could be detected but Mic
couldn't. If we plug out and plug in the headset, both headphone and
Mic could be detected then.

Through debugging we found the codec on those laptops are same, it is
alc257, and if we don't disable the 3k pulldown in alc256_shutup(),
the issue will be fixed. So far there is no pop noise or power
consumption regression on those laptops after this change.

Cc: Kailang Yang <kailang@realtek.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Hui Wang <hui.wang@canonical.com>
Link: https://lore.kernel.org/r/20200914065118.19238-1-hui.wang@canonical.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 85e207173f5d..b6dc47da1d7b 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -3428,7 +3428,11 @@ static void alc256_shutup(struct hda_codec *codec)
 
 	/* 3k pull low control for Headset jack. */
 	/* NOTE: call this before clearing the pin, otherwise codec stalls */
-	alc_update_coef_idx(codec, 0x46, 0, 3 << 12);
+	/* If disable 3k pulldown control for alc257, the Mic detection will not work correctly
+	 * when booting with headset plugged. So skip setting it for the codec alc257
+	 */
+	if (codec->core.vendor_id != 0x10ec0257)
+		alc_update_coef_idx(codec, 0x46, 0, 3 << 12);
 
 	if (!spec->no_shutup_pins)
 		snd_hda_codec_write(codec, hp_pin, 0,

From f73bbf639b32acb6b409e188fdde5644b301978f Mon Sep 17 00:00:00 2001
From: Kai-Heng Feng <kai.heng.feng@canonical.com>
Date: Mon, 14 Sep 2020 15:02:29 +0800
Subject: [PATCH 056/322] ALSA: hda/realtek: Enable front panel headset LED on
 Lenovo ThinkStation P520

On Lenovo P520, the front panel headset LED isn't lit up right now.

Realtek states that the LED needs to be enabled by ALC233's GPIO2, so
let's do it accordingly to light the LED up.

Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Acked-by: Hui Wang <hui.wang@canonical.com>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20200914070231.13192-1-kai.heng.feng@canonical.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index b6dc47da1d7b..c4d615cf8de1 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -6055,6 +6055,7 @@ static void alc_fixup_thinkpad_acpi(struct hda_codec *codec,
 #include "hp_x360_helper.c"
 
 enum {
+	ALC269_FIXUP_GPIO2,
 	ALC269_FIXUP_SONY_VAIO,
 	ALC275_FIXUP_SONY_VAIO_GPIO2,
 	ALC269_FIXUP_DELL_M101Z,
@@ -6236,6 +6237,10 @@ enum {
 };
 
 static const struct hda_fixup alc269_fixups[] = {
+	[ALC269_FIXUP_GPIO2] = {
+		.type = HDA_FIXUP_FUNC,
+		.v.func = alc_fixup_gpio2,
+	},
 	[ALC269_FIXUP_SONY_VAIO] = {
 		.type = HDA_FIXUP_PINCTLS,
 		.v.pins = (const struct hda_pintbl[]) {
@@ -7055,6 +7060,8 @@ static const struct hda_fixup alc269_fixups[] = {
 	[ALC233_FIXUP_LENOVO_MULTI_CODECS] = {
 		.type = HDA_FIXUP_FUNC,
 		.v.func = alc233_alc662_fixup_lenovo_dual_codecs,
+		.chained = true,
+		.chain_id = ALC269_FIXUP_GPIO2
 	},
 	[ALC233_FIXUP_ACER_HEADSET_MIC] = {
 		.type = HDA_FIXUP_VERBS,

From 315c7ad7a701baba28c628c4c5426b3d9617ceed Mon Sep 17 00:00:00 2001
From: Joakim Tjernlund <joakim.tjernlund@infinera.com>
Date: Thu, 10 Sep 2020 10:53:28 +0200
Subject: [PATCH 057/322] ALSA: usb-audio: Add delay quirk for H570e USB
 headsets

Needs the same delay as H650e

Signed-off-by: Joakim Tjernlund <joakim.tjernlund@infinera.com>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20200910085328.19188-1-joakim.tjernlund@infinera.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/usb/quirks.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 75bbdc691243..892296df131d 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -1678,12 +1678,13 @@ void snd_usb_ctl_msg_quirk(struct usb_device *dev, unsigned int pipe,
 	    && (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS)
 		msleep(20);
 
-	/* Zoom R16/24, Logitech H650e, Jabra 550a, Kingston HyperX needs a tiny
-	 * delay here, otherwise requests like get/set frequency return as
-	 * failed despite actually succeeding.
+	/* Zoom R16/24, Logitech H650e/H570e, Jabra 550a, Kingston HyperX
+	 *  needs a tiny delay here, otherwise requests like get/set
+	 *  frequency return as failed despite actually succeeding.
 	 */
 	if ((chip->usb_id == USB_ID(0x1686, 0x00dd) ||
 	     chip->usb_id == USB_ID(0x046d, 0x0a46) ||
+	     chip->usb_id == USB_ID(0x046d, 0x0a56) ||
 	     chip->usb_id == USB_ID(0x0b0e, 0x0349) ||
 	     chip->usb_id == USB_ID(0x0951, 0x16ad)) &&
 	    (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS)

From 19a508bd1ad8e444de86873bf2f2b2ab8edd6552 Mon Sep 17 00:00:00 2001
From: Charan Teja Reddy <charante@codeaurora.org>
Date: Fri, 18 Sep 2020 16:02:31 +0530
Subject: [PATCH 058/322] dmabuf: fix NULL pointer dereference in
 dma_buf_release()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

NULL pointer dereference is observed while exporting the dmabuf but
failed to allocate the 'struct file' which results into the dropping of
the allocated dentry corresponding to this file in the dmabuf fs, which
is ending up in dma_buf_release() and accessing the uninitialzed
dentry->d_fsdata.

Call stack on 5.4 is below:
 dma_buf_release+0x2c/0x254 drivers/dma-buf/dma-buf.c:88
 __dentry_kill+0x294/0x31c fs/dcache.c:584
 dentry_kill fs/dcache.c:673 [inline]
 dput+0x250/0x380 fs/dcache.c:859
 path_put+0x24/0x40 fs/namei.c:485
 alloc_file_pseudo+0x1a4/0x200 fs/file_table.c:235
 dma_buf_getfile drivers/dma-buf/dma-buf.c:473 [inline]
 dma_buf_export+0x25c/0x3ec drivers/dma-buf/dma-buf.c:585

Fix this by checking for the valid pointer in the dentry->d_fsdata.

Fixes: 4ab59c3c638c ("dma-buf: Move dma_buf_release() from fops to dentry_ops")
Cc: <stable@vger.kernel.org> [5.7+]
Signed-off-by: Charan Teja Reddy <charante@codeaurora.org>
Reviewed-by: Christian König <christian.koenig@amd.com>
Link: https://patchwork.freedesktop.org/patch/391319/
Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/dma-buf/dma-buf.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 58564d82a3a2..844967f98866 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -59,6 +59,8 @@ static void dma_buf_release(struct dentry *dentry)
 	struct dma_buf *dmabuf;
 
 	dmabuf = dentry->d_fsdata;
+	if (unlikely(!dmabuf))
+		return;
 
 	BUG_ON(dmabuf->vmapping_counter);
 

From 845b89127bc5458d0152a4d63f165c62a22fcb70 Mon Sep 17 00:00:00 2001
From: Jean Delvare <jdelvare@suse.de>
Date: Thu, 10 Sep 2020 11:57:08 +0200
Subject: [PATCH 059/322] i2c: i801: Exclude device from suspend direct
 complete optimization
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

By default, PCI drivers with runtime PM enabled will skip the calls
to suspend and resume on system PM. For this driver, we don't want
that, as we need to perform additional steps for system PM to work
properly on all systems. So instruct the PM core to not skip these
calls.

Fixes: a9c8088c7988 ("i2c: i801: Don't restore config registers on runtime PM")
Reported-by: Volker Rümelin <volker.ruemelin@googlemail.com>
Signed-off-by: Jean Delvare <jdelvare@suse.de>
Cc: stable@vger.kernel.org
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-i801.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index ebb4c0b03057..bffca729e1c7 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -1917,6 +1917,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
 
 	pci_set_drvdata(dev, priv);
 
+	dev_pm_set_driver_flags(&dev->dev, DPM_FLAG_NO_DIRECT_COMPLETE);
 	pm_runtime_set_autosuspend_delay(&dev->dev, 1000);
 	pm_runtime_use_autosuspend(&dev->dev);
 	pm_runtime_put_autosuspend(&dev->dev);

From 8f8bf00b1c00b37ef5c32a642e5c3e0a364182b6 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 21 Sep 2020 12:26:32 +0200
Subject: [PATCH 060/322] Revert "ALSA: hda - Fix silent audio output and
 corrupted input on MSI X570-A PRO"

This reverts commit 15cbff3fbbc6 ("ALSA: hda - Fix silent audio output
and corrupted input on MSI X570-A PRO").

A regression reported by a Fedora user for MSI X570-A PRO mobo.
Until the correct solution is found out, let's revert the quirk as a
quick workaround.

Fixes: 15cbff3fbbc6 ("ALSA: hda - Fix silent audio output and corrupted input on MSI X570-A PRO")
Reported-by: Hans de Goede <hdegoede@redhat.com>
Cc: <stable@vger.kernel.org>
Cc: Dan Crawford <dnlcrwfrd@gmail.com>
BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1879277
Link: https://lore.kernel.org/r/7efd2fe5-bf38-7f85-891a-eee3845d1493@redhat.com
Link: https://lore.kernel.org/r/20200921102632.31139-1-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index c4d615cf8de1..d4f17b465892 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -2475,7 +2475,6 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x1462, 0x1276, "MSI-GL73", ALC1220_FIXUP_CLEVO_P950),
 	SND_PCI_QUIRK(0x1462, 0x1293, "MSI-GP65", ALC1220_FIXUP_CLEVO_P950),
 	SND_PCI_QUIRK(0x1462, 0x7350, "MSI-7350", ALC889_FIXUP_CD),
-	SND_PCI_QUIRK(0x1462, 0x9c37, "MSI X570-A PRO", ALC1220_FIXUP_CLEVO_P950),
 	SND_PCI_QUIRK(0x1462, 0xda57, "MSI Z270-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS),
 	SND_PCI_QUIRK_VENDOR(0x1462, "MSI", ALC882_FIXUP_GPIO3),
 	SND_PCI_QUIRK(0x147b, 0x107a, "Abit AW9D-MAX", ALC882_FIXUP_ABIT_AW9D_MAX),

From 35be8851d172c6e3db836c0f28c19087b10c9e00 Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Date: Mon, 21 Sep 2020 16:57:14 +0900
Subject: [PATCH 061/322] btrfs: fix overflow when copying corrupt csums for a
 message

Syzkaller reported a buffer overflow in btree_readpage_end_io_hook()
when loop mounting a crafted image:

  detected buffer overflow in memcpy
  ------------[ cut here ]------------
  kernel BUG at lib/string.c:1129!
  invalid opcode: 0000 [#1] PREEMPT SMP KASAN
  CPU: 1 PID: 26 Comm: kworker/u4:2 Not tainted 5.9.0-rc4-syzkaller #0
  Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
  Workqueue: btrfs-endio-meta btrfs_work_helper
  RIP: 0010:fortify_panic+0xf/0x20 lib/string.c:1129
  RSP: 0018:ffffc90000e27980 EFLAGS: 00010286
  RAX: 0000000000000022 RBX: ffff8880a80dca64 RCX: 0000000000000000
  RDX: ffff8880a90860c0 RSI: ffffffff815dba07 RDI: fffff520001c4f22
  RBP: ffff8880a80dca00 R08: 0000000000000022 R09: ffff8880ae7318e7
  R10: 0000000000000000 R11: 0000000000077578 R12: 00000000ffffff6e
  R13: 0000000000000008 R14: ffffc90000e27a40 R15: 1ffff920001c4f3c
  FS:  0000000000000000(0000) GS:ffff8880ae700000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 0000557335f440d0 CR3: 000000009647d000 CR4: 00000000001506e0
  DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
  DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
  Call Trace:
   memcpy include/linux/string.h:405 [inline]
   btree_readpage_end_io_hook.cold+0x206/0x221 fs/btrfs/disk-io.c:642
   end_bio_extent_readpage+0x4de/0x10c0 fs/btrfs/extent_io.c:2854
   bio_endio+0x3cf/0x7f0 block/bio.c:1449
   end_workqueue_fn+0x114/0x170 fs/btrfs/disk-io.c:1695
   btrfs_work_helper+0x221/0xe20 fs/btrfs/async-thread.c:318
   process_one_work+0x94c/0x1670 kernel/workqueue.c:2269
   worker_thread+0x64c/0x1120 kernel/workqueue.c:2415
   kthread+0x3b5/0x4a0 kernel/kthread.c:292
   ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:294
  Modules linked in:
  ---[ end trace b68924293169feef ]---
  RIP: 0010:fortify_panic+0xf/0x20 lib/string.c:1129
  RSP: 0018:ffffc90000e27980 EFLAGS: 00010286
  RAX: 0000000000000022 RBX: ffff8880a80dca64 RCX: 0000000000000000
  RDX: ffff8880a90860c0 RSI: ffffffff815dba07 RDI: fffff520001c4f22
  RBP: ffff8880a80dca00 R08: 0000000000000022 R09: ffff8880ae7318e7
  R10: 0000000000000000 R11: 0000000000077578 R12: 00000000ffffff6e
  R13: 0000000000000008 R14: ffffc90000e27a40 R15: 1ffff920001c4f3c
  FS:  0000000000000000(0000) GS:ffff8880ae700000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 00007f95b7c4d008 CR3: 000000009647d000 CR4: 00000000001506e0
  DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
  DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400

The overflow happens, because in btree_readpage_end_io_hook() we assume
that we have found a 4 byte checksum instead of the real possible 32
bytes we have for the checksums.

With the fix applied:

[   35.726623] BTRFS: device fsid 815caf9a-dc43-4d2a-ac54-764b8333d765 devid 1 transid 5 /dev/loop0 scanned by syz-repro (215)
[   35.738994] BTRFS info (device loop0): disk space caching is enabled
[   35.738998] BTRFS info (device loop0): has skinny extents
[   35.743337] BTRFS warning (device loop0): loop0 checksum verify failed on 1052672 wanted 0xf9c035fc8d239a54 found 0x67a25c14b7eabcf9 level 0
[   35.743420] BTRFS error (device loop0): failed to read chunk root
[   35.745899] BTRFS error (device loop0): open_ctree failed

Reported-by: syzbot+e864a35d361e1d4e29a5@syzkaller.appspotmail.com
Fixes: d5178578bcd4 ("btrfs: directly call into crypto framework for checksumming")
CC: stable@vger.kernel.org # 5.4+
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/disk-io.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 9fe9ec6aab71..93d3bb8989df 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -636,16 +636,15 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
 	csum_tree_block(eb, result);
 
 	if (memcmp_extent_buffer(eb, result, 0, csum_size)) {
-		u32 val;
-		u32 found = 0;
-
-		memcpy(&found, result, csum_size);
+		u8 val[BTRFS_CSUM_SIZE] = { 0 };
 
 		read_extent_buffer(eb, &val, 0, csum_size);
 		btrfs_warn_rl(fs_info,
-		"%s checksum verify failed on %llu wanted %x found %x level %d",
+	"%s checksum verify failed on %llu wanted " CSUM_FMT " found " CSUM_FMT " level %d",
 			      fs_info->sb->s_id, eb->start,
-			      val, found, btrfs_header_level(eb));
+			      CSUM_FMT_VALUE(csum_size, val),
+			      CSUM_FMT_VALUE(csum_size, result),
+			      btrfs_header_level(eb));
 		ret = -EUCLEAN;
 		goto err;
 	}

From 6402e780e9434246a4c221034e1f8a0fd67f20bd Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Wed, 3 Jun 2020 13:12:12 +0200
Subject: [PATCH 062/322] clk: tegra: Capitalization fixes

HW, XUSB and PLL are abbreviations and should be all-uppercase.

Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/clk/tegra/clk-pll.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/clk/tegra/clk-pll.c b/drivers/clk/tegra/clk-pll.c
index f180c055d33f..52ccb13d0a88 100644
--- a/drivers/clk/tegra/clk-pll.c
+++ b/drivers/clk/tegra/clk-pll.c
@@ -1673,7 +1673,7 @@ static int clk_plle_tegra114_enable(struct clk_hw *hw)
 	pll_writel(val, PLLE_SS_CTRL, pll);
 	udelay(1);
 
-	/* Enable hw control of xusb brick pll */
+	/* Enable HW control of XUSB brick PLL */
 	val = pll_readl_misc(pll);
 	val &= ~PLLE_MISC_IDDQ_SW_CTRL;
 	pll_writel_misc(val, pll);
@@ -1696,7 +1696,7 @@ static int clk_plle_tegra114_enable(struct clk_hw *hw)
 	val |= XUSBIO_PLL_CFG0_SEQ_ENABLE;
 	pll_writel(val, XUSBIO_PLL_CFG0, pll);
 
-	/* Enable hw control of SATA pll */
+	/* Enable HW control of SATA PLL */
 	val = pll_readl(SATA_PLL_CFG0, pll);
 	val &= ~SATA_PLL_CFG0_PADPLL_RESET_SWCTL;
 	val |= SATA_PLL_CFG0_PADPLL_USE_LOCKDET;

From 5105660ee80862b85f7769626d0f936c18ce1885 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Wed, 3 Jun 2020 13:13:07 +0200
Subject: [PATCH 063/322] clk: tegra: Always program PLL_E when enabled

Commit bff1cef5f23a ("clk: tegra: Don't enable already enabled PLLs")
added checks to avoid enabling PLLs that have already been enabled by
the bootloader. However, the PLL_E configuration inherited from the
bootloader isn't necessarily the one that is needed for the kernel.

This can cause SATA to fail like this:

    [    5.310270] phy phy-sata.6: phy poweron failed --> -110
    [    5.315604] tegra-ahci 70027000.sata: failed to power on AHCI controller: -110
    [    5.323022] tegra-ahci: probe of 70027000.sata failed with error -110

Fix this by always programming the PLL_E. This ensures that any mis-
configuration by the bootloader will be overwritten by the kernel.

Fixes: bff1cef5f23a ("clk: tegra: Don't enable already enabled PLLs")
Reported-by: LABBE Corentin <clabbe@baylibre.com>
Tested-by: Corentin Labbe <clabbe@baylibre.com>
Reviewed-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/clk/tegra/clk-pll.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/clk/tegra/clk-pll.c b/drivers/clk/tegra/clk-pll.c
index 52ccb13d0a88..c5cc0a2dac6f 100644
--- a/drivers/clk/tegra/clk-pll.c
+++ b/drivers/clk/tegra/clk-pll.c
@@ -1611,9 +1611,6 @@ static int clk_plle_tegra114_enable(struct clk_hw *hw)
 	unsigned long flags = 0;
 	unsigned long input_rate;
 
-	if (clk_pll_is_enabled(hw))
-		return 0;
-
 	input_rate = clk_hw_get_rate(clk_hw_get_parent(hw));
 
 	if (_get_table_rate(hw, &sel, pll->params->fixed_rate, input_rate))

From 2f878d04218c8b26f6d0ab26955ca6b03848a1ad Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Mon, 22 Jun 2020 08:08:26 +0200
Subject: [PATCH 064/322] clk: tegra: Fix missing prototype for
 tegra210_clk_register_emc()

Include the Tegra driver's clk.h to pull in the prototype definition for
this function so that compilers don't warn about it being missing.

Fixes: 0ac65fc946d3 ("clk: tegra: Implement Tegra210 EMC clock")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
---
 drivers/clk/tegra/clk-tegra210-emc.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/clk/tegra/clk-tegra210-emc.c b/drivers/clk/tegra/clk-tegra210-emc.c
index 352a2c3fc374..51fd0ec2a2d0 100644
--- a/drivers/clk/tegra/clk-tegra210-emc.c
+++ b/drivers/clk/tegra/clk-tegra210-emc.c
@@ -12,6 +12,8 @@
 #include <linux/io.h>
 #include <linux/slab.h>
 
+#include "clk.h"
+
 #define CLK_SOURCE_EMC 0x19c
 #define  CLK_SOURCE_EMC_2X_CLK_SRC GENMASK(31, 29)
 #define  CLK_SOURCE_EMC_MC_EMC_SAME_FREQ BIT(16)

From 3ad1c8ef083bef96ec922688966484be1039e6b5 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Mon, 21 Sep 2020 12:31:36 +0200
Subject: [PATCH 065/322] rcu/tree: Export rcu_idle_{enter,exit} to modules

Fix this link error:

  ERROR: modpost: "rcu_idle_enter" [drivers/acpi/processor.ko] undefined!
  ERROR: modpost: "rcu_idle_exit" [drivers/acpi/processor.ko] undefined!

when CONFIG_ACPI_PROCESSOR is built as module. PeterZ says that in light
of ARM needing those soon too, they should simply be exported.

Fixes: 1fecfdbb7acc ("ACPI: processor: Take over RCU-idle for C3-BM idle")
Reported-by: Sven Joachim <svenjoac@gmx.de>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Paul E. McKenney <paulmckrcu@kernel.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 kernel/rcu/tree.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 8ce77d9ac716..f78ee759af9c 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -673,6 +673,7 @@ void rcu_idle_enter(void)
 	lockdep_assert_irqs_disabled();
 	rcu_eqs_enter(false);
 }
+EXPORT_SYMBOL_GPL(rcu_idle_enter);
 
 #ifdef CONFIG_NO_HZ_FULL
 /**
@@ -886,6 +887,7 @@ void rcu_idle_exit(void)
 	rcu_eqs_exit(false);
 	local_irq_restore(flags);
 }
+EXPORT_SYMBOL_GPL(rcu_idle_exit);
 
 #ifdef CONFIG_NO_HZ_FULL
 /**

From 36050d8984ab743f9990a2eb97a0062fdc3d7bbd Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Fri, 4 Sep 2020 08:47:05 +0200
Subject: [PATCH 066/322] cpuidle: psci: Fix suspicious RCU usage

The commit eb1f00237aca ("lockdep,trace: Expose tracepoints"), started to
expose us for tracepoints. This lead to the following RCU splat on an ARM64
Qcom board.

[    5.529634] WARNING: suspicious RCU usage
[    5.537307] sdhci-pltfm: SDHCI platform and OF driver helper
[    5.541092] 5.9.0-rc3 #86 Not tainted
[    5.541098] -----------------------------
[    5.541105] ../include/trace/events/lock.h:37 suspicious rcu_dereference_check() usage!
[    5.541110]
[    5.541110] other info that might help us debug this:
[    5.541110]
[    5.541116]
[    5.541116] rcu_scheduler_active = 2, debug_locks = 1
[    5.541122] RCU used illegally from extended quiescent state!
[    5.541129] no locks held by swapper/0/0.
[    5.541134]
[    5.541134] stack backtrace:
[    5.541143] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.9.0-rc3 #86
[    5.541149] Hardware name: Qualcomm Technologies, Inc. APQ 8016 SBC (DT)
[    5.541157] Call trace:
[    5.568185] sdhci_msm 7864900.sdhci: Got CD GPIO
[    5.574186]  dump_backtrace+0x0/0x1c8
[    5.574206]  show_stack+0x14/0x20
[    5.574229]  dump_stack+0xe8/0x154
[    5.574250]  lockdep_rcu_suspicious+0xd4/0xf8
[    5.574269]  lock_acquire+0x3f0/0x460
[    5.574292]  _raw_spin_lock_irqsave+0x80/0xb0
[    5.574314]  __pm_runtime_suspend+0x4c/0x188
[    5.574341]  psci_enter_domain_idle_state+0x40/0xa0
[    5.574362]  cpuidle_enter_state+0xc0/0x610
[    5.646487]  cpuidle_enter+0x38/0x50
[    5.650651]  call_cpuidle+0x18/0x40
[    5.654467]  do_idle+0x228/0x278
[    5.657678]  cpu_startup_entry+0x24/0x70
[    5.661153]  rest_init+0x1a4/0x278
[    5.665061]  arch_call_rest_init+0xc/0x14
[    5.668272]  start_kernel+0x508/0x540

Following the path in pm_runtime_put_sync_suspend() from
psci_enter_domain_idle_state(), it seems like we end up using the RCU.
Therefore, let's simply silence the splat by informing the RCU about it
with RCU_NONIDLE.

Note that, this is a temporary solution. Instead we should strive to avoid
using RCU_NONIDLE (and similar), but rather push rcu_idle_enter|exit()
further down, closer to the arch specific code. However, as the CPU PM
notifiers are also using the RCU, additional rework is needed.

Reported-by: Naresh Kamboju <naresh.kamboju@linaro.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/cpuidle-psci.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/cpuidle/cpuidle-psci.c b/drivers/cpuidle/cpuidle-psci.c
index 74463841805f..d928b37718bd 100644
--- a/drivers/cpuidle/cpuidle-psci.c
+++ b/drivers/cpuidle/cpuidle-psci.c
@@ -66,7 +66,7 @@ static int psci_enter_domain_idle_state(struct cpuidle_device *dev,
 		return -1;
 
 	/* Do runtime PM to manage a hierarchical CPU toplogy. */
-	pm_runtime_put_sync_suspend(pd_dev);
+	RCU_NONIDLE(pm_runtime_put_sync_suspend(pd_dev));
 
 	state = psci_get_domain_state();
 	if (!state)
@@ -74,7 +74,7 @@ static int psci_enter_domain_idle_state(struct cpuidle_device *dev,
 
 	ret = psci_cpu_suspend_enter(state) ? -1 : idx;
 
-	pm_runtime_get_sync(pd_dev);
+	RCU_NONIDLE(pm_runtime_get_sync(pd_dev));
 
 	cpu_pm_exit();
 

From 13a9a9d74d4d9689ad65938966dbc66386063648 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Sun, 20 Sep 2020 13:46:25 -0400
Subject: [PATCH 067/322] SUNRPC: Fix svc_flush_dcache()

On platforms that implement flush_dcache_page(), a large NFS WRITE
triggers the WARN_ONCE in bvec_iter_advance():

Sep 20 14:01:05 klimt.1015granger.net kernel: Attempted to advance past end of bvec iter
Sep 20 14:01:05 klimt.1015granger.net kernel: WARNING: CPU: 0 PID: 1032 at include/linux/bvec.h:101 bvec_iter_advance.isra.0+0xa7/0x158 [sunrpc]

Sep 20 14:01:05 klimt.1015granger.net kernel: Call Trace:
Sep 20 14:01:05 klimt.1015granger.net kernel:  svc_tcp_recvfrom+0x60c/0x12c7 [sunrpc]
Sep 20 14:01:05 klimt.1015granger.net kernel:  ? bvec_iter_advance.isra.0+0x158/0x158 [sunrpc]
Sep 20 14:01:05 klimt.1015granger.net kernel:  ? del_timer_sync+0x4b/0x55
Sep 20 14:01:05 klimt.1015granger.net kernel:  ? test_bit+0x1d/0x27 [sunrpc]
Sep 20 14:01:05 klimt.1015granger.net kernel:  svc_recv+0x1193/0x15e4 [sunrpc]
Sep 20 14:01:05 klimt.1015granger.net kernel:  ? try_to_freeze.isra.0+0x6f/0x6f [sunrpc]
Sep 20 14:01:05 klimt.1015granger.net kernel:  ? refcount_sub_and_test.constprop.0+0x13/0x40 [sunrpc]
Sep 20 14:01:05 klimt.1015granger.net kernel:  ? svc_xprt_put+0x1e/0x29f [sunrpc]
Sep 20 14:01:05 klimt.1015granger.net kernel:  ? svc_send+0x39f/0x3c1 [sunrpc]
Sep 20 14:01:05 klimt.1015granger.net kernel:  nfsd+0x282/0x345 [nfsd]
Sep 20 14:01:05 klimt.1015granger.net kernel:  ? __kthread_parkme+0x74/0xba
Sep 20 14:01:05 klimt.1015granger.net kernel:  kthread+0x2ad/0x2bc
Sep 20 14:01:05 klimt.1015granger.net kernel:  ? nfsd_destroy+0x124/0x124 [nfsd]
Sep 20 14:01:05 klimt.1015granger.net kernel:  ? test_bit+0x1d/0x27
Sep 20 14:01:05 klimt.1015granger.net kernel:  ? kthread_mod_delayed_work+0x115/0x115
Sep 20 14:01:05 klimt.1015granger.net kernel:  ret_from_fork+0x22/0x30

Reported-by: He Zhe <zhe.he@windriver.com>
Fixes: ca07eda33e01 ("SUNRPC: Refactor svc_recvfrom()")
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 net/sunrpc/svcsock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index d5805fa1d066..c2752e2b9ce3 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -228,7 +228,7 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining)
 static void svc_flush_bvec(const struct bio_vec *bvec, size_t size, size_t seek)
 {
 	struct bvec_iter bi = {
-		.bi_size	= size,
+		.bi_size	= size + seek,
 	};
 	struct bio_vec bv;
 

From e23bb04b0c938588eae41b7f4712b722290ed2b8 Mon Sep 17 00:00:00 2001
From: Tony Ambardar <tony.ambardar@gmail.com>
Date: Sat, 19 Sep 2020 22:01:33 -0700
Subject: [PATCH 068/322] bpf: Fix sysfs export of empty BTF section

If BTF data is missing or removed from the ELF section it is still exported
via sysfs as a zero-length file:

  root@OpenWrt:/# ls -l /sys/kernel/btf/vmlinux
  -r--r--r--    1 root    root    0 Jul 18 02:59 /sys/kernel/btf/vmlinux

Moreover, reads from this file succeed and leak kernel data:

  root@OpenWrt:/# hexdump -C /sys/kernel/btf/vmlinux|head -10
  000000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
  *
  000cc0 00 00 00 00 00 00 00 00 00 00 00 00 80 83 b0 80 |................|
  000cd0 00 10 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
  000ce0 00 00 00 00 00 00 00 00 00 00 00 00 57 ac 6e 9d |............W.n.|
  000cf0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
  *
  002650 00 00 00 00 00 00 00 10 00 00 00 01 00 00 00 01 |................|
  002660 80 82 9a c4 80 85 97 80 81 a9 51 68 00 00 00 02 |..........Qh....|
  002670 80 25 44 dc 80 85 97 80 81 a9 50 24 81 ab c4 60 |.%D.......P$...`|

This situation was first observed with kernel 5.4.x, cross-compiled for a
MIPS target system. Fix by adding a sanity-check for export of zero-length
data sections.

Fixes: 341dfcf8d78e ("btf: expose BTF info through sysfs")
Signed-off-by: Tony Ambardar <Tony.Ambardar@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/b38db205a66238f70823039a8c531535864eaac5.1600417359.git.Tony.Ambardar@gmail.com
---
 kernel/bpf/sysfs_btf.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/bpf/sysfs_btf.c b/kernel/bpf/sysfs_btf.c
index 3b495773de5a..11b3380887fa 100644
--- a/kernel/bpf/sysfs_btf.c
+++ b/kernel/bpf/sysfs_btf.c
@@ -30,15 +30,15 @@ static struct kobject *btf_kobj;
 
 static int __init btf_vmlinux_init(void)
 {
-	if (!__start_BTF)
+	bin_attr_btf_vmlinux.size = __stop_BTF - __start_BTF;
+
+	if (!__start_BTF || bin_attr_btf_vmlinux.size == 0)
 		return 0;
 
 	btf_kobj = kobject_create_and_add("btf", kernel_kobj);
 	if (!btf_kobj)
 		return -ENOMEM;
 
-	bin_attr_btf_vmlinux.size = __stop_BTF - __start_BTF;
-
 	return sysfs_create_bin_file(btf_kobj, &bin_attr_btf_vmlinux);
 }
 

From 65c204398928f9c79f1a29912b410439f7052635 Mon Sep 17 00:00:00 2001
From: Tony Ambardar <tony.ambardar@gmail.com>
Date: Sat, 19 Sep 2020 22:01:34 -0700
Subject: [PATCH 069/322] bpf: Prevent .BTF section elimination

Systems with memory or disk constraints often reduce the kernel footprint
by configuring LD_DEAD_CODE_DATA_ELIMINATION. However, this can result in
removal of any BTF information.

Use the KEEP() macro to preserve the BTF data as done with other important
sections, while still allowing for smaller kernels.

Fixes: 90ceddcb4950 ("bpf: Support llvm-objcopy for vmlinux BTF")
Signed-off-by: Tony Ambardar <Tony.Ambardar@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/a635b5d3e2da044e7b51ec1315e8910fbce0083f.1600417359.git.Tony.Ambardar@gmail.com
---
 include/asm-generic/vmlinux.lds.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 5430febd34be..7636bc71c71f 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -661,7 +661,7 @@
 #define BTF								\
 	.BTF : AT(ADDR(.BTF) - LOAD_OFFSET) {				\
 		__start_BTF = .;					\
-		*(.BTF)							\
+		KEEP(*(.BTF))						\
 		__stop_BTF = .;						\
 	}								\
 	. = ALIGN(4);							\

From 1245008122d7311683d70c05b2eea167a314fb5f Mon Sep 17 00:00:00 2001
From: Tony Ambardar <tony.ambardar@gmail.com>
Date: Sat, 19 Sep 2020 22:01:35 -0700
Subject: [PATCH 070/322] libbpf: Fix native endian assumption when parsing BTF

Code in btf__parse_raw() fails to detect raw BTF of non-native endianness
and assumes it must be ELF data, which then fails to parse as ELF and
yields a misleading error message:

  root:/# bpftool btf dump file /sys/kernel/btf/vmlinux
  libbpf: failed to get EHDR from /sys/kernel/btf/vmlinux

For example, this could occur after cross-compiling a BTF-enabled kernel
for a target with non-native endianness, which is currently unsupported.

Check for correct endianness and emit a clearer error message:

  root:/# bpftool btf dump file /sys/kernel/btf/vmlinux
  libbpf: non-native BTF endianness is not supported

Fixes: 94a1fedd63ed ("libbpf: Add btf__parse_raw() and generic btf__parse() APIs")
Signed-off-by: Tony Ambardar <Tony.Ambardar@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/90f81508ecc57bc0da318e0fe0f45cfe49b17ea7.1600417359.git.Tony.Ambardar@gmail.com
---
 tools/lib/bpf/btf.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 7dfca7016aaa..6bdbc389b493 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -659,6 +659,12 @@ struct btf *btf__parse_raw(const char *path)
 		err = -EIO;
 		goto err_out;
 	}
+	if (magic == __bswap_16(BTF_MAGIC)) {
+		/* non-native endian raw BTF */
+		pr_warn("non-native BTF endianness is not supported\n");
+		err = -LIBBPF_ERRNO__ENDIAN;
+		goto err_out;
+	}
 	if (magic != BTF_MAGIC) {
 		/* definitely not a raw BTF */
 		err = -EPROTO;

From 4003324856311faebb46cbd56a1616bd3f3b67c2 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Thu, 17 Sep 2020 18:34:04 +0300
Subject: [PATCH 071/322] regmap: fix page selection for noinc reads

Non-incrementing reads can fail if register + length crosses page
border. However for non-incrementing reads we should not check for page
border crossing. Fix this by passing additional flag to _regmap_raw_read
and passing length to _regmap_select_page basing on the flag.

Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Fixes: 74fe7b551f33 ("regmap: Add regmap_noinc_read API")
Link: https://lore.kernel.org/r/20200917153405.3139200-1-dmitry.baryshkov@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/regmap/regmap.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index 388ff8a6816c..e2822dc21022 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -2472,7 +2472,7 @@ int regmap_raw_write_async(struct regmap *map, unsigned int reg,
 EXPORT_SYMBOL_GPL(regmap_raw_write_async);
 
 static int _regmap_raw_read(struct regmap *map, unsigned int reg, void *val,
-			    unsigned int val_len)
+			    unsigned int val_len, bool noinc)
 {
 	struct regmap_range_node *range;
 	int ret;
@@ -2485,7 +2485,7 @@ static int _regmap_raw_read(struct regmap *map, unsigned int reg, void *val,
 	range = _regmap_range_lookup(map, reg);
 	if (range) {
 		ret = _regmap_select_page(map, &reg, range,
-					  val_len / map->format.val_bytes);
+					  noinc ? 1 : val_len / map->format.val_bytes);
 		if (ret != 0)
 			return ret;
 	}
@@ -2523,7 +2523,7 @@ static int _regmap_bus_read(void *context, unsigned int reg,
 	if (!map->format.parse_val)
 		return -EINVAL;
 
-	ret = _regmap_raw_read(map, reg, work_val, map->format.val_bytes);
+	ret = _regmap_raw_read(map, reg, work_val, map->format.val_bytes, false);
 	if (ret == 0)
 		*val = map->format.parse_val(work_val);
 
@@ -2639,7 +2639,7 @@ int regmap_raw_read(struct regmap *map, unsigned int reg, void *val,
 
 		/* Read bytes that fit into whole chunks */
 		for (i = 0; i < chunk_count; i++) {
-			ret = _regmap_raw_read(map, reg, val, chunk_bytes);
+			ret = _regmap_raw_read(map, reg, val, chunk_bytes, false);
 			if (ret != 0)
 				goto out;
 
@@ -2650,7 +2650,7 @@ int regmap_raw_read(struct regmap *map, unsigned int reg, void *val,
 
 		/* Read remaining bytes */
 		if (val_len) {
-			ret = _regmap_raw_read(map, reg, val, val_len);
+			ret = _regmap_raw_read(map, reg, val, val_len, false);
 			if (ret != 0)
 				goto out;
 		}
@@ -2725,7 +2725,7 @@ int regmap_noinc_read(struct regmap *map, unsigned int reg,
 			read_len = map->max_raw_read;
 		else
 			read_len = val_len;
-		ret = _regmap_raw_read(map, reg, val, read_len);
+		ret = _regmap_raw_read(map, reg, val, read_len, true);
 		if (ret)
 			goto out_unlock;
 		val = ((u8 *)val) + read_len;

From 05669b63170771d554854c0e465b76dc98fc7c84 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Thu, 17 Sep 2020 18:34:05 +0300
Subject: [PATCH 072/322] regmap: fix page selection for noinc writes

Non-incrementing writes can fail if register + length crosses page
border. However for non-incrementing writes we should not check for page
border crossing. Fix this by passing additional flag to _regmap_raw_write
and passing length to _regmap_select_page basing on the flag.

Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Fixes: cdf6b11daa77 ("regmap: Add regmap_noinc_write API")
Link: https://lore.kernel.org/r/20200917153405.3139200-2-dmitry.baryshkov@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/regmap/internal.h |  2 +-
 drivers/base/regmap/regcache.c |  2 +-
 drivers/base/regmap/regmap.c   | 21 +++++++++++----------
 3 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h
index e0ff8e90ebdc..7be2fcfeea52 100644
--- a/drivers/base/regmap/internal.h
+++ b/drivers/base/regmap/internal.h
@@ -259,7 +259,7 @@ bool regcache_set_val(struct regmap *map, void *base, unsigned int idx,
 int regcache_lookup_reg(struct regmap *map, unsigned int reg);
 
 int _regmap_raw_write(struct regmap *map, unsigned int reg,
-		      const void *val, size_t val_len);
+		      const void *val, size_t val_len, bool noinc);
 
 void regmap_async_complete_cb(struct regmap_async *async, int ret);
 
diff --git a/drivers/base/regmap/regcache.c b/drivers/base/regmap/regcache.c
index a93cafd7be4f..7f4b3b62492c 100644
--- a/drivers/base/regmap/regcache.c
+++ b/drivers/base/regmap/regcache.c
@@ -717,7 +717,7 @@ static int regcache_sync_block_raw_flush(struct regmap *map, const void **data,
 
 	map->cache_bypass = true;
 
-	ret = _regmap_raw_write(map, base, *data, count * val_bytes);
+	ret = _regmap_raw_write(map, base, *data, count * val_bytes, false);
 	if (ret)
 		dev_err(map->dev, "Unable to sync registers %#x-%#x. %d\n",
 			base, cur - map->reg_stride, ret);
diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index e2822dc21022..b71f9ecddff5 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -1486,7 +1486,7 @@ static void regmap_set_work_buf_flag_mask(struct regmap *map, int max_bytes,
 }
 
 static int _regmap_raw_write_impl(struct regmap *map, unsigned int reg,
-				  const void *val, size_t val_len)
+				  const void *val, size_t val_len, bool noinc)
 {
 	struct regmap_range_node *range;
 	unsigned long flags;
@@ -1545,7 +1545,7 @@ static int _regmap_raw_write_impl(struct regmap *map, unsigned int reg,
 				win_residue, val_len / map->format.val_bytes);
 			ret = _regmap_raw_write_impl(map, reg, val,
 						     win_residue *
-						     map->format.val_bytes);
+						     map->format.val_bytes, noinc);
 			if (ret != 0)
 				return ret;
 
@@ -1559,7 +1559,7 @@ static int _regmap_raw_write_impl(struct regmap *map, unsigned int reg,
 			win_residue = range->window_len - win_offset;
 		}
 
-		ret = _regmap_select_page(map, &reg, range, val_num);
+		ret = _regmap_select_page(map, &reg, range, noinc ? 1 : val_num);
 		if (ret != 0)
 			return ret;
 	}
@@ -1767,7 +1767,8 @@ static int _regmap_bus_raw_write(void *context, unsigned int reg,
 				      map->work_buf +
 				      map->format.reg_bytes +
 				      map->format.pad_bytes,
-				      map->format.val_bytes);
+				      map->format.val_bytes,
+				      false);
 }
 
 static inline void *_regmap_map_get_context(struct regmap *map)
@@ -1861,7 +1862,7 @@ int regmap_write_async(struct regmap *map, unsigned int reg, unsigned int val)
 EXPORT_SYMBOL_GPL(regmap_write_async);
 
 int _regmap_raw_write(struct regmap *map, unsigned int reg,
-		      const void *val, size_t val_len)
+		      const void *val, size_t val_len, bool noinc)
 {
 	size_t val_bytes = map->format.val_bytes;
 	size_t val_count = val_len / val_bytes;
@@ -1882,7 +1883,7 @@ int _regmap_raw_write(struct regmap *map, unsigned int reg,
 
 	/* Write as many bytes as possible with chunk_size */
 	for (i = 0; i < chunk_count; i++) {
-		ret = _regmap_raw_write_impl(map, reg, val, chunk_bytes);
+		ret = _regmap_raw_write_impl(map, reg, val, chunk_bytes, noinc);
 		if (ret)
 			return ret;
 
@@ -1893,7 +1894,7 @@ int _regmap_raw_write(struct regmap *map, unsigned int reg,
 
 	/* Write remaining bytes */
 	if (val_len)
-		ret = _regmap_raw_write_impl(map, reg, val, val_len);
+		ret = _regmap_raw_write_impl(map, reg, val, val_len, noinc);
 
 	return ret;
 }
@@ -1926,7 +1927,7 @@ int regmap_raw_write(struct regmap *map, unsigned int reg,
 
 	map->lock(map->lock_arg);
 
-	ret = _regmap_raw_write(map, reg, val, val_len);
+	ret = _regmap_raw_write(map, reg, val, val_len, false);
 
 	map->unlock(map->lock_arg);
 
@@ -1984,7 +1985,7 @@ int regmap_noinc_write(struct regmap *map, unsigned int reg,
 			write_len = map->max_raw_write;
 		else
 			write_len = val_len;
-		ret = _regmap_raw_write(map, reg, val, write_len);
+		ret = _regmap_raw_write(map, reg, val, write_len, true);
 		if (ret)
 			goto out_unlock;
 		val = ((u8 *)val) + write_len;
@@ -2461,7 +2462,7 @@ int regmap_raw_write_async(struct regmap *map, unsigned int reg,
 
 	map->async = true;
 
-	ret = _regmap_raw_write(map, reg, val, val_len);
+	ret = _regmap_raw_write(map, reg, val, val_len, false);
 
 	map->async = false;
 

From b867eef4cf548cd9541225aadcdcee644669b9e1 Mon Sep 17 00:00:00 2001
From: Chris Packham <chris.packham@alliedtelesis.co.nz>
Date: Fri, 4 Sep 2020 12:28:12 +1200
Subject: [PATCH 073/322] spi: fsl-espi: Only process interrupts for expected
 events

The SPIE register contains counts for the TX FIFO so any time the irq
handler was invoked we would attempt to process the RX/TX fifos. Use the
SPIM value to mask the events so that we only process interrupts that
were expected.

This was a latent issue exposed by commit 3282a3da25bd ("powerpc/64:
Implement soft interrupt replay in C").

Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
Link: https://lore.kernel.org/r/20200904002812.7300-1-chris.packham@alliedtelesis.co.nz
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-fsl-espi.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi-fsl-espi.c b/drivers/spi/spi-fsl-espi.c
index e60581283a24..6d148ab70b93 100644
--- a/drivers/spi/spi-fsl-espi.c
+++ b/drivers/spi/spi-fsl-espi.c
@@ -564,13 +564,14 @@ static void fsl_espi_cpu_irq(struct fsl_espi *espi, u32 events)
 static irqreturn_t fsl_espi_irq(s32 irq, void *context_data)
 {
 	struct fsl_espi *espi = context_data;
-	u32 events;
+	u32 events, mask;
 
 	spin_lock(&espi->lock);
 
 	/* Get interrupt events(tx/rx) */
 	events = fsl_espi_read_reg(espi, ESPI_SPIE);
-	if (!events) {
+	mask = fsl_espi_read_reg(espi, ESPI_SPIM);
+	if (!(events & mask)) {
 		spin_unlock(&espi->lock);
 		return IRQ_NONE;
 	}

From 921daeeca91bda2dfb57fcba9f69ff368083be4b Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 4 Sep 2020 19:21:41 +0200
Subject: [PATCH 074/322] pinctrl: cherryview: Preserve
 CHV_PADCTRL1_INVRXTX_TXDATA flag on GPIOs

One some devices the GPIO should output the inverted value from what
device-drivers / ACPI code expects. The reason for this is unknown,
perhaps these systems use an external buffer chip on the GPIO which
inverts the signal. The BIOS makes this work by setting the
CHV_PADCTRL1_INVRXTX_TXDATA flag.

Before this commit we would unconditionally clear all INVRXTX flags,
including the CHV_PADCTRL1_INVRXTX_TXDATA flag when a GPIO is requested
by a driver (from chv_gpio_request_enable()).

This breaks systems using this setup. Specifically it is causing
problems for systems with a goodix touchscreen, where the BIOS sets the
INVRXTX_TXDATA flag on the GPIO used for the touchscreen's reset pin.

The goodix touchscreen driver by defaults configures this pin as input
(relying on the pull-up to keep it high), but the clearing of the
INVRXTX_TXDATA flag done by chv_gpio_request_enable() causes it to be
driven low for a brief time before the GPIO gets set to input mode.

This causes the touchscreen controller to get reset. On most CHT devs
with this touchscreen this leads to:

[   31.596534] Goodix-TS i2c-GDIX1001:00: i2c test failed attempt 1: -121

The driver retries this though and then everything is fine. But during
reset the touchscreen uses its interrupt pin as bootstrap to determine
which i2c address to use and on the Acer One S1003 the spurious reset
caused by the clearing of the INVRXTX_TXDATA flag causes the controller
to come back up again on the wrong i2c address, breaking things.

This commit fixes both the -121 errors, as well as the total breakage
on the Acer One S1003, by making chv_gpio_clear_triggering() not clear
the INVRXTX_TXDATA flag if the pin is already configured as a GPIO.

Note that chv_pinmux_set_mux() does still unconditionally clear the
flag, so this only affects GPIO usage.

Fixes: a7d4b171660c ("Input: goodix - add support for getting IRQ + reset GPIOs on Cherry Trail devices")
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/intel/pinctrl-cherryview.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c
index 9ef246145bde..06521097513a 100644
--- a/drivers/pinctrl/intel/pinctrl-cherryview.c
+++ b/drivers/pinctrl/intel/pinctrl-cherryview.c
@@ -58,6 +58,7 @@
 #define CHV_PADCTRL1_CFGLOCK		BIT(31)
 #define CHV_PADCTRL1_INVRXTX_SHIFT	4
 #define CHV_PADCTRL1_INVRXTX_MASK	GENMASK(7, 4)
+#define CHV_PADCTRL1_INVRXTX_TXDATA	BIT(7)
 #define CHV_PADCTRL1_INVRXTX_RXDATA	BIT(6)
 #define CHV_PADCTRL1_INVRXTX_TXENABLE	BIT(5)
 #define CHV_PADCTRL1_ODEN		BIT(3)
@@ -792,11 +793,22 @@ static int chv_pinmux_set_mux(struct pinctrl_dev *pctldev,
 static void chv_gpio_clear_triggering(struct chv_pinctrl *pctrl,
 				      unsigned int offset)
 {
+	u32 invrxtx_mask = CHV_PADCTRL1_INVRXTX_MASK;
 	u32 value;
 
+	/*
+	 * One some devices the GPIO should output the inverted value from what
+	 * device-drivers / ACPI code expects (inverted external buffer?). The
+	 * BIOS makes this work by setting the CHV_PADCTRL1_INVRXTX_TXDATA flag,
+	 * preserve this flag if the pin is already setup as GPIO.
+	 */
+	value = chv_readl(pctrl, offset, CHV_PADCTRL0);
+	if (value & CHV_PADCTRL0_GPIOEN)
+		invrxtx_mask &= ~CHV_PADCTRL1_INVRXTX_TXDATA;
+
 	value = chv_readl(pctrl, offset, CHV_PADCTRL1);
 	value &= ~CHV_PADCTRL1_INTWAKECFG_MASK;
-	value &= ~CHV_PADCTRL1_INVRXTX_MASK;
+	value &= ~invrxtx_mask;
 	chv_writel(pctrl, offset, CHV_PADCTRL1, value);
 }
 

From ee1dfad5325ff1cfb2239e564cd411b3bfe8667a Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Mon, 14 Sep 2020 13:04:19 -0400
Subject: [PATCH 075/322] dm: fix bio splitting and its bio completion order
 for regular IO

dm_queue_split() is removed because __split_and_process_bio() _must_
handle splitting bios to ensure proper bio submission and completion
ordering as a bio is split.

Otherwise, multiple recursive calls to ->submit_bio will cause multiple
split bios to be allocated from the same ->bio_split mempool at the same
time. This would result in deadlock in low memory conditions because no
progress could be made (only one bio is available in ->bio_split
mempool).

This fix has been verified to still fix the loss of performance, due
to excess splitting, that commit 120c9257f5f1 provided.

Fixes: 120c9257f5f1 ("Revert "dm: always call blk_queue_split() in dm_process_bio()"")
Cc: stable@vger.kernel.org # 5.0+, requires custom backport due to 5.9 changes
Reported-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm.c | 23 ++---------------------
 1 file changed, 2 insertions(+), 21 deletions(-)

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 4a40df8af7d3..d948cd522431 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1724,23 +1724,6 @@ out:
 	return ret;
 }
 
-static void dm_queue_split(struct mapped_device *md, struct dm_target *ti, struct bio **bio)
-{
-	unsigned len, sector_count;
-
-	sector_count = bio_sectors(*bio);
-	len = min_t(sector_t, max_io_len((*bio)->bi_iter.bi_sector, ti), sector_count);
-
-	if (sector_count > len) {
-		struct bio *split = bio_split(*bio, len, GFP_NOIO, &md->queue->bio_split);
-
-		bio_chain(split, *bio);
-		trace_block_split(md->queue, split, (*bio)->bi_iter.bi_sector);
-		submit_bio_noacct(*bio);
-		*bio = split;
-	}
-}
-
 static blk_qc_t dm_process_bio(struct mapped_device *md,
 			       struct dm_table *map, struct bio *bio)
 {
@@ -1768,14 +1751,12 @@ static blk_qc_t dm_process_bio(struct mapped_device *md,
 	if (current->bio_list) {
 		if (is_abnormal_io(bio))
 			blk_queue_split(&bio);
-		else
-			dm_queue_split(md, ti, &bio);
+		/* regular IO is split by __split_and_process_bio */
 	}
 
 	if (dm_get_md_type(md) == DM_TYPE_NVME_BIO_BASED)
 		return __process_bio(md, map, bio, ti);
-	else
-		return __split_and_process_bio(md, map, bio);
+	return __split_and_process_bio(md, map, bio);
 }
 
 static blk_qc_t dm_submit_bio(struct bio *bio)

From cf9c37865557d39292d82da29e9ebda1dbc584b3 Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Mon, 21 Sep 2020 19:08:30 -0400
Subject: [PATCH 076/322] dm: fix comment in dm_process_bio()

Refer to the correct function (->submit_bio instead of ->queue_bio).
Also, add details about why using blk_queue_split() isn't needed for
dm_wq_work()'s call to dm_process_bio().

Fixes: c62b37d96b6eb ("block: move ->make_request_fn to struct block_device_operations")
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index d948cd522431..6ed05ca65a0f 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1744,9 +1744,11 @@ static blk_qc_t dm_process_bio(struct mapped_device *md,
 	}
 
 	/*
-	 * If in ->queue_bio we need to use blk_queue_split(), otherwise
+	 * If in ->submit_bio we need to use blk_queue_split(), otherwise
 	 * queue_limits for abnormal requests (e.g. discard, writesame, etc)
 	 * won't be imposed.
+	 * If called from dm_wq_work() for deferred bio processing, bio
+	 * was already handled by following code with previous ->submit_bio.
 	 */
 	if (current->bio_list) {
 		if (is_abnormal_io(bio))

From ead1e19ad905b97261f0ad7a98bb64abb9323b2b Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Mon, 21 Sep 2020 18:44:42 +0900
Subject: [PATCH 077/322] lib/bootconfig: Fix a bug of breaking existing tree
 nodes

Fix a bug of breaking existing tree nodes by parsing the second
and subsequent braces. Since the bootconfig parser uses the
node.next field as a flag of current parent node, but this will
break the existing tree if the same key node is specified again
in the bootconfig.

For example, the following bootconfig should be foo.buz and bar.

foo
bar
foo { buz }

However, when parsing the brace "{", it breaks foo->bar link
by marking open-brace node. So the bootconfig unlinks bar
from the bootconfig internal tree.

This introduces a stack outside of the tree and record the
last open-brace on the stack instead of using node.next field.

Link: https://lkml.kernel.org/r/160068148267.1088739.8264704338030168660.stgit@devnote2

Fixes: 76db5a27a827 ("bootconfig: Add Extra Boot Config support")
Cc: Ingo Molnar <mingo@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 lib/bootconfig.c | 36 +++++++++++++++++++++++-------------
 1 file changed, 23 insertions(+), 13 deletions(-)

diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index 2c905a91d4eb..b44bba0f1583 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -31,6 +31,8 @@ static size_t xbc_data_size __initdata;
 static struct xbc_node *last_parent __initdata;
 static const char *xbc_err_msg __initdata;
 static int xbc_err_pos __initdata;
+static int open_brace[XBC_DEPTH_MAX] __initdata;
+static int brace_index __initdata;
 
 static int __init xbc_parse_error(const char *msg, const char *p)
 {
@@ -431,27 +433,27 @@ static char *skip_spaces_until_newline(char *p)
 	return p;
 }
 
-static int __init __xbc_open_brace(void)
+static int __init __xbc_open_brace(char *p)
 {
-	/* Mark the last key as open brace */
-	last_parent->next = XBC_NODE_MAX;
+	/* Push the last key as open brace */
+	open_brace[brace_index++] = xbc_node_index(last_parent);
+	if (brace_index >= XBC_DEPTH_MAX)
+		return xbc_parse_error("Exceed max depth of braces", p);
 
 	return 0;
 }
 
 static int __init __xbc_close_brace(char *p)
 {
-	struct xbc_node *node;
-
-	if (!last_parent || last_parent->next != XBC_NODE_MAX)
+	brace_index--;
+	if (!last_parent || brace_index < 0 ||
+	    (open_brace[brace_index] != xbc_node_index(last_parent)))
 		return xbc_parse_error("Unexpected closing brace", p);
 
-	node = last_parent;
-	node->next = 0;
-	do {
-		node = xbc_node_get_parent(node);
-	} while (node && node->next != XBC_NODE_MAX);
-	last_parent = node;
+	if (brace_index == 0)
+		last_parent = NULL;
+	else
+		last_parent = &xbc_nodes[open_brace[brace_index - 1]];
 
 	return 0;
 }
@@ -661,7 +663,7 @@ static int __init xbc_open_brace(char **k, char *n)
 		return ret;
 	*k = n;
 
-	return __xbc_open_brace();
+	return __xbc_open_brace(n - 1);
 }
 
 static int __init xbc_close_brace(char **k, char *n)
@@ -681,6 +683,13 @@ static int __init xbc_verify_tree(void)
 	int i, depth, len, wlen;
 	struct xbc_node *n, *m;
 
+	/* Brace closing */
+	if (brace_index) {
+		n = &xbc_nodes[open_brace[brace_index]];
+		return xbc_parse_error("Brace is not closed",
+					xbc_node_get_data(n));
+	}
+
 	/* Empty tree */
 	if (xbc_node_num == 0) {
 		xbc_parse_error("Empty config", xbc_data);
@@ -745,6 +754,7 @@ void __init xbc_destroy_all(void)
 	xbc_node_num = 0;
 	memblock_free(__pa(xbc_nodes), sizeof(struct xbc_node) * XBC_NODE_MAX);
 	xbc_nodes = NULL;
+	brace_index = 0;
 }
 
 /**

From c7af4ecdffe1537ba8aeed0ac12c3326f908df43 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Mon, 21 Sep 2020 18:44:51 +0900
Subject: [PATCH 078/322] lib/bootconfig: Fix to remove tailing spaces after
 value

Fix to remove tailing spaces after value. If there is a space
after value, the bootconfig failed to remove it because it
applies strim() before replacing the delimiter with null.

For example,

foo = var    # comment

was parsed as below.

foo="var    "

but user will expect

foo="var"

This fixes it by applying strim() after removing the delimiter.

Link: https://lkml.kernel.org/r/160068149134.1088739.8868306567670058853.stgit@devnote2

Fixes: 76db5a27a827 ("bootconfig: Add Extra Boot Config support")
Cc: Ingo Molnar <mingo@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 lib/bootconfig.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/bootconfig.c b/lib/bootconfig.c
index b44bba0f1583..649ed44f199c 100644
--- a/lib/bootconfig.c
+++ b/lib/bootconfig.c
@@ -494,8 +494,8 @@ static int __init __xbc_parse_value(char **__v, char **__n)
 			break;
 		}
 		if (strchr(",;\n#}", c)) {
-			v = strim(v);
 			*p++ = '\0';
+			v = strim(v);
 			break;
 		}
 	}

From 1d210c166b693c3a659abbac4b58385c2d4cf887 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Mon, 21 Sep 2020 18:45:02 +0900
Subject: [PATCH 079/322] tools/bootconfig: Add testcases for repeated key with
 brace

Add a testcase for repeated key with brace parsing issue.

Link: https://lkml.kernel.org/r/160068150176.1088739.409481347784771987.stgit@devnote2

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 tools/bootconfig/test-bootconfig.sh | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/tools/bootconfig/test-bootconfig.sh b/tools/bootconfig/test-bootconfig.sh
index 56284b98d8f0..95eec768b503 100755
--- a/tools/bootconfig/test-bootconfig.sh
+++ b/tools/bootconfig/test-bootconfig.sh
@@ -137,6 +137,18 @@ $BOOTCONF $INITRD > $TEMPCONF
 cat $TEMPCONF
 xpass grep \'\"string\"\' $TEMPCONF
 
+echo "Repeat same-key tree"
+cat > $TEMPCONF << EOF
+foo
+bar
+foo { buz }
+EOF
+echo > $INITRD
+
+xpass $BOOTCONF -a $TEMPCONF $INITRD
+$BOOTCONF $INITRD > $OUTFILE
+xpass grep -q "bar" $OUTFILE
+
 echo "=== expected failure cases ==="
 for i in samples/bad-* ; do
   xfail $BOOTCONF -a $i $INITRD

From 2f5fb555637eff4a3e5579f2323cd358c77efdc8 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Mon, 21 Sep 2020 18:45:11 +0900
Subject: [PATCH 080/322] tools/bootconfig: Add testcase for tailing space

Add testcases for removing/keeping tailing space
in the value.

Link: https://lkml.kernel.org/r/160068151151.1088739.3469541807296024227.stgit@devnote2

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 tools/bootconfig/test-bootconfig.sh | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tools/bootconfig/test-bootconfig.sh b/tools/bootconfig/test-bootconfig.sh
index 95eec768b503..d295e406a756 100755
--- a/tools/bootconfig/test-bootconfig.sh
+++ b/tools/bootconfig/test-bootconfig.sh
@@ -149,6 +149,19 @@ xpass $BOOTCONF -a $TEMPCONF $INITRD
 $BOOTCONF $INITRD > $OUTFILE
 xpass grep -q "bar" $OUTFILE
 
+
+echo "Remove/keep tailing spaces"
+cat > $TEMPCONF << EOF
+foo = val     # comment
+bar = "val2 " # comment
+EOF
+echo > $INITRD
+
+xpass $BOOTCONF -a $TEMPCONF $INITRD
+$BOOTCONF $INITRD > $OUTFILE
+xfail grep -q val[[:space:]] $OUTFILE
+xpass grep -q val2[[:space:]] $OUTFILE
+
 echo "=== expected failure cases ==="
 for i in samples/bad-* ; do
   xfail $BOOTCONF -a $i $INITRD

From d0254f82d702a1d0d92e42f87676111de88846cf Mon Sep 17 00:00:00 2001
From: Jacopo Mondi <jacopo+renesas@jmondi.org>
Date: Sat, 12 Sep 2020 12:30:45 +0200
Subject: [PATCH 081/322] media: dt-bindings: media: imx274: Convert to
 json-schema

Convert the imx274 bindings document to json-schema and update
the MAINTAINERS file accordingly.

Reviewed-by: Luca Ceresoli <luca@lucaceresoli.net>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Jacopo Mondi <jacopo+renesas@jmondi.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 .../devicetree/bindings/media/i2c/imx274.txt  | 38 ----------
 .../bindings/media/i2c/sony,imx274.yaml       | 76 +++++++++++++++++++
 MAINTAINERS                                   |  2 +-
 3 files changed, 77 insertions(+), 39 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/media/i2c/imx274.txt
 create mode 100644 Documentation/devicetree/bindings/media/i2c/sony,imx274.yaml

diff --git a/Documentation/devicetree/bindings/media/i2c/imx274.txt b/Documentation/devicetree/bindings/media/i2c/imx274.txt
deleted file mode 100644
index 0727079d2410..000000000000
--- a/Documentation/devicetree/bindings/media/i2c/imx274.txt
+++ /dev/null
@@ -1,38 +0,0 @@
-* Sony 1/2.5-Inch 8.51Mp CMOS Digital Image Sensor
-
-The Sony imx274 is a 1/2.5-inch CMOS active pixel digital image sensor with
-an active array size of 3864H x 2202V. It is programmable through I2C
-interface. The I2C address is fixed to 0x1a as per sensor data sheet.
-Image data is sent through MIPI CSI-2, which is configured as 4 lanes
-at 1440 Mbps.
-
-
-Required Properties:
-- compatible: value should be "sony,imx274" for imx274 sensor
-- reg: I2C bus address of the device
-
-Optional Properties:
-- reset-gpios: Sensor reset GPIO
-- clocks: Reference to the input clock.
-- clock-names: Should be "inck".
-- VANA-supply: Sensor 2.8v analog supply.
-- VDIG-supply: Sensor 1.8v digital core supply.
-- VDDL-supply: Sensor digital IO 1.2v supply.
-
-The imx274 device node should contain one 'port' child node with
-an 'endpoint' subnode. For further reading on port node refer to
-Documentation/devicetree/bindings/media/video-interfaces.txt.
-
-Example:
-	sensor@1a {
-		compatible = "sony,imx274";
-		reg = <0x1a>;
-		#address-cells = <1>;
-		#size-cells = <0>;
-		reset-gpios = <&gpio_sensor 0 0>;
-		port {
-			sensor_out: endpoint {
-				remote-endpoint = <&csiss_in>;
-			};
-		};
-	};
diff --git a/Documentation/devicetree/bindings/media/i2c/sony,imx274.yaml b/Documentation/devicetree/bindings/media/i2c/sony,imx274.yaml
new file mode 100644
index 000000000000..f697e1a20beb
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/i2c/sony,imx274.yaml
@@ -0,0 +1,76 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/media/i2c/sony,imx274.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Sony 1/2.5-Inch 8.51MP CMOS Digital Image Sensor
+
+maintainers:
+  - Leon Luo <leonl@leopardimaging.com>
+
+description: |
+  The Sony IMX274 is a 1/2.5-inch CMOS active pixel digital image sensor with an
+  active array size of 3864H x 2202V. It is programmable through I2C interface.
+  Image data is sent through MIPI CSI-2, which is configured as 4 lanes at 1440
+  Mbps.
+
+properties:
+  compatible:
+    const: sony,imx274
+
+  reg:
+    const: 0x1a
+
+  reset-gpios:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  clock-names:
+    const: inck
+
+  vana-supply:
+    description: Sensor 2.8 V analog supply.
+    maxItems: 1
+
+  vdig-supply:
+    description: Sensor 1.8 V digital core supply.
+    maxItems: 1
+
+  vddl-supply:
+    description: Sensor digital IO 1.2 V supply.
+    maxItems: 1
+
+  port:
+    type: object
+    description: Output video port. See ../video-interfaces.txt.
+
+required:
+  - compatible
+  - reg
+  - port
+
+additionalProperties: false
+
+examples:
+  - |
+    i2c0 {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        imx274: camera-sensor@1a {
+            compatible = "sony,imx274";
+            reg = <0x1a>;
+            reset-gpios = <&gpio_sensor 0 0>;
+
+            port {
+                sensor_out: endpoint {
+                    remote-endpoint = <&csiss_in>;
+                };
+            };
+        };
+    };
+
+...
diff --git a/MAINTAINERS b/MAINTAINERS
index f0068bceeb61..ba1a4d2238cc 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -16125,7 +16125,7 @@ M:	Leon Luo <leonl@leopardimaging.com>
 L:	linux-media@vger.kernel.org
 S:	Maintained
 T:	git git://linuxtv.org/media_tree.git
-F:	Documentation/devicetree/bindings/media/i2c/imx274.txt
+F:	Documentation/devicetree/bindings/media/i2c/sony,imx274.yaml
 F:	drivers/media/i2c/imx274.c
 
 SONY IMX290 SENSOR DRIVER

From 01ce6d4d2c8157b076425e3dd8319948652583c5 Mon Sep 17 00:00:00 2001
From: Huacai Chen <chenhc@lemote.com>
Date: Mon, 24 Aug 2020 15:44:03 +0800
Subject: [PATCH 082/322] MIPS: Loongson-3: Fix fp register access if MSA
 enabled

If MSA is enabled, FPU_REG_WIDTH is 128 rather than 64, then get_fpr64()
/set_fpr64() in the original unaligned instruction emulation code access
the wrong fp registers. This is because the current code doesn't specify
the correct index field, so fix it.

Fixes: f83e4f9896eff614d0f2547a ("MIPS: Loongson-3: Add some unaligned instructions emulation")
Signed-off-by: Huacai Chen <chenhc@lemote.com>
Signed-off-by: Pei Huang <huangpei@loongson.cn>
Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
---
 arch/mips/loongson64/cop2-ex.c | 24 ++++++++----------------
 1 file changed, 8 insertions(+), 16 deletions(-)

diff --git a/arch/mips/loongson64/cop2-ex.c b/arch/mips/loongson64/cop2-ex.c
index f130f62129b8..00055d4b6042 100644
--- a/arch/mips/loongson64/cop2-ex.c
+++ b/arch/mips/loongson64/cop2-ex.c
@@ -95,10 +95,8 @@ static int loongson_cu2_call(struct notifier_block *nfb, unsigned long action,
 			if (res)
 				goto fault;
 
-			set_fpr64(current->thread.fpu.fpr,
-				insn.loongson3_lswc2_format.rt, value);
-			set_fpr64(current->thread.fpu.fpr,
-				insn.loongson3_lswc2_format.rq, value_next);
+			set_fpr64(&current->thread.fpu.fpr[insn.loongson3_lswc2_format.rt], 0, value);
+			set_fpr64(&current->thread.fpu.fpr[insn.loongson3_lswc2_format.rq], 0, value_next);
 			compute_return_epc(regs);
 			own_fpu(1);
 		}
@@ -130,15 +128,13 @@ static int loongson_cu2_call(struct notifier_block *nfb, unsigned long action,
 				goto sigbus;
 
 			lose_fpu(1);
-			value_next = get_fpr64(current->thread.fpu.fpr,
-					insn.loongson3_lswc2_format.rq);
+			value_next = get_fpr64(&current->thread.fpu.fpr[insn.loongson3_lswc2_format.rq], 0);
 
 			StoreDW(addr + 8, value_next, res);
 			if (res)
 				goto fault;
 
-			value = get_fpr64(current->thread.fpu.fpr,
-					insn.loongson3_lswc2_format.rt);
+			value = get_fpr64(&current->thread.fpu.fpr[insn.loongson3_lswc2_format.rt], 0);
 
 			StoreDW(addr, value, res);
 			if (res)
@@ -204,8 +200,7 @@ static int loongson_cu2_call(struct notifier_block *nfb, unsigned long action,
 			if (res)
 				goto fault;
 
-			set_fpr64(current->thread.fpu.fpr,
-					insn.loongson3_lsdc2_format.rt, value);
+			set_fpr64(&current->thread.fpu.fpr[insn.loongson3_lsdc2_format.rt], 0, value);
 			compute_return_epc(regs);
 			own_fpu(1);
 
@@ -221,8 +216,7 @@ static int loongson_cu2_call(struct notifier_block *nfb, unsigned long action,
 			if (res)
 				goto fault;
 
-			set_fpr64(current->thread.fpu.fpr,
-					insn.loongson3_lsdc2_format.rt, value);
+			set_fpr64(&current->thread.fpu.fpr[insn.loongson3_lsdc2_format.rt], 0, value);
 			compute_return_epc(regs);
 			own_fpu(1);
 			break;
@@ -286,8 +280,7 @@ static int loongson_cu2_call(struct notifier_block *nfb, unsigned long action,
 				goto sigbus;
 
 			lose_fpu(1);
-			value = get_fpr64(current->thread.fpu.fpr,
-					insn.loongson3_lsdc2_format.rt);
+			value = get_fpr64(&current->thread.fpu.fpr[insn.loongson3_lsdc2_format.rt], 0);
 
 			StoreW(addr, value, res);
 			if (res)
@@ -305,8 +298,7 @@ static int loongson_cu2_call(struct notifier_block *nfb, unsigned long action,
 				goto sigbus;
 
 			lose_fpu(1);
-			value = get_fpr64(current->thread.fpu.fpr,
-					insn.loongson3_lsdc2_format.rt);
+			value = get_fpr64(&current->thread.fpu.fpr[insn.loongson3_lsdc2_format.rt], 0);
 
 			StoreDW(addr, value, res);
 			if (res)

From fdb29f4de1374483291232ae7515e5e7bb464762 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Thu, 10 Sep 2020 16:59:51 +0300
Subject: [PATCH 083/322] iio: adc: qcom-spmi-adc5: fix driver name

Remove superfluous '.c' from qcom-spmi-adc5 device driver name.
Fixes: e13d757279bb ("iio: adc: Add QCOM SPMI PMIC5 ADC driver")
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Acked-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Cc: <Stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20200910140000.324091-2-dmitry.baryshkov@linaro.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/qcom-spmi-adc5.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iio/adc/qcom-spmi-adc5.c b/drivers/iio/adc/qcom-spmi-adc5.c
index b4b73c9920b4..c10aa28be70a 100644
--- a/drivers/iio/adc/qcom-spmi-adc5.c
+++ b/drivers/iio/adc/qcom-spmi-adc5.c
@@ -982,7 +982,7 @@ static int adc5_probe(struct platform_device *pdev)
 
 static struct platform_driver adc5_driver = {
 	.driver = {
-		.name = "qcom-spmi-adc5.c",
+		.name = "qcom-spmi-adc5",
 		.of_match_table = adc5_match_table,
 	},
 	.probe = adc5_probe,

From b5ddcffa37778244d5e786fe32f778edf2bfc93e Mon Sep 17 00:00:00 2001
From: Anand Jain <anand.jain@oracle.com>
Date: Sat, 5 Sep 2020 01:34:21 +0800
Subject: [PATCH 084/322] btrfs: fix put of uninitialized kobject after seed
 device delete

The following test case leads to NULL kobject free error:

  mount seed /mnt
  add sprout to /mnt
  umount /mnt
  mount sprout to /mnt
  delete seed

  kobject: '(null)' (00000000dd2b87e4): is not initialized, yet kobject_put() is being called.
  WARNING: CPU: 1 PID: 15784 at lib/kobject.c:736 kobject_put+0x80/0x350
  RIP: 0010:kobject_put+0x80/0x350
  ::
  Call Trace:
  btrfs_sysfs_remove_devices_dir+0x6e/0x160 [btrfs]
  btrfs_rm_device.cold+0xa8/0x298 [btrfs]
  btrfs_ioctl+0x206c/0x22a0 [btrfs]
  ksys_ioctl+0xe2/0x140
  __x64_sys_ioctl+0x1e/0x29
  do_syscall_64+0x96/0x150
  entry_SYSCALL_64_after_hwframe+0x44/0xa9
  RIP: 0033:0x7f4047c6288b
  ::

This is because, at the end of the seed device-delete, we try to remove
the seed's devid sysfs entry. But for the seed devices under the sprout
fs, we don't initialize the devid kobject yet. So add a kobject state
check, which takes care of the bug.

Fixes: 668e48af7a94 ("btrfs: sysfs, add devid/dev_state kobject and device attributes")
CC: stable@vger.kernel.org # 5.6+
Signed-off-by: Anand Jain <anand.jain@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/sysfs.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index c8df2edafd85..5be30066563c 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -1170,10 +1170,12 @@ int btrfs_sysfs_remove_devices_dir(struct btrfs_fs_devices *fs_devices,
 					  disk_kobj->name);
 		}
 
-		kobject_del(&one_device->devid_kobj);
-		kobject_put(&one_device->devid_kobj);
+		if (one_device->devid_kobj.state_initialized) {
+			kobject_del(&one_device->devid_kobj);
+			kobject_put(&one_device->devid_kobj);
 
-		wait_for_completion(&one_device->kobj_unregister);
+			wait_for_completion(&one_device->kobj_unregister);
+		}
 
 		return 0;
 	}
@@ -1186,10 +1188,12 @@ int btrfs_sysfs_remove_devices_dir(struct btrfs_fs_devices *fs_devices,
 			sysfs_remove_link(fs_devices->devices_kobj,
 					  disk_kobj->name);
 		}
-		kobject_del(&one_device->devid_kobj);
-		kobject_put(&one_device->devid_kobj);
+		if (one_device->devid_kobj.state_initialized) {
+			kobject_del(&one_device->devid_kobj);
+			kobject_put(&one_device->devid_kobj);
 
-		wait_for_completion(&one_device->kobj_unregister);
+			wait_for_completion(&one_device->kobj_unregister);
+		}
 	}
 
 	return 0;

From ce65d55f92a67e247f4d799e581cf9fed677871c Mon Sep 17 00:00:00 2001
From: Vladimir Murzin <vladimir.murzin@arm.com>
Date: Tue, 22 Sep 2020 14:58:45 +0300
Subject: [PATCH 085/322] dmaengine: dmatest: Prevent to run on misconfigured
 channel

Andy reported that commit 6b41030fdc79 ("dmaengine: dmatest:
Restore default for channel") broke his scripts for the case
where "busy" channel is used for configuration with expectation
that run command would do nothing. Instead, behavior was
(unintentionally) changed to treat such case as under-configuration
and progress with defaults, i.e. run command would start a test
with default setting for channel (which would use all channels).

Restore original behavior with tracking status of channel setter
so we can distinguish between misconfigured and under-configured
cases in run command and act accordingly.

Fixes: 6b41030fdc79 ("dmaengine: dmatest: Restore default for channel")
Reported-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Tested-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Vladimir Murzin <vladimir.murzin@arm.com>
Tested-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20200922115847.30100-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/dmatest.c | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
index 45d4d92e91db..a819611b8892 100644
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -129,6 +129,7 @@ struct dmatest_params {
  * @nr_channels:	number of channels under test
  * @lock:		access protection to the fields of this structure
  * @did_init:		module has been initialized completely
+ * @last_error:		test has faced configuration issues
  */
 static struct dmatest_info {
 	/* Test parameters */
@@ -137,6 +138,7 @@ static struct dmatest_info {
 	/* Internal state */
 	struct list_head	channels;
 	unsigned int		nr_channels;
+	int			last_error;
 	struct mutex		lock;
 	bool			did_init;
 } test_info = {
@@ -1184,10 +1186,22 @@ static int dmatest_run_set(const char *val, const struct kernel_param *kp)
 		return ret;
 	} else if (dmatest_run) {
 		if (!is_threaded_test_pending(info)) {
-			pr_info("No channels configured, continue with any\n");
-			if (!is_threaded_test_run(info))
-				stop_threaded_test(info);
-			add_threaded_test(info);
+			/*
+			 * We have nothing to run. This can be due to:
+			 */
+			ret = info->last_error;
+			if (ret) {
+				/* 1) Misconfiguration */
+				pr_err("Channel misconfigured, can't continue\n");
+				mutex_unlock(&info->lock);
+				return ret;
+			} else {
+				/* 2) We rely on defaults */
+				pr_info("No channels configured, continue with any\n");
+				if (!is_threaded_test_run(info))
+					stop_threaded_test(info);
+				add_threaded_test(info);
+			}
 		}
 		start_threaded_tests(info);
 	} else {
@@ -1204,7 +1218,7 @@ static int dmatest_chan_set(const char *val, const struct kernel_param *kp)
 	struct dmatest_info *info = &test_info;
 	struct dmatest_chan *dtc;
 	char chan_reset_val[20];
-	int ret = 0;
+	int ret;
 
 	mutex_lock(&info->lock);
 	ret = param_set_copystring(val, kp);
@@ -1259,12 +1273,14 @@ static int dmatest_chan_set(const char *val, const struct kernel_param *kp)
 		goto add_chan_err;
 	}
 
+	info->last_error = ret;
 	mutex_unlock(&info->lock);
 
 	return ret;
 
 add_chan_err:
 	param_set_copystring(chan_reset_val, kp);
+	info->last_error = ret;
 	mutex_unlock(&info->lock);
 
 	return ret;

From c413c3102703a453c1312ce2160c6b7ffd55403e Mon Sep 17 00:00:00 2001
From: Kai-Heng Feng <kai.heng.feng@canonical.com>
Date: Tue, 15 Sep 2020 18:39:23 +0800
Subject: [PATCH 086/322] Revert "ALSA: usb-audio: Disable Lenovo P620 Rear
 line-in volume control"

This reverts commit 34dedd2a83b241ba6aeb290260313c65dc58660e.

According to Realtek, volume FU works for line-in.

I can confirm volume control works after device firmware is updated.

Fixes: 34dedd2a83b2 ("ALSA: usb-audio: Disable Lenovo P620 Rear line-in volume control")
Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Link: https://lore.kernel.org/r/20200915103925.12777-1-kai.heng.feng@canonical.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/usb/mixer_maps.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sound/usb/mixer_maps.c b/sound/usb/mixer_maps.c
index 5b43e9e40e49..c369c81e74c4 100644
--- a/sound/usb/mixer_maps.c
+++ b/sound/usb/mixer_maps.c
@@ -371,7 +371,6 @@ static const struct usbmix_name_map asus_rog_map[] = {
 };
 
 static const struct usbmix_name_map lenovo_p620_rear_map[] = {
-	{ 19, NULL, 2 }, /* FU, Volume */
 	{ 19, NULL, 12 }, /* FU, Input Gain Pad */
 	{}
 };

From 59e330f8ff7ada7aa64fa422f6adf22a45152a7e Mon Sep 17 00:00:00 2001
From: Keith Busch <kbusch@kernel.org>
Date: Thu, 17 Sep 2020 08:50:25 -0700
Subject: [PATCH 087/322] nvme: return errors for hwmon init

Initializing the nvme hwmon retrieves a log from the controller. If the
controller is broken, we need to return the appropriate error so that
subsequent initialization doesn't attempt to continue.

Reported-by: Tong Zhang <ztong0001@gmail.com>
Signed-off-by: Keith Busch <kbusch@kernel.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c  |  7 +++++--
 drivers/nvme/host/hwmon.c | 14 ++++++--------
 drivers/nvme/host/nvme.h  |  7 +++++--
 3 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index c013eb52fdc8..4cea14c18a6d 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3236,8 +3236,11 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 	if (ret < 0)
 		return ret;
 
-	if (!ctrl->identified)
-		nvme_hwmon_init(ctrl);
+	if (!ctrl->identified) {
+		ret = nvme_hwmon_init(ctrl);
+		if (ret < 0)
+			return ret;
+	}
 
 	ctrl->identified = true;
 
diff --git a/drivers/nvme/host/hwmon.c b/drivers/nvme/host/hwmon.c
index 412a6c97c0d8..552dbc04567b 100644
--- a/drivers/nvme/host/hwmon.c
+++ b/drivers/nvme/host/hwmon.c
@@ -59,12 +59,8 @@ static int nvme_set_temp_thresh(struct nvme_ctrl *ctrl, int sensor, bool under,
 
 static int nvme_hwmon_get_smart_log(struct nvme_hwmon_data *data)
 {
-	int ret;
-
-	ret = nvme_get_log(data->ctrl, NVME_NSID_ALL, NVME_LOG_SMART, 0,
+	return nvme_get_log(data->ctrl, NVME_NSID_ALL, NVME_LOG_SMART, 0,
 			   NVME_CSI_NVM, &data->log, sizeof(data->log), 0);
-
-	return ret <= 0 ? ret : -EIO;
 }
 
 static int nvme_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
@@ -225,7 +221,7 @@ static const struct hwmon_chip_info nvme_hwmon_chip_info = {
 	.info	= nvme_hwmon_info,
 };
 
-void nvme_hwmon_init(struct nvme_ctrl *ctrl)
+int nvme_hwmon_init(struct nvme_ctrl *ctrl)
 {
 	struct device *dev = ctrl->dev;
 	struct nvme_hwmon_data *data;
@@ -234,7 +230,7 @@ void nvme_hwmon_init(struct nvme_ctrl *ctrl)
 
 	data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
 	if (!data)
-		return;
+		return 0;
 
 	data->ctrl = ctrl;
 	mutex_init(&data->read_lock);
@@ -244,7 +240,7 @@ void nvme_hwmon_init(struct nvme_ctrl *ctrl)
 		dev_warn(ctrl->device,
 			"Failed to read smart log (error %d)\n", err);
 		devm_kfree(dev, data);
-		return;
+		return err;
 	}
 
 	hwmon = devm_hwmon_device_register_with_info(dev, "nvme", data,
@@ -254,4 +250,6 @@ void nvme_hwmon_init(struct nvme_ctrl *ctrl)
 		dev_warn(dev, "Failed to instantiate hwmon device\n");
 		devm_kfree(dev, data);
 	}
+
+	return 0;
 }
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 9fd45ff656da..2aaedfa43ed8 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -827,9 +827,12 @@ static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev)
 }
 
 #ifdef CONFIG_NVME_HWMON
-void nvme_hwmon_init(struct nvme_ctrl *ctrl);
+int nvme_hwmon_init(struct nvme_ctrl *ctrl);
 #else
-static inline void nvme_hwmon_init(struct nvme_ctrl *ctrl) { }
+static inline int nvme_hwmon_init(struct nvme_ctrl *ctrl)
+{
+	return 0;
+}
 #endif
 
 u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns,

From 50b7c24390a53c78de546215282fb52980f1d7b7 Mon Sep 17 00:00:00 2001
From: Xianting Tian <tian.xianting@h3c.com>
Date: Tue, 22 Sep 2020 14:25:17 +0800
Subject: [PATCH 088/322] nvme-pci: fix NULL req in completion handler

Currently, we use nvmeq->q_depth as the upper limit for a valid tag in
nvme_handle_cqe(), it is not correct. Because the available tag number
is recorded in tagset, which is not equal to nvmeq->q_depth.

The nvme driver registers interrupts for queues before initializing the
tagset, because it uses the number of successful request_irq() calls to
configure the tagset parameters. This allows a race condition with the
current tag validity check if the controller happens to produce an
interrupt with a corrupted CQE before the tagset is initialized.

Replace the driver's indirect tag check with the one already provided by
the block layer.

Signed-off-by: Xianting Tian <tian.xianting@h3c.com>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/pci.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index d31e298669a9..d286c267869d 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -940,13 +940,6 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
 	struct nvme_completion *cqe = &nvmeq->cqes[idx];
 	struct request *req;
 
-	if (unlikely(cqe->command_id >= nvmeq->q_depth)) {
-		dev_warn(nvmeq->dev->ctrl.device,
-			"invalid id %d completed on queue %d\n",
-			cqe->command_id, le16_to_cpu(cqe->sq_id));
-		return;
-	}
-
 	/*
 	 * AEN requests are special as they don't time out and can
 	 * survive any kind of queue freeze and often don't respond to
@@ -960,6 +953,13 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
 	}
 
 	req = blk_mq_tag_to_rq(nvme_queue_tagset(nvmeq), cqe->command_id);
+	if (unlikely(!req)) {
+		dev_warn(nvmeq->dev->ctrl.device,
+			"invalid id %d completed on queue %d\n",
+			cqe->command_id, le16_to_cpu(cqe->sq_id));
+		return;
+	}
+
 	trace_nvme_sq(req, cqe->sq_head, nvmeq->sq_tail);
 	if (!nvme_try_complete_req(req, cqe->status, cqe->result))
 		nvme_pci_complete_rq(req);

From 9e0e8dac985d4bd07d9e62922b9d189d3ca2fccf Mon Sep 17 00:00:00 2001
From: James Smart <james.smart@broadcom.com>
Date: Thu, 17 Sep 2020 13:33:22 -0700
Subject: [PATCH 089/322] nvme-fc: fail new connections to a deleted host or
 remote port

The lldd may have made calls to delete a remote port or local port and
the delete is in progress when the cli then attempts to create a new
controller. Currently, this proceeds without error although it can't be
very successful.

Fix this by validating that both the host port and remote port are
present when a new controller is to be created.

Signed-off-by: James Smart <james.smart@broadcom.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/fc.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index e8ef42b9d50c..e2e09e25c056 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -3671,12 +3671,14 @@ nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts)
 	spin_lock_irqsave(&nvme_fc_lock, flags);
 	list_for_each_entry(lport, &nvme_fc_lport_list, port_list) {
 		if (lport->localport.node_name != laddr.nn ||
-		    lport->localport.port_name != laddr.pn)
+		    lport->localport.port_name != laddr.pn ||
+		    lport->localport.port_state != FC_OBJSTATE_ONLINE)
 			continue;
 
 		list_for_each_entry(rport, &lport->endp_list, endp_list) {
 			if (rport->remoteport.node_name != raddr.nn ||
-			    rport->remoteport.port_name != raddr.pn)
+			    rport->remoteport.port_name != raddr.pn ||
+			    rport->remoteport.port_state != FC_OBJSTATE_ONLINE)
 				continue;
 
 			/* if fail to get reference fall through. Will error */

From 2b405533c2560d7878199c57d95a39151351df72 Mon Sep 17 00:00:00 2001
From: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
Date: Sun, 20 Sep 2020 18:01:58 +0100
Subject: [PATCH 090/322] USB: gadget: f_ncm: Fix NDP16 datagram validation

commit 2b74b0a04d3e ("USB: gadget: f_ncm: add bounds checks to ncm_unwrap_ntb()")
adds important bounds checking however it unfortunately also introduces  a
bug with respect to section 3.3.1 of the NCM specification.

wDatagramIndex[1] : "Byte index, in little endian, of the second datagram
described by this NDP16. If zero, then this marks the end of the sequence
of datagrams in this NDP16."

wDatagramLength[1]: "Byte length, in little endian, of the second datagram
described by this NDP16. If zero, then this marks the end of the sequence
of datagrams in this NDP16."

wDatagramIndex[1] and wDatagramLength[1] respectively then may be zero but
that does not mean we should throw away the data referenced by
wDatagramIndex[0] and wDatagramLength[0] as is currently the case.

Breaking the loop on (index2 == 0 || dg_len2 == 0) should come at the end
as was previously the case and checks for index2 and dg_len2 should be
removed since zero is valid.

I'm not sure how much testing the above patch received but for me right now
after enumeration ping doesn't work. Reverting the commit restores ping,
scp, etc.

The extra validation associated with wDatagramIndex[0] and
wDatagramLength[0] appears to be valid so, this change removes the incorrect
restriction on wDatagramIndex[1] and wDatagramLength[1] restoring data
processing between host and device.

Fixes: 2b74b0a04d3e ("USB: gadget: f_ncm: add bounds checks to ncm_unwrap_ntb()")
Cc: Ilja Van Sprundel <ivansprundel@ioactive.com>
Cc: Brooke Basile <brookebasile@gmail.com>
Cc: stable <stable@kernel.org>
Signed-off-by: Bryan O'Donoghue <bryan.odonoghue@linaro.org>
Link: https://lore.kernel.org/r/20200920170158.1217068-1-bryan.odonoghue@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/function/f_ncm.c | 30 ++---------------------------
 1 file changed, 2 insertions(+), 28 deletions(-)

diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c
index b4206b0dede5..1f638759a953 100644
--- a/drivers/usb/gadget/function/f_ncm.c
+++ b/drivers/usb/gadget/function/f_ncm.c
@@ -1189,7 +1189,6 @@ static int ncm_unwrap_ntb(struct gether *port,
 	const struct ndp_parser_opts *opts = ncm->parser_opts;
 	unsigned	crc_len = ncm->is_crc ? sizeof(uint32_t) : 0;
 	int		dgram_counter;
-	bool		ndp_after_header;
 
 	/* dwSignature */
 	if (get_unaligned_le32(tmp) != opts->nth_sign) {
@@ -1216,7 +1215,6 @@ static int ncm_unwrap_ntb(struct gether *port,
 	}
 
 	ndp_index = get_ncm(&tmp, opts->ndp_index);
-	ndp_after_header = false;
 
 	/* Run through all the NDP's in the NTB */
 	do {
@@ -1232,8 +1230,6 @@ static int ncm_unwrap_ntb(struct gether *port,
 			     ndp_index);
 			goto err;
 		}
-		if (ndp_index == opts->nth_size)
-			ndp_after_header = true;
 
 		/*
 		 * walk through NDP
@@ -1312,37 +1308,13 @@ static int ncm_unwrap_ntb(struct gether *port,
 			index2 = get_ncm(&tmp, opts->dgram_item_len);
 			dg_len2 = get_ncm(&tmp, opts->dgram_item_len);
 
-			if (index2 == 0 || dg_len2 == 0)
-				break;
-
 			/* wDatagramIndex[1] */
-			if (ndp_after_header) {
-				if (index2 < opts->nth_size + opts->ndp_size) {
-					INFO(port->func.config->cdev,
-					     "Bad index: %#X\n", index2);
-					goto err;
-				}
-			} else {
-				if (index2 < opts->nth_size + opts->dpe_size) {
-					INFO(port->func.config->cdev,
-					     "Bad index: %#X\n", index2);
-					goto err;
-				}
-			}
 			if (index2 > block_len - opts->dpe_size) {
 				INFO(port->func.config->cdev,
 				     "Bad index: %#X\n", index2);
 				goto err;
 			}
 
-			/* wDatagramLength[1] */
-			if ((dg_len2 < 14 + crc_len) ||
-					(dg_len2 > frame_max)) {
-				INFO(port->func.config->cdev,
-				     "Bad dgram length: %#X\n", dg_len);
-				goto err;
-			}
-
 			/*
 			 * Copy the data into a new skb.
 			 * This ensures the truesize is correct
@@ -1359,6 +1331,8 @@ static int ncm_unwrap_ntb(struct gether *port,
 			ndp_len -= 2 * (opts->dgram_item_len * 2);
 
 			dgram_counter++;
+			if (index2 == 0 || dg_len2 == 0)
+				break;
 		} while (ndp_len > 2 * (opts->dgram_item_len * 2));
 	} while (ndp_index);
 

From 4a5caa4af0df5cd8f1c7f0c2f871d382662b022e Mon Sep 17 00:00:00 2001
From: Milan Broz <gmazyland@gmail.com>
Date: Thu, 20 Aug 2020 19:45:38 +0200
Subject: [PATCH 091/322] dm crypt: document new no_workqueue flags

Commit 39d42fa96ba1 ("dm crypt: add flags to optionally bypass kcryptd
workqueues") introduced new dm-crypt 'no_read_workqueue' and
'no_write_workqueue' flags.

Add documentation to admin guide for them.

Fixes: 39d42fa96ba1 ("dm crypt: add flags to optionally bypass kcryptd workqueues")
Signed-off-by: Milan Broz <gmazyland@gmail.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 Documentation/admin-guide/device-mapper/dm-crypt.rst | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/Documentation/admin-guide/device-mapper/dm-crypt.rst b/Documentation/admin-guide/device-mapper/dm-crypt.rst
index 8f4a3f889d43..40dc2df58cd5 100644
--- a/Documentation/admin-guide/device-mapper/dm-crypt.rst
+++ b/Documentation/admin-guide/device-mapper/dm-crypt.rst
@@ -121,6 +121,14 @@ submit_from_crypt_cpus
     thread because it benefits CFQ to have writes submitted using the
     same context.
 
+no_read_workqueue
+    Bypass dm-crypt internal workqueue and process read requests synchronously.
+
+no_write_workqueue
+    Bypass dm-crypt internal workqueue and process write requests synchronously.
+    This option is automatically enabled for host-managed zoned block devices
+    (e.g. host-managed SMR hard-disks).
+
 integrity:<bytes>:<type>
     The device requires additional <bytes> metadata per-sector stored
     in per-bio integrity structure. This metadata must by provided

From 4c07ae0ad493b7b2d3dd3e53870e594f136ce8a5 Mon Sep 17 00:00:00 2001
From: Milan Broz <gmazyland@gmail.com>
Date: Thu, 20 Aug 2020 21:20:26 +0200
Subject: [PATCH 092/322] dm crypt: document encrypted keyring key option

Commit 27f5411a718c4 ("dm crypt: support using encrypted keys")
introduced support for encrypted keyring type.

Fix documentation in admin guide to mention this type.

Fixes: 27f5411a718c4 ("dm crypt: support using encrypted keys")
Signed-off-by: Milan Broz <gmazyland@gmail.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 Documentation/admin-guide/device-mapper/dm-crypt.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/device-mapper/dm-crypt.rst b/Documentation/admin-guide/device-mapper/dm-crypt.rst
index 40dc2df58cd5..bc28a9527ee5 100644
--- a/Documentation/admin-guide/device-mapper/dm-crypt.rst
+++ b/Documentation/admin-guide/device-mapper/dm-crypt.rst
@@ -67,7 +67,7 @@ Parameters::
     the value passed in <key_size>.
 
 <key_type>
-    Either 'logon' or 'user' kernel key type.
+    Either 'logon', 'user' or 'encrypted' kernel key type.
 
 <key_description>
     The kernel keyring key description crypt target should look for

From bd805274577457f301bf826f86543f550cf5c1ef Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Tue, 22 Sep 2020 11:15:50 +0200
Subject: [PATCH 093/322] cpuidle: Drop misleading comments about RCU usage

The commit 1098582a0f6c ("sched,idle,rcu: Push rcu_idle deeper into the
idle path"), moved the calls rcu_idle_enter|exit() into the cpuidle core.

However, it forgot to remove a couple of comments in enter_s2idle_proper()
about why RCU_NONIDLE earlier was needed. So, let's drop them as they have
become a bit misleading.

Fixes: 1098582a0f6c ("sched,idle,rcu: Push rcu_idle deeper into the idle path")
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/cpuidle.c | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 6c7e5621cf9a..29e84687f3c3 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -142,11 +142,6 @@ static void enter_s2idle_proper(struct cpuidle_driver *drv,
 
 	time_start = ns_to_ktime(local_clock());
 
-	/*
-	 * trace_suspend_resume() called by tick_freeze() for the last CPU
-	 * executing it contains RCU usage regarded as invalid in the idle
-	 * context, so tell RCU about that.
-	 */
 	tick_freeze();
 	/*
 	 * The state used here cannot be a "coupled" one, because the "coupled"
@@ -159,11 +154,6 @@ static void enter_s2idle_proper(struct cpuidle_driver *drv,
 	target_state->enter_s2idle(dev, drv, index);
 	if (WARN_ON_ONCE(!irqs_disabled()))
 		local_irq_disable();
-	/*
-	 * timekeeping_resume() that will be called by tick_unfreeze() for the
-	 * first CPU executing it calls functions containing RCU read-side
-	 * critical sections, so tell RCU about that.
-	 */
 	if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE))
 		rcu_idle_exit();
 	tick_unfreeze();

From f3bb0f796f5ffe32f0fbdce5b1b12eb85511158f Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Tue, 22 Sep 2020 14:40:46 +0200
Subject: [PATCH 094/322] clk: samsung: exynos4: mark 'chipid' clock as
 CLK_IGNORE_UNUSED

The ChipID IO region has it's own clock, which is being disabled while
scanning for unused clocks. It turned out that some CPU hotplug, CPU idle
or even SOC firmware code depends on the reads from that area. Fix the
mysterious hang caused by entering deep CPU idle state by ignoring the
'chipid' clock during unused clocks scan, as there are no direct clients
for it which will keep it enabled.

Fixes: e062b571777f ("clk: exynos4: register clocks using common clock framework")
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Link: https://lore.kernel.org/r/20200922124046.10496-1-m.szyprowski@samsung.com
Reviewed-by: Krzysztof Kozlowski <krzk@kernel.org>
Acked-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 drivers/clk/samsung/clk-exynos4.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/clk/samsung/clk-exynos4.c b/drivers/clk/samsung/clk-exynos4.c
index 51564fc23c63..f4086287bb71 100644
--- a/drivers/clk/samsung/clk-exynos4.c
+++ b/drivers/clk/samsung/clk-exynos4.c
@@ -927,7 +927,7 @@ static const struct samsung_gate_clock exynos4210_gate_clks[] __initconst = {
 	GATE(CLK_PCIE, "pcie", "aclk133", GATE_IP_FSYS, 14, 0, 0),
 	GATE(CLK_SMMU_PCIE, "smmu_pcie", "aclk133", GATE_IP_FSYS, 18, 0, 0),
 	GATE(CLK_MODEMIF, "modemif", "aclk100", GATE_IP_PERIL, 28, 0, 0),
-	GATE(CLK_CHIPID, "chipid", "aclk100", E4210_GATE_IP_PERIR, 0, 0, 0),
+	GATE(CLK_CHIPID, "chipid", "aclk100", E4210_GATE_IP_PERIR, 0, CLK_IGNORE_UNUSED, 0),
 	GATE(CLK_SYSREG, "sysreg", "aclk100", E4210_GATE_IP_PERIR, 0,
 			CLK_IGNORE_UNUSED, 0),
 	GATE(CLK_HDMI_CEC, "hdmi_cec", "aclk100", E4210_GATE_IP_PERIR, 11, 0,
@@ -969,7 +969,7 @@ static const struct samsung_gate_clock exynos4x12_gate_clks[] __initconst = {
 		0),
 	GATE(CLK_TSADC, "tsadc", "aclk133", E4X12_GATE_BUS_FSYS1, 16, 0, 0),
 	GATE(CLK_MIPI_HSI, "mipi_hsi", "aclk133", GATE_IP_FSYS, 10, 0, 0),
-	GATE(CLK_CHIPID, "chipid", "aclk100", E4X12_GATE_IP_PERIR, 0, 0, 0),
+	GATE(CLK_CHIPID, "chipid", "aclk100", E4X12_GATE_IP_PERIR, 0, CLK_IGNORE_UNUSED, 0),
 	GATE(CLK_SYSREG, "sysreg", "aclk100", E4X12_GATE_IP_PERIR, 1,
 			CLK_IGNORE_UNUSED, 0),
 	GATE(CLK_HDMI_CEC, "hdmi_cec", "aclk100", E4X12_GATE_IP_PERIR, 11, 0,

From b02cf0c4736c65c6667f396efaae6b5521e82abf Mon Sep 17 00:00:00 2001
From: Dinh Nguyen <dinguyen@kernel.org>
Date: Mon, 31 Aug 2020 15:26:57 -0500
Subject: [PATCH 095/322] clk: socfpga: stratix10: fix the divider for the
 emac_ptp_free_clk

The fixed divider the emac_ptp_free_clk should be 2, not 4.

Fixes: 07afb8db7340 ("clk: socfpga: stratix10: add clock driver for
Stratix10 platform")
Cc: stable@vger.kernel.org
Signed-off-by: Dinh Nguyen <dinguyen@kernel.org>
Link: https://lore.kernel.org/r/20200831202657.8224-1-dinguyen@kernel.org
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 drivers/clk/socfpga/clk-s10.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clk/socfpga/clk-s10.c b/drivers/clk/socfpga/clk-s10.c
index c1dfc9b34e4e..661a8e9bfb9b 100644
--- a/drivers/clk/socfpga/clk-s10.c
+++ b/drivers/clk/socfpga/clk-s10.c
@@ -209,7 +209,7 @@ static const struct stratix10_perip_cnt_clock s10_main_perip_cnt_clks[] = {
 	{ STRATIX10_EMAC_B_FREE_CLK, "emacb_free_clk", NULL, emacb_free_mux, ARRAY_SIZE(emacb_free_mux),
 	  0, 0, 2, 0xB0, 1},
 	{ STRATIX10_EMAC_PTP_FREE_CLK, "emac_ptp_free_clk", NULL, emac_ptp_free_mux,
-	  ARRAY_SIZE(emac_ptp_free_mux), 0, 0, 4, 0xB0, 2},
+	  ARRAY_SIZE(emac_ptp_free_mux), 0, 0, 2, 0xB0, 2},
 	{ STRATIX10_GPIO_DB_FREE_CLK, "gpio_db_free_clk", NULL, gpio_db_free_mux,
 	  ARRAY_SIZE(gpio_db_free_mux), 0, 0, 0, 0xB0, 3},
 	{ STRATIX10_SDMMC_FREE_CLK, "sdmmc_free_clk", NULL, sdmmc_free_mux,

From a7b3474cbb2864d5500d5e4f48dd57c903975cab Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 22 Sep 2020 09:58:52 +0200
Subject: [PATCH 096/322] x86/irq: Make run_on_irqstack_cond() typesafe

Sami reported that run_on_irqstack_cond() requires the caller to cast
functions to mismatching types, which trips indirect call Control-Flow
Integrity (CFI) in Clang.

Instead of disabling CFI on that function, provide proper helpers for
the three call variants. The actual ASM code stays the same as that is
out of reach.

 [ bp: Fix __run_on_irqstack() prototype to match. ]

Fixes: 931b94145981 ("x86/entry: Provide helpers for executing on the irqstack")
Reported-by: Nathan Chancellor <natechancellor@gmail.com>
Reported-by: Sami Tolvanen <samitolvanen@google.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Borislav Petkov <bp@suse.de>
Tested-by: Sami Tolvanen <samitolvanen@google.com>
Cc: <stable@vger.kernel.org>
Link: https://github.com/ClangBuiltLinux/linux/issues/1052
Link: https://lkml.kernel.org/r/87pn6eb5tv.fsf@nanos.tec.linutronix.de
---
 arch/x86/entry/common.c          |  2 +-
 arch/x86/entry/entry_64.S        |  2 +
 arch/x86/include/asm/idtentry.h  |  2 +-
 arch/x86/include/asm/irq_stack.h | 71 ++++++++++++++++++++++++++++----
 arch/x86/kernel/irq.c            |  2 +-
 arch/x86/kernel/irq_64.c         |  2 +-
 6 files changed, 68 insertions(+), 13 deletions(-)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 2f84c7ca74ea..870efeec8bda 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -299,7 +299,7 @@ __visible noinstr void xen_pv_evtchn_do_upcall(struct pt_regs *regs)
 	old_regs = set_irq_regs(regs);
 
 	instrumentation_begin();
-	run_on_irqstack_cond(__xen_pv_evtchn_do_upcall, NULL, regs);
+	run_on_irqstack_cond(__xen_pv_evtchn_do_upcall, regs);
 	instrumentation_begin();
 
 	set_irq_regs(old_regs);
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 70dea9337816..d977079a7d02 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -682,6 +682,8 @@ SYM_CODE_END(.Lbad_gs)
  * rdx: Function argument (can be NULL if none)
  */
 SYM_FUNC_START(asm_call_on_stack)
+SYM_INNER_LABEL(asm_call_sysvec_on_stack, SYM_L_GLOBAL)
+SYM_INNER_LABEL(asm_call_irq_on_stack, SYM_L_GLOBAL)
 	/*
 	 * Save the frame pointer unconditionally. This allows the ORC
 	 * unwinder to handle the stack switch.
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index a43366191212..a0638640f1ed 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -242,7 +242,7 @@ __visible noinstr void func(struct pt_regs *regs)			\
 	instrumentation_begin();					\
 	irq_enter_rcu();						\
 	kvm_set_cpu_l1tf_flush_l1d();					\
-	run_on_irqstack_cond(__##func, regs, regs);			\
+	run_sysvec_on_irqstack_cond(__##func, regs);			\
 	irq_exit_rcu();							\
 	instrumentation_end();						\
 	irqentry_exit(regs, state);					\
diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h
index 4ae66f097101..775816965c6a 100644
--- a/arch/x86/include/asm/irq_stack.h
+++ b/arch/x86/include/asm/irq_stack.h
@@ -12,20 +12,50 @@ static __always_inline bool irqstack_active(void)
 	return __this_cpu_read(irq_count) != -1;
 }
 
-void asm_call_on_stack(void *sp, void *func, void *arg);
+void asm_call_on_stack(void *sp, void (*func)(void), void *arg);
+void asm_call_sysvec_on_stack(void *sp, void (*func)(struct pt_regs *regs),
+			      struct pt_regs *regs);
+void asm_call_irq_on_stack(void *sp, void (*func)(struct irq_desc *desc),
+			   struct irq_desc *desc);
 
-static __always_inline void __run_on_irqstack(void *func, void *arg)
+static __always_inline void __run_on_irqstack(void (*func)(void))
 {
 	void *tos = __this_cpu_read(hardirq_stack_ptr);
 
 	__this_cpu_add(irq_count, 1);
-	asm_call_on_stack(tos - 8, func, arg);
+	asm_call_on_stack(tos - 8, func, NULL);
+	__this_cpu_sub(irq_count, 1);
+}
+
+static __always_inline void
+__run_sysvec_on_irqstack(void (*func)(struct pt_regs *regs),
+			 struct pt_regs *regs)
+{
+	void *tos = __this_cpu_read(hardirq_stack_ptr);
+
+	__this_cpu_add(irq_count, 1);
+	asm_call_sysvec_on_stack(tos - 8, func, regs);
+	__this_cpu_sub(irq_count, 1);
+}
+
+static __always_inline void
+__run_irq_on_irqstack(void (*func)(struct irq_desc *desc),
+		      struct irq_desc *desc)
+{
+	void *tos = __this_cpu_read(hardirq_stack_ptr);
+
+	__this_cpu_add(irq_count, 1);
+	asm_call_irq_on_stack(tos - 8, func, desc);
 	__this_cpu_sub(irq_count, 1);
 }
 
 #else /* CONFIG_X86_64 */
 static inline bool irqstack_active(void) { return false; }
-static inline void __run_on_irqstack(void *func, void *arg) { }
+static inline void __run_on_irqstack(void (*func)(void)) { }
+static inline void __run_sysvec_on_irqstack(void (*func)(struct pt_regs *regs),
+					    struct pt_regs *regs) { }
+static inline void __run_irq_on_irqstack(void (*func)(struct irq_desc *desc),
+					 struct irq_desc *desc) { }
 #endif /* !CONFIG_X86_64 */
 
 static __always_inline bool irq_needs_irq_stack(struct pt_regs *regs)
@@ -37,17 +67,40 @@ static __always_inline bool irq_needs_irq_stack(struct pt_regs *regs)
 	return !user_mode(regs) && !irqstack_active();
 }
 
-static __always_inline void run_on_irqstack_cond(void *func, void *arg,
+
+static __always_inline void run_on_irqstack_cond(void (*func)(void),
 						 struct pt_regs *regs)
 {
-	void (*__func)(void *arg) = func;
-
 	lockdep_assert_irqs_disabled();
 
 	if (irq_needs_irq_stack(regs))
-		__run_on_irqstack(__func, arg);
+		__run_on_irqstack(func);
 	else
-		__func(arg);
+		func();
+}
+
+static __always_inline void
+run_sysvec_on_irqstack_cond(void (*func)(struct pt_regs *regs),
+			    struct pt_regs *regs)
+{
+	lockdep_assert_irqs_disabled();
+
+	if (irq_needs_irq_stack(regs))
+		__run_sysvec_on_irqstack(func, regs);
+	else
+		func(regs);
+}
+
+static __always_inline void
+run_irq_on_irqstack_cond(void (*func)(struct irq_desc *desc), struct irq_desc *desc,
+			 struct pt_regs *regs)
+{
+	lockdep_assert_irqs_disabled();
+
+	if (irq_needs_irq_stack(regs))
+		__run_irq_on_irqstack(func, desc);
+	else
+		func(desc);
 }
 
 #endif
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 181060247e3c..c5dd50369e2f 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -227,7 +227,7 @@ static __always_inline void handle_irq(struct irq_desc *desc,
 				       struct pt_regs *regs)
 {
 	if (IS_ENABLED(CONFIG_X86_64))
-		run_on_irqstack_cond(desc->handle_irq, desc, regs);
+		run_irq_on_irqstack_cond(desc->handle_irq, desc, regs);
 	else
 		__handle_irq(desc, regs);
 }
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 1b4fe93a86c5..440eed558558 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -74,5 +74,5 @@ int irq_init_percpu_irqstack(unsigned int cpu)
 
 void do_softirq_own_stack(void)
 {
-	run_on_irqstack_cond(__do_softirq, NULL, NULL);
+	run_on_irqstack_cond(__do_softirq, NULL);
 }

From 149415586243bd0ea729760fb6dd7b3c50601871 Mon Sep 17 00:00:00 2001
From: Sudhakar Panneerselvam <sudhakar.panneerselvam@oracle.com>
Date: Wed, 16 Sep 2020 23:54:31 +0000
Subject: [PATCH 097/322] scsi: target: Fix lun lookup for
 TARGET_SCF_LOOKUP_LUN_FROM_TAG case

transport_lookup_tmr_lun() uses "orig_fe_lun" member of struct se_cmd for
the lookup. Hence, update this field directly for the
TARGET_SCF_LOOKUP_LUN_FROM_TAG case.

Link: https://lore.kernel.org/r/1600300471-26135-1-git-send-email-sudhakar.panneerselvam@oracle.com
Fixes: a36840d80027 ("target: Initialize LUN in transport_init_se_cmd()")
Reported-by: Martin Wilck <mwilck@suse.com>
Reviewed-by: Mike Christie <michael.christie@oracle.com>
Signed-off-by: Sudhakar Panneerselvam <sudhakar.panneerselvam@oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/target/target_core_transport.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 9fb0be0aa620..8dd289214dd8 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -1840,7 +1840,8 @@ int target_submit_tmr(struct se_cmd *se_cmd, struct se_session *se_sess,
 	 * out unpacked_lun for the original se_cmd.
 	 */
 	if (tm_type == TMR_ABORT_TASK && (flags & TARGET_SCF_LOOKUP_LUN_FROM_TAG)) {
-		if (!target_lookup_lun_from_tag(se_sess, tag, &unpacked_lun))
+		if (!target_lookup_lun_from_tag(se_sess, tag,
+						&se_cmd->orig_fe_lun))
 			goto failure;
 	}
 

From 0c309ed17c50e7eeaf3c66102b475198064e80da Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Tue, 8 Sep 2020 19:57:07 +0900
Subject: [PATCH 098/322] PM / devfreq: Add timer type to devfreq_summary
 debugfs

The commit 4dc3bab8687f ("PM / devfreq: Add support delayed timer for
polling mode") supports the delayed timer but this commit missed
the adding the timer type to devfreq_summary debugfs node.
Add the timer type to devfreq_summary debugfs.

Fixes: 4dc3bab8687f ("PM / devfreq: Add support delayed timer for polling mode")
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
---
 drivers/devfreq/devfreq.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 561d91b2d3bf..071b59fe84d2 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -1766,20 +1766,23 @@ static int devfreq_summary_show(struct seq_file *s, void *data)
 	struct devfreq *p_devfreq = NULL;
 	unsigned long cur_freq, min_freq, max_freq;
 	unsigned int polling_ms;
+	unsigned int timer;
 
-	seq_printf(s, "%-30s %-30s %-15s %10s %12s %12s %12s\n",
+	seq_printf(s, "%-30s %-30s %-15s %-10s %10s %12s %12s %12s\n",
 			"dev",
 			"parent_dev",
 			"governor",
+			"timer",
 			"polling_ms",
 			"cur_freq_Hz",
 			"min_freq_Hz",
 			"max_freq_Hz");
-	seq_printf(s, "%30s %30s %15s %10s %12s %12s %12s\n",
+	seq_printf(s, "%30s %30s %15s %10s %10s %12s %12s %12s\n",
 			"------------------------------",
 			"------------------------------",
 			"---------------",
 			"----------",
+			"----------",
 			"------------",
 			"------------",
 			"------------");
@@ -1803,13 +1806,15 @@ static int devfreq_summary_show(struct seq_file *s, void *data)
 		cur_freq = devfreq->previous_freq;
 		get_freq_range(devfreq, &min_freq, &max_freq);
 		polling_ms = devfreq->profile->polling_ms;
+		timer = devfreq->profile->timer;
 		mutex_unlock(&devfreq->lock);
 
 		seq_printf(s,
-			"%-30s %-30s %-15s %10d %12ld %12ld %12ld\n",
+			"%-30s %-30s %-15s %-10s %10d %12ld %12ld %12ld\n",
 			dev_name(&devfreq->dev),
 			p_devfreq ? dev_name(&p_devfreq->dev) : "null",
 			devfreq->governor_name,
+			polling_ms ? timer_name[timer] : "null",
 			polling_ms,
 			cur_freq,
 			min_freq,

From 6bf560766a8ef5afe4faa3244220cf5b3a934549 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 8 Sep 2020 10:25:57 +0300
Subject: [PATCH 099/322] PM / devfreq: tegra30: Disable clock on error in
 probe

This error path needs to call clk_disable_unprepare().

Fixes: 7296443b900e ("PM / devfreq: tegra30: Handle possible round-rate error")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Dmitry Osipenko <digetx@gmail.com>
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
---
 drivers/devfreq/tegra30-devfreq.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/devfreq/tegra30-devfreq.c b/drivers/devfreq/tegra30-devfreq.c
index e94a27804c20..dedd39de7367 100644
--- a/drivers/devfreq/tegra30-devfreq.c
+++ b/drivers/devfreq/tegra30-devfreq.c
@@ -836,7 +836,8 @@ static int tegra_devfreq_probe(struct platform_device *pdev)
 	rate = clk_round_rate(tegra->emc_clock, ULONG_MAX);
 	if (rate < 0) {
 		dev_err(&pdev->dev, "Failed to round clock rate: %ld\n", rate);
-		return rate;
+		err = rate;
+		goto disable_clk;
 	}
 
 	tegra->max_freq = rate / KHZ;
@@ -897,6 +898,7 @@ remove_opps:
 	dev_pm_opp_remove_all_dynamic(&pdev->dev);
 
 	reset_control_reset(tegra->reset);
+disable_clk:
 	clk_disable_unprepare(tegra->clock);
 
 	return err;

From 16cce04cdb200ba905d1241b425ac48da5a9ace5 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Wed, 16 Sep 2020 11:50:22 +0100
Subject: [PATCH 100/322] drm/i915/selftests: Push the fake iommu device from
 the stack to data

Since we store a pointer to the fake iommu device that is allocated on
the stack, as soon as we leave the function it goes out of scope and any
future dereference is undefined behaviour. Just in case we may need to
look at the fake iommu device after initialiation, move the allocation
from the stack into the data.

Fixes: 01b9d4e21148 ("iommu/vt-d: Use dev_iommu_priv_get/set()")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200916105022.28316-2-chris@chris-wilson.co.uk
(cherry picked from commit 9f9f4101fc98db56714e71676d5a1e2d27e01f7e)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/selftests/mock_gem_device.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index f127e633f7ca..397c313a8b69 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -118,11 +118,11 @@ static struct dev_pm_domain pm_domain = {
 
 struct drm_i915_private *mock_gem_device(void)
 {
+#if IS_ENABLED(CONFIG_IOMMU_API) && defined(CONFIG_INTEL_IOMMU)
+	static struct dev_iommu fake_iommu = { .priv = (void *)-1 };
+#endif
 	struct drm_i915_private *i915;
 	struct pci_dev *pdev;
-#if IS_ENABLED(CONFIG_IOMMU_API) && defined(CONFIG_INTEL_IOMMU)
-	struct dev_iommu iommu;
-#endif
 	int err;
 
 	pdev = kzalloc(sizeof(*pdev), GFP_KERNEL);
@@ -141,10 +141,8 @@ struct drm_i915_private *mock_gem_device(void)
 	dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
 
 #if IS_ENABLED(CONFIG_IOMMU_API) && defined(CONFIG_INTEL_IOMMU)
-	/* HACK HACK HACK to disable iommu for the fake device; force identity mapping */
-	memset(&iommu, 0, sizeof(iommu));
-	iommu.priv = (void *)-1;
-	pdev->dev.iommu = &iommu;
+	/* HACK to disable iommu for the fake device; force identity mapping */
+	pdev->dev.iommu = &fake_iommu;
 #endif
 
 	pci_set_drvdata(pdev, i915);

From 955921289dcdbc49b46a731ec07978fb7d4e1fc7 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 23 Sep 2020 13:50:12 +0200
Subject: [PATCH 101/322] ACPI: processor: Fix build for
 ARCH_APICTIMER_STOPS_ON_C3 unset

Fix the lapic_timer_needs_broadcast() stub for
ARCH_APICTIMER_STOPS_ON_C3 unset to actually return
a value.

Fixes: aa6b43d57f99 ("ACPI: processor: Use CPUIDLE_FLAG_TIMER_STOP")
Reported-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/processor_idle.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 7ecb90e90afd..f66236cff69b 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -176,6 +176,7 @@ static void lapic_timer_propagate_broadcast(struct acpi_processor *pr) { }
 static bool lapic_timer_needs_broadcast(struct acpi_processor *pr,
 					struct acpi_processor_cx *cx)
 {
+	return false;
 }
 
 #endif

From b13812ddea615b6507beef24f76540c0c1143c5c Mon Sep 17 00:00:00 2001
From: Jiaxun Yang <jiaxun.yang@flygoat.com>
Date: Wed, 23 Sep 2020 18:33:12 +0800
Subject: [PATCH 102/322] MIPS: Loongson2ef: Disable Loongson MMI instructions

It was missed when I was forking Loongson2ef from Loongson64 but
should be applied to Loongson2ef as march=loongson2f
will also enable Loongson MMI in GCC-9+.

Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
Fixes: 71e2f4dd5a65 ("MIPS: Fork loongson2ef from loongson64")
Reported-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: stable@vger.kernel.org # v5.8+
Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
---
 arch/mips/loongson2ef/Platform | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/mips/loongson2ef/Platform b/arch/mips/loongson2ef/Platform
index 4ab55f1123a0..ae023b9a1c51 100644
--- a/arch/mips/loongson2ef/Platform
+++ b/arch/mips/loongson2ef/Platform
@@ -44,6 +44,10 @@ ifdef CONFIG_CPU_LOONGSON2F_WORKAROUNDS
   endif
 endif
 
+# Some -march= flags enable MMI instructions, and GCC complains about that
+# support being enabled alongside -msoft-float. Thus explicitly disable MMI.
+cflags-y += $(call cc-option,-mno-loongson-mmi)
+
 #
 # Loongson Machines' Support
 #

From e393fbe6fa27af23f78df6e16a8fd2963578a8c4 Mon Sep 17 00:00:00 2001
From: Wei Li <liwei391@huawei.com>
Date: Wed, 23 Sep 2020 14:53:12 +0800
Subject: [PATCH 103/322] MIPS: Add the missing 'CPU_1074K' into
 __get_cpu_type()

Commit 442e14a2c55e ("MIPS: Add 1074K CPU support explicitly.") split
1074K from the 74K as an unique CPU type, while it missed to add the
'CPU_1074K' in __get_cpu_type(). So let's add it back.

Fixes: 442e14a2c55e ("MIPS: Add 1074K CPU support explicitly.")
Signed-off-by: Wei Li <liwei391@huawei.com>
Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
---
 arch/mips/include/asm/cpu-type.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/mips/include/asm/cpu-type.h b/arch/mips/include/asm/cpu-type.h
index 75a7a382da09..3288cef4b168 100644
--- a/arch/mips/include/asm/cpu-type.h
+++ b/arch/mips/include/asm/cpu-type.h
@@ -47,6 +47,7 @@ static inline int __pure __get_cpu_type(const int cpu_type)
 	case CPU_34K:
 	case CPU_1004K:
 	case CPU_74K:
+	case CPU_1074K:
 	case CPU_M14KC:
 	case CPU_M14KEC:
 	case CPU_INTERAPTIV:

From be090fa62080d8501a5651a73cb954721966b125 Mon Sep 17 00:00:00 2001
From: Wei Li <liwei391@huawei.com>
Date: Wed, 23 Sep 2020 14:53:26 +0800
Subject: [PATCH 104/322] MIPS: BCM47XX: Remove the needless check with the
 1074K
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As there is no known soc powered by mips 1074K in bcm47xx series,
the check with 1074K is needless. So just remove it.

Link: https://wireless.wiki.kernel.org/en/users/Drivers/b43/soc
Fixes: 442e14a2c55e ("MIPS: Add 1074K CPU support explicitly.")
Signed-off-by: Wei Li <liwei391@huawei.com>
Acked-by: Rafał Miłecki <rafal@milecki.pl>
Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
---
 arch/mips/bcm47xx/setup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/bcm47xx/setup.c b/arch/mips/bcm47xx/setup.c
index 82627c264964..01427bde2397 100644
--- a/arch/mips/bcm47xx/setup.c
+++ b/arch/mips/bcm47xx/setup.c
@@ -148,7 +148,7 @@ void __init plat_mem_setup(void)
 {
 	struct cpuinfo_mips *c = &current_cpu_data;
 
-	if ((c->cputype == CPU_74K) || (c->cputype == CPU_1074K)) {
+	if (c->cputype == CPU_74K) {
 		pr_info("Using bcma bus\n");
 #ifdef CONFIG_BCM47XX_BCMA
 		bcm47xx_bus_type = BCM47XX_BUS_TYPE_BCMA;

From b96e6506c2ea7c8e3ef0860169b6d0ba2f1aca9f Mon Sep 17 00:00:00 2001
From: Mohammed Gamal <mgamal@redhat.com>
Date: Thu, 3 Sep 2020 16:11:22 +0200
Subject: [PATCH 105/322] KVM: x86: VMX: Make smaller physical guest address
 space support user-configurable

This patch exposes allow_smaller_maxphyaddr to the user as a module parameter.
Since smaller physical address spaces are only supported on VMX, the
parameter is only exposed in the kvm_intel module.

For now disable support by default, and let the user decide if they want
to enable it.

Modifications to VMX page fault and EPT violation handling will depend
on whether that parameter is enabled.

Signed-off-by: Mohammed Gamal <mgamal@redhat.com>
Message-Id: <20200903141122.72908-1-mgamal@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/vmx.c | 15 ++++++++++-----
 arch/x86/kvm/vmx/vmx.h |  5 ++++-
 arch/x86/kvm/x86.c     |  2 +-
 3 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 75cd720c9e8c..f0384e93548a 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -129,6 +129,9 @@ static bool __read_mostly enable_preemption_timer = 1;
 module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
 #endif
 
+extern bool __read_mostly allow_smaller_maxphyaddr;
+module_param(allow_smaller_maxphyaddr, bool, S_IRUGO);
+
 #define KVM_VM_CR0_ALWAYS_OFF (X86_CR0_NW | X86_CR0_CD)
 #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR0_NE
 #define KVM_VM_CR0_ALWAYS_ON				\
@@ -4803,6 +4806,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
 			 * EPT will cause page fault only if we need to
 			 * detect illegal GPAs.
 			 */
+			WARN_ON_ONCE(!allow_smaller_maxphyaddr);
 			kvm_fixup_and_inject_pf_error(vcpu, cr2, error_code);
 			return 1;
 		} else
@@ -5331,7 +5335,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
 	 * would also use advanced VM-exit information for EPT violations to
 	 * reconstruct the page fault error code.
 	 */
-	if (unlikely(kvm_mmu_is_illegal_gpa(vcpu, gpa)))
+	if (unlikely(allow_smaller_maxphyaddr && kvm_mmu_is_illegal_gpa(vcpu, gpa)))
 		return kvm_emulate_instruction(vcpu, 0);
 
 	return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0);
@@ -8305,11 +8309,12 @@ static int __init vmx_init(void)
 	vmx_check_vmcs12_offsets();
 
 	/*
-	 * Intel processors don't have problems with
-	 * GUEST_MAXPHYADDR < HOST_MAXPHYADDR so enable
-	 * it for VMX by default
+	 * Shadow paging doesn't have a (further) performance penalty
+	 * from GUEST_MAXPHYADDR < HOST_MAXPHYADDR so enable it
+	 * by default
 	 */
-	allow_smaller_maxphyaddr = true;
+	if (!enable_ept)
+		allow_smaller_maxphyaddr = true;
 
 	return 0;
 }
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index a2f82127c170..a0e47720f60c 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -552,7 +552,10 @@ static inline bool vmx_has_waitpkg(struct vcpu_vmx *vmx)
 
 static inline bool vmx_need_pf_intercept(struct kvm_vcpu *vcpu)
 {
-	return !enable_ept || cpuid_maxphyaddr(vcpu) < boot_cpu_data.x86_phys_bits;
+	if (!enable_ept)
+		return true;
+
+	return allow_smaller_maxphyaddr && cpuid_maxphyaddr(vcpu) < boot_cpu_data.x86_phys_bits;
 }
 
 void dump_vmcs(void);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e3de0fe5af37..67362607e396 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -188,7 +188,7 @@ static struct kvm_shared_msrs __percpu *shared_msrs;
 u64 __read_mostly host_efer;
 EXPORT_SYMBOL_GPL(host_efer);
 
-bool __read_mostly allow_smaller_maxphyaddr;
+bool __read_mostly allow_smaller_maxphyaddr = 0;
 EXPORT_SYMBOL_GPL(allow_smaller_maxphyaddr);
 
 static u64 __read_mostly host_xss;

From 18391e5e9cf59226354a864e5befa3c764eb8903 Mon Sep 17 00:00:00 2001
From: Yang Weijiang <weijiang.yang@intel.com>
Date: Wed, 26 Aug 2020 09:55:24 +0800
Subject: [PATCH 106/322] selftests: kvm: Fix assert failure in single-step
 test

This is a follow-up patch to fix an issue left in commit:
98b0bf02738004829d7e26d6cb47b2e469aaba86
selftests: kvm: Use a shorter encoding to clear RAX

With the change in the commit, we also need to modify "xor" instruction
length from 3 to 2 in array ss_size accordingly to pass below check:

for (i = 0; i < (sizeof(ss_size) / sizeof(ss_size[0])); i++) {
        target_rip += ss_size[i];
        CLEAR_DEBUG();
        debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
        debug.arch.debugreg[7] = 0x00000400;
        APPLY_DEBUG();
        vcpu_run(vm, VCPU_ID);
        TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
                    run->debug.arch.exception == DB_VECTOR &&
                    run->debug.arch.pc == target_rip &&
                    run->debug.arch.dr6 == target_dr6,
                    "SINGLE_STEP[%d]: exit %d exception %d rip 0x%llx "
                    "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)",
                    i, run->exit_reason, run->debug.arch.exception,
                    run->debug.arch.pc, target_rip, run->debug.arch.dr6,
                    target_dr6);
}

Reported-by: kernel test robot <rong.a.chen@intel.com>
Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
Message-Id: <20200826015524.13251-1-weijiang.yang@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/x86_64/debug_regs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/kvm/x86_64/debug_regs.c b/tools/testing/selftests/kvm/x86_64/debug_regs.c
index b8d14f9db5f9..2fc6b3af81a1 100644
--- a/tools/testing/selftests/kvm/x86_64/debug_regs.c
+++ b/tools/testing/selftests/kvm/x86_64/debug_regs.c
@@ -73,7 +73,7 @@ int main(void)
 	int i;
 	/* Instruction lengths starting at ss_start */
 	int ss_size[4] = {
-		3,		/* xor */
+		2,		/* xor */
 		2,		/* cpuid */
 		5,		/* mov */
 		2,		/* rdmsr */

From fbb5a79d2fe7b01c6424fbbc04368373b1672d61 Mon Sep 17 00:00:00 2001
From: Icenowy Zheng <icenowy@aosc.io>
Date: Wed, 23 Sep 2020 08:51:42 +0800
Subject: [PATCH 107/322] regulator: axp20x: fix LDO2/4 description

Currently we wrongly set the mask of value of LDO2/4 both to the mask of
LDO2, and the LDO4 voltage configuration is left untouched. This leads
to conflict when LDO2/4 are both in use.

Fix this issue by setting different vsel_mask to both regulators.

Fixes: db4a555f7c4c ("regulator: axp20x: use defines for masks")
Signed-off-by: Icenowy Zheng <icenowy@aosc.io>
Link: https://lore.kernel.org/r/20200923005142.147135-1-icenowy@aosc.io
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/axp20x-regulator.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/regulator/axp20x-regulator.c b/drivers/regulator/axp20x-regulator.c
index fbc95cadaf53..126649c172e1 100644
--- a/drivers/regulator/axp20x-regulator.c
+++ b/drivers/regulator/axp20x-regulator.c
@@ -42,8 +42,9 @@
 
 #define AXP20X_DCDC2_V_OUT_MASK		GENMASK(5, 0)
 #define AXP20X_DCDC3_V_OUT_MASK		GENMASK(7, 0)
-#define AXP20X_LDO24_V_OUT_MASK		GENMASK(7, 4)
+#define AXP20X_LDO2_V_OUT_MASK		GENMASK(7, 4)
 #define AXP20X_LDO3_V_OUT_MASK		GENMASK(6, 0)
+#define AXP20X_LDO4_V_OUT_MASK		GENMASK(3, 0)
 #define AXP20X_LDO5_V_OUT_MASK		GENMASK(7, 4)
 
 #define AXP20X_PWR_OUT_EXTEN_MASK	BIT_MASK(0)
@@ -542,14 +543,14 @@ static const struct regulator_desc axp20x_regulators[] = {
 		 AXP20X_PWR_OUT_CTRL, AXP20X_PWR_OUT_DCDC3_MASK),
 	AXP_DESC_FIXED(AXP20X, LDO1, "ldo1", "acin", 1300),
 	AXP_DESC(AXP20X, LDO2, "ldo2", "ldo24in", 1800, 3300, 100,
-		 AXP20X_LDO24_V_OUT, AXP20X_LDO24_V_OUT_MASK,
+		 AXP20X_LDO24_V_OUT, AXP20X_LDO2_V_OUT_MASK,
 		 AXP20X_PWR_OUT_CTRL, AXP20X_PWR_OUT_LDO2_MASK),
 	AXP_DESC(AXP20X, LDO3, "ldo3", "ldo3in", 700, 3500, 25,
 		 AXP20X_LDO3_V_OUT, AXP20X_LDO3_V_OUT_MASK,
 		 AXP20X_PWR_OUT_CTRL, AXP20X_PWR_OUT_LDO3_MASK),
 	AXP_DESC_RANGES(AXP20X, LDO4, "ldo4", "ldo24in",
 			axp20x_ldo4_ranges, AXP20X_LDO4_V_OUT_NUM_VOLTAGES,
-			AXP20X_LDO24_V_OUT, AXP20X_LDO24_V_OUT_MASK,
+			AXP20X_LDO24_V_OUT, AXP20X_LDO4_V_OUT_MASK,
 			AXP20X_PWR_OUT_CTRL, AXP20X_PWR_OUT_LDO4_MASK),
 	AXP_DESC_IO(AXP20X, LDO5, "ldo5", "ldo5in", 1800, 3300, 100,
 		    AXP20X_LDO5_V_OUT, AXP20X_LDO5_V_OUT_MASK,

From 530b5affc675ade5db4a03f04ed7cd66806c8a1a Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Wed, 23 Sep 2020 15:10:26 +0200
Subject: [PATCH 108/322] spi: fsl-dspi: fix use-after-free in remove path

spi_unregister_controller() not only unregisters the controller, but
also frees the controller. This will free the driver data with it, so
we must not access it later dspi_remove().

Solve this by allocating the driver data separately from the SPI
controller.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Link: https://lore.kernel.org/r/20200923131026.20707-1-s.hauer@pengutronix.de
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-fsl-dspi.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c
index 283f2468a2f4..127323a4b27c 100644
--- a/drivers/spi/spi-fsl-dspi.c
+++ b/drivers/spi/spi-fsl-dspi.c
@@ -1273,11 +1273,14 @@ static int dspi_probe(struct platform_device *pdev)
 	void __iomem *base;
 	bool big_endian;
 
-	ctlr = spi_alloc_master(&pdev->dev, sizeof(struct fsl_dspi));
+	dspi = devm_kzalloc(&pdev->dev, sizeof(*dspi), GFP_KERNEL);
+	if (!dspi)
+		return -ENOMEM;
+
+	ctlr = spi_alloc_master(&pdev->dev, 0);
 	if (!ctlr)
 		return -ENOMEM;
 
-	dspi = spi_controller_get_devdata(ctlr);
 	dspi->pdev = pdev;
 	dspi->ctlr = ctlr;
 
@@ -1414,7 +1417,7 @@ poll_mode:
 	if (dspi->devtype_data->trans_mode != DSPI_DMA_MODE)
 		ctlr->ptp_sts_supported = true;
 
-	platform_set_drvdata(pdev, ctlr);
+	platform_set_drvdata(pdev, dspi);
 
 	ret = spi_register_controller(ctlr);
 	if (ret != 0) {
@@ -1437,8 +1440,7 @@ out_ctlr_put:
 
 static int dspi_remove(struct platform_device *pdev)
 {
-	struct spi_controller *ctlr = platform_get_drvdata(pdev);
-	struct fsl_dspi *dspi = spi_controller_get_devdata(ctlr);
+	struct fsl_dspi *dspi = platform_get_drvdata(pdev);
 
 	/* Disconnect from the SPI framework */
 	spi_unregister_controller(dspi->ctlr);

From df3a57d1f6072d07978bafa7dbd9904cdf8f3e13 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 23 Sep 2020 09:56:59 -0700
Subject: [PATCH 109/322] mm: split out the non-present case from
 copy_one_pte()

This is a purely mechanical split of the copy_one_pte() function.  It's
not immediately obvious when looking at the diff because of the
indentation change, but the way to see what is going on in this commit
is to use the "-w" flag to not show pure whitespace changes, and you see
how the first part of copy_one_pte() is simply lifted out into a
separate function.

And since the non-present case is marked unlikely, don't make the new
function be inlined.  Not that gcc really seems to care, since it looks
like it will inline it anyway due to the whole "single callsite for
static function" logic.  In fact, code generation with the function
split is almost identical to before.  But not marking it inline is the
right thing to do.

This is pure prep-work and cleanup for subsequent changes.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memory.c | 152 ++++++++++++++++++++++++++++------------------------
 1 file changed, 82 insertions(+), 70 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index 469af373ae76..31a3ab7d9aa3 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -695,6 +695,84 @@ out:
  * covered by this vma.
  */
 
+static unsigned long
+copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+		pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
+		unsigned long addr, int *rss)
+{
+	unsigned long vm_flags = vma->vm_flags;
+	pte_t pte = *src_pte;
+	struct page *page;
+	swp_entry_t entry = pte_to_swp_entry(pte);
+
+	if (likely(!non_swap_entry(entry))) {
+		if (swap_duplicate(entry) < 0)
+			return entry.val;
+
+		/* make sure dst_mm is on swapoff's mmlist. */
+		if (unlikely(list_empty(&dst_mm->mmlist))) {
+			spin_lock(&mmlist_lock);
+			if (list_empty(&dst_mm->mmlist))
+				list_add(&dst_mm->mmlist,
+						&src_mm->mmlist);
+			spin_unlock(&mmlist_lock);
+		}
+		rss[MM_SWAPENTS]++;
+	} else if (is_migration_entry(entry)) {
+		page = migration_entry_to_page(entry);
+
+		rss[mm_counter(page)]++;
+
+		if (is_write_migration_entry(entry) &&
+				is_cow_mapping(vm_flags)) {
+			/*
+			 * COW mappings require pages in both
+			 * parent and child to be set to read.
+			 */
+			make_migration_entry_read(&entry);
+			pte = swp_entry_to_pte(entry);
+			if (pte_swp_soft_dirty(*src_pte))
+				pte = pte_swp_mksoft_dirty(pte);
+			if (pte_swp_uffd_wp(*src_pte))
+				pte = pte_swp_mkuffd_wp(pte);
+			set_pte_at(src_mm, addr, src_pte, pte);
+		}
+	} else if (is_device_private_entry(entry)) {
+		page = device_private_entry_to_page(entry);
+
+		/*
+		 * Update rss count even for unaddressable pages, as
+		 * they should treated just like normal pages in this
+		 * respect.
+		 *
+		 * We will likely want to have some new rss counters
+		 * for unaddressable pages, at some point. But for now
+		 * keep things as they are.
+		 */
+		get_page(page);
+		rss[mm_counter(page)]++;
+		page_dup_rmap(page, false);
+
+		/*
+		 * We do not preserve soft-dirty information, because so
+		 * far, checkpoint/restore is the only feature that
+		 * requires that. And checkpoint/restore does not work
+		 * when a device driver is involved (you cannot easily
+		 * save and restore device driver state).
+		 */
+		if (is_write_device_private_entry(entry) &&
+		    is_cow_mapping(vm_flags)) {
+			make_device_private_entry_read(&entry);
+			pte = swp_entry_to_pte(entry);
+			if (pte_swp_uffd_wp(*src_pte))
+				pte = pte_swp_mkuffd_wp(pte);
+			set_pte_at(src_mm, addr, src_pte, pte);
+		}
+	}
+	set_pte_at(dst_mm, addr, dst_pte, pte);
+	return 0;
+}
+
 static inline unsigned long
 copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
@@ -705,75 +783,10 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	struct page *page;
 
 	/* pte contains position in swap or file, so copy. */
-	if (unlikely(!pte_present(pte))) {
-		swp_entry_t entry = pte_to_swp_entry(pte);
-
-		if (likely(!non_swap_entry(entry))) {
-			if (swap_duplicate(entry) < 0)
-				return entry.val;
-
-			/* make sure dst_mm is on swapoff's mmlist. */
-			if (unlikely(list_empty(&dst_mm->mmlist))) {
-				spin_lock(&mmlist_lock);
-				if (list_empty(&dst_mm->mmlist))
-					list_add(&dst_mm->mmlist,
-							&src_mm->mmlist);
-				spin_unlock(&mmlist_lock);
-			}
-			rss[MM_SWAPENTS]++;
-		} else if (is_migration_entry(entry)) {
-			page = migration_entry_to_page(entry);
-
-			rss[mm_counter(page)]++;
-
-			if (is_write_migration_entry(entry) &&
-					is_cow_mapping(vm_flags)) {
-				/*
-				 * COW mappings require pages in both
-				 * parent and child to be set to read.
-				 */
-				make_migration_entry_read(&entry);
-				pte = swp_entry_to_pte(entry);
-				if (pte_swp_soft_dirty(*src_pte))
-					pte = pte_swp_mksoft_dirty(pte);
-				if (pte_swp_uffd_wp(*src_pte))
-					pte = pte_swp_mkuffd_wp(pte);
-				set_pte_at(src_mm, addr, src_pte, pte);
-			}
-		} else if (is_device_private_entry(entry)) {
-			page = device_private_entry_to_page(entry);
-
-			/*
-			 * Update rss count even for unaddressable pages, as
-			 * they should treated just like normal pages in this
-			 * respect.
-			 *
-			 * We will likely want to have some new rss counters
-			 * for unaddressable pages, at some point. But for now
-			 * keep things as they are.
-			 */
-			get_page(page);
-			rss[mm_counter(page)]++;
-			page_dup_rmap(page, false);
-
-			/*
-			 * We do not preserve soft-dirty information, because so
-			 * far, checkpoint/restore is the only feature that
-			 * requires that. And checkpoint/restore does not work
-			 * when a device driver is involved (you cannot easily
-			 * save and restore device driver state).
-			 */
-			if (is_write_device_private_entry(entry) &&
-			    is_cow_mapping(vm_flags)) {
-				make_device_private_entry_read(&entry);
-				pte = swp_entry_to_pte(entry);
-				if (pte_swp_uffd_wp(*src_pte))
-					pte = pte_swp_mkuffd_wp(pte);
-				set_pte_at(src_mm, addr, src_pte, pte);
-			}
-		}
-		goto out_set_pte;
-	}
+	if (unlikely(!pte_present(pte)))
+		return copy_nonpresent_pte(dst_mm, src_mm,
+					   dst_pte, src_pte, vma,
+					   addr, rss);
 
 	/*
 	 * If it's a COW mapping, write protect it both
@@ -807,7 +820,6 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		rss[mm_counter(page)]++;
 	}
 
-out_set_pte:
 	set_pte_at(dst_mm, addr, dst_pte, pte);
 	return 0;
 }

From 79a1971c5f14ea3a6e2b0c4caf73a1760db7cab8 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 23 Sep 2020 10:04:16 -0700
Subject: [PATCH 110/322] mm: move the copy_one_pte() pte_present check into
 the caller

This completes the split of the non-present and present pte cases by
moving the check for the source pte being present into the single
caller, which also means that we clearly separate out the very different
return value case for a non-present pte.

The present pte case currently always succeeds.

This is a pure code re-organization with no semantic change: the intent
is to make it much easier to add a new return case to the present pte
case for when we do early COW at page table copy time.

This was split out from the previous commit simply to make it easy to
visually see that there were no semantic changes from this code
re-organization.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memory.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index 31a3ab7d9aa3..e315b1f1ef08 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -773,8 +773,8 @@ copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	return 0;
 }
 
-static inline unsigned long
-copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+static inline void
+copy_present_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
 		unsigned long addr, int *rss)
 {
@@ -782,12 +782,6 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	pte_t pte = *src_pte;
 	struct page *page;
 
-	/* pte contains position in swap or file, so copy. */
-	if (unlikely(!pte_present(pte)))
-		return copy_nonpresent_pte(dst_mm, src_mm,
-					   dst_pte, src_pte, vma,
-					   addr, rss);
-
 	/*
 	 * If it's a COW mapping, write protect it both
 	 * in the parent and the child
@@ -821,7 +815,6 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	}
 
 	set_pte_at(dst_mm, addr, dst_pte, pte);
-	return 0;
 }
 
 static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
@@ -863,10 +856,17 @@ again:
 			progress++;
 			continue;
 		}
-		entry.val = copy_one_pte(dst_mm, src_mm, dst_pte, src_pte,
+		if (unlikely(!pte_present(*src_pte))) {
+			entry.val = copy_nonpresent_pte(dst_mm, src_mm,
+							dst_pte, src_pte,
 							vma, addr, rss);
-		if (entry.val)
-			break;
+			if (entry.val)
+				break;
+			progress += 8;
+			continue;
+		}
+		copy_present_pte(dst_mm, src_mm, dst_pte, src_pte,
+				 vma, addr, rss);
 		progress += 8;
 	} while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
 

From 46d2613eae51d527ecaf0e8248a9bfcc0b92aa7e Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Tue, 22 Sep 2020 12:49:38 -0700
Subject: [PATCH 111/322] nvme-core: don't use NVME_NSID_ALL for command
 effects and supported log

In the function nvme_get_effects_log() it uses NVME_NSID_ALL which has
namespace scope. The command effect log page is controller specific.

Replace NVME_NSID_ALL with 0x00 which specifies the controller scope
instead of namespace scope.

Fixes: 84fef62d135b ("nvme: check admin passthru command effects")
Link: https://bugzilla.kernel.org/show_bug.cgi?id=209287
Reported-by: Huai-Cheng Kuo <hh81478072@gmail.com>
Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 4cea14c18a6d..53c93836c7c6 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3041,7 +3041,7 @@ static int nvme_get_effects_log(struct nvme_ctrl *ctrl, u8 csi,
 	if (!cel)
 		return -ENOMEM;
 
-	ret = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_CMD_EFFECTS, 0, csi,
+	ret = nvme_get_log(ctrl, 0x00, NVME_LOG_CMD_EFFECTS, 0, csi,
 			&cel->log, sizeof(cel->log), 0);
 	if (ret) {
 		kfree(cel);

From 86a82ae0b5095ea24c55898a3f025791e7958b21 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 23 Sep 2020 17:46:20 +0200
Subject: [PATCH 112/322] x86/ioapic: Unbreak check_timer()

Several people reported in the kernel bugzilla that between v4.12 and v4.13
the magic which works around broken hardware and BIOSes to find the proper
timer interrupt delivery mode stopped working for some older affected
platforms which need to fall back to ExtINT delivery mode.

The reason is that the core code changed to keep track of the masked and
disabled state of an interrupt line more accurately to avoid the expensive
hardware operations.

That broke an assumption in i8259_make_irq() which invokes

     disable_irq_nosync();
     irq_set_chip_and_handler();
     enable_irq();

Up to v4.12 this worked because enable_irq() unconditionally unmasked the
interrupt line, but after the state tracking improvements this is not
longer the case because the IO/APIC uses lazy disabling. So the line state
is unmasked which means that enable_irq() does not call into the new irq
chip to unmask it.

In principle this is a shortcoming of the core code, but it's more than
unclear whether the core code should try to reset state. At least this
cannot be done unconditionally as that would break other existing use cases
where the chip type is changed, e.g. when changing the trigger type, but
the callers expect the state to be preserved.

As the way how check_timer() is switching the delivery modes is truly
unique, the obvious fix is to simply unmask the i8259 manually after
changing the mode to ExtINT delivery and switching the irq chip to the
legacy PIC.

Note, that the fixes tag is not really precise, but identifies the commit
which broke the assumptions in the IO/APIC and i8259 code and that's the
kernel version to which this needs to be backported.

Fixes: bf22ff45bed6 ("genirq: Avoid unnecessary low level irq function calls")
Reported-by: p_c_chan@hotmail.com
Reported-by: ecm4@mail.com
Reported-by: perdigao1@yahoo.com
Reported-by: matzes@users.sourceforge.net
Reported-by: rvelascog@gmail.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: p_c_chan@hotmail.com
Tested-by: matzes@users.sourceforge.net
Cc: stable@vger.kernel.org
Link: https://bugzilla.kernel.org/show_bug.cgi?id=197769
---
 arch/x86/kernel/apic/io_apic.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 779a89e31c4c..21f9c7f11779 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2243,6 +2243,7 @@ static inline void __init check_timer(void)
 	legacy_pic->init(0);
 	legacy_pic->make_irq(0);
 	apic_write(APIC_LVT0, APIC_DM_EXTINT);
+	legacy_pic->unmask(0);
 
 	unlock_ExtINT_logic();
 

From ef59b1953c26130ef23e6986b2ceea1efaa472e9 Mon Sep 17 00:00:00 2001
From: Mat Martineau <mathew.j.martineau@linux.intel.com>
Date: Mon, 21 Sep 2020 16:57:58 +0200
Subject: [PATCH 113/322] mptcp: Wake up MPTCP worker when DATA_FIN found on a
 TCP FIN packet

When receiving a DATA_FIN MPTCP option on a TCP FIN packet, the DATA_FIN
information would be stored but the MPTCP worker did not get
scheduled. In turn, the MPTCP socket state would remain in
TCP_ESTABLISHED and no blocked operations would be awakened.

TCP FIN packets are seen by the MPTCP socket when moving skbs out of the
subflow receive queues, so schedule the MPTCP worker when a skb with
DATA_FIN but no data payload is moved from a subflow queue. Other cases
(DATA_FIN on a bare TCP ACK or on a packet with data payload) are
already handled.

Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/84
Fixes: 43b54c6ee382 ("mptcp: Use full MPTCP-level disconnect state machine")
Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/subflow.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 9ead43f79023..8cbeb68f3775 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -731,7 +731,7 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
 
 	if (mpext->data_fin == 1) {
 		if (data_len == 1) {
-			mptcp_update_rcv_data_fin(msk, mpext->data_seq);
+			bool updated = mptcp_update_rcv_data_fin(msk, mpext->data_seq);
 			pr_debug("DATA_FIN with no payload seq=%llu", mpext->data_seq);
 			if (subflow->map_valid) {
 				/* A DATA_FIN might arrive in a DSS
@@ -742,6 +742,9 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
 				skb_ext_del(skb, SKB_EXT_MPTCP);
 				return MAPPING_OK;
 			} else {
+				if (updated && schedule_work(&msk->work))
+					sock_hold((struct sock *)msk);
+
 				return MAPPING_DATA_FIN;
 			}
 		} else {

From 77972b55fb9d35d4a6b0abca99abffaa4ec6a85b Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Tue, 22 Sep 2020 09:29:31 +0200
Subject: [PATCH 114/322] Revert "ravb: Fixed to be able to unload modules"

This reverts commit 1838d6c62f57836639bd3d83e7855e0ee4f6defc.

This commit moved the ravb_mdio_init() call (and thus the
of_mdiobus_register() call) from the ravb_probe() to the ravb_open()
call.  This causes a regression during system resume (s2idle/s2ram), as
new PHY devices cannot be bound while suspended.

During boot, the Micrel PHY is detected like this:

    Micrel KSZ9031 Gigabit PHY e6800000.ethernet-ffffffff:00: attached PHY driver [Micrel KSZ9031 Gigabit PHY] (mii_bus:phy_addr=e6800000.ethernet-ffffffff:00, irq=228)
    ravb e6800000.ethernet eth0: Link is Up - 1Gbps/Full - flow control off

During system suspend, (A) defer_all_probes is set to true, and (B)
usermodehelper_disabled is set to UMH_DISABLED, to avoid drivers being
probed while suspended.

  A. If CONFIG_MODULES=n, phy_device_register() calling device_add()
     merely adds the device, but does not probe it yet, as
     really_probe() returns early due to defer_all_probes being set:

       dpm_resume+0x128/0x4f8
	 device_resume+0xcc/0x1b0
	   dpm_run_callback+0x74/0x340
	     ravb_resume+0x190/0x1b8
	       ravb_open+0x84/0x770
		 of_mdiobus_register+0x1e0/0x468
		   of_mdiobus_register_phy+0x1b8/0x250
		     of_mdiobus_phy_device_register+0x178/0x1e8
		       phy_device_register+0x114/0x1b8
			 device_add+0x3d4/0x798
			   bus_probe_device+0x98/0xa0
			     device_initial_probe+0x10/0x18
			       __device_attach+0xe4/0x140
				 bus_for_each_drv+0x64/0xc8
				   __device_attach_driver+0xb8/0xe0
				     driver_probe_device.part.11+0xc4/0xd8
				       really_probe+0x32c/0x3b8

     Later, phy_attach_direct() notices no PHY driver has been bound,
     and falls back to the Generic PHY, leading to degraded operation:

       Generic PHY e6800000.ethernet-ffffffff:00: attached PHY driver [Generic PHY] (mii_bus:phy_addr=e6800000.ethernet-ffffffff:00, irq=POLL)
       ravb e6800000.ethernet eth0: Link is Up - 1Gbps/Full - flow control off

  B. If CONFIG_MODULES=y, request_module() returns early with -EBUSY due
     to UMH_DISABLED, and MDIO initialization fails completely:

       mdio_bus e6800000.ethernet-ffffffff:00: error -16 loading PHY driver module for ID 0x00221622
       ravb e6800000.ethernet eth0: failed to initialize MDIO
       PM: dpm_run_callback(): ravb_resume+0x0/0x1b8 returns -16
       PM: Device e6800000.ethernet failed to resume: error -16

     Ignoring -EBUSY in phy_request_driver_module(), like was done for
     -ENOENT in commit 21e194425abd65b5 ("net: phy: fix issue with loading
     PHY driver w/o initramfs"), would makes it fall back to the Generic
     PHY, like in the CONFIG_MODULES=n case.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: stable@vger.kernel.org
Reviewed-by: Sergei Shtylyov <sergei.shtylyov@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/renesas/ravb_main.c | 110 +++++++++++------------
 1 file changed, 55 insertions(+), 55 deletions(-)

diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index df89d09b253e..99f7aae102ce 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -1342,51 +1342,6 @@ static inline int ravb_hook_irq(unsigned int irq, irq_handler_t handler,
 	return error;
 }
 
-/* MDIO bus init function */
-static int ravb_mdio_init(struct ravb_private *priv)
-{
-	struct platform_device *pdev = priv->pdev;
-	struct device *dev = &pdev->dev;
-	int error;
-
-	/* Bitbang init */
-	priv->mdiobb.ops = &bb_ops;
-
-	/* MII controller setting */
-	priv->mii_bus = alloc_mdio_bitbang(&priv->mdiobb);
-	if (!priv->mii_bus)
-		return -ENOMEM;
-
-	/* Hook up MII support for ethtool */
-	priv->mii_bus->name = "ravb_mii";
-	priv->mii_bus->parent = dev;
-	snprintf(priv->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x",
-		 pdev->name, pdev->id);
-
-	/* Register MDIO bus */
-	error = of_mdiobus_register(priv->mii_bus, dev->of_node);
-	if (error)
-		goto out_free_bus;
-
-	return 0;
-
-out_free_bus:
-	free_mdio_bitbang(priv->mii_bus);
-	return error;
-}
-
-/* MDIO bus release function */
-static int ravb_mdio_release(struct ravb_private *priv)
-{
-	/* Unregister mdio bus */
-	mdiobus_unregister(priv->mii_bus);
-
-	/* Free bitbang info */
-	free_mdio_bitbang(priv->mii_bus);
-
-	return 0;
-}
-
 /* Network device open function for Ethernet AVB */
 static int ravb_open(struct net_device *ndev)
 {
@@ -1395,13 +1350,6 @@ static int ravb_open(struct net_device *ndev)
 	struct device *dev = &pdev->dev;
 	int error;
 
-	/* MDIO bus init */
-	error = ravb_mdio_init(priv);
-	if (error) {
-		netdev_err(ndev, "failed to initialize MDIO\n");
-		return error;
-	}
-
 	napi_enable(&priv->napi[RAVB_BE]);
 	napi_enable(&priv->napi[RAVB_NC]);
 
@@ -1479,7 +1427,6 @@ out_free_irq:
 out_napi_off:
 	napi_disable(&priv->napi[RAVB_NC]);
 	napi_disable(&priv->napi[RAVB_BE]);
-	ravb_mdio_release(priv);
 	return error;
 }
 
@@ -1789,8 +1736,6 @@ static int ravb_close(struct net_device *ndev)
 	ravb_ring_free(ndev, RAVB_BE);
 	ravb_ring_free(ndev, RAVB_NC);
 
-	ravb_mdio_release(priv);
-
 	return 0;
 }
 
@@ -1942,6 +1887,51 @@ static const struct net_device_ops ravb_netdev_ops = {
 	.ndo_set_features	= ravb_set_features,
 };
 
+/* MDIO bus init function */
+static int ravb_mdio_init(struct ravb_private *priv)
+{
+	struct platform_device *pdev = priv->pdev;
+	struct device *dev = &pdev->dev;
+	int error;
+
+	/* Bitbang init */
+	priv->mdiobb.ops = &bb_ops;
+
+	/* MII controller setting */
+	priv->mii_bus = alloc_mdio_bitbang(&priv->mdiobb);
+	if (!priv->mii_bus)
+		return -ENOMEM;
+
+	/* Hook up MII support for ethtool */
+	priv->mii_bus->name = "ravb_mii";
+	priv->mii_bus->parent = dev;
+	snprintf(priv->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x",
+		 pdev->name, pdev->id);
+
+	/* Register MDIO bus */
+	error = of_mdiobus_register(priv->mii_bus, dev->of_node);
+	if (error)
+		goto out_free_bus;
+
+	return 0;
+
+out_free_bus:
+	free_mdio_bitbang(priv->mii_bus);
+	return error;
+}
+
+/* MDIO bus release function */
+static int ravb_mdio_release(struct ravb_private *priv)
+{
+	/* Unregister mdio bus */
+	mdiobus_unregister(priv->mii_bus);
+
+	/* Free bitbang info */
+	free_mdio_bitbang(priv->mii_bus);
+
+	return 0;
+}
+
 static const struct of_device_id ravb_match_table[] = {
 	{ .compatible = "renesas,etheravb-r8a7790", .data = (void *)RCAR_GEN2 },
 	{ .compatible = "renesas,etheravb-r8a7794", .data = (void *)RCAR_GEN2 },
@@ -2184,6 +2174,13 @@ static int ravb_probe(struct platform_device *pdev)
 		eth_hw_addr_random(ndev);
 	}
 
+	/* MDIO bus init */
+	error = ravb_mdio_init(priv);
+	if (error) {
+		dev_err(&pdev->dev, "failed to initialize MDIO\n");
+		goto out_dma_free;
+	}
+
 	netif_napi_add(ndev, &priv->napi[RAVB_BE], ravb_poll, 64);
 	netif_napi_add(ndev, &priv->napi[RAVB_NC], ravb_poll, 64);
 
@@ -2205,6 +2202,8 @@ static int ravb_probe(struct platform_device *pdev)
 out_napi_del:
 	netif_napi_del(&priv->napi[RAVB_NC]);
 	netif_napi_del(&priv->napi[RAVB_BE]);
+	ravb_mdio_release(priv);
+out_dma_free:
 	dma_free_coherent(ndev->dev.parent, priv->desc_bat_size, priv->desc_bat,
 			  priv->desc_bat_dma);
 
@@ -2236,6 +2235,7 @@ static int ravb_remove(struct platform_device *pdev)
 	unregister_netdev(ndev);
 	netif_napi_del(&priv->napi[RAVB_NC]);
 	netif_napi_del(&priv->napi[RAVB_BE]);
+	ravb_mdio_release(priv);
 	pm_runtime_disable(&pdev->dev);
 	free_netdev(ndev);
 	platform_set_drvdata(pdev, NULL);

From ea6754aef2449e2cadfeb28741a199d35da53e28 Mon Sep 17 00:00:00 2001
From: Tian Tao <tiantao6@hisilicon.com>
Date: Tue, 22 Sep 2020 21:32:19 +0800
Subject: [PATCH 115/322] net: switchdev: Fixed kerneldoc warning

Update kernel-doc line comments to fix warnings reported by make W=1.
net/switchdev/switchdev.c:413: warning: Function parameter or
member 'extack' not described in 'call_switchdev_notifiers'

Signed-off-by: Tian Tao <tiantao6@hisilicon.com>
Acked-by: Ivan Vecera <ivecera@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/switchdev/switchdev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 865f3e037425..23d868545362 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -404,7 +404,7 @@ EXPORT_SYMBOL_GPL(unregister_switchdev_notifier);
  *	@val: value passed unmodified to notifier function
  *	@dev: port device
  *	@info: notifier information data
- *
+ *	@extack: netlink extended ack
  *	Call all network notifier blocks.
  */
 int call_switchdev_notifiers(unsigned long val, struct net_device *dev,

From f9317ae5523f99999fb54c513ebabbb2bc887ddf Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Tue, 22 Sep 2020 23:41:12 +0200
Subject: [PATCH 116/322] net: lantiq: Add locking for TX DMA channel

The TX DMA channel data is accessed by the xrx200_start_xmit() and the
xrx200_tx_housekeeping() function from different threads. Make sure the
accesses are synchronized by acquiring the netif_tx_lock() in the
xrx200_tx_housekeeping() function too. This lock is acquired by the
kernel before calling xrx200_start_xmit().

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/lantiq_xrx200.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c
index 635ff3a5dcfb..51ed8a54d380 100644
--- a/drivers/net/ethernet/lantiq_xrx200.c
+++ b/drivers/net/ethernet/lantiq_xrx200.c
@@ -245,6 +245,7 @@ static int xrx200_tx_housekeeping(struct napi_struct *napi, int budget)
 	int pkts = 0;
 	int bytes = 0;
 
+	netif_tx_lock(net_dev);
 	while (pkts < budget) {
 		struct ltq_dma_desc *desc = &ch->dma.desc_base[ch->tx_free];
 
@@ -268,6 +269,7 @@ static int xrx200_tx_housekeeping(struct napi_struct *napi, int budget)
 	net_dev->stats.tx_bytes += bytes;
 	netdev_completed_queue(ch->priv->net_dev, pkts, bytes);
 
+	netif_tx_unlock(net_dev);
 	if (netif_queue_stopped(net_dev))
 		netif_wake_queue(net_dev);
 

From 7241c5a697479c7d0c5a96595822cdab750d41ae Mon Sep 17 00:00:00 2001
From: Voon Weifeng <weifeng.voon@intel.com>
Date: Wed, 23 Sep 2020 16:56:14 +0800
Subject: [PATCH 117/322] net: stmmac: removed enabling eee in EEE set callback

EEE should be only be enabled during stmmac_mac_link_up() when the
link are up and being set up properly. set_eee should only do settings
configuration and disabling the eee.

Without this fix, turning on EEE using ethtool will return
"Operation not supported". This is due to the driver is in a dead loop
waiting for eee to be advertised in the for eee to be activated but the
driver will only configure the EEE advertisement after the eee is
activated.

Ethtool should only return "Operation not supported" if there is no EEE
capbility in the MAC controller.

Fixes: 8a7493e58ad6 ("net: stmmac: Fix a race in EEE enable callback")
Signed-off-by: Voon Weifeng <weifeng.voon@intel.com>
Acked-by: Mark Gross <mgross@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/stmicro/stmmac/stmmac_ethtool.c  | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index ac5e8cc5fb9f..430a4b32ec1e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -675,23 +675,16 @@ static int stmmac_ethtool_op_set_eee(struct net_device *dev,
 	struct stmmac_priv *priv = netdev_priv(dev);
 	int ret;
 
-	if (!edata->eee_enabled) {
+	if (!priv->dma_cap.eee)
+		return -EOPNOTSUPP;
+
+	if (!edata->eee_enabled)
 		stmmac_disable_eee_mode(priv);
-	} else {
-		/* We are asking for enabling the EEE but it is safe
-		 * to verify all by invoking the eee_init function.
-		 * In case of failure it will return an error.
-		 */
-		edata->eee_enabled = stmmac_eee_init(priv);
-		if (!edata->eee_enabled)
-			return -EOPNOTSUPP;
-	}
 
 	ret = phylink_ethtool_set_eee(priv->phylink, edata);
 	if (ret)
 		return ret;
 
-	priv->eee_enabled = edata->eee_enabled;
 	priv->tx_lpi_timer = edata->tx_lpi_timer;
 	return 0;
 }

From f7e80983f0cf470bb82036e73bff4d5a7daf8fc2 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Mon, 21 Sep 2020 12:48:36 +0200
Subject: [PATCH 118/322] s390/zcrypt: Fix ZCRYPT_PERDEV_REQCNT ioctl

reqcnt is an u32 pointer but we do copy sizeof(reqcnt) which is the
size of the pointer. This means we only copy 8 byte. Let us copy
the full monty.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Harald Freudenberger <freude@linux.ibm.com>
Cc: stable@vger.kernel.org
Fixes: af4a72276d49 ("s390/zcrypt: Support up to 256 crypto adapters.")
Reviewed-by: Harald Freudenberger <freude@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 drivers/s390/crypto/zcrypt_api.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c
index 4dbbfd88262c..f314936b5462 100644
--- a/drivers/s390/crypto/zcrypt_api.c
+++ b/drivers/s390/crypto/zcrypt_api.c
@@ -1449,7 +1449,8 @@ static long zcrypt_unlocked_ioctl(struct file *filp, unsigned int cmd,
 		if (!reqcnt)
 			return -ENOMEM;
 		zcrypt_perdev_reqcnt(reqcnt, AP_DEVICES);
-		if (copy_to_user((int __user *) arg, reqcnt, sizeof(reqcnt)))
+		if (copy_to_user((int __user *) arg, reqcnt,
+				 sizeof(u32) * AP_DEVICES))
 			rc = -EFAULT;
 		kfree(reqcnt);
 		return rc;

From 1a26044954a6d1f4d375d5e62392446af663be7a Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3@huawei.com>
Date: Fri, 18 Sep 2020 09:13:35 +0800
Subject: [PATCH 119/322] iommu/exynos: add missing put_device() call in
 exynos_iommu_of_xlate()

if of_find_device_by_node() succeed, exynos_iommu_of_xlate() doesn't have
a corresponding put_device(). Thus add put_device() to fix the exception
handling for this function implementation.

Fixes: aa759fd376fb ("iommu/exynos: Add callback for initializing devices from device tree")
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Acked-by: Marek Szyprowski <m.szyprowski@samsung.com>
Link: https://lore.kernel.org/r/20200918011335.909141-1-yukuai3@huawei.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/exynos-iommu.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index bad3c0ce10cb..de324b4eedfe 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -1295,13 +1295,17 @@ static int exynos_iommu_of_xlate(struct device *dev,
 		return -ENODEV;
 
 	data = platform_get_drvdata(sysmmu);
-	if (!data)
+	if (!data) {
+		put_device(&sysmmu->dev);
 		return -ENODEV;
+	}
 
 	if (!owner) {
 		owner = kzalloc(sizeof(*owner), GFP_KERNEL);
-		if (!owner)
+		if (!owner) {
+			put_device(&sysmmu->dev);
 			return -ENOMEM;
+		}
 
 		INIT_LIST_HEAD(&owner->controllers);
 		mutex_init(&owner->rpm_lock);

From 4fd9ac6bd3044734a7028bd993944c3617d1eede Mon Sep 17 00:00:00 2001
From: Dinghao Liu <dinghao.liu@zju.edu.cn>
Date: Sun, 23 Aug 2020 19:12:11 +0800
Subject: [PATCH 120/322] Platform: OLPC: Fix memleak in olpc_ec_probe

When devm_regulator_register() fails, ec should be
freed just like when olpc_ec_cmd() fails.

Fixes: 231c0c216172a ("Platform: OLPC: Add a regulator for the DCON")
Signed-off-by: Dinghao Liu <dinghao.liu@zju.edu.cn>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/platform/olpc/olpc-ec.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/platform/olpc/olpc-ec.c b/drivers/platform/olpc/olpc-ec.c
index 190e4a6186ef..f64b82824db2 100644
--- a/drivers/platform/olpc/olpc-ec.c
+++ b/drivers/platform/olpc/olpc-ec.c
@@ -439,7 +439,9 @@ static int olpc_ec_probe(struct platform_device *pdev)
 								&config);
 	if (IS_ERR(ec->dcon_rdev)) {
 		dev_err(&pdev->dev, "failed to register DCON regulator\n");
-		return PTR_ERR(ec->dcon_rdev);
+		err = PTR_ERR(ec->dcon_rdev);
+		kfree(ec);
+		return err;
 	}
 
 	ec->dbgfs_dir = olpc_ec_setup_debugfs();

From d41ec792edf89858a77ad485f1a4c10e3d26a10f Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sun, 23 Aug 2020 15:55:16 -0700
Subject: [PATCH 121/322] Documentation: laptops: thinkpad-acpi: fix underline
 length build warning

Fix underline length build warning in thinkpad-acpi.rst documentation:

Documentation/admin-guide/laptops/thinkpad-acpi.rst:1437: WARNING: Title underline too short.
DYTC Lapmode sensor
------------------

Fixes: acf7f4a59114 ("platform/x86: thinkpad_acpi: lap or desk mode interface")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Nitin Joshi <njoshi1@lenovo.com>
Cc: Sugumaran <slacshiminar@lenovo.com>
Cc: Bastien Nocera <bnocera@redhat.com>
Cc: Mark Pearson <markpearson@lenovo.com>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Darren Hart <dvhart@infradead.org>
Cc: Henrique de Moraes Holschuh <ibm-acpi@hmh.eng.br>
Cc: ibm-acpi-devel@lists.sourceforge.net
Cc: platform-driver-x86@vger.kernel.org
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 Documentation/admin-guide/laptops/thinkpad-acpi.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/laptops/thinkpad-acpi.rst b/Documentation/admin-guide/laptops/thinkpad-acpi.rst
index 5e477869df18..5fe1ade88c17 100644
--- a/Documentation/admin-guide/laptops/thinkpad-acpi.rst
+++ b/Documentation/admin-guide/laptops/thinkpad-acpi.rst
@@ -1434,7 +1434,7 @@ on the feature, restricting the viewing angles.
 
 
 DYTC Lapmode sensor
-------------------
+-------------------
 
 sysfs: dytc_lapmode
 

From 71c548c26de20334cf0d52217de0b02471442e6a Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.com>
Date: Tue, 30 Jun 2020 08:29:25 +0300
Subject: [PATCH 122/322] vhost: Fix documentation

Fix documentation to match actual function prototypes

"end" used instead of "last". Fix that.

Signed-off-by: Eli Cohen <eli@mellanox.com>
Link: https://lore.kernel.org/r/20200630052925.GA157062@mtl-vdi-166.wap.labs.mlnx
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/vhost/iotlb.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/vhost/iotlb.c b/drivers/vhost/iotlb.c
index 34aec4ba331e..0fd3f87e913c 100644
--- a/drivers/vhost/iotlb.c
+++ b/drivers/vhost/iotlb.c
@@ -149,7 +149,7 @@ EXPORT_SYMBOL_GPL(vhost_iotlb_free);
  * vhost_iotlb_itree_first - return the first overlapped range
  * @iotlb: the IOTLB
  * @start: start of IOVA range
- * @end: end of IOVA range
+ * @last: last byte in IOVA range
  */
 struct vhost_iotlb_map *
 vhost_iotlb_itree_first(struct vhost_iotlb *iotlb, u64 start, u64 last)
@@ -162,7 +162,7 @@ EXPORT_SYMBOL_GPL(vhost_iotlb_itree_first);
  * vhost_iotlb_itree_next - return the next overlapped range
  * @map: the starting map node
  * @start: start of IOVA range
- * @end: end of IOVA range
+ * @last: last byte IOVA range
  */
 struct vhost_iotlb_map *
 vhost_iotlb_itree_next(struct vhost_iotlb_map *map, u64 start, u64 last)

From a127c5bbb6a8eee851cbdec254424c480b8edd75 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Mon, 7 Sep 2020 18:43:43 +0800
Subject: [PATCH 123/322] vhost-vdpa: fix backend feature ioctls

Commit 653055b9acd4 ("vhost-vdpa: support get/set backend features")
introduces two malfunction backend features ioctls:

1) the ioctls was blindly added to vring ioctl instead of vdpa device
   ioctl
2) vhost_set_backend_features() was called when dev mutex has already
   been held which will lead a deadlock

This patch fixes the above issues.

Cc: Eli Cohen <elic@nvidia.com>
Reported-by: Zhu Lingshan <lingshan.zhu@intel.com>
Fixes: 653055b9acd4 ("vhost-vdpa: support get/set backend features")
Signed-off-by: Jason Wang <jasowang@redhat.com>
Link: https://lore.kernel.org/r/20200907104343.31141-1-jasowang@redhat.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/vhost/vdpa.c | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index 3fab94f88894..796fe979f997 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -353,8 +353,6 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
 	struct vdpa_callback cb;
 	struct vhost_virtqueue *vq;
 	struct vhost_vring_state s;
-	u64 __user *featurep = argp;
-	u64 features;
 	u32 idx;
 	long r;
 
@@ -381,18 +379,6 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
 
 		vq->last_avail_idx = vq_state.avail_index;
 		break;
-	case VHOST_GET_BACKEND_FEATURES:
-		features = VHOST_VDPA_BACKEND_FEATURES;
-		if (copy_to_user(featurep, &features, sizeof(features)))
-			return -EFAULT;
-		return 0;
-	case VHOST_SET_BACKEND_FEATURES:
-		if (copy_from_user(&features, featurep, sizeof(features)))
-			return -EFAULT;
-		if (features & ~VHOST_VDPA_BACKEND_FEATURES)
-			return -EOPNOTSUPP;
-		vhost_set_backend_features(&v->vdev, features);
-		return 0;
 	}
 
 	r = vhost_vring_ioctl(&v->vdev, cmd, argp);
@@ -440,8 +426,20 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
 	struct vhost_vdpa *v = filep->private_data;
 	struct vhost_dev *d = &v->vdev;
 	void __user *argp = (void __user *)arg;
+	u64 __user *featurep = argp;
+	u64 features;
 	long r;
 
+	if (cmd == VHOST_SET_BACKEND_FEATURES) {
+		r = copy_from_user(&features, featurep, sizeof(features));
+		if (r)
+			return r;
+		if (features & ~VHOST_VDPA_BACKEND_FEATURES)
+			return -EOPNOTSUPP;
+		vhost_set_backend_features(&v->vdev, features);
+		return 0;
+	}
+
 	mutex_lock(&d->mutex);
 
 	switch (cmd) {
@@ -476,6 +474,10 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
 	case VHOST_VDPA_SET_CONFIG_CALL:
 		r = vhost_vdpa_set_config_call(v, argp);
 		break;
+	case VHOST_GET_BACKEND_FEATURES:
+		features = VHOST_VDPA_BACKEND_FEATURES;
+		r = copy_to_user(featurep, &features, sizeof(features));
+		break;
 	default:
 		r = vhost_dev_ioctl(&v->vdev, cmd, argp);
 		if (r == -ENOIOCTLCMD)

From efe813d0b0e90a19102a36fd1d448a65dbf5d474 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Tue, 1 Sep 2020 10:07:52 +0200
Subject: [PATCH 124/322] platform/x86: touchscreen_dmi: Add info for the MPMAN
 Converter9 2-in-1

Add touchscreen info for the MPMAN Converter9 2-in-1. This device uses the
same case as the ITworks TW891, but it uses a different digitizer, so it
needs its own firmware.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/platform/x86/touchscreen_dmi.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c
index 5c223015ee71..dda60f89c951 100644
--- a/drivers/platform/x86/touchscreen_dmi.c
+++ b/drivers/platform/x86/touchscreen_dmi.c
@@ -373,6 +373,23 @@ static const struct ts_dmi_data jumper_ezpad_mini3_data = {
 	.properties	= jumper_ezpad_mini3_props,
 };
 
+static const struct property_entry mpman_converter9_props[] = {
+	PROPERTY_ENTRY_U32("touchscreen-min-x", 8),
+	PROPERTY_ENTRY_U32("touchscreen-min-y", 8),
+	PROPERTY_ENTRY_U32("touchscreen-size-x", 1664),
+	PROPERTY_ENTRY_U32("touchscreen-size-y", 880),
+	PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"),
+	PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
+	PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-mpman-converter9.fw"),
+	PROPERTY_ENTRY_U32("silead,max-fingers", 10),
+	{ }
+};
+
+static const struct ts_dmi_data mpman_converter9_data = {
+	.acpi_name	= "MSSL1680:00",
+	.properties	= mpman_converter9_props,
+};
+
 static const struct property_entry mpman_mpwin895cl_props[] = {
 	PROPERTY_ENTRY_U32("touchscreen-min-x", 3),
 	PROPERTY_ENTRY_U32("touchscreen-min-y", 9),
@@ -976,6 +993,14 @@ const struct dmi_system_id touchscreen_dmi_table[] = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "FlexBook edge11 - M-FBE11"),
 		},
 	},
+	{
+		/* MP Man Converter 9 */
+		.driver_data = (void *)&mpman_converter9_data,
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "MPMAN"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Converter9"),
+		},
+	},
 	{
 		/* MP Man MPWIN895CL */
 		.driver_data = (void *)&mpman_mpwin895cl_data,

From 8a333dab282467562b55096bab941b761ac26a73 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Thu, 10 Sep 2020 17:06:50 +0200
Subject: [PATCH 125/322] platform/x86: asus-nb-wmi: Revert "Do not load on
 Asus T100TA and T200TA"

The WMI INIT method on for some reason turns on the camera LED on these
2-in-1s, without the WMI interface allowing further control over the LED.

To fix this commit b5f7311d3a2e ("platform/x86: asus-nb-wmi: Do not load
on Asus T100TA and T200TA") added a blacklist with these 2 models on it
since the WMI driver did not add any extra functionality to these models.

Recently I've been working on making more 2-in-1 models report their
tablet-mode (SW_TABLET_MODE) to userspace; and I've found that these 2
Asus models report this through WMI. This commit reverts the adding
of the blacklist, so that the Asus WMI driver can be used on these
models to report their tablet-mode. I have another patch fixing the LED
issue in a different manner.

Note this is the second time the we revert the adding of the
asus_nb_wmi_blacklist. It was reverted before in commit:

aab9e7896ec9 ("platform/x86: asus-nb-wmi: Revert "Do not load on Asus
T100TA and T200TA")"

But some how (accidentally re-applying of the patch?) it got re-added
again in commit 3bd12da7f50b ("platform/x86: asus-nb-wmi: Do not load
on Asus T100TA and T200TA"), so now we need to revert it again.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/platform/x86/asus-nb-wmi.c | 24 ------------------------
 1 file changed, 24 deletions(-)

diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c
index b2e3d1e3b3e9..680c3640e013 100644
--- a/drivers/platform/x86/asus-nb-wmi.c
+++ b/drivers/platform/x86/asus-nb-wmi.c
@@ -593,33 +593,9 @@ static struct asus_wmi_driver asus_nb_wmi_driver = {
 	.detect_quirks = asus_nb_wmi_quirks,
 };
 
-static const struct dmi_system_id asus_nb_wmi_blacklist[] __initconst = {
-	{
-		/*
-		 * asus-nb-wm adds no functionality. The T100TA has a detachable
-		 * USB kbd, so no hotkeys and it has no WMI rfkill; and loading
-		 * asus-nb-wm causes the camera LED to turn and _stay_ on.
-		 */
-		.matches = {
-			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
-			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T100TA"),
-		},
-	},
-	{
-		/* The Asus T200TA has the same issue as the T100TA */
-		.matches = {
-			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
-			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T200TA"),
-		},
-	},
-	{} /* Terminating entry */
-};
 
 static int __init asus_nb_wmi_init(void)
 {
-	if (dmi_check_system(asus_nb_wmi_blacklist))
-		return -ENODEV;
-
 	return asus_wmi_register_driver(&asus_nb_wmi_driver);
 }
 

From 1d2dd379bd99ee4356ae4552fd1b8e43c7ca02cd Mon Sep 17 00:00:00 2001
From: Marius Iacob <themariusus@gmail.com>
Date: Mon, 31 Aug 2020 15:58:01 +0300
Subject: [PATCH 126/322] platform/x86: asus-wmi: Add BATC battery name to the
 list of supported

The Intel Atom Cherry Trail platform reports a new battery
name (BATC). Tested on ASUS Transformer Mini T103HAF.

Signed-off-by: Marius Iacob <themariusus@gmail.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/platform/x86/asus-wmi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
index 8f4acdc06b13..fa39ff030bd7 100644
--- a/drivers/platform/x86/asus-wmi.c
+++ b/drivers/platform/x86/asus-wmi.c
@@ -442,6 +442,7 @@ static int asus_wmi_battery_add(struct power_supply *battery)
 	 */
 	if (strcmp(battery->desc->name, "BAT0") != 0 &&
 	    strcmp(battery->desc->name, "BAT1") != 0 &&
+	    strcmp(battery->desc->name, "BATC") != 0 &&
 	    strcmp(battery->desc->name, "BATT") != 0)
 		return -ENODEV;
 

From d823346876a970522ff9e4d2b323c9b734dcc4de Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sat, 12 Sep 2020 11:35:32 +0200
Subject: [PATCH 127/322] platform/x86: intel-vbtn: Fix SW_TABLET_MODE always
 reporting 1 on the HP Pavilion 11 x360

Commit cfae58ed681c ("platform/x86: intel-vbtn: Only blacklist
SW_TABLET_MODE on the 9 / "Laptop" chasis-type") restored SW_TABLET_MODE
reporting on the HP stream x360 11 series on which it was previously broken
by commit de9647efeaa9 ("platform/x86: intel-vbtn: Only activate tablet
mode switch on 2-in-1's").

It turns out that enabling SW_TABLET_MODE reporting on devices with a
chassis-type of 10 ("Notebook") causes SW_TABLET_MODE to always report 1
at boot on the HP Pavilion 11 x360, which causes libinput to disable the
kbd and touchpad.

The HP Pavilion 11 x360's ACPI VGBS method sets bit 4 instead of bit 6 when
NOT in tablet mode at boot. Inspecting all the DSDTs in my DSDT collection
shows only one other model, the Medion E1239T ever setting bit 4 and it
always sets this together with bit 6.

So lets treat bit 4 as a second bit which when set indicates the device not
being in tablet-mode, as we already do for bit 6.

While at it also prefix all VGBS constant defines with "VGBS_".

Fixes: cfae58ed681c ("platform/x86: intel-vbtn: Only blacklist SW_TABLET_MODE on the 9 / "Laptop" chasis-type")
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Mark Gross <mgross@linux.intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/platform/x86/intel-vbtn.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/platform/x86/intel-vbtn.c b/drivers/platform/x86/intel-vbtn.c
index e85d8e58320c..f443619e1e7e 100644
--- a/drivers/platform/x86/intel-vbtn.c
+++ b/drivers/platform/x86/intel-vbtn.c
@@ -15,9 +15,13 @@
 #include <linux/platform_device.h>
 #include <linux/suspend.h>
 
+/* Returned when NOT in tablet mode on some HP Stream x360 11 models */
+#define VGBS_TABLET_MODE_FLAG_ALT	0x10
 /* When NOT in tablet mode, VGBS returns with the flag 0x40 */
-#define TABLET_MODE_FLAG 0x40
-#define DOCK_MODE_FLAG   0x80
+#define VGBS_TABLET_MODE_FLAG		0x40
+#define VGBS_DOCK_MODE_FLAG		0x80
+
+#define VGBS_TABLET_MODE_FLAGS (VGBS_TABLET_MODE_FLAG | VGBS_TABLET_MODE_FLAG_ALT)
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("AceLan Kao");
@@ -72,9 +76,9 @@ static void detect_tablet_mode(struct platform_device *device)
 	if (ACPI_FAILURE(status))
 		return;
 
-	m = !(vgbs & TABLET_MODE_FLAG);
+	m = !(vgbs & VGBS_TABLET_MODE_FLAGS);
 	input_report_switch(priv->input_dev, SW_TABLET_MODE, m);
-	m = (vgbs & DOCK_MODE_FLAG) ? 1 : 0;
+	m = (vgbs & VGBS_DOCK_MODE_FLAG) ? 1 : 0;
 	input_report_switch(priv->input_dev, SW_DOCK, m);
 }
 

From 5f38b06db8af3ed6c2fc1b427504ca56fae2eacc Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Sun, 13 Sep 2020 12:02:03 -0700
Subject: [PATCH 128/322] platform/x86: thinkpad_acpi: initialize
 tp_nvram_state variable

clang static analysis flags this represenative problem
thinkpad_acpi.c:2523:7: warning: Branch condition evaluates
  to a garbage value
                if (!oldn->mute ||
                    ^~~~~~~~~~~

In hotkey_kthread() mute is conditionally set by hotkey_read_nvram()
but unconditionally checked by hotkey_compare_and_issue_event().
So the tp_nvram_state variable s[2] needs to be initialized.

Fixes: 01e88f25985d ("ACPI: thinkpad-acpi: add CMOS NVRAM polling for hot keys (v9)")
Signed-off-by: Tom Rix <trix@redhat.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: mark gross <mgross@linux.intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/platform/x86/thinkpad_acpi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index 4864a5c189d4..7cb3b71c67fe 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -2569,7 +2569,7 @@ static void hotkey_compare_and_issue_event(struct tp_nvram_state *oldn,
  */
 static int hotkey_kthread(void *data)
 {
-	struct tp_nvram_state s[2];
+	struct tp_nvram_state s[2] = { 0 };
 	u32 poll_mask, event_mask;
 	unsigned int si, so;
 	unsigned long t;

From 8f0c01e666685c4d2e1a233e6f4d7ab16c9f8b2a Mon Sep 17 00:00:00 2001
From: Necip Fazil Yildiran <fazilyildiran@gmail.com>
Date: Tue, 15 Sep 2020 12:09:23 +0300
Subject: [PATCH 129/322] platform/x86: fix kconfig dependency warning for
 LG_LAPTOP

When LG_LAPTOP is enabled and NEW_LEDS is disabled, it results in the
following Kbuild warning:

WARNING: unmet direct dependencies detected for LEDS_CLASS
  Depends on [n]: NEW_LEDS [=n]
  Selected by [y]:
  - LG_LAPTOP [=y] && X86 [=y] && X86_PLATFORM_DEVICES [=y] && ACPI [=y] && ACPI_WMI [=y] && INPUT [=y]

The reason is that LG_LAPTOP selects LEDS_CLASS without depending on or
selecting NEW_LEDS while LEDS_CLASS is subordinate to NEW_LEDS.

Honor the kconfig menu hierarchy to remove kconfig dependency warnings.

Fixes: dbf0c5a6b1f8 ("platform/x86: Add LG Gram laptop special features driver")
Signed-off-by: Necip Fazil Yildiran <fazilyildiran@gmail.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: mark gross <mgross@linux.intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/platform/x86/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index 40219bba6801..81f6079d08e6 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -1112,6 +1112,7 @@ config LG_LAPTOP
 	depends on ACPI_WMI
 	depends on INPUT
 	select INPUT_SPARSEKMAP
+	select NEW_LEDS
 	select LEDS_CLASS
 	help
 	 This driver adds support for hotkeys as well as control of keyboard

From afdd1ebb72051e8b6b83c4d7dc542a9be0e1352d Mon Sep 17 00:00:00 2001
From: Necip Fazil Yildiran <fazilyildiran@gmail.com>
Date: Thu, 17 Sep 2020 19:16:53 +0300
Subject: [PATCH 130/322] platform/x86: fix kconfig dependency warning for
 FUJITSU_LAPTOP

When FUJITSU_LAPTOP is enabled and NEW_LEDS is disabled, it results in the
following Kbuild warning:

WARNING: unmet direct dependencies detected for LEDS_CLASS
  Depends on [n]: NEW_LEDS [=n]
  Selected by [y]:
  - FUJITSU_LAPTOP [=y] && X86 [=y] && X86_PLATFORM_DEVICES [=y] && ACPI [=y] && INPUT [=y] && BACKLIGHT_CLASS_DEVICE [=y] && (ACPI_VIDEO [=n] || ACPI_VIDEO [=n]=n)

The reason is that FUJITSU_LAPTOP selects LEDS_CLASS without depending on
or selecting NEW_LEDS while LEDS_CLASS is subordinate to NEW_LEDS.

Honor the kconfig menu hierarchy to remove kconfig dependency warnings.

Reported-by: Hans de Goede <hdegoede@redhat.com>
Fixes: d89bcc83e709 ("platform/x86: fujitsu-laptop: select LEDS_CLASS")
Signed-off-by: Necip Fazil Yildiran <fazilyildiran@gmail.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/platform/x86/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index 81f6079d08e6..0d91d136bc3b 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -469,6 +469,7 @@ config FUJITSU_LAPTOP
 	depends on BACKLIGHT_CLASS_DEVICE
 	depends on ACPI_VIDEO || ACPI_VIDEO = n
 	select INPUT_SPARSEKMAP
+	select NEW_LEDS
 	select LEDS_CLASS
 	help
 	  This is a driver for laptops built by Fujitsu:

From fce55cc8b7ade3c583f47eab5885e2f541ede1ee Mon Sep 17 00:00:00 2001
From: Ed Wildgoose <lists@wildgooses.com>
Date: Sun, 20 Sep 2020 21:32:06 +0100
Subject: [PATCH 131/322] platform/x86: pcengines-apuv2: Fix typo on define of
 AMD_FCH_GPIO_REG_GPIO55_DEVSLP0

Schematics show that the GPIO number is 55 (not 59). Trivial typo.

Signed-off-by: Ed Wildgoose <lists@wildgooses.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/platform/x86/pcengines-apuv2.c          | 2 +-
 include/linux/platform_data/gpio/gpio-amd-fch.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/platform/x86/pcengines-apuv2.c b/drivers/platform/x86/pcengines-apuv2.c
index 6aff6cf41414..c37349f97bb8 100644
--- a/drivers/platform/x86/pcengines-apuv2.c
+++ b/drivers/platform/x86/pcengines-apuv2.c
@@ -32,7 +32,7 @@
 #define APU2_GPIO_REG_LED3		AMD_FCH_GPIO_REG_GPIO59_DEVSLP1
 #define APU2_GPIO_REG_MODESW		AMD_FCH_GPIO_REG_GPIO32_GE1
 #define APU2_GPIO_REG_SIMSWAP		AMD_FCH_GPIO_REG_GPIO33_GE2
-#define APU2_GPIO_REG_MPCIE2		AMD_FCH_GPIO_REG_GPIO59_DEVSLP0
+#define APU2_GPIO_REG_MPCIE2		AMD_FCH_GPIO_REG_GPIO55_DEVSLP0
 #define APU2_GPIO_REG_MPCIE3		AMD_FCH_GPIO_REG_GPIO51
 
 /* Order in which the GPIO lines are defined in the register list */
diff --git a/include/linux/platform_data/gpio/gpio-amd-fch.h b/include/linux/platform_data/gpio/gpio-amd-fch.h
index 9e46678edb2a..255d51c9d36d 100644
--- a/include/linux/platform_data/gpio/gpio-amd-fch.h
+++ b/include/linux/platform_data/gpio/gpio-amd-fch.h
@@ -19,7 +19,7 @@
 #define AMD_FCH_GPIO_REG_GPIO49		0x40
 #define AMD_FCH_GPIO_REG_GPIO50		0x41
 #define AMD_FCH_GPIO_REG_GPIO51		0x42
-#define AMD_FCH_GPIO_REG_GPIO59_DEVSLP0	0x43
+#define AMD_FCH_GPIO_REG_GPIO55_DEVSLP0	0x43
 #define AMD_FCH_GPIO_REG_GPIO57		0x44
 #define AMD_FCH_GPIO_REG_GPIO58		0x45
 #define AMD_FCH_GPIO_REG_GPIO59_DEVSLP1	0x46

From 2b06a1c889ca33d550675db4b0ca91e1b4dd9873 Mon Sep 17 00:00:00 2001
From: Vadim Pasternak <vadimp@nvidia.com>
Date: Wed, 23 Sep 2020 20:20:50 +0300
Subject: [PATCH 132/322] platform/x86: mlx-platform: Fix extended topology
 configuration for power supply units

Fix topology configuration for power supply units in structure
'mlxplat_mlxcpld_ext_pwr_items_data', due to hardware change.

Note: no need to backport the fix, since there is no such hardware yet
(equipped with four power) at the filed.

Signed-off-by: Vadim Pasternak <vadimp@nvidia.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/platform/x86/mlx-platform.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/drivers/platform/x86/mlx-platform.c b/drivers/platform/x86/mlx-platform.c
index 8cf8c1be2666..1506ec0a4777 100644
--- a/drivers/platform/x86/mlx-platform.c
+++ b/drivers/platform/x86/mlx-platform.c
@@ -171,7 +171,6 @@
 #define MLXPLAT_CPLD_NR_NONE			-1
 #define MLXPLAT_CPLD_PSU_DEFAULT_NR		10
 #define MLXPLAT_CPLD_PSU_MSNXXXX_NR		4
-#define MLXPLAT_CPLD_PSU_MSNXXXX_NR2		3
 #define MLXPLAT_CPLD_FAN1_DEFAULT_NR		11
 #define MLXPLAT_CPLD_FAN2_DEFAULT_NR		12
 #define MLXPLAT_CPLD_FAN3_DEFAULT_NR		13
@@ -347,6 +346,15 @@ static struct i2c_board_info mlxplat_mlxcpld_pwr[] = {
 	},
 };
 
+static struct i2c_board_info mlxplat_mlxcpld_ext_pwr[] = {
+	{
+		I2C_BOARD_INFO("dps460", 0x5b),
+	},
+	{
+		I2C_BOARD_INFO("dps460", 0x5a),
+	},
+};
+
 static struct i2c_board_info mlxplat_mlxcpld_fan[] = {
 	{
 		I2C_BOARD_INFO("24c32", 0x50),
@@ -921,15 +929,15 @@ static struct mlxreg_core_data mlxplat_mlxcpld_ext_pwr_items_data[] = {
 		.label = "pwr3",
 		.reg = MLXPLAT_CPLD_LPC_REG_PWR_OFFSET,
 		.mask = BIT(2),
-		.hpdev.brdinfo = &mlxplat_mlxcpld_pwr[0],
-		.hpdev.nr = MLXPLAT_CPLD_PSU_MSNXXXX_NR2,
+		.hpdev.brdinfo = &mlxplat_mlxcpld_ext_pwr[0],
+		.hpdev.nr = MLXPLAT_CPLD_PSU_MSNXXXX_NR,
 	},
 	{
 		.label = "pwr4",
 		.reg = MLXPLAT_CPLD_LPC_REG_PWR_OFFSET,
 		.mask = BIT(3),
-		.hpdev.brdinfo = &mlxplat_mlxcpld_pwr[1],
-		.hpdev.nr = MLXPLAT_CPLD_PSU_MSNXXXX_NR2,
+		.hpdev.brdinfo = &mlxplat_mlxcpld_ext_pwr[1],
+		.hpdev.nr = MLXPLAT_CPLD_PSU_MSNXXXX_NR,
 	},
 };
 

From 938835aa903ae19ad62805134f79bbcf20fc3bea Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 23 Sep 2020 20:48:03 +0200
Subject: [PATCH 133/322] platform/x86: intel_pmc_core: do not create a static
 struct device

A struct device is a dynamic structure, with reference counting.
"Tricking" the kernel to make a dynamic structure static, by working
around the driver core release detection logic, is not nice.

Because of this, this code has been used as an example for others on
"how to do things", which is just about the worst thing possible to have
happen.

Fix this all up by making the platform device dynamic and providing a
real release function.

Cc: Rajneesh Bhardwaj <rajneesh.bhardwaj@intel.com>
Cc: Vishwanath Somayaji <vishwanath.somayaji@intel.com>
Cc: Darren Hart <dvhart@infradead.org>
Cc: Andy Shevchenko <andy@infradead.org>
Cc: Rajat Jain <rajatja@google.com>
Cc: platform-driver-x86@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Reported-by: Maximilian Luz <luzmaximilian@gmail.com>
Fixes: b02f6a2ef0a1 ("platform/x86: intel_pmc_core: Attach using APCI HID "INT33A1"")
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Rajat Jain <rajatja@google.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/platform/x86/intel_pmc_core_pltdrv.c | 26 +++++++++++++-------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/drivers/platform/x86/intel_pmc_core_pltdrv.c b/drivers/platform/x86/intel_pmc_core_pltdrv.c
index 731281855cc8..73797680b895 100644
--- a/drivers/platform/x86/intel_pmc_core_pltdrv.c
+++ b/drivers/platform/x86/intel_pmc_core_pltdrv.c
@@ -20,15 +20,10 @@
 
 static void intel_pmc_core_release(struct device *dev)
 {
-	/* Nothing to do. */
+	kfree(dev);
 }
 
-static struct platform_device pmc_core_device = {
-	.name = "intel_pmc_core",
-	.dev  = {
-		.release = intel_pmc_core_release,
-	},
-};
+static struct platform_device *pmc_core_device;
 
 /*
  * intel_pmc_core_platform_ids is the list of platforms where we want to
@@ -52,6 +47,8 @@ MODULE_DEVICE_TABLE(x86cpu, intel_pmc_core_platform_ids);
 
 static int __init pmc_core_platform_init(void)
 {
+	int retval;
+
 	/* Skip creating the platform device if ACPI already has a device */
 	if (acpi_dev_present("INT33A1", NULL, -1))
 		return -ENODEV;
@@ -59,12 +56,23 @@ static int __init pmc_core_platform_init(void)
 	if (!x86_match_cpu(intel_pmc_core_platform_ids))
 		return -ENODEV;
 
-	return platform_device_register(&pmc_core_device);
+	pmc_core_device = kzalloc(sizeof(*pmc_core_device), GFP_KERNEL);
+	if (!pmc_core_device)
+		return -ENOMEM;
+
+	pmc_core_device->name = "intel_pmc_core";
+	pmc_core_device->dev.release = intel_pmc_core_release;
+
+	retval = platform_device_register(pmc_core_device);
+	if (retval)
+		kfree(pmc_core_device);
+
+	return retval;
 }
 
 static void __exit pmc_core_platform_exit(void)
 {
-	platform_device_unregister(&pmc_core_device);
+	platform_device_unregister(pmc_core_device);
 }
 
 module_init(pmc_core_platform_init);

From 00fb259c618ea1198fc51b53a6167aa0d78672a9 Mon Sep 17 00:00:00 2001
From: Ray Jui <ray.jui@broadcom.com>
Date: Thu, 10 Sep 2020 08:25:38 -0700
Subject: [PATCH 134/322] spi: bcm-qspi: Fix probe regression on iProc
 platforms

iProc chips have QSPI controller that does not have the MSPI_REV
offset. Reading from that offset will cause a bus error. Fix it by
having MSPI_REV query disabled in the generic compatible string.

Fixes: 3a01f04d74ef ("spi: bcm-qspi: Handle lack of MSPI_REV offset")
Link: https://lore.kernel.org/linux-arm-kernel/20200909211857.4144718-1-f.fainelli@gmail.com/T/#u
Signed-off-by: Ray Jui <ray.jui@broadcom.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Link: https://lore.kernel.org/r/20200910152539.45584-3-ray.jui@broadcom.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-bcm-qspi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c
index 681d09085175..9cfa15ec8b08 100644
--- a/drivers/spi/spi-bcm-qspi.c
+++ b/drivers/spi/spi-bcm-qspi.c
@@ -1295,7 +1295,7 @@ static const struct of_device_id bcm_qspi_of_match[] = {
 	},
 	{
 		.compatible = "brcm,spi-bcm-qspi",
-		.data = &bcm_qspi_rev_data,
+		.data = &bcm_qspi_no_rev_data,
 	},
 	{
 		.compatible = "brcm,spi-bcm7216-qspi",

From e43c26e12dd49a41cf5a4cd5c5b59a1eb98ed11e Mon Sep 17 00:00:00 2001
From: Ye Li <ye.li@nxp.com>
Date: Wed, 23 Sep 2020 02:03:44 -0700
Subject: [PATCH 135/322] gpio: pca953x: Fix uninitialized pending variable

When pca953x_irq_pending returns false, the pending parameter won't
be set. But pca953x_irq_handler continues using this uninitialized
variable as pending irqs and will cause problem.
Fix the issue by initializing pending to 0.

Fixes: 064c73afe738 ("gpio: pca953x: Synchronize interrupt handler properly")
Signed-off-by: Ye Li <ye.li@nxp.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
---
 drivers/gpio/gpio-pca953x.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c
index bd2e96c34f82..29342e5def82 100644
--- a/drivers/gpio/gpio-pca953x.c
+++ b/drivers/gpio/gpio-pca953x.c
@@ -814,7 +814,7 @@ static irqreturn_t pca953x_irq_handler(int irq, void *devid)
 {
 	struct pca953x_chip *chip = devid;
 	struct gpio_chip *gc = &chip->gpio_chip;
-	DECLARE_BITMAP(pending, MAX_LINE);
+	DECLARE_BITMAP(pending, MAX_LINE) = {};
 	int level;
 	bool ret;
 

From ac67b07e268d46eba675a60c37051bb3e59fd201 Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jk@codeconstruct.com.au>
Date: Fri, 11 Sep 2020 09:51:04 +0800
Subject: [PATCH 136/322] gpio/aspeed-sgpio: enable access to all 80 input &
 output sgpios

Currently, the aspeed-sgpio driver exposes up to 80 GPIO lines,
corresponding to the 80 status bits available in hardware. Each of these
lines can be configured as either an input or an output.

However, each of these GPIOs is actually an input *and* an output; we
actually have 80 inputs plus 80 outputs.

This change expands the maximum number of GPIOs to 160; the lower half
of this range are the input-only GPIOs, the upper half are the outputs.
We fix the GPIO directions to correspond to this mapping.

This also fixes a bug when setting GPIOs - we were reading from the
input register, making it impossible to set more than one output GPIO.

Signed-off-by: Jeremy Kerr <jk@codeconstruct.com.au>
Fixes: 7db47faae79b ("gpio: aspeed: Add SGPIO driver")
Reviewed-by: Joel Stanley <joel@jms.id.au>
Reviewed-by: Andrew Jeffery <andrew@aj.id.au>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
---
 .../devicetree/bindings/gpio/sgpio-aspeed.txt |   5 +-
 drivers/gpio/gpio-aspeed-sgpio.c              | 126 ++++++++++++------
 2 files changed, 87 insertions(+), 44 deletions(-)

diff --git a/Documentation/devicetree/bindings/gpio/sgpio-aspeed.txt b/Documentation/devicetree/bindings/gpio/sgpio-aspeed.txt
index d4d83916c09d..be329ea4794f 100644
--- a/Documentation/devicetree/bindings/gpio/sgpio-aspeed.txt
+++ b/Documentation/devicetree/bindings/gpio/sgpio-aspeed.txt
@@ -20,8 +20,9 @@ Required properties:
 - gpio-controller : Marks the device node as a GPIO controller
 - interrupts : Interrupt specifier, see interrupt-controller/interrupts.txt
 - interrupt-controller : Mark the GPIO controller as an interrupt-controller
-- ngpios : number of GPIO lines, see gpio.txt
-  (should be multiple of 8, up to 80 pins)
+- ngpios : number of *hardware* GPIO lines, see gpio.txt. This will expose
+  2 software GPIOs per hardware GPIO: one for hardware input, one for hardware
+  output. Up to 80 pins, must be a multiple of 8.
 - clocks : A phandle to the APB clock for SGPM clock division
 - bus-frequency : SGPM CLK frequency
 
diff --git a/drivers/gpio/gpio-aspeed-sgpio.c b/drivers/gpio/gpio-aspeed-sgpio.c
index d16645c1d8d9..5d678dbf1a62 100644
--- a/drivers/gpio/gpio-aspeed-sgpio.c
+++ b/drivers/gpio/gpio-aspeed-sgpio.c
@@ -17,7 +17,17 @@
 #include <linux/spinlock.h>
 #include <linux/string.h>
 
-#define MAX_NR_SGPIO			80
+/*
+ * MAX_NR_HW_GPIO represents the number of actual hardware-supported GPIOs (ie,
+ * slots within the clocked serial GPIO data). Since each HW GPIO is both an
+ * input and an output, we provide MAX_NR_HW_GPIO * 2 lines on our gpiochip
+ * device.
+ *
+ * We use SGPIO_OUTPUT_OFFSET to define the split between the inputs and
+ * outputs; the inputs start at line 0, the outputs start at OUTPUT_OFFSET.
+ */
+#define MAX_NR_HW_SGPIO			80
+#define SGPIO_OUTPUT_OFFSET		MAX_NR_HW_SGPIO
 
 #define ASPEED_SGPIO_CTRL		0x54
 
@@ -30,8 +40,8 @@ struct aspeed_sgpio {
 	struct clk *pclk;
 	spinlock_t lock;
 	void __iomem *base;
-	uint32_t dir_in[3];
 	int irq;
+	int n_sgpio;
 };
 
 struct aspeed_sgpio_bank {
@@ -111,31 +121,69 @@ static void __iomem *bank_reg(struct aspeed_sgpio *gpio,
 	}
 }
 
-#define GPIO_BANK(x)    ((x) >> 5)
-#define GPIO_OFFSET(x)  ((x) & 0x1f)
+#define GPIO_BANK(x)    ((x % SGPIO_OUTPUT_OFFSET) >> 5)
+#define GPIO_OFFSET(x)  ((x % SGPIO_OUTPUT_OFFSET) & 0x1f)
 #define GPIO_BIT(x)     BIT(GPIO_OFFSET(x))
 
 static const struct aspeed_sgpio_bank *to_bank(unsigned int offset)
 {
-	unsigned int bank = GPIO_BANK(offset);
+	unsigned int bank;
+
+	bank = GPIO_BANK(offset);
 
 	WARN_ON(bank >= ARRAY_SIZE(aspeed_sgpio_banks));
 	return &aspeed_sgpio_banks[bank];
 }
 
+static int aspeed_sgpio_init_valid_mask(struct gpio_chip *gc,
+		unsigned long *valid_mask, unsigned int ngpios)
+{
+	struct aspeed_sgpio *sgpio = gpiochip_get_data(gc);
+	int n = sgpio->n_sgpio;
+	int c = SGPIO_OUTPUT_OFFSET - n;
+
+	WARN_ON(ngpios < MAX_NR_HW_SGPIO * 2);
+
+	/* input GPIOs in the lower range */
+	bitmap_set(valid_mask, 0, n);
+	bitmap_clear(valid_mask, n, c);
+
+	/* output GPIOS above SGPIO_OUTPUT_OFFSET */
+	bitmap_set(valid_mask, SGPIO_OUTPUT_OFFSET, n);
+	bitmap_clear(valid_mask, SGPIO_OUTPUT_OFFSET + n, c);
+
+	return 0;
+}
+
+static void aspeed_sgpio_irq_init_valid_mask(struct gpio_chip *gc,
+		unsigned long *valid_mask, unsigned int ngpios)
+{
+	struct aspeed_sgpio *sgpio = gpiochip_get_data(gc);
+	int n = sgpio->n_sgpio;
+
+	WARN_ON(ngpios < MAX_NR_HW_SGPIO * 2);
+
+	/* input GPIOs in the lower range */
+	bitmap_set(valid_mask, 0, n);
+	bitmap_clear(valid_mask, n, ngpios - n);
+}
+
+static bool aspeed_sgpio_is_input(unsigned int offset)
+{
+	return offset < SGPIO_OUTPUT_OFFSET;
+}
+
 static int aspeed_sgpio_get(struct gpio_chip *gc, unsigned int offset)
 {
 	struct aspeed_sgpio *gpio = gpiochip_get_data(gc);
 	const struct aspeed_sgpio_bank *bank = to_bank(offset);
 	unsigned long flags;
 	enum aspeed_sgpio_reg reg;
-	bool is_input;
 	int rc = 0;
 
 	spin_lock_irqsave(&gpio->lock, flags);
 
-	is_input = gpio->dir_in[GPIO_BANK(offset)] & GPIO_BIT(offset);
-	reg = is_input ? reg_val : reg_rdata;
+	reg = aspeed_sgpio_is_input(offset) ? reg_val : reg_rdata;
 	rc = !!(ioread32(bank_reg(gpio, bank, reg)) & GPIO_BIT(offset));
 
 	spin_unlock_irqrestore(&gpio->lock, flags);
@@ -143,22 +191,31 @@ static int aspeed_sgpio_get(struct gpio_chip *gc, unsigned int offset)
 	return rc;
 }
 
-static void sgpio_set_value(struct gpio_chip *gc, unsigned int offset, int val)
+static int sgpio_set_value(struct gpio_chip *gc, unsigned int offset, int val)
 {
 	struct aspeed_sgpio *gpio = gpiochip_get_data(gc);
 	const struct aspeed_sgpio_bank *bank = to_bank(offset);
-	void __iomem *addr;
+	void __iomem *addr_r, *addr_w;
 	u32 reg = 0;
 
-	addr = bank_reg(gpio, bank, reg_val);
-	reg = ioread32(addr);
+	if (aspeed_sgpio_is_input(offset))
+		return -EINVAL;
+
+	/* Since this is an output, read the cached value from rdata, then
+	 * update val. */
+	addr_r = bank_reg(gpio, bank, reg_rdata);
+	addr_w = bank_reg(gpio, bank, reg_val);
+
+	reg = ioread32(addr_r);
 
 	if (val)
 		reg |= GPIO_BIT(offset);
 	else
 		reg &= ~GPIO_BIT(offset);
 
-	iowrite32(reg, addr);
+	iowrite32(reg, addr_w);
+
+	return 0;
 }
 
 static void aspeed_sgpio_set(struct gpio_chip *gc, unsigned int offset, int val)
@@ -175,43 +232,28 @@ static void aspeed_sgpio_set(struct gpio_chip *gc, unsigned int offset, int val)
 
 static int aspeed_sgpio_dir_in(struct gpio_chip *gc, unsigned int offset)
 {
-	struct aspeed_sgpio *gpio = gpiochip_get_data(gc);
-	unsigned long flags;
-
-	spin_lock_irqsave(&gpio->lock, flags);
-	gpio->dir_in[GPIO_BANK(offset)] |= GPIO_BIT(offset);
-	spin_unlock_irqrestore(&gpio->lock, flags);
-
-	return 0;
+	return aspeed_sgpio_is_input(offset) ? 0 : -EINVAL;
 }
 
 static int aspeed_sgpio_dir_out(struct gpio_chip *gc, unsigned int offset, int val)
 {
 	struct aspeed_sgpio *gpio = gpiochip_get_data(gc);
 	unsigned long flags;
+	int rc;
+
+	/* No special action is required for setting the direction; we'll
+	 * error-out in sgpio_set_value if this isn't an output GPIO */
 
 	spin_lock_irqsave(&gpio->lock, flags);
-
-	gpio->dir_in[GPIO_BANK(offset)] &= ~GPIO_BIT(offset);
-	sgpio_set_value(gc, offset, val);
-
+	rc = sgpio_set_value(gc, offset, val);
 	spin_unlock_irqrestore(&gpio->lock, flags);
 
-	return 0;
+	return rc;
 }
 
 static int aspeed_sgpio_get_direction(struct gpio_chip *gc, unsigned int offset)
 {
-	int dir_status;
-	struct aspeed_sgpio *gpio = gpiochip_get_data(gc);
-	unsigned long flags;
-
-	spin_lock_irqsave(&gpio->lock, flags);
-	dir_status = gpio->dir_in[GPIO_BANK(offset)] & GPIO_BIT(offset);
-	spin_unlock_irqrestore(&gpio->lock, flags);
-
-	return dir_status;
-
+	return !!aspeed_sgpio_is_input(offset);
 }
 
 static void irqd_to_aspeed_sgpio_data(struct irq_data *d,
@@ -402,6 +444,7 @@ static int aspeed_sgpio_setup_irqs(struct aspeed_sgpio *gpio,
 
 	irq = &gpio->chip.irq;
 	irq->chip = &aspeed_sgpio_irqchip;
+	irq->init_valid_mask = aspeed_sgpio_irq_init_valid_mask;
 	irq->handler = handle_bad_irq;
 	irq->default_type = IRQ_TYPE_NONE;
 	irq->parent_handler = aspeed_sgpio_irq_handler;
@@ -452,11 +495,12 @@ static int __init aspeed_sgpio_probe(struct platform_device *pdev)
 	if (rc < 0) {
 		dev_err(&pdev->dev, "Could not read ngpios property\n");
 		return -EINVAL;
-	} else if (nr_gpios > MAX_NR_SGPIO) {
+	} else if (nr_gpios > MAX_NR_HW_SGPIO) {
 		dev_err(&pdev->dev, "Number of GPIOs exceeds the maximum of %d: %d\n",
-			MAX_NR_SGPIO, nr_gpios);
+			MAX_NR_HW_SGPIO, nr_gpios);
 		return -EINVAL;
 	}
+	gpio->n_sgpio = nr_gpios;
 
 	rc = of_property_read_u32(pdev->dev.of_node, "bus-frequency", &sgpio_freq);
 	if (rc < 0) {
@@ -497,7 +541,8 @@ static int __init aspeed_sgpio_probe(struct platform_device *pdev)
 	spin_lock_init(&gpio->lock);
 
 	gpio->chip.parent = &pdev->dev;
-	gpio->chip.ngpio = nr_gpios;
+	gpio->chip.ngpio = MAX_NR_HW_SGPIO * 2;
+	gpio->chip.init_valid_mask = aspeed_sgpio_init_valid_mask;
 	gpio->chip.direction_input = aspeed_sgpio_dir_in;
 	gpio->chip.direction_output = aspeed_sgpio_dir_out;
 	gpio->chip.get_direction = aspeed_sgpio_get_direction;
@@ -509,9 +554,6 @@ static int __init aspeed_sgpio_probe(struct platform_device *pdev)
 	gpio->chip.label = dev_name(&pdev->dev);
 	gpio->chip.base = -1;
 
-	/* set all SGPIO pins as input (1). */
-	memset(gpio->dir_in, 0xff, sizeof(gpio->dir_in));
-
 	aspeed_sgpio_setup_irqs(gpio, pdev);
 
 	rc = devm_gpiochip_add_data(&pdev->dev, &gpio->chip, gpio);

From bf0d394e885015941ed2d5724c0a6ed8d42dd95e Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jk@codeconstruct.com.au>
Date: Fri, 11 Sep 2020 09:51:05 +0800
Subject: [PATCH 137/322] gpio/aspeed-sgpio: don't enable all interrupts by
 default

Currently, the IRQ setup for the SGPIO driver enables all interrupts in
dual-edge trigger mode. Since the default handler is handle_bad_irq, any
state change on input GPIOs will trigger bad IRQ warnings.

This change applies sensible IRQ defaults: single-edge trigger, and all
IRQs disabled.

Signed-off-by: Jeremy Kerr <jk@codeconstruct.com.au>
Fixes: 7db47faae79b ("gpio: aspeed: Add SGPIO driver")
Reviewed-by: Joel Stanley <joel@jms.id.au>
Acked-by: Andrew Jeffery <andrew@aj.id.au>
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
---
 drivers/gpio/gpio-aspeed-sgpio.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/gpio/gpio-aspeed-sgpio.c b/drivers/gpio/gpio-aspeed-sgpio.c
index 5d678dbf1a62..a0eb00c024f6 100644
--- a/drivers/gpio/gpio-aspeed-sgpio.c
+++ b/drivers/gpio/gpio-aspeed-sgpio.c
@@ -452,17 +452,15 @@ static int aspeed_sgpio_setup_irqs(struct aspeed_sgpio *gpio,
 	irq->parents = &gpio->irq;
 	irq->num_parents = 1;
 
-	/* set IRQ settings and Enable Interrupt */
+	/* Apply default IRQ settings */
 	for (i = 0; i < ARRAY_SIZE(aspeed_sgpio_banks); i++) {
 		bank = &aspeed_sgpio_banks[i];
 		/* set falling or level-low irq */
 		iowrite32(0x00000000, bank_reg(gpio, bank, reg_irq_type0));
 		/* trigger type is edge */
 		iowrite32(0x00000000, bank_reg(gpio, bank, reg_irq_type1));
-		/* dual edge trigger mode. */
-		iowrite32(0xffffffff, bank_reg(gpio, bank, reg_irq_type2));
-		/* enable irq */
-		iowrite32(0xffffffff, bank_reg(gpio, bank, reg_irq_enable));
+		/* single edge trigger */
+		iowrite32(0x00000000, bank_reg(gpio, bank, reg_irq_type2));
 	}
 
 	return 0;

From efb1676306f664625c0c546dd10d18d33c75e3fc Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 23 Sep 2020 07:24:42 +0200
Subject: [PATCH 138/322] mt76: mt7615: reduce maximum VHT MPDU length to 7991

After fixing mac80211 to allow larger A-MSDUs in some cases, there have been
reports of performance regressions and packet loss with some clients.
It appears that the issue occurs when the hardware is transmitting A-MSDUs
bigger than 8k. Limit the local VHT MPDU size capability to 7991, matching
the value used for MT7915 as well.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200923052442.24141-1-nbd@nbd.name
---
 drivers/net/wireless/mediatek/mt76/mt7615/init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
index fc1ebabfebac..1f57b43693bc 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
@@ -460,7 +460,7 @@ void mt7615_init_device(struct mt7615_dev *dev)
 	dev->mphy.sband_2g.sband.ht_cap.cap |= IEEE80211_HT_CAP_LDPC_CODING;
 	dev->mphy.sband_5g.sband.ht_cap.cap |= IEEE80211_HT_CAP_LDPC_CODING;
 	dev->mphy.sband_5g.sband.vht_cap.cap |=
-			IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454 |
+			IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991 |
 			IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK;
 	mt7615_cap_dbdc_disable(dev);
 	dev->phy.dfs_state = -1;

From 3e640b1eec38e4c8eba160f26cba4f592e657f3d Mon Sep 17 00:00:00 2001
From: Tao Ren <rentao.bupt@gmail.com>
Date: Wed, 16 Sep 2020 13:42:16 -0700
Subject: [PATCH 139/322] gpio: aspeed: fix ast2600 bank properties

GPIO_U is mapped to the least significant byte of input/output mask, and
the byte in "output" mask should be 0 because GPIO_U is input only. All
the other bits need to be 1 because GPIO_V/W/X support both input and
output modes.

Similarly, GPIO_Y/Z are mapped to the 2 least significant bytes, and the
according bits need to be 1 because GPIO_Y/Z support both input and
output modes.

Fixes: ab4a85534c3e ("gpio: aspeed: Add in ast2600 details to Aspeed driver")
Signed-off-by: Tao Ren <rentao.bupt@gmail.com>
Reviewed-by: Andrew Jeffery <andrew@aj.id.au>
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
---
 drivers/gpio/gpio-aspeed.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpio/gpio-aspeed.c b/drivers/gpio/gpio-aspeed.c
index 879db23d8454..d07bf2c3f136 100644
--- a/drivers/gpio/gpio-aspeed.c
+++ b/drivers/gpio/gpio-aspeed.c
@@ -1114,8 +1114,8 @@ static const struct aspeed_gpio_config ast2500_config =
 
 static const struct aspeed_bank_props ast2600_bank_props[] = {
 	/*     input	  output   */
-	{5, 0xffffffff,  0x0000ffff}, /* U/V/W/X */
-	{6, 0xffff0000,  0x0fff0000}, /* Y/Z */
+	{5, 0xffffffff,  0xffffff00}, /* U/V/W/X */
+	{6, 0x0000ffff,  0x0000ffff}, /* Y/Z */
 	{ },
 };
 

From be068f29034fb00530a053d18b8cf140c32b12b3 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 24 Sep 2020 08:41:32 -0700
Subject: [PATCH 140/322] mm: fix misplaced unlock_page in do_wp_page()

Commit 09854ba94c6a ("mm: do_wp_page() simplification") reorganized all
the code around the page re-use vs copy, but in the process also moved
the final unlock_page() around to after the wp_page_reuse() call.

That normally doesn't matter - but it means that the unlock_page() is
now done after releasing the page table lock.  Again, not a big deal,
you'd think.

But it turns out that it's very wrong indeed, because once we've
released the page table lock, we've basically lost our only reference to
the page - the page tables - and it could now be free'd at any time.  We
do hold the mmap_sem, so no actual unmap() can happen, but madvise can
come in and a MADV_DONTNEED will zap the page range - and free the page.

So now the page may be free'd just as we're unlocking it, which in turn
will usually trigger a "Bad page state" error in the freeing path.  To
make matters more confusing, by the time the debug code prints out the
page state, the unlock has typically completed and everything looks fine
again.

This all doesn't happen in any normal situations, but it does trigger
with the dirtyc0w_child LTP test.  And it seems to trigger much more
easily (but not expclusively) on s390 than elsewhere, probably because
s390 doesn't do the "batch pages up for freeing after the TLB flush"
that gives the unlock_page() more time to complete and makes the race
harder to hit.

Fixes: 09854ba94c6a ("mm: do_wp_page() simplification")
Link: https://lore.kernel.org/lkml/a46e9bbef2ed4e17778f5615e818526ef848d791.camel@redhat.com/
Link: https://lore.kernel.org/linux-mm/c41149a8-211e-390b-af1d-d5eee690fecb@linux.alibaba.com/
Reported-by: Qian Cai <cai@redhat.com>
Reported-by: Alex Shi <alex.shi@linux.alibaba.com>
Bisected-and-analyzed-by: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Tested-by: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memory.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/memory.c b/mm/memory.c
index e315b1f1ef08..f3eb55975902 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2967,8 +2967,8 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
 		 * page count reference, and the page is locked,
 		 * it's dark out, and we're wearing sunglasses. Hit it.
 		 */
-		wp_page_reuse(vmf);
 		unlock_page(page);
+		wp_page_reuse(vmf);
 		return VM_FAULT_WRITE;
 	} else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
 					(VM_WRITE|VM_SHARED))) {

From 516d980f85415d76ae3d0d2a871eb20243f46c95 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Wed, 23 Sep 2020 02:48:56 +0900
Subject: [PATCH 141/322] scripts/kallsyms: skip ppc compiler stub
 *.long_branch.* / *.plt_branch.*

PowerPC allmodconfig often fails to build as follows:

    LD      .tmp_vmlinux.kallsyms1
    KSYM    .tmp_vmlinux.kallsyms1.o
    LD      .tmp_vmlinux.kallsyms2
    KSYM    .tmp_vmlinux.kallsyms2.o
    LD      .tmp_vmlinux.kallsyms3
    KSYM    .tmp_vmlinux.kallsyms3.o
    LD      vmlinux
    SORTTAB vmlinux
    SYSMAP  System.map
  Inconsistent kallsyms data
  Try make KALLSYMS_EXTRA_PASS=1 as a workaround
  make[2]: *** [../Makefile:1162: vmlinux] Error 1

Setting KALLSYMS_EXTRA_PASS=1 does not help.

This is caused by the compiler inserting stubs such as *.long_branch.*
and *.plt_branch.*

  $ powerpc-linux-nm -n .tmp_vmlinux.kallsyms2
   [ snip ]
  c00000000210c010 t 00000075.plt_branch.da9:19
  c00000000210c020 t 00000075.plt_branch.1677:5
  c00000000210c030 t 00000075.long_branch.memmove
  c00000000210c034 t 00000075.plt_branch.9e0:5
  c00000000210c044 t 00000075.plt_branch.free_initrd_mem
    ...

Actually, the problem mentioned in scripts/link-vmlinux.sh comments;
"In theory it's possible this results in even more stubs, but unlikely"
is happening here, and ends up with another kallsyms step required.

scripts/kallsyms.c already ignores various compiler stubs. Let's do
similar to make kallsysms for PowerPC always succeed in 2 steps.

Reported-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Tested-by: Guenter Roeck <linux@roeck-us.net>
---
 scripts/kallsyms.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c
index 0096cd965332..7ecd2ccba531 100644
--- a/scripts/kallsyms.c
+++ b/scripts/kallsyms.c
@@ -82,6 +82,7 @@ static char *sym_name(const struct sym_entry *s)
 
 static bool is_ignored_symbol(const char *name, char type)
 {
+	/* Symbol names that exactly match to the following are ignored.*/
 	static const char * const ignored_symbols[] = {
 		/*
 		 * Symbols which vary between passes. Passes 1 and 2 must have
@@ -104,6 +105,7 @@ static bool is_ignored_symbol(const char *name, char type)
 		NULL
 	};
 
+	/* Symbol names that begin with the following are ignored.*/
 	static const char * const ignored_prefixes[] = {
 		"$",			/* local symbols for ARM, MIPS, etc. */
 		".LASANPC",		/* s390 kasan local symbols */
@@ -113,6 +115,7 @@ static bool is_ignored_symbol(const char *name, char type)
 		NULL
 	};
 
+	/* Symbol names that end with the following are ignored.*/
 	static const char * const ignored_suffixes[] = {
 		"_from_arm",		/* arm */
 		"_from_thumb",		/* arm */
@@ -120,9 +123,15 @@ static bool is_ignored_symbol(const char *name, char type)
 		NULL
 	};
 
+	/* Symbol names that contain the following are ignored.*/
+	static const char * const ignored_matches[] = {
+		".long_branch.",	/* ppc stub */
+		".plt_branch.",		/* ppc stub */
+		NULL
+	};
+
 	const char * const *p;
 
-	/* Exclude symbols which vary between passes. */
 	for (p = ignored_symbols; *p; p++)
 		if (!strcmp(name, *p))
 			return true;
@@ -138,6 +147,11 @@ static bool is_ignored_symbol(const char *name, char type)
 			return true;
 	}
 
+	for (p = ignored_matches; *p; p++) {
+		if (strstr(name, *p))
+			return true;
+	}
+
 	if (type == 'U' || type == 'u')
 		return true;
 	/* exclude debugging symbols */

From 87f92ac4c12758c4da3bbe4393f1d884b610b8a6 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Thu, 24 Sep 2020 10:17:05 -0700
Subject: [PATCH 142/322] libbpf: Fix XDP program load regression for old
 kernels

Fix regression in libbpf, introduced by XDP link change, which causes XDP
programs to fail to be loaded into kernel due to specified BPF_XDP
expected_attach_type. While kernel doesn't enforce expected_attach_type for
BPF_PROG_TYPE_XDP, some old kernels already support XDP program, but they
don't yet recognize expected_attach_type field in bpf_attr, so setting it to
non-zero value causes program load to fail.

Luckily, libbpf already has a mechanism to deal with such cases, so just make
expected_attach_type optional for XDP programs.

Fixes: dc8698cac7aa ("libbpf: Add support for BPF XDP link")
Reported-by: Nikita Shirokov <tehnerd@tehnerd.com>
Reported-by: Udip Pant <udippant@fb.com>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200924171705.3803628-1-andriin@fb.com
---
 tools/lib/bpf/libbpf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 7253b833576c..e493d6048143 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -6925,7 +6925,7 @@ static const struct bpf_sec_def section_defs[] = {
 						BPF_XDP_DEVMAP),
 	BPF_EAPROG_SEC("xdp_cpumap/",		BPF_PROG_TYPE_XDP,
 						BPF_XDP_CPUMAP),
-	BPF_EAPROG_SEC("xdp",			BPF_PROG_TYPE_XDP,
+	BPF_APROG_SEC("xdp",			BPF_PROG_TYPE_XDP,
 						BPF_XDP),
 	BPF_PROG_SEC("perf_event",		BPF_PROG_TYPE_PERF_EVENT),
 	BPF_PROG_SEC("lwt_in",			BPF_PROG_TYPE_LWT_IN),

From ee6fa053019478918ca15eadaa93e516ddb9da8d Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Mon, 21 Sep 2020 13:38:05 +0300
Subject: [PATCH 143/322] KVM: x86: fix MSR_IA32_TSC read for nested migration

MSR reads/writes should always access the L1 state, since the (nested)
hypervisor should intercept all the msrs it wants to adjust, and these
that it doesn't should be read by the guest as if the host had read it.

However IA32_TSC is an exception. Even when not intercepted, guest still
reads the value + TSC offset.
The write however does not take any TSC offset into account.

This is documented in Intel's SDM and seems also to happen on AMD as well.

This creates a problem when userspace wants to read the IA32_TSC value and then
write it. (e.g for migration)

In this case it reads L2 value but write is interpreted as an L1 value.
To fix this make the userspace initiated reads of IA32_TSC return L1 value
as well.

Huge thanks to Dave Gilbert for helping me understand this very confusing
semantic of MSR writes.

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20200921103805.9102-2-mlevitsk@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 67362607e396..e3fe1d126cc3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3224,9 +3224,22 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_IA32_POWER_CTL:
 		msr_info->data = vcpu->arch.msr_ia32_power_ctl;
 		break;
-	case MSR_IA32_TSC:
-		msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) + vcpu->arch.tsc_offset;
+	case MSR_IA32_TSC: {
+		/*
+		 * Intel SDM states that MSR_IA32_TSC read adds the TSC offset
+		 * even when not intercepted. AMD manual doesn't explicitly
+		 * state this but appears to behave the same.
+		 *
+		 * On userspace reads and writes, however, we unconditionally
+		 * operate L1's TSC value to ensure backwards-compatible
+		 * behavior for migration.
+		 */
+		u64 tsc_offset = msr_info->host_initiated ? vcpu->arch.l1_tsc_offset :
+							    vcpu->arch.tsc_offset;
+
+		msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) + tsc_offset;
 		break;
+	}
 	case MSR_MTRRcap:
 	case 0x200 ... 0x2ff:
 		return kvm_mtrr_get_msr(vcpu, msr_info->index, &msr_info->data);

From 3701cb59d892b88d569427586f01491552f377b1 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 24 Sep 2020 19:41:58 -0400
Subject: [PATCH 144/322] ep_create_wakeup_source(): dentry name can change
 under you...

or get freed, for that matter, if it's a long (separately stored)
name.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/eventpoll.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 16313180e4c1..4df61129566d 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1448,7 +1448,7 @@ static int reverse_path_check(void)
 
 static int ep_create_wakeup_source(struct epitem *epi)
 {
-	const char *name;
+	struct name_snapshot n;
 	struct wakeup_source *ws;
 
 	if (!epi->ep->ws) {
@@ -1457,8 +1457,9 @@ static int ep_create_wakeup_source(struct epitem *epi)
 			return -ENOMEM;
 	}
 
-	name = epi->ffd.file->f_path.dentry->d_name.name;
-	ws = wakeup_source_register(NULL, name);
+	take_dentry_name_snapshot(&n, epi->ffd.file->f_path.dentry);
+	ws = wakeup_source_register(NULL, n.name.name);
+	release_dentry_name_snapshot(&n);
 
 	if (!ws)
 		return -ENOMEM;

From e49d8c22f1261c43a986a7fdbf677ac309682a07 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Tue, 22 Sep 2020 20:56:23 -0700
Subject: [PATCH 145/322] net_sched: defer tcf_idr_insert() in
 tcf_action_init_1()

All TC actions call tcf_idr_insert() for new action at the end
of their ->init(), so we can actually move it to a central place
in tcf_action_init_1().

And once the action is inserted into the global IDR, other parallel
process could free it immediately as its refcnt is still 1, so we can
not fail after this, we need to move it after the goto action
validation to avoid handling the failure case after insertion.

This is found during code review, is not directly triggered by syzbot.
And this prepares for the next patch.

Cc: Vlad Buslov <vladbu@mellanox.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h      |  2 --
 net/sched/act_api.c        | 38 ++++++++++++++++++++------------------
 net/sched/act_bpf.c        |  4 +---
 net/sched/act_connmark.c   |  1 -
 net/sched/act_csum.c       |  3 ---
 net/sched/act_ct.c         |  2 --
 net/sched/act_ctinfo.c     |  3 ---
 net/sched/act_gact.c       |  2 --
 net/sched/act_gate.c       |  3 ---
 net/sched/act_ife.c        |  3 ---
 net/sched/act_ipt.c        |  2 --
 net/sched/act_mirred.c     |  2 --
 net/sched/act_mpls.c       |  2 --
 net/sched/act_nat.c        |  3 ---
 net/sched/act_pedit.c      |  2 --
 net/sched/act_police.c     |  2 --
 net/sched/act_sample.c     |  2 --
 net/sched/act_simple.c     |  2 --
 net/sched/act_skbedit.c    |  2 --
 net/sched/act_skbmod.c     |  2 --
 net/sched/act_tunnel_key.c |  3 ---
 net/sched/act_vlan.c       |  2 --
 22 files changed, 21 insertions(+), 66 deletions(-)

diff --git a/include/net/act_api.h b/include/net/act_api.h
index cb382a89ea58..87214927314a 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -166,8 +166,6 @@ int tcf_idr_create_from_flags(struct tc_action_net *tn, u32 index,
 			      struct nlattr *est, struct tc_action **a,
 			      const struct tc_action_ops *ops, int bind,
 			      u32 flags);
-void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a);
-
 void tcf_idr_cleanup(struct tc_action_net *tn, u32 index);
 int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index,
 			struct tc_action **a, int bind);
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 063d8aaf2900..0030f00234ee 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -467,17 +467,6 @@ int tcf_idr_create_from_flags(struct tc_action_net *tn, u32 index,
 }
 EXPORT_SYMBOL(tcf_idr_create_from_flags);
 
-void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a)
-{
-	struct tcf_idrinfo *idrinfo = tn->idrinfo;
-
-	mutex_lock(&idrinfo->lock);
-	/* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */
-	WARN_ON(!IS_ERR(idr_replace(&idrinfo->action_idr, a, a->tcfa_index)));
-	mutex_unlock(&idrinfo->lock);
-}
-EXPORT_SYMBOL(tcf_idr_insert);
-
 /* Cleanup idr index that was allocated but not initialized. */
 
 void tcf_idr_cleanup(struct tc_action_net *tn, u32 index)
@@ -902,6 +891,16 @@ static const struct nla_policy tcf_action_policy[TCA_ACT_MAX + 1] = {
 	[TCA_ACT_HW_STATS]	= NLA_POLICY_BITFIELD32(TCA_ACT_HW_STATS_ANY),
 };
 
+static void tcf_idr_insert(struct tc_action *a)
+{
+	struct tcf_idrinfo *idrinfo = a->idrinfo;
+
+	mutex_lock(&idrinfo->lock);
+	/* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */
+	WARN_ON(!IS_ERR(idr_replace(&idrinfo->action_idr, a, a->tcfa_index)));
+	mutex_unlock(&idrinfo->lock);
+}
+
 struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 				    struct nlattr *nla, struct nlattr *est,
 				    char *name, int ovr, int bind,
@@ -989,6 +988,16 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 	if (err < 0)
 		goto err_mod;
 
+	if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN) &&
+	    !rcu_access_pointer(a->goto_chain)) {
+		tcf_action_destroy_1(a, bind);
+		NL_SET_ERR_MSG(extack, "can't use goto chain with NULL chain");
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (err == ACT_P_CREATED)
+		tcf_idr_insert(a);
+
 	if (!name && tb[TCA_ACT_COOKIE])
 		tcf_set_action_cookie(&a->act_cookie, cookie);
 
@@ -1002,13 +1011,6 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 	if (err != ACT_P_CREATED)
 		module_put(a_o->owner);
 
-	if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN) &&
-	    !rcu_access_pointer(a->goto_chain)) {
-		tcf_action_destroy_1(a, bind);
-		NL_SET_ERR_MSG(extack, "can't use goto chain with NULL chain");
-		return ERR_PTR(-EINVAL);
-	}
-
 	return a;
 
 err_mod:
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 54d5652cfe6c..a4c7ba35a343 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -365,9 +365,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
 	if (goto_ch)
 		tcf_chain_put_by_act(goto_ch);
 
-	if (res == ACT_P_CREATED) {
-		tcf_idr_insert(tn, *act);
-	} else {
+	if (res != ACT_P_CREATED) {
 		/* make sure the program being replaced is no longer executing */
 		synchronize_rcu();
 		tcf_bpf_cfg_cleanup(&old);
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index f901421b0634..e19885d7fe2c 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -139,7 +139,6 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
 		ci->net = net;
 		ci->zone = parm->zone;
 
-		tcf_idr_insert(tn, *a);
 		ret = ACT_P_CREATED;
 	} else if (ret > 0) {
 		ci = to_connmark(*a);
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index f5826e457679..4fa4fcb842ba 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -110,9 +110,6 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
 	if (params_new)
 		kfree_rcu(params_new, rcu);
 
-	if (ret == ACT_P_CREATED)
-		tcf_idr_insert(tn, *a);
-
 	return ret;
 put_chain:
 	if (goto_ch)
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 2c3619165680..a780afdf570d 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -1297,8 +1297,6 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla,
 		tcf_chain_put_by_act(goto_ch);
 	if (params)
 		call_rcu(&params->rcu, tcf_ct_params_free);
-	if (res == ACT_P_CREATED)
-		tcf_idr_insert(tn, *a);
 
 	return res;
 
diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c
index b5042f3ea079..6084300e51ad 100644
--- a/net/sched/act_ctinfo.c
+++ b/net/sched/act_ctinfo.c
@@ -269,9 +269,6 @@ static int tcf_ctinfo_init(struct net *net, struct nlattr *nla,
 	if (cp_new)
 		kfree_rcu(cp_new, rcu);
 
-	if (ret == ACT_P_CREATED)
-		tcf_idr_insert(tn, *a);
-
 	return ret;
 
 put_chain:
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 410e3bbfb9ca..73c3926358a0 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -140,8 +140,6 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
 	if (goto_ch)
 		tcf_chain_put_by_act(goto_ch);
 
-	if (ret == ACT_P_CREATED)
-		tcf_idr_insert(tn, *a);
 	return ret;
 release_idr:
 	tcf_idr_release(*a, bind);
diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c
index 1fb8d428d2c1..7c0771dd77a3 100644
--- a/net/sched/act_gate.c
+++ b/net/sched/act_gate.c
@@ -437,9 +437,6 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
 	if (goto_ch)
 		tcf_chain_put_by_act(goto_ch);
 
-	if (ret == ACT_P_CREATED)
-		tcf_idr_insert(tn, *a);
-
 	return ret;
 
 chain_put:
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 5c568757643b..a2ddea04183a 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -627,9 +627,6 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
 	if (p)
 		kfree_rcu(p, rcu);
 
-	if (ret == ACT_P_CREATED)
-		tcf_idr_insert(tn, *a);
-
 	return ret;
 metadata_parse_err:
 	if (goto_ch)
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 400a2cfe8452..8dc3bec0d325 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -189,8 +189,6 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
 	ipt->tcfi_t     = t;
 	ipt->tcfi_hook  = hook;
 	spin_unlock_bh(&ipt->tcf_lock);
-	if (ret == ACT_P_CREATED)
-		tcf_idr_insert(tn, *a);
 	return ret;
 
 err3:
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index b2705318993b..e24b7e2331cd 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -194,8 +194,6 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 		spin_lock(&mirred_list_lock);
 		list_add(&m->tcfm_list, &mirred_list);
 		spin_unlock(&mirred_list_lock);
-
-		tcf_idr_insert(tn, *a);
 	}
 
 	return ret;
diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c
index 8118e2640979..e298ec3b3c9e 100644
--- a/net/sched/act_mpls.c
+++ b/net/sched/act_mpls.c
@@ -273,8 +273,6 @@ static int tcf_mpls_init(struct net *net, struct nlattr *nla,
 	if (p)
 		kfree_rcu(p, rcu);
 
-	if (ret == ACT_P_CREATED)
-		tcf_idr_insert(tn, *a);
 	return ret;
 put_chain:
 	if (goto_ch)
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 855a6fa16a62..1ebd2a86d980 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -93,9 +93,6 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
 	if (goto_ch)
 		tcf_chain_put_by_act(goto_ch);
 
-	if (ret == ACT_P_CREATED)
-		tcf_idr_insert(tn, *a);
-
 	return ret;
 release_idr:
 	tcf_idr_release(*a, bind);
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index c158bfed86d5..b45304446e13 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -238,8 +238,6 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
 	spin_unlock_bh(&p->tcf_lock);
 	if (goto_ch)
 		tcf_chain_put_by_act(goto_ch);
-	if (ret == ACT_P_CREATED)
-		tcf_idr_insert(tn, *a);
 	return ret;
 
 put_chain:
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 0b431d493768..8d8452b1cdd4 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -201,8 +201,6 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
 	if (new)
 		kfree_rcu(new, rcu);
 
-	if (ret == ACT_P_CREATED)
-		tcf_idr_insert(tn, *a);
 	return ret;
 
 failure:
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 5e2df590bb58..3ebf9ede3cf1 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -116,8 +116,6 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
 	if (goto_ch)
 		tcf_chain_put_by_act(goto_ch);
 
-	if (ret == ACT_P_CREATED)
-		tcf_idr_insert(tn, *a);
 	return ret;
 put_chain:
 	if (goto_ch)
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 9813ca4006dd..a4f3d0f0daa9 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -157,8 +157,6 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
 			goto release_idr;
 	}
 
-	if (ret == ACT_P_CREATED)
-		tcf_idr_insert(tn, *a);
 	return ret;
 put_chain:
 	if (goto_ch)
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index d0652386c6e2..e5f3fb8b00e3 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -225,8 +225,6 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
 	if (goto_ch)
 		tcf_chain_put_by_act(goto_ch);
 
-	if (ret == ACT_P_CREATED)
-		tcf_idr_insert(tn, *a);
 	return ret;
 put_chain:
 	if (goto_ch)
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index 39e6d94cfafb..81a1c67335be 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -190,8 +190,6 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
 	if (goto_ch)
 		tcf_chain_put_by_act(goto_ch);
 
-	if (ret == ACT_P_CREATED)
-		tcf_idr_insert(tn, *a);
 	return ret;
 put_chain:
 	if (goto_ch)
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 37f1e10f35e0..a229751ee8c4 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -537,9 +537,6 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
 	if (goto_ch)
 		tcf_chain_put_by_act(goto_ch);
 
-	if (ret == ACT_P_CREATED)
-		tcf_idr_insert(tn, *a);
-
 	return ret;
 
 put_chain:
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index a5ff9f68ab02..163b0385fd4c 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -229,8 +229,6 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 	if (p)
 		kfree_rcu(p, rcu);
 
-	if (ret == ACT_P_CREATED)
-		tcf_idr_insert(tn, *a);
 	return ret;
 put_chain:
 	if (goto_ch)

From 0fedc63fadf0404a729e73a35349481c8009c02f Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Tue, 22 Sep 2020 20:56:24 -0700
Subject: [PATCH 146/322] net_sched: commit action insertions together

syzbot is able to trigger a failure case inside the loop in
tcf_action_init(), and when this happens we clean up with
tcf_action_destroy(). But, as these actions are already inserted
into the global IDR, other parallel process could free them
before tcf_action_destroy(), then we will trigger a use-after-free.

Fix this by deferring the insertions even later, after the loop,
and committing all the insertions in a separate loop, so we will
never fail in the middle of the insertions any more.

One side effect is that the window between alloction and final
insertion becomes larger, now it is more likely that the loop in
tcf_del_walker() sees the placeholder -EBUSY pointer. So we have
to check for error pointer in tcf_del_walker().

Reported-and-tested-by: syzbot+2287853d392e4b42374a@syzkaller.appspotmail.com
Fixes: 0190c1d452a9 ("net: sched: atomically check-allocate action")
Cc: Vlad Buslov <vladbu@mellanox.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_api.c | 32 +++++++++++++++++++++++---------
 1 file changed, 23 insertions(+), 9 deletions(-)

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 0030f00234ee..104b47f5184f 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -307,6 +307,8 @@ static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
 
 	mutex_lock(&idrinfo->lock);
 	idr_for_each_entry_ul(idr, p, tmp, id) {
+		if (IS_ERR(p))
+			continue;
 		ret = tcf_idr_release_unsafe(p);
 		if (ret == ACT_P_DELETED) {
 			module_put(ops->owner);
@@ -891,14 +893,24 @@ static const struct nla_policy tcf_action_policy[TCA_ACT_MAX + 1] = {
 	[TCA_ACT_HW_STATS]	= NLA_POLICY_BITFIELD32(TCA_ACT_HW_STATS_ANY),
 };
 
-static void tcf_idr_insert(struct tc_action *a)
+static void tcf_idr_insert_many(struct tc_action *actions[])
 {
-	struct tcf_idrinfo *idrinfo = a->idrinfo;
+	int i;
 
-	mutex_lock(&idrinfo->lock);
-	/* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */
-	WARN_ON(!IS_ERR(idr_replace(&idrinfo->action_idr, a, a->tcfa_index)));
-	mutex_unlock(&idrinfo->lock);
+	for (i = 0; i < TCA_ACT_MAX_PRIO; i++) {
+		struct tc_action *a = actions[i];
+		struct tcf_idrinfo *idrinfo;
+
+		if (!a)
+			continue;
+		idrinfo = a->idrinfo;
+		mutex_lock(&idrinfo->lock);
+		/* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc if
+		 * it is just created, otherwise this is just a nop.
+		 */
+		idr_replace(&idrinfo->action_idr, a, a->tcfa_index);
+		mutex_unlock(&idrinfo->lock);
+	}
 }
 
 struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
@@ -995,9 +1007,6 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 		return ERR_PTR(-EINVAL);
 	}
 
-	if (err == ACT_P_CREATED)
-		tcf_idr_insert(a);
-
 	if (!name && tb[TCA_ACT_COOKIE])
 		tcf_set_action_cookie(&a->act_cookie, cookie);
 
@@ -1053,6 +1062,11 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
 		actions[i - 1] = act;
 	}
 
+	/* We have to commit them all together, because if any error happened in
+	 * between, we could not handle the failure gracefully.
+	 */
+	tcf_idr_insert_many(actions);
+
 	*attr_size = tcf_action_full_attrs_size(sz);
 	return i - 1;
 

From 02a1b175b0e92d9e0fa5df3957ade8d733ceb6a0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= <maze@google.com>
Date: Wed, 23 Sep 2020 13:18:15 -0700
Subject: [PATCH 147/322] net/ipv4: always honour route mtu during forwarding
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Documentation/networking/ip-sysctl.txt:46 says:
  ip_forward_use_pmtu - BOOLEAN
    By default we don't trust protocol path MTUs while forwarding
    because they could be easily forged and can lead to unwanted
    fragmentation by the router.
    You only need to enable this if you have user-space software
    which tries to discover path mtus by itself and depends on the
    kernel honoring this information. This is normally not the case.
    Default: 0 (disabled)
    Possible values:
    0 - disabled
    1 - enabled

Which makes it pretty clear that setting it to 1 is a potential
security/safety/DoS issue, and yet it is entirely reasonable to want
forwarded traffic to honour explicitly administrator configured
route mtus (instead of defaulting to device mtu).

Indeed, I can't think of a single reason why you wouldn't want to.
Since you configured a route mtu you probably know better...

It is pretty common to have a higher device mtu to allow receiving
large (jumbo) frames, while having some routes via that interface
(potentially including the default route to the internet) specify
a lower mtu.

Note that ipv6 forwarding uses device mtu unless the route is locked
(in which case it will use the route mtu).

This approach is not usable for IPv4 where an 'mtu lock' on a route
also has the side effect of disabling TCP path mtu discovery via
disabling the IPv4 DF (don't frag) bit on all outgoing frames.

I'm not aware of a way to lock a route from an IPv6 RA, so that also
potentially seems wrong.

Signed-off-by: Maciej Żenczykowski <maze@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Willem de Bruijn <willemb@google.com>
Cc: Lorenzo Colitti <lorenzo@google.com>
Cc: Sunmeet Gill (Sunny) <sgill@quicinc.com>
Cc: Vinay Paradkar <vparadka@qti.qualcomm.com>
Cc: Tyler Wear <twear@quicinc.com>
Cc: David Ahern <dsahern@kernel.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/include/net/ip.h b/include/net/ip.h
index b09c48d862cc..2a52787db64a 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -436,12 +436,18 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
 						    bool forwarding)
 {
 	struct net *net = dev_net(dst->dev);
+	unsigned int mtu;
 
 	if (net->ipv4.sysctl_ip_fwd_use_pmtu ||
 	    ip_mtu_locked(dst) ||
 	    !forwarding)
 		return dst_mtu(dst);
 
+	/* 'forwarding = true' case should always honour route mtu */
+	mtu = dst_metric_raw(dst, RTAX_MTU);
+	if (mtu)
+		return mtu;
+
 	return min(READ_ONCE(dst->dev->mtu), IP_MAX_MTU);
 }
 

From ed46cd1d4cc4b2cf05f31fe25fc68d1a9d3589ba Mon Sep 17 00:00:00 2001
From: Xie He <xie.he.0141@gmail.com>
Date: Wed, 23 Sep 2020 11:18:18 -0700
Subject: [PATCH 148/322] drivers/net/wan/x25_asy: Correct the ndo_open and
 ndo_stop functions

1.
Move the lapb_register/lapb_unregister calls into the ndo_open/ndo_stop
functions.
This makes the LAPB protocol start/stop when the network interface
starts/stops. When the network interface is down, the LAPB protocol
shouldn't be running and the LAPB module shoudn't be generating control
frames.

2.
Move netif_start_queue/netif_stop_queue into the ndo_open/ndo_stop
functions.
This makes the TX queue start/stop when the network interface
starts/stops.
(netif_stop_queue was originally in the ndo_stop function. But to make
the code look better, I created a new function to use as ndo_stop, and
made it call the original ndo_stop function. I moved netif_stop_queue
from the original ndo_stop function to the new ndo_stop function.)

Cc: Martin Schiller <ms@dev.tdt.de>
Signed-off-by: Xie He <xie.he.0141@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wan/x25_asy.c | 43 +++++++++++++++++++++++----------------
 1 file changed, 26 insertions(+), 17 deletions(-)

diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c
index 7ee980575208..c418767a890a 100644
--- a/drivers/net/wan/x25_asy.c
+++ b/drivers/net/wan/x25_asy.c
@@ -464,7 +464,6 @@ static int x25_asy_open(struct net_device *dev)
 {
 	struct x25_asy *sl = netdev_priv(dev);
 	unsigned long len;
-	int err;
 
 	if (sl->tty == NULL)
 		return -ENODEV;
@@ -490,14 +489,7 @@ static int x25_asy_open(struct net_device *dev)
 	sl->xleft    = 0;
 	sl->flags   &= (1 << SLF_INUSE);      /* Clear ESCAPE & ERROR flags */
 
-	netif_start_queue(dev);
-
-	/*
-	 *	Now attach LAPB
-	 */
-	err = lapb_register(dev, &x25_asy_callbacks);
-	if (err == LAPB_OK)
-		return 0;
+	return 0;
 
 	/* Cleanup */
 	kfree(sl->xbuff);
@@ -519,7 +511,6 @@ static int x25_asy_close(struct net_device *dev)
 	if (sl->tty)
 		clear_bit(TTY_DO_WRITE_WAKEUP, &sl->tty->flags);
 
-	netif_stop_queue(dev);
 	sl->rcount = 0;
 	sl->xleft  = 0;
 	spin_unlock(&sl->lock);
@@ -604,7 +595,6 @@ static int x25_asy_open_tty(struct tty_struct *tty)
 static void x25_asy_close_tty(struct tty_struct *tty)
 {
 	struct x25_asy *sl = tty->disc_data;
-	int err;
 
 	/* First make sure we're connected. */
 	if (!sl || sl->magic != X25_ASY_MAGIC)
@@ -615,11 +605,6 @@ static void x25_asy_close_tty(struct tty_struct *tty)
 		dev_close(sl->dev);
 	rtnl_unlock();
 
-	err = lapb_unregister(sl->dev);
-	if (err != LAPB_OK)
-		pr_err("%s: lapb_unregister error: %d\n",
-		       __func__, err);
-
 	tty->disc_data = NULL;
 	sl->tty = NULL;
 	x25_asy_free(sl);
@@ -722,15 +707,39 @@ static int x25_asy_ioctl(struct tty_struct *tty, struct file *file,
 
 static int x25_asy_open_dev(struct net_device *dev)
 {
+	int err;
 	struct x25_asy *sl = netdev_priv(dev);
 	if (sl->tty == NULL)
 		return -ENODEV;
+
+	err = lapb_register(dev, &x25_asy_callbacks);
+	if (err != LAPB_OK)
+		return -ENOMEM;
+
+	netif_start_queue(dev);
+
+	return 0;
+}
+
+static int x25_asy_close_dev(struct net_device *dev)
+{
+	int err;
+
+	netif_stop_queue(dev);
+
+	err = lapb_unregister(dev);
+	if (err != LAPB_OK)
+		pr_err("%s: lapb_unregister error: %d\n",
+		       __func__, err);
+
+	x25_asy_close(dev);
+
 	return 0;
 }
 
 static const struct net_device_ops x25_asy_netdev_ops = {
 	.ndo_open	= x25_asy_open_dev,
-	.ndo_stop	= x25_asy_close,
+	.ndo_stop	= x25_asy_close_dev,
 	.ndo_start_xmit	= x25_asy_xmit,
 	.ndo_tx_timeout	= x25_asy_timeout,
 	.ndo_change_mtu	= x25_asy_change_mtu,

From f68910a8056f9451ee9fe7e1b962f7d90d326ad3 Mon Sep 17 00:00:00 2001
From: Luo bin <luobin9@huawei.com>
Date: Thu, 24 Sep 2020 09:31:51 +0800
Subject: [PATCH 149/322] hinic: fix wrong return value of mac-set cmd

It should also be regarded as an error when hw return status=4 for PF's
setting mac cmd. Only if PF return status=4 to VF should this cmd be
taken special treatment.

Fixes: 7dd29ee12865 ("hinic: add sriov feature support")
Signed-off-by: Luo bin <luobin9@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/huawei/hinic/hinic_port.c  |  6 +++---
 drivers/net/ethernet/huawei/hinic/hinic_sriov.c | 12 ++----------
 2 files changed, 5 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/huawei/hinic/hinic_port.c b/drivers/net/ethernet/huawei/hinic/hinic_port.c
index 02cd635d6914..eb97f2d6b1ad 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_port.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_port.c
@@ -58,9 +58,9 @@ static int change_mac(struct hinic_dev *nic_dev, const u8 *addr,
 				 sizeof(port_mac_cmd),
 				 &port_mac_cmd, &out_size);
 	if (err || out_size != sizeof(port_mac_cmd) ||
-	    (port_mac_cmd.status  &&
-	    port_mac_cmd.status != HINIC_PF_SET_VF_ALREADY &&
-	    port_mac_cmd.status != HINIC_MGMT_STATUS_EXIST)) {
+	    (port_mac_cmd.status &&
+	     (port_mac_cmd.status != HINIC_PF_SET_VF_ALREADY || !HINIC_IS_VF(hwif)) &&
+	     port_mac_cmd.status != HINIC_MGMT_STATUS_EXIST)) {
 		dev_err(&pdev->dev, "Failed to change MAC, err: %d, status: 0x%x, out size: 0x%x\n",
 			err, port_mac_cmd.status, out_size);
 		return -EFAULT;
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_sriov.c b/drivers/net/ethernet/huawei/hinic/hinic_sriov.c
index 4d63680f2143..f8a26459ff65 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_sriov.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_sriov.c
@@ -38,8 +38,7 @@ static int hinic_set_mac(struct hinic_hwdev *hwdev, const u8 *mac_addr,
 	err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_SET_MAC, &mac_info,
 				 sizeof(mac_info), &mac_info, &out_size);
 	if (err || out_size != sizeof(mac_info) ||
-	    (mac_info.status && mac_info.status != HINIC_PF_SET_VF_ALREADY &&
-	    mac_info.status != HINIC_MGMT_STATUS_EXIST)) {
+	    (mac_info.status && mac_info.status != HINIC_MGMT_STATUS_EXIST)) {
 		dev_err(&hwdev->func_to_io.hwif->pdev->dev, "Failed to set MAC, err: %d, status: 0x%x, out size: 0x%x\n",
 			err, mac_info.status, out_size);
 		return -EIO;
@@ -503,8 +502,7 @@ struct hinic_sriov_info *hinic_get_sriov_info_by_pcidev(struct pci_dev *pdev)
 
 static int hinic_check_mac_info(u8 status, u16 vlan_id)
 {
-	if ((status && status != HINIC_MGMT_STATUS_EXIST &&
-	     status != HINIC_PF_SET_VF_ALREADY) ||
+	if ((status && status != HINIC_MGMT_STATUS_EXIST) ||
 	    (vlan_id & CHECK_IPSU_15BIT &&
 	     status == HINIC_MGMT_STATUS_EXIST))
 		return -EINVAL;
@@ -546,12 +544,6 @@ static int hinic_update_mac(struct hinic_hwdev *hwdev, u8 *old_mac,
 		return -EINVAL;
 	}
 
-	if (mac_info.status == HINIC_PF_SET_VF_ALREADY) {
-		dev_warn(&hwdev->hwif->pdev->dev,
-			 "PF has already set VF MAC. Ignore update operation\n");
-		return HINIC_PF_SET_VF_ALREADY;
-	}
-
 	if (mac_info.status == HINIC_MGMT_STATUS_EXIST)
 		dev_warn(&hwdev->hwif->pdev->dev, "MAC is repeated. Ignore update operation\n");
 

From dba1e4660a87927bdc03c23e36fd2c81a16a7ab1 Mon Sep 17 00:00:00 2001
From: Xiaoliang Yang <xiaoliang.yang_1@nxp.com>
Date: Thu, 24 Sep 2020 09:57:46 +0800
Subject: [PATCH 150/322] net: dsa: felix: convert TAS link speed based on
 phylink speed

state->speed holds a value of 10, 100, 1000 or 2500, but
QSYS_TAG_CONFIG_LINK_SPEED expects a value of 0, 1, 2, 3. So convert the
speed to a proper value.

Fixes: de143c0e274b ("net: dsa: felix: Configure Time-Aware Scheduler via taprio offload")
Signed-off-by: Xiaoliang Yang <xiaoliang.yang_1@nxp.com>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/ocelot/felix_vsc9959.c | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c
index 6855c94256f8..36db631a55e6 100644
--- a/drivers/net/dsa/ocelot/felix_vsc9959.c
+++ b/drivers/net/dsa/ocelot/felix_vsc9959.c
@@ -1284,8 +1284,28 @@ void vsc9959_mdio_bus_free(struct ocelot *ocelot)
 static void vsc9959_sched_speed_set(struct ocelot *ocelot, int port,
 				    u32 speed)
 {
+	u8 tas_speed;
+
+	switch (speed) {
+	case SPEED_10:
+		tas_speed = OCELOT_SPEED_10;
+		break;
+	case SPEED_100:
+		tas_speed = OCELOT_SPEED_100;
+		break;
+	case SPEED_1000:
+		tas_speed = OCELOT_SPEED_1000;
+		break;
+	case SPEED_2500:
+		tas_speed = OCELOT_SPEED_2500;
+		break;
+	default:
+		tas_speed = OCELOT_SPEED_1000;
+		break;
+	}
+
 	ocelot_rmw_rix(ocelot,
-		       QSYS_TAG_CONFIG_LINK_SPEED(speed),
+		       QSYS_TAG_CONFIG_LINK_SPEED(tas_speed),
 		       QSYS_TAG_CONFIG_LINK_SPEED_M,
 		       QSYS_TAG_CONFIG, port);
 }

From 4ab810a4e04ab6c851007033d39c13e6d3f55110 Mon Sep 17 00:00:00 2001
From: Xiaoliang Yang <xiaoliang.yang_1@nxp.com>
Date: Thu, 24 Sep 2020 10:11:13 +0800
Subject: [PATCH 151/322] net: mscc: ocelot: fix fields offset in
 SG_CONFIG_REG_3

INIT_IPS and GATE_ENABLE fields have a wrong offset in SG_CONFIG_REG_3.
This register is used by stream gate control of PSFP, and it has not
been used before, because PSFP is not implemented in ocelot driver.

Signed-off-by: Xiaoliang Yang <xiaoliang.yang_1@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/soc/mscc/ocelot_ana.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/soc/mscc/ocelot_ana.h b/include/soc/mscc/ocelot_ana.h
index 841c6ec22b64..1669481d9779 100644
--- a/include/soc/mscc/ocelot_ana.h
+++ b/include/soc/mscc/ocelot_ana.h
@@ -252,10 +252,10 @@
 #define ANA_SG_CONFIG_REG_3_LIST_LENGTH_M                 GENMASK(18, 16)
 #define ANA_SG_CONFIG_REG_3_LIST_LENGTH_X(x)              (((x) & GENMASK(18, 16)) >> 16)
 #define ANA_SG_CONFIG_REG_3_GATE_ENABLE                   BIT(20)
-#define ANA_SG_CONFIG_REG_3_INIT_IPS(x)                   (((x) << 24) & GENMASK(27, 24))
-#define ANA_SG_CONFIG_REG_3_INIT_IPS_M                    GENMASK(27, 24)
-#define ANA_SG_CONFIG_REG_3_INIT_IPS_X(x)                 (((x) & GENMASK(27, 24)) >> 24)
-#define ANA_SG_CONFIG_REG_3_INIT_GATE_STATE               BIT(28)
+#define ANA_SG_CONFIG_REG_3_INIT_IPS(x)                   (((x) << 21) & GENMASK(24, 21))
+#define ANA_SG_CONFIG_REG_3_INIT_IPS_M                    GENMASK(24, 21)
+#define ANA_SG_CONFIG_REG_3_INIT_IPS_X(x)                 (((x) & GENMASK(24, 21)) >> 21)
+#define ANA_SG_CONFIG_REG_3_INIT_GATE_STATE               BIT(25)
 
 #define ANA_SG_GCL_GS_CONFIG_RSZ                          0x4
 

From 0eb11dfe224f9bf0f7d66a68500a43a9b9c83b02 Mon Sep 17 00:00:00 2001
From: Wang Qing <wangqing@vivo.com>
Date: Thu, 24 Sep 2020 14:50:24 +0800
Subject: [PATCH 152/322] net/ethernet/broadcom: fix spelling typo

Modify the comment typo: "compliment" -> "complement".

Signed-off-by: Wang Qing <wangqing@vivo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
index bfc0e45d4a2b..5caa75b41b73 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
@@ -284,12 +284,12 @@
 #define CCM_REG_GR_ARB_TYPE					 0xd015c
 /* [RW 2] Load (FIC0) channel group priority. The lowest priority is 0; the
    highest priority is 3. It is supposed; that the Store channel priority is
-   the compliment to 4 of the rest priorities - Aggregation channel; Load
+   the complement to 4 of the rest priorities - Aggregation channel; Load
    (FIC0) channel and Load (FIC1). */
 #define CCM_REG_GR_LD0_PR					 0xd0164
 /* [RW 2] Load (FIC1) channel group priority. The lowest priority is 0; the
    highest priority is 3. It is supposed; that the Store channel priority is
-   the compliment to 4 of the rest priorities - Aggregation channel; Load
+   the complement to 4 of the rest priorities - Aggregation channel; Load
    (FIC0) channel and Load (FIC1). */
 #define CCM_REG_GR_LD1_PR					 0xd0168
 /* [RW 2] General flags index. */
@@ -4489,11 +4489,11 @@
 #define TCM_REG_GR_ARB_TYPE					 0x50114
 /* [RW 2] Load (FIC0) channel group priority. The lowest priority is 0; the
    highest priority is 3. It is supposed that the Store channel is the
-   compliment of the other 3 groups. */
+   complement of the other 3 groups. */
 #define TCM_REG_GR_LD0_PR					 0x5011c
 /* [RW 2] Load (FIC1) channel group priority. The lowest priority is 0; the
    highest priority is 3. It is supposed that the Store channel is the
-   compliment of the other 3 groups. */
+   complement of the other 3 groups. */
 #define TCM_REG_GR_LD1_PR					 0x50120
 /* [RW 4] The number of double REG-pairs; loaded from the STORM context and
    sent to STORM; for a specific connection type. The double REG-pairs are
@@ -5020,11 +5020,11 @@
 #define UCM_REG_GR_ARB_TYPE					 0xe0144
 /* [RW 2] Load (FIC0) channel group priority. The lowest priority is 0; the
    highest priority is 3. It is supposed that the Store channel group is
-   compliment to the others. */
+   complement to the others. */
 #define UCM_REG_GR_LD0_PR					 0xe014c
 /* [RW 2] Load (FIC1) channel group priority. The lowest priority is 0; the
    highest priority is 3. It is supposed that the Store channel group is
-   compliment to the others. */
+   complement to the others. */
 #define UCM_REG_GR_LD1_PR					 0xe0150
 /* [RW 2] The queue index for invalidate counter flag decision. */
 #define UCM_REG_INV_CFLG_Q					 0xe00e4
@@ -5523,11 +5523,11 @@
 #define XCM_REG_GR_ARB_TYPE					 0x2020c
 /* [RW 2] Load (FIC0) channel group priority. The lowest priority is 0; the
    highest priority is 3. It is supposed that the Channel group is the
-   compliment of the other 3 groups. */
+   complement of the other 3 groups. */
 #define XCM_REG_GR_LD0_PR					 0x20214
 /* [RW 2] Load (FIC1) channel group priority. The lowest priority is 0; the
    highest priority is 3. It is supposed that the Channel group is the
-   compliment of the other 3 groups. */
+   complement of the other 3 groups. */
 #define XCM_REG_GR_LD1_PR					 0x20218
 /* [RW 1] Input nig0 Interface enable. If 0 - the valid input is
    disregarded; acknowledge output is deasserted; all other signals are

From 38f7e1c0c43dd25b06513137bb6fd35476f9ec6d Mon Sep 17 00:00:00 2001
From: Rohit Maheshwari <rohitm@chelsio.com>
Date: Thu, 24 Sep 2020 12:28:45 +0530
Subject: [PATCH 153/322] net/tls: race causes kernel panic

BUG: kernel NULL pointer dereference, address: 00000000000000b8
 #PF: supervisor read access in kernel mode
 #PF: error_code(0x0000) - not-present page
 PGD 80000008b6fef067 P4D 80000008b6fef067 PUD 8b6fe6067 PMD 0
 Oops: 0000 [#1] SMP PTI
 CPU: 12 PID: 23871 Comm: kworker/12:80 Kdump: loaded Tainted: G S
 5.9.0-rc3+ #1
 Hardware name: Supermicro X10SRA-F/X10SRA-F, BIOS 2.1 03/29/2018
 Workqueue: events tx_work_handler [tls]
 RIP: 0010:tx_work_handler+0x1b/0x70 [tls]
 Code: dc fe ff ff e8 16 d4 a3 f6 66 0f 1f 44 00 00 0f 1f 44 00 00 55 53 48 8b
 6f 58 48 8b bd a0 04 00 00 48 85 ff 74 1c 48 8b 47 28 <48> 8b 90 b8 00 00 00 83
 e2 02 75 0c f0 48 0f ba b0 b8 00 00 00 00
 RSP: 0018:ffffa44ace61fe88 EFLAGS: 00010286
 RAX: 0000000000000000 RBX: ffff91da9e45cc30 RCX: dead000000000122
 RDX: 0000000000000001 RSI: ffff91da9e45cc38 RDI: ffff91d95efac200
 RBP: ffff91da133fd780 R08: 0000000000000000 R09: 000073746e657665
 R10: 8080808080808080 R11: 0000000000000000 R12: ffff91dad7d30700
 R13: ffff91dab6561080 R14: 0ffff91dad7d3070 R15: ffff91da9e45cc38
 FS:  0000000000000000(0000) GS:ffff91dad7d00000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 00000000000000b8 CR3: 0000000906478003 CR4: 00000000003706e0
 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
 Call Trace:
  process_one_work+0x1a7/0x370
  worker_thread+0x30/0x370
  ? process_one_work+0x370/0x370
  kthread+0x114/0x130
  ? kthread_park+0x80/0x80
  ret_from_fork+0x22/0x30

tls_sw_release_resources_tx() waits for encrypt_pending, which
can have race, so we need similar changes as in commit
0cada33241d9de205522e3858b18e506ca5cce2c here as well.

Fixes: a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance")
Signed-off-by: Rohit Maheshwari <rohitm@chelsio.com>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tls/tls_sw.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 9a3d9fedd7aa..95ab5545a931 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -2143,10 +2143,15 @@ void tls_sw_release_resources_tx(struct sock *sk)
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
 	struct tls_rec *rec, *tmp;
+	int pending;
 
 	/* Wait for any pending async encryptions to complete */
-	smp_store_mb(ctx->async_notify, true);
-	if (atomic_read(&ctx->encrypt_pending))
+	spin_lock_bh(&ctx->encrypt_compl_lock);
+	ctx->async_notify = true;
+	pending = atomic_read(&ctx->encrypt_pending);
+	spin_unlock_bh(&ctx->encrypt_compl_lock);
+
+	if (pending)
 		crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
 
 	tls_tx_records(sk, -1);

From 912aae27c6af6605eae967ab540c5e26bd76d421 Mon Sep 17 00:00:00 2001
From: Helmut Grohne <helmut.grohne@intenta.de>
Date: Thu, 24 Sep 2020 10:37:47 +0200
Subject: [PATCH 154/322] net: dsa: microchip: really look for phy-mode in port
 nodes

The previous implementation failed to account for the "ports" node. The
actual port nodes are not child nodes of the switch node, but a "ports"
node sits in between.

Fixes: edecfa98f602 ("net: dsa: microchip: look for phy-mode in port nodes")
Signed-off-by: Helmut Grohne <helmut.grohne@intenta.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/microchip/ksz_common.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index 8e755b50c9c1..c796d42730ba 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -387,8 +387,8 @@ EXPORT_SYMBOL(ksz_switch_alloc);
 int ksz_switch_register(struct ksz_device *dev,
 			const struct ksz_dev_ops *ops)
 {
+	struct device_node *port, *ports;
 	phy_interface_t interface;
-	struct device_node *port;
 	unsigned int port_num;
 	int ret;
 
@@ -429,13 +429,17 @@ int ksz_switch_register(struct ksz_device *dev,
 		ret = of_get_phy_mode(dev->dev->of_node, &interface);
 		if (ret == 0)
 			dev->compat_interface = interface;
-		for_each_available_child_of_node(dev->dev->of_node, port) {
-			if (of_property_read_u32(port, "reg", &port_num))
-				continue;
-			if (port_num >= dev->port_cnt)
-				return -EINVAL;
-			of_get_phy_mode(port, &dev->ports[port_num].interface);
-		}
+		ports = of_get_child_by_name(dev->dev->of_node, "ports");
+		if (ports)
+			for_each_available_child_of_node(ports, port) {
+				if (of_property_read_u32(port, "reg",
+							 &port_num))
+					continue;
+				if (port_num >= dev->port_cnt)
+					return -EINVAL;
+				of_get_phy_mode(port,
+						&dev->ports[port_num].interface);
+			}
 		dev->synclko_125 = of_property_read_bool(dev->dev->of_node,
 							 "microchip,synclko-125");
 	}

From 1ec8e74855588cecb2620b28b877c08f45765374 Mon Sep 17 00:00:00 2001
From: Jamie Iles <jamie@nuviainc.com>
Date: Thu, 24 Sep 2020 15:56:45 +0100
Subject: [PATCH 155/322] net/fsl: quieten expected MDIO access failures

MDIO reads can happen during PHY probing, and printing an error with
dev_err can result in a large number of error messages during device
probe.  On a platform with a serial console this can result in
excessively long boot times in a way that looks like an infinite loop
when multiple busses are present.  Since 0f183fd151c (net/fsl: enable
extended scanning in xgmac_mdio) we perform more scanning so there are
potentially more failures.

Reduce the logging level to dev_dbg which is consistent with the
Freescale enetc driver.

Cc: Jeremy Linton <jeremy.linton@arm.com>
Signed-off-by: Jamie Iles <jamie@nuviainc.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/xgmac_mdio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/freescale/xgmac_mdio.c b/drivers/net/ethernet/freescale/xgmac_mdio.c
index 98be51d8b08c..bfa2826c5545 100644
--- a/drivers/net/ethernet/freescale/xgmac_mdio.c
+++ b/drivers/net/ethernet/freescale/xgmac_mdio.c
@@ -229,7 +229,7 @@ static int xgmac_mdio_read(struct mii_bus *bus, int phy_id, int regnum)
 	/* Return all Fs if nothing was there */
 	if ((xgmac_read32(&regs->mdio_stat, endian) & MDIO_STAT_RD_ER) &&
 	    !priv->has_a011043) {
-		dev_err(&bus->dev,
+		dev_dbg(&bus->dev,
 			"Error while reading PHY%d reg at %d.%hhu\n",
 			phy_id, dev_addr, regnum);
 		return 0xffff;

From ad2b9b0f8d0107602bdc1f3b1ab90719842ace11 Mon Sep 17 00:00:00 2001
From: Priyaranjan Jha <priyarjha@google.com>
Date: Thu, 24 Sep 2020 15:23:14 -0700
Subject: [PATCH 156/322] tcp: skip DSACKs with dubious sequence ranges

Currently, we use length of DSACKed range to compute number of
delivered packets. And if sequence range in DSACK is corrupted,
we can get bogus dsacked/acked count, and bogus cwnd.

This patch put bounds on DSACKed range to skip update of data
delivery and spurious retransmission information, if the DSACK
is unlikely caused by sender's action:
- DSACKed range shouldn't be greater than maximum advertised rwnd.
- Total no. of DSACKed segments shouldn't be greater than total
  no. of retransmitted segs. Unlike spurious retransmits, network
  duplicates or corrupted DSACKs shouldn't be counted as delivery.

Signed-off-by: Priyaranjan Jha <priyarjha@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/snmp.h |  1 +
 net/ipv4/proc.c           |  1 +
 net/ipv4/tcp_input.c      | 32 +++++++++++++++++++++++++-------
 3 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index cee9f8e6fce3..f84e7bcad6de 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -288,6 +288,7 @@ enum
 	LINUX_MIB_TCPTIMEOUTREHASH,		/* TCPTimeoutRehash */
 	LINUX_MIB_TCPDUPLICATEDATAREHASH,	/* TCPDuplicateDataRehash */
 	LINUX_MIB_TCPDSACKRECVSEGS,		/* TCPDSACKRecvSegs */
+	LINUX_MIB_TCPDSACKIGNOREDDUBIOUS,	/* TCPDSACKIgnoredDubious */
 	__LINUX_MIB_MAX
 };
 
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 1074df726ec0..8d5e1695b9aa 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -293,6 +293,7 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TcpTimeoutRehash", LINUX_MIB_TCPTIMEOUTREHASH),
 	SNMP_MIB_ITEM("TcpDuplicateDataRehash", LINUX_MIB_TCPDUPLICATEDATAREHASH),
 	SNMP_MIB_ITEM("TCPDSACKRecvSegs", LINUX_MIB_TCPDSACKRECVSEGS),
+	SNMP_MIB_ITEM("TCPDSACKIgnoredDubious", LINUX_MIB_TCPDSACKIGNOREDDUBIOUS),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 184ea556f50e..b1ce2054291d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -885,21 +885,34 @@ struct tcp_sacktag_state {
 	struct rate_sample *rate;
 };
 
-/* Take a notice that peer is sending D-SACKs */
+/* Take a notice that peer is sending D-SACKs. Skip update of data delivery
+ * and spurious retransmission information if this DSACK is unlikely caused by
+ * sender's action:
+ * - DSACKed sequence range is larger than maximum receiver's window.
+ * - Total no. of DSACKed segments exceed the total no. of retransmitted segs.
+ */
 static u32 tcp_dsack_seen(struct tcp_sock *tp, u32 start_seq,
 			  u32 end_seq, struct tcp_sacktag_state *state)
 {
 	u32 seq_len, dup_segs = 1;
 
-	if (before(start_seq, end_seq)) {
-		seq_len = end_seq - start_seq;
-		if (seq_len > tp->mss_cache)
-			dup_segs = DIV_ROUND_UP(seq_len, tp->mss_cache);
-	}
+	if (!before(start_seq, end_seq))
+		return 0;
+
+	seq_len = end_seq - start_seq;
+	/* Dubious DSACK: DSACKed range greater than maximum advertised rwnd */
+	if (seq_len > tp->max_window)
+		return 0;
+	if (seq_len > tp->mss_cache)
+		dup_segs = DIV_ROUND_UP(seq_len, tp->mss_cache);
+
+	tp->dsack_dups += dup_segs;
+	/* Skip the DSACK if dup segs weren't retransmitted by sender */
+	if (tp->dsack_dups > tp->total_retrans)
+		return 0;
 
 	tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
 	tp->rack.dsack_seen = 1;
-	tp->dsack_dups += dup_segs;
 
 	state->flag |= FLAG_DSACKING_ACK;
 	/* A spurious retransmission is delivered */
@@ -1153,6 +1166,11 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
 	}
 
 	dup_segs = tcp_dsack_seen(tp, start_seq_0, end_seq_0, state);
+	if (!dup_segs) {	/* Skip dubious DSACK */
+		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKIGNOREDDUBIOUS);
+		return false;
+	}
+
 	NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDSACKRECVSEGS, dup_segs);
 
 	/* D-SACK for already forgotten data... Do dumb counting. */

From e94ee171349db84c7cfdc5fefbebe414054d0924 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 25 Sep 2020 14:42:56 +1000
Subject: [PATCH 157/322] xfrm: Use correct address family in xfrm_state_find

The struct flowi must never be interpreted by itself as its size
depends on the address family.  Therefore it must always be grouped
with its original family value.

In this particular instance, the original family value is lost in
the function xfrm_state_find.  Therefore we get a bogus read when
it's coupled with the wrong family which would occur with inter-
family xfrm states.

This patch fixes it by keeping the original family value.

Note that the same bug could potentially occur in LSM through
the xfrm_state_pol_flow_match hook.  I checked the current code
there and it seems to be safe for now as only secid is used which
is part of struct flowi_common.  But that API should be changed
so that so that we don't get new bugs in the future.  We could
do that by replacing fl with just secid or adding a family field.

Reported-by: syzbot+577fbac3145a6eb2e7a5@syzkaller.appspotmail.com
Fixes: 48b8d78315bf ("[XFRM]: State selection update to use inner...")
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_state.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 5ff392e6f3c1..efc89a92961d 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1019,7 +1019,8 @@ static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x,
 	 */
 	if (x->km.state == XFRM_STATE_VALID) {
 		if ((x->sel.family &&
-		     !xfrm_selector_match(&x->sel, fl, x->sel.family)) ||
+		     (x->sel.family != family ||
+		      !xfrm_selector_match(&x->sel, fl, family))) ||
 		    !security_xfrm_state_pol_flow_match(x, pol, fl))
 			return;
 
@@ -1032,7 +1033,9 @@ static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x,
 		*acq_in_progress = 1;
 	} else if (x->km.state == XFRM_STATE_ERROR ||
 		   x->km.state == XFRM_STATE_EXPIRED) {
-		if (xfrm_selector_match(&x->sel, fl, x->sel.family) &&
+		if ((!x->sel.family ||
+		     (x->sel.family == family &&
+		      xfrm_selector_match(&x->sel, fl, family))) &&
 		    security_xfrm_state_pol_flow_match(x, pol, fl))
 			*error = -ESRCH;
 	}
@@ -1072,7 +1075,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 		    tmpl->mode == x->props.mode &&
 		    tmpl->id.proto == x->id.proto &&
 		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
-			xfrm_state_look_at(pol, x, fl, encap_family,
+			xfrm_state_look_at(pol, x, fl, family,
 					   &best, &acquire_in_progress, &error);
 	}
 	if (best || acquire_in_progress)
@@ -1089,7 +1092,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
 		    tmpl->mode == x->props.mode &&
 		    tmpl->id.proto == x->id.proto &&
 		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
-			xfrm_state_look_at(pol, x, fl, encap_family,
+			xfrm_state_look_at(pol, x, fl, family,
 					   &best, &acquire_in_progress, &error);
 	}
 

From 8d214c481611b29458a57913bd786f0ac06f0605 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <sean.j.christopherson@intel.com>
Date: Wed, 23 Sep 2020 14:53:52 -0700
Subject: [PATCH 158/322] KVM: x86: Reset MMU context if guest toggles CR4.SMAP
 or CR4.PKE

Reset the MMU context during kvm_set_cr4() if SMAP or PKE is toggled.
Recent commits to (correctly) not reload PDPTRs when SMAP/PKE are
toggled inadvertantly skipped the MMU context reset due to the mask
of bits that triggers PDPTR loads also being used to trigger MMU context
resets.

Fixes: 427890aff855 ("kvm: x86: Toggling CR4.SMAP does not load PDPTEs in PAE mode")
Fixes: cb957adb4ea4 ("kvm: x86: Toggling CR4.PKE does not load PDPTEs in PAE mode")
Cc: Jim Mattson <jmattson@google.com>
Cc: Peter Shier <pshier@google.com>
Cc: Oliver Upton <oupton@google.com>
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Message-Id: <20200923215352.17756-1-sean.j.christopherson@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e3fe1d126cc3..58fa354b1446 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -976,6 +976,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 	unsigned long old_cr4 = kvm_read_cr4(vcpu);
 	unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
 				   X86_CR4_SMEP;
+	unsigned long mmu_role_bits = pdptr_bits | X86_CR4_SMAP | X86_CR4_PKE;
 
 	if (kvm_valid_cr4(vcpu, cr4))
 		return 1;
@@ -1003,7 +1004,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 	if (kvm_x86_ops.set_cr4(vcpu, cr4))
 		return 1;
 
-	if (((cr4 ^ old_cr4) & pdptr_bits) ||
+	if (((cr4 ^ old_cr4) & mmu_role_bits) ||
 	    (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
 		kvm_mmu_reset_context(vcpu);
 

From f3cd4850504ff612d0ea77a0aaf29b66c98fcefe Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Thu, 24 Sep 2020 14:55:54 -0600
Subject: [PATCH 159/322] io_uring: ensure open/openat2 name is cleaned on
 cancelation

If we cancel these requests, we'll leak the memory associated with the
filename. Add them to the table of ops that need cleaning, if
REQ_F_NEED_CLEANUP is set.

Cc: stable@vger.kernel.org
Fixes: e62753e4e292 ("io_uring: call statx directly")
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index e6004b92e553..0ab16df31288 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -5671,6 +5671,11 @@ static void __io_clean_op(struct io_kiocb *req)
 			io_put_file(req, req->splice.file_in,
 				    (req->splice.flags & SPLICE_F_FD_IN_FIXED));
 			break;
+		case IORING_OP_OPENAT:
+		case IORING_OP_OPENAT2:
+			if (req->open.filename)
+				putname(req->open.filename);
+			break;
 		}
 		req->flags &= ~REQ_F_NEED_CLEANUP;
 	}

From 3aab91774bbd8e571cfaddaf839aafd07718333c Mon Sep 17 00:00:00 2001
From: Jeffle Xu <jefflexu@linux.alibaba.com>
Date: Fri, 25 Sep 2020 14:00:31 +0800
Subject: [PATCH 160/322] block: remove unused BLK_QC_T_EAGAIN flag

commit 7b6620d7db56 ("block: remove REQ_NOWAIT_INLINE") removed the
REQ_NOWAIT_INLINE related code, but the diff wasn't applied to
blk_types.h somehow.

Then commit 2771cefeac49 ("block: remove the REQ_NOWAIT_INLINE flag")
removed the REQ_NOWAIT_INLINE flag while the BLK_QC_T_EAGAIN flag still
remains.

Fixes: 7b6620d7db56 ("block: remove REQ_NOWAIT_INLINE")
Signed-off-by: Jeffle Xu <jefflexu@linux.alibaba.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk_types.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 4ecf4fed171f..b3fc5d3dd8ea 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -497,13 +497,12 @@ static inline int op_stat_group(unsigned int op)
 
 typedef unsigned int blk_qc_t;
 #define BLK_QC_T_NONE		-1U
-#define BLK_QC_T_EAGAIN		-2U
 #define BLK_QC_T_SHIFT		16
 #define BLK_QC_T_INTERNAL	(1U << 31)
 
 static inline bool blk_qc_t_valid(blk_qc_t cookie)
 {
-	return cookie != BLK_QC_T_NONE && cookie != BLK_QC_T_EAGAIN;
+	return cookie != BLK_QC_T_NONE;
 }
 
 static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie)

From 75598a8fc0e0dff2aa5d46c62531b36a595f1d4f Mon Sep 17 00:00:00 2001
From: Sylwester Dziedziuch <sylwesterx.dziedziuch@intel.com>
Date: Wed, 2 Sep 2020 12:54:59 +0000
Subject: [PATCH 161/322] iavf: Fix incorrect adapter get in iavf_resume

When calling iavf_resume there was a crash because wrong
function was used to get iavf_adapter and net_device pointers.
Changed how iavf_resume is getting iavf_adapter and net_device
pointers from pci_dev.

Fixes: 5eae00c57f5e ("i40evf: main driver core")
Signed-off-by: Sylwester Dziedziuch <sylwesterx.dziedziuch@intel.com>
Reviewed-by: Aleksandr Loktionov <aleksandr.loktionov@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/iavf/iavf_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index d870343cf689..cf539db79af9 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -3806,8 +3806,8 @@ static int __maybe_unused iavf_suspend(struct device *dev_d)
 static int __maybe_unused iavf_resume(struct device *dev_d)
 {
 	struct pci_dev *pdev = to_pci_dev(dev_d);
-	struct iavf_adapter *adapter = pci_get_drvdata(pdev);
-	struct net_device *netdev = adapter->netdev;
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct iavf_adapter *adapter = netdev_priv(netdev);
 	u32 err;
 
 	pci_set_master(pdev);

From 466e4392920083c58c7ee8905795453d62c4add7 Mon Sep 17 00:00:00 2001
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Date: Wed, 2 Sep 2020 08:53:45 -0700
Subject: [PATCH 162/322] ice: Fix call trace on suspend

It appears that the ice_suspend flow is missing a call to pci_save_state
and this is triggering the message "State of device not saved by
ice_suspend" and a call trace. Fix it.

Fixes: 769c500dcc1e ("ice: Add advanced power mgmt for WoL")
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 4634b48949bb..954f11f86f50 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -4522,6 +4522,7 @@ static int __maybe_unused ice_suspend(struct device *dev)
 	}
 	ice_clear_interrupt_scheme(pf);
 
+	pci_save_state(pdev);
 	pci_wake_from_d3(pdev, pf->wol_ena);
 	pci_set_power_state(pdev, PCI_D3hot);
 	return 0;

From 135f4b9e9340dadb78e9737bb4eb9817b9c89dac Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Wed, 2 Sep 2020 08:53:46 -0700
Subject: [PATCH 163/322] ice: fix memory leak if register_netdev_fails

The ice_setup_pf_sw function can cause a memory leak if register_netdev
fails, due to accidentally failing to free the VSI rings. Fix the memory
leak by using ice_vsi_release, ensuring we actually go through the full
teardown process.

This should be safe even if the netdevice is not registered because we
will have set the netdev pointer to NULL, ensuring ice_vsi_release won't
call unregister_netdev.

An alternative fix would be moving management of the PF VSI netdev into
the main VSI setup code. This is complicated and likely requires
significant refactor in how we manage VSIs

Fixes: 3a858ba392c3 ("ice: Add support for VSI allocation and deallocation")
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_lib.c  |  6 +++---
 drivers/net/ethernet/intel/ice/ice_lib.h  |  6 ------
 drivers/net/ethernet/intel/ice/ice_main.c | 13 +++----------
 3 files changed, 6 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index f2682776f8c8..65cc78279501 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -246,7 +246,7 @@ static int ice_get_free_slot(void *array, int size, int curr)
  * ice_vsi_delete - delete a VSI from the switch
  * @vsi: pointer to VSI being removed
  */
-void ice_vsi_delete(struct ice_vsi *vsi)
+static void ice_vsi_delete(struct ice_vsi *vsi)
 {
 	struct ice_pf *pf = vsi->back;
 	struct ice_vsi_ctx *ctxt;
@@ -313,7 +313,7 @@ static void ice_vsi_free_arrays(struct ice_vsi *vsi)
  *
  * Returns 0 on success, negative on failure
  */
-int ice_vsi_clear(struct ice_vsi *vsi)
+static int ice_vsi_clear(struct ice_vsi *vsi)
 {
 	struct ice_pf *pf = NULL;
 	struct device *dev;
@@ -563,7 +563,7 @@ static int ice_vsi_get_qs(struct ice_vsi *vsi)
  * ice_vsi_put_qs - Release queues from VSI to PF
  * @vsi: the VSI that is going to release queues
  */
-void ice_vsi_put_qs(struct ice_vsi *vsi)
+static void ice_vsi_put_qs(struct ice_vsi *vsi)
 {
 	struct ice_pf *pf = vsi->back;
 	int i;
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h
index 981f3a156c24..3da17895a2b1 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_lib.h
@@ -45,10 +45,6 @@ int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc);
 
 void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create);
 
-void ice_vsi_delete(struct ice_vsi *vsi);
-
-int ice_vsi_clear(struct ice_vsi *vsi);
-
 #ifdef CONFIG_DCB
 int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc);
 #endif /* CONFIG_DCB */
@@ -79,8 +75,6 @@ bool ice_is_reset_in_progress(unsigned long *state);
 void
 ice_write_qrxflxp_cntxt(struct ice_hw *hw, u16 pf_q, u32 rxdid, u32 prio);
 
-void ice_vsi_put_qs(struct ice_vsi *vsi);
-
 void ice_vsi_dis_irq(struct ice_vsi *vsi);
 
 void ice_vsi_free_irq(struct ice_vsi *vsi);
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 954f11f86f50..54a7f55eb8c1 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -3169,10 +3169,8 @@ static int ice_setup_pf_sw(struct ice_pf *pf)
 		return -EBUSY;
 
 	vsi = ice_pf_vsi_setup(pf, pf->hw.port_info);
-	if (!vsi) {
-		status = -ENOMEM;
-		goto unroll_vsi_setup;
-	}
+	if (!vsi)
+		return -ENOMEM;
 
 	status = ice_cfg_netdev(vsi);
 	if (status) {
@@ -3219,12 +3217,7 @@ unroll_napi_add:
 	}
 
 unroll_vsi_setup:
-	if (vsi) {
-		ice_vsi_free_q_vectors(vsi);
-		ice_vsi_delete(vsi);
-		ice_vsi_put_qs(vsi);
-		ice_vsi_clear(vsi);
-	}
+	ice_vsi_release(vsi);
 	return status;
 }
 

From f6a07271bb1535d9549380461437cc48d9e19958 Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Wed, 2 Sep 2020 08:53:47 -0700
Subject: [PATCH 164/322] ice: fix memory leak in ice_vsi_setup

During ice_vsi_setup, if ice_cfg_vsi_lan fails, it does not properly
release memory associated with the VSI rings. If we had used devres
allocations for the rings, this would be ok. However, we use kzalloc and
kfree_rcu for these ring structures.

Using the correct label to cleanup the rings during ice_vsi_setup
highlights an issue in the ice_vsi_clear_rings function: it can leave
behind stale ring pointers in the q_vectors structure.

When releasing rings, we must also ensure that no q_vector associated
with the VSI will point to this ring again. To resolve this, loop over
all q_vectors and release their ring mapping. Because we are about to
free all rings, no q_vector should remain pointing to any of the rings
in this VSI.

Fixes: 5513b920a4f7 ("ice: Update Tx scheduler tree for VSI multi-Tx queue support")
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_lib.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 65cc78279501..ebbb8f54871c 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -1196,6 +1196,18 @@ static void ice_vsi_clear_rings(struct ice_vsi *vsi)
 {
 	int i;
 
+	/* Avoid stale references by clearing map from vector to ring */
+	if (vsi->q_vectors) {
+		ice_for_each_q_vector(vsi, i) {
+			struct ice_q_vector *q_vector = vsi->q_vectors[i];
+
+			if (q_vector) {
+				q_vector->tx.ring = NULL;
+				q_vector->rx.ring = NULL;
+			}
+		}
+	}
+
 	if (vsi->tx_rings) {
 		for (i = 0; i < vsi->alloc_txq; i++) {
 			if (vsi->tx_rings[i]) {
@@ -2291,7 +2303,7 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
 	if (status) {
 		dev_err(dev, "VSI %d failed lan queue config, error %s\n",
 			vsi->vsi_num, ice_stat_str(status));
-		goto unroll_vector_base;
+		goto unroll_clear_rings;
 	}
 
 	/* Add switch rule to drop all Tx Flow Control Frames, of look up

From 313b085851c13ca08320372a05a7047ea25d3dd4 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Thu, 20 Aug 2020 11:18:26 -0400
Subject: [PATCH 165/322] btrfs: move btrfs_scratch_superblocks into
 btrfs_dev_replace_finishing

We need to move the closing of the src_device out of all the device
replace locking, but we definitely want to zero out the superblock
before we commit the last time to make sure the device is properly
removed.  Handle this by pushing btrfs_scratch_superblocks into
btrfs_dev_replace_finishing, and then later on we'll move the src_device
closing and freeing stuff where we need it to be.

Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/dev-replace.c |  3 +++
 fs/btrfs/volumes.c     | 12 +++---------
 fs/btrfs/volumes.h     |  3 +++
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index db93909b25e0..7cf48aeb6f14 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -745,6 +745,9 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
 	/* replace the sysfs entry */
 	btrfs_sysfs_remove_devices_dir(fs_info->fs_devices, src_device);
 	btrfs_sysfs_update_devid(tgt_device);
+	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &src_device->dev_state))
+		btrfs_scratch_superblocks(fs_info, src_device->bdev,
+					  src_device->name->str);
 	btrfs_rm_dev_replace_free_srcdev(src_device);
 
 	/* write back the superblocks */
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 117b43367629..01ebdb69fbdb 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1999,9 +1999,9 @@ static u64 btrfs_num_devices(struct btrfs_fs_info *fs_info)
 	return num_devices;
 }
 
-static void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
-				      struct block_device *bdev,
-				      const char *device_path)
+void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
+			       struct block_device *bdev,
+			       const char *device_path)
 {
 	struct btrfs_super_block *disk_super;
 	int copy_num;
@@ -2224,12 +2224,6 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_device *srcdev)
 	struct btrfs_fs_info *fs_info = srcdev->fs_info;
 	struct btrfs_fs_devices *fs_devices = srcdev->fs_devices;
 
-	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &srcdev->dev_state)) {
-		/* zero out the old super if it is writable */
-		btrfs_scratch_superblocks(fs_info, srcdev->bdev,
-					  srcdev->name->str);
-	}
-
 	btrfs_close_bdev(srcdev);
 	synchronize_rcu();
 	btrfs_free_device(srcdev);
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 5eea93916fbf..302c9234f7d0 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -573,6 +573,9 @@ void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info);
 void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info);
 bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info,
 					struct btrfs_device *failing_dev);
+void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
+			       struct block_device *bdev,
+			       const char *device_path);
 
 int btrfs_bg_type_to_factor(u64 flags);
 const char *btrfs_bg_type_to_raid_name(u64 flags);

From d6407613c1e2ef90213dee388aa16b6e1bd08cbc Mon Sep 17 00:00:00 2001
From: "M. Vefa Bicakci" <m.v.b@runbox.com>
Date: Tue, 22 Sep 2020 14:07:00 +0300
Subject: [PATCH 166/322] Revert "usbip: Implement a match function to fix
 usbip"

This commit reverts commit 7a2f2974f265 ("usbip: Implement a match
function to fix usbip").

In summary, commit d5643d2249b2 ("USB: Fix device driver race")
inadvertently broke usbip functionality, which I resolved in an incorrect
manner by introducing a match function to usbip, usbip_match(), that
unconditionally returns true.

However, the usbip_match function, as is, causes usbip to take over
virtual devices used by syzkaller for USB fuzzing, which is a regression
reported by Andrey Konovalov.

Furthermore, in conjunction with the fix of another bug, handled by another
patch titled "usbcore/driver: Fix specific driver selection" in this patch
set, the usbip_match function causes unexpected USB subsystem behaviour
when the usbip_host driver is loaded. The unexpected behaviour can be
qualified as follows:
- If commit 41160802ab8e ("USB: Simplify USB ID table match") is included
  in the kernel, then all USB devices are bound to the usbip_host
  driver, which appears to the user as if all USB devices were
  disconnected.
- If the same commit (41160802ab8e) is not in the kernel (as is the case
  with v5.8.10) then all USB devices are re-probed and re-bound to their
  original device drivers, which appears to the user as a disconnection
  and re-connection of USB devices.

Please note that this commit will make usbip non-operational again,
until yet another patch in this patch set is merged, titled
"usbcore/driver: Accommodate usbip".

Cc: <stable@vger.kernel.org> # 5.8: 41160802ab8e: USB: Simplify USB ID table match
Cc: <stable@vger.kernel.org> # 5.8
Cc: Bastien Nocera <hadess@hadess.net>
Cc: Valentina Manea <valentina.manea.m@gmail.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: <syzkaller@googlegroups.com>
Reported-by: Andrey Konovalov <andreyknvl@google.com>
Tested-by: Andrey Konovalov <andreyknvl@google.com>
Acked-by: Shuah Khan <skhan@linuxfoundation.org>
Signed-off-by: M. Vefa Bicakci <m.v.b@runbox.com>
Link: https://lore.kernel.org/r/20200922110703.720960-2-m.v.b@runbox.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/usbip/stub_dev.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/usb/usbip/stub_dev.c b/drivers/usb/usbip/stub_dev.c
index 9d7d642022d1..2305d425e6c9 100644
--- a/drivers/usb/usbip/stub_dev.c
+++ b/drivers/usb/usbip/stub_dev.c
@@ -461,11 +461,6 @@ static void stub_disconnect(struct usb_device *udev)
 	return;
 }
 
-static bool usbip_match(struct usb_device *udev)
-{
-	return true;
-}
-
 #ifdef CONFIG_PM
 
 /* These functions need usb_port_suspend and usb_port_resume,
@@ -491,7 +486,6 @@ struct usb_device_driver stub_driver = {
 	.name		= "usbip-host",
 	.probe		= stub_probe,
 	.disconnect	= stub_disconnect,
-	.match		= usbip_match,
 #ifdef CONFIG_PM
 	.suspend	= stub_suspend,
 	.resume		= stub_resume,

From aea850cd35ae3d266fe6f93fb9edb25e4a555230 Mon Sep 17 00:00:00 2001
From: "M. Vefa Bicakci" <m.v.b@runbox.com>
Date: Tue, 22 Sep 2020 14:07:01 +0300
Subject: [PATCH 167/322] usbcore/driver: Fix specific driver selection

This commit resolves a bug in the selection/discovery of more
specific USB device drivers for devices that are currently bound to
generic USB device drivers.

The bug is in the logic that determines whether a device currently
bound to a generic USB device driver should be re-probed by a
more specific USB device driver or not. The code in
__usb_bus_reprobe_drivers() used to have the following lines:

  if (usb_device_match_id(udev, new_udriver->id_table) == NULL &&
      (!new_udriver->match || new_udriver->match(udev) != 0))
 		return 0;

  ret = device_reprobe(dev);

As the reader will notice, the code checks whether the USB device in
consideration matches the identifier table (id_table) of a specific
USB device_driver (new_udriver), followed by a similar check, but this
time with the USB device driver's match function. However, the match
function's return value is not checked correctly. When match() returns
zero, it means that the specific USB device driver is *not* applicable
to the USB device in question, but the code then goes on to reprobe the
device with the new USB device driver under consideration. All this to
say, the logic is inverted.

This bug was found by code inspection and instrumentation while
investigating the root cause of the issue reported by Andrey Konovalov,
where usbip took over syzkaller's virtual USB devices in an undesired
manner. The report is linked below.

Fixes: d5643d2249b2 ("USB: Fix device driver race")
Cc: <stable@vger.kernel.org> # 5.8
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Bastien Nocera <hadess@hadess.net>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Valentina Manea <valentina.manea.m@gmail.com>
Cc: <syzkaller@googlegroups.com>
Tested-by: Andrey Konovalov <andreyknvl@google.com>
Signed-off-by: M. Vefa Bicakci <m.v.b@runbox.com>
Link: https://lore.kernel.org/r/20200922110703.720960-3-m.v.b@runbox.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/driver.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c
index 7e73e989645b..715782995428 100644
--- a/drivers/usb/core/driver.c
+++ b/drivers/usb/core/driver.c
@@ -924,7 +924,7 @@ static int __usb_bus_reprobe_drivers(struct device *dev, void *data)
 
 	udev = to_usb_device(dev);
 	if (usb_device_match_id(udev, new_udriver->id_table) == NULL &&
-	    (!new_udriver->match || new_udriver->match(udev) != 0))
+	    (!new_udriver->match || new_udriver->match(udev) == 0))
 		return 0;
 
 	ret = device_reprobe(dev);

From 4df30e7603432704380b12fe40a604ee7f66746d Mon Sep 17 00:00:00 2001
From: "M. Vefa Bicakci" <m.v.b@runbox.com>
Date: Tue, 22 Sep 2020 14:07:02 +0300
Subject: [PATCH 168/322] usbcore/driver: Fix incorrect downcast

This commit resolves a minor bug in the selection/discovery of more
specific USB device drivers for devices that are currently bound to
generic USB device drivers.

The bug is related to the way a candidate USB device driver is
compared against the generic USB device driver. The code in
is_dev_usb_generic_driver() assumes that the device driver in question
is a USB device driver by calling to_usb_device_driver(dev->driver)
to downcast; however I have observed that this assumption is not always
true, through code instrumentation.

This commit avoids the incorrect downcast altogether by comparing
the USB device's driver (i.e., dev->driver) to the generic USB
device driver directly. This method was suggested by Alan Stern.

This bug was found while investigating Andrey Konovalov's report
indicating usbip device driver misbehaviour with the recently merged
generic USB device driver selection feature. The report is linked
below.

Fixes: d5643d2249b2 ("USB: Fix device driver race")
Cc: <stable@vger.kernel.org> # 5.8
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Bastien Nocera <hadess@hadess.net>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Valentina Manea <valentina.manea.m@gmail.com>
Cc: <syzkaller@googlegroups.com>
Tested-by: Andrey Konovalov <andreyknvl@google.com>
Signed-off-by: M. Vefa Bicakci <m.v.b@runbox.com>
Link: https://lore.kernel.org/r/20200922110703.720960-4-m.v.b@runbox.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/driver.c | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c
index 715782995428..b00b7fb1aad1 100644
--- a/drivers/usb/core/driver.c
+++ b/drivers/usb/core/driver.c
@@ -905,21 +905,14 @@ static int usb_uevent(struct device *dev, struct kobj_uevent_env *env)
 	return 0;
 }
 
-static bool is_dev_usb_generic_driver(struct device *dev)
-{
-	struct usb_device_driver *udd = dev->driver ?
-		to_usb_device_driver(dev->driver) : NULL;
-
-	return udd == &usb_generic_driver;
-}
-
 static int __usb_bus_reprobe_drivers(struct device *dev, void *data)
 {
 	struct usb_device_driver *new_udriver = data;
 	struct usb_device *udev;
 	int ret;
 
-	if (!is_dev_usb_generic_driver(dev))
+	/* Don't reprobe if current driver isn't usb_generic_driver */
+	if (dev->driver != &usb_generic_driver.drvwrap.driver)
 		return 0;
 
 	udev = to_usb_device(dev);

From 3fce39601a1a34d940cf62858ee01ed9dac5d459 Mon Sep 17 00:00:00 2001
From: "M. Vefa Bicakci" <m.v.b@runbox.com>
Date: Tue, 22 Sep 2020 14:07:03 +0300
Subject: [PATCH 169/322] usbcore/driver: Accommodate usbip

Commit 88b7381a939d ("USB: Select better matching USB drivers when
available") inadvertently broke usbip functionality. The commit in
question allows USB device drivers to be explicitly matched with
USB devices via the use of driver-provided identifier tables and
match functions, which is useful for a specialised device driver
to be chosen for a device that can also be handled by another,
more generic, device driver.

Prior, the USB device section of usb_device_match() had an
unconditional "return 1" statement, which allowed user-space to bind
USB devices to the usbip_host device driver, if desired. However,
the aforementioned commit changed the default/fallback return
value to zero. This breaks device drivers such as usbip_host, so
this commit restores the legacy behaviour, but only if a device
driver does not have an id_table and a match() function.

In addition, if usb_device_match is called for a device driver
and device pair where the device does not match the id_table of the
device driver in question, then the device driver will be disqualified
for the device. This allows avoiding the default case of "return 1",
which prevents undesirable probe() calls to a driver even though
its id_table did not match the device.

Finally, this commit changes the specialised-driver-to-generic-driver
transition code so that when a device driver returns -ENODEV, a more
generic device driver is only considered if the current device driver
does not have an id_table and a match() function. This ensures that
"generic" drivers such as usbip_host will not be considered specialised
device drivers and will not cause the device to be locked in to the
generic device driver, when a more specialised device driver could be
tried.

All of these changes restore usbip functionality without regressions,
ensure that the specialised/generic device driver selection logic works
as expected with the usb and apple-mfi-fastcharge drivers, and do not
negatively affect the use of devices provided by dummy_hcd.

Fixes: 88b7381a939d ("USB: Select better matching USB drivers when available")
Cc: <stable@vger.kernel.org> # 5.8
Cc: Bastien Nocera <hadess@hadess.net>
Cc: Valentina Manea <valentina.manea.m@gmail.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: <syzkaller@googlegroups.com>
Tested-by: Andrey Konovalov <andreyknvl@google.com>
Acked-by: Shuah Khan <skhan@linuxfoundation.org>
Signed-off-by: M. Vefa Bicakci <m.v.b@runbox.com>
Link: https://lore.kernel.org/r/20200922110703.720960-5-m.v.b@runbox.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/driver.c | 37 +++++++++++++++++++++++++++++++------
 1 file changed, 31 insertions(+), 6 deletions(-)

diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c
index b00b7fb1aad1..b351962279e4 100644
--- a/drivers/usb/core/driver.c
+++ b/drivers/usb/core/driver.c
@@ -269,8 +269,30 @@ static int usb_probe_device(struct device *dev)
 	if (error)
 		return error;
 
+	/* Probe the USB device with the driver in hand, but only
+	 * defer to a generic driver in case the current USB
+	 * device driver has an id_table or a match function; i.e.,
+	 * when the device driver was explicitly matched against
+	 * a device.
+	 *
+	 * If the device driver does not have either of these,
+	 * then we assume that it can bind to any device and is
+	 * not truly a more specialized/non-generic driver, so a
+	 * return value of -ENODEV should not force the device
+	 * to be handled by the generic USB driver, as there
+	 * can still be another, more specialized, device driver.
+	 *
+	 * This accommodates the usbip driver.
+	 *
+	 * TODO: What if, in the future, there are multiple
+	 * specialized USB device drivers for a particular device?
+	 * In such cases, there is a need to try all matching
+	 * specialised device drivers prior to setting the
+	 * use_generic_driver bit.
+	 */
 	error = udriver->probe(udev);
-	if (error == -ENODEV && udriver != &usb_generic_driver) {
+	if (error == -ENODEV && udriver != &usb_generic_driver &&
+	    (udriver->id_table || udriver->match)) {
 		udev->use_generic_driver = 1;
 		return -EPROBE_DEFER;
 	}
@@ -831,14 +853,17 @@ static int usb_device_match(struct device *dev, struct device_driver *drv)
 		udev = to_usb_device(dev);
 		udrv = to_usb_device_driver(drv);
 
-		if (udrv->id_table &&
-		    usb_device_match_id(udev, udrv->id_table) != NULL) {
-			return 1;
-		}
+		if (udrv->id_table)
+			return usb_device_match_id(udev, udrv->id_table) != NULL;
 
 		if (udrv->match)
 			return udrv->match(udev);
-		return 0;
+
+		/* If the device driver under consideration does not have a
+		 * id_table or a match function, then let the driver's probe
+		 * function decide.
+		 */
+		return 1;
 
 	} else if (is_usb_interface(dev)) {
 		struct usb_interface *intf;

From 62c774ed483174b994c79e0c9f596b315f1ddfaf Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Fri, 25 Sep 2020 09:01:53 -0600
Subject: [PATCH 170/322] io_uring: don't unconditionally set plug->nowait =
 true

This causes all the bios to be submitted with REQ_NOWAIT, which can be
problematic on either btrfs or on file systems that otherwise use a mix
of block devices where only some of them support it.

For now, just remove the setting of plug->nowait = true.

Reported-by: Dan Melnic <dmm@fb.com>
Reported-by: Brian Foster <bfoster@redhat.com>
Fixes: b63534c41e20 ("io_uring: re-issue block requests that failed because of resources")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 0ab16df31288..ad828fa19af4 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -6353,9 +6353,6 @@ static void io_submit_state_start(struct io_submit_state *state,
 				  struct io_ring_ctx *ctx, unsigned int max_ios)
 {
 	blk_start_plug(&state->plug);
-#ifdef CONFIG_BLOCK
-	state->plug.nowait = true;
-#endif
 	state->comp.nr = 0;
 	INIT_LIST_HEAD(&state->comp.list);
 	state->comp.ctx = ctx;

From 4bb05f30483fd21ea5413eaf1182768f251cf625 Mon Sep 17 00:00:00 2001
From: Tom Lendacky <thomas.lendacky@amd.com>
Date: Thu, 24 Sep 2020 13:41:57 -0500
Subject: [PATCH 171/322] KVM: SVM: Add a dedicated INVD intercept routine

The INVD instruction intercept performs emulation. Emulation can't be done
on an SEV guest because the guest memory is encrypted.

Provide a dedicated intercept routine for the INVD intercept. And since
the instruction is emulated as a NOP, just skip it instead.

Fixes: 1654efcbc431 ("KVM: SVM: Add KVM_SEV_INIT command")
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Message-Id: <a0b9a19ffa7fef86a3cc700c7ea01cb2731e04e5.1600972918.git.thomas.lendacky@amd.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/svm.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 5764b87379cf..d4e18bda19c7 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2183,6 +2183,12 @@ static int iret_interception(struct vcpu_svm *svm)
 	return 1;
 }
 
+static int invd_interception(struct vcpu_svm *svm)
+{
+	/* Treat an INVD instruction as a NOP and just skip it. */
+	return kvm_skip_emulated_instruction(&svm->vcpu);
+}
+
 static int invlpg_interception(struct vcpu_svm *svm)
 {
 	if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
@@ -2774,7 +2780,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
 	[SVM_EXIT_RDPMC]			= rdpmc_interception,
 	[SVM_EXIT_CPUID]			= cpuid_interception,
 	[SVM_EXIT_IRET]                         = iret_interception,
-	[SVM_EXIT_INVD]                         = emulate_on_interception,
+	[SVM_EXIT_INVD]                         = invd_interception,
 	[SVM_EXIT_PAUSE]			= pause_interception,
 	[SVM_EXIT_HLT]				= halt_interception,
 	[SVM_EXIT_INVLPG]			= invlpg_interception,

From f38c7e3abfba9a9e180b34f642254c43782e7ffe Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Fri, 25 Sep 2020 15:23:43 -0600
Subject: [PATCH 172/322] io_uring: ensure async buffered read-retry is setup
 properly

A previous commit for fixing up short reads botched the async retry
path, so we ended up going to worker threads more often than we should.
Fix this up, so retries work the way they originally were intended to.

Fixes: 227c0c9673d8 ("io_uring: internally retry short reads")
Reported-by: Hao_Xu <haoxu@linux.alibaba.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index ad828fa19af4..11b8e428300d 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3172,10 +3172,8 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
 			goto done;
 		/* some cases will consume bytes even on error returns */
 		iov_iter_revert(iter, iov_count - iov_iter_count(iter));
-		ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false);
-		if (ret)
-			goto out_free;
-		return -EAGAIN;
+		ret = 0;
+		goto copy_iov;
 	} else if (ret < 0) {
 		/* make sure -ERESTARTSYS -> -EINTR is done */
 		goto done;

From 1dac3b1bc66dc68dbb0c9f43adac71a7d0a0331a Mon Sep 17 00:00:00 2001
From: Ronak Doshi <doshir@vmware.com>
Date: Thu, 24 Sep 2020 23:11:29 -0700
Subject: [PATCH 173/322] vmxnet3: fix cksum offload issues for non-udp tunnels

Commit dacce2be3312 ("vmxnet3: add geneve and vxlan tunnel offload
support") added support for encapsulation offload. However, the inner
offload capability is to be restrictued to UDP tunnels.

This patch fixes the issue for non-udp tunnels by adding features
check capability and filtering appropriate features for non-udp tunnels.

Fixes: dacce2be3312 ("vmxnet3: add geneve and vxlan tunnel offload support")
Signed-off-by: Ronak Doshi <doshir@vmware.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vmxnet3/vmxnet3_drv.c     |  5 ++---
 drivers/net/vmxnet3/vmxnet3_ethtool.c | 28 +++++++++++++++++++++++++++
 drivers/net/vmxnet3/vmxnet3_int.h     |  4 ++++
 3 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 2818015324b8..336504b7531d 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -1032,7 +1032,6 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
 	/* Use temporary descriptor to avoid touching bits multiple times */
 	union Vmxnet3_GenericDesc tempTxDesc;
 #endif
-	struct udphdr *udph;
 
 	count = txd_estimate(skb);
 
@@ -1135,8 +1134,7 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
 			gdesc->txd.om = VMXNET3_OM_ENCAP;
 			gdesc->txd.msscof = ctx.mss;
 
-			udph = udp_hdr(skb);
-			if (udph->check)
+			if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)
 				gdesc->txd.oco = 1;
 		} else {
 			gdesc->txd.hlen = ctx.l4_offset + ctx.l4_hdr_size;
@@ -3371,6 +3369,7 @@ vmxnet3_probe_device(struct pci_dev *pdev,
 		.ndo_change_mtu = vmxnet3_change_mtu,
 		.ndo_fix_features = vmxnet3_fix_features,
 		.ndo_set_features = vmxnet3_set_features,
+		.ndo_features_check = vmxnet3_features_check,
 		.ndo_get_stats64 = vmxnet3_get_stats64,
 		.ndo_tx_timeout = vmxnet3_tx_timeout,
 		.ndo_set_rx_mode = vmxnet3_set_mc,
diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c
index 1014693a5ceb..7ec8652f2c26 100644
--- a/drivers/net/vmxnet3/vmxnet3_ethtool.c
+++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c
@@ -267,6 +267,34 @@ netdev_features_t vmxnet3_fix_features(struct net_device *netdev,
 	return features;
 }
 
+netdev_features_t vmxnet3_features_check(struct sk_buff *skb,
+					 struct net_device *netdev,
+					 netdev_features_t features)
+{
+	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
+
+	/* Validate if the tunneled packet is being offloaded by the device */
+	if (VMXNET3_VERSION_GE_4(adapter) &&
+	    skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL) {
+		u8 l4_proto = 0;
+
+		switch (vlan_get_protocol(skb)) {
+		case htons(ETH_P_IP):
+			l4_proto = ip_hdr(skb)->protocol;
+			break;
+		case htons(ETH_P_IPV6):
+			l4_proto = ipv6_hdr(skb)->nexthdr;
+			break;
+		default:
+			return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
+		}
+
+		if (l4_proto != IPPROTO_UDP)
+			return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
+	}
+	return features;
+}
+
 static void vmxnet3_enable_encap_offloads(struct net_device *netdev)
 {
 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h
index 5d2b062215a2..d958b92c9429 100644
--- a/drivers/net/vmxnet3/vmxnet3_int.h
+++ b/drivers/net/vmxnet3/vmxnet3_int.h
@@ -470,6 +470,10 @@ vmxnet3_rq_destroy_all(struct vmxnet3_adapter *adapter);
 netdev_features_t
 vmxnet3_fix_features(struct net_device *netdev, netdev_features_t features);
 
+netdev_features_t
+vmxnet3_features_check(struct sk_buff *skb,
+		       struct net_device *netdev, netdev_features_t features);
+
 int
 vmxnet3_set_features(struct net_device *netdev, netdev_features_t features);
 

From ac322f86b56cb99d1c4224c209095aa67647c967 Mon Sep 17 00:00:00 2001
From: Wong Vee Khee <vee.khee.wong@intel.com>
Date: Fri, 25 Sep 2020 17:54:06 +0800
Subject: [PATCH 174/322] net: stmmac: Fix clock handling on remove path

While unloading the dwmac-intel driver, clk_disable_unprepare() is
being called twice in stmmac_dvr_remove() and
intel_eth_pci_remove(). This causes kernel panic on the second call.

Removing the second call of clk_disable_unprepare() in
intel_eth_pci_remove().

Fixes: 09f012e64e4b ("stmmac: intel: Fix clock handling on error and remove paths")
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Voon Weifeng <weifeng.voon@intel.com>
Signed-off-by: Wong Vee Khee <vee.khee.wong@intel.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index 2ac9dfb3462c..9e6d60e75f85 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -653,7 +653,6 @@ static void intel_eth_pci_remove(struct pci_dev *pdev)
 
 	pci_free_irq_vectors(pdev);
 
-	clk_disable_unprepare(priv->plat->stmmac_clk);
 	clk_unregister_fixed_rate(priv->plat->stmmac_clk);
 
 	pcim_iounmap_regions(pdev, BIT(0));

From 4663ff60257aec4ee1e2e969a7c046f0aff35ab8 Mon Sep 17 00:00:00 2001
From: Ivan Khoronzhuk <ivan.khoronzhuk@gmail.com>
Date: Fri, 25 Sep 2020 15:44:39 +0300
Subject: [PATCH 175/322] net: ethernet: cavium: octeon_mgmt: use phy_start and
 phy_stop

To start also "phy state machine", with UP state as it should be,
the phy_start() has to be used, in another case machine even is not
triggered. After this change negotiation is supposed to be triggered
by SM workqueue.

It's not correct usage, but it appears after the following patch,
so add it as a fix.

Fixes: 74a992b3598a ("net: phy: add phy_check_link_status")
Signed-off-by: Ivan Khoronzhuk <ikhoronz@cisco.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cavium/octeon/octeon_mgmt.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
index 3e17ce0d2314..6cb2162a75d4 100644
--- a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
+++ b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
@@ -1219,7 +1219,7 @@ static int octeon_mgmt_open(struct net_device *netdev)
 	 */
 	if (netdev->phydev) {
 		netif_carrier_off(netdev);
-		phy_start_aneg(netdev->phydev);
+		phy_start(netdev->phydev);
 	}
 
 	netif_wake_queue(netdev);
@@ -1247,8 +1247,10 @@ static int octeon_mgmt_stop(struct net_device *netdev)
 	napi_disable(&p->napi);
 	netif_stop_queue(netdev);
 
-	if (netdev->phydev)
+	if (netdev->phydev) {
+		phy_stop(netdev->phydev);
 		phy_disconnect(netdev->phydev);
+	}
 
 	netif_carrier_off(netdev);
 

From f32f19339596b214c208c0dba716f4b6cc4f6958 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 25 Sep 2020 06:38:07 -0700
Subject: [PATCH 176/322] bonding: set dev->needed_headroom in
 bond_setup_by_slave()

syzbot managed to crash a host by creating a bond
with a GRE device.

For non Ethernet device, bonding calls bond_setup_by_slave()
instead of ether_setup(), and unfortunately dev->needed_headroom
was not copied from the new added member.

[  171.243095] skbuff: skb_under_panic: text:ffffffffa184b9ea len:116 put:20 head:ffff883f84012dc0 data:ffff883f84012dbc tail:0x70 end:0xd00 dev:bond0
[  171.243111] ------------[ cut here ]------------
[  171.243112] kernel BUG at net/core/skbuff.c:112!
[  171.243117] invalid opcode: 0000 [#1] SMP KASAN PTI
[  171.243469] gsmi: Log Shutdown Reason 0x03
[  171.243505] Call Trace:
[  171.243506]  <IRQ>
[  171.243512]  [<ffffffffa171be59>] skb_push+0x49/0x50
[  171.243516]  [<ffffffffa184b9ea>] ipgre_header+0x2a/0xf0
[  171.243520]  [<ffffffffa17452d7>] neigh_connected_output+0xb7/0x100
[  171.243524]  [<ffffffffa186f1d3>] ip6_finish_output2+0x383/0x490
[  171.243528]  [<ffffffffa186ede2>] __ip6_finish_output+0xa2/0x110
[  171.243531]  [<ffffffffa186acbc>] ip6_finish_output+0x2c/0xa0
[  171.243534]  [<ffffffffa186abe9>] ip6_output+0x69/0x110
[  171.243537]  [<ffffffffa186ac90>] ? ip6_output+0x110/0x110
[  171.243541]  [<ffffffffa189d952>] mld_sendpack+0x1b2/0x2d0
[  171.243544]  [<ffffffffa189d290>] ? mld_send_report+0xf0/0xf0
[  171.243548]  [<ffffffffa189c797>] mld_ifc_timer_expire+0x2d7/0x3b0
[  171.243551]  [<ffffffffa189c4c0>] ? mld_gq_timer_expire+0x50/0x50
[  171.243556]  [<ffffffffa0fea270>] call_timer_fn+0x30/0x130
[  171.243559]  [<ffffffffa0fea17c>] expire_timers+0x4c/0x110
[  171.243563]  [<ffffffffa0fea0e3>] __run_timers+0x213/0x260
[  171.243566]  [<ffffffffa0fecb7d>] ? ktime_get+0x3d/0xa0
[  171.243570]  [<ffffffffa0ff9c4e>] ? clockevents_program_event+0x7e/0xe0
[  171.243574]  [<ffffffffa0f7e5d5>] ? sched_clock_cpu+0x15/0x190
[  171.243577]  [<ffffffffa0fe973d>] run_timer_softirq+0x1d/0x40
[  171.243581]  [<ffffffffa1c00152>] __do_softirq+0x152/0x2f0
[  171.243585]  [<ffffffffa0f44e1f>] irq_exit+0x9f/0xb0
[  171.243588]  [<ffffffffa1a02e1d>] smp_apic_timer_interrupt+0xfd/0x1a0
[  171.243591]  [<ffffffffa1a01ea6>] apic_timer_interrupt+0x86/0x90

Fixes: f5184d267c1a ("net: Allow netdevices to specify needed head/tailroom")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 42ef25ec0af5..14740d3053b8 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1315,6 +1315,7 @@ static void bond_setup_by_slave(struct net_device *bond_dev,
 
 	bond_dev->type		    = slave_dev->type;
 	bond_dev->hard_header_len   = slave_dev->hard_header_len;
+	bond_dev->needed_headroom   = slave_dev->needed_headroom;
 	bond_dev->addr_len	    = slave_dev->addr_len;
 
 	memcpy(bond_dev->broadcast, slave_dev->broadcast,

From 89d01748b2354e210b5d4ea47bc25a42a1b42c82 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 25 Sep 2020 06:38:08 -0700
Subject: [PATCH 177/322] team: set dev->needed_headroom in
 team_setup_by_port()

Some devices set needed_headroom. If we ignore it, we might
end up crashing in various skb_push() for example in ipgre_header()
since some layers assume enough headroom has been reserved.

Fixes: 1d76efe1577b ("team: add support for non-ethernet devices")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/team/team.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 8c1e02752ff6..69dfb1a49cc8 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -2112,6 +2112,7 @@ static void team_setup_by_port(struct net_device *dev,
 	dev->header_ops	= port_dev->header_ops;
 	dev->type = port_dev->type;
 	dev->hard_header_len = port_dev->hard_header_len;
+	dev->needed_headroom = port_dev->needed_headroom;
 	dev->addr_len = port_dev->addr_len;
 	dev->mtu = port_dev->mtu;
 	memcpy(dev->broadcast, port_dev->broadcast, port_dev->addr_len);

From e42d72fea91f8f2e82b65808739ca04b7a8cd7a8 Mon Sep 17 00:00:00 2001
From: Wilken Gottwalt <wilken.gottwalt@mailbox.org>
Date: Fri, 25 Sep 2020 15:58:57 +0200
Subject: [PATCH 178/322] net: usb: ax88179_178a: add Toshiba usb 3.0 adapter

Adds the driver_info and usb ids of the AX88179 based Toshiba USB 3.0
ethernet adapter.

Signed-off-by: Wilken Gottwalt <wilken.gottwalt@mailbox.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/ax88179_178a.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c
index a38e868e44d4..125f7bf57590 100644
--- a/drivers/net/usb/ax88179_178a.c
+++ b/drivers/net/usb/ax88179_178a.c
@@ -1828,6 +1828,19 @@ static const struct driver_info belkin_info = {
 	.tx_fixup = ax88179_tx_fixup,
 };
 
+static const struct driver_info toshiba_info = {
+	.description = "Toshiba USB Ethernet Adapter",
+	.bind	= ax88179_bind,
+	.unbind = ax88179_unbind,
+	.status = ax88179_status,
+	.link_reset = ax88179_link_reset,
+	.reset	= ax88179_reset,
+	.stop = ax88179_stop,
+	.flags	= FLAG_ETHER | FLAG_FRAMING_AX,
+	.rx_fixup = ax88179_rx_fixup,
+	.tx_fixup = ax88179_tx_fixup,
+};
+
 static const struct usb_device_id products[] = {
 {
 	/* ASIX AX88179 10/100/1000 */
@@ -1861,6 +1874,10 @@ static const struct usb_device_id products[] = {
 	/* Belkin B2B128 USB 3.0 Hub + Gigabit Ethernet Adapter */
 	USB_DEVICE(0x050d, 0x0128),
 	.driver_info = (unsigned long)&belkin_info,
+}, {
+	/* Toshiba USB 3.0 GBit Ethernet Adapter */
+	USB_DEVICE(0x0930, 0x0a13),
+	.driver_info = (unsigned long)&toshiba_info,
 },
 	{ },
 };

From 72e27c38abb3dc8da7c05f829d44b3f3c46d06da Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Fri, 25 Sep 2020 17:35:30 +0300
Subject: [PATCH 179/322] dpaa2-eth: fix command version for Tx shaping

When adding the support for TBF offload, the improper command version
was added even though the command format is for the V2 of
dpni_set_tx_shaping(). This does not affect the functionality of TBF
since the only change between these two versions is the addition of the
exceeded parameters which are not used in TBF. Still, fix the bug so
that we keep things in sync.

Fixes: 39344a89623d ("dpaa2-eth: add API for Tx shaping")
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h b/drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h
index 593e3812af93..3c06f5fb5759 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h
@@ -11,9 +11,11 @@
 #define DPNI_VER_MAJOR				7
 #define DPNI_VER_MINOR				0
 #define DPNI_CMD_BASE_VERSION			1
+#define DPNI_CMD_2ND_VERSION			2
 #define DPNI_CMD_ID_OFFSET			4
 
 #define DPNI_CMD(id)	(((id) << DPNI_CMD_ID_OFFSET) | DPNI_CMD_BASE_VERSION)
+#define DPNI_CMD_V2(id)	(((id) << DPNI_CMD_ID_OFFSET) | DPNI_CMD_2ND_VERSION)
 
 #define DPNI_CMDID_OPEN					DPNI_CMD(0x801)
 #define DPNI_CMDID_CLOSE				DPNI_CMD(0x800)
@@ -45,7 +47,7 @@
 #define DPNI_CMDID_SET_MAX_FRAME_LENGTH			DPNI_CMD(0x216)
 #define DPNI_CMDID_GET_MAX_FRAME_LENGTH			DPNI_CMD(0x217)
 #define DPNI_CMDID_SET_LINK_CFG				DPNI_CMD(0x21A)
-#define DPNI_CMDID_SET_TX_SHAPING			DPNI_CMD(0x21B)
+#define DPNI_CMDID_SET_TX_SHAPING			DPNI_CMD_V2(0x21B)
 
 #define DPNI_CMDID_SET_MCAST_PROMISC			DPNI_CMD(0x220)
 #define DPNI_CMDID_GET_MCAST_PROMISC			DPNI_CMD(0x221)

From 5e46e43c2ad92bca709944be806feb9bce044061 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Fri, 25 Sep 2020 08:26:16 -0700
Subject: [PATCH 180/322] MAINTAINERS: Add Vladimir as a maintainer for DSA

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Acked-by: Vladimir Oltean <olteanv@gmail.com>
Acked-by: Andrew Lunn <andrew@lunn.ch>
Acked-by: Vivien Didelot <vivien.didelot@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 9350506a1127..6dc9ebf5bf76 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12077,6 +12077,7 @@ NETWORKING [DSA]
 M:	Andrew Lunn <andrew@lunn.ch>
 M:	Vivien Didelot <vivien.didelot@gmail.com>
 M:	Florian Fainelli <f.fainelli@gmail.com>
+M:	Vladimir Oltean <olteanv@gmail.com>
 S:	Maintained
 F:	Documentation/devicetree/bindings/net/dsa/
 F:	drivers/net/dsa/

From 059432495e209279bae12db3d2b0bc8c8fe987bb Mon Sep 17 00:00:00 2001
From: Igor Russkikh <irusskikh@marvell.com>
Date: Fri, 25 Sep 2020 23:27:35 +0300
Subject: [PATCH 181/322] net: atlantic: fix build when object tree is separate

Driver subfolder files refer parent folder includes in an
absolute manner.

Makefile contains a -I for this, but apparently that does not
work if object tree is separated.

Adding srctree to fix that.

Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/aquantia/atlantic/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/Makefile b/drivers/net/ethernet/aquantia/atlantic/Makefile
index 130a105d03f3..8ebcc68e807f 100644
--- a/drivers/net/ethernet/aquantia/atlantic/Makefile
+++ b/drivers/net/ethernet/aquantia/atlantic/Makefile
@@ -8,7 +8,7 @@
 
 obj-$(CONFIG_AQTION) += atlantic.o
 
-ccflags-y += -I$(src)
+ccflags-y += -I$(srctree)/$(src)
 
 atlantic-objs := aq_main.o \
 	aq_nic.o \
@@ -33,4 +33,4 @@ atlantic-objs := aq_main.o \
 
 atlantic-$(CONFIG_MACSEC) += aq_macsec.o
 
-atlantic-$(CONFIG_PTP_1588_CLOCK) += aq_ptp.o
\ No newline at end of file
+atlantic-$(CONFIG_PTP_1588_CLOCK) += aq_ptp.o

From e30d694c3381b565e043cf74b0bed059db1b4ac9 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Fri, 25 Sep 2020 08:21:14 -0700
Subject: [PATCH 182/322] Documentation/llvm: Fix clang target examples

clang --target=<triple> is how we can specify a particular toolchain
triple to be use, fix the two occurences in the documentation.

Fixes: fcf1b6a35c16 ("Documentation/llvm: add documentation on building w/ Clang/LLVM")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Nathan Chancellor <natechancellor@gmail.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 Documentation/kbuild/llvm.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/kbuild/llvm.rst b/Documentation/kbuild/llvm.rst
index 334df758dce3..dae90c21aed3 100644
--- a/Documentation/kbuild/llvm.rst
+++ b/Documentation/kbuild/llvm.rst
@@ -39,10 +39,10 @@ which can help simplify cross compiling. ::
 	ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- make CC=clang
 
 ``CROSS_COMPILE`` is not used to prefix the Clang compiler binary, instead
-``CROSS_COMPILE`` is used to set a command line flag: ``--target <triple>``. For
+``CROSS_COMPILE`` is used to set a command line flag: ``--target=<triple>``. For
 example: ::
 
-	clang --target aarch64-linux-gnu foo.c
+	clang --target=aarch64-linux-gnu foo.c
 
 LLVM Utilities
 --------------

From 678ff6a7afccc43d352c1b8c94b6d8c0ee1464ad Mon Sep 17 00:00:00 2001
From: Shakeel Butt <shakeelb@google.com>
Date: Sat, 26 Sep 2020 07:13:41 -0700
Subject: [PATCH 183/322] mm: slab: fix potential double free in ___cache_free

With the commit 10befea91b61 ("mm: memcg/slab: use a single set of
kmem_caches for all allocations"), it becomes possible to call kfree()
from the slabs_destroy().

The functions cache_flusharray() and do_drain() calls slabs_destroy() on
array_cache of the local CPU without updating the size of the
array_cache.  This enables the kfree() call from the slabs_destroy() to
recursively call cache_flusharray() which can potentially call
free_block() on the same elements of the array_cache of the local CPU
and causing double free and memory corruption.

To fix the issue, simply update the local CPU array_cache cache before
calling slabs_destroy().

Fixes: 10befea91b61 ("mm: memcg/slab: use a single set of kmem_caches for all allocations")
Signed-off-by: Shakeel Butt <shakeelb@google.com>
Reviewed-by: Roman Gushchin <guro@fb.com>
Tested-by: Ming Lei <ming.lei@redhat.com>
Reported-by: kernel test robot <rong.a.chen@intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ted Ts'o <tytso@mit.edu>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/slab.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/mm/slab.c b/mm/slab.c
index 3160dff6fd76..f658e86ec8ce 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1632,6 +1632,10 @@ static void slab_destroy(struct kmem_cache *cachep, struct page *page)
 		kmem_cache_free(cachep->freelist_cache, freelist);
 }
 
+/*
+ * Update the size of the caches before calling slabs_destroy as it may
+ * recursively call kfree.
+ */
 static void slabs_destroy(struct kmem_cache *cachep, struct list_head *list)
 {
 	struct page *page, *n;
@@ -2153,8 +2157,8 @@ static void do_drain(void *arg)
 	spin_lock(&n->list_lock);
 	free_block(cachep, ac->entry, ac->avail, node, &list);
 	spin_unlock(&n->list_lock);
-	slabs_destroy(cachep, &list);
 	ac->avail = 0;
+	slabs_destroy(cachep, &list);
 }
 
 static void drain_cpu_caches(struct kmem_cache *cachep)
@@ -3402,9 +3406,9 @@ free_done:
 	}
 #endif
 	spin_unlock(&n->list_lock);
-	slabs_destroy(cachep, &list);
 	ac->avail -= batchcount;
 	memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);
+	slabs_destroy(cachep, &list);
 }
 
 /*

From 41663430588c737dd735bad5a0d1ba325dcabd59 Mon Sep 17 00:00:00 2001
From: Gao Xiang <hsiangkao@redhat.com>
Date: Fri, 25 Sep 2020 21:19:01 -0700
Subject: [PATCH 184/322] mm, THP, swap: fix allocating cluster for swapfile by
 mistake

SWP_FS is used to make swap_{read,write}page() go through the
filesystem, and it's only used for swap files over NFS.  So, !SWP_FS
means non NFS for now, it could be either file backed or device backed.
Something similar goes with legacy SWP_FILE.

So in order to achieve the goal of the original patch, SWP_BLKDEV should
be used instead.

FS corruption can be observed with SSD device + XFS + fragmented
swapfile due to CONFIG_THP_SWAP=y.

I reproduced the issue with the following details:

Environment:

  QEMU + upstream kernel + buildroot + NVMe (2 GB)

Kernel config:

  CONFIG_BLK_DEV_NVME=y
  CONFIG_THP_SWAP=y

Some reproducible steps:

  mkfs.xfs -f /dev/nvme0n1
  mkdir /tmp/mnt
  mount /dev/nvme0n1 /tmp/mnt
  bs="32k"
  sz="1024m"    # doesn't matter too much, I also tried 16m
  xfs_io -f -c "pwrite -R -b $bs 0 $sz" -c "fdatasync" /tmp/mnt/sw
  xfs_io -f -c "pwrite -R -b $bs 0 $sz" -c "fdatasync" /tmp/mnt/sw
  xfs_io -f -c "pwrite -R -b $bs 0 $sz" -c "fdatasync" /tmp/mnt/sw
  xfs_io -f -c "pwrite -F -S 0 -b $bs 0 $sz" -c "fdatasync" /tmp/mnt/sw
  xfs_io -f -c "pwrite -R -b $bs 0 $sz" -c "fsync" /tmp/mnt/sw

  mkswap /tmp/mnt/sw
  swapon /tmp/mnt/sw

  stress --vm 2 --vm-bytes 600M   # doesn't matter too much as well

Symptoms:
 - FS corruption (e.g. checksum failure)
 - memory corruption at: 0xd2808010
 - segfault

Fixes: f0eea189e8e9 ("mm, THP, swap: Don't allocate huge cluster for file backed swap device")
Fixes: 38d8b4e6bdc8 ("mm, THP, swap: delay splitting THP during swap out")
Signed-off-by: Gao Xiang <hsiangkao@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: "Huang, Ying" <ying.huang@intel.com>
Reviewed-by: Yang Shi <shy828301@gmail.com>
Acked-by: Rafael Aquini <aquini@redhat.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Carlos Maiolino <cmaiolino@redhat.com>
Cc: Eric Sandeen <esandeen@redhat.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: <stable@vger.kernel.org>
Link: https://lkml.kernel.org/r/20200820045323.7809-1-hsiangkao@redhat.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/swapfile.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/swapfile.c b/mm/swapfile.c
index 12f59e641b5e..debc94155f74 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1078,7 +1078,7 @@ start_over:
 			goto nextsi;
 		}
 		if (size == SWAPFILE_CLUSTER) {
-			if (!(si->flags & SWP_FS))
+			if (si->flags & SWP_BLKDEV)
 				n_ret = swap_alloc_cluster(si, swp_entries);
 		} else
 			n_ret = scan_swap_map_slots(si, SWAP_HAS_CACHE,

From 8d3fe09d8d6645dcbbe2413cde58f51ceb6545a6 Mon Sep 17 00:00:00 2001
From: Muchun Song <songmuchun@bytedance.com>
Date: Fri, 25 Sep 2020 21:19:05 -0700
Subject: [PATCH 185/322] mm: memcontrol: fix missing suffix of
 workingset_restore

We forget to add the suffix to the workingset_restore string, so fix it.

And also update the documentation of cgroup-v2.rst.

Fixes: 170b04b7ae49 ("mm/workingset: prepare the workingset detection infrastructure for anon LRU")
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Tejun Heo <tj@kernel.org>
Cc: Zefan Li <lizefan@huawei.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Link: https://lkml.kernel.org/r/20200916100030.71698-1-songmuchun@bytedance.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/admin-guide/cgroup-v2.rst | 25 ++++++++++++++++++-------
 mm/memcontrol.c                         |  4 ++--
 2 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 6be43781ec7f..baa07b30845e 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1324,15 +1324,26 @@ PAGE_SIZE multiple when read back.
 	  pgmajfault
 		Number of major page faults incurred
 
-	  workingset_refault
-		Number of refaults of previously evicted pages
+	  workingset_refault_anon
+		Number of refaults of previously evicted anonymous pages.
 
-	  workingset_activate
-		Number of refaulted pages that were immediately activated
+	  workingset_refault_file
+		Number of refaults of previously evicted file pages.
 
-	  workingset_restore
-		Number of restored pages which have been detected as an active
-		workingset before they got reclaimed.
+	  workingset_activate_anon
+		Number of refaulted anonymous pages that were immediately
+		activated.
+
+	  workingset_activate_file
+		Number of refaulted file pages that were immediately activated.
+
+	  workingset_restore_anon
+		Number of restored anonymous pages which have been detected as
+		an active workingset before they got reclaimed.
+
+	  workingset_restore_file
+		Number of restored file pages which have been detected as an
+		active workingset before they got reclaimed.
 
 	  workingset_nodereclaim
 		Number of times a shadow node has been reclaimed
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index cfa6cbad21d5..6877c765b8d0 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1538,9 +1538,9 @@ static char *memory_stat_format(struct mem_cgroup *memcg)
 		       memcg_page_state(memcg, WORKINGSET_ACTIVATE_ANON));
 	seq_buf_printf(&s, "workingset_activate_file %lu\n",
 		       memcg_page_state(memcg, WORKINGSET_ACTIVATE_FILE));
-	seq_buf_printf(&s, "workingset_restore %lu\n",
+	seq_buf_printf(&s, "workingset_restore_anon %lu\n",
 		       memcg_page_state(memcg, WORKINGSET_RESTORE_ANON));
-	seq_buf_printf(&s, "workingset_restore %lu\n",
+	seq_buf_printf(&s, "workingset_restore_file %lu\n",
 		       memcg_page_state(memcg, WORKINGSET_RESTORE_FILE));
 	seq_buf_printf(&s, "workingset_nodereclaim %lu\n",
 		       memcg_page_state(memcg, WORKINGSET_NODERECLAIM));

From d3f7b1bb204099f2f7306318896223e8599bb6a2 Mon Sep 17 00:00:00 2001
From: Vasily Gorbik <gor@linux.ibm.com>
Date: Fri, 25 Sep 2020 21:19:10 -0700
Subject: [PATCH 186/322] mm/gup: fix gup_fast with dynamic page table folding

Currently to make sure that every page table entry is read just once
gup_fast walks perform READ_ONCE and pass pXd value down to the next
gup_pXd_range function by value e.g.:

  static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end,
                           unsigned int flags, struct page **pages, int *nr)
  ...
          pudp = pud_offset(&p4d, addr);

This function passes a reference on that local value copy to pXd_offset,
and might get the very same pointer in return.  This happens when the
level is folded (on most arches), and that pointer should not be
iterated.

On s390 due to the fact that each task might have different 5,4 or
3-level address translation and hence different levels folded the logic
is more complex and non-iteratable pointer to a local copy leads to
severe problems.

Here is an example of what happens with gup_fast on s390, for a task
with 3-level paging, crossing a 2 GB pud boundary:

  // addr = 0x1007ffff000, end = 0x10080001000
  static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end,
                           unsigned int flags, struct page **pages, int *nr)
  {
        unsigned long next;
        pud_t *pudp;

        // pud_offset returns &p4d itself (a pointer to a value on stack)
        pudp = pud_offset(&p4d, addr);
        do {
                // on second iteratation reading "random" stack value
                pud_t pud = READ_ONCE(*pudp);

                // next = 0x10080000000, due to PUD_SIZE/MASK != PGDIR_SIZE/MASK on s390
                next = pud_addr_end(addr, end);
                ...
        } while (pudp++, addr = next, addr != end); // pudp++ iterating over stack

        return 1;
  }

This happens since s390 moved to common gup code with commit
d1874a0c2805 ("s390/mm: make the pxd_offset functions more robust") and
commit 1a42010cdc26 ("s390/mm: convert to the generic
get_user_pages_fast code").

s390 tried to mimic static level folding by changing pXd_offset
primitives to always calculate top level page table offset in pgd_offset
and just return the value passed when pXd_offset has to act as folded.

What is crucial for gup_fast and what has been overlooked is that
PxD_SIZE/MASK and thus pXd_addr_end should also change correspondingly.
And the latter is not possible with dynamic folding.

To fix the issue in addition to pXd values pass original pXdp pointers
down to gup_pXd_range functions.  And introduce pXd_offset_lockless
helpers, which take an additional pXd entry value parameter.  This has
already been discussed in

  https://lkml.kernel.org/r/20190418100218.0a4afd51@mschwideX1

Fixes: 1a42010cdc26 ("s390/mm: convert to the generic get_user_pages_fast code")
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Reviewed-by: Alexander Gordeev <agordeev@linux.ibm.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
Reviewed-by: John Hubbard <jhubbard@nvidia.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Claudio Imbrenda <imbrenda@linux.ibm.com>
Cc: <stable@vger.kernel.org>	[5.2+]
Link: https://lkml.kernel.org/r/patch.git-943f1e5dcff2.your-ad-here.call-01599856292-ext-8676@work.hours
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/s390/include/asm/pgtable.h | 42 +++++++++++++++++++++++----------
 include/linux/pgtable.h         | 10 ++++++++
 mm/gup.c                        | 18 +++++++-------
 3 files changed, 49 insertions(+), 21 deletions(-)

diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 7eb01a5459cd..b55561cc8786 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1260,26 +1260,44 @@ static inline pgd_t *pgd_offset_raw(pgd_t *pgd, unsigned long address)
 
 #define pgd_offset(mm, address) pgd_offset_raw(READ_ONCE((mm)->pgd), address)
 
-static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
+static inline p4d_t *p4d_offset_lockless(pgd_t *pgdp, pgd_t pgd, unsigned long address)
 {
-	if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R1)
-		return (p4d_t *) pgd_deref(*pgd) + p4d_index(address);
-	return (p4d_t *) pgd;
+	if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R1)
+		return (p4d_t *) pgd_deref(pgd) + p4d_index(address);
+	return (p4d_t *) pgdp;
+}
+#define p4d_offset_lockless p4d_offset_lockless
+
+static inline p4d_t *p4d_offset(pgd_t *pgdp, unsigned long address)
+{
+	return p4d_offset_lockless(pgdp, *pgdp, address);
 }
 
-static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
+static inline pud_t *pud_offset_lockless(p4d_t *p4dp, p4d_t p4d, unsigned long address)
 {
-	if ((p4d_val(*p4d) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R2)
-		return (pud_t *) p4d_deref(*p4d) + pud_index(address);
-	return (pud_t *) p4d;
+	if ((p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R2)
+		return (pud_t *) p4d_deref(p4d) + pud_index(address);
+	return (pud_t *) p4dp;
+}
+#define pud_offset_lockless pud_offset_lockless
+
+static inline pud_t *pud_offset(p4d_t *p4dp, unsigned long address)
+{
+	return pud_offset_lockless(p4dp, *p4dp, address);
 }
 #define pud_offset pud_offset
 
-static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
+static inline pmd_t *pmd_offset_lockless(pud_t *pudp, pud_t pud, unsigned long address)
 {
-	if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R3)
-		return (pmd_t *) pud_deref(*pud) + pmd_index(address);
-	return (pmd_t *) pud;
+	if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R3)
+		return (pmd_t *) pud_deref(pud) + pmd_index(address);
+	return (pmd_t *) pudp;
+}
+#define pmd_offset_lockless pmd_offset_lockless
+
+static inline pmd_t *pmd_offset(pud_t *pudp, unsigned long address)
+{
+	return pmd_offset_lockless(pudp, *pudp, address);
 }
 #define pmd_offset pmd_offset
 
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index e8cbc2e795d5..90654cb63e9e 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1427,6 +1427,16 @@ typedef unsigned int pgtbl_mod_mask;
 #define mm_pmd_folded(mm)	__is_defined(__PAGETABLE_PMD_FOLDED)
 #endif
 
+#ifndef p4d_offset_lockless
+#define p4d_offset_lockless(pgdp, pgd, address) p4d_offset(&(pgd), address)
+#endif
+#ifndef pud_offset_lockless
+#define pud_offset_lockless(p4dp, p4d, address) pud_offset(&(p4d), address)
+#endif
+#ifndef pmd_offset_lockless
+#define pmd_offset_lockless(pudp, pud, address) pmd_offset(&(pud), address)
+#endif
+
 /*
  * p?d_leaf() - true if this entry is a final mapping to a physical address.
  * This differs from p?d_huge() by the fact that they are always available (if
diff --git a/mm/gup.c b/mm/gup.c
index e5739a1974d5..578bf5bd8bf8 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -2485,13 +2485,13 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
 	return 1;
 }
 
-static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
+static int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, unsigned long end,
 		unsigned int flags, struct page **pages, int *nr)
 {
 	unsigned long next;
 	pmd_t *pmdp;
 
-	pmdp = pmd_offset(&pud, addr);
+	pmdp = pmd_offset_lockless(pudp, pud, addr);
 	do {
 		pmd_t pmd = READ_ONCE(*pmdp);
 
@@ -2528,13 +2528,13 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
 	return 1;
 }
 
-static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end,
+static int gup_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr, unsigned long end,
 			 unsigned int flags, struct page **pages, int *nr)
 {
 	unsigned long next;
 	pud_t *pudp;
 
-	pudp = pud_offset(&p4d, addr);
+	pudp = pud_offset_lockless(p4dp, p4d, addr);
 	do {
 		pud_t pud = READ_ONCE(*pudp);
 
@@ -2549,20 +2549,20 @@ static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end,
 			if (!gup_huge_pd(__hugepd(pud_val(pud)), addr,
 					 PUD_SHIFT, next, flags, pages, nr))
 				return 0;
-		} else if (!gup_pmd_range(pud, addr, next, flags, pages, nr))
+		} else if (!gup_pmd_range(pudp, pud, addr, next, flags, pages, nr))
 			return 0;
 	} while (pudp++, addr = next, addr != end);
 
 	return 1;
 }
 
-static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
+static int gup_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, unsigned long end,
 			 unsigned int flags, struct page **pages, int *nr)
 {
 	unsigned long next;
 	p4d_t *p4dp;
 
-	p4dp = p4d_offset(&pgd, addr);
+	p4dp = p4d_offset_lockless(pgdp, pgd, addr);
 	do {
 		p4d_t p4d = READ_ONCE(*p4dp);
 
@@ -2574,7 +2574,7 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
 			if (!gup_huge_pd(__hugepd(p4d_val(p4d)), addr,
 					 P4D_SHIFT, next, flags, pages, nr))
 				return 0;
-		} else if (!gup_pud_range(p4d, addr, next, flags, pages, nr))
+		} else if (!gup_pud_range(p4dp, p4d, addr, next, flags, pages, nr))
 			return 0;
 	} while (p4dp++, addr = next, addr != end);
 
@@ -2602,7 +2602,7 @@ static void gup_pgd_range(unsigned long addr, unsigned long end,
 			if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
 					 PGDIR_SHIFT, next, flags, pages, nr))
 				return;
-		} else if (!gup_p4d_range(pgd, addr, next, flags, pages, nr))
+		} else if (!gup_p4d_range(pgdp, pgd, addr, next, flags, pages, nr))
 			return;
 	} while (pgdp++, addr = next, addr != end);
 }

From 6c5c7b9f3352728af490183f71d350bb658ffb75 Mon Sep 17 00:00:00 2001
From: Zi Yan <ziy@nvidia.com>
Date: Fri, 25 Sep 2020 21:19:14 -0700
Subject: [PATCH 187/322] mm/migrate: correct thp migration stats

PageTransHuge returns true for both thp and hugetlb, so thp stats was
counting both thp and hugetlb migrations.  Exclude hugetlb migration by
setting is_thp variable right.

Clean up thp handling code too when we are there.

Fixes: 1a5bae25e3cf ("mm/vmstat: add events for THP migration without split")
Signed-off-by: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Daniel Jordan <daniel.m.jordan@oracle.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Link: https://lkml.kernel.org/r/20200917210413.1462975-1-zi.yan@sent.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/migrate.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/mm/migrate.c b/mm/migrate.c
index aecb1433cf3c..04a98bb2f568 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1446,7 +1446,7 @@ retry:
 			 * Capture required information that might get lost
 			 * during migration.
 			 */
-			is_thp = PageTransHuge(page);
+			is_thp = PageTransHuge(page) && !PageHuge(page);
 			nr_subpages = thp_nr_pages(page);
 			cond_resched();
 
@@ -1472,7 +1472,7 @@ retry:
 				 * we encounter them after the rest of the list
 				 * is processed.
 				 */
-				if (PageTransHuge(page) && !PageHuge(page)) {
+				if (is_thp) {
 					lock_page(page);
 					rc = split_huge_page_to_list(page, from);
 					unlock_page(page);
@@ -1481,8 +1481,7 @@ retry:
 						nr_thp_split++;
 						goto retry;
 					}
-				}
-				if (is_thp) {
+
 					nr_thp_failed++;
 					nr_failed += nr_subpages;
 					goto out;

From 1e1b6d63d6340764e00356873e5794225a2a03ea Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Fri, 25 Sep 2020 21:19:18 -0700
Subject: [PATCH 188/322] lib/string.c: implement stpcpy

LLVM implemented a recent "libcall optimization" that lowers calls to
`sprintf(dest, "%s", str)` where the return value is used to
`stpcpy(dest, str) - dest`.

This generally avoids the machinery involved in parsing format strings.
`stpcpy` is just like `strcpy` except it returns the pointer to the new
tail of `dest`.  This optimization was introduced into clang-12.

Implement this so that we don't observe linkage failures due to missing
symbol definitions for `stpcpy`.

Similar to last year's fire drill with: commit 5f074f3e192f
("lib/string.c: implement a basic bcmp")

The kernel is somewhere between a "freestanding" environment (no full
libc) and "hosted" environment (many symbols from libc exist with the
same type, function signature, and semantics).

As Peter Anvin notes, there's not really a great way to inform the
compiler that you're targeting a freestanding environment but would like
to opt-in to some libcall optimizations (see pr/47280 below), rather
than opt-out.

Arvind notes, -fno-builtin-* behaves slightly differently between GCC
and Clang, and Clang is missing many __builtin_* definitions, which I
consider a bug in Clang and am working on fixing.

Masahiro summarizes the subtle distinction between compilers justly:
  To prevent transformation from foo() into bar(), there are two ways in
  Clang to do that; -fno-builtin-foo, and -fno-builtin-bar.  There is
  only one in GCC; -fno-buitin-foo.

(Any difference in that behavior in Clang is likely a bug from a missing
__builtin_* definition.)

Masahiro also notes:
  We want to disable optimization from foo() to bar(),
  but we may still benefit from the optimization from
  foo() into something else. If GCC implements the same transform, we
  would run into a problem because it is not -fno-builtin-bar, but
  -fno-builtin-foo that disables that optimization.

  In this regard, -fno-builtin-foo would be more future-proof than
  -fno-built-bar, but -fno-builtin-foo is still potentially overkill. We
  may want to prevent calls from foo() being optimized into calls to
  bar(), but we still may want other optimization on calls to foo().

It seems that compilers today don't quite provide the fine grain control
over which libcall optimizations pseudo-freestanding environments would
prefer.

Finally, Kees notes that this interface is unsafe, so we should not
encourage its use.  As such, I've removed the declaration from any
header, but it still needs to be exported to avoid linkage errors in
modules.

Reported-by: Sami Tolvanen <samitolvanen@google.com>
Suggested-by: Andy Lavr <andy.lavr@gmail.com>
Suggested-by: Arvind Sankar <nivedita@alum.mit.edu>
Suggested-by: Joe Perches <joe@perches.com>
Suggested-by: Kees Cook <keescook@chromium.org>
Suggested-by: Masahiro Yamada <masahiroy@kernel.org>
Suggested-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Tested-by: Nathan Chancellor <natechancellor@gmail.com>
Cc: <stable@vger.kernel.org>
Link: https://lkml.kernel.org/r/20200914161643.938408-1-ndesaulniers@google.com
Link: https://bugs.llvm.org/show_bug.cgi?id=47162
Link: https://bugs.llvm.org/show_bug.cgi?id=47280
Link: https://github.com/ClangBuiltLinux/linux/issues/1126
Link: https://man7.org/linux/man-pages/man3/stpcpy.3.html
Link: https://pubs.opengroup.org/onlinepubs/9699919799/functions/stpcpy.html
Link: https://reviews.llvm.org/D85963
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/string.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/lib/string.c b/lib/string.c
index 6012c385fb31..4288e0158d47 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -272,6 +272,30 @@ ssize_t strscpy_pad(char *dest, const char *src, size_t count)
 }
 EXPORT_SYMBOL(strscpy_pad);
 
+/**
+ * stpcpy - copy a string from src to dest returning a pointer to the new end
+ *          of dest, including src's %NUL-terminator. May overrun dest.
+ * @dest: pointer to end of string being copied into. Must be large enough
+ *        to receive copy.
+ * @src: pointer to the beginning of string being copied from. Must not overlap
+ *       dest.
+ *
+ * stpcpy differs from strcpy in a key way: the return value is a pointer
+ * to the new %NUL-terminating character in @dest. (For strcpy, the return
+ * value is a pointer to the start of @dest). This interface is considered
+ * unsafe as it doesn't perform bounds checking of the inputs. As such it's
+ * not recommended for usage. Instead, its definition is provided in case
+ * the compiler lowers other libcalls to stpcpy.
+ */
+char *stpcpy(char *__restrict__ dest, const char *__restrict__ src);
+char *stpcpy(char *__restrict__ dest, const char *__restrict__ src)
+{
+	while ((*dest++ = *src++) != '\0')
+		/* nothing */;
+	return --dest;
+}
+EXPORT_SYMBOL(stpcpy);
+
 #ifndef __HAVE_ARCH_STRCAT
 /**
  * strcat - Append one %NUL-terminated string to another

From ffa550cd691b8ba6c6117ce986516635afe03c13 Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Fri, 25 Sep 2020 21:19:21 -0700
Subject: [PATCH 189/322] lib/memregion.c: include memregion.h

This addresses the following sparse warning:

  lib/memregion.c:8:5: warning: symbol 'memregion_alloc' was not declared. Should it be static?
  lib/memregion.c:14:6: warning: symbol 'memregion_free' was not declared. Should it be static?

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Link: https://lkml.kernel.org/r/20200921142852.875312-1-yanaijie@huawei.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/memregion.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/memregion.c b/lib/memregion.c
index 77c85b5251da..be5cfa5a3b57 100644
--- a/lib/memregion.c
+++ b/lib/memregion.c
@@ -2,6 +2,7 @@
 /* identifiers for device / performance-differentiated memory regions */
 #include <linux/idr.h>
 #include <linux/types.h>
+#include <linux/memregion.h>
 
 static DEFINE_IDA(memregion_ids);
 

From a1cd6c2ae47ee10ff21e62475685d5b399e2ed4a Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Fri, 25 Sep 2020 21:19:24 -0700
Subject: [PATCH 190/322] arch/x86/lib/usercopy_64.c: fix
 __copy_user_flushcache() cache writeback

If we copy less than 8 bytes and if the destination crosses a cache
line, __copy_user_flushcache would invalidate only the first cache line.

This patch makes it invalidate the second cache line as well.

Fixes: 0aed55af88345b ("x86, uaccess: introduce copy_from_iter_flushcache for pmem / cache-bypass operations")
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Dan Williams <dan.j.wiilliams@intel.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Toshi Kani <toshi.kani@hpe.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: <stable@vger.kernel.org>
Link: https://lkml.kernel.org/r/alpine.LRH.2.02.2009161451140.21915@file01.intranet.prod.int.rdu2.redhat.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/lib/usercopy_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index b0dfac3d3df7..1847e993ac63 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -120,7 +120,7 @@ long __copy_user_flushcache(void *dst, const void __user *src, unsigned size)
 	 */
 	if (size < 8) {
 		if (!IS_ALIGNED(dest, 4) || size != 4)
-			clean_cache_range(dst, 1);
+			clean_cache_range(dst, size);
 	} else {
 		if (!IS_ALIGNED(dest, 8)) {
 			dest = ALIGN(dest, boot_cpu_data.x86_clflush_size);

From c1d0da83358a2316d9be7f229f26126dbaa07468 Mon Sep 17 00:00:00 2001
From: Laurent Dufour <ldufour@linux.ibm.com>
Date: Fri, 25 Sep 2020 21:19:28 -0700
Subject: [PATCH 191/322] mm: replace memmap_context by meminit_context

Patch series "mm: fix memory to node bad links in sysfs", v3.

Sometimes, firmware may expose interleaved memory layout like this:

 Early memory node ranges
   node   1: [mem 0x0000000000000000-0x000000011fffffff]
   node   2: [mem 0x0000000120000000-0x000000014fffffff]
   node   1: [mem 0x0000000150000000-0x00000001ffffffff]
   node   0: [mem 0x0000000200000000-0x000000048fffffff]
   node   2: [mem 0x0000000490000000-0x00000007ffffffff]

In that case, we can see memory blocks assigned to multiple nodes in
sysfs:

  $ ls -l /sys/devices/system/memory/memory21
  total 0
  lrwxrwxrwx 1 root root     0 Aug 24 05:27 node1 -> ../../node/node1
  lrwxrwxrwx 1 root root     0 Aug 24 05:27 node2 -> ../../node/node2
  -rw-r--r-- 1 root root 65536 Aug 24 05:27 online
  -r--r--r-- 1 root root 65536 Aug 24 05:27 phys_device
  -r--r--r-- 1 root root 65536 Aug 24 05:27 phys_index
  drwxr-xr-x 2 root root     0 Aug 24 05:27 power
  -r--r--r-- 1 root root 65536 Aug 24 05:27 removable
  -rw-r--r-- 1 root root 65536 Aug 24 05:27 state
  lrwxrwxrwx 1 root root     0 Aug 24 05:25 subsystem -> ../../../../bus/memory
  -rw-r--r-- 1 root root 65536 Aug 24 05:25 uevent
  -r--r--r-- 1 root root 65536 Aug 24 05:27 valid_zones

The same applies in the node's directory with a memory21 link in both
the node1 and node2's directory.

This is wrong but doesn't prevent the system to run.  However when
later, one of these memory blocks is hot-unplugged and then hot-plugged,
the system is detecting an inconsistency in the sysfs layout and a
BUG_ON() is raised:

  kernel BUG at /Users/laurent/src/linux-ppc/mm/memory_hotplug.c:1084!
  LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries
  Modules linked in: rpadlpar_io rpaphp pseries_rng rng_core vmx_crypto gf128mul binfmt_misc ip_tables x_tables xfs libcrc32c crc32c_vpmsum autofs4
  CPU: 8 PID: 10256 Comm: drmgr Not tainted 5.9.0-rc1+ #25
  Call Trace:
    add_memory_resource+0x23c/0x340 (unreliable)
    __add_memory+0x5c/0xf0
    dlpar_add_lmb+0x1b4/0x500
    dlpar_memory+0x1f8/0xb80
    handle_dlpar_errorlog+0xc0/0x190
    dlpar_store+0x198/0x4a0
    kobj_attr_store+0x30/0x50
    sysfs_kf_write+0x64/0x90
    kernfs_fop_write+0x1b0/0x290
    vfs_write+0xe8/0x290
    ksys_write+0xdc/0x130
    system_call_exception+0x160/0x270
    system_call_common+0xf0/0x27c

This has been seen on PowerPC LPAR.

The root cause of this issue is that when node's memory is registered,
the range used can overlap another node's range, thus the memory block
is registered to multiple nodes in sysfs.

There are two issues here:

 (a) The sysfs memory and node's layouts are broken due to these
     multiple links

 (b) The link errors in link_mem_sections() should not lead to a system
     panic.

To address (a) register_mem_sect_under_node should not rely on the
system state to detect whether the link operation is triggered by a hot
plug operation or not.  This is addressed by the patches 1 and 2 of this
series.

Issue (b) will be addressed separately.

This patch (of 2):

The memmap_context enum is used to detect whether a memory operation is
due to a hot-add operation or happening at boot time.

Make it general to the hotplug operation and rename it as
meminit_context.

There is no functional change introduced by this patch

Suggested-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Laurent Dufour <ldufour@linux.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: "Rafael J . Wysocki" <rafael@kernel.org>
Cc: Nathan Lynch <nathanl@linux.ibm.com>
Cc: Scott Cheloha <cheloha@linux.ibm.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: <stable@vger.kernel.org>
Link: https://lkml.kernel.org/r/20200915094143.79181-1-ldufour@linux.ibm.com
Link: https://lkml.kernel.org/r/20200915132624.9723-1-ldufour@linux.ibm.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/mm/init.c    |  6 +++---
 include/linux/mm.h     |  2 +-
 include/linux/mmzone.h | 11 ++++++++---
 mm/memory_hotplug.c    |  2 +-
 mm/page_alloc.c        | 10 +++++-----
 5 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 0b3fb4c7af29..8e7b8c6c576e 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -538,7 +538,7 @@ virtual_memmap_init(u64 start, u64 end, void *arg)
 	if (map_start < map_end)
 		memmap_init_zone((unsigned long)(map_end - map_start),
 				 args->nid, args->zone, page_to_pfn(map_start),
-				 MEMMAP_EARLY, NULL);
+				 MEMINIT_EARLY, NULL);
 	return 0;
 }
 
@@ -547,8 +547,8 @@ memmap_init (unsigned long size, int nid, unsigned long zone,
 	     unsigned long start_pfn)
 {
 	if (!vmem_map) {
-		memmap_init_zone(size, nid, zone, start_pfn, MEMMAP_EARLY,
-				NULL);
+		memmap_init_zone(size, nid, zone, start_pfn,
+				 MEMINIT_EARLY, NULL);
 	} else {
 		struct page *start;
 		struct memmap_init_callback_data args;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index b2f370f0b420..48614513eb66 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2416,7 +2416,7 @@ extern int __meminit __early_pfn_to_nid(unsigned long pfn,
 
 extern void set_dma_reserve(unsigned long new_dma_reserve);
 extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long,
-		enum memmap_context, struct vmem_altmap *);
+		enum meminit_context, struct vmem_altmap *);
 extern void setup_per_zone_wmarks(void);
 extern int __meminit init_per_zone_wmark_min(void);
 extern void mem_init(void);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 8379432f4f2f..0f7a4ff4b059 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -824,10 +824,15 @@ bool zone_watermark_ok(struct zone *z, unsigned int order,
 		unsigned int alloc_flags);
 bool zone_watermark_ok_safe(struct zone *z, unsigned int order,
 		unsigned long mark, int highest_zoneidx);
-enum memmap_context {
-	MEMMAP_EARLY,
-	MEMMAP_HOTPLUG,
+/*
+ * Memory initialization context, use to differentiate memory added by
+ * the platform statically or via memory hotplug interface.
+ */
+enum meminit_context {
+	MEMINIT_EARLY,
+	MEMINIT_HOTPLUG,
 };
+
 extern void init_currently_empty_zone(struct zone *zone, unsigned long start_pfn,
 				     unsigned long size);
 
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index b11a269e2356..345308a8bd15 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -729,7 +729,7 @@ void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
 	 * are reserved so nobody should be touching them so we should be safe
 	 */
 	memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn,
-			MEMMAP_HOTPLUG, altmap);
+			 MEMINIT_HOTPLUG, altmap);
 
 	set_zone_contiguous(zone);
 }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index fab5e97dc9ca..5661fa164f13 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5975,7 +5975,7 @@ overlap_memmap_init(unsigned long zone, unsigned long *pfn)
  * done. Non-atomic initialization, single-pass.
  */
 void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
-		unsigned long start_pfn, enum memmap_context context,
+		unsigned long start_pfn, enum meminit_context context,
 		struct vmem_altmap *altmap)
 {
 	unsigned long pfn, end_pfn = start_pfn + size;
@@ -6007,7 +6007,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 		 * There can be holes in boot-time mem_map[]s handed to this
 		 * function.  They do not exist on hotplugged memory.
 		 */
-		if (context == MEMMAP_EARLY) {
+		if (context == MEMINIT_EARLY) {
 			if (overlap_memmap_init(zone, &pfn))
 				continue;
 			if (defer_init(nid, pfn, end_pfn))
@@ -6016,7 +6016,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 
 		page = pfn_to_page(pfn);
 		__init_single_page(page, pfn, zone, nid);
-		if (context == MEMMAP_HOTPLUG)
+		if (context == MEMINIT_HOTPLUG)
 			__SetPageReserved(page);
 
 		/*
@@ -6099,7 +6099,7 @@ void __ref memmap_init_zone_device(struct zone *zone,
 		 * check here not to call set_pageblock_migratetype() against
 		 * pfn out of zone.
 		 *
-		 * Please note that MEMMAP_HOTPLUG path doesn't clear memmap
+		 * Please note that MEMINIT_HOTPLUG path doesn't clear memmap
 		 * because this is done early in section_activate()
 		 */
 		if (!(pfn & (pageblock_nr_pages - 1))) {
@@ -6137,7 +6137,7 @@ void __meminit __weak memmap_init(unsigned long size, int nid,
 		if (end_pfn > start_pfn) {
 			size = end_pfn - start_pfn;
 			memmap_init_zone(size, nid, zone, start_pfn,
-					 MEMMAP_EARLY, NULL);
+					 MEMINIT_EARLY, NULL);
 		}
 	}
 }

From f85086f95fa36194eb0db5cd5c12e56801b98523 Mon Sep 17 00:00:00 2001
From: Laurent Dufour <ldufour@linux.ibm.com>
Date: Fri, 25 Sep 2020 21:19:31 -0700
Subject: [PATCH 192/322] mm: don't rely on system state to detect hot-plug
 operations

In register_mem_sect_under_node() the system_state's value is checked to
detect whether the call is made during boot time or during an hot-plug
operation.  Unfortunately, that check against SYSTEM_BOOTING is wrong
because regular memory is registered at SYSTEM_SCHEDULING state.  In
addition, memory hot-plug operation can be triggered at this system
state by the ACPI [1].  So checking against the system state is not
enough.

The consequence is that on system with interleaved node's ranges like this:

 Early memory node ranges
   node   1: [mem 0x0000000000000000-0x000000011fffffff]
   node   2: [mem 0x0000000120000000-0x000000014fffffff]
   node   1: [mem 0x0000000150000000-0x00000001ffffffff]
   node   0: [mem 0x0000000200000000-0x000000048fffffff]
   node   2: [mem 0x0000000490000000-0x00000007ffffffff]

This can be seen on PowerPC LPAR after multiple memory hot-plug and
hot-unplug operations are done.  At the next reboot the node's memory
ranges can be interleaved and since the call to link_mem_sections() is
made in topology_init() while the system is in the SYSTEM_SCHEDULING
state, the node's id is not checked, and the sections registered to
multiple nodes:

  $ ls -l /sys/devices/system/memory/memory21/node*
  total 0
  lrwxrwxrwx 1 root root     0 Aug 24 05:27 node1 -> ../../node/node1
  lrwxrwxrwx 1 root root     0 Aug 24 05:27 node2 -> ../../node/node2

In that case, the system is able to boot but if later one of theses
memory blocks is hot-unplugged and then hot-plugged, the sysfs
inconsistency is detected and this is triggering a BUG_ON():

  kernel BUG at /Users/laurent/src/linux-ppc/mm/memory_hotplug.c:1084!
  Oops: Exception in kernel mode, sig: 5 [#1]
  LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries
  Modules linked in: rpadlpar_io rpaphp pseries_rng rng_core vmx_crypto gf128mul binfmt_misc ip_tables x_tables xfs libcrc32c crc32c_vpmsum autofs4
  CPU: 8 PID: 10256 Comm: drmgr Not tainted 5.9.0-rc1+ #25
  Call Trace:
    add_memory_resource+0x23c/0x340 (unreliable)
    __add_memory+0x5c/0xf0
    dlpar_add_lmb+0x1b4/0x500
    dlpar_memory+0x1f8/0xb80
    handle_dlpar_errorlog+0xc0/0x190
    dlpar_store+0x198/0x4a0
    kobj_attr_store+0x30/0x50
    sysfs_kf_write+0x64/0x90
    kernfs_fop_write+0x1b0/0x290
    vfs_write+0xe8/0x290
    ksys_write+0xdc/0x130
    system_call_exception+0x160/0x270
    system_call_common+0xf0/0x27c

This patch addresses the root cause by not relying on the system_state
value to detect whether the call is due to a hot-plug operation.  An
extra parameter is added to link_mem_sections() detailing whether the
operation is due to a hot-plug operation.

[1] According to Oscar Salvador, using this qemu command line, ACPI
memory hotplug operations are raised at SYSTEM_SCHEDULING state:

  $QEMU -enable-kvm -machine pc -smp 4,sockets=4,cores=1,threads=1 -cpu host -monitor pty \
        -m size=$MEM,slots=255,maxmem=4294967296k  \
        -numa node,nodeid=0,cpus=0-3,mem=512 -numa node,nodeid=1,mem=512 \
        -object memory-backend-ram,id=memdimm0,size=134217728 -device pc-dimm,node=0,memdev=memdimm0,id=dimm0,slot=0 \
        -object memory-backend-ram,id=memdimm1,size=134217728 -device pc-dimm,node=0,memdev=memdimm1,id=dimm1,slot=1 \
        -object memory-backend-ram,id=memdimm2,size=134217728 -device pc-dimm,node=0,memdev=memdimm2,id=dimm2,slot=2 \
        -object memory-backend-ram,id=memdimm3,size=134217728 -device pc-dimm,node=0,memdev=memdimm3,id=dimm3,slot=3 \
        -object memory-backend-ram,id=memdimm4,size=134217728 -device pc-dimm,node=1,memdev=memdimm4,id=dimm4,slot=4 \
        -object memory-backend-ram,id=memdimm5,size=134217728 -device pc-dimm,node=1,memdev=memdimm5,id=dimm5,slot=5 \
        -object memory-backend-ram,id=memdimm6,size=134217728 -device pc-dimm,node=1,memdev=memdimm6,id=dimm6,slot=6 \

Fixes: 4fbce633910e ("mm/memory_hotplug.c: make register_mem_sect_under_node() a callback of walk_memory_range()")
Signed-off-by: Laurent Dufour <ldufour@linux.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Nathan Lynch <nathanl@linux.ibm.com>
Cc: Scott Cheloha <cheloha@linux.ibm.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: <stable@vger.kernel.org>
Link: https://lkml.kernel.org/r/20200915094143.79181-3-ldufour@linux.ibm.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/base/node.c  | 85 ++++++++++++++++++++++++++++----------------
 include/linux/node.h | 11 +++---
 mm/memory_hotplug.c  |  3 +-
 3 files changed, 64 insertions(+), 35 deletions(-)

diff --git a/drivers/base/node.c b/drivers/base/node.c
index 508b80f6329b..50af16e68d98 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -761,14 +761,36 @@ static int __ref get_nid_for_pfn(unsigned long pfn)
 	return pfn_to_nid(pfn);
 }
 
+static int do_register_memory_block_under_node(int nid,
+					       struct memory_block *mem_blk)
+{
+	int ret;
+
+	/*
+	 * If this memory block spans multiple nodes, we only indicate
+	 * the last processed node.
+	 */
+	mem_blk->nid = nid;
+
+	ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj,
+				       &mem_blk->dev.kobj,
+				       kobject_name(&mem_blk->dev.kobj));
+	if (ret)
+		return ret;
+
+	return sysfs_create_link_nowarn(&mem_blk->dev.kobj,
+				&node_devices[nid]->dev.kobj,
+				kobject_name(&node_devices[nid]->dev.kobj));
+}
+
 /* register memory section under specified node if it spans that node */
-static int register_mem_sect_under_node(struct memory_block *mem_blk,
-					 void *arg)
+static int register_mem_block_under_node_early(struct memory_block *mem_blk,
+					       void *arg)
 {
 	unsigned long memory_block_pfns = memory_block_size_bytes() / PAGE_SIZE;
 	unsigned long start_pfn = section_nr_to_pfn(mem_blk->start_section_nr);
 	unsigned long end_pfn = start_pfn + memory_block_pfns - 1;
-	int ret, nid = *(int *)arg;
+	int nid = *(int *)arg;
 	unsigned long pfn;
 
 	for (pfn = start_pfn; pfn <= end_pfn; pfn++) {
@@ -785,38 +807,33 @@ static int register_mem_sect_under_node(struct memory_block *mem_blk,
 		}
 
 		/*
-		 * We need to check if page belongs to nid only for the boot
-		 * case, during hotplug we know that all pages in the memory
-		 * block belong to the same node.
+		 * We need to check if page belongs to nid only at the boot
+		 * case because node's ranges can be interleaved.
 		 */
-		if (system_state == SYSTEM_BOOTING) {
-			page_nid = get_nid_for_pfn(pfn);
-			if (page_nid < 0)
-				continue;
-			if (page_nid != nid)
-				continue;
-		}
+		page_nid = get_nid_for_pfn(pfn);
+		if (page_nid < 0)
+			continue;
+		if (page_nid != nid)
+			continue;
 
-		/*
-		 * If this memory block spans multiple nodes, we only indicate
-		 * the last processed node.
-		 */
-		mem_blk->nid = nid;
-
-		ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj,
-					&mem_blk->dev.kobj,
-					kobject_name(&mem_blk->dev.kobj));
-		if (ret)
-			return ret;
-
-		return sysfs_create_link_nowarn(&mem_blk->dev.kobj,
-				&node_devices[nid]->dev.kobj,
-				kobject_name(&node_devices[nid]->dev.kobj));
+		return do_register_memory_block_under_node(nid, mem_blk);
 	}
 	/* mem section does not span the specified node */
 	return 0;
 }
 
+/*
+ * During hotplug we know that all pages in the memory block belong to the same
+ * node.
+ */
+static int register_mem_block_under_node_hotplug(struct memory_block *mem_blk,
+						 void *arg)
+{
+	int nid = *(int *)arg;
+
+	return do_register_memory_block_under_node(nid, mem_blk);
+}
+
 /*
  * Unregister a memory block device under the node it spans. Memory blocks
  * with multiple nodes cannot be offlined and therefore also never be removed.
@@ -832,11 +849,19 @@ void unregister_memory_block_under_nodes(struct memory_block *mem_blk)
 			  kobject_name(&node_devices[mem_blk->nid]->dev.kobj));
 }
 
-int link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn)
+int link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn,
+		      enum meminit_context context)
 {
+	walk_memory_blocks_func_t func;
+
+	if (context == MEMINIT_HOTPLUG)
+		func = register_mem_block_under_node_hotplug;
+	else
+		func = register_mem_block_under_node_early;
+
 	return walk_memory_blocks(PFN_PHYS(start_pfn),
 				  PFN_PHYS(end_pfn - start_pfn), (void *)&nid,
-				  register_mem_sect_under_node);
+				  func);
 }
 
 #ifdef CONFIG_HUGETLBFS
diff --git a/include/linux/node.h b/include/linux/node.h
index 4866f32a02d8..014ba3ab2efd 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -99,11 +99,13 @@ extern struct node *node_devices[];
 typedef  void (*node_registration_func_t)(struct node *);
 
 #if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_NUMA)
-extern int link_mem_sections(int nid, unsigned long start_pfn,
-			     unsigned long end_pfn);
+int link_mem_sections(int nid, unsigned long start_pfn,
+		      unsigned long end_pfn,
+		      enum meminit_context context);
 #else
 static inline int link_mem_sections(int nid, unsigned long start_pfn,
-				    unsigned long end_pfn)
+				    unsigned long end_pfn,
+				    enum meminit_context context)
 {
 	return 0;
 }
@@ -128,7 +130,8 @@ static inline int register_one_node(int nid)
 		if (error)
 			return error;
 		/* link memory sections under this node */
-		error = link_mem_sections(nid, start_pfn, end_pfn);
+		error = link_mem_sections(nid, start_pfn, end_pfn,
+					  MEMINIT_EARLY);
 	}
 
 	return error;
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 345308a8bd15..ce3e73e3a5c1 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1080,7 +1080,8 @@ int __ref add_memory_resource(int nid, struct resource *res)
 	}
 
 	/* link memory sections under this node.*/
-	ret = link_mem_sections(nid, PFN_DOWN(start), PFN_UP(start + size - 1));
+	ret = link_mem_sections(nid, PFN_DOWN(start), PFN_UP(start + size - 1),
+				MEMINIT_HOTPLUG);
 	BUG_ON(ret);
 
 	/* create new memmap entry */

From ce2684254bd4818ca3995c0d021fb62c4cf10a19 Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Mon, 14 Sep 2020 23:32:15 -0700
Subject: [PATCH 193/322] mm: validate pmd after splitting

syzbot reported the following KASAN splat:

  general protection fault, probably for non-canonical address 0xdffffc0000000003: 0000 [#1] PREEMPT SMP KASAN
  KASAN: null-ptr-deref in range [0x0000000000000018-0x000000000000001f]
  CPU: 1 PID: 6826 Comm: syz-executor142 Not tainted 5.9.0-rc4-syzkaller #0
  Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
  RIP: 0010:__lock_acquire+0x84/0x2ae0 kernel/locking/lockdep.c:4296
  Code: ff df 8a 04 30 84 c0 0f 85 e3 16 00 00 83 3d 56 58 35 08 00 0f 84 0e 17 00 00 83 3d 25 c7 f5 07 00 74 2c 4c 89 e8 48 c1 e8 03 <80> 3c 30 00 74 12 4c 89 ef e8 3e d1 5a 00 48 be 00 00 00 00 00 fc
  RSP: 0018:ffffc90004b9f850 EFLAGS: 00010006
  Call Trace:
    lock_acquire+0x140/0x6f0 kernel/locking/lockdep.c:5006
    __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline]
    _raw_spin_lock+0x2a/0x40 kernel/locking/spinlock.c:151
    spin_lock include/linux/spinlock.h:354 [inline]
    madvise_cold_or_pageout_pte_range+0x52f/0x25c0 mm/madvise.c:389
    walk_pmd_range mm/pagewalk.c:89 [inline]
    walk_pud_range mm/pagewalk.c:160 [inline]
    walk_p4d_range mm/pagewalk.c:193 [inline]
    walk_pgd_range mm/pagewalk.c:229 [inline]
    __walk_page_range+0xe7b/0x1da0 mm/pagewalk.c:331
    walk_page_range+0x2c3/0x5c0 mm/pagewalk.c:427
    madvise_pageout_page_range mm/madvise.c:521 [inline]
    madvise_pageout mm/madvise.c:557 [inline]
    madvise_vma mm/madvise.c:946 [inline]
    do_madvise+0x12d0/0x2090 mm/madvise.c:1145
    __do_sys_madvise mm/madvise.c:1171 [inline]
    __se_sys_madvise mm/madvise.c:1169 [inline]
    __x64_sys_madvise+0x76/0x80 mm/madvise.c:1169
    do_syscall_64+0x31/0x70 arch/x86/entry/common.c:46
    entry_SYSCALL_64_after_hwframe+0x44/0xa9

The backing vma was shmem.

In case of split page of file-backed THP, madvise zaps the pmd instead
of remapping of sub-pages.  So we need to check pmd validity after
split.

Reported-by: syzbot+ecf80462cb7d5d552bc7@syzkaller.appspotmail.com
Fixes: 1a4e58cce84e ("mm: introduce MADV_PAGEOUT")
Signed-off-by: Minchan Kim <minchan@kernel.org>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/madvise.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/madvise.c b/mm/madvise.c
index d4aa5f776543..0e0d61003fc6 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -381,9 +381,9 @@ huge_unlock:
 		return 0;
 	}
 
+regular_page:
 	if (pmd_trans_unstable(pmd))
 		return 0;
-regular_page:
 #endif
 	tlb_change_page_size(tlb, PAGE_SIZE);
 	orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);

From a2bd970aa62f2f7f80fd0d212b1d4ccea5df4aed Mon Sep 17 00:00:00 2001
From: Nicolas VINCENT <nicolas.vincent@vossloh.com>
Date: Wed, 23 Sep 2020 16:08:40 +0200
Subject: [PATCH 194/322] i2c: cpm: Fix i2c_ram structure

the i2c_ram structure is missing the sdmatmp field mentionned in
datasheet for MPC8272 at paragraph 36.5. With this field missing, the
hardware would write past the allocated memory done through
cpm_muram_alloc for the i2c_ram structure and land in memory allocated
for the buffers descriptors corrupting the cbd_bufaddr field. Since this
field is only set during setup(), the first i2c transaction would work
and the following would send data read from an arbitrary memory
location.

Fixes: 61045dbe9d8d ("i2c: Add support for I2C bus on Freescale CPM1/CPM2 controllers")
Signed-off-by: Nicolas VINCENT <nicolas.vincent@vossloh.com>
Acked-by: Jochen Friedrich <jochen@scram.de>
Acked-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-cpm.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/i2c/busses/i2c-cpm.c b/drivers/i2c/busses/i2c-cpm.c
index 1213e1932ccb..24d584a1c9a7 100644
--- a/drivers/i2c/busses/i2c-cpm.c
+++ b/drivers/i2c/busses/i2c-cpm.c
@@ -65,6 +65,9 @@ struct i2c_ram {
 	char    res1[4];	/* Reserved */
 	ushort  rpbase;		/* Relocation pointer */
 	char    res2[2];	/* Reserved */
+	/* The following elements are only for CPM2 */
+	char    res3[4];	/* Reserved */
+	uint    sdmatmp;	/* Internal */
 };
 
 #define I2COM_START	0x80

From 8947efc077168c53b84d039881a7c967086a248a Mon Sep 17 00:00:00 2001
From: Tali Perry <tali.perry1@gmail.com>
Date: Sun, 20 Sep 2020 23:48:09 +0300
Subject: [PATCH 195/322] i2c: npcm7xx: Clear LAST bit after a failed
 transaction.

Due to a HW issue, in some scenarios the LAST bit might remain set.
This will cause an unexpected NACK after reading 16 bytes on the next
read.

Example: if user tries to read from a missing device, get a NACK,
then if the next command is a long read ( > 16 bytes),
the master will stop reading after 16 bytes.
To solve this, if a command fails, check if LAST bit is still
set. If it does, reset the module.

Fixes: 56a1485b102e (i2c: npcm7xx: Add Nuvoton NPCM I2C controller driver)
Signed-off-by: Tali Perry <tali.perry1@gmail.com>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-npcm7xx.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/i2c/busses/i2c-npcm7xx.c b/drivers/i2c/busses/i2c-npcm7xx.c
index dfcf04e1967f..2ad166355ec9 100644
--- a/drivers/i2c/busses/i2c-npcm7xx.c
+++ b/drivers/i2c/busses/i2c-npcm7xx.c
@@ -2163,6 +2163,15 @@ static int npcm_i2c_master_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs,
 	if (bus->cmd_err == -EAGAIN)
 		ret = i2c_recover_bus(adap);
 
+	/*
+	 * After any type of error, check if LAST bit is still set,
+	 * due to a HW issue.
+	 * It cannot be cleared without resetting the module.
+	 */
+	if (bus->cmd_err &&
+	    (NPCM_I2CRXF_CTL_LAST_PEC & ioread8(bus->reg + NPCM_I2CRXF_CTL)))
+		npcm_i2c_reset(bus);
+
 #if IS_ENABLED(CONFIG_I2C_SLAVE)
 	/* reenable slave if it was enabled */
 	if (bus->slave)

From 008cfe4418b3dbda2ff820cdd7b1a5ce458ae444 Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Fri, 25 Sep 2020 18:25:57 -0400
Subject: [PATCH 196/322] mm: Introduce mm_struct.has_pinned

(Commit message majorly collected from Jason Gunthorpe)

Reduce the chance of false positive from page_maybe_dma_pinned() by
keeping track if the mm_struct has ever been used with pin_user_pages().
This allows cases that might drive up the page ref_count to avoid any
penalty from handling dma_pinned pages.

Future work is planned, to provide a more sophisticated solution, likely
to turn it into a real counter.  For now, make it atomic_t but use it as
a boolean for simplicity.

Suggested-by: Jason Gunthorpe <jgg@ziepe.ca>
Signed-off-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_types.h | 10 ++++++++++
 kernel/fork.c            |  1 +
 mm/gup.c                 |  6 ++++++
 3 files changed, 17 insertions(+)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 496c3ff97cce..ed028af3cb19 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -436,6 +436,16 @@ struct mm_struct {
 		 */
 		atomic_t mm_count;
 
+		/**
+		 * @has_pinned: Whether this mm has pinned any pages.  This can
+		 * be either replaced in the future by @pinned_vm when it
+		 * becomes stable, or grow into a counter on its own. We're
+		 * aggresive on this bit now - even if the pinned pages were
+		 * unpinned later on, we'll still keep this bit set for the
+		 * lifecycle of this mm just for simplicity.
+		 */
+		atomic_t has_pinned;
+
 #ifdef CONFIG_MMU
 		atomic_long_t pgtables_bytes;	/* PTE page table pages */
 #endif
diff --git a/kernel/fork.c b/kernel/fork.c
index 49677d668de4..e65d8192d080 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1011,6 +1011,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 	mm_pgtables_bytes_init(mm);
 	mm->map_count = 0;
 	mm->locked_vm = 0;
+	atomic_set(&mm->has_pinned, 0);
 	atomic64_set(&mm->pinned_vm, 0);
 	memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
 	spin_lock_init(&mm->page_table_lock);
diff --git a/mm/gup.c b/mm/gup.c
index 578bf5bd8bf8..dfe781d2ad4c 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1255,6 +1255,9 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
 		BUG_ON(*locked != 1);
 	}
 
+	if (flags & FOLL_PIN)
+		atomic_set(&current->mm->has_pinned, 1);
+
 	/*
 	 * FOLL_PIN and FOLL_GET are mutually exclusive. Traditional behavior
 	 * is to set FOLL_GET if the caller wants pages[] filled in (but has
@@ -2660,6 +2663,9 @@ static int internal_get_user_pages_fast(unsigned long start, int nr_pages,
 				       FOLL_FAST_ONLY)))
 		return -EINVAL;
 
+	if (gup_flags & FOLL_PIN)
+		atomic_set(&current->mm->has_pinned, 1);
+
 	if (!(gup_flags & FOLL_FAST_ONLY))
 		might_lock_read(&current->mm->mmap_lock);
 

From 7a4830c380f3a8b3425f6383deff58e65b2557b5 Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Fri, 25 Sep 2020 18:25:58 -0400
Subject: [PATCH 197/322] mm/fork: Pass new vma pointer into copy_page_range()

This prepares for the future work to trigger early cow on pinned pages
during fork().

No functional change intended.

Signed-off-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h |  2 +-
 kernel/fork.c      |  2 +-
 mm/memory.c        | 14 +++++++++-----
 3 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 48614513eb66..16b799a0522c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1646,7 +1646,7 @@ struct mmu_notifier_range;
 void free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
 		unsigned long end, unsigned long floor, unsigned long ceiling);
 int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
-			struct vm_area_struct *vma);
+		    struct vm_area_struct *vma, struct vm_area_struct *new);
 int follow_pte_pmd(struct mm_struct *mm, unsigned long address,
 		   struct mmu_notifier_range *range,
 		   pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp);
diff --git a/kernel/fork.c b/kernel/fork.c
index e65d8192d080..da8d360fb032 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -589,7 +589,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
 
 		mm->map_count++;
 		if (!(tmp->vm_flags & VM_WIPEONFORK))
-			retval = copy_page_range(mm, oldmm, mpnt);
+			retval = copy_page_range(mm, oldmm, mpnt, tmp);
 
 		if (tmp->vm_ops && tmp->vm_ops->open)
 			tmp->vm_ops->open(tmp);
diff --git a/mm/memory.c b/mm/memory.c
index f3eb55975902..d56178721452 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -819,6 +819,7 @@ copy_present_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 
 static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		   pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma,
+		   struct vm_area_struct *new,
 		   unsigned long addr, unsigned long end)
 {
 	pte_t *orig_src_pte, *orig_dst_pte;
@@ -889,6 +890,7 @@ again:
 
 static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		pud_t *dst_pud, pud_t *src_pud, struct vm_area_struct *vma,
+		struct vm_area_struct *new,
 		unsigned long addr, unsigned long end)
 {
 	pmd_t *src_pmd, *dst_pmd;
@@ -915,7 +917,7 @@ static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src
 		if (pmd_none_or_clear_bad(src_pmd))
 			continue;
 		if (copy_pte_range(dst_mm, src_mm, dst_pmd, src_pmd,
-						vma, addr, next))
+				   vma, new, addr, next))
 			return -ENOMEM;
 	} while (dst_pmd++, src_pmd++, addr = next, addr != end);
 	return 0;
@@ -923,6 +925,7 @@ static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src
 
 static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		p4d_t *dst_p4d, p4d_t *src_p4d, struct vm_area_struct *vma,
+		struct vm_area_struct *new,
 		unsigned long addr, unsigned long end)
 {
 	pud_t *src_pud, *dst_pud;
@@ -949,7 +952,7 @@ static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src
 		if (pud_none_or_clear_bad(src_pud))
 			continue;
 		if (copy_pmd_range(dst_mm, src_mm, dst_pud, src_pud,
-						vma, addr, next))
+				   vma, new, addr, next))
 			return -ENOMEM;
 	} while (dst_pud++, src_pud++, addr = next, addr != end);
 	return 0;
@@ -957,6 +960,7 @@ static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src
 
 static inline int copy_p4d_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma,
+		struct vm_area_struct *new,
 		unsigned long addr, unsigned long end)
 {
 	p4d_t *src_p4d, *dst_p4d;
@@ -971,14 +975,14 @@ static inline int copy_p4d_range(struct mm_struct *dst_mm, struct mm_struct *src
 		if (p4d_none_or_clear_bad(src_p4d))
 			continue;
 		if (copy_pud_range(dst_mm, src_mm, dst_p4d, src_p4d,
-						vma, addr, next))
+				   vma, new, addr, next))
 			return -ENOMEM;
 	} while (dst_p4d++, src_p4d++, addr = next, addr != end);
 	return 0;
 }
 
 int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
-		struct vm_area_struct *vma)
+		    struct vm_area_struct *vma, struct vm_area_struct *new)
 {
 	pgd_t *src_pgd, *dst_pgd;
 	unsigned long next;
@@ -1033,7 +1037,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		if (pgd_none_or_clear_bad(src_pgd))
 			continue;
 		if (unlikely(copy_p4d_range(dst_mm, src_mm, dst_pgd, src_pgd,
-					    vma, addr, next))) {
+					    vma, new, addr, next))) {
 			ret = -ENOMEM;
 			break;
 		}

From 70e806e4e645019102d0e09d4933654fb5fb58ce Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Fri, 25 Sep 2020 18:25:59 -0400
Subject: [PATCH 198/322] mm: Do early cow for pinned pages during fork() for
 ptes

This allows copy_pte_range() to do early cow if the pages were pinned on
the source mm.

Currently we don't have an accurate way to know whether a page is pinned
or not.  The only thing we have is page_maybe_dma_pinned().  However
that's good enough for now.  Especially, with the newly added
mm->has_pinned flag to make sure we won't affect processes that never
pinned any pages.

It would be easier if we can do GFP_KERNEL allocation within
copy_one_pte().  Unluckily, we can't because we're with the page table
locks held for both the parent and child processes.  So the page
allocation needs to be done outside copy_one_pte().

Some trick is there in copy_present_pte(), majorly the wrprotect trick
to block concurrent fast-gup.  Comments in the function should explain
better in place.

Oleg Nesterov reported a (probably harmless) bug during review that we
didn't reset entry.val properly in copy_pte_range() so that potentially
there's chance to call add_swap_count_continuation() multiple times on
the same swp entry.  However that should be harmless since even if it
happens, the same function (add_swap_count_continuation()) will return
directly noticing that there're enough space for the swp counter.  So
instead of a standalone stable patch, it is touched up in this patch
directly.

Link: https://lore.kernel.org/lkml/20200914143829.GA1424636@nvidia.com/
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memory.c | 207 +++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 190 insertions(+), 17 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index d56178721452..fcfc4ca36eba 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -773,15 +773,142 @@ copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	return 0;
 }
 
-static inline void
+/*
+ * Copy a present and normal page if necessary.
+ *
+ * NOTE! The usual case is that this doesn't need to do
+ * anything, and can just return a positive value. That
+ * will let the caller know that it can just increase
+ * the page refcount and re-use the pte the traditional
+ * way.
+ *
+ * But _if_ we need to copy it because it needs to be
+ * pinned in the parent (and the child should get its own
+ * copy rather than just a reference to the same page),
+ * we'll do that here and return zero to let the caller
+ * know we're done.
+ *
+ * And if we need a pre-allocated page but don't yet have
+ * one, return a negative error to let the preallocation
+ * code know so that it can do so outside the page table
+ * lock.
+ */
+static inline int
+copy_present_page(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+		pte_t *dst_pte, pte_t *src_pte,
+		struct vm_area_struct *vma, struct vm_area_struct *new,
+		unsigned long addr, int *rss, struct page **prealloc,
+		pte_t pte, struct page *page)
+{
+	struct page *new_page;
+
+	if (!is_cow_mapping(vma->vm_flags))
+		return 1;
+
+	/*
+	 * The trick starts.
+	 *
+	 * What we want to do is to check whether this page may
+	 * have been pinned by the parent process.  If so,
+	 * instead of wrprotect the pte on both sides, we copy
+	 * the page immediately so that we'll always guarantee
+	 * the pinned page won't be randomly replaced in the
+	 * future.
+	 *
+	 * To achieve this, we do the following:
+	 *
+	 * 1. Write-protect the pte if it's writable.  This is
+	 *    to protect concurrent write fast-gup with
+	 *    FOLL_PIN, so that we'll fail the fast-gup with
+	 *    the write bit removed.
+	 *
+	 * 2. Check page_maybe_dma_pinned() to see whether this
+	 *    page may have been pinned.
+	 *
+	 * The order of these steps is important to serialize
+	 * against the fast-gup code (gup_pte_range()) on the
+	 * pte check and try_grab_compound_head(), so that
+	 * we'll make sure either we'll capture that fast-gup
+	 * so we'll copy the pinned page here, or we'll fail
+	 * that fast-gup.
+	 *
+	 * NOTE! Even if we don't end up copying the page,
+	 * we won't undo this wrprotect(), because the normal
+	 * reference copy will need it anyway.
+	 */
+	if (pte_write(pte))
+		ptep_set_wrprotect(src_mm, addr, src_pte);
+
+	/*
+	 * These are the "normally we can just copy by reference"
+	 * checks.
+	 */
+	if (likely(!atomic_read(&src_mm->has_pinned)))
+		return 1;
+	if (likely(!page_maybe_dma_pinned(page)))
+		return 1;
+
+	/*
+	 * Uhhuh. It looks like the page might be a pinned page,
+	 * and we actually need to copy it. Now we can set the
+	 * source pte back to being writable.
+	 */
+	if (pte_write(pte))
+		set_pte_at(src_mm, addr, src_pte, pte);
+
+	new_page = *prealloc;
+	if (!new_page)
+		return -EAGAIN;
+
+	/*
+	 * We have a prealloc page, all good!  Take it
+	 * over and copy the page & arm it.
+	 */
+	*prealloc = NULL;
+	copy_user_highpage(new_page, page, addr, vma);
+	__SetPageUptodate(new_page);
+	page_add_new_anon_rmap(new_page, new, addr, false);
+	lru_cache_add_inactive_or_unevictable(new_page, new);
+	rss[mm_counter(new_page)]++;
+
+	/* All done, just insert the new page copy in the child */
+	pte = mk_pte(new_page, new->vm_page_prot);
+	pte = maybe_mkwrite(pte_mkdirty(pte), new);
+	set_pte_at(dst_mm, addr, dst_pte, pte);
+	return 0;
+}
+
+/*
+ * Copy one pte.  Returns 0 if succeeded, or -EAGAIN if one preallocated page
+ * is required to copy this pte.
+ */
+static inline int
 copy_present_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
-		unsigned long addr, int *rss)
+		struct vm_area_struct *new,
+		unsigned long addr, int *rss, struct page **prealloc)
 {
 	unsigned long vm_flags = vma->vm_flags;
 	pte_t pte = *src_pte;
 	struct page *page;
 
+	page = vm_normal_page(vma, addr, pte);
+	if (page) {
+		int retval;
+
+		retval = copy_present_page(dst_mm, src_mm,
+			dst_pte, src_pte,
+			vma, new,
+			addr, rss, prealloc,
+			pte, page);
+		if (retval <= 0)
+			return retval;
+
+		get_page(page);
+		page_dup_rmap(page, false);
+		rss[mm_counter(page)]++;
+	}
+
 	/*
 	 * If it's a COW mapping, write protect it both
 	 * in the parent and the child
@@ -807,14 +934,27 @@ copy_present_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	if (!(vm_flags & VM_UFFD_WP))
 		pte = pte_clear_uffd_wp(pte);
 
-	page = vm_normal_page(vma, addr, pte);
-	if (page) {
-		get_page(page);
-		page_dup_rmap(page, false);
-		rss[mm_counter(page)]++;
-	}
-
 	set_pte_at(dst_mm, addr, dst_pte, pte);
+	return 0;
+}
+
+static inline struct page *
+page_copy_prealloc(struct mm_struct *src_mm, struct vm_area_struct *vma,
+		   unsigned long addr)
+{
+	struct page *new_page;
+
+	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, addr);
+	if (!new_page)
+		return NULL;
+
+	if (mem_cgroup_charge(new_page, src_mm, GFP_KERNEL)) {
+		put_page(new_page);
+		return NULL;
+	}
+	cgroup_throttle_swaprate(new_page, GFP_KERNEL);
+
+	return new_page;
 }
 
 static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
@@ -825,16 +965,20 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	pte_t *orig_src_pte, *orig_dst_pte;
 	pte_t *src_pte, *dst_pte;
 	spinlock_t *src_ptl, *dst_ptl;
-	int progress = 0;
+	int progress, ret = 0;
 	int rss[NR_MM_COUNTERS];
 	swp_entry_t entry = (swp_entry_t){0};
+	struct page *prealloc = NULL;
 
 again:
+	progress = 0;
 	init_rss_vec(rss);
 
 	dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
-	if (!dst_pte)
-		return -ENOMEM;
+	if (!dst_pte) {
+		ret = -ENOMEM;
+		goto out;
+	}
 	src_pte = pte_offset_map(src_pmd, addr);
 	src_ptl = pte_lockptr(src_mm, src_pmd);
 	spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
@@ -866,8 +1010,25 @@ again:
 			progress += 8;
 			continue;
 		}
-		copy_present_pte(dst_mm, src_mm, dst_pte, src_pte,
-				 vma, addr, rss);
+		/* copy_present_pte() will clear `*prealloc' if consumed */
+		ret = copy_present_pte(dst_mm, src_mm, dst_pte, src_pte,
+				       vma, new, addr, rss, &prealloc);
+		/*
+		 * If we need a pre-allocated page for this pte, drop the
+		 * locks, allocate, and try again.
+		 */
+		if (unlikely(ret == -EAGAIN))
+			break;
+		if (unlikely(prealloc)) {
+			/*
+			 * pre-alloc page cannot be reused by next time so as
+			 * to strictly follow mempolicy (e.g., alloc_page_vma()
+			 * will allocate page according to address).  This
+			 * could only happen if one pinned pte changed.
+			 */
+			put_page(prealloc);
+			prealloc = NULL;
+		}
 		progress += 8;
 	} while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
 
@@ -879,13 +1040,25 @@ again:
 	cond_resched();
 
 	if (entry.val) {
-		if (add_swap_count_continuation(entry, GFP_KERNEL) < 0)
+		if (add_swap_count_continuation(entry, GFP_KERNEL) < 0) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		entry.val = 0;
+	} else if (ret) {
+		WARN_ON_ONCE(ret != -EAGAIN);
+		prealloc = page_copy_prealloc(src_mm, vma, addr);
+		if (!prealloc)
 			return -ENOMEM;
-		progress = 0;
+		/* We've captured and resolved the error. Reset, try again. */
+		ret = 0;
 	}
 	if (addr != end)
 		goto again;
-	return 0;
+out:
+	if (unlikely(prealloc))
+		put_page(prealloc);
+	return ret;
 }
 
 static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,

From d042035eaf5f9009ad927dc4d3ce848381ccdeed Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Fri, 25 Sep 2020 18:26:00 -0400
Subject: [PATCH 199/322] mm/thp: Split huge pmds/puds if they're pinned when
 fork()

Pinned pages shouldn't be write-protected when fork() happens, because
follow up copy-on-write on these pages could cause the pinned pages to
be replaced by random newly allocated pages.

For huge PMDs, we split the huge pmd if pinning is detected.  So that
future handling will be done by the PTE level (with our latest changes,
each of the small pages will be copied).  We can achieve this by let
copy_huge_pmd() return -EAGAIN for pinned pages, so that we'll
fallthrough in copy_pmd_range() and finally land the next
copy_pte_range() call.

Huge PUDs will be even more special - so far it does not support
anonymous pages.  But it can actually be done the same as the huge PMDs
even if the split huge PUDs means to erase the PUD entries.  It'll
guarantee the follow up fault ins will remap the same pages in either
parent/child later.

This might not be the most efficient way, but it should be easy and
clean enough.  It should be fine, since we're tackling with a very rare
case just to make sure userspaces that pinned some thps will still work
even without MADV_DONTFORK and after they fork()ed.

Signed-off-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/huge_memory.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index faadc449cca5..da397779a6d4 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1074,6 +1074,24 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 
 	src_page = pmd_page(pmd);
 	VM_BUG_ON_PAGE(!PageHead(src_page), src_page);
+
+	/*
+	 * If this page is a potentially pinned page, split and retry the fault
+	 * with smaller page size.  Normally this should not happen because the
+	 * userspace should use MADV_DONTFORK upon pinned regions.  This is a
+	 * best effort that the pinned pages won't be replaced by another
+	 * random page during the coming copy-on-write.
+	 */
+	if (unlikely(is_cow_mapping(vma->vm_flags) &&
+		     atomic_read(&src_mm->has_pinned) &&
+		     page_maybe_dma_pinned(src_page))) {
+		pte_free(dst_mm, pgtable);
+		spin_unlock(src_ptl);
+		spin_unlock(dst_ptl);
+		__split_huge_pmd(vma, src_pmd, addr, false, NULL);
+		return -EAGAIN;
+	}
+
 	get_page(src_page);
 	page_dup_rmap(src_page, true);
 	add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR);
@@ -1177,6 +1195,16 @@ int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		/* No huge zero pud yet */
 	}
 
+	/* Please refer to comments in copy_huge_pmd() */
+	if (unlikely(is_cow_mapping(vma->vm_flags) &&
+		     atomic_read(&src_mm->has_pinned) &&
+		     page_maybe_dma_pinned(pud_page(pud)))) {
+		spin_unlock(src_ptl);
+		spin_unlock(dst_ptl);
+		__split_huge_pud(vma, src_pud, addr);
+		return -EAGAIN;
+	}
+
 	pudp_set_wrprotect(src_mm, addr, src_pud);
 	pud = pud_mkold(pud_wrprotect(pud));
 	set_pud_at(dst_mm, addr, dst_pud, pud);

From 7dbbcf496f2a4b6d82cfc7810a0746e160b79762 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sat, 26 Sep 2020 21:33:43 -0700
Subject: [PATCH 200/322] mdio: fix mdio-thunder.c dependency & build error

Fix build error by selecting MDIO_DEVRES for MDIO_THUNDER.
Fixes this build error:

ld: drivers/net/phy/mdio-thunder.o: in function `thunder_mdiobus_pci_probe':
drivers/net/phy/mdio-thunder.c:78: undefined reference to `devm_mdiobus_alloc_size'

Fixes: 379d7ac7ca31 ("phy: mdio-thunder: Add driver for Cavium Thunder SoC MDIO buses.")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Cc: Andrew Lunn <andrew@lunn.ch>
Cc: Heiner Kallweit <hkallweit1@gmail.com>
Cc: netdev@vger.kernel.org
Cc: David Daney <david.daney@cavium.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index 726e4b240e7e..1c5a10b672fc 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -222,6 +222,7 @@ config MDIO_THUNDER
 	depends on 64BIT
 	depends on PCI
 	select MDIO_CAVIUM
+	select MDIO_DEVRES
 	help
 	  This driver supports the MDIO interfaces found on Cavium
 	  ThunderX SoCs when the MDIO bus device appears as a PCI

From 72865028582a678be1e05240e55d452e5c258eca Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Sun, 27 Sep 2020 09:42:11 +0300
Subject: [PATCH 201/322] mlxsw: spectrum_acl: Fix
 mlxsw_sp_acl_tcam_group_add()'s error path

If mlxsw_sp_acl_tcam_group_id_get() fails, the mutex initialized earlier
is not destroyed.

Fix this by initializing the mutex after calling the function. This is
symmetric to mlxsw_sp_acl_tcam_group_del().

Fixes: 5ec2ee28d27b ("mlxsw: spectrum_acl: Introduce a mutex to guard region list updates")
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
index 5c020403342f..7cccc41dd69c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
@@ -292,13 +292,14 @@ mlxsw_sp_acl_tcam_group_add(struct mlxsw_sp_acl_tcam *tcam,
 	int err;
 
 	group->tcam = tcam;
-	mutex_init(&group->lock);
 	INIT_LIST_HEAD(&group->region_list);
 
 	err = mlxsw_sp_acl_tcam_group_id_get(tcam, &group->id);
 	if (err)
 		return err;
 
+	mutex_init(&group->lock);
+
 	return 0;
 }
 

From 307eea32b202864c98d28174c14ddbf5b9367722 Mon Sep 17 00:00:00 2001
From: Marian-Cristian Rotariu <marian-cristian.rotariu.rb@bp.renesas.com>
Date: Sun, 27 Sep 2020 13:34:39 +0100
Subject: [PATCH 202/322] dt-bindings: net: renesas,ravb: Add support for
 r8a774e1 SoC

Document RZ/G2H (R8A774E1) SoC bindings.

Signed-off-by: Marian-Cristian Rotariu <marian-cristian.rotariu.rb@bp.renesas.com>
Signed-off-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Reviewed-by: Sergei Shtylyov <sergei.shtylyov@gmail.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/renesas,ravb.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/net/renesas,ravb.txt b/Documentation/devicetree/bindings/net/renesas,ravb.txt
index 032b76f14f4f..9119f1caf391 100644
--- a/Documentation/devicetree/bindings/net/renesas,ravb.txt
+++ b/Documentation/devicetree/bindings/net/renesas,ravb.txt
@@ -21,6 +21,7 @@ Required properties:
       - "renesas,etheravb-r8a774a1" for the R8A774A1 SoC.
       - "renesas,etheravb-r8a774b1" for the R8A774B1 SoC.
       - "renesas,etheravb-r8a774c0" for the R8A774C0 SoC.
+      - "renesas,etheravb-r8a774e1" for the R8A774E1 SoC.
       - "renesas,etheravb-r8a7795" for the R8A7795 SoC.
       - "renesas,etheravb-r8a7796" for the R8A77960 SoC.
       - "renesas,etheravb-r8a77961" for the R8A77961 SoC.

From 709a16be0593c08190982cfbdca6df95e6d5823b Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sun, 27 Sep 2020 19:44:29 +0200
Subject: [PATCH 203/322] r8169: fix RTL8168f/RTL8411 EPHY config

Mistakenly bit 2 was set instead of bit 3 as in the vendor driver.

Fixes: a7a92cf81589 ("r8169: sync PCIe PHY init with vendor driver 8.047.01")
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index fc9e6626db55..5e867da1aad3 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -2904,7 +2904,7 @@ static void rtl_hw_start_8168f_1(struct rtl8169_private *tp)
 		{ 0x08, 0x0001,	0x0002 },
 		{ 0x09, 0x0000,	0x0080 },
 		{ 0x19, 0x0000,	0x0224 },
-		{ 0x00, 0x0000,	0x0004 },
+		{ 0x00, 0x0000,	0x0008 },
 		{ 0x0c, 0x3df0,	0x0200 },
 	};
 
@@ -2921,7 +2921,7 @@ static void rtl_hw_start_8411(struct rtl8169_private *tp)
 		{ 0x06, 0x00c0,	0x0020 },
 		{ 0x0f, 0xffff,	0x5200 },
 		{ 0x19, 0x0000,	0x0224 },
-		{ 0x00, 0x0000,	0x0004 },
+		{ 0x00, 0x0000,	0x0008 },
 		{ 0x0c, 0x3df0,	0x0200 },
 	};
 

From a1b8638ba1320e6684aa98233c15255eb803fac7 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 27 Sep 2020 14:38:10 -0700
Subject: [PATCH 204/322] Linux 5.9-rc7

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 2b66d3398878..992d24467ca0 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 5
 PATCHLEVEL = 9
 SUBLEVEL = 0
-EXTRAVERSION = -rc6
+EXTRAVERSION = -rc7
 NAME = Kleptomaniac Octopus
 
 # *DOCUMENTATION*

From 62c59a8786e6bb75569cee91dab66e9da3ff4b68 Mon Sep 17 00:00:00 2001
From: Kai-Heng Feng <kai.heng.feng@canonical.com>
Date: Fri, 25 Sep 2020 16:49:51 +0800
Subject: [PATCH 205/322] memstick: Skip allocating card when removing host

After commit 6827ca573c03 ("memstick: rtsx_usb_ms: Support runtime power
management"), removing module rtsx_usb_ms will be stuck.

The deadlock is caused by powering on and powering off at the same time,
the former one is when memstick_check() is flushed, and the later is called
by memstick_remove_host().

Soe let's skip allocating card to prevent this issue.

Fixes: 6827ca573c03 ("memstick: rtsx_usb_ms: Support runtime power management")
Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Link: https://lore.kernel.org/r/20200925084952.13220-1-kai.heng.feng@canonical.com
Cc: stable@vger.kernel.org
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/memstick/core/memstick.c | 4 ++++
 include/linux/memstick.h         | 1 +
 2 files changed, 5 insertions(+)

diff --git a/drivers/memstick/core/memstick.c b/drivers/memstick/core/memstick.c
index 693ee73eb291..ef03d6fafc5c 100644
--- a/drivers/memstick/core/memstick.c
+++ b/drivers/memstick/core/memstick.c
@@ -441,6 +441,9 @@ static void memstick_check(struct work_struct *work)
 	} else if (host->card->stop)
 		host->card->stop(host->card);
 
+	if (host->removing)
+		goto out_power_off;
+
 	card = memstick_alloc_card(host);
 
 	if (!card) {
@@ -545,6 +548,7 @@ EXPORT_SYMBOL(memstick_add_host);
  */
 void memstick_remove_host(struct memstick_host *host)
 {
+	host->removing = 1;
 	flush_workqueue(workqueue);
 	mutex_lock(&host->lock);
 	if (host->card)
diff --git a/include/linux/memstick.h b/include/linux/memstick.h
index da4c65f9435f..ebf73d4ee969 100644
--- a/include/linux/memstick.h
+++ b/include/linux/memstick.h
@@ -281,6 +281,7 @@ struct memstick_host {
 
 	struct memstick_dev *card;
 	unsigned int        retries;
+	bool removing;
 
 	/* Notify the host that some requests are pending. */
 	void                (*request)(struct memstick_host *host);

From afd7f30886b0b445a4240a99020458a9772f2b89 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sun, 27 Sep 2020 12:48:21 +0200
Subject: [PATCH 206/322] mmc: sdhci: Workaround broken command queuing on
 Intel GLK based IRBIS models

Commit bedf9fc01ff1 ("mmc: sdhci: Workaround broken command queuing on
Intel GLK"), disabled command-queuing on Intel GLK based LENOVO models
because of it being broken due to what is believed to be a bug in
the BIOS.

It seems that the BIOS of some IRBIS models, including the IRBIS NB111
model has the same issue, so disable command queuing there too.

Fixes: bedf9fc01ff1 ("mmc: sdhci: Workaround broken command queuing on Intel GLK")
BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=209397
Reported-and-tested-by: RussianNeuroMancer <russianneuromancer@ya.ru>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Link: https://lore.kernel.org/r/20200927104821.5676-1-hdegoede@redhat.com
Cc: stable@vger.kernel.org
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-pci-core.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c
index af413805bbf1..914f5184295f 100644
--- a/drivers/mmc/host/sdhci-pci-core.c
+++ b/drivers/mmc/host/sdhci-pci-core.c
@@ -794,7 +794,8 @@ static int byt_emmc_probe_slot(struct sdhci_pci_slot *slot)
 static bool glk_broken_cqhci(struct sdhci_pci_slot *slot)
 {
 	return slot->chip->pdev->device == PCI_DEVICE_ID_INTEL_GLK_EMMC &&
-	       dmi_match(DMI_BIOS_VENDOR, "LENOVO");
+	       (dmi_match(DMI_BIOS_VENDOR, "LENOVO") ||
+		dmi_match(DMI_SYS_VENDOR, "IRBIS"));
 }
 
 static int glk_emmc_probe_slot(struct sdhci_pci_slot *slot)

From d25e8fdebdad84219b498873300b7f11dd915b88 Mon Sep 17 00:00:00 2001
From: Ed Wildgoose <lists@wildgooses.com>
Date: Mon, 28 Sep 2020 10:44:52 +0100
Subject: [PATCH 207/322] gpio: amd-fch: correct logic of GPIO_LINE_DIRECTION

The original commit appears to have the logic reversed in
amd_fch_gpio_get_direction. Also confirmed by observing the value of
"direction" in the sys tree.

Signed-off-by: Ed Wildgoose <lists@wildgooses.com>
Fixes: e09d168f13f0 ("gpio: AMD G-Series PCH gpio driver")
Cc: stable@vger.kernel.org
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
---
 drivers/gpio/gpio-amd-fch.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpio/gpio-amd-fch.c b/drivers/gpio/gpio-amd-fch.c
index 4e44ba4d7423..2a21354ed6a0 100644
--- a/drivers/gpio/gpio-amd-fch.c
+++ b/drivers/gpio/gpio-amd-fch.c
@@ -92,7 +92,7 @@ static int amd_fch_gpio_get_direction(struct gpio_chip *gc, unsigned int gpio)
 	ret = (readl_relaxed(ptr) & AMD_FCH_GPIO_FLAG_DIRECTION);
 	spin_unlock_irqrestore(&priv->lock, flags);
 
-	return ret ? GPIO_LINE_DIRECTION_IN : GPIO_LINE_DIRECTION_OUT;
+	return ret ? GPIO_LINE_DIRECTION_OUT : GPIO_LINE_DIRECTION_IN;
 }
 
 static void amd_fch_gpio_set(struct gpio_chip *gc,

From 6a7548305a04ae0cd9e9f062c050f265d4bf2d2e Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime@cerno.tech>
Date: Thu, 24 Sep 2020 10:26:42 +0200
Subject: [PATCH 208/322] ARM: dts: bcm2835: Change firmware compatible from
 simple-bus to simple-mfd

The current binding for the RPi firmware uses the simple-bus compatible as
a fallback to benefit from its automatic probing of child nodes.

However, simple-bus also comes with some constraints, like having the ranges,
our case.

Let's switch to simple-mfd that provides the same probing logic without
those constraints.

Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Link: https://lore.kernel.org/r/20200924082642.18144-1-maxime@cerno.tech
Signed-off-by: Rob Herring <robh@kernel.org>
---
 .../bindings/arm/bcm/raspberrypi,bcm2835-firmware.yaml        | 4 ++--
 arch/arm/boot/dts/bcm2835-rpi.dtsi                            | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/devicetree/bindings/arm/bcm/raspberrypi,bcm2835-firmware.yaml b/Documentation/devicetree/bindings/arm/bcm/raspberrypi,bcm2835-firmware.yaml
index 17e4f20c8d39..6834f5e8df5f 100644
--- a/Documentation/devicetree/bindings/arm/bcm/raspberrypi,bcm2835-firmware.yaml
+++ b/Documentation/devicetree/bindings/arm/bcm/raspberrypi,bcm2835-firmware.yaml
@@ -23,7 +23,7 @@ properties:
   compatible:
     items:
       - const: raspberrypi,bcm2835-firmware
-      - const: simple-bus
+      - const: simple-mfd
 
   mboxes:
     $ref: '/schemas/types.yaml#/definitions/phandle'
@@ -57,7 +57,7 @@ required:
 examples:
   - |
     firmware {
-        compatible = "raspberrypi,bcm2835-firmware", "simple-bus";
+        compatible = "raspberrypi,bcm2835-firmware", "simple-mfd";
         mboxes = <&mailbox>;
 
         firmware_clocks: clocks {
diff --git a/arch/arm/boot/dts/bcm2835-rpi.dtsi b/arch/arm/boot/dts/bcm2835-rpi.dtsi
index f7ae5a4530b8..d94357b21f7e 100644
--- a/arch/arm/boot/dts/bcm2835-rpi.dtsi
+++ b/arch/arm/boot/dts/bcm2835-rpi.dtsi
@@ -13,7 +13,7 @@
 
 	soc {
 		firmware: firmware {
-			compatible = "raspberrypi,bcm2835-firmware", "simple-bus";
+			compatible = "raspberrypi,bcm2835-firmware", "simple-mfd";
 			#address-cells = <1>;
 			#size-cells = <1>;
 

From 8706e04ed7d6c95004d42b22a4db97d5b2eb73b2 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Mon, 28 Sep 2020 08:38:54 -0600
Subject: [PATCH 209/322] io_uring: always delete double poll wait entry on
 match

syzbot reports a crash with tty polling, which is using the double poll
handling:

general protection fault, probably for non-canonical address 0xdffffc0000000009: 0000 [#1] PREEMPT SMP KASAN
KASAN: null-ptr-deref in range [0x0000000000000048-0x000000000000004f]
CPU: 0 PID: 6874 Comm: syz-executor749 Not tainted 5.9.0-rc6-next-20200924-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
RIP: 0010:io_poll_get_single fs/io_uring.c:4778 [inline]
RIP: 0010:io_poll_double_wake+0x51/0x510 fs/io_uring.c:4845
Code: fc ff df 48 c1 ea 03 80 3c 02 00 0f 85 9e 03 00 00 48 b8 00 00 00 00 00 fc ff df 49 8b 5d 08 48 8d 7b 48 48 89 fa 48 c1 ea 03 <0f> b6 04 02 84 c0 74 06 0f 8e 63 03 00 00 0f b6 6b 48 bf 06 00 00
RSP: 0018:ffffc90001c1fb70 EFLAGS: 00010006
RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 0000000000000004
RDX: 0000000000000009 RSI: ffffffff81d9b3ad RDI: 0000000000000048
RBP: dffffc0000000000 R08: ffff8880a3cac798 R09: ffffc90001c1fc60
R10: fffff52000383f73 R11: 0000000000000000 R12: 0000000000000004
R13: ffff8880a3cac798 R14: ffff8880a3cac7a0 R15: 0000000000000004
FS:  0000000001f98880(0000) GS:ffff8880ae400000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f18886916c0 CR3: 0000000094c5a000 CR4: 00000000001506f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
 __wake_up_common+0x147/0x650 kernel/sched/wait.c:93
 __wake_up_common_lock+0xd0/0x130 kernel/sched/wait.c:123
 tty_ldisc_hangup+0x1cf/0x680 drivers/tty/tty_ldisc.c:735
 __tty_hangup.part.0+0x403/0x870 drivers/tty/tty_io.c:625
 __tty_hangup drivers/tty/tty_io.c:575 [inline]
 tty_vhangup+0x1d/0x30 drivers/tty/tty_io.c:698
 pty_close+0x3f5/0x550 drivers/tty/pty.c:79
 tty_release+0x455/0xf60 drivers/tty/tty_io.c:1679
 __fput+0x285/0x920 fs/file_table.c:281
 task_work_run+0xdd/0x190 kernel/task_work.c:141
 tracehook_notify_resume include/linux/tracehook.h:188 [inline]
 exit_to_user_mode_loop kernel/entry/common.c:165 [inline]
 exit_to_user_mode_prepare+0x1e2/0x1f0 kernel/entry/common.c:192
 syscall_exit_to_user_mode+0x7a/0x2c0 kernel/entry/common.c:267
 entry_SYSCALL_64_after_hwframe+0x44/0xa9
RIP: 0033:0x401210

which is due to a failure in removing the double poll wait entry if we
hit a wakeup match. This can cause multiple invocations of the wakeup,
which isn't safe.

Cc: stable@vger.kernel.org # v5.8
Reported-by: syzbot+81b3883093f772addf6d@syzkaller.appspotmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 11b8e428300d..5dcc77b42f0d 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -4743,6 +4743,8 @@ static int io_poll_double_wake(struct wait_queue_entry *wait, unsigned mode,
 	if (mask && !(mask & poll->events))
 		return 0;
 
+	list_del_init(&wait->entry);
+
 	if (poll && poll->head) {
 		bool done;
 

From fad8e0de4426a776c9bcb060555e7c09e2d08db6 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Mon, 28 Sep 2020 08:57:48 -0600
Subject: [PATCH 210/322] io_uring: fix potential ABBA deadlock in
 ->show_fdinfo()

syzbot reports a potential lock deadlock between the normal IO path and
->show_fdinfo():

======================================================
WARNING: possible circular locking dependency detected
5.9.0-rc6-syzkaller #0 Not tainted
------------------------------------------------------
syz-executor.2/19710 is trying to acquire lock:
ffff888098ddc450 (sb_writers#4){.+.+}-{0:0}, at: io_write+0x6b5/0xb30 fs/io_uring.c:3296

but task is already holding lock:
ffff8880a11b8428 (&ctx->uring_lock){+.+.}-{3:3}, at: __do_sys_io_uring_enter+0xe9a/0x1bd0 fs/io_uring.c:8348

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #2 (&ctx->uring_lock){+.+.}-{3:3}:
       __mutex_lock_common kernel/locking/mutex.c:956 [inline]
       __mutex_lock+0x134/0x10e0 kernel/locking/mutex.c:1103
       __io_uring_show_fdinfo fs/io_uring.c:8417 [inline]
       io_uring_show_fdinfo+0x194/0xc70 fs/io_uring.c:8460
       seq_show+0x4a8/0x700 fs/proc/fd.c:65
       seq_read+0x432/0x1070 fs/seq_file.c:208
       do_loop_readv_writev fs/read_write.c:734 [inline]
       do_loop_readv_writev fs/read_write.c:721 [inline]
       do_iter_read+0x48e/0x6e0 fs/read_write.c:955
       vfs_readv+0xe5/0x150 fs/read_write.c:1073
       kernel_readv fs/splice.c:355 [inline]
       default_file_splice_read.constprop.0+0x4e6/0x9e0 fs/splice.c:412
       do_splice_to+0x137/0x170 fs/splice.c:871
       splice_direct_to_actor+0x307/0x980 fs/splice.c:950
       do_splice_direct+0x1b3/0x280 fs/splice.c:1059
       do_sendfile+0x55f/0xd40 fs/read_write.c:1540
       __do_sys_sendfile64 fs/read_write.c:1601 [inline]
       __se_sys_sendfile64 fs/read_write.c:1587 [inline]
       __x64_sys_sendfile64+0x1cc/0x210 fs/read_write.c:1587
       do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
       entry_SYSCALL_64_after_hwframe+0x44/0xa9

-> #1 (&p->lock){+.+.}-{3:3}:
       __mutex_lock_common kernel/locking/mutex.c:956 [inline]
       __mutex_lock+0x134/0x10e0 kernel/locking/mutex.c:1103
       seq_read+0x61/0x1070 fs/seq_file.c:155
       pde_read fs/proc/inode.c:306 [inline]
       proc_reg_read+0x221/0x300 fs/proc/inode.c:318
       do_loop_readv_writev fs/read_write.c:734 [inline]
       do_loop_readv_writev fs/read_write.c:721 [inline]
       do_iter_read+0x48e/0x6e0 fs/read_write.c:955
       vfs_readv+0xe5/0x150 fs/read_write.c:1073
       kernel_readv fs/splice.c:355 [inline]
       default_file_splice_read.constprop.0+0x4e6/0x9e0 fs/splice.c:412
       do_splice_to+0x137/0x170 fs/splice.c:871
       splice_direct_to_actor+0x307/0x980 fs/splice.c:950
       do_splice_direct+0x1b3/0x280 fs/splice.c:1059
       do_sendfile+0x55f/0xd40 fs/read_write.c:1540
       __do_sys_sendfile64 fs/read_write.c:1601 [inline]
       __se_sys_sendfile64 fs/read_write.c:1587 [inline]
       __x64_sys_sendfile64+0x1cc/0x210 fs/read_write.c:1587
       do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
       entry_SYSCALL_64_after_hwframe+0x44/0xa9

-> #0 (sb_writers#4){.+.+}-{0:0}:
       check_prev_add kernel/locking/lockdep.c:2496 [inline]
       check_prevs_add kernel/locking/lockdep.c:2601 [inline]
       validate_chain kernel/locking/lockdep.c:3218 [inline]
       __lock_acquire+0x2a96/0x5780 kernel/locking/lockdep.c:4441
       lock_acquire+0x1f3/0xaf0 kernel/locking/lockdep.c:5029
       percpu_down_read include/linux/percpu-rwsem.h:51 [inline]
       __sb_start_write+0x228/0x450 fs/super.c:1672
       io_write+0x6b5/0xb30 fs/io_uring.c:3296
       io_issue_sqe+0x18f/0x5c50 fs/io_uring.c:5719
       __io_queue_sqe+0x280/0x1160 fs/io_uring.c:6175
       io_queue_sqe+0x692/0xfa0 fs/io_uring.c:6254
       io_submit_sqe fs/io_uring.c:6324 [inline]
       io_submit_sqes+0x1761/0x2400 fs/io_uring.c:6521
       __do_sys_io_uring_enter+0xeac/0x1bd0 fs/io_uring.c:8349
       do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
       entry_SYSCALL_64_after_hwframe+0x44/0xa9

other info that might help us debug this:

Chain exists of:
  sb_writers#4 --> &p->lock --> &ctx->uring_lock

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(&ctx->uring_lock);
                               lock(&p->lock);
                               lock(&ctx->uring_lock);
  lock(sb_writers#4);

 *** DEADLOCK ***

1 lock held by syz-executor.2/19710:
 #0: ffff8880a11b8428 (&ctx->uring_lock){+.+.}-{3:3}, at: __do_sys_io_uring_enter+0xe9a/0x1bd0 fs/io_uring.c:8348

stack backtrace:
CPU: 0 PID: 19710 Comm: syz-executor.2 Not tainted 5.9.0-rc6-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x198/0x1fd lib/dump_stack.c:118
 check_noncircular+0x324/0x3e0 kernel/locking/lockdep.c:1827
 check_prev_add kernel/locking/lockdep.c:2496 [inline]
 check_prevs_add kernel/locking/lockdep.c:2601 [inline]
 validate_chain kernel/locking/lockdep.c:3218 [inline]
 __lock_acquire+0x2a96/0x5780 kernel/locking/lockdep.c:4441
 lock_acquire+0x1f3/0xaf0 kernel/locking/lockdep.c:5029
 percpu_down_read include/linux/percpu-rwsem.h:51 [inline]
 __sb_start_write+0x228/0x450 fs/super.c:1672
 io_write+0x6b5/0xb30 fs/io_uring.c:3296
 io_issue_sqe+0x18f/0x5c50 fs/io_uring.c:5719
 __io_queue_sqe+0x280/0x1160 fs/io_uring.c:6175
 io_queue_sqe+0x692/0xfa0 fs/io_uring.c:6254
 io_submit_sqe fs/io_uring.c:6324 [inline]
 io_submit_sqes+0x1761/0x2400 fs/io_uring.c:6521
 __do_sys_io_uring_enter+0xeac/0x1bd0 fs/io_uring.c:8349
 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
 entry_SYSCALL_64_after_hwframe+0x44/0xa9
RIP: 0033:0x45e179
Code: 3d b2 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 0b b2 fb ff c3 66 2e 0f 1f 84 00 00 00 00
RSP: 002b:00007f1194e74c78 EFLAGS: 00000246 ORIG_RAX: 00000000000001aa
RAX: ffffffffffffffda RBX: 00000000000082c0 RCX: 000000000045e179
RDX: 0000000000000000 RSI: 0000000000000001 RDI: 0000000000000004
RBP: 000000000118cf98 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 000000000118cf4c
R13: 00007ffd1aa5756f R14: 00007f1194e759c0 R15: 000000000118cf4c

Fix this by just not diving into details if we fail to trylock the
io_uring mutex. We know the ctx isn't going away during this operation,
but we cannot safely iterate buffers/files/personalities if we don't
hold the io_uring mutex.

Reported-by: syzbot+2f8fa4e860edc3066aba@syzkaller.appspotmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 5dcc77b42f0d..556e4a2ead07 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -8414,11 +8414,19 @@ static int io_uring_show_cred(int id, void *p, void *data)
 
 static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
 {
+	bool has_lock;
 	int i;
 
-	mutex_lock(&ctx->uring_lock);
+	/*
+	 * Avoid ABBA deadlock between the seq lock and the io_uring mutex,
+	 * since fdinfo case grabs it in the opposite direction of normal use
+	 * cases. If we fail to get the lock, we just don't iterate any
+	 * structures that could be going away outside the io_uring mutex.
+	 */
+	has_lock = mutex_trylock(&ctx->uring_lock);
+
 	seq_printf(m, "UserFiles:\t%u\n", ctx->nr_user_files);
-	for (i = 0; i < ctx->nr_user_files; i++) {
+	for (i = 0; has_lock && i < ctx->nr_user_files; i++) {
 		struct fixed_file_table *table;
 		struct file *f;
 
@@ -8430,13 +8438,13 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
 			seq_printf(m, "%5u: <none>\n", i);
 	}
 	seq_printf(m, "UserBufs:\t%u\n", ctx->nr_user_bufs);
-	for (i = 0; i < ctx->nr_user_bufs; i++) {
+	for (i = 0; has_lock && i < ctx->nr_user_bufs; i++) {
 		struct io_mapped_ubuf *buf = &ctx->user_bufs[i];
 
 		seq_printf(m, "%5u: 0x%llx/%u\n", i, buf->ubuf,
 						(unsigned int) buf->len);
 	}
-	if (!idr_is_empty(&ctx->personality_idr)) {
+	if (has_lock && !idr_is_empty(&ctx->personality_idr)) {
 		seq_printf(m, "Personalities:\n");
 		idr_for_each(&ctx->personality_idr, io_uring_show_cred, m);
 	}
@@ -8451,7 +8459,8 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
 					req->task->task_works != NULL);
 	}
 	spin_unlock_irq(&ctx->completion_lock);
-	mutex_unlock(&ctx->uring_lock);
+	if (has_lock)
+		mutex_unlock(&ctx->uring_lock);
 }
 
 static void io_uring_show_fdinfo(struct seq_file *m, struct file *f)

From a4d63c3732f1a0c91abcf5b7f32b4ef7dcd82025 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Mon, 28 Sep 2020 12:35:07 +0200
Subject: [PATCH 211/322] mm: do not rely on mm == current->mm in
 __get_user_pages_locked

It seems likely this block was pasted from internal_get_user_pages_fast,
which is not passed an mm struct and therefore uses current's.  But
__get_user_pages_locked is passed an explicit mm, and current->mm is not
always valid. This was hit when being called from i915, which uses:

  pin_user_pages_remote->
    __get_user_pages_remote->
      __gup_longterm_locked->
        __get_user_pages_locked

Before, this would lead to an OOPS:

  BUG: kernel NULL pointer dereference, address: 0000000000000064
  #PF: supervisor write access in kernel mode
  #PF: error_code(0x0002) - not-present page
  CPU: 10 PID: 1431 Comm: kworker/u33:1 Tainted: P S   U     O      5.9.0-rc7+ #140
  Hardware name: LENOVO 20QTCTO1WW/20QTCTO1WW, BIOS N2OET47W (1.34 ) 08/06/2020
  Workqueue: i915-userptr-acquire __i915_gem_userptr_get_pages_worker [i915]
  RIP: 0010:__get_user_pages_remote+0xd7/0x310
  Call Trace:
   __i915_gem_userptr_get_pages_worker+0xc8/0x260 [i915]
   process_one_work+0x1ca/0x390
   worker_thread+0x48/0x3c0
   kthread+0x114/0x130
   ret_from_fork+0x1f/0x30
  CR2: 0000000000000064

This commit fixes the problem by using the mm pointer passed to the
function rather than the bogus one in current.

Fixes: 008cfe4418b3 ("mm: Introduce mm_struct.has_pinned")
Tested-by: Chris Wilson <chris@chris-wilson.co.uk>
Reported-by: Harald Arnesen <harald@skogtun.org>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/gup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/gup.c b/mm/gup.c
index dfe781d2ad4c..e869c634cc9a 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1256,7 +1256,7 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
 	}
 
 	if (flags & FOLL_PIN)
-		atomic_set(&current->mm->has_pinned, 1);
+		atomic_set(&mm->has_pinned, 1);
 
 	/*
 	 * FOLL_PIN and FOLL_GET are mutually exclusive. Traditional behavior

From f2f3729fb65c5c2e6db234e6316b71a7bdc4b30b Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@nvidia.com>
Date: Mon, 28 Sep 2020 18:30:02 +0300
Subject: [PATCH 212/322] net: bridge: fdb: don't flush ext_learn entries

When a user-space software manages fdb entries externally it should
set the ext_learn flag which marks the fdb entry as externally managed
and avoids expiring it (they're treated as static fdbs). Unfortunately
on events where fdb entries are flushed (STP down, netlink fdb flush
etc) these fdbs are also deleted automatically by the bridge. That in turn
causes trouble for the managing user-space software (e.g. in MLAG setups
we lose remote fdb entries on port flaps).
These entries are completely externally managed so we should avoid
automatically deleting them, the only exception are offloaded entries
(i.e. BR_FDB_ADDED_BY_EXT_LEARN + BR_FDB_OFFLOADED). They are flushed as
before.

Fixes: eb100e0e24a2 ("net: bridge: allow to add externally learned entries from user-space")
Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_fdb.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 9db504baa094..32ac8343b0ba 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -413,6 +413,8 @@ void br_fdb_delete_by_port(struct net_bridge *br,
 
 		if (!do_all)
 			if (test_bit(BR_FDB_STATIC, &f->flags) ||
+			    (test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &f->flags) &&
+			     !test_bit(BR_FDB_OFFLOADED, &f->flags)) ||
 			    (vid && f->key.vlan_id != vid))
 				continue;
 

From 1aad8049909a6d3379175ef2824a68ac35c0b564 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Mon, 28 Sep 2020 11:31:03 -0700
Subject: [PATCH 213/322] net_sched: remove a redundant goto chain check

All TC actions call tcf_action_check_ctrlact() to validate
goto chain, so this check in tcf_action_init_1() is actually
redundant. Remove it to save troubles of leaking memory.

Fixes: e49d8c22f126 ("net_sched: defer tcf_idr_insert() in tcf_action_init_1()")
Reported-by: Vlad Buslov <vladbu@mellanox.com>
Suggested-by: Davide Caratti <dcaratti@redhat.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Reviewed-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_api.c | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 104b47f5184f..5612b336e18e 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -722,13 +722,6 @@ int tcf_action_destroy(struct tc_action *actions[], int bind)
 	return ret;
 }
 
-static int tcf_action_destroy_1(struct tc_action *a, int bind)
-{
-	struct tc_action *actions[] = { a, NULL };
-
-	return tcf_action_destroy(actions, bind);
-}
-
 static int tcf_action_put(struct tc_action *p)
 {
 	return __tcf_action_put(p, false);
@@ -1000,13 +993,6 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 	if (err < 0)
 		goto err_mod;
 
-	if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN) &&
-	    !rcu_access_pointer(a->goto_chain)) {
-		tcf_action_destroy_1(a, bind);
-		NL_SET_ERR_MSG(extack, "can't use goto chain with NULL chain");
-		return ERR_PTR(-EINVAL);
-	}
-
 	if (!name && tb[TCA_ACT_COOKIE])
 		tcf_set_action_cookie(&a->act_cookie, cookie);
 

From fe8300fd8d655ebc6b6297565665959d6d7bbe02 Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Fri, 25 Sep 2020 18:13:02 +0000
Subject: [PATCH 214/322] net: core: add __netdev_upper_dev_unlink()

The netdev_upper_dev_unlink() has to work differently according to flags.
This idea is the same with __netdev_upper_dev_link().

In the following patches, new flags will be added.

Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 266073e300b5..b7b3d6e15cda 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -7762,16 +7762,8 @@ int netdev_master_upper_dev_link(struct net_device *dev,
 }
 EXPORT_SYMBOL(netdev_master_upper_dev_link);
 
-/**
- * netdev_upper_dev_unlink - Removes a link to upper device
- * @dev: device
- * @upper_dev: new upper device
- *
- * Removes a link to device which is upper to this one. The caller must hold
- * the RTNL lock.
- */
-void netdev_upper_dev_unlink(struct net_device *dev,
-			     struct net_device *upper_dev)
+static void __netdev_upper_dev_unlink(struct net_device *dev,
+				      struct net_device *upper_dev)
 {
 	struct netdev_notifier_changeupper_info changeupper_info = {
 		.info = {
@@ -7800,6 +7792,20 @@ void netdev_upper_dev_unlink(struct net_device *dev,
 	__netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level,
 				    NULL);
 }
+
+/**
+ * netdev_upper_dev_unlink - Removes a link to upper device
+ * @dev: device
+ * @upper_dev: new upper device
+ *
+ * Removes a link to device which is upper to this one. The caller must hold
+ * the RTNL lock.
+ */
+void netdev_upper_dev_unlink(struct net_device *dev,
+			     struct net_device *upper_dev)
+{
+	__netdev_upper_dev_unlink(dev, upper_dev);
+}
 EXPORT_SYMBOL(netdev_upper_dev_unlink);
 
 static void __netdev_adjacent_dev_set(struct net_device *upper_dev,

From eff7423365a6938d2d34dbce989febed2ae1f957 Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Fri, 25 Sep 2020 18:13:12 +0000
Subject: [PATCH 215/322] net: core: introduce struct netdev_nested_priv for
 nested interface infrastructure

Functions related to nested interface infrastructure such as
netdev_walk_all_{ upper | lower }_dev() pass both private functions
and "data" pointer to handle their own things.
At this point, the data pointer type is void *.
In order to make it easier to expand common variables and functions,
this new netdev_nested_priv structure is added.

In the following patch, a new member variable will be added into this
struct to fix the lockdep issue.

Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/core/cache.c               | 10 +++-
 drivers/infiniband/core/cma.c                 |  9 ++-
 drivers/infiniband/core/roce_gid_mgmt.c       |  9 ++-
 drivers/infiniband/ulp/ipoib/ipoib_main.c     |  9 ++-
 drivers/net/bonding/bond_alb.c                |  9 ++-
 drivers/net/bonding/bond_main.c               | 10 +++-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 37 ++++++++----
 .../net/ethernet/mellanox/mlxsw/spectrum.c    | 24 ++++----
 .../ethernet/mellanox/mlxsw/spectrum_router.c | 11 +++-
 .../mellanox/mlxsw/spectrum_switchdev.c       | 10 +++-
 drivers/net/ethernet/rocker/rocker_main.c     |  9 ++-
 drivers/net/wireless/quantenna/qtnfmac/core.c | 10 +++-
 include/linux/netdevice.h                     | 16 +++--
 net/bridge/br_arp_nd_proxy.c                  | 26 +++++---
 net/bridge/br_vlan.c                          | 20 ++++---
 net/core/dev.c                                | 59 ++++++++++++-------
 16 files changed, 183 insertions(+), 95 deletions(-)

diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index ffad73bb40ff..5a76611e684a 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -1320,9 +1320,10 @@ struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr)
 }
 EXPORT_SYMBOL(rdma_read_gid_attr_ndev_rcu);
 
-static int get_lower_dev_vlan(struct net_device *lower_dev, void *data)
+static int get_lower_dev_vlan(struct net_device *lower_dev,
+			      struct netdev_nested_priv *priv)
 {
-	u16 *vlan_id = data;
+	u16 *vlan_id = (u16 *)priv->data;
 
 	if (is_vlan_dev(lower_dev))
 		*vlan_id = vlan_dev_vlan_id(lower_dev);
@@ -1348,6 +1349,9 @@ static int get_lower_dev_vlan(struct net_device *lower_dev, void *data)
 int rdma_read_gid_l2_fields(const struct ib_gid_attr *attr,
 			    u16 *vlan_id, u8 *smac)
 {
+	struct netdev_nested_priv priv = {
+		.data = (void *)vlan_id,
+	};
 	struct net_device *ndev;
 
 	rcu_read_lock();
@@ -1368,7 +1372,7 @@ int rdma_read_gid_l2_fields(const struct ib_gid_attr *attr,
 			 * the lower vlan device for this gid entry.
 			 */
 			netdev_walk_all_lower_dev_rcu(attr->ndev,
-					get_lower_dev_vlan, vlan_id);
+					get_lower_dev_vlan, &priv);
 		}
 	}
 	rcu_read_unlock();
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 7f0e91e92968..5888311b2119 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -2865,9 +2865,10 @@ struct iboe_prio_tc_map {
 	bool found;
 };
 
-static int get_lower_vlan_dev_tc(struct net_device *dev, void *data)
+static int get_lower_vlan_dev_tc(struct net_device *dev,
+				 struct netdev_nested_priv *priv)
 {
-	struct iboe_prio_tc_map *map = data;
+	struct iboe_prio_tc_map *map = (struct iboe_prio_tc_map *)priv->data;
 
 	if (is_vlan_dev(dev))
 		map->output_tc = get_vlan_ndev_tc(dev, map->input_prio);
@@ -2886,16 +2887,18 @@ static int iboe_tos_to_sl(struct net_device *ndev, int tos)
 {
 	struct iboe_prio_tc_map prio_tc_map = {};
 	int prio = rt_tos2priority(tos);
+	struct netdev_nested_priv priv;
 
 	/* If VLAN device, get it directly from the VLAN netdev */
 	if (is_vlan_dev(ndev))
 		return get_vlan_ndev_tc(ndev, prio);
 
 	prio_tc_map.input_prio = prio;
+	priv.data = (void *)&prio_tc_map;
 	rcu_read_lock();
 	netdev_walk_all_lower_dev_rcu(ndev,
 				      get_lower_vlan_dev_tc,
-				      &prio_tc_map);
+				      &priv);
 	rcu_read_unlock();
 	/* If map is found from lower device, use it; Otherwise
 	 * continue with the current netdevice to get priority to tc map.
diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c
index 2860def84f4d..6b8364bb032d 100644
--- a/drivers/infiniband/core/roce_gid_mgmt.c
+++ b/drivers/infiniband/core/roce_gid_mgmt.c
@@ -531,10 +531,11 @@ struct upper_list {
 	struct net_device *upper;
 };
 
-static int netdev_upper_walk(struct net_device *upper, void *data)
+static int netdev_upper_walk(struct net_device *upper,
+			     struct netdev_nested_priv *priv)
 {
 	struct upper_list *entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
-	struct list_head *upper_list = data;
+	struct list_head *upper_list = (struct list_head *)priv->data;
 
 	if (!entry)
 		return 0;
@@ -553,12 +554,14 @@ static void handle_netdev_upper(struct ib_device *ib_dev, u8 port,
 						      struct net_device *ndev))
 {
 	struct net_device *ndev = cookie;
+	struct netdev_nested_priv priv;
 	struct upper_list *upper_iter;
 	struct upper_list *upper_temp;
 	LIST_HEAD(upper_list);
 
+	priv.data = &upper_list;
 	rcu_read_lock();
-	netdev_walk_all_upper_dev_rcu(ndev, netdev_upper_walk, &upper_list);
+	netdev_walk_all_upper_dev_rcu(ndev, netdev_upper_walk, &priv);
 	rcu_read_unlock();
 
 	handle_netdev(ib_dev, port, ndev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index ab75b7f745d4..f772fe8c5b66 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -342,9 +342,10 @@ struct ipoib_walk_data {
 	struct net_device *result;
 };
 
-static int ipoib_upper_walk(struct net_device *upper, void *_data)
+static int ipoib_upper_walk(struct net_device *upper,
+			    struct netdev_nested_priv *priv)
 {
-	struct ipoib_walk_data *data = _data;
+	struct ipoib_walk_data *data = (struct ipoib_walk_data *)priv->data;
 	int ret = 0;
 
 	if (ipoib_is_dev_match_addr_rcu(data->addr, upper)) {
@@ -368,10 +369,12 @@ static int ipoib_upper_walk(struct net_device *upper, void *_data)
 static struct net_device *ipoib_get_net_dev_match_addr(
 		const struct sockaddr *addr, struct net_device *dev)
 {
+	struct netdev_nested_priv priv;
 	struct ipoib_walk_data data = {
 		.addr = addr,
 	};
 
+	priv.data = (void *)&data;
 	rcu_read_lock();
 	if (ipoib_is_dev_match_addr_rcu(addr, dev)) {
 		dev_hold(dev);
@@ -379,7 +382,7 @@ static struct net_device *ipoib_get_net_dev_match_addr(
 		goto out;
 	}
 
-	netdev_walk_all_upper_dev_rcu(dev, ipoib_upper_walk, &data);
+	netdev_walk_all_upper_dev_rcu(dev, ipoib_upper_walk, &priv);
 out:
 	rcu_read_unlock();
 	return data.result;
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 4e1b7deb724b..c3091e00dd5f 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -942,9 +942,10 @@ struct alb_walk_data {
 	bool strict_match;
 };
 
-static int alb_upper_dev_walk(struct net_device *upper, void *_data)
+static int alb_upper_dev_walk(struct net_device *upper,
+			      struct netdev_nested_priv *priv)
 {
-	struct alb_walk_data *data = _data;
+	struct alb_walk_data *data = (struct alb_walk_data *)priv->data;
 	bool strict_match = data->strict_match;
 	struct bonding *bond = data->bond;
 	struct slave *slave = data->slave;
@@ -983,6 +984,7 @@ static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[],
 				      bool strict_match)
 {
 	struct bonding *bond = bond_get_bond_by_slave(slave);
+	struct netdev_nested_priv priv;
 	struct alb_walk_data data = {
 		.strict_match = strict_match,
 		.mac_addr = mac_addr,
@@ -990,6 +992,7 @@ static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[],
 		.bond = bond,
 	};
 
+	priv.data = (void *)&data;
 	/* send untagged */
 	alb_send_lp_vid(slave, mac_addr, 0, 0);
 
@@ -997,7 +1000,7 @@ static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[],
 	 * for that device.
 	 */
 	rcu_read_lock();
-	netdev_walk_all_upper_dev_rcu(bond->dev, alb_upper_dev_walk, &data);
+	netdev_walk_all_upper_dev_rcu(bond->dev, alb_upper_dev_walk, &priv);
 	rcu_read_unlock();
 }
 
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 14740d3053b8..84ecbc6fa0ff 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2511,22 +2511,26 @@ re_arm:
 	}
 }
 
-static int bond_upper_dev_walk(struct net_device *upper, void *data)
+static int bond_upper_dev_walk(struct net_device *upper,
+			       struct netdev_nested_priv *priv)
 {
-	__be32 ip = *((__be32 *)data);
+	__be32 ip = *(__be32 *)priv->data;
 
 	return ip == bond_confirm_addr(upper, 0, ip);
 }
 
 static bool bond_has_this_ip(struct bonding *bond, __be32 ip)
 {
+	struct netdev_nested_priv priv = {
+		.data = (void *)&ip,
+	};
 	bool ret = false;
 
 	if (ip == bond_confirm_addr(bond->dev, 0, ip))
 		return true;
 
 	rcu_read_lock();
-	if (netdev_walk_all_upper_dev_rcu(bond->dev, bond_upper_dev_walk, &ip))
+	if (netdev_walk_all_upper_dev_rcu(bond->dev, bond_upper_dev_walk, &priv))
 		ret = true;
 	rcu_read_unlock();
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 2f8a4cfc5fa1..86ca8b9ea1b8 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -5396,9 +5396,10 @@ static int ixgbe_fwd_ring_up(struct ixgbe_adapter *adapter,
 	return err;
 }
 
-static int ixgbe_macvlan_up(struct net_device *vdev, void *data)
+static int ixgbe_macvlan_up(struct net_device *vdev,
+			    struct netdev_nested_priv *priv)
 {
-	struct ixgbe_adapter *adapter = data;
+	struct ixgbe_adapter *adapter = (struct ixgbe_adapter *)priv->data;
 	struct ixgbe_fwd_adapter *accel;
 
 	if (!netif_is_macvlan(vdev))
@@ -5415,8 +5416,12 @@ static int ixgbe_macvlan_up(struct net_device *vdev, void *data)
 
 static void ixgbe_configure_dfwd(struct ixgbe_adapter *adapter)
 {
+	struct netdev_nested_priv priv = {
+		.data = (void *)adapter,
+	};
+
 	netdev_walk_all_upper_dev_rcu(adapter->netdev,
-				      ixgbe_macvlan_up, adapter);
+				      ixgbe_macvlan_up, &priv);
 }
 
 static void ixgbe_configure(struct ixgbe_adapter *adapter)
@@ -9023,9 +9028,10 @@ static void ixgbe_set_prio_tc_map(struct ixgbe_adapter *adapter)
 }
 
 #endif /* CONFIG_IXGBE_DCB */
-static int ixgbe_reassign_macvlan_pool(struct net_device *vdev, void *data)
+static int ixgbe_reassign_macvlan_pool(struct net_device *vdev,
+				       struct netdev_nested_priv *priv)
 {
-	struct ixgbe_adapter *adapter = data;
+	struct ixgbe_adapter *adapter = (struct ixgbe_adapter *)priv->data;
 	struct ixgbe_fwd_adapter *accel;
 	int pool;
 
@@ -9062,13 +9068,16 @@ static int ixgbe_reassign_macvlan_pool(struct net_device *vdev, void *data)
 static void ixgbe_defrag_macvlan_pools(struct net_device *dev)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	struct netdev_nested_priv priv = {
+		.data = (void *)adapter,
+	};
 
 	/* flush any stale bits out of the fwd bitmask */
 	bitmap_clear(adapter->fwd_bitmask, 1, 63);
 
 	/* walk through upper devices reassigning pools */
 	netdev_walk_all_upper_dev_rcu(dev, ixgbe_reassign_macvlan_pool,
-				      adapter);
+				      &priv);
 }
 
 /**
@@ -9242,14 +9251,18 @@ struct upper_walk_data {
 	u8 queue;
 };
 
-static int get_macvlan_queue(struct net_device *upper, void *_data)
+static int get_macvlan_queue(struct net_device *upper,
+			     struct netdev_nested_priv *priv)
 {
 	if (netif_is_macvlan(upper)) {
 		struct ixgbe_fwd_adapter *vadapter = macvlan_accel_priv(upper);
-		struct upper_walk_data *data = _data;
-		struct ixgbe_adapter *adapter = data->adapter;
-		int ifindex = data->ifindex;
+		struct ixgbe_adapter *adapter;
+		struct upper_walk_data *data;
+		int ifindex;
 
+		data = (struct upper_walk_data *)priv->data;
+		ifindex = data->ifindex;
+		adapter = data->adapter;
 		if (vadapter && upper->ifindex == ifindex) {
 			data->queue = adapter->rx_ring[vadapter->rx_base_queue]->reg_idx;
 			data->action = data->queue;
@@ -9265,6 +9278,7 @@ static int handle_redirect_action(struct ixgbe_adapter *adapter, int ifindex,
 {
 	struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ];
 	unsigned int num_vfs = adapter->num_vfs, vf;
+	struct netdev_nested_priv priv;
 	struct upper_walk_data data;
 	struct net_device *upper;
 
@@ -9284,8 +9298,9 @@ static int handle_redirect_action(struct ixgbe_adapter *adapter, int ifindex,
 	data.ifindex = ifindex;
 	data.action = 0;
 	data.queue = 0;
+	priv.data = (void *)&data;
 	if (netdev_walk_all_upper_dev_rcu(adapter->netdev,
-					  get_macvlan_queue, &data)) {
+					  get_macvlan_queue, &priv)) {
 		*action = data.action;
 		*queue = data.queue;
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 4186e29119c2..f3c0e241e1b4 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -3690,13 +3690,13 @@ bool mlxsw_sp_port_dev_check(const struct net_device *dev)
 	return dev->netdev_ops == &mlxsw_sp_port_netdev_ops;
 }
 
-static int mlxsw_sp_lower_dev_walk(struct net_device *lower_dev, void *data)
+static int mlxsw_sp_lower_dev_walk(struct net_device *lower_dev,
+				   struct netdev_nested_priv *priv)
 {
-	struct mlxsw_sp_port **p_mlxsw_sp_port = data;
 	int ret = 0;
 
 	if (mlxsw_sp_port_dev_check(lower_dev)) {
-		*p_mlxsw_sp_port = netdev_priv(lower_dev);
+		priv->data = (void *)netdev_priv(lower_dev);
 		ret = 1;
 	}
 
@@ -3705,15 +3705,16 @@ static int mlxsw_sp_lower_dev_walk(struct net_device *lower_dev, void *data)
 
 struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find(struct net_device *dev)
 {
-	struct mlxsw_sp_port *mlxsw_sp_port;
+	struct netdev_nested_priv priv = {
+		.data = NULL,
+	};
 
 	if (mlxsw_sp_port_dev_check(dev))
 		return netdev_priv(dev);
 
-	mlxsw_sp_port = NULL;
-	netdev_walk_all_lower_dev(dev, mlxsw_sp_lower_dev_walk, &mlxsw_sp_port);
+	netdev_walk_all_lower_dev(dev, mlxsw_sp_lower_dev_walk, &priv);
 
-	return mlxsw_sp_port;
+	return (struct mlxsw_sp_port *)priv.data;
 }
 
 struct mlxsw_sp *mlxsw_sp_lower_get(struct net_device *dev)
@@ -3726,16 +3727,17 @@ struct mlxsw_sp *mlxsw_sp_lower_get(struct net_device *dev)
 
 struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find_rcu(struct net_device *dev)
 {
-	struct mlxsw_sp_port *mlxsw_sp_port;
+	struct netdev_nested_priv priv = {
+		.data = NULL,
+	};
 
 	if (mlxsw_sp_port_dev_check(dev))
 		return netdev_priv(dev);
 
-	mlxsw_sp_port = NULL;
 	netdev_walk_all_lower_dev_rcu(dev, mlxsw_sp_lower_dev_walk,
-				      &mlxsw_sp_port);
+				      &priv);
 
-	return mlxsw_sp_port;
+	return (struct mlxsw_sp_port *)priv.data;
 }
 
 struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 24f1fd1f8d56..460cb523312f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -7351,9 +7351,10 @@ int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
 	return err;
 }
 
-static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev, void *data)
+static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev,
+					struct netdev_nested_priv *priv)
 {
-	struct mlxsw_sp_rif *rif = data;
+	struct mlxsw_sp_rif *rif = (struct mlxsw_sp_rif *)priv->data;
 
 	if (!netif_is_macvlan(dev))
 		return 0;
@@ -7364,12 +7365,16 @@ static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev, void *data)
 
 static int mlxsw_sp_rif_macvlan_flush(struct mlxsw_sp_rif *rif)
 {
+	struct netdev_nested_priv priv = {
+		.data = (void *)rif,
+	};
+
 	if (!netif_is_macvlan_port(rif->dev))
 		return 0;
 
 	netdev_warn(rif->dev, "Router interface is deleted. Upper macvlans will not work\n");
 	return netdev_walk_all_upper_dev_rcu(rif->dev,
-					     __mlxsw_sp_rif_macvlan_flush, rif);
+					     __mlxsw_sp_rif_macvlan_flush, &priv);
 }
 
 static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index 72912afa6f72..6501ce94ace5 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -136,9 +136,9 @@ bool mlxsw_sp_bridge_device_is_offloaded(const struct mlxsw_sp *mlxsw_sp,
 }
 
 static int mlxsw_sp_bridge_device_upper_rif_destroy(struct net_device *dev,
-						    void *data)
+						    struct netdev_nested_priv *priv)
 {
-	struct mlxsw_sp *mlxsw_sp = data;
+	struct mlxsw_sp *mlxsw_sp = priv->data;
 
 	mlxsw_sp_rif_destroy_by_dev(mlxsw_sp, dev);
 	return 0;
@@ -147,10 +147,14 @@ static int mlxsw_sp_bridge_device_upper_rif_destroy(struct net_device *dev,
 static void mlxsw_sp_bridge_device_rifs_destroy(struct mlxsw_sp *mlxsw_sp,
 						struct net_device *dev)
 {
+	struct netdev_nested_priv priv = {
+		.data = (void *)mlxsw_sp,
+	};
+
 	mlxsw_sp_rif_destroy_by_dev(mlxsw_sp, dev);
 	netdev_walk_all_upper_dev_rcu(dev,
 				      mlxsw_sp_bridge_device_upper_rif_destroy,
-				      mlxsw_sp);
+				      &priv);
 }
 
 static int mlxsw_sp_bridge_device_vxlan_init(struct mlxsw_sp_bridge *bridge,
diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c
index 42458a46ffaf..9cc31f7e0df1 100644
--- a/drivers/net/ethernet/rocker/rocker_main.c
+++ b/drivers/net/ethernet/rocker/rocker_main.c
@@ -3099,9 +3099,10 @@ struct rocker_walk_data {
 	struct rocker_port *port;
 };
 
-static int rocker_lower_dev_walk(struct net_device *lower_dev, void *_data)
+static int rocker_lower_dev_walk(struct net_device *lower_dev,
+				 struct netdev_nested_priv *priv)
 {
-	struct rocker_walk_data *data = _data;
+	struct rocker_walk_data *data = (struct rocker_walk_data *)priv->data;
 	int ret = 0;
 
 	if (rocker_port_dev_check_under(lower_dev, data->rocker)) {
@@ -3115,6 +3116,7 @@ static int rocker_lower_dev_walk(struct net_device *lower_dev, void *_data)
 struct rocker_port *rocker_port_dev_lower_find(struct net_device *dev,
 					       struct rocker *rocker)
 {
+	struct netdev_nested_priv priv;
 	struct rocker_walk_data data;
 
 	if (rocker_port_dev_check_under(dev, rocker))
@@ -3122,7 +3124,8 @@ struct rocker_port *rocker_port_dev_lower_find(struct net_device *dev,
 
 	data.rocker = rocker;
 	data.port = NULL;
-	netdev_walk_all_lower_dev(dev, rocker_lower_dev_walk, &data);
+	priv.data = (void *)&data;
+	netdev_walk_all_lower_dev(dev, rocker_lower_dev_walk, &priv);
 
 	return data.port;
 }
diff --git a/drivers/net/wireless/quantenna/qtnfmac/core.c b/drivers/net/wireless/quantenna/qtnfmac/core.c
index 6aafff9d4231..e013ebe3079c 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/core.c
+++ b/drivers/net/wireless/quantenna/qtnfmac/core.c
@@ -671,9 +671,10 @@ bool qtnf_netdev_is_qtn(const struct net_device *ndev)
 	return ndev->netdev_ops == &qtnf_netdev_ops;
 }
 
-static int qtnf_check_br_ports(struct net_device *dev, void *data)
+static int qtnf_check_br_ports(struct net_device *dev,
+			       struct netdev_nested_priv *priv)
 {
-	struct net_device *ndev = data;
+	struct net_device *ndev = (struct net_device *)priv->data;
 
 	if (dev != ndev && netdev_port_same_parent_id(dev, ndev))
 		return -ENOTSUPP;
@@ -686,6 +687,9 @@ static int qtnf_core_netdevice_event(struct notifier_block *nb,
 {
 	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
 	const struct netdev_notifier_changeupper_info *info;
+	struct netdev_nested_priv priv = {
+		.data = (void *)ndev,
+	};
 	struct net_device *brdev;
 	struct qtnf_vif *vif;
 	struct qtnf_bus *bus;
@@ -725,7 +729,7 @@ static int qtnf_core_netdevice_event(struct notifier_block *nb,
 		} else {
 			ret = netdev_walk_all_lower_dev(brdev,
 							qtnf_check_br_ports,
-							ndev);
+							&priv);
 		}
 
 		break;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7bd4fcdd0738..313803d6c781 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4455,6 +4455,10 @@ extern int		dev_rx_weight;
 extern int		dev_tx_weight;
 extern int		gro_normal_batch;
 
+struct netdev_nested_priv {
+	void *data;
+};
+
 bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev);
 struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
 						     struct list_head **iter);
@@ -4470,8 +4474,8 @@ struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev,
 
 int netdev_walk_all_upper_dev_rcu(struct net_device *dev,
 				  int (*fn)(struct net_device *upper_dev,
-					    void *data),
-				  void *data);
+					    struct netdev_nested_priv *priv),
+				  struct netdev_nested_priv *priv);
 
 bool netdev_has_upper_dev_all_rcu(struct net_device *dev,
 				  struct net_device *upper_dev);
@@ -4508,12 +4512,12 @@ struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev,
 					     struct list_head **iter);
 int netdev_walk_all_lower_dev(struct net_device *dev,
 			      int (*fn)(struct net_device *lower_dev,
-					void *data),
-			      void *data);
+					struct netdev_nested_priv *priv),
+			      struct netdev_nested_priv *priv);
 int netdev_walk_all_lower_dev_rcu(struct net_device *dev,
 				  int (*fn)(struct net_device *lower_dev,
-					    void *data),
-				  void *data);
+					    struct netdev_nested_priv *priv),
+				  struct netdev_nested_priv *priv);
 
 void *netdev_adjacent_get_private(struct list_head *adj_list);
 void *netdev_lower_get_first_private_rcu(struct net_device *dev);
diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c
index b18cdf03edb3..dfec65eca8a6 100644
--- a/net/bridge/br_arp_nd_proxy.c
+++ b/net/bridge/br_arp_nd_proxy.c
@@ -88,9 +88,10 @@ static void br_arp_send(struct net_bridge *br, struct net_bridge_port *p,
 	}
 }
 
-static int br_chk_addr_ip(struct net_device *dev, void *data)
+static int br_chk_addr_ip(struct net_device *dev,
+			  struct netdev_nested_priv *priv)
 {
-	__be32 ip = *(__be32 *)data;
+	__be32 ip = *(__be32 *)priv->data;
 	struct in_device *in_dev;
 	__be32 addr = 0;
 
@@ -107,11 +108,15 @@ static int br_chk_addr_ip(struct net_device *dev, void *data)
 
 static bool br_is_local_ip(struct net_device *dev, __be32 ip)
 {
-	if (br_chk_addr_ip(dev, &ip))
+	struct netdev_nested_priv priv = {
+		.data = (void *)&ip,
+	};
+
+	if (br_chk_addr_ip(dev, &priv))
 		return true;
 
 	/* check if ip is configured on upper dev */
-	if (netdev_walk_all_upper_dev_rcu(dev, br_chk_addr_ip, &ip))
+	if (netdev_walk_all_upper_dev_rcu(dev, br_chk_addr_ip, &priv))
 		return true;
 
 	return false;
@@ -361,9 +366,10 @@ static void br_nd_send(struct net_bridge *br, struct net_bridge_port *p,
 	}
 }
 
-static int br_chk_addr_ip6(struct net_device *dev, void *data)
+static int br_chk_addr_ip6(struct net_device *dev,
+			   struct netdev_nested_priv *priv)
 {
-	struct in6_addr *addr = (struct in6_addr *)data;
+	struct in6_addr *addr = (struct in6_addr *)priv->data;
 
 	if (ipv6_chk_addr(dev_net(dev), addr, dev, 0))
 		return 1;
@@ -374,11 +380,15 @@ static int br_chk_addr_ip6(struct net_device *dev, void *data)
 static bool br_is_local_ip6(struct net_device *dev, struct in6_addr *addr)
 
 {
-	if (br_chk_addr_ip6(dev, addr))
+	struct netdev_nested_priv priv = {
+		.data = (void *)addr,
+	};
+
+	if (br_chk_addr_ip6(dev, &priv))
 		return true;
 
 	/* check if ip is configured on upper dev */
-	if (netdev_walk_all_upper_dev_rcu(dev, br_chk_addr_ip6, addr))
+	if (netdev_walk_all_upper_dev_rcu(dev, br_chk_addr_ip6, &priv))
 		return true;
 
 	return false;
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 61c94cefa843..ee8780080be5 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -1360,7 +1360,7 @@ static int br_vlan_is_bind_vlan_dev(const struct net_device *dev)
 }
 
 static int br_vlan_is_bind_vlan_dev_fn(struct net_device *dev,
-				       __always_unused void *data)
+			       __always_unused struct netdev_nested_priv *priv)
 {
 	return br_vlan_is_bind_vlan_dev(dev);
 }
@@ -1383,9 +1383,9 @@ struct br_vlan_bind_walk_data {
 };
 
 static int br_vlan_match_bind_vlan_dev_fn(struct net_device *dev,
-					  void *data_in)
+					  struct netdev_nested_priv *priv)
 {
-	struct br_vlan_bind_walk_data *data = data_in;
+	struct br_vlan_bind_walk_data *data = priv->data;
 	int found = 0;
 
 	if (br_vlan_is_bind_vlan_dev(dev) &&
@@ -1403,10 +1403,13 @@ br_vlan_get_upper_bind_vlan_dev(struct net_device *dev, u16 vid)
 	struct br_vlan_bind_walk_data data = {
 		.vid = vid,
 	};
+	struct netdev_nested_priv priv = {
+		.data = (void *)&data,
+	};
 
 	rcu_read_lock();
 	netdev_walk_all_upper_dev_rcu(dev, br_vlan_match_bind_vlan_dev_fn,
-				      &data);
+				      &priv);
 	rcu_read_unlock();
 
 	return data.result;
@@ -1487,9 +1490,9 @@ struct br_vlan_link_state_walk_data {
 };
 
 static int br_vlan_link_state_change_fn(struct net_device *vlan_dev,
-					void *data_in)
+					struct netdev_nested_priv *priv)
 {
-	struct br_vlan_link_state_walk_data *data = data_in;
+	struct br_vlan_link_state_walk_data *data = priv->data;
 
 	if (br_vlan_is_bind_vlan_dev(vlan_dev))
 		br_vlan_set_vlan_dev_state(data->br, vlan_dev);
@@ -1503,10 +1506,13 @@ static void br_vlan_link_state_change(struct net_device *dev,
 	struct br_vlan_link_state_walk_data data = {
 		.br = br
 	};
+	struct netdev_nested_priv priv = {
+		.data = (void *)&data,
+	};
 
 	rcu_read_lock();
 	netdev_walk_all_upper_dev_rcu(dev, br_vlan_link_state_change_fn,
-				      &data);
+				      &priv);
 	rcu_read_unlock();
 }
 
diff --git a/net/core/dev.c b/net/core/dev.c
index b7b3d6e15cda..a4a1fa806c5c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6812,9 +6812,10 @@ static struct netdev_adjacent *__netdev_find_adj(struct net_device *adj_dev,
 	return NULL;
 }
 
-static int ____netdev_has_upper_dev(struct net_device *upper_dev, void *data)
+static int ____netdev_has_upper_dev(struct net_device *upper_dev,
+				    struct netdev_nested_priv *priv)
 {
-	struct net_device *dev = data;
+	struct net_device *dev = (struct net_device *)priv->data;
 
 	return upper_dev == dev;
 }
@@ -6831,10 +6832,14 @@ static int ____netdev_has_upper_dev(struct net_device *upper_dev, void *data)
 bool netdev_has_upper_dev(struct net_device *dev,
 			  struct net_device *upper_dev)
 {
+	struct netdev_nested_priv priv = {
+		.data = (void *)upper_dev,
+	};
+
 	ASSERT_RTNL();
 
 	return netdev_walk_all_upper_dev_rcu(dev, ____netdev_has_upper_dev,
-					     upper_dev);
+					     &priv);
 }
 EXPORT_SYMBOL(netdev_has_upper_dev);
 
@@ -6851,8 +6856,12 @@ EXPORT_SYMBOL(netdev_has_upper_dev);
 bool netdev_has_upper_dev_all_rcu(struct net_device *dev,
 				  struct net_device *upper_dev)
 {
+	struct netdev_nested_priv priv = {
+		.data = (void *)upper_dev,
+	};
+
 	return !!netdev_walk_all_upper_dev_rcu(dev, ____netdev_has_upper_dev,
-					       upper_dev);
+					       &priv);
 }
 EXPORT_SYMBOL(netdev_has_upper_dev_all_rcu);
 
@@ -6997,8 +7006,8 @@ static struct net_device *netdev_next_upper_dev_rcu(struct net_device *dev,
 
 static int __netdev_walk_all_upper_dev(struct net_device *dev,
 				       int (*fn)(struct net_device *dev,
-						 void *data),
-				       void *data)
+					 struct netdev_nested_priv *priv),
+				       struct netdev_nested_priv *priv)
 {
 	struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
 	struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
@@ -7010,7 +7019,7 @@ static int __netdev_walk_all_upper_dev(struct net_device *dev,
 
 	while (1) {
 		if (now != dev) {
-			ret = fn(now, data);
+			ret = fn(now, priv);
 			if (ret)
 				return ret;
 		}
@@ -7046,8 +7055,8 @@ static int __netdev_walk_all_upper_dev(struct net_device *dev,
 
 int netdev_walk_all_upper_dev_rcu(struct net_device *dev,
 				  int (*fn)(struct net_device *dev,
-					    void *data),
-				  void *data)
+					    struct netdev_nested_priv *priv),
+				  struct netdev_nested_priv *priv)
 {
 	struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
 	struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
@@ -7058,7 +7067,7 @@ int netdev_walk_all_upper_dev_rcu(struct net_device *dev,
 
 	while (1) {
 		if (now != dev) {
-			ret = fn(now, data);
+			ret = fn(now, priv);
 			if (ret)
 				return ret;
 		}
@@ -7094,10 +7103,14 @@ EXPORT_SYMBOL_GPL(netdev_walk_all_upper_dev_rcu);
 static bool __netdev_has_upper_dev(struct net_device *dev,
 				   struct net_device *upper_dev)
 {
+	struct netdev_nested_priv priv = {
+		.data = (void *)upper_dev,
+	};
+
 	ASSERT_RTNL();
 
 	return __netdev_walk_all_upper_dev(dev, ____netdev_has_upper_dev,
-					   upper_dev);
+					   &priv);
 }
 
 /**
@@ -7215,8 +7228,8 @@ static struct net_device *__netdev_next_lower_dev(struct net_device *dev,
 
 int netdev_walk_all_lower_dev(struct net_device *dev,
 			      int (*fn)(struct net_device *dev,
-					void *data),
-			      void *data)
+					struct netdev_nested_priv *priv),
+			      struct netdev_nested_priv *priv)
 {
 	struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
 	struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
@@ -7227,7 +7240,7 @@ int netdev_walk_all_lower_dev(struct net_device *dev,
 
 	while (1) {
 		if (now != dev) {
-			ret = fn(now, data);
+			ret = fn(now, priv);
 			if (ret)
 				return ret;
 		}
@@ -7262,8 +7275,8 @@ EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev);
 
 static int __netdev_walk_all_lower_dev(struct net_device *dev,
 				       int (*fn)(struct net_device *dev,
-						 void *data),
-				       void *data)
+					 struct netdev_nested_priv *priv),
+				       struct netdev_nested_priv *priv)
 {
 	struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
 	struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
@@ -7275,7 +7288,7 @@ static int __netdev_walk_all_lower_dev(struct net_device *dev,
 
 	while (1) {
 		if (now != dev) {
-			ret = fn(now, data);
+			ret = fn(now, priv);
 			if (ret)
 				return ret;
 		}
@@ -7364,13 +7377,15 @@ static u8 __netdev_lower_depth(struct net_device *dev)
 	return max_depth;
 }
 
-static int __netdev_update_upper_level(struct net_device *dev, void *data)
+static int __netdev_update_upper_level(struct net_device *dev,
+				       struct netdev_nested_priv *__unused)
 {
 	dev->upper_level = __netdev_upper_depth(dev) + 1;
 	return 0;
 }
 
-static int __netdev_update_lower_level(struct net_device *dev, void *data)
+static int __netdev_update_lower_level(struct net_device *dev,
+				       struct netdev_nested_priv *__unused)
 {
 	dev->lower_level = __netdev_lower_depth(dev) + 1;
 	return 0;
@@ -7378,8 +7393,8 @@ static int __netdev_update_lower_level(struct net_device *dev, void *data)
 
 int netdev_walk_all_lower_dev_rcu(struct net_device *dev,
 				  int (*fn)(struct net_device *dev,
-					    void *data),
-				  void *data)
+					    struct netdev_nested_priv *priv),
+				  struct netdev_nested_priv *priv)
 {
 	struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
 	struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
@@ -7390,7 +7405,7 @@ int netdev_walk_all_lower_dev_rcu(struct net_device *dev,
 
 	while (1) {
 		if (now != dev) {
-			ret = fn(now, data);
+			ret = fn(now, priv);
 			if (ret)
 				return ret;
 		}

From 1fc70edb7d7b5ce1ae32b0cf90183f4879ad421a Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Fri, 25 Sep 2020 18:13:29 +0000
Subject: [PATCH 216/322] net: core: add nested_level variable in net_device

This patch is to add a new variable 'nested_level' into the net_device
structure.
This variable will be used as a parameter of spin_lock_nested() of
dev->addr_list_lock.

netif_addr_lock() can be called recursively so spin_lock_nested() is
used instead of spin_lock() and dev->lower_level is used as a parameter
of spin_lock_nested().
But, dev->lower_level value can be updated while it is being used.
So, lockdep would warn a possible deadlock scenario.

When a stacked interface is deleted, netif_{uc | mc}_sync() is
called recursively.
So, spin_lock_nested() is called recursively too.
At this moment, the dev->lower_level variable is used as a parameter of it.
dev->lower_level value is updated when interfaces are being unlinked/linked
immediately.
Thus, After unlinking, dev->lower_level shouldn't be a parameter of
spin_lock_nested().

    A (macvlan)
    |
    B (vlan)
    |
    C (bridge)
    |
    D (macvlan)
    |
    E (vlan)
    |
    F (bridge)

    A->lower_level : 6
    B->lower_level : 5
    C->lower_level : 4
    D->lower_level : 3
    E->lower_level : 2
    F->lower_level : 1

When an interface 'A' is removed, it releases resources.
At this moment, netif_addr_lock() would be called.
Then, netdev_upper_dev_unlink() is called recursively.
Then dev->lower_level is updated.
There is no problem.

But, when the bridge module is removed, 'C' and 'F' interfaces
are removed at once.
If 'F' is removed first, a lower_level value is like below.
    A->lower_level : 5
    B->lower_level : 4
    C->lower_level : 3
    D->lower_level : 2
    E->lower_level : 1
    F->lower_level : 1

Then, 'C' is removed. at this moment, netif_addr_lock() is called
recursively.
The ordering is like this.
C(3)->D(2)->E(1)->F(1)
At this moment, the lower_level value of 'E' and 'F' are the same.
So, lockdep warns a possible deadlock scenario.

In order to avoid this problem, a new variable 'nested_level' is added.
This value is the same as dev->lower_level - 1.
But this value is updated in rtnl_unlock().
So, this variable can be used as a parameter of spin_lock_nested() safely
in the rtnl context.

Test commands:
   ip link add br0 type bridge vlan_filtering 1
   ip link add vlan1 link br0 type vlan id 10
   ip link add macvlan2 link vlan1 type macvlan
   ip link add br3 type bridge vlan_filtering 1
   ip link set macvlan2 master br3
   ip link add vlan4 link br3 type vlan id 10
   ip link add macvlan5 link vlan4 type macvlan
   ip link add br6 type bridge vlan_filtering 1
   ip link set macvlan5 master br6
   ip link add vlan7 link br6 type vlan id 10
   ip link add macvlan8 link vlan7 type macvlan

   ip link set br0 up
   ip link set vlan1 up
   ip link set macvlan2 up
   ip link set br3 up
   ip link set vlan4 up
   ip link set macvlan5 up
   ip link set br6 up
   ip link set vlan7 up
   ip link set macvlan8 up
   modprobe -rv bridge

Splat looks like:
[   36.057436][  T744] WARNING: possible recursive locking detected
[   36.058848][  T744] 5.9.0-rc6+ #728 Not tainted
[   36.059959][  T744] --------------------------------------------
[   36.061391][  T744] ip/744 is trying to acquire lock:
[   36.062590][  T744] ffff8c4767509280 (&vlan_netdev_addr_lock_key){+...}-{2:2}, at: dev_set_rx_mode+0x19/0x30
[   36.064922][  T744]
[   36.064922][  T744] but task is already holding lock:
[   36.066626][  T744] ffff8c4767769280 (&vlan_netdev_addr_lock_key){+...}-{2:2}, at: dev_uc_add+0x1e/0x60
[   36.068851][  T744]
[   36.068851][  T744] other info that might help us debug this:
[   36.070731][  T744]  Possible unsafe locking scenario:
[   36.070731][  T744]
[   36.072497][  T744]        CPU0
[   36.073238][  T744]        ----
[   36.074007][  T744]   lock(&vlan_netdev_addr_lock_key);
[   36.075290][  T744]   lock(&vlan_netdev_addr_lock_key);
[   36.076590][  T744]
[   36.076590][  T744]  *** DEADLOCK ***
[   36.076590][  T744]
[   36.078515][  T744]  May be due to missing lock nesting notation
[   36.078515][  T744]
[   36.080491][  T744] 3 locks held by ip/744:
[   36.081471][  T744]  #0: ffffffff98571df0 (rtnl_mutex){+.+.}-{3:3}, at: rtnetlink_rcv_msg+0x236/0x490
[   36.083614][  T744]  #1: ffff8c4767769280 (&vlan_netdev_addr_lock_key){+...}-{2:2}, at: dev_uc_add+0x1e/0x60
[   36.085942][  T744]  #2: ffff8c476c8da280 (&bridge_netdev_addr_lock_key/4){+...}-{2:2}, at: dev_uc_sync+0x39/0x80
[   36.088400][  T744]
[   36.088400][  T744] stack backtrace:
[   36.089772][  T744] CPU: 6 PID: 744 Comm: ip Not tainted 5.9.0-rc6+ #728
[   36.091364][  T744] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014
[   36.093630][  T744] Call Trace:
[   36.094416][  T744]  dump_stack+0x77/0x9b
[   36.095385][  T744]  __lock_acquire+0xbc3/0x1f40
[   36.096522][  T744]  lock_acquire+0xb4/0x3b0
[   36.097540][  T744]  ? dev_set_rx_mode+0x19/0x30
[   36.098657][  T744]  ? rtmsg_ifinfo+0x1f/0x30
[   36.099711][  T744]  ? __dev_notify_flags+0xa5/0xf0
[   36.100874][  T744]  ? rtnl_is_locked+0x11/0x20
[   36.101967][  T744]  ? __dev_set_promiscuity+0x7b/0x1a0
[   36.103230][  T744]  _raw_spin_lock_bh+0x38/0x70
[   36.104348][  T744]  ? dev_set_rx_mode+0x19/0x30
[   36.105461][  T744]  dev_set_rx_mode+0x19/0x30
[   36.106532][  T744]  dev_set_promiscuity+0x36/0x50
[   36.107692][  T744]  __dev_set_promiscuity+0x123/0x1a0
[   36.108929][  T744]  dev_set_promiscuity+0x1e/0x50
[   36.110093][  T744]  br_port_set_promisc+0x1f/0x40 [bridge]
[   36.111415][  T744]  br_manage_promisc+0x8b/0xe0 [bridge]
[   36.112728][  T744]  __dev_set_promiscuity+0x123/0x1a0
[   36.113967][  T744]  ? __hw_addr_sync_one+0x23/0x50
[   36.115135][  T744]  __dev_set_rx_mode+0x68/0x90
[   36.116249][  T744]  dev_uc_sync+0x70/0x80
[   36.117244][  T744]  dev_uc_add+0x50/0x60
[   36.118223][  T744]  macvlan_open+0x18e/0x1f0 [macvlan]
[   36.119470][  T744]  __dev_open+0xd6/0x170
[   36.120470][  T744]  __dev_change_flags+0x181/0x1d0
[   36.121644][  T744]  dev_change_flags+0x23/0x60
[   36.122741][  T744]  do_setlink+0x30a/0x11e0
[   36.123778][  T744]  ? __lock_acquire+0x92c/0x1f40
[   36.124929][  T744]  ? __nla_validate_parse.part.6+0x45/0x8e0
[   36.126309][  T744]  ? __lock_acquire+0x92c/0x1f40
[   36.127457][  T744]  __rtnl_newlink+0x546/0x8e0
[   36.128560][  T744]  ? lock_acquire+0xb4/0x3b0
[   36.129623][  T744]  ? deactivate_slab.isra.85+0x6a1/0x850
[   36.130946][  T744]  ? __lock_acquire+0x92c/0x1f40
[   36.132102][  T744]  ? lock_acquire+0xb4/0x3b0
[   36.133176][  T744]  ? is_bpf_text_address+0x5/0xe0
[   36.134364][  T744]  ? rtnl_newlink+0x2e/0x70
[   36.135445][  T744]  ? rcu_read_lock_sched_held+0x32/0x60
[   36.136771][  T744]  ? kmem_cache_alloc_trace+0x2d8/0x380
[   36.138070][  T744]  ? rtnl_newlink+0x2e/0x70
[   36.139164][  T744]  rtnl_newlink+0x47/0x70
[ ... ]

Fixes: 845e0ebb4408 ("net: change addr_list_lock back to static key")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 52 ++++++++++++++++++++----
 net/core/dev.c            | 85 +++++++++++++++++++++++++++++++++------
 net/core/dev_addr_lists.c | 12 +++---
 3 files changed, 122 insertions(+), 27 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 313803d6c781..9fdb3ebef306 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1955,6 +1955,7 @@ struct net_device {
 	unsigned short		type;
 	unsigned short		hard_header_len;
 	unsigned char		min_header_len;
+	unsigned char		name_assign_type;
 
 	unsigned short		needed_headroom;
 	unsigned short		needed_tailroom;
@@ -1965,21 +1966,28 @@ struct net_device {
 	unsigned char		addr_len;
 	unsigned char		upper_level;
 	unsigned char		lower_level;
+
 	unsigned short		neigh_priv_len;
 	unsigned short          dev_id;
 	unsigned short          dev_port;
 	spinlock_t		addr_list_lock;
-	unsigned char		name_assign_type;
-	bool			uc_promisc;
+
 	struct netdev_hw_addr_list	uc;
 	struct netdev_hw_addr_list	mc;
 	struct netdev_hw_addr_list	dev_addrs;
 
 #ifdef CONFIG_SYSFS
 	struct kset		*queues_kset;
+#endif
+#ifdef CONFIG_LOCKDEP
+	struct list_head	unlink_list;
 #endif
 	unsigned int		promiscuity;
 	unsigned int		allmulti;
+	bool			uc_promisc;
+#ifdef CONFIG_LOCKDEP
+	unsigned char		nested_level;
+#endif
 
 
 	/* Protocol-specific pointers */
@@ -4260,17 +4268,23 @@ static inline void netif_tx_disable(struct net_device *dev)
 
 static inline void netif_addr_lock(struct net_device *dev)
 {
-	spin_lock(&dev->addr_list_lock);
-}
+	unsigned char nest_level = 0;
 
-static inline void netif_addr_lock_nested(struct net_device *dev)
-{
-	spin_lock_nested(&dev->addr_list_lock, dev->lower_level);
+#ifdef CONFIG_LOCKDEP
+	nest_level = dev->nested_level;
+#endif
+	spin_lock_nested(&dev->addr_list_lock, nest_level);
 }
 
 static inline void netif_addr_lock_bh(struct net_device *dev)
 {
-	spin_lock_bh(&dev->addr_list_lock);
+	unsigned char nest_level = 0;
+
+#ifdef CONFIG_LOCKDEP
+	nest_level = dev->nested_level;
+#endif
+	local_bh_disable();
+	spin_lock_nested(&dev->addr_list_lock, nest_level);
 }
 
 static inline void netif_addr_unlock(struct net_device *dev)
@@ -4455,7 +4469,19 @@ extern int		dev_rx_weight;
 extern int		dev_tx_weight;
 extern int		gro_normal_batch;
 
+enum {
+	NESTED_SYNC_IMM_BIT,
+	NESTED_SYNC_TODO_BIT,
+};
+
+#define __NESTED_SYNC_BIT(bit)	((u32)1 << (bit))
+#define __NESTED_SYNC(name)	__NESTED_SYNC_BIT(NESTED_SYNC_ ## name ## _BIT)
+
+#define NESTED_SYNC_IMM		__NESTED_SYNC(IMM)
+#define NESTED_SYNC_TODO	__NESTED_SYNC(TODO)
+
 struct netdev_nested_priv {
+	unsigned char flags;
 	void *data;
 };
 
@@ -4465,6 +4491,16 @@ struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
 struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev,
 						     struct list_head **iter);
 
+#ifdef CONFIG_LOCKDEP
+static LIST_HEAD(net_unlink_list);
+
+static inline void net_unlink_todo(struct net_device *dev)
+{
+	if (list_empty(&dev->unlink_list))
+		list_add_tail(&dev->unlink_list, &net_unlink_list);
+}
+#endif
+
 /* iterate through upper list, must be called under RCU read lock */
 #define netdev_for_each_upper_dev_rcu(dev, updev, iter) \
 	for (iter = &(dev)->adj_list.upper, \
diff --git a/net/core/dev.c b/net/core/dev.c
index a4a1fa806c5c..4906b44af850 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -7104,6 +7104,7 @@ static bool __netdev_has_upper_dev(struct net_device *dev,
 				   struct net_device *upper_dev)
 {
 	struct netdev_nested_priv priv = {
+		.flags = 0,
 		.data = (void *)upper_dev,
 	};
 
@@ -7385,9 +7386,19 @@ static int __netdev_update_upper_level(struct net_device *dev,
 }
 
 static int __netdev_update_lower_level(struct net_device *dev,
-				       struct netdev_nested_priv *__unused)
+				       struct netdev_nested_priv *priv)
 {
 	dev->lower_level = __netdev_lower_depth(dev) + 1;
+
+#ifdef CONFIG_LOCKDEP
+	if (!priv)
+		return 0;
+
+	if (priv->flags & NESTED_SYNC_IMM)
+		dev->nested_level = dev->lower_level - 1;
+	if (priv->flags & NESTED_SYNC_TODO)
+		net_unlink_todo(dev);
+#endif
 	return 0;
 }
 
@@ -7665,6 +7676,7 @@ static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
 static int __netdev_upper_dev_link(struct net_device *dev,
 				   struct net_device *upper_dev, bool master,
 				   void *upper_priv, void *upper_info,
+				   struct netdev_nested_priv *priv,
 				   struct netlink_ext_ack *extack)
 {
 	struct netdev_notifier_changeupper_info changeupper_info = {
@@ -7721,9 +7733,9 @@ static int __netdev_upper_dev_link(struct net_device *dev,
 	__netdev_update_upper_level(dev, NULL);
 	__netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL);
 
-	__netdev_update_lower_level(upper_dev, NULL);
+	__netdev_update_lower_level(upper_dev, priv);
 	__netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level,
-				    NULL);
+				    priv);
 
 	return 0;
 
@@ -7748,8 +7760,13 @@ int netdev_upper_dev_link(struct net_device *dev,
 			  struct net_device *upper_dev,
 			  struct netlink_ext_ack *extack)
 {
+	struct netdev_nested_priv priv = {
+		.flags = NESTED_SYNC_IMM | NESTED_SYNC_TODO,
+		.data = NULL,
+	};
+
 	return __netdev_upper_dev_link(dev, upper_dev, false,
-				       NULL, NULL, extack);
+				       NULL, NULL, &priv, extack);
 }
 EXPORT_SYMBOL(netdev_upper_dev_link);
 
@@ -7772,13 +7789,19 @@ int netdev_master_upper_dev_link(struct net_device *dev,
 				 void *upper_priv, void *upper_info,
 				 struct netlink_ext_ack *extack)
 {
+	struct netdev_nested_priv priv = {
+		.flags = NESTED_SYNC_IMM | NESTED_SYNC_TODO,
+		.data = NULL,
+	};
+
 	return __netdev_upper_dev_link(dev, upper_dev, true,
-				       upper_priv, upper_info, extack);
+				       upper_priv, upper_info, &priv, extack);
 }
 EXPORT_SYMBOL(netdev_master_upper_dev_link);
 
 static void __netdev_upper_dev_unlink(struct net_device *dev,
-				      struct net_device *upper_dev)
+				      struct net_device *upper_dev,
+				      struct netdev_nested_priv *priv)
 {
 	struct netdev_notifier_changeupper_info changeupper_info = {
 		.info = {
@@ -7803,9 +7826,9 @@ static void __netdev_upper_dev_unlink(struct net_device *dev,
 	__netdev_update_upper_level(dev, NULL);
 	__netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL);
 
-	__netdev_update_lower_level(upper_dev, NULL);
+	__netdev_update_lower_level(upper_dev, priv);
 	__netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level,
-				    NULL);
+				    priv);
 }
 
 /**
@@ -7819,7 +7842,12 @@ static void __netdev_upper_dev_unlink(struct net_device *dev,
 void netdev_upper_dev_unlink(struct net_device *dev,
 			     struct net_device *upper_dev)
 {
-	__netdev_upper_dev_unlink(dev, upper_dev);
+	struct netdev_nested_priv priv = {
+		.flags = NESTED_SYNC_TODO,
+		.data = NULL,
+	};
+
+	__netdev_upper_dev_unlink(dev, upper_dev, &priv);
 }
 EXPORT_SYMBOL(netdev_upper_dev_unlink);
 
@@ -7855,6 +7883,10 @@ int netdev_adjacent_change_prepare(struct net_device *old_dev,
 				   struct net_device *dev,
 				   struct netlink_ext_ack *extack)
 {
+	struct netdev_nested_priv priv = {
+		.flags = 0,
+		.data = NULL,
+	};
 	int err;
 
 	if (!new_dev)
@@ -7862,8 +7894,8 @@ int netdev_adjacent_change_prepare(struct net_device *old_dev,
 
 	if (old_dev && new_dev != old_dev)
 		netdev_adjacent_dev_disable(dev, old_dev);
-
-	err = netdev_upper_dev_link(new_dev, dev, extack);
+	err = __netdev_upper_dev_link(new_dev, dev, false, NULL, NULL, &priv,
+				      extack);
 	if (err) {
 		if (old_dev && new_dev != old_dev)
 			netdev_adjacent_dev_enable(dev, old_dev);
@@ -7878,6 +7910,11 @@ void netdev_adjacent_change_commit(struct net_device *old_dev,
 				   struct net_device *new_dev,
 				   struct net_device *dev)
 {
+	struct netdev_nested_priv priv = {
+		.flags = NESTED_SYNC_IMM | NESTED_SYNC_TODO,
+		.data = NULL,
+	};
+
 	if (!new_dev || !old_dev)
 		return;
 
@@ -7885,7 +7922,7 @@ void netdev_adjacent_change_commit(struct net_device *old_dev,
 		return;
 
 	netdev_adjacent_dev_enable(dev, old_dev);
-	netdev_upper_dev_unlink(old_dev, dev);
+	__netdev_upper_dev_unlink(old_dev, dev, &priv);
 }
 EXPORT_SYMBOL(netdev_adjacent_change_commit);
 
@@ -7893,13 +7930,18 @@ void netdev_adjacent_change_abort(struct net_device *old_dev,
 				  struct net_device *new_dev,
 				  struct net_device *dev)
 {
+	struct netdev_nested_priv priv = {
+		.flags = 0,
+		.data = NULL,
+	};
+
 	if (!new_dev)
 		return;
 
 	if (old_dev && new_dev != old_dev)
 		netdev_adjacent_dev_enable(dev, old_dev);
 
-	netdev_upper_dev_unlink(new_dev, dev);
+	__netdev_upper_dev_unlink(new_dev, dev, &priv);
 }
 EXPORT_SYMBOL(netdev_adjacent_change_abort);
 
@@ -10083,6 +10125,19 @@ static void netdev_wait_allrefs(struct net_device *dev)
 void netdev_run_todo(void)
 {
 	struct list_head list;
+#ifdef CONFIG_LOCKDEP
+	struct list_head unlink_list;
+
+	list_replace_init(&net_unlink_list, &unlink_list);
+
+	while (!list_empty(&unlink_list)) {
+		struct net_device *dev = list_first_entry(&unlink_list,
+							  struct net_device,
+							  unlink_list);
+		list_del(&dev->unlink_list);
+		dev->nested_level = dev->lower_level - 1;
+	}
+#endif
 
 	/* Snapshot list, allow later requests */
 	list_replace_init(&net_todo_list, &list);
@@ -10295,6 +10350,10 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 	dev->gso_max_segs = GSO_MAX_SEGS;
 	dev->upper_level = 1;
 	dev->lower_level = 1;
+#ifdef CONFIG_LOCKDEP
+	dev->nested_level = 0;
+	INIT_LIST_HEAD(&dev->unlink_list);
+#endif
 
 	INIT_LIST_HEAD(&dev->napi_list);
 	INIT_LIST_HEAD(&dev->unreg_list);
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 54cd568e7c2f..fa1c37ec40c9 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -637,7 +637,7 @@ int dev_uc_sync(struct net_device *to, struct net_device *from)
 	if (to->addr_len != from->addr_len)
 		return -EINVAL;
 
-	netif_addr_lock_nested(to);
+	netif_addr_lock(to);
 	err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len);
 	if (!err)
 		__dev_set_rx_mode(to);
@@ -667,7 +667,7 @@ int dev_uc_sync_multiple(struct net_device *to, struct net_device *from)
 	if (to->addr_len != from->addr_len)
 		return -EINVAL;
 
-	netif_addr_lock_nested(to);
+	netif_addr_lock(to);
 	err = __hw_addr_sync_multiple(&to->uc, &from->uc, to->addr_len);
 	if (!err)
 		__dev_set_rx_mode(to);
@@ -700,7 +700,7 @@ void dev_uc_unsync(struct net_device *to, struct net_device *from)
 	 * larger.
 	 */
 	netif_addr_lock_bh(from);
-	netif_addr_lock_nested(to);
+	netif_addr_lock(to);
 	__hw_addr_unsync(&to->uc, &from->uc, to->addr_len);
 	__dev_set_rx_mode(to);
 	netif_addr_unlock(to);
@@ -867,7 +867,7 @@ int dev_mc_sync(struct net_device *to, struct net_device *from)
 	if (to->addr_len != from->addr_len)
 		return -EINVAL;
 
-	netif_addr_lock_nested(to);
+	netif_addr_lock(to);
 	err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len);
 	if (!err)
 		__dev_set_rx_mode(to);
@@ -897,7 +897,7 @@ int dev_mc_sync_multiple(struct net_device *to, struct net_device *from)
 	if (to->addr_len != from->addr_len)
 		return -EINVAL;
 
-	netif_addr_lock_nested(to);
+	netif_addr_lock(to);
 	err = __hw_addr_sync_multiple(&to->mc, &from->mc, to->addr_len);
 	if (!err)
 		__dev_set_rx_mode(to);
@@ -922,7 +922,7 @@ void dev_mc_unsync(struct net_device *to, struct net_device *from)
 
 	/* See the above comments inside dev_uc_unsync(). */
 	netif_addr_lock_bh(from);
-	netif_addr_lock_nested(to);
+	netif_addr_lock(to);
 	__hw_addr_unsync(&to->mc, &from->mc, to->addr_len);
 	__dev_set_rx_mode(to);
 	netif_addr_unlock(to);

From a7809ff90ce6c48598d3c4ab54eb599bec1e9c42 Mon Sep 17 00:00:00 2001
From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Date: Sat, 26 Sep 2020 22:26:25 +0530
Subject: [PATCH 217/322] net: qrtr: ns: Protect radix_tree_deref_slot() using
 rcu read locks

The rcu read locks are needed to avoid potential race condition while
dereferencing radix tree from multiple threads. The issue was identified
by syzbot. Below is the crash report:

=============================
WARNING: suspicious RCU usage
5.7.0-syzkaller #0 Not tainted
-----------------------------
include/linux/radix-tree.h:176 suspicious rcu_dereference_check() usage!

other info that might help us debug this:

rcu_scheduler_active = 2, debug_locks = 1
2 locks held by kworker/u4:1/21:
 #0: ffff88821b097938 ((wq_completion)qrtr_ns_handler){+.+.}-{0:0}, at: spin_unlock_irq include/linux/spinlock.h:403 [inline]
 #0: ffff88821b097938 ((wq_completion)qrtr_ns_handler){+.+.}-{0:0}, at: process_one_work+0x6df/0xfd0 kernel/workqueue.c:2241
 #1: ffffc90000dd7d80 ((work_completion)(&qrtr_ns.work)){+.+.}-{0:0}, at: process_one_work+0x71e/0xfd0 kernel/workqueue.c:2243

stack backtrace:
CPU: 0 PID: 21 Comm: kworker/u4:1 Not tainted 5.7.0-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Workqueue: qrtr_ns_handler qrtr_ns_worker
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x1e9/0x30e lib/dump_stack.c:118
 radix_tree_deref_slot include/linux/radix-tree.h:176 [inline]
 ctrl_cmd_new_lookup net/qrtr/ns.c:558 [inline]
 qrtr_ns_worker+0x2aff/0x4500 net/qrtr/ns.c:674
 process_one_work+0x76e/0xfd0 kernel/workqueue.c:2268
 worker_thread+0xa7f/0x1450 kernel/workqueue.c:2414
 kthread+0x353/0x380 kernel/kthread.c:268

Fixes: 0c2204a4ad71 ("net: qrtr: Migrate nameservice to kernel from userspace")
Reported-and-tested-by: syzbot+0f84f6eed90503da72fc@syzkaller.appspotmail.com
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/qrtr/ns.c | 34 +++++++++++++++++++++++++---------
 1 file changed, 25 insertions(+), 9 deletions(-)

diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c
index d8252fdab851..934999b56d60 100644
--- a/net/qrtr/ns.c
+++ b/net/qrtr/ns.c
@@ -193,12 +193,13 @@ static int announce_servers(struct sockaddr_qrtr *sq)
 	struct qrtr_server *srv;
 	struct qrtr_node *node;
 	void __rcu **slot;
-	int ret;
+	int ret = 0;
 
 	node = node_get(qrtr_ns.local_node);
 	if (!node)
 		return 0;
 
+	rcu_read_lock();
 	/* Announce the list of servers registered in this node */
 	radix_tree_for_each_slot(slot, &node->servers, &iter, 0) {
 		srv = radix_tree_deref_slot(slot);
@@ -206,11 +207,14 @@ static int announce_servers(struct sockaddr_qrtr *sq)
 		ret = service_announce_new(sq, srv);
 		if (ret < 0) {
 			pr_err("failed to announce new service\n");
-			return ret;
+			goto err_out;
 		}
 	}
 
-	return 0;
+err_out:
+	rcu_read_unlock();
+
+	return ret;
 }
 
 static struct qrtr_server *server_add(unsigned int service,
@@ -335,7 +339,7 @@ static int ctrl_cmd_bye(struct sockaddr_qrtr *from)
 	struct qrtr_node *node;
 	void __rcu **slot;
 	struct kvec iv;
-	int ret;
+	int ret = 0;
 
 	iv.iov_base = &pkt;
 	iv.iov_len = sizeof(pkt);
@@ -344,11 +348,13 @@ static int ctrl_cmd_bye(struct sockaddr_qrtr *from)
 	if (!node)
 		return 0;
 
+	rcu_read_lock();
 	/* Advertise removal of this client to all servers of remote node */
 	radix_tree_for_each_slot(slot, &node->servers, &iter, 0) {
 		srv = radix_tree_deref_slot(slot);
 		server_del(node, srv->port);
 	}
+	rcu_read_unlock();
 
 	/* Advertise the removal of this client to all local servers */
 	local_node = node_get(qrtr_ns.local_node);
@@ -359,6 +365,7 @@ static int ctrl_cmd_bye(struct sockaddr_qrtr *from)
 	pkt.cmd = cpu_to_le32(QRTR_TYPE_BYE);
 	pkt.client.node = cpu_to_le32(from->sq_node);
 
+	rcu_read_lock();
 	radix_tree_for_each_slot(slot, &local_node->servers, &iter, 0) {
 		srv = radix_tree_deref_slot(slot);
 
@@ -372,11 +379,14 @@ static int ctrl_cmd_bye(struct sockaddr_qrtr *from)
 		ret = kernel_sendmsg(qrtr_ns.sock, &msg, &iv, 1, sizeof(pkt));
 		if (ret < 0) {
 			pr_err("failed to send bye cmd\n");
-			return ret;
+			goto err_out;
 		}
 	}
 
-	return 0;
+err_out:
+	rcu_read_unlock();
+
+	return ret;
 }
 
 static int ctrl_cmd_del_client(struct sockaddr_qrtr *from,
@@ -394,7 +404,7 @@ static int ctrl_cmd_del_client(struct sockaddr_qrtr *from,
 	struct list_head *li;
 	void __rcu **slot;
 	struct kvec iv;
-	int ret;
+	int ret = 0;
 
 	iv.iov_base = &pkt;
 	iv.iov_len = sizeof(pkt);
@@ -434,6 +444,7 @@ static int ctrl_cmd_del_client(struct sockaddr_qrtr *from,
 	pkt.client.node = cpu_to_le32(node_id);
 	pkt.client.port = cpu_to_le32(port);
 
+	rcu_read_lock();
 	radix_tree_for_each_slot(slot, &local_node->servers, &iter, 0) {
 		srv = radix_tree_deref_slot(slot);
 
@@ -447,11 +458,14 @@ static int ctrl_cmd_del_client(struct sockaddr_qrtr *from,
 		ret = kernel_sendmsg(qrtr_ns.sock, &msg, &iv, 1, sizeof(pkt));
 		if (ret < 0) {
 			pr_err("failed to send del client cmd\n");
-			return ret;
+			goto err_out;
 		}
 	}
 
-	return 0;
+err_out:
+	rcu_read_unlock();
+
+	return ret;
 }
 
 static int ctrl_cmd_new_server(struct sockaddr_qrtr *from,
@@ -554,6 +568,7 @@ static int ctrl_cmd_new_lookup(struct sockaddr_qrtr *from,
 	filter.service = service;
 	filter.instance = instance;
 
+	rcu_read_lock();
 	radix_tree_for_each_slot(node_slot, &nodes, &node_iter, 0) {
 		node = radix_tree_deref_slot(node_slot);
 
@@ -568,6 +583,7 @@ static int ctrl_cmd_new_lookup(struct sockaddr_qrtr *from,
 			lookup_notify(from, srv, true);
 		}
 	}
+	rcu_read_unlock();
 
 	/* Empty notification, to indicate end of listing */
 	lookup_notify(from, NULL, true);

From 996d585b079ad494a30cac10e08585bcd5345125 Mon Sep 17 00:00:00 2001
From: Vincent Huang <vincent.huang@tw.synaptics.com>
Date: Mon, 28 Sep 2020 16:19:05 -0700
Subject: [PATCH 218/322] Input: trackpoint - enable Synaptics trackpoints

Add Synaptics IDs in trackpoint_start_protocol() to mark them as valid.

Signed-off-by: Vincent Huang <vincent.huang@tw.synaptics.com>
Fixes: 6c77545af100 ("Input: trackpoint - add new trackpoint variant IDs")
Reviewed-by: Harry Cutts <hcutts@chromium.org>
Tested-by: Harry Cutts <hcutts@chromium.org>
Link: https://lore.kernel.org/r/20200924053013.1056953-1-vincent.huang@tw.synaptics.com
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/mouse/trackpoint.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/input/mouse/trackpoint.c b/drivers/input/mouse/trackpoint.c
index 854d5e758724..ef2fa0905208 100644
--- a/drivers/input/mouse/trackpoint.c
+++ b/drivers/input/mouse/trackpoint.c
@@ -282,6 +282,8 @@ static int trackpoint_start_protocol(struct psmouse *psmouse,
 	case TP_VARIANT_ALPS:
 	case TP_VARIANT_ELAN:
 	case TP_VARIANT_NXP:
+	case TP_VARIANT_JYT_SYNAPTICS:
+	case TP_VARIANT_SYNAPTICS:
 		if (variant_id)
 			*variant_id = param[0];
 		if (firmware_id)

From 5fc27b098dafb8e30794a9db0705074c7d766179 Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Mon, 28 Sep 2020 16:21:17 -0700
Subject: [PATCH 219/322] Input: i8042 - add nopnp quirk for Acer Aspire 5 A515
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Touchpad on this laptop is not detected properly during boot, as PNP
enumerates (wrongly) AUX port as disabled on this machine.

Fix that by adding this board (with admittedly quite funny DMI
identifiers) to nopnp quirk list.

Reported-by: Andrés Barrantes Silman <andresbs2000@protonmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Link: https://lore.kernel.org/r/nycvar.YFH.7.76.2009252337340.3336@cbobk.fhfr.pm
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/serio/i8042-x86ia64io.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h
index 37fb9aa88f9c..a4c9b9652560 100644
--- a/drivers/input/serio/i8042-x86ia64io.h
+++ b/drivers/input/serio/i8042-x86ia64io.h
@@ -721,6 +721,13 @@ static const struct dmi_system_id __initconst i8042_dmi_nopnp_table[] = {
 			DMI_MATCH(DMI_BOARD_VENDOR, "MICRO-STAR INTERNATIONAL CO., LTD"),
 		},
 	},
+	{
+		/* Acer Aspire 5 A515 */
+		.matches = {
+			DMI_MATCH(DMI_BOARD_NAME, "Grumpy_PK"),
+			DMI_MATCH(DMI_BOARD_VENDOR, "PK"),
+		},
+	},
 	{ }
 };
 

From 9666ea66a74adfe295cb3a8760c76e1ef70f9caf Mon Sep 17 00:00:00 2001
From: Wilken Gottwalt <wilken.gottwalt@mailbox.org>
Date: Mon, 28 Sep 2020 11:01:04 +0200
Subject: [PATCH 220/322] net: usb: ax88179_178a: fix missing stop entry in
 driver_info

Adds the missing .stop entry in the Belkin driver_info structure.

Fixes: e20bd60bf62a ("net: usb: asix88179_178a: Add support for the Belkin B2B128")
Signed-off-by: Wilken Gottwalt <wilken.gottwalt@mailbox.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/ax88179_178a.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c
index 125f7bf57590..8f1798b95a02 100644
--- a/drivers/net/usb/ax88179_178a.c
+++ b/drivers/net/usb/ax88179_178a.c
@@ -1823,6 +1823,7 @@ static const struct driver_info belkin_info = {
 	.status = ax88179_status,
 	.link_reset = ax88179_link_reset,
 	.reset	= ax88179_reset,
+	.stop	= ax88179_stop,
 	.flags	= FLAG_ETHER | FLAG_FRAMING_AX,
 	.rx_fixup = ax88179_rx_fixup,
 	.tx_fixup = ax88179_tx_fixup,

From c92a79829c7c169139874aa1d4bf6da32d10c38a Mon Sep 17 00:00:00 2001
From: Wilken Gottwalt <wilken.gottwalt@mailbox.org>
Date: Mon, 28 Sep 2020 11:17:40 +0200
Subject: [PATCH 221/322] net: usb: ax88179_178a: add MCT usb 3.0 adapter

Adds the driver_info and usb ids of the AX88179 based MCT U3-A9003 USB
3.0 ethernet adapter.

Signed-off-by: Wilken Gottwalt <wilken.gottwalt@mailbox.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/ax88179_178a.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c
index 8f1798b95a02..5541f3faedbc 100644
--- a/drivers/net/usb/ax88179_178a.c
+++ b/drivers/net/usb/ax88179_178a.c
@@ -1842,6 +1842,19 @@ static const struct driver_info toshiba_info = {
 	.tx_fixup = ax88179_tx_fixup,
 };
 
+static const struct driver_info mct_info = {
+	.description = "MCT USB 3.0 Gigabit Ethernet Adapter",
+	.bind	= ax88179_bind,
+	.unbind	= ax88179_unbind,
+	.status	= ax88179_status,
+	.link_reset = ax88179_link_reset,
+	.reset	= ax88179_reset,
+	.stop	= ax88179_stop,
+	.flags	= FLAG_ETHER | FLAG_FRAMING_AX,
+	.rx_fixup = ax88179_rx_fixup,
+	.tx_fixup = ax88179_tx_fixup,
+};
+
 static const struct usb_device_id products[] = {
 {
 	/* ASIX AX88179 10/100/1000 */
@@ -1879,6 +1892,10 @@ static const struct usb_device_id products[] = {
 	/* Toshiba USB 3.0 GBit Ethernet Adapter */
 	USB_DEVICE(0x0930, 0x0a13),
 	.driver_info = (unsigned long)&toshiba_info,
+}, {
+	/* Magic Control Technology U3-A9003 USB 3.0 Gigabit Ethernet Adapter */
+	USB_DEVICE(0x0711, 0x0179),
+	.driver_info = (unsigned long)&mct_info,
 },
 	{ },
 };

From 3ddf9b431b931544dc2f94e8f6a055ff501436fa Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Mon, 28 Sep 2020 17:53:29 -0700
Subject: [PATCH 222/322] genetlink: add missing kdoc for validation flags

Validation flags are missing kdoc, add it.

Fixes: ef6243acb478 ("genetlink: optionally validate strictly/dumps")
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/genetlink.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index 6e5f1e1aa822..8899d7429ccb 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -138,6 +138,7 @@ genl_dumpit_info(struct netlink_callback *cb)
  * @cmd: command identifier
  * @internal_flags: flags used by the family
  * @flags: flags
+ * @validate: validation flags from enum genl_validate_flags
  * @doit: standard command callback
  * @start: start callback for dumps
  * @dumpit: callback for dumpers

From 78b70155dc73280be2c7084a3be591161cdc6d0c Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Mon, 28 Sep 2020 17:58:41 -0700
Subject: [PATCH 223/322] ethtool: mark netlink family as __ro_after_init

Like all genl families ethtool_genl_family needs to not
be a straight up constant, because it's modified/initialized
by genl_register_family(). After init, however, it's only
passed to genlmsg_put() & co. therefore we can mark it
as __ro_after_init.

Since genl_family structure contains function pointers
mark this as a fix.

Fixes: 2b4a8990b7df ("ethtool: introduce ethtool netlink interface")
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ethtool/netlink.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index 5c2072765be7..0c3f54baec4e 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -866,7 +866,7 @@ static const struct genl_multicast_group ethtool_nl_mcgrps[] = {
 	[ETHNL_MCGRP_MONITOR] = { .name = ETHTOOL_MCGRP_MONITOR_NAME },
 };
 
-static struct genl_family ethtool_genl_family = {
+static struct genl_family ethtool_genl_family __ro_after_init = {
 	.name		= ETHTOOL_GENL_NAME,
 	.version	= ETHTOOL_GENL_VERSION,
 	.netnsok	= true,

From c8d317aa1887b40b188ec3aaa6e9e524333caed1 Mon Sep 17 00:00:00 2001
From: Hao Xu <haoxu@linux.alibaba.com>
Date: Tue, 29 Sep 2020 20:00:45 +0800
Subject: [PATCH 224/322] io_uring: fix async buffered reads when readahead is
 disabled

The async buffered reads feature is not working when readahead is
turned off. There are two things to concern:

- when doing retry in io_read, not only the IOCB_WAITQ flag but also
  the IOCB_NOWAIT flag is still set, which makes it goes to would_block
  phase in generic_file_buffered_read() and then return -EAGAIN. After
  that, the io-wq thread work is queued, and later doing the async
  reads in the old way.

- even if we remove IOCB_NOWAIT when doing retry, the feature is still
  not running properly, since in generic_file_buffered_read() it goes to
  lock_page_killable() after calling mapping->a_ops->readpage() to do
  IO, and thus causing process to sleep.

Fixes: 1a0a7853b901 ("mm: support async buffered reads in generic_file_buffered_read()")
Fixes: 3b2a4439e0ae ("io_uring: get rid of kiocb_wait_page_queue_init()")
Signed-off-by: Hao Xu <haoxu@linux.alibaba.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/io_uring.c | 2 ++
 mm/filemap.c  | 6 +++++-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 556e4a2ead07..b89cc87830b1 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3049,6 +3049,7 @@ static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode,
 	if (!wake_page_match(wpq, key))
 		return 0;
 
+	req->rw.kiocb.ki_flags &= ~IOCB_WAITQ;
 	list_del_init(&wait->entry);
 
 	init_task_work(&req->task_work, io_req_task_submit);
@@ -3106,6 +3107,7 @@ static bool io_rw_should_retry(struct io_kiocb *req)
 	wait->wait.flags = 0;
 	INIT_LIST_HEAD(&wait->wait.entry);
 	kiocb->ki_flags |= IOCB_WAITQ;
+	kiocb->ki_flags &= ~IOCB_NOWAIT;
 	kiocb->ki_waitq = wait;
 
 	io_get_req_task(req);
diff --git a/mm/filemap.c b/mm/filemap.c
index 1aaea26556cc..ea383478fc22 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2267,7 +2267,11 @@ readpage:
 		}
 
 		if (!PageUptodate(page)) {
-			error = lock_page_killable(page);
+			if (iocb->ki_flags & IOCB_WAITQ)
+				error = lock_page_async(page, iocb->ki_waitq);
+			else
+				error = lock_page_killable(page);
+
 			if (unlikely(error))
 				goto readpage_error;
 			if (!PageUptodate(page)) {

From 632bfb6323799c087fcb4108dfe59518609667a7 Mon Sep 17 00:00:00 2001
From: yangerkun <yangerkun@huawei.com>
Date: Sat, 5 Sep 2020 19:25:56 +0800
Subject: [PATCH 225/322] blk-mq: call commit_rqs while list empty but error
 happen

Blk-mq should call commit_rqs once 'bd.last != true' and no more
request will come(so virtscsi can kick the virtqueue, e.g.). We already
do that in 'blk_mq_dispatch_rq_list/blk_mq_try_issue_list_directly' while
list not empty and 'queued > 0'. However, we can seen the same scene
once the last request in list call queue_rq and return error like
BLK_STS_IOERR which will not requeue the request, and lead that list
empty but need call commit_rqs too(Or the request for virtscsi will stay
timeout until other request kick virtqueue).

We found this problem by do fsstress test with offline/online virtscsi
device repeat quickly.

Fixes: d666ba98f849 ("blk-mq: add mq_ops->commit_rqs()")
Reported-by: zhangyi (F) <yi.zhang@huawei.com>
Signed-off-by: yangerkun <yangerkun@huawei.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index b3d2785eefe9..cdced4aca2e8 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1412,6 +1412,11 @@ out:
 
 	hctx->dispatched[queued_to_index(queued)]++;
 
+	/* If we didn't flush the entire list, we could have told the driver
+	 * there was more coming, but that turned out to be a lie.
+	 */
+	if ((!list_empty(list) || errors) && q->mq_ops->commit_rqs && queued)
+		q->mq_ops->commit_rqs(hctx);
 	/*
 	 * Any items that need requeuing? Stuff them into hctx->dispatch,
 	 * that is where we will continue on next queue run.
@@ -1425,14 +1430,6 @@ out:
 
 		blk_mq_release_budgets(q, nr_budgets);
 
-		/*
-		 * If we didn't flush the entire list, we could have told
-		 * the driver there was more coming, but that turned out to
-		 * be a lie.
-		 */
-		if (q->mq_ops->commit_rqs && queued)
-			q->mq_ops->commit_rqs(hctx);
-
 		spin_lock(&hctx->lock);
 		list_splice_tail_init(list, &hctx->dispatch);
 		spin_unlock(&hctx->lock);
@@ -2079,6 +2076,7 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
 		struct list_head *list)
 {
 	int queued = 0;
+	int errors = 0;
 
 	while (!list_empty(list)) {
 		blk_status_t ret;
@@ -2095,6 +2093,7 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
 				break;
 			}
 			blk_mq_end_request(rq, ret);
+			errors++;
 		} else
 			queued++;
 	}
@@ -2104,7 +2103,8 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
 	 * the driver there was more coming, but that turned out to
 	 * be a lie.
 	 */
-	if (!list_empty(list) && hctx->queue->mq_ops->commit_rqs && queued)
+	if ((!list_empty(list) || errors) &&
+	     hctx->queue->mq_ops->commit_rqs && queued)
 		hctx->queue->mq_ops->commit_rqs(hctx);
 }
 

From 9cf51446e68607136e42a4e531a30c888c472463 Mon Sep 17 00:00:00 2001
From: He Zhe <zhe.he@windriver.com>
Date: Mon, 28 Sep 2020 17:00:23 +0800
Subject: [PATCH 226/322] bpf, powerpc: Fix misuse of fallthrough in
 bpf_jit_comp()

The user defined label following "fallthrough" is not considered by GCC
and causes build failure.

kernel-source/include/linux/compiler_attributes.h:208:41: error: attribute
'fallthrough' not preceding a case label or default label [-Werror]
 208   define fallthrough _attribute((fallthrough_))
                          ^~~~~~~~~~~~~

Fixes: df561f6688fe ("treewide: Use fallthrough pseudo-keyword")
Signed-off-by: He Zhe <zhe.he@windriver.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Gustavo A. R. Silva <gustavoars@kernel.org>
Link: https://lore.kernel.org/bpf/20200928090023.38117-1-zhe.he@windriver.com
---
 arch/powerpc/net/bpf_jit_comp.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 78d61f97371e..e809cb5a1631 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -475,7 +475,6 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
 		case BPF_JMP | BPF_JSET | BPF_K:
 		case BPF_JMP | BPF_JSET | BPF_X:
 			true_cond = COND_NE;
-			fallthrough;
 		cond_branch:
 			/* same targets, can avoid doing the test :) */
 			if (filter[i].jt == filter[i].jf) {

From 851e6f61cd076954f9d521e0d79b173ad3a2453b Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Tue, 29 Sep 2020 12:27:23 -0400
Subject: [PATCH 227/322] tracing: Fix trace_find_next_entry() accounting of
 temp buffer size

The temp buffer size variable for trace_find_next_entry() was incorrectly
being updated when the size did not change. The temp buffer size should only
be updated when it is reallocated.

This is mostly an issue when used with ftrace_dump(). That's because
ftrace_dump() can not allocate a new buffer, and instead uses a temporary
buffer with a fix size. But the variable that keeps track of that size is
incorrectly updated with each call, and it could fall into the path that
would try to reallocate the buffer and produce a warning.

 ------------[ cut here ]------------
 WARNING: CPU: 1 PID: 1601 at kernel/trace/trace.c:3548
trace_find_next_entry+0xd0/0xe0
 Modules linked in [..]
 CPU: 1 PID: 1601 Comm: bash Not tainted 5.9.0-rc5-test+ #521
 Hardware name: Hewlett-Packard HP Compaq Pro 6300 SFF/339A, BIOS K01 v03.03
07/14/2016
 RIP: 0010:trace_find_next_entry+0xd0/0xe0
 Code: 40 21 00 00 4c 89 e1 31 d2 4c 89 ee 48 89 df e8 c6 9e ff ff 89 ab 54
21 00 00 5b 5d 41 5c 41 5d c3 48 63 d5 eb bf 31 c0 eb f0 <0f> 0b 48 63 d5 eb
b4 66 0f 1f 84 00 00 00 00 00 53 48 8d 8f 60 21
 RSP: 0018:ffff95a4f2e8bd70 EFLAGS: 00010046
 RAX: ffffffff96679fc0 RBX: ffffffff97910de0 RCX: ffffffff96679fc0
 RDX: ffff95a4f2e8bd98 RSI: ffff95a4ee321098 RDI: ffffffff97913000
 RBP: 0000000000000018 R08: 0000000000000000 R09: 0000000000000000
 R10: 0000000000000001 R11: 0000000000000046 R12: ffff95a4f2e8bd98
 R13: 0000000000000000 R14: ffff95a4ee321098 R15: 00000000009aa301
 FS:  00007f8565484740(0000) GS:ffff95a55aa40000(0000)
knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 000055876bd43d90 CR3: 00000000b76e6003 CR4: 00000000001706e0
 Call Trace:
  trace_print_lat_context+0x58/0x2d0
  ? cpumask_next+0x16/0x20
  print_trace_line+0x1a4/0x4f0
  ftrace_dump.cold+0xad/0x12c
  __handle_sysrq.cold+0x51/0x126
  write_sysrq_trigger+0x3f/0x4a
  proc_reg_write+0x53/0x80
  vfs_write+0xca/0x210
  ksys_write+0x70/0xf0
  do_syscall_64+0x33/0x40
  entry_SYSCALL_64_after_hwframe+0x44/0xa9
 RIP: 0033:0x7f8565579487
 Code: 64 89 02 48 c7 c0 ff ff ff ff eb bb 0f 1f 80 00 00 00 00 f3 0f 1e fa
64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff
77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74 24
 RSP: 002b:00007ffd40707948 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
 RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f8565579487
 RDX: 0000000000000002 RSI: 000055876bd74de0 RDI: 0000000000000001
 RBP: 000055876bd74de0 R08: 000000000000000a R09: 0000000000000001
 R10: 000055876bdec280 R11: 0000000000000246 R12: 0000000000000002
 R13: 00007f856564a500 R14: 0000000000000002 R15: 00007f856564a700
 irq event stamp: 109958
 ---[ end trace 7aab5b7e51484b00 ]---

Not only fix the updating of the temp buffer, but also do not free the temp
buffer before a new buffer is allocated (there's no reason to not continue
to use the current temp buffer if an allocation fails).

Cc: stable@vger.kernel.org
Fixes: 8e99cf91b99bb ("tracing: Do not allocate buffer in trace_find_next_entry() in atomic")
Reported-by: Anna-Maria Behnsen <anna-maria@linutronix.de>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 2a7c26345e83..d3e5de717df2 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3546,13 +3546,15 @@ struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
 	if (iter->ent && iter->ent != iter->temp) {
 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
-			kfree(iter->temp);
-			iter->temp = kmalloc(iter->ent_size, GFP_KERNEL);
-			if (!iter->temp)
+			void *temp;
+			temp = kmalloc(iter->ent_size, GFP_KERNEL);
+			if (!temp)
 				return NULL;
+			kfree(iter->temp);
+			iter->temp = temp;
+			iter->temp_size = iter->ent_size;
 		}
 		memcpy(iter->temp, iter->ent, iter->ent_size);
-		iter->temp_size = iter->ent_size;
 		iter->ent = iter->temp;
 	}
 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);

From b40341fad6cc2daa195f8090fd3348f18fff640a Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Tue, 29 Sep 2020 12:40:31 -0400
Subject: [PATCH 228/322] ftrace: Move RCU is watching check after recursion
 check

The first thing that the ftrace function callback helper functions should do
is to check for recursion. Peter Zijlstra found that when
"rcu_is_watching()" had its notrace removed, it caused perf function tracing
to crash. This is because the call of rcu_is_watching() is tested before
function recursion is checked and and if it is traced, it will cause an
infinite recursion loop.

rcu_is_watching() should still stay notrace, but to prevent this should
never had crashed in the first place. The recursion prevention must be the
first thing done in callback functions.

Link: https://lore.kernel.org/r/20200929112541.GM2628@hirez.programming.kicks-ass.net

Cc: stable@vger.kernel.org
Cc: Paul McKenney <paulmck@kernel.org>
Fixes: c68c0fa293417 ("ftrace: Have ftrace_ops_get_func() handle RCU and PER_CPU flags too")
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reported-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/trace/ftrace.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 603255f5f085..541453927c82 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -6993,16 +6993,14 @@ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip,
 {
 	int bit;
 
-	if ((op->flags & FTRACE_OPS_FL_RCU) && !rcu_is_watching())
-		return;
-
 	bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX);
 	if (bit < 0)
 		return;
 
 	preempt_disable_notrace();
 
-	op->func(ip, parent_ip, op, regs);
+	if (!(op->flags & FTRACE_OPS_FL_RCU) || rcu_is_watching())
+		op->func(ip, parent_ip, op, regs);
 
 	preempt_enable_notrace();
 	trace_clear_recursion(bit);

From 1a03b8a35a957f9f38ecb8a97443b7380bbf6a8b Mon Sep 17 00:00:00 2001
From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Date: Tue, 29 Sep 2020 09:58:06 +0800
Subject: [PATCH 229/322] virtio-net: don't disable guest csum when disable LRO

Open vSwitch and Linux bridge will disable LRO of the interface
when this interface added to them. Now when disable the LRO, the
virtio-net csum is disable too. That drops the forwarding performance.

Fixes: a02e8964eaf9 ("virtio-net: ethtool configurable LRO")
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: Willem de Bruijn <willemb@google.com>
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/virtio_net.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 263b005981bd..668685c09e65 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -63,6 +63,11 @@ static const unsigned long guest_offloads[] = {
 	VIRTIO_NET_F_GUEST_CSUM
 };
 
+#define GUEST_OFFLOAD_LRO_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
+				(1ULL << VIRTIO_NET_F_GUEST_TSO6) | \
+				(1ULL << VIRTIO_NET_F_GUEST_ECN)  | \
+				(1ULL << VIRTIO_NET_F_GUEST_UFO))
+
 struct virtnet_stat_desc {
 	char desc[ETH_GSTRING_LEN];
 	size_t offset;
@@ -2531,7 +2536,8 @@ static int virtnet_set_features(struct net_device *dev,
 		if (features & NETIF_F_LRO)
 			offloads = vi->guest_offloads_capable;
 		else
-			offloads = 0;
+			offloads = vi->guest_offloads_capable &
+				   ~GUEST_OFFLOAD_LRO_MASK;
 
 		err = virtnet_set_guest_offloads(vi, offloads);
 		if (err)

From bbc4d71d63549bcd003a430de18a72a742d8c91e Mon Sep 17 00:00:00 2001
From: Willy Liu <willy.liu@realtek.com>
Date: Tue, 29 Sep 2020 10:10:49 +0800
Subject: [PATCH 230/322] net: phy: realtek: fix rtl8211e rx/tx delay config

There are two chip pins named TXDLY and RXDLY which actually adds the 2ns
delays to TXC and RXC for TXD/RXD latching. These two pins can config via
4.7k-ohm resistor to 3.3V hw setting, but also config via software setting
(extension page 0xa4 register 0x1c bit13 12 and 11).

The configuration register definitions from table 13 official PHY datasheet:
PHYAD[2:0] = PHY Address
AN[1:0] = Auto-Negotiation
Mode = Interface Mode Select
RX Delay = RX Delay
TX Delay = TX Delay
SELRGV = RGMII/GMII Selection

This table describes how to config these hw pins via external pull-high or pull-
low resistor.

It is a misunderstanding that mapping it as register bits below:
8:6 = PHY Address
5:4 = Auto-Negotiation
3 = Interface Mode Select
2 = RX Delay
1 = TX Delay
0 = SELRGV
So I removed these descriptions above and add related settings as below:
14 = reserved
13 = force Tx RX Delay controlled by bit12 bit11
12 = Tx Delay
11 = Rx Delay
10:0 = Test && debug settings reserved by realtek

Test && debug settings are not recommend to modify by default.

Fixes: f81dadbcf7fd ("net: phy: realtek: Add rtl8211e rx/tx delays config")
Signed-off-by: Willy Liu <willy.liu@realtek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/realtek.c | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c
index 95dbe5e8e1d8..0f0960971800 100644
--- a/drivers/net/phy/realtek.c
+++ b/drivers/net/phy/realtek.c
@@ -1,6 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0+
-/*
- * drivers/net/phy/realtek.c
+/* drivers/net/phy/realtek.c
  *
  * Driver for Realtek PHYs
  *
@@ -32,9 +31,9 @@
 #define RTL8211F_TX_DELAY			BIT(8)
 #define RTL8211F_RX_DELAY			BIT(3)
 
-#define RTL8211E_TX_DELAY			BIT(1)
-#define RTL8211E_RX_DELAY			BIT(2)
-#define RTL8211E_MODE_MII_GMII			BIT(3)
+#define RTL8211E_CTRL_DELAY			BIT(13)
+#define RTL8211E_TX_DELAY			BIT(12)
+#define RTL8211E_RX_DELAY			BIT(11)
 
 #define RTL8201F_ISR				0x1e
 #define RTL8201F_IER				0x13
@@ -246,16 +245,16 @@ static int rtl8211e_config_init(struct phy_device *phydev)
 	/* enable TX/RX delay for rgmii-* modes, and disable them for rgmii. */
 	switch (phydev->interface) {
 	case PHY_INTERFACE_MODE_RGMII:
-		val = 0;
+		val = RTL8211E_CTRL_DELAY | 0;
 		break;
 	case PHY_INTERFACE_MODE_RGMII_ID:
-		val = RTL8211E_TX_DELAY | RTL8211E_RX_DELAY;
+		val = RTL8211E_CTRL_DELAY | RTL8211E_TX_DELAY | RTL8211E_RX_DELAY;
 		break;
 	case PHY_INTERFACE_MODE_RGMII_RXID:
-		val = RTL8211E_RX_DELAY;
+		val = RTL8211E_CTRL_DELAY | RTL8211E_RX_DELAY;
 		break;
 	case PHY_INTERFACE_MODE_RGMII_TXID:
-		val = RTL8211E_TX_DELAY;
+		val = RTL8211E_CTRL_DELAY | RTL8211E_TX_DELAY;
 		break;
 	default: /* the rest of the modes imply leaving delays as is. */
 		return 0;
@@ -263,11 +262,12 @@ static int rtl8211e_config_init(struct phy_device *phydev)
 
 	/* According to a sample driver there is a 0x1c config register on the
 	 * 0xa4 extension page (0x7) layout. It can be used to disable/enable
-	 * the RX/TX delays otherwise controlled by RXDLY/TXDLY pins. It can
-	 * also be used to customize the whole configuration register:
-	 * 8:6 = PHY Address, 5:4 = Auto-Negotiation, 3 = Interface Mode Select,
-	 * 2 = RX Delay, 1 = TX Delay, 0 = SELRGV (see original PHY datasheet
-	 * for details).
+	 * the RX/TX delays otherwise controlled by RXDLY/TXDLY pins.
+	 * The configuration register definition:
+	 * 14 = reserved
+	 * 13 = Force Tx RX Delay controlled by bit12 bit11,
+	 * 12 = RX Delay, 11 = TX Delay
+	 * 10:0 = Test && debug settings reserved by realtek
 	 */
 	oldpage = phy_select_page(phydev, 0x7);
 	if (oldpage < 0)
@@ -277,7 +277,8 @@ static int rtl8211e_config_init(struct phy_device *phydev)
 	if (ret)
 		goto err_restore_page;
 
-	ret = __phy_modify(phydev, 0x1c, RTL8211E_TX_DELAY | RTL8211E_RX_DELAY,
+	ret = __phy_modify(phydev, 0x1c, RTL8211E_CTRL_DELAY
+			   | RTL8211E_TX_DELAY | RTL8211E_RX_DELAY,
 			   val);
 
 err_restore_page:

From 460e985ea07ec23d46af257e84e57b5409576577 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 29 Sep 2020 14:20:24 +0300
Subject: [PATCH 231/322] net: dsa: felix: fix incorrect action offsets for
 VCAP IS2

The port mask width was larger than the actual number of ports, and
therefore, all fields following this one were also shifted by the number
of excess bits. But the driver doesn't use the REW_OP, SMAC_REPLACE_ENA
or ACL_ID bits from the action vector, so the bug was inconsequential.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/ocelot/felix_vsc9959.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c
index 36db631a55e6..d9c90849274e 100644
--- a/drivers/net/dsa/ocelot/felix_vsc9959.c
+++ b/drivers/net/dsa/ocelot/felix_vsc9959.c
@@ -685,12 +685,12 @@ static struct vcap_field vsc9959_vcap_is2_actions[] = {
 	[VCAP_IS2_ACT_POLICE_ENA]		= {  9,  1},
 	[VCAP_IS2_ACT_POLICE_IDX]		= { 10,  9},
 	[VCAP_IS2_ACT_POLICE_VCAP_ONLY]		= { 19,  1},
-	[VCAP_IS2_ACT_PORT_MASK]		= { 20, 11},
-	[VCAP_IS2_ACT_REW_OP]			= { 31,  9},
-	[VCAP_IS2_ACT_SMAC_REPLACE_ENA]		= { 40,  1},
-	[VCAP_IS2_ACT_RSV]			= { 41,  2},
-	[VCAP_IS2_ACT_ACL_ID]			= { 43,  6},
-	[VCAP_IS2_ACT_HIT_CNT]			= { 49, 32},
+	[VCAP_IS2_ACT_PORT_MASK]		= { 20,  6},
+	[VCAP_IS2_ACT_REW_OP]			= { 26,  9},
+	[VCAP_IS2_ACT_SMAC_REPLACE_ENA]		= { 35,  1},
+	[VCAP_IS2_ACT_RSV]			= { 36,  2},
+	[VCAP_IS2_ACT_ACL_ID]			= { 38,  6},
+	[VCAP_IS2_ACT_HIT_CNT]			= { 44, 32},
 };
 
 static const struct vcap_props vsc9959_vcap_props[] = {

From eaa0355c668d2793e6ced32926fa22d518752845 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 29 Sep 2020 14:20:25 +0300
Subject: [PATCH 232/322] net: dsa: seville: fix VCAP IS2 action width

Since the actions are packed together in the action RAM, an incorrect
action width means that no action except the first one would behave
correctly.

The tc-flower offload has probably not been tested on this hardware
since its introduction.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/ocelot/seville_vsc9953.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/dsa/ocelot/seville_vsc9953.c b/drivers/net/dsa/ocelot/seville_vsc9953.c
index 29df0797ecf5..110070a60041 100644
--- a/drivers/net/dsa/ocelot/seville_vsc9953.c
+++ b/drivers/net/dsa/ocelot/seville_vsc9953.c
@@ -706,7 +706,7 @@ static const struct vcap_props vsc9953_vcap_props[] = {
 		.action_type_width = 1,
 		.action_table = {
 			[IS2_ACTION_TYPE_NORMAL] = {
-				.width = 44,
+				.width = 50, /* HIT_CNT not included */
 				.count = 2
 			},
 			[IS2_ACTION_TYPE_SMAC_SIP] = {

From 64ff609b551914d8c2c51d828920f2ffc5237de7 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Mon, 28 Sep 2020 10:22:56 -0500
Subject: [PATCH 233/322] dt-bindings: Fix 'reg' size issues in zynqmp examples

The default sizes in examples for 'reg' are 1 cell each. Fix the
incorrect sizes in zynqmp examples:

Documentation/devicetree/bindings/dma/xilinx/xlnx,zynqmp-dpdma.example.dt.yaml: example-0: dma-controller@fd4c0000:reg:0: [0, 4249616384, 0, 4096] is too long
	From schema: /usr/local/lib/python3.8/dist-packages/dtschema/schemas/reg.yaml
Documentation/devicetree/bindings/display/xlnx/xlnx,zynqmp-dpsub.example.dt.yaml: example-0: display@fd4a0000:reg:0: [0, 4249485312, 0, 4096] is too long
	From schema: /usr/local/lib/python3.8/dist-packages/dtschema/schemas/reg.yaml
Documentation/devicetree/bindings/display/xlnx/xlnx,zynqmp-dpsub.example.dt.yaml: example-0: display@fd4a0000:reg:1: [0, 4249526272, 0, 4096] is too long
	From schema: /usr/local/lib/python3.8/dist-packages/dtschema/schemas/reg.yaml
Documentation/devicetree/bindings/display/xlnx/xlnx,zynqmp-dpsub.example.dt.yaml: example-0: display@fd4a0000:reg:2: [0, 4249530368, 0, 4096] is too long
	From schema: /usr/local/lib/python3.8/dist-packages/dtschema/schemas/reg.yaml
Documentation/devicetree/bindings/display/xlnx/xlnx,zynqmp-dpsub.example.dt.yaml: example-0: display@fd4a0000:reg:3: [0, 4249534464, 0, 4096] is too long
	From schema: /usr/local/lib/python3.8/dist-packages/dtschema/schemas/reg.yaml

Cc: Hyun Kwon <hyun.kwon@xilinx.com>
Cc: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Cc: Vinod Koul <vkoul@kernel.org>
Cc: dri-devel@lists.freedesktop.org
Cc: dmaengine@vger.kernel.org
Acked-by: Michal Simek <michal.simek@xilinx.com>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 .../bindings/display/xlnx/xlnx,zynqmp-dpsub.yaml          | 8 ++++----
 .../devicetree/bindings/dma/xilinx/xlnx,zynqmp-dpdma.yaml | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/Documentation/devicetree/bindings/display/xlnx/xlnx,zynqmp-dpsub.yaml b/Documentation/devicetree/bindings/display/xlnx/xlnx,zynqmp-dpsub.yaml
index 52a939cade3b..7b9d468c3e52 100644
--- a/Documentation/devicetree/bindings/display/xlnx/xlnx,zynqmp-dpsub.yaml
+++ b/Documentation/devicetree/bindings/display/xlnx/xlnx,zynqmp-dpsub.yaml
@@ -145,10 +145,10 @@ examples:
 
     display@fd4a0000 {
         compatible = "xlnx,zynqmp-dpsub-1.7";
-        reg = <0x0 0xfd4a0000 0x0 0x1000>,
-              <0x0 0xfd4aa000 0x0 0x1000>,
-              <0x0 0xfd4ab000 0x0 0x1000>,
-              <0x0 0xfd4ac000 0x0 0x1000>;
+        reg = <0xfd4a0000 0x1000>,
+              <0xfd4aa000 0x1000>,
+              <0xfd4ab000 0x1000>,
+              <0xfd4ac000 0x1000>;
         reg-names = "dp", "blend", "av_buf", "aud";
         interrupts = <0 119 4>;
         interrupt-parent = <&gic>;
diff --git a/Documentation/devicetree/bindings/dma/xilinx/xlnx,zynqmp-dpdma.yaml b/Documentation/devicetree/bindings/dma/xilinx/xlnx,zynqmp-dpdma.yaml
index 5de510f8c88c..2a595b18ff6c 100644
--- a/Documentation/devicetree/bindings/dma/xilinx/xlnx,zynqmp-dpdma.yaml
+++ b/Documentation/devicetree/bindings/dma/xilinx/xlnx,zynqmp-dpdma.yaml
@@ -57,7 +57,7 @@ examples:
 
     dma: dma-controller@fd4c0000 {
       compatible = "xlnx,zynqmp-dpdma";
-      reg = <0x0 0xfd4c0000 0x0 0x1000>;
+      reg = <0xfd4c0000 0x1000>;
       interrupts = <GIC_SPI 122 IRQ_TYPE_LEVEL_HIGH>;
       interrupt-parent = <&gic>;
       clocks = <&dpdma_clk>;

From efe84d408bf41975db8506d3a1cc02e794e2309c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Sat, 19 Sep 2020 16:39:22 +0200
Subject: [PATCH 234/322] scripts/dtc: only append to HOST_EXTRACFLAGS instead
 of overwriting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When building with

	$ HOST_EXTRACFLAGS=-g make

the expectation is that host tools are built with debug informations.
This however doesn't happen if the Makefile assigns a new value to the
HOST_EXTRACFLAGS instead of appending to it. So use += instead of := for
the first assignment.

Fixes: e3fd9b5384f3 ("scripts/dtc: consolidate include path options in Makefile")
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 scripts/dtc/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/dtc/Makefile b/scripts/dtc/Makefile
index a698ece43fff..4852bf44e913 100644
--- a/scripts/dtc/Makefile
+++ b/scripts/dtc/Makefile
@@ -9,7 +9,7 @@ dtc-objs	:= dtc.o flattree.o fstree.o data.o livetree.o treesource.o \
 dtc-objs	+= dtc-lexer.lex.o dtc-parser.tab.o
 
 # Source files need to get at the userspace version of libfdt_env.h to compile
-HOST_EXTRACFLAGS := -I $(srctree)/$(src)/libfdt
+HOST_EXTRACFLAGS += -I $(srctree)/$(src)/libfdt
 
 ifeq ($(shell pkg-config --exists yaml-0.1 2>/dev/null && echo yes),)
 ifneq ($(CHECK_DT_BINDING)$(CHECK_DTBS),)

From 97cf32996c46d9935cc133d910a75fb687dd6144 Mon Sep 17 00:00:00 2001
From: Sudheesh Mavila <sudheesh.mavila@amd.com>
Date: Tue, 15 Sep 2020 12:48:20 +0530
Subject: [PATCH 235/322] drm/amd/pm: Removed fixed clock in auto mode DPM

SMU10_UMD_PSTATE_PEAK_FCLK value should not be used to set the DPM.

Suggested-by: Evan Quan <evan.quan@amd.com>
Reviewed-by: Evan Quan <evan.quan@amd.com>
Signed-off-by: Sudheesh Mavila <sudheesh.mavila@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
index 9ee8cf8267c8..43f7adff6cb7 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
@@ -563,6 +563,8 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,
 	struct smu10_hwmgr *data = hwmgr->backend;
 	uint32_t min_sclk = hwmgr->display_config->min_core_set_clock;
 	uint32_t min_mclk = hwmgr->display_config->min_mem_set_clock/100;
+	uint32_t index_fclk = data->clock_vol_info.vdd_dep_on_fclk->count - 1;
+	uint32_t index_socclk = data->clock_vol_info.vdd_dep_on_socclk->count - 1;
 
 	if (hwmgr->smu_version < 0x1E3700) {
 		pr_info("smu firmware version too old, can not set dpm level\n");
@@ -676,13 +678,13 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,
 		smum_send_msg_to_smc_with_parameter(hwmgr,
 						PPSMC_MSG_SetHardMinFclkByFreq,
 						hwmgr->display_config->num_display > 3 ?
-						SMU10_UMD_PSTATE_PEAK_FCLK :
+						data->clock_vol_info.vdd_dep_on_fclk->entries[0].clk :
 						min_mclk,
 						NULL);
 
 		smum_send_msg_to_smc_with_parameter(hwmgr,
 						PPSMC_MSG_SetHardMinSocclkByFreq,
-						SMU10_UMD_PSTATE_MIN_SOCCLK,
+						data->clock_vol_info.vdd_dep_on_socclk->entries[0].clk,
 						NULL);
 		smum_send_msg_to_smc_with_parameter(hwmgr,
 						PPSMC_MSG_SetHardMinVcn,
@@ -695,11 +697,11 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr,
 						NULL);
 		smum_send_msg_to_smc_with_parameter(hwmgr,
 						PPSMC_MSG_SetSoftMaxFclkByFreq,
-						SMU10_UMD_PSTATE_PEAK_FCLK,
+						data->clock_vol_info.vdd_dep_on_fclk->entries[index_fclk].clk,
 						NULL);
 		smum_send_msg_to_smc_with_parameter(hwmgr,
 						PPSMC_MSG_SetSoftMaxSocclkByFreq,
-						SMU10_UMD_PSTATE_PEAK_SOCCLK,
+						data->clock_vol_info.vdd_dep_on_socclk->entries[index_socclk].clk,
 						NULL);
 		smum_send_msg_to_smc_with_parameter(hwmgr,
 						PPSMC_MSG_SetSoftMaxVcn,

From 0c7014154d6397d6a35bf3759839207f1c702a42 Mon Sep 17 00:00:00 2001
From: Jiansong Chen <Jiansong.Chen@amd.com>
Date: Wed, 23 Sep 2020 11:58:23 +0800
Subject: [PATCH 236/322] drm/amdgpu: remove gpu_info fw support for
 sienna_cichlid etc.

Remove gpu_info fw support for sienna_cichlid etc., since the
information can be retrieved from discovery binary.

Signed-off-by: Jiansong Chen <Jiansong.Chen@amd.com>
Reviewed-by: Likun Gao <Likun.Gao@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index eb7cfe87042e..d0b8d0d341af 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -80,8 +80,6 @@ MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin");
 MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
 MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin");
 MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
-MODULE_FIRMWARE("amdgpu/sienna_cichlid_gpu_info.bin");
-MODULE_FIRMWARE("amdgpu/navy_flounder_gpu_info.bin");
 
 #define AMDGPU_RESUME_MS		2000
 
@@ -1600,6 +1598,8 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
 	case CHIP_CARRIZO:
 	case CHIP_STONEY:
 	case CHIP_VEGA20:
+	case CHIP_SIENNA_CICHLID:
+	case CHIP_NAVY_FLOUNDER:
 	default:
 		return 0;
 	case CHIP_VEGA10:
@@ -1631,12 +1631,6 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
 	case CHIP_NAVI12:
 		chip_name = "navi12";
 		break;
-	case CHIP_SIENNA_CICHLID:
-		chip_name = "sienna_cichlid";
-		break;
-	case CHIP_NAVY_FLOUNDER:
-		chip_name = "navy_flounder";
-		break;
 	}
 
 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);

From 898c7302f4de1d91065e80fc46552b3ec70894ff Mon Sep 17 00:00:00 2001
From: Flora Cui <flora.cui@amd.com>
Date: Wed, 23 Sep 2020 14:42:59 +0800
Subject: [PATCH 237/322] drm/amd/display: fix return value check for hdcp_work

max_caps might be 0, thus hdcp_work might be ZERO_SIZE_PTR

Signed-off-by: Flora Cui <flora.cui@amd.com>
Reviewed-by: Feifei Xu <Feifei.Xu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
index 694c5bc93665..c2cd184f0bbd 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
@@ -604,7 +604,7 @@ struct hdcp_workqueue *hdcp_create_workqueue(struct amdgpu_device *adev, struct
 	int i = 0;
 
 	hdcp_work = kcalloc(max_caps, sizeof(*hdcp_work), GFP_KERNEL);
-	if (hdcp_work == NULL)
+	if (ZERO_OR_NULL_PTR(hdcp_work))
 		return NULL;
 
 	hdcp_work->srm = kcalloc(PSP_HDCP_SRM_FIRST_GEN_MAX_SIZE, sizeof(*hdcp_work->srm), GFP_KERNEL);

From c73d05eaba1c2d60558364f2d980090ebec6b082 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Tue, 22 Sep 2020 11:34:16 -0400
Subject: [PATCH 238/322] drm/amdgpu/display: fix CFLAGS setup for DCN30

Properly handle clang and older versions of gcc.

Fixes: e77165bf7b02a3 ("drm/amd/display: Add DCN3 blocks to Makefile")
Acked-by: Nirmoy Das <nirmoy.das@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dcn30/Makefile | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile
index 025637a83c3b..bd2a068f9863 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile
@@ -31,9 +31,21 @@ DCN30 = dcn30_init.o dcn30_hubbub.o dcn30_hubp.o dcn30_dpp.o dcn30_optc.o \
 	dcn30_dio_link_encoder.o dcn30_resource.o
 
 
-CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o := -mhard-float -msse -mpreferred-stack-boundary=4
-
+ifdef CONFIG_X86
 CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o := -mhard-float -msse
+CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o := -mhard-float -msse
+endif
+
+ifdef CONFIG_PPC64
+CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o := -mhard-float -maltivec
+CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o := -mhard-float -maltivec
+endif
+
+ifdef CONFIG_ARM64
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o := -mgeneral-regs-only
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o := -mgeneral-regs-only
+endif
+
 ifdef CONFIG_CC_IS_GCC
 ifeq ($(call cc-ifversion, -lt, 0701, y), y)
 IS_OLD_GCC = 1
@@ -45,8 +57,10 @@ ifdef IS_OLD_GCC
 # GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
 # (8B stack alignment).
 CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o += -mpreferred-stack-boundary=4
+CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o += -mpreferred-stack-boundary=4
 else
 CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o += -msse2
+CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o += -msse2
 endif
 
 AMD_DAL_DCN30 = $(addprefix $(AMDDALPATH)/dc/dcn30/,$(DCN30))

From a39d0d7bdf8c21ac7645c02e9676b5cb2b804c31 Mon Sep 17 00:00:00 2001
From: Jean Delvare <jdelvare@suse.de>
Date: Mon, 28 Sep 2020 11:10:37 +0200
Subject: [PATCH 239/322] drm/amdgpu: restore proper ref count in
 amdgpu_display_crtc_set_config

A recent attempt to fix a ref count leak in
amdgpu_display_crtc_set_config() turned out to be doing too much and
"fixed" an intended decrease as if it were a leak. Undo that part to
restore the proper balance. This is the very nature of this function
to increase or decrease the power reference count depending on the
situation.

Consequences of this bug is that the power reference would
eventually get down to 0 while the display was still in use,
resulting in that display switching off unexpectedly.

Signed-off-by: Jean Delvare <jdelvare@suse.de>
Fixes: e008fa6fb415 ("drm/amdgpu: fix ref count leak in amdgpu_display_crtc_set_config")
Cc: stable@vger.kernel.org
Cc: Navid Emamdoost <navid.emamdoost@gmail.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index d76172965199..44c1f6e00635 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -297,7 +297,7 @@ int amdgpu_display_crtc_set_config(struct drm_mode_set *set,
 	   take the current one */
 	if (active && !adev->have_disp_power_ref) {
 		adev->have_disp_power_ref = true;
-		goto out;
+		return ret;
 	}
 	/* if we have no active crtcs, then drop the power ref
 	   we got before */

From 3c26d0314c10ceb77511e2cc81894001d488c1d0 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Mon, 28 Sep 2020 14:16:25 -0400
Subject: [PATCH 240/322] drm/amdgpu/swsmu/smu12: fix force clock handling for
 mclk

The state array is in the reverse order compared to other asics
(high to low rather than low to high).

Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1313
Reviewed-by: Prike Liang <Prike.Liang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/powerplay/renoir_ppt.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c
index dbb676c482fd..15263cf210d5 100644
--- a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c
@@ -232,14 +232,16 @@ static int renoir_get_profiling_clk_mask(struct smu_context *smu,
 			*sclk_mask = 0;
 	} else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK) {
 		if (mclk_mask)
-			*mclk_mask = 0;
+			/* mclk levels are in reverse order */
+			*mclk_mask = NUM_MEMCLK_DPM_LEVELS - 1;
 	} else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK) {
 		if(sclk_mask)
 			/* The sclk as gfxclk and has three level about max/min/current */
 			*sclk_mask = 3 - 1;
 
 		if(mclk_mask)
-			*mclk_mask = NUM_MEMCLK_DPM_LEVELS - 1;
+			/* mclk levels are in reverse order */
+			*mclk_mask = 0;
 
 		if(soc_mask)
 			*soc_mask = NUM_SOCCLK_DPM_LEVELS - 1;
@@ -333,7 +335,7 @@ static int renoir_get_dpm_ultimate_freq(struct smu_context *smu,
 		case SMU_UCLK:
 		case SMU_FCLK:
 		case SMU_MCLK:
-			ret = renoir_get_dpm_clk_limited(smu, clk_type, 0, min);
+			ret = renoir_get_dpm_clk_limited(smu, clk_type, NUM_MEMCLK_DPM_LEVELS - 1, min);
 			if (ret)
 				goto failed;
 			break;

From 548c7ba7dc9e9dacb43bd2af00b586da48015d04 Mon Sep 17 00:00:00 2001
From: Dirk Gouders <dirk@gouders.net>
Date: Sun, 27 Sep 2020 11:39:45 +0200
Subject: [PATCH 241/322] drm/amd/display: remove duplicate call to
 rn_vbios_smu_get_smu_version()

Commit 78fe9f63947a2b ("drm/amd/display: Remove DISPCLK Limit Floor for Certain SMU Versions")
added a call to rn_vbios_smu_get_smu_version() to set clk_mgr->smu_ver.
That field is initialized prior to the if-statement, already.

Fixes: 78fe9f63947a2b (drm/amd/display: Remove DISPCLK Limit Floor for Certain SMU Versions)
Signed-off-by: Dirk Gouders <dirk@gouders.net>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Sung Lee <sung.lee@amd.com>
Cc: Yongqiang Sun <yongqiang.sun@amd.com>
Cc: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
index 543afa34d87a..21a3073c8929 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
@@ -783,7 +783,6 @@ void rn_clk_mgr_construct(
 	} else {
 		struct clk_log_info log_info = {0};
 
-		clk_mgr->smu_ver = rn_vbios_smu_get_smu_version(clk_mgr);
 		clk_mgr->periodic_retraining_disabled = rn_vbios_smu_is_periodic_retraining_disabled(clk_mgr);
 
 		/* SMU Version 55.51.0 and up no longer have an issue

From 6667df916fceca7d585b34d8bb7a4aea2402f11f Mon Sep 17 00:00:00 2001
From: Tony Nguyen <anthony.l.nguyen@intel.com>
Date: Tue, 29 Sep 2020 14:06:18 -0700
Subject: [PATCH 242/322] MAINTAINERS: Update MAINTAINERS for Intel ethernet
 drivers

Add Jesse Brandeburg and myself; remove Jeff Kirsher.

CC: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
CC: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 6dc9ebf5bf76..eb45c9fdeb21 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8752,7 +8752,8 @@ F:	include/drm/i915*
 F:	include/uapi/drm/i915_drm.h
 
 INTEL ETHERNET DRIVERS
-M:	Jeff Kirsher <jeffrey.t.kirsher@intel.com>
+M:	Jesse Brandeburg <jesse.brandeburg@intel.com>
+M:	Tony Nguyen <anthony.l.nguyen@intel.com>
 L:	intel-wired-lan@lists.osuosl.org (moderated for non-subscribers)
 S:	Supported
 W:	http://www.intel.com/support/feedback.htm

From d120c9a81e32c43cba8017dec873b6a414898716 Mon Sep 17 00:00:00 2001
From: Kevin Brace <kevinbrace@bracecomputerlab.com>
Date: Tue, 29 Sep 2020 13:09:40 -0700
Subject: [PATCH 243/322] via-rhine: Fix for the hardware having a reset
 failure after resume

In rhine_resume() and rhine_suspend(), the code calls netif_running()
to see if the network interface is down or not.  If it is down (i.e.,
netif_running() returning false), they will skip any housekeeping work
within the function relating to the hardware.  This becomes a problem
when the hardware resumes from a standby since it is counting on
rhine_resume() to map its MMIO and power up rest of the hardware.
Not getting its MMIO remapped and rest of the hardware powered
up lead to a soft reset failure and hardware disappearance.  The
solution is to map its MMIO and power up rest of the hardware inside
rhine_open() before soft reset is to be performed.  This solution was
verified on ASUS P5V800-VM mainboard's integrated Rhine-II Ethernet
MAC inside VIA Technologies VT8251 South Bridge.

Signed-off-by: Kevin Brace <kevinbrace@bracecomputerlab.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/via/via-rhine.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c
index 803247d51fe9..a20492da3407 100644
--- a/drivers/net/ethernet/via/via-rhine.c
+++ b/drivers/net/ethernet/via/via-rhine.c
@@ -1706,6 +1706,8 @@ static int rhine_open(struct net_device *dev)
 		goto out_free_ring;
 
 	alloc_tbufs(dev);
+	enable_mmio(rp->pioaddr, rp->quirks);
+	rhine_power_init(dev);
 	rhine_chip_reset(dev);
 	rhine_task_enable(rp);
 	init_registers(dev);

From aa15190cf2cf25ec7e6c6d7373ae3ca563d48601 Mon Sep 17 00:00:00 2001
From: Kevin Brace <kevinbrace@bracecomputerlab.com>
Date: Tue, 29 Sep 2020 13:09:41 -0700
Subject: [PATCH 244/322] via-rhine: VTunknown1 device is really VT8251 South
 Bridge

The VIA Technologies VT8251 South Bridge's integrated Rhine-II
Ethernet MAC comes has a PCI revision value of 0x7c.  This was
verified on ASUS P5V800-VM mainboard.

Signed-off-by: Kevin Brace <kevinbrace@bracecomputerlab.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/via/via-rhine.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c
index a20492da3407..d3a2be2e75d0 100644
--- a/drivers/net/ethernet/via/via-rhine.c
+++ b/drivers/net/ethernet/via/via-rhine.c
@@ -243,7 +243,7 @@ enum rhine_revs {
 	VT8233		= 0x60,	/* Integrated MAC */
 	VT8235		= 0x74,	/* Integrated MAC */
 	VT8237		= 0x78,	/* Integrated MAC */
-	VTunknown1	= 0x7C,
+	VT8251		= 0x7C,	/* Integrated MAC */
 	VT6105		= 0x80,
 	VT6105_B0	= 0x83,
 	VT6105L		= 0x8A,

From 9f5159e89d5d1535e3da0222805c355a86d3e2dc Mon Sep 17 00:00:00 2001
From: Kevin Brace <kevinbrace@bracecomputerlab.com>
Date: Tue, 29 Sep 2020 13:09:42 -0700
Subject: [PATCH 245/322] via-rhine: Eliminate version information

Signed-off-by: Kevin Brace <kevinbrace@bracecomputerlab.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/via/via-rhine.c | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c
index d3a2be2e75d0..8e8cfe110d95 100644
--- a/drivers/net/ethernet/via/via-rhine.c
+++ b/drivers/net/ethernet/via/via-rhine.c
@@ -32,8 +32,6 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #define DRV_NAME	"via-rhine"
-#define DRV_VERSION	"1.5.1"
-#define DRV_RELDATE	"2010-10-09"
 
 #include <linux/types.h>
 
@@ -117,10 +115,6 @@ static const int multicast_filter_limit = 32;
 #include <linux/uaccess.h>
 #include <linux/dmi.h>
 
-/* These identify the driver base version and may not be removed. */
-static const char version[] =
-	"v1.10-LK" DRV_VERSION " " DRV_RELDATE " Written by Donald Becker";
-
 MODULE_AUTHOR("Donald Becker <becker@scyld.com>");
 MODULE_DESCRIPTION("VIA Rhine PCI Fast Ethernet driver");
 MODULE_LICENSE("GPL");
@@ -1051,11 +1045,6 @@ static int rhine_init_one_pci(struct pci_dev *pdev,
 	u32 quirks = 0;
 #endif
 
-/* when built into the kernel, we only print version if device is found */
-#ifndef MODULE
-	pr_info_once("%s\n", version);
-#endif
-
 	rc = pci_enable_device(pdev);
 	if (rc)
 		goto err_out;
@@ -2296,7 +2285,6 @@ static void netdev_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *i
 	struct device *hwdev = dev->dev.parent;
 
 	strlcpy(info->driver, DRV_NAME, sizeof(info->driver));
-	strlcpy(info->version, DRV_VERSION, sizeof(info->version));
 	strlcpy(info->bus_info, dev_name(hwdev), sizeof(info->bus_info));
 }
 
@@ -2618,9 +2606,6 @@ static int __init rhine_init(void)
 	int ret_pci, ret_platform;
 
 /* when a module, this is printed whether or not devices are found in probe */
-#ifdef MODULE
-	pr_info("%s\n", version);
-#endif
 	if (dmi_check_system(rhine_dmi_table)) {
 		/* these BIOSes fail at PXE boot if chip is in D3 */
 		avoid_D3 = true;

From 2b6b78e0828948fdc8374dc6fe6b2911252333c9 Mon Sep 17 00:00:00 2001
From: Kevin Brace <kevinbrace@bracecomputerlab.com>
Date: Tue, 29 Sep 2020 13:09:43 -0700
Subject: [PATCH 246/322] via-rhine: New device driver maintainer

Signed-off-by: Kevin Brace <kevinbrace@bracecomputerlab.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS                          | 3 ++-
 drivers/net/ethernet/via/via-rhine.c | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index eb45c9fdeb21..dd1840e00a38 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -18283,7 +18283,8 @@ F:	drivers/gpu/vga/vga_switcheroo.c
 F:	include/linux/vga_switcheroo.h
 
 VIA RHINE NETWORK DRIVER
-S:	Orphan
+S:	Maintained
+M:	Kevin Brace <kevinbrace@bracecomputerlab.com>
 F:	drivers/net/ethernet/via/via-rhine.c
 
 VIA SD/MMC CARD CONTROLLER DRIVER
diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c
index 8e8cfe110d95..55b0ddab1776 100644
--- a/drivers/net/ethernet/via/via-rhine.c
+++ b/drivers/net/ethernet/via/via-rhine.c
@@ -2,7 +2,7 @@
 /*
 	Written 1998-2001 by Donald Becker.
 
-	Current Maintainer: Roger Luethi <rl@hellgate.ch>
+	Current Maintainer: Kevin Brace <kevinbrace@bracecomputerlab.com>
 
 	This software may be used and distributed according to the terms of
 	the GNU General Public License (GPL), incorporated herein by reference.

From 90fb702791bf99b959006972e8ee7bb4609f441b Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue, 29 Sep 2020 17:18:34 -0700
Subject: [PATCH 247/322] autofs: use __kernel_write() for the autofs pipe
 writing

autofs got broken in some configurations by commit 13c164b1a186
("autofs: switch to kernel_write") because there is now an extra LSM
permission check done by security_file_permission() in rw_verify_area().

autofs is one if the few places that really does want the much more
limited __kernel_write(), because the write is an internal kernel one
that shouldn't do any user permission checks (it also doesn't need the
file_start_write/file_end_write logic, since it's just a pipe).

There are a couple of other cases like that - accounting, core dumping,
and splice - but autofs stands out because it can be built as a module.

As a result, we need to export this internal __kernel_write() function
again.

We really don't want any other module to use this, but we don't have a
"EXPORT_SYMBOL_FOR_AUTOFS_ONLY()".  But we can mark it GPL-only to at
least approximate that "internal use only" for licensing.

While in this area, make autofs pass in NULL for the file position
pointer, since it's always a pipe, and we now use a NULL file pointer
for streaming file descriptors (see file_ppos() and commit 438ab720c675:
"vfs: pass ppos=NULL to .read()/.write() of FMODE_STREAM files")

This effectively reverts commits 9db977522449 ("fs: unexport
__kernel_write") and 13c164b1a186 ("autofs: switch to kernel_write").

Fixes: 13c164b1a186 ("autofs: switch to kernel_write")
Reported-by: Ondrej Mosnacek <omosnace@redhat.com>
Acked-by: Christoph Hellwig <hch@lst.de>
Acked-by: Acked-by: Ian Kent <raven@themaw.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/autofs/waitq.c | 2 +-
 fs/read_write.c   | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/fs/autofs/waitq.c b/fs/autofs/waitq.c
index 74c886f7c51c..5ced859dac53 100644
--- a/fs/autofs/waitq.c
+++ b/fs/autofs/waitq.c
@@ -53,7 +53,7 @@ static int autofs_write(struct autofs_sb_info *sbi,
 
 	mutex_lock(&sbi->pipe_mutex);
 	while (bytes) {
-		wr = kernel_write(file, data, bytes, &file->f_pos);
+		wr = __kernel_write(file, data, bytes, NULL);
 		if (wr <= 0)
 			break;
 		data += wr;
diff --git a/fs/read_write.c b/fs/read_write.c
index 5db58b8c78d0..d3428189f36b 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -538,6 +538,14 @@ ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t
 	inc_syscw(current);
 	return ret;
 }
+/*
+ * This "EXPORT_SYMBOL_GPL()" is more of a "EXPORT_SYMBOL_DONTUSE()",
+ * but autofs is one of the few internal kernel users that actually
+ * wants this _and_ can be built as a module. So we need to export
+ * this symbol for autofs, even though it really isn't appropriate
+ * for any other kernel modules.
+ */
+EXPORT_SYMBOL_GPL(__kernel_write);
 
 ssize_t kernel_write(struct file *file, const void *buf, size_t count,
 			    loff_t *pos)

From 917944da3bfc7cb5ac3af26725af3371d3a12db0 Mon Sep 17 00:00:00 2001
From: Mat Martineau <mathew.j.martineau@linux.intel.com>
Date: Tue, 29 Sep 2020 15:08:19 -0700
Subject: [PATCH 248/322] mptcp: Consistently use READ_ONCE/WRITE_ONCE with
 msk->ack_seq

The msk->ack_seq value is sometimes read without the msk lock held, so
make proper use of READ_ONCE and WRITE_ONCE.

Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/options.c  | 4 ++--
 net/mptcp/protocol.c | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 7fa822b55c34..120ef39fe589 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -518,11 +518,11 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
 
 	if (subflow->use_64bit_ack) {
 		ack_size = TCPOLEN_MPTCP_DSS_ACK64;
-		opts->ext_copy.data_ack = msk->ack_seq;
+		opts->ext_copy.data_ack = READ_ONCE(msk->ack_seq);
 		opts->ext_copy.ack64 = 1;
 	} else {
 		ack_size = TCPOLEN_MPTCP_DSS_ACK32;
-		opts->ext_copy.data_ack32 = (uint32_t)(msk->ack_seq);
+		opts->ext_copy.data_ack32 = (uint32_t)READ_ONCE(msk->ack_seq);
 		opts->ext_copy.ack64 = 0;
 	}
 	opts->ext_copy.use_ack = 1;
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 365ba96c84b0..5d747c6a610e 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -123,7 +123,7 @@ static void __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
 
 	skb_ext_reset(skb);
 	skb_orphan(skb);
-	msk->ack_seq += copy_len;
+	WRITE_ONCE(msk->ack_seq, msk->ack_seq + copy_len);
 
 	tail = skb_peek_tail(&sk->sk_receive_queue);
 	if (offset == 0 && tail) {
@@ -261,7 +261,7 @@ static void mptcp_check_data_fin(struct sock *sk)
 	if (mptcp_pending_data_fin(sk, &rcv_data_fin_seq)) {
 		struct mptcp_subflow_context *subflow;
 
-		msk->ack_seq++;
+		WRITE_ONCE(msk->ack_seq, msk->ack_seq + 1);
 		WRITE_ONCE(msk->rcv_data_fin, 0);
 
 		sk->sk_shutdown |= RCV_SHUTDOWN;
@@ -1720,7 +1720,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
 		msk->remote_key = mp_opt->sndr_key;
 		mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq);
 		ack_seq++;
-		msk->ack_seq = ack_seq;
+		WRITE_ONCE(msk->ack_seq, ack_seq);
 	}
 
 	sock_reset_flag(nsk, SOCK_RCU_FREE);
@@ -2072,7 +2072,7 @@ bool mptcp_finish_join(struct sock *sk)
 	parent_sock = READ_ONCE(parent->sk_socket);
 	if (parent_sock && !sk->sk_socket)
 		mptcp_sock_graft(sk, parent_sock);
-	subflow->map_seq = msk->ack_seq;
+	subflow->map_seq = READ_ONCE(msk->ack_seq);
 	return true;
 }
 

From 1a49b2c2a501467587504e75bd049d43af38b692 Mon Sep 17 00:00:00 2001
From: Mat Martineau <mathew.j.martineau@linux.intel.com>
Date: Tue, 29 Sep 2020 15:08:20 -0700
Subject: [PATCH 249/322] mptcp: Handle incoming 32-bit DATA_FIN values

The peer may send a DATA_FIN mapping with either a 32-bit or 64-bit
sequence number. When a 32-bit sequence number is received for the
DATA_FIN, it must be expanded to 64 bits before comparing it to the
last acked sequence number. This expansion was missing.

Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/93
Fixes: 3721b9b64676 ("mptcp: Track received DATA_FIN sequence number and add related helpers")
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/options.c  |  7 ++++---
 net/mptcp/protocol.h |  2 +-
 net/mptcp/subflow.c  | 16 +++++++++++++---
 3 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 120ef39fe589..afa486912f5a 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -782,7 +782,7 @@ static void update_una(struct mptcp_sock *msk,
 	}
 }
 
-bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq)
+bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit)
 {
 	/* Skip if DATA_FIN was already received.
 	 * If updating simultaneously with the recvmsg loop, values
@@ -792,7 +792,8 @@ bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq)
 	if (READ_ONCE(msk->rcv_data_fin) || !READ_ONCE(msk->first))
 		return false;
 
-	WRITE_ONCE(msk->rcv_data_fin_seq, data_fin_seq);
+	WRITE_ONCE(msk->rcv_data_fin_seq,
+		   expand_ack(READ_ONCE(msk->ack_seq), data_fin_seq, use_64bit));
 	WRITE_ONCE(msk->rcv_data_fin, 1);
 
 	return true;
@@ -875,7 +876,7 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb,
 	 */
 	if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
 		if (mp_opt.data_fin && mp_opt.data_len == 1 &&
-		    mptcp_update_rcv_data_fin(msk, mp_opt.data_seq) &&
+		    mptcp_update_rcv_data_fin(msk, mp_opt.data_seq, mp_opt.dsn64) &&
 		    schedule_work(&msk->work))
 			sock_hold(subflow->conn);
 
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 60b27d44c184..20f04ac85409 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -387,7 +387,7 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk);
 bool mptcp_finish_join(struct sock *sk);
 void mptcp_data_acked(struct sock *sk);
 void mptcp_subflow_eof(struct sock *sk);
-bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq);
+bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit);
 
 void __init mptcp_token_init(void);
 static inline void mptcp_token_init_request(struct request_sock *req)
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 8cbeb68f3775..5f2fa935022d 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -731,7 +731,8 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
 
 	if (mpext->data_fin == 1) {
 		if (data_len == 1) {
-			bool updated = mptcp_update_rcv_data_fin(msk, mpext->data_seq);
+			bool updated = mptcp_update_rcv_data_fin(msk, mpext->data_seq,
+								 mpext->dsn64);
 			pr_debug("DATA_FIN with no payload seq=%llu", mpext->data_seq);
 			if (subflow->map_valid) {
 				/* A DATA_FIN might arrive in a DSS
@@ -748,8 +749,17 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
 				return MAPPING_DATA_FIN;
 			}
 		} else {
-			mptcp_update_rcv_data_fin(msk, mpext->data_seq + data_len);
-			pr_debug("DATA_FIN with mapping seq=%llu", mpext->data_seq + data_len);
+			u64 data_fin_seq = mpext->data_seq + data_len;
+
+			/* If mpext->data_seq is a 32-bit value, data_fin_seq
+			 * must also be limited to 32 bits.
+			 */
+			if (!mpext->dsn64)
+				data_fin_seq &= GENMASK_ULL(31, 0);
+
+			mptcp_update_rcv_data_fin(msk, data_fin_seq, mpext->dsn64);
+			pr_debug("DATA_FIN with mapping seq=%llu dsn64=%d",
+				 data_fin_seq, mpext->dsn64);
 		}
 
 		/* Adjust for DATA_FIN using 1 byte of sequence space */

From bcf3a2953d36bbfb9bd44ccb3db0897d935cc485 Mon Sep 17 00:00:00 2001
From: Mark Mielke <mark.mielke@gmail.com>
Date: Mon, 28 Sep 2020 00:33:29 -0400
Subject: [PATCH 250/322] scsi: iscsi: iscsi_tcp: Avoid holding spinlock while
 calling getpeername()

The kernel may fail to boot or devices may fail to come up when
initializing iscsi_tcp devices starting with Linux 5.8.

Commit a79af8a64d39 ("[SCSI] iscsi_tcp: use iscsi_conn_get_addr_param
libiscsi function") introduced getpeername() within the session spinlock.

Commit 1b66d253610c ("bpf: Add get{peer, sock}name attach types for
sock_addr") introduced BPF_CGROUP_RUN_SA_PROG_LOCK() within getpeername(),
which acquires a mutex and when used from iscsi_tcp devices can now lead to
"BUG: scheduling while atomic:" and subsequent damage.

Ensure that the spinlock is released before calling getpeername() or
getsockname(). sock_hold() and sock_put() are used to ensure that the
socket reference is preserved until after the getpeername() or
getsockname() complete.

Link: https://bugzilla.redhat.com/show_bug.cgi?id=1877345
Link: https://lkml.org/lkml/2020/7/28/1085
Link: https://lkml.org/lkml/2020/8/31/459
Link: https://lore.kernel.org/r/20200928043329.606781-1-mark.mielke@gmail.com
Fixes: a79af8a64d39 ("[SCSI] iscsi_tcp: use iscsi_conn_get_addr_param libiscsi function")
Fixes: 1b66d253610c ("bpf: Add get{peer, sock}name attach types for sock_addr")
Cc: stable@vger.kernel.org
Reported-by: Marc Dionne <marc.c.dionne@gmail.com>
Tested-by: Marc Dionne <marc.c.dionne@gmail.com>
Reviewed-by: Mike Christie <michael.christie@oracle.com>
Signed-off-by: Mark Mielke <mark.mielke@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/iscsi_tcp.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index b5dd1caae5e9..d10efb66cf19 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -736,6 +736,7 @@ static int iscsi_sw_tcp_conn_get_param(struct iscsi_cls_conn *cls_conn,
 	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
 	struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data;
 	struct sockaddr_in6 addr;
+	struct socket *sock;
 	int rc;
 
 	switch(param) {
@@ -747,13 +748,17 @@ static int iscsi_sw_tcp_conn_get_param(struct iscsi_cls_conn *cls_conn,
 			spin_unlock_bh(&conn->session->frwd_lock);
 			return -ENOTCONN;
 		}
+		sock = tcp_sw_conn->sock;
+		sock_hold(sock->sk);
+		spin_unlock_bh(&conn->session->frwd_lock);
+
 		if (param == ISCSI_PARAM_LOCAL_PORT)
-			rc = kernel_getsockname(tcp_sw_conn->sock,
+			rc = kernel_getsockname(sock,
 						(struct sockaddr *)&addr);
 		else
-			rc = kernel_getpeername(tcp_sw_conn->sock,
+			rc = kernel_getpeername(sock,
 						(struct sockaddr *)&addr);
-		spin_unlock_bh(&conn->session->frwd_lock);
+		sock_put(sock->sk);
 		if (rc < 0)
 			return rc;
 
@@ -775,6 +780,7 @@ static int iscsi_sw_tcp_host_get_param(struct Scsi_Host *shost,
 	struct iscsi_tcp_conn *tcp_conn;
 	struct iscsi_sw_tcp_conn *tcp_sw_conn;
 	struct sockaddr_in6 addr;
+	struct socket *sock;
 	int rc;
 
 	switch (param) {
@@ -789,16 +795,18 @@ static int iscsi_sw_tcp_host_get_param(struct Scsi_Host *shost,
 			return -ENOTCONN;
 		}
 		tcp_conn = conn->dd_data;
-
 		tcp_sw_conn = tcp_conn->dd_data;
-		if (!tcp_sw_conn->sock) {
+		sock = tcp_sw_conn->sock;
+		if (!sock) {
 			spin_unlock_bh(&session->frwd_lock);
 			return -ENOTCONN;
 		}
-
-		rc = kernel_getsockname(tcp_sw_conn->sock,
-					(struct sockaddr *)&addr);
+		sock_hold(sock->sk);
 		spin_unlock_bh(&session->frwd_lock);
+
+		rc = kernel_getsockname(sock,
+					(struct sockaddr *)&addr);
+		sock_put(sock->sk);
 		if (rc < 0)
 			return rc;
 

From f54c4442893b8dfbd3aff8e903c54dfff1aef990 Mon Sep 17 00:00:00 2001
From: Zack Rusin <zackr@vmware.com>
Date: Fri, 25 Sep 2020 11:54:10 -0400
Subject: [PATCH 251/322] drm/vmwgfx: Fix error handling in get_node

ttm_mem_type_manager_func.get_node was changed to return -ENOSPC
instead of setting the node pointer to NULL. Unfortunately
vmwgfx still had two places where it was explicitly converting
-ENOSPC to 0 causing regressions. This fixes those spots by
allowing -ENOSPC to be returned. That seems to fix recent
regressions with vmwgfx.

Signed-off-by: Zack Rusin <zackr@vmware.com>
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Reviewed-by: Martin Krastev <krastevm@vmware.com>
Sigend-off-by: Roland Scheidegger <sroland@vmware.com>
---
 drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c | 2 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_thp.c           | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c
index 4a76fc7114ad..f8bdd4ea294a 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c
@@ -55,7 +55,7 @@ static int vmw_gmrid_man_get_node(struct ttm_mem_type_manager *man,
 
 	id = ida_alloc_max(&gman->gmr_ida, gman->max_gmr_ids - 1, GFP_KERNEL);
 	if (id < 0)
-		return (id != -ENOMEM ? 0 : id);
+		return id;
 
 	spin_lock(&gman->lock);
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_thp.c b/drivers/gpu/drm/vmwgfx/vmwgfx_thp.c
index b7c816ba7166..c8b9335bccd8 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_thp.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_thp.c
@@ -95,7 +95,7 @@ found_unlock:
 		mem->start = node->start;
 	}
 
-	return 0;
+	return ret;
 }
 
 

From c14decfca23cfbeb423ad8be3961a58a31a5473c Mon Sep 17 00:00:00 2001
From: Palmer Dabbelt <palmerdabbelt@google.com>
Date: Tue, 29 Sep 2020 23:48:47 -0700
Subject: [PATCH 252/322] clocksource: clint: Export clint_time_val for modules

clint_time_val will soon be used by the RISC-V implementation of
random_get_entropy(), which is a static inline function that may be used by
modules (at least CRYPTO_JITTERENTROPY=m).

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
---
 drivers/clocksource/timer-clint.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/clocksource/timer-clint.c b/drivers/clocksource/timer-clint.c
index d17367dee02c..6cfe2ab73eb0 100644
--- a/drivers/clocksource/timer-clint.c
+++ b/drivers/clocksource/timer-clint.c
@@ -38,6 +38,7 @@ static unsigned int clint_timer_irq;
 
 #ifdef CONFIG_RISCV_M_MODE
 u64 __iomem *clint_time_val;
+EXPORT_SYMBOL(clint_time_val);
 #endif
 
 static void clint_send_ipi(const struct cpumask *target)

From b195152536237acf65962b703394614d9c4d307b Mon Sep 17 00:00:00 2001
From: Evan Quan <evan.quan@amd.com>
Date: Wed, 30 Sep 2020 12:07:56 +0800
Subject: [PATCH 253/322] drm/amd/pm: setup APU dpm clock table in SMU HW
 initialization

As the dpm clock table is needed during DC HW initialization.
And that (DC HW initialization) comes before smu_late_init()
where current APU dpm clock table setup is performed. So, NULL
pointer dereference will be triggered. By moving APU dpm clock
table setup to smu_hw_init(), this can be avoided.

Fixes: 02cf91c113ea ("drm/amd/powerplay: postpone operations not required for hw setup to late_init")
Acked-by: Nirmoy Das <nirmoy.das@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Evan Quan <evan.quan@amd.com>
Reported-by: Dirk Gouders <dirk@gouders.net>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
index 63f945f9f331..8dc5abb6931e 100644
--- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
@@ -479,17 +479,6 @@ static int smu_late_init(void *handle)
 		return ret;
 	}
 
-	/*
-	 * Set initialized values (get from vbios) to dpm tables context such as
-	 * gfxclk, memclk, dcefclk, and etc. And enable the DPM feature for each
-	 * type of clks.
-	 */
-	ret = smu_set_default_dpm_table(smu);
-	if (ret) {
-		dev_err(adev->dev, "Failed to setup default dpm clock tables!\n");
-		return ret;
-	}
-
 	ret = smu_populate_umd_state_clk(smu);
 	if (ret) {
 		dev_err(adev->dev, "Failed to populate UMD state clocks!\n");
@@ -984,6 +973,17 @@ static int smu_smc_hw_setup(struct smu_context *smu)
 		return ret;
 	}
 
+	/*
+	 * Set initialized values (get from vbios) to dpm tables context such as
+	 * gfxclk, memclk, dcefclk, and etc. And enable the DPM feature for each
+	 * type of clks.
+	 */
+	ret = smu_set_default_dpm_table(smu);
+	if (ret) {
+		dev_err(adev->dev, "Failed to setup default dpm clock tables!\n");
+		return ret;
+	}
+
 	ret = smu_notify_display_change(smu);
 	if (ret)
 		return ret;

From 95433a1305a000aa91f558b062ce614a3bb8ceb5 Mon Sep 17 00:00:00 2001
From: Jiansong Chen <Jiansong.Chen@amd.com>
Date: Wed, 30 Sep 2020 15:30:24 +0800
Subject: [PATCH 254/322] drm/amdgpu: disable gfxoff temporarily for
 navy_flounder

gfxoff is temporarily disabled for navy_flounder, since
at present the feature caused some tdr when performing
display operations.

Signed-off-by: Jiansong Chen <Jiansong.Chen@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 037a187aa42f..f73ce9721233 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -3595,6 +3595,9 @@ static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev)
 		if (!gfx_v10_0_navi10_gfxoff_should_enable(adev))
 			adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
 		break;
+	case CHIP_NAVY_FLOUNDER:
+		adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
+		break;
 	default:
 		break;
 	}

From 0ec86e8e82b57cfb2f83a9de14732002816d3282 Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Wed, 26 Aug 2020 11:30:34 -0700
Subject: [PATCH 255/322] ice: increase maximum wait time for flash write
 commands

The ice driver needs to wait for a firmware response to each command to
write a block of data to the scratch area used to update the device
firmware. The driver currently waits for up to 1 second for this to be
returned.

It turns out that firmware might take longer than 1 second to return
a completion in some cases. If this happens, the flash update will fail
to complete.

Fix this by increasing the maximum time that the driver will wait for
both writing a block of data, and for activating the new NVM bank. The
timeout for an erase command is already several minutes, as the firmware
had to erase the entire bank which was already expected to take a minute
or more in the worst case.

In the case where firmware really won't respond, we will now take longer
to fail. However, this ensures that if the firmware is simply slow to
respond, the flash update can still complete. This new maximum timeout
should not adversely increase the update time, as the implementation for
wait_event_interruptible_timeout, and should wake very soon after we get
a completion event. It is better for a flash update be slow but still
succeed than to fail because we gave up too quickly.

Fixes: d69ea414c9b4 ("ice: implement device flash update via devlink")
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Brijesh Behera <brijeshx.behera@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_fw_update.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_fw_update.c b/drivers/net/ethernet/intel/ice/ice_fw_update.c
index deaefe00c9c0..8968fdd4816b 100644
--- a/drivers/net/ethernet/intel/ice/ice_fw_update.c
+++ b/drivers/net/ethernet/intel/ice/ice_fw_update.c
@@ -289,7 +289,13 @@ ice_write_one_nvm_block(struct ice_pf *pf, u16 module, u32 offset,
 		return -EIO;
 	}
 
-	err = ice_aq_wait_for_event(pf, ice_aqc_opc_nvm_write, HZ, &event);
+	/* In most cases, firmware reports a write completion within a few
+	 * milliseconds. However, it has been observed that a completion might
+	 * take more than a second to complete in some cases. The timeout here
+	 * is conservative and is intended to prevent failure to update when
+	 * firmware is slow to respond.
+	 */
+	err = ice_aq_wait_for_event(pf, ice_aqc_opc_nvm_write, 15 * HZ, &event);
 	if (err) {
 		dev_err(dev, "Timed out waiting for firmware write completion for module 0x%02x, err %d\n",
 			module, err);
@@ -513,7 +519,7 @@ static int ice_switch_flash_banks(struct ice_pf *pf, u8 activate_flags,
 		return -EIO;
 	}
 
-	err = ice_aq_wait_for_event(pf, ice_aqc_opc_nvm_write_activate, HZ,
+	err = ice_aq_wait_for_event(pf, ice_aqc_opc_nvm_write_activate, 30 * HZ,
 				    &event);
 	if (err) {
 		dev_err(dev, "Timed out waiting for firmware to switch active flash banks, err %d\n",

From be49b1ad299a638e6181aefc7bdbfd621b46a7ef Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Wed, 2 Sep 2020 08:53:42 -0700
Subject: [PATCH 256/322] ice: preserve NVM capabilities in safe mode

If the driver initializes in safe mode, it will call
ice_set_safe_mode_caps. This results in clearing the capabilities
structures, in order to set them up for operating in safe mode, ensuring
many features are disabled.

This has a side effect of also clearing the capability bits that relate
to NVM update. The result is that the device driver will not indicate
support for unified update, even if the firmware is capable.

Fix this by adding the relevant capability fields to the list of values
we preserve. To simplify the code, use a common_cap structure instead of
a handful of local variables. To reduce some duplication of the
capability name, introduce a couple of macros used to restore the
capabilities values from the cached copy.

Fixes: de9b277ee032 ("ice: Add support for unified NVM update flow capability")
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Brijesh Behera <brijeshx.behera@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_common.c | 49 ++++++++++++---------
 1 file changed, 27 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 34abfcea9858..7db5fd977367 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -2288,26 +2288,28 @@ void ice_set_safe_mode_caps(struct ice_hw *hw)
 {
 	struct ice_hw_func_caps *func_caps = &hw->func_caps;
 	struct ice_hw_dev_caps *dev_caps = &hw->dev_caps;
-	u32 valid_func, rxq_first_id, txq_first_id;
-	u32 msix_vector_first_id, max_mtu;
+	struct ice_hw_common_caps cached_caps;
 	u32 num_funcs;
 
 	/* cache some func_caps values that should be restored after memset */
-	valid_func = func_caps->common_cap.valid_functions;
-	txq_first_id = func_caps->common_cap.txq_first_id;
-	rxq_first_id = func_caps->common_cap.rxq_first_id;
-	msix_vector_first_id = func_caps->common_cap.msix_vector_first_id;
-	max_mtu = func_caps->common_cap.max_mtu;
+	cached_caps = func_caps->common_cap;
 
 	/* unset func capabilities */
 	memset(func_caps, 0, sizeof(*func_caps));
 
+#define ICE_RESTORE_FUNC_CAP(name) \
+	func_caps->common_cap.name = cached_caps.name
+
 	/* restore cached values */
-	func_caps->common_cap.valid_functions = valid_func;
-	func_caps->common_cap.txq_first_id = txq_first_id;
-	func_caps->common_cap.rxq_first_id = rxq_first_id;
-	func_caps->common_cap.msix_vector_first_id = msix_vector_first_id;
-	func_caps->common_cap.max_mtu = max_mtu;
+	ICE_RESTORE_FUNC_CAP(valid_functions);
+	ICE_RESTORE_FUNC_CAP(txq_first_id);
+	ICE_RESTORE_FUNC_CAP(rxq_first_id);
+	ICE_RESTORE_FUNC_CAP(msix_vector_first_id);
+	ICE_RESTORE_FUNC_CAP(max_mtu);
+	ICE_RESTORE_FUNC_CAP(nvm_unified_update);
+	ICE_RESTORE_FUNC_CAP(nvm_update_pending_nvm);
+	ICE_RESTORE_FUNC_CAP(nvm_update_pending_orom);
+	ICE_RESTORE_FUNC_CAP(nvm_update_pending_netlist);
 
 	/* one Tx and one Rx queue in safe mode */
 	func_caps->common_cap.num_rxq = 1;
@@ -2318,22 +2320,25 @@ void ice_set_safe_mode_caps(struct ice_hw *hw)
 	func_caps->guar_num_vsi = 1;
 
 	/* cache some dev_caps values that should be restored after memset */
-	valid_func = dev_caps->common_cap.valid_functions;
-	txq_first_id = dev_caps->common_cap.txq_first_id;
-	rxq_first_id = dev_caps->common_cap.rxq_first_id;
-	msix_vector_first_id = dev_caps->common_cap.msix_vector_first_id;
-	max_mtu = dev_caps->common_cap.max_mtu;
+	cached_caps = dev_caps->common_cap;
 	num_funcs = dev_caps->num_funcs;
 
 	/* unset dev capabilities */
 	memset(dev_caps, 0, sizeof(*dev_caps));
 
+#define ICE_RESTORE_DEV_CAP(name) \
+	dev_caps->common_cap.name = cached_caps.name
+
 	/* restore cached values */
-	dev_caps->common_cap.valid_functions = valid_func;
-	dev_caps->common_cap.txq_first_id = txq_first_id;
-	dev_caps->common_cap.rxq_first_id = rxq_first_id;
-	dev_caps->common_cap.msix_vector_first_id = msix_vector_first_id;
-	dev_caps->common_cap.max_mtu = max_mtu;
+	ICE_RESTORE_DEV_CAP(valid_functions);
+	ICE_RESTORE_DEV_CAP(txq_first_id);
+	ICE_RESTORE_DEV_CAP(rxq_first_id);
+	ICE_RESTORE_DEV_CAP(msix_vector_first_id);
+	ICE_RESTORE_DEV_CAP(max_mtu);
+	ICE_RESTORE_DEV_CAP(nvm_unified_update);
+	ICE_RESTORE_DEV_CAP(nvm_update_pending_nvm);
+	ICE_RESTORE_DEV_CAP(nvm_update_pending_orom);
+	ICE_RESTORE_DEV_CAP(nvm_update_pending_netlist);
 	dev_caps->num_funcs = num_funcs;
 
 	/* one Tx and one Rx queue per function in safe mode */

From fc7d17551f776fda243493ce59894c1eec2a339f Mon Sep 17 00:00:00 2001
From: Zhang Rui <rui.zhang@intel.com>
Date: Mon, 28 Sep 2020 11:33:42 +0800
Subject: [PATCH 257/322] cpufreq: intel_pstate: Fix missing return statement

Fix missing return statement when writing "off" to intel_pstate status
sysfs I/F.

Fixes: 55671ea3257a ("cpufreq: intel_pstate: Free memory only when turning off")
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index a827b000ef51..9a515c460a00 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -2781,6 +2781,7 @@ static int intel_pstate_update_status(const char *buf, size_t size)
 
 		cpufreq_unregister_driver(intel_pstate_driver);
 		intel_pstate_driver_cleanup();
+		return 0;
 	}
 
 	if (size == 6 && !strncmp(buf, "active", size)) {

From eac53b3e304a8354dc2a2f38e60e712d0e110ef6 Mon Sep 17 00:00:00 2001
From: Yoann Congal <yoann.congal@smile.fr>
Date: Tue, 29 Sep 2020 22:41:58 +0200
Subject: [PATCH 258/322] Documentation: PM: Fix a reStructuredText syntax
 error

Fix a reStructuredText syntax error in the cpuidle PM admin-guide
documentation: the ``...'' quotation marks are parsed as partial ''...''
reStructuredText markup and break the output formatting.

This change them to "...".

Signed-off-by: Yoann Congal <yoann.congal@smile.fr>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/admin-guide/pm/cpuidle.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/pm/cpuidle.rst b/Documentation/admin-guide/pm/cpuidle.rst
index a96a423e3779..6ebe163f9dfe 100644
--- a/Documentation/admin-guide/pm/cpuidle.rst
+++ b/Documentation/admin-guide/pm/cpuidle.rst
@@ -690,7 +690,7 @@ which of the two parameters is added to the kernel command line.  In the
 instruction of the CPUs (which, as a rule, suspends the execution of the program
 and causes the hardware to attempt to enter the shallowest available idle state)
 for this purpose, and if ``idle=poll`` is used, idle CPUs will execute a
-more or less ``lightweight'' sequence of instructions in a tight loop.  [Note
+more or less "lightweight" sequence of instructions in a tight loop.  [Note
 that using ``idle=poll`` is somewhat drastic in many cases, as preventing idle
 CPUs from saving almost any energy at all may not be the only effect of it.
 For example, on Intel hardware it effectively prevents CPUs from using

From 1a67b9263e06688d7f683714cace8dcc203f8c55 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Wed, 30 Sep 2020 13:20:23 +0200
Subject: [PATCH 259/322] ARM: imx6q: Fixup RCU usage for cpuidle

The commit eb1f00237aca ("lockdep,trace: Expose tracepoints"), started to
expose us for tracepoints. For imx6q cpuidle, this leads to an RCU splat
according to below.

[6.870684] [<c0db7690>] (_raw_spin_lock) from [<c011f6a4>] (imx6q_enter_wait+0x18/0x9c)
[6.878846] [<c011f6a4>] (imx6q_enter_wait) from [<c09abfb0>] (cpuidle_enter_state+0x168/0x5e4)

To fix the problem, let's assign the corresponding idlestate->flags the
CPUIDLE_FLAG_RCU_IDLE bit, which enables us to call rcu_idle_enter|exit()
at the proper point.

Reported-by: Dong Aisheng <aisheng.dong@nxp.com>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/arm/mach-imx/cpuidle-imx6q.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/arm/mach-imx/cpuidle-imx6q.c b/arch/arm/mach-imx/cpuidle-imx6q.c
index 24dd5bbe60e4..094337dc1bc7 100644
--- a/arch/arm/mach-imx/cpuidle-imx6q.c
+++ b/arch/arm/mach-imx/cpuidle-imx6q.c
@@ -24,7 +24,9 @@ static int imx6q_enter_wait(struct cpuidle_device *dev,
 		imx6_set_lpm(WAIT_UNCLOCKED);
 	raw_spin_unlock(&cpuidle_lock);
 
+	rcu_idle_enter();
 	cpu_do_idle();
+	rcu_idle_exit();
 
 	raw_spin_lock(&cpuidle_lock);
 	if (num_idle_cpus-- == num_online_cpus())
@@ -44,7 +46,7 @@ static struct cpuidle_driver imx6q_cpuidle_driver = {
 		{
 			.exit_latency = 50,
 			.target_residency = 75,
-			.flags = CPUIDLE_FLAG_TIMER_STOP,
+			.flags = CPUIDLE_FLAG_TIMER_STOP | CPUIDLE_FLAG_RCU_IDLE,
 			.enter = imx6q_enter_wait,
 			.name = "WAIT",
 			.desc = "Clock off",

From a466c85edc6fbe845facc8f57c408c544f42899e Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Thu, 20 Aug 2020 11:18:27 -0400
Subject: [PATCH 260/322] btrfs: move btrfs_rm_dev_replace_free_srcdev outside
 of all locks

When closing and freeing the source device we could end up doing our
final blkdev_put() on the bdev, which will grab the bd_mutex.  As such
we want to be holding as few locks as possible, so move this call
outside of the dev_replace->lock_finishing_cancel_unmount lock.  Since
we're modifying the fs_devices we need to make sure we're holding the
uuid_mutex here, so take that as well.

There's a report from syzbot probably hitting one of the cases where
the bd_mutex and device_list_mutex are taken in the wrong order, however
it's not with device replace, like this patch fixes. As there's no
reproducer available so far, we can't verify the fix.

https://lore.kernel.org/lkml/000000000000fc04d105afcf86d7@google.com/
dashboard link: https://syzkaller.appspot.com/bug?extid=84a0634dc5d21d488419

  WARNING: possible circular locking dependency detected
  5.9.0-rc5-syzkaller #0 Not tainted
  ------------------------------------------------------
  syz-executor.0/6878 is trying to acquire lock:
  ffff88804c17d780 (&bdev->bd_mutex){+.+.}-{3:3}, at: blkdev_put+0x30/0x520 fs/block_dev.c:1804

  but task is already holding lock:
  ffff8880908cfce0 (&fs_devs->device_list_mutex){+.+.}-{3:3}, at: close_fs_devices.part.0+0x2e/0x800 fs/btrfs/volumes.c:1159

  which lock already depends on the new lock.

  the existing dependency chain (in reverse order) is:

  -> #4 (&fs_devs->device_list_mutex){+.+.}-{3:3}:
	 __mutex_lock_common kernel/locking/mutex.c:956 [inline]
	 __mutex_lock+0x134/0x10e0 kernel/locking/mutex.c:1103
	 btrfs_finish_chunk_alloc+0x281/0xf90 fs/btrfs/volumes.c:5255
	 btrfs_create_pending_block_groups+0x2f3/0x700 fs/btrfs/block-group.c:2109
	 __btrfs_end_transaction+0xf5/0x690 fs/btrfs/transaction.c:916
	 find_free_extent_update_loop fs/btrfs/extent-tree.c:3807 [inline]
	 find_free_extent+0x23b7/0x2e60 fs/btrfs/extent-tree.c:4127
	 btrfs_reserve_extent+0x166/0x460 fs/btrfs/extent-tree.c:4206
	 cow_file_range+0x3de/0x9b0 fs/btrfs/inode.c:1063
	 btrfs_run_delalloc_range+0x2cf/0x1410 fs/btrfs/inode.c:1838
	 writepage_delalloc+0x150/0x460 fs/btrfs/extent_io.c:3439
	 __extent_writepage+0x441/0xd00 fs/btrfs/extent_io.c:3653
	 extent_write_cache_pages.constprop.0+0x69d/0x1040 fs/btrfs/extent_io.c:4249
	 extent_writepages+0xcd/0x2b0 fs/btrfs/extent_io.c:4370
	 do_writepages+0xec/0x290 mm/page-writeback.c:2352
	 __writeback_single_inode+0x125/0x1400 fs/fs-writeback.c:1461
	 writeback_sb_inodes+0x53d/0xf40 fs/fs-writeback.c:1721
	 wb_writeback+0x2ad/0xd40 fs/fs-writeback.c:1894
	 wb_do_writeback fs/fs-writeback.c:2039 [inline]
	 wb_workfn+0x2dc/0x13e0 fs/fs-writeback.c:2080
	 process_one_work+0x94c/0x1670 kernel/workqueue.c:2269
	 worker_thread+0x64c/0x1120 kernel/workqueue.c:2415
	 kthread+0x3b5/0x4a0 kernel/kthread.c:292
	 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:294

  -> #3 (sb_internal#2){.+.+}-{0:0}:
	 percpu_down_read include/linux/percpu-rwsem.h:51 [inline]
	 __sb_start_write+0x234/0x470 fs/super.c:1672
	 sb_start_intwrite include/linux/fs.h:1690 [inline]
	 start_transaction+0xbe7/0x1170 fs/btrfs/transaction.c:624
	 find_free_extent_update_loop fs/btrfs/extent-tree.c:3789 [inline]
	 find_free_extent+0x25e1/0x2e60 fs/btrfs/extent-tree.c:4127
	 btrfs_reserve_extent+0x166/0x460 fs/btrfs/extent-tree.c:4206
	 cow_file_range+0x3de/0x9b0 fs/btrfs/inode.c:1063
	 btrfs_run_delalloc_range+0x2cf/0x1410 fs/btrfs/inode.c:1838
	 writepage_delalloc+0x150/0x460 fs/btrfs/extent_io.c:3439
	 __extent_writepage+0x441/0xd00 fs/btrfs/extent_io.c:3653
	 extent_write_cache_pages.constprop.0+0x69d/0x1040 fs/btrfs/extent_io.c:4249
	 extent_writepages+0xcd/0x2b0 fs/btrfs/extent_io.c:4370
	 do_writepages+0xec/0x290 mm/page-writeback.c:2352
	 __writeback_single_inode+0x125/0x1400 fs/fs-writeback.c:1461
	 writeback_sb_inodes+0x53d/0xf40 fs/fs-writeback.c:1721
	 wb_writeback+0x2ad/0xd40 fs/fs-writeback.c:1894
	 wb_do_writeback fs/fs-writeback.c:2039 [inline]
	 wb_workfn+0x2dc/0x13e0 fs/fs-writeback.c:2080
	 process_one_work+0x94c/0x1670 kernel/workqueue.c:2269
	 worker_thread+0x64c/0x1120 kernel/workqueue.c:2415
	 kthread+0x3b5/0x4a0 kernel/kthread.c:292
	 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:294

  -> #2 ((work_completion)(&(&wb->dwork)->work)){+.+.}-{0:0}:
	 __flush_work+0x60e/0xac0 kernel/workqueue.c:3041
	 wb_shutdown+0x180/0x220 mm/backing-dev.c:355
	 bdi_unregister+0x174/0x590 mm/backing-dev.c:872
	 del_gendisk+0x820/0xa10 block/genhd.c:933
	 loop_remove drivers/block/loop.c:2192 [inline]
	 loop_control_ioctl drivers/block/loop.c:2291 [inline]
	 loop_control_ioctl+0x3b1/0x480 drivers/block/loop.c:2257
	 vfs_ioctl fs/ioctl.c:48 [inline]
	 __do_sys_ioctl fs/ioctl.c:753 [inline]
	 __se_sys_ioctl fs/ioctl.c:739 [inline]
	 __x64_sys_ioctl+0x193/0x200 fs/ioctl.c:739
	 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
	 entry_SYSCALL_64_after_hwframe+0x44/0xa9

  -> #1 (loop_ctl_mutex){+.+.}-{3:3}:
	 __mutex_lock_common kernel/locking/mutex.c:956 [inline]
	 __mutex_lock+0x134/0x10e0 kernel/locking/mutex.c:1103
	 lo_open+0x19/0xd0 drivers/block/loop.c:1893
	 __blkdev_get+0x759/0x1aa0 fs/block_dev.c:1507
	 blkdev_get fs/block_dev.c:1639 [inline]
	 blkdev_open+0x227/0x300 fs/block_dev.c:1753
	 do_dentry_open+0x4b9/0x11b0 fs/open.c:817
	 do_open fs/namei.c:3251 [inline]
	 path_openat+0x1b9a/0x2730 fs/namei.c:3368
	 do_filp_open+0x17e/0x3c0 fs/namei.c:3395
	 do_sys_openat2+0x16d/0x420 fs/open.c:1168
	 do_sys_open fs/open.c:1184 [inline]
	 __do_sys_open fs/open.c:1192 [inline]
	 __se_sys_open fs/open.c:1188 [inline]
	 __x64_sys_open+0x119/0x1c0 fs/open.c:1188
	 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46
	 entry_SYSCALL_64_after_hwframe+0x44/0xa9

  -> #0 (&bdev->bd_mutex){+.+.}-{3:3}:
	 check_prev_add kernel/locking/lockdep.c:2496 [inline]
	 check_prevs_add kernel/locking/lockdep.c:2601 [inline]
	 validate_chain kernel/locking/lockdep.c:3218 [inline]
	 __lock_acquire+0x2a96/0x5780 kernel/locking/lockdep.c:4426
	 lock_acquire+0x1f3/0xae0 kernel/locking/lockdep.c:5006
	 __mutex_lock_common kernel/locking/mutex.c:956 [inline]
	 __mutex_lock+0x134/0x10e0 kernel/locking/mutex.c:1103
	 blkdev_put+0x30/0x520 fs/block_dev.c:1804
	 btrfs_close_bdev fs/btrfs/volumes.c:1117 [inline]
	 btrfs_close_bdev fs/btrfs/volumes.c:1107 [inline]
	 btrfs_close_one_device fs/btrfs/volumes.c:1133 [inline]
	 close_fs_devices.part.0+0x1a4/0x800 fs/btrfs/volumes.c:1161
	 close_fs_devices fs/btrfs/volumes.c:1193 [inline]
	 btrfs_close_devices+0x95/0x1f0 fs/btrfs/volumes.c:1179
	 close_ctree+0x688/0x6cb fs/btrfs/disk-io.c:4149
	 generic_shutdown_super+0x144/0x370 fs/super.c:464
	 kill_anon_super+0x36/0x60 fs/super.c:1108
	 btrfs_kill_super+0x38/0x50 fs/btrfs/super.c:2265
	 deactivate_locked_super+0x94/0x160 fs/super.c:335
	 deactivate_super+0xad/0xd0 fs/super.c:366
	 cleanup_mnt+0x3a3/0x530 fs/namespace.c:1118
	 task_work_run+0xdd/0x190 kernel/task_work.c:141
	 tracehook_notify_resume include/linux/tracehook.h:188 [inline]
	 exit_to_user_mode_loop kernel/entry/common.c:163 [inline]
	 exit_to_user_mode_prepare+0x1e1/0x200 kernel/entry/common.c:190
	 syscall_exit_to_user_mode+0x7e/0x2e0 kernel/entry/common.c:265
	 entry_SYSCALL_64_after_hwframe+0x44/0xa9

  other info that might help us debug this:

  Chain exists of:
    &bdev->bd_mutex --> sb_internal#2 --> &fs_devs->device_list_mutex

   Possible unsafe locking scenario:

	 CPU0                    CPU1
	 ----                    ----
    lock(&fs_devs->device_list_mutex);
				 lock(sb_internal#2);
				 lock(&fs_devs->device_list_mutex);
    lock(&bdev->bd_mutex);

   *** DEADLOCK ***

  3 locks held by syz-executor.0/6878:
   #0: ffff88809070c0e0 (&type->s_umount_key#70){++++}-{3:3}, at: deactivate_super+0xa5/0xd0 fs/super.c:365
   #1: ffffffff8a5b37a8 (uuid_mutex){+.+.}-{3:3}, at: btrfs_close_devices+0x23/0x1f0 fs/btrfs/volumes.c:1178
   #2: ffff8880908cfce0 (&fs_devs->device_list_mutex){+.+.}-{3:3}, at: close_fs_devices.part.0+0x2e/0x800 fs/btrfs/volumes.c:1159

  stack backtrace:
  CPU: 0 PID: 6878 Comm: syz-executor.0 Not tainted 5.9.0-rc5-syzkaller #0
  Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
  Call Trace:
   __dump_stack lib/dump_stack.c:77 [inline]
   dump_stack+0x198/0x1fd lib/dump_stack.c:118
   check_noncircular+0x324/0x3e0 kernel/locking/lockdep.c:1827
   check_prev_add kernel/locking/lockdep.c:2496 [inline]
   check_prevs_add kernel/locking/lockdep.c:2601 [inline]
   validate_chain kernel/locking/lockdep.c:3218 [inline]
   __lock_acquire+0x2a96/0x5780 kernel/locking/lockdep.c:4426
   lock_acquire+0x1f3/0xae0 kernel/locking/lockdep.c:5006
   __mutex_lock_common kernel/locking/mutex.c:956 [inline]
   __mutex_lock+0x134/0x10e0 kernel/locking/mutex.c:1103
   blkdev_put+0x30/0x520 fs/block_dev.c:1804
   btrfs_close_bdev fs/btrfs/volumes.c:1117 [inline]
   btrfs_close_bdev fs/btrfs/volumes.c:1107 [inline]
   btrfs_close_one_device fs/btrfs/volumes.c:1133 [inline]
   close_fs_devices.part.0+0x1a4/0x800 fs/btrfs/volumes.c:1161
   close_fs_devices fs/btrfs/volumes.c:1193 [inline]
   btrfs_close_devices+0x95/0x1f0 fs/btrfs/volumes.c:1179
   close_ctree+0x688/0x6cb fs/btrfs/disk-io.c:4149
   generic_shutdown_super+0x144/0x370 fs/super.c:464
   kill_anon_super+0x36/0x60 fs/super.c:1108
   btrfs_kill_super+0x38/0x50 fs/btrfs/super.c:2265
   deactivate_locked_super+0x94/0x160 fs/super.c:335
   deactivate_super+0xad/0xd0 fs/super.c:366
   cleanup_mnt+0x3a3/0x530 fs/namespace.c:1118
   task_work_run+0xdd/0x190 kernel/task_work.c:141
   tracehook_notify_resume include/linux/tracehook.h:188 [inline]
   exit_to_user_mode_loop kernel/entry/common.c:163 [inline]
   exit_to_user_mode_prepare+0x1e1/0x200 kernel/entry/common.c:190
   syscall_exit_to_user_mode+0x7e/0x2e0 kernel/entry/common.c:265
   entry_SYSCALL_64_after_hwframe+0x44/0xa9
  RIP: 0033:0x460027
  RSP: 002b:00007fff59216328 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6
  RAX: 0000000000000000 RBX: 0000000000076035 RCX: 0000000000460027
  RDX: 0000000000403188 RSI: 0000000000000002 RDI: 00007fff592163d0
  RBP: 0000000000000333 R08: 0000000000000000 R09: 000000000000000b
  R10: 0000000000000005 R11: 0000000000000246 R12: 00007fff59217460
  R13: 0000000002df2a60 R14: 0000000000000000 R15: 00007fff59217460

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
[ add syzbot reference ]
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/dev-replace.c | 3 ++-
 fs/btrfs/volumes.c     | 3 +++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 7cf48aeb6f14..62aece1bb8ec 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -748,7 +748,6 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
 	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &src_device->dev_state))
 		btrfs_scratch_superblocks(fs_info, src_device->bdev,
 					  src_device->name->str);
-	btrfs_rm_dev_replace_free_srcdev(src_device);
 
 	/* write back the superblocks */
 	trans = btrfs_start_transaction(root, 0);
@@ -757,6 +756,8 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
 
 	mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
 
+	btrfs_rm_dev_replace_free_srcdev(src_device);
+
 	return 0;
 }
 
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 01ebdb69fbdb..1997a7d67f22 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2224,6 +2224,8 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_device *srcdev)
 	struct btrfs_fs_info *fs_info = srcdev->fs_info;
 	struct btrfs_fs_devices *fs_devices = srcdev->fs_devices;
 
+	mutex_lock(&uuid_mutex);
+
 	btrfs_close_bdev(srcdev);
 	synchronize_rcu();
 	btrfs_free_device(srcdev);
@@ -2252,6 +2254,7 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_device *srcdev)
 		close_fs_devices(fs_devices);
 		free_fs_devices(fs_devices);
 	}
+	mutex_unlock(&uuid_mutex);
 }
 
 void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev)

From 4c8f353272dd1262013873990c0fafd0e3c8f274 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Wed, 23 Sep 2020 15:30:16 +0100
Subject: [PATCH 261/322] btrfs: fix filesystem corruption after a device
 replace

We use a device's allocation state tree to track ranges in a device used
for allocated chunks, and we set ranges in this tree when allocating a new
chunk. However after a device replace operation, we were not setting the
allocated ranges in the new device's allocation state tree, so that tree
is empty after a device replace.

This means that a fitrim operation after a device replace will trim the
device ranges that have allocated chunks and extents, as we trim every
range for which there is not a range marked in the device's allocation
state tree. It is also important during chunk allocation, since the
device's allocation state is used to determine if a range is already
allocated when allocating a new chunk.

This is trivial to reproduce and the following script triggers the bug:

  $ cat reproducer.sh
  #!/bin/bash

  DEV1="/dev/sdg"
  DEV2="/dev/sdh"
  DEV3="/dev/sdi"

  wipefs -a $DEV1 $DEV2 $DEV3 &> /dev/null

  # Create a raid1 test fs on 2 devices.
  mkfs.btrfs -f -m raid1 -d raid1 $DEV1 $DEV2 > /dev/null
  mount $DEV1 /mnt/btrfs

  xfs_io -f -c "pwrite -S 0xab 0 10M" /mnt/btrfs/foo

  echo "Starting to replace $DEV1 with $DEV3"
  btrfs replace start -B $DEV1 $DEV3 /mnt/btrfs
  echo

  echo "Running fstrim"
  fstrim /mnt/btrfs
  echo

  echo "Unmounting filesystem"
  umount /mnt/btrfs

  echo "Mounting filesystem in degraded mode using $DEV3 only"
  wipefs -a $DEV1 $DEV2 &> /dev/null
  mount -o degraded $DEV3 /mnt/btrfs
  if [ $? -ne 0 ]; then
          dmesg | tail
          echo
          echo "Failed to mount in degraded mode"
          exit 1
  fi

  echo
  echo "File foo data (expected all bytes = 0xab):"
  od -A d -t x1 /mnt/btrfs/foo

  umount /mnt/btrfs

When running the reproducer:

  $ ./replace-test.sh
  wrote 10485760/10485760 bytes at offset 0
  10 MiB, 2560 ops; 0.0901 sec (110.877 MiB/sec and 28384.5216 ops/sec)
  Starting to replace /dev/sdg with /dev/sdi

  Running fstrim

  Unmounting filesystem
  Mounting filesystem in degraded mode using /dev/sdi only
  mount: /mnt/btrfs: wrong fs type, bad option, bad superblock on /dev/sdi, missing codepage or helper program, or other error.
  [19581.748641] BTRFS info (device sdg): dev_replace from /dev/sdg (devid 1) to /dev/sdi started
  [19581.803842] BTRFS info (device sdg): dev_replace from /dev/sdg (devid 1) to /dev/sdi finished
  [19582.208293] BTRFS info (device sdi): allowing degraded mounts
  [19582.208298] BTRFS info (device sdi): disk space caching is enabled
  [19582.208301] BTRFS info (device sdi): has skinny extents
  [19582.212853] BTRFS warning (device sdi): devid 2 uuid 1f731f47-e1bb-4f00-bfbb-9e5a0cb4ba9f is missing
  [19582.213904] btree_readpage_end_io_hook: 25839 callbacks suppressed
  [19582.213907] BTRFS error (device sdi): bad tree block start, want 30490624 have 0
  [19582.214780] BTRFS warning (device sdi): failed to read root (objectid=7): -5
  [19582.231576] BTRFS error (device sdi): open_ctree failed

  Failed to mount in degraded mode

So fix by setting all allocated ranges in the replace target device when
the replace operation is finishing, when we are holding the chunk mutex
and we can not race with new chunk allocations.

A test case for fstests follows soon.

Fixes: 1c11b63eff2a67 ("btrfs: replace pending/pinned chunks lists with io tree")
CC: stable@vger.kernel.org # 5.2+
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/dev-replace.c | 40 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 39 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 62aece1bb8ec..e4a1c6afe35d 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -599,6 +599,37 @@ static void btrfs_rm_dev_replace_unblocked(struct btrfs_fs_info *fs_info)
 	wake_up(&fs_info->dev_replace.replace_wait);
 }
 
+/*
+ * When finishing the device replace, before swapping the source device with the
+ * target device we must update the chunk allocation state in the target device,
+ * as it is empty because replace works by directly copying the chunks and not
+ * through the normal chunk allocation path.
+ */
+static int btrfs_set_target_alloc_state(struct btrfs_device *srcdev,
+					struct btrfs_device *tgtdev)
+{
+	struct extent_state *cached_state = NULL;
+	u64 start = 0;
+	u64 found_start;
+	u64 found_end;
+	int ret = 0;
+
+	lockdep_assert_held(&srcdev->fs_info->chunk_mutex);
+
+	while (!find_first_extent_bit(&srcdev->alloc_state, start,
+				      &found_start, &found_end,
+				      CHUNK_ALLOCATED, &cached_state)) {
+		ret = set_extent_bits(&tgtdev->alloc_state, found_start,
+				      found_end, CHUNK_ALLOCATED);
+		if (ret)
+			break;
+		start = found_end + 1;
+	}
+
+	free_extent_state(cached_state);
+	return ret;
+}
+
 static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
 				       int scrub_ret)
 {
@@ -673,8 +704,14 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
 	dev_replace->time_stopped = ktime_get_real_seconds();
 	dev_replace->item_needs_writeback = 1;
 
-	/* replace old device with new one in mapping tree */
+	/*
+	 * Update allocation state in the new device and replace the old device
+	 * with the new one in the mapping tree.
+	 */
 	if (!scrub_ret) {
+		scrub_ret = btrfs_set_target_alloc_state(src_device, tgt_device);
+		if (scrub_ret)
+			goto error;
 		btrfs_dev_replace_update_device_in_mapping_tree(fs_info,
 								src_device,
 								tgt_device);
@@ -685,6 +722,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
 				 btrfs_dev_name(src_device),
 				 src_device->devid,
 				 rcu_str_deref(tgt_device->name), scrub_ret);
+error:
 		up_write(&dev_replace->rwsem);
 		mutex_unlock(&fs_info->chunk_mutex);
 		mutex_unlock(&fs_info->fs_devices->device_list_mutex);

From aa9887608e77b835d51f05a54940380391cd4e21 Mon Sep 17 00:00:00 2001
From: Anup Patel <anup.patel@wdc.com>
Date: Sun, 27 Sep 2020 11:09:16 +0530
Subject: [PATCH 262/322] RISC-V: Check clint_time_val before use

The NoMMU kernel is broken for QEMU virt machine from Linux-5.9-rc6
because clint_time_val is used even before CLINT driver is probed
at following places:
1. rand_initialize() calls get_cycles() which in-turn uses
   clint_time_val
2. boot_init_stack_canary() calls get_cycles() which in-turn
   uses clint_time_val

The issue#1 (above) is fixed by providing custom random_get_entropy()
for RISC-V NoMMU kernel. For issue#2 (above), we remove dependency of
boot_init_stack_canary() on get_cycles() and this is aligned with the
boot_init_stack_canary() implementations of ARM, ARM64 and MIPS kernel.

Fixes: d5be89a8d118 ("RISC-V: Resurrect the MMIO timer implementation for M-mode systems")
Signed-off-by: Anup Patel <anup.patel@wdc.com>
Tested-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
---
 arch/riscv/include/asm/stackprotector.h |  4 ----
 arch/riscv/include/asm/timex.h          | 13 +++++++++++++
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/arch/riscv/include/asm/stackprotector.h b/arch/riscv/include/asm/stackprotector.h
index d95f7b2a7f37..5962f8891f06 100644
--- a/arch/riscv/include/asm/stackprotector.h
+++ b/arch/riscv/include/asm/stackprotector.h
@@ -5,7 +5,6 @@
 
 #include <linux/random.h>
 #include <linux/version.h>
-#include <asm/timex.h>
 
 extern unsigned long __stack_chk_guard;
 
@@ -18,12 +17,9 @@ extern unsigned long __stack_chk_guard;
 static __always_inline void boot_init_stack_canary(void)
 {
 	unsigned long canary;
-	unsigned long tsc;
 
 	/* Try to get a semi random initial value. */
 	get_random_bytes(&canary, sizeof(canary));
-	tsc = get_cycles();
-	canary += tsc + (tsc << BITS_PER_LONG/2);
 	canary ^= LINUX_VERSION_CODE;
 	canary &= CANARY_MASK;
 
diff --git a/arch/riscv/include/asm/timex.h b/arch/riscv/include/asm/timex.h
index 7f659dda0032..ab104905d4db 100644
--- a/arch/riscv/include/asm/timex.h
+++ b/arch/riscv/include/asm/timex.h
@@ -33,6 +33,19 @@ static inline u32 get_cycles_hi(void)
 #define get_cycles_hi get_cycles_hi
 #endif /* CONFIG_64BIT */
 
+/*
+ * Much like MIPS, we may not have a viable counter to use at an early point
+ * in the boot process. Unfortunately we don't have a fallback, so instead
+ * we just return 0.
+ */
+static inline unsigned long random_get_entropy(void)
+{
+	if (unlikely(clint_time_val == NULL))
+		return 0;
+	return get_cycles();
+}
+#define random_get_entropy()	random_get_entropy()
+
 #else /* CONFIG_RISCV_M_MODE */
 
 static inline cycles_t get_cycles(void)

From a509a66a9d0d4f4e304d58fad38c078d0336c445 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Tue, 29 Sep 2020 15:25:22 +0200
Subject: [PATCH 263/322] arm64: permit ACPI core to map kernel memory used for
 table overrides

Jonathan reports that the strict policy for memory mapped by the
ACPI core breaks the use case of passing ACPI table overrides via
initramfs. This is due to the fact that the memory type used for
loading the initramfs in memory is not recognized as a memory type
that is typically used by firmware to pass firmware tables.

Since the purpose of the strict policy is to ensure that no AML or
other ACPI code can manipulate any memory that is used by the kernel
to keep its internal state or the state of user tasks, we can relax
the permission check, and allow mappings of memory that is reserved
and marked as NOMAP via memblock, and therefore not covered by the
linear mapping to begin with.

Fixes: 1583052d111f ("arm64/acpi: disallow AML memory opregions to access kernel memory")
Fixes: 325f5585ec36 ("arm64/acpi: disallow writeable AML opregion mapping for EFI code regions")
Reported-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Tested-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: Sudeep Holla <sudeep.holla@arm.com>
Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Link: https://lore.kernel.org/r/20200929132522.18067-1-ardb@kernel.org
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/acpi.c | 22 ++++++++++++++++++++--
 include/linux/acpi.h     |  2 +-
 2 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c
index a85174d05473..cada0b816c8a 100644
--- a/arch/arm64/kernel/acpi.c
+++ b/arch/arm64/kernel/acpi.c
@@ -298,8 +298,21 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size)
 		case EFI_BOOT_SERVICES_DATA:
 		case EFI_CONVENTIONAL_MEMORY:
 		case EFI_PERSISTENT_MEMORY:
-			pr_warn(FW_BUG "requested region covers kernel memory @ %pa\n", &phys);
-			return NULL;
+			if (memblock_is_map_memory(phys) ||
+			    !memblock_is_region_memory(phys, size)) {
+				pr_warn(FW_BUG "requested region covers kernel memory @ %pa\n", &phys);
+				return NULL;
+			}
+			/*
+			 * Mapping kernel memory is permitted if the region in
+			 * question is covered by a single memblock with the
+			 * NOMAP attribute set: this enables the use of ACPI
+			 * table overrides passed via initramfs, which are
+			 * reserved in memory using arch_reserve_mem_area()
+			 * below. As this particular use case only requires
+			 * read access, fall through to the R/O mapping case.
+			 */
+			fallthrough;
 
 		case EFI_RUNTIME_SERVICES_CODE:
 			/*
@@ -388,3 +401,8 @@ int apei_claim_sea(struct pt_regs *regs)
 
 	return err;
 }
+
+void arch_reserve_mem_area(acpi_physical_address addr, size_t size)
+{
+	memblock_mark_nomap(addr, size);
+}
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 1e4cdc6c7ae2..64ae25c59d55 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -958,7 +958,7 @@ void acpi_os_set_prepare_extended_sleep(int (*func)(u8 sleep_state,
 acpi_status acpi_os_prepare_extended_sleep(u8 sleep_state,
 					   u32 val_a, u32 val_b);
 
-#ifdef CONFIG_X86
+#ifndef CONFIG_IA64
 void arch_reserve_mem_area(acpi_physical_address addr, size_t size);
 #else
 static inline void arch_reserve_mem_area(acpi_physical_address addr,

From 76a6b0b90d532ed9bb9f6069aa12859c185e5b99 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali@kernel.org>
Date: Fri, 25 Sep 2020 11:21:15 +0200
Subject: [PATCH 264/322] =?UTF-8?q?MAINTAINERS:=20Add=20Pali=20Roh=C3=A1r?=
 =?UTF-8?q?=20as=20aardvark=20PCI=20maintainer?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Link: https://lore.kernel.org/r/20200925092115.16546-1-pali@kernel.org
Signed-off-by: Pali Rohár <pali@kernel.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Thomas Petazzzoni <thomas.petazzoni@bootlin.com>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index deaafb617361..5959a24a4acb 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13160,6 +13160,7 @@ F:	drivers/firmware/pcdp.*
 
 PCI DRIVER FOR AARDVARK (Marvell Armada 3700)
 M:	Thomas Petazzoni <thomas.petazzoni@bootlin.com>
+M:	Pali Rohár <pali@kernel.org>
 L:	linux-pci@vger.kernel.org
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained

From e154b5b70368a84a19505a0be9b0096c66562b56 Mon Sep 17 00:00:00 2001
From: Subbaraya Sundeep <sbhatta@marvell.com>
Date: Wed, 30 Sep 2020 21:38:27 +0530
Subject: [PATCH 265/322] octeontx2-af: Fix enable/disable of default NPC
 entries

Packet replication feature present in Octeontx2
is a hardware linked list of PF and its VF
interfaces so that broadcast packets are sent
to all interfaces present in the list. It is
driver job to add and delete a PF/VF interface
to/from the list when the interface is brought
up and down. This patch fixes the
npc_enadis_default_entries function to handle
broadcast replication properly if packet replication
feature is present.

Fixes: 40df309e4166 ("octeontx2-af: Support to enable/disable default MCAM entries")
Signed-off-by: Subbaraya Sundeep <sbhatta@marvell.com>
Signed-off-by: Geetha sowjanya <gakula@marvell.com>
Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/marvell/octeontx2/af/rvu.h   |  3 ++-
 .../ethernet/marvell/octeontx2/af/rvu_nix.c   |  5 ++--
 .../ethernet/marvell/octeontx2/af/rvu_npc.c   | 26 ++++++++++++++-----
 3 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
index dcf25a092008..b89dde2c8b08 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
@@ -463,6 +463,7 @@ void rvu_nix_freemem(struct rvu *rvu);
 int rvu_get_nixlf_count(struct rvu *rvu);
 void rvu_nix_lf_teardown(struct rvu *rvu, u16 pcifunc, int blkaddr, int npalf);
 int nix_get_nixlf(struct rvu *rvu, u16 pcifunc, int *nixlf, int *nix_blkaddr);
+int nix_update_bcast_mce_list(struct rvu *rvu, u16 pcifunc, bool add);
 
 /* NPC APIs */
 int rvu_npc_init(struct rvu *rvu);
@@ -477,7 +478,7 @@ void rvu_npc_disable_promisc_entry(struct rvu *rvu, u16 pcifunc, int nixlf);
 void rvu_npc_enable_promisc_entry(struct rvu *rvu, u16 pcifunc, int nixlf);
 void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc,
 				       int nixlf, u64 chan);
-void rvu_npc_disable_bcast_entry(struct rvu *rvu, u16 pcifunc);
+void rvu_npc_enable_bcast_entry(struct rvu *rvu, u16 pcifunc, bool enable);
 int rvu_npc_update_rxvlan(struct rvu *rvu, u16 pcifunc, int nixlf);
 void rvu_npc_disable_mcam_entries(struct rvu *rvu, u16 pcifunc, int nixlf);
 void rvu_npc_disable_default_entries(struct rvu *rvu, u16 pcifunc, int nixlf);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
index 01a793105599..0fc70824fd6b 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
@@ -17,7 +17,6 @@
 #include "npc.h"
 #include "cgx.h"
 
-static int nix_update_bcast_mce_list(struct rvu *rvu, u16 pcifunc, bool add);
 static int rvu_nix_get_bpid(struct rvu *rvu, struct nix_bp_cfg_req *req,
 			    int type, int chan_id);
 
@@ -2020,7 +2019,7 @@ static int nix_update_mce_list(struct nix_mce_list *mce_list,
 	return 0;
 }
 
-static int nix_update_bcast_mce_list(struct rvu *rvu, u16 pcifunc, bool add)
+int nix_update_bcast_mce_list(struct rvu *rvu, u16 pcifunc, bool add)
 {
 	int err = 0, idx, next_idx, last_idx;
 	struct nix_mce_list *mce_list;
@@ -2065,7 +2064,7 @@ static int nix_update_bcast_mce_list(struct rvu *rvu, u16 pcifunc, bool add)
 
 	/* Disable MCAM entry in NPC */
 	if (!mce_list->count) {
-		rvu_npc_disable_bcast_entry(rvu, pcifunc);
+		rvu_npc_enable_bcast_entry(rvu, pcifunc, false);
 		goto end;
 	}
 
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
index 0a214084406a..fbaf9bcd83f2 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
@@ -530,7 +530,7 @@ void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc,
 			      NIX_INTF_RX, &entry, true);
 }
 
-void rvu_npc_disable_bcast_entry(struct rvu *rvu, u16 pcifunc)
+void rvu_npc_enable_bcast_entry(struct rvu *rvu, u16 pcifunc, bool enable)
 {
 	struct npc_mcam *mcam = &rvu->hw->mcam;
 	int blkaddr, index;
@@ -543,7 +543,7 @@ void rvu_npc_disable_bcast_entry(struct rvu *rvu, u16 pcifunc)
 	pcifunc = pcifunc & ~RVU_PFVF_FUNC_MASK;
 
 	index = npc_get_nixlf_mcam_index(mcam, pcifunc, 0, NIXLF_BCAST_ENTRY);
-	npc_enable_mcam_entry(rvu, mcam, blkaddr, index, false);
+	npc_enable_mcam_entry(rvu, mcam, blkaddr, index, enable);
 }
 
 void rvu_npc_update_flowkey_alg_idx(struct rvu *rvu, u16 pcifunc, int nixlf,
@@ -622,23 +622,35 @@ static void npc_enadis_default_entries(struct rvu *rvu, u16 pcifunc,
 					 nixlf, NIXLF_UCAST_ENTRY);
 	npc_enable_mcam_entry(rvu, mcam, blkaddr, index, enable);
 
-	/* For PF, ena/dis promisc and bcast MCAM match entries */
-	if (pcifunc & RVU_PFVF_FUNC_MASK)
+	/* For PF, ena/dis promisc and bcast MCAM match entries.
+	 * For VFs add/delete from bcast list when RX multicast
+	 * feature is present.
+	 */
+	if (pcifunc & RVU_PFVF_FUNC_MASK && !rvu->hw->cap.nix_rx_multicast)
 		return;
 
 	/* For bcast, enable/disable only if it's action is not
 	 * packet replication, incase if action is replication
-	 * then this PF's nixlf is removed from bcast replication
+	 * then this PF/VF's nixlf is removed from bcast replication
 	 * list.
 	 */
-	index = npc_get_nixlf_mcam_index(mcam, pcifunc,
+	index = npc_get_nixlf_mcam_index(mcam, pcifunc & ~RVU_PFVF_FUNC_MASK,
 					 nixlf, NIXLF_BCAST_ENTRY);
 	bank = npc_get_bank(mcam, index);
 	*(u64 *)&action = rvu_read64(rvu, blkaddr,
 	     NPC_AF_MCAMEX_BANKX_ACTION(index & (mcam->banksize - 1), bank));
-	if (action.op != NIX_RX_ACTIONOP_MCAST)
+
+	/* VFs will not have BCAST entry */
+	if (action.op != NIX_RX_ACTIONOP_MCAST &&
+	    !(pcifunc & RVU_PFVF_FUNC_MASK)) {
 		npc_enable_mcam_entry(rvu, mcam,
 				      blkaddr, index, enable);
+	} else {
+		nix_update_bcast_mce_list(rvu, pcifunc, enable);
+		/* Enable PF's BCAST entry for packet replication */
+		rvu_npc_enable_bcast_entry(rvu, pcifunc, enable);
+	}
+
 	if (enable)
 		rvu_npc_enable_promisc_entry(rvu, pcifunc, nixlf);
 	else

From 89eae5e87b4fa799726a3e8911c90d418cb5d2b1 Mon Sep 17 00:00:00 2001
From: Geetha sowjanya <gakula@marvell.com>
Date: Wed, 30 Sep 2020 21:38:52 +0530
Subject: [PATCH 266/322] octeontx2-pf: Fix TCP/UDP checksum offload for IPv6
 frames

For TCP/UDP checksum offload feature in Octeontx2
expects L3TYPE to be set irrespective of IP header
checksum is being offloaded or not. Currently for
IPv6 frames L3TYPE is not being set resulting in
packet drop with checksum error. This patch fixes
this issue.

Fixes: 3ca6c4c88 ("octeontx2-pf: Add packet transmission support")
Signed-off-by: Geetha sowjanya <gakula@marvell.com>
Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
index 3a5b34a2a7a6..e46834e043be 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
@@ -524,6 +524,7 @@ static void otx2_sqe_add_hdr(struct otx2_nic *pfvf, struct otx2_snd_queue *sq,
 			sqe_hdr->ol3type = NIX_SENDL3TYPE_IP4_CKSUM;
 		} else if (skb->protocol == htons(ETH_P_IPV6)) {
 			proto = ipv6_hdr(skb)->nexthdr;
+			sqe_hdr->ol3type = NIX_SENDL3TYPE_IP6;
 		}
 
 		if (proto == IPPROTO_TCP)

From 1ea0166da0509e987caa42c30a6a71f2c6ca1875 Mon Sep 17 00:00:00 2001
From: Hariprasad Kelam <hkelam@marvell.com>
Date: Wed, 30 Sep 2020 21:39:14 +0530
Subject: [PATCH 267/322] octeontx2-pf: Fix the device state on error

Currently in otx2_open on failure of nix_lf_start
transmit queues are not stopped which are already
started in link_event. Since the tx queues are not
stopped network stack still try's to send the packets
leading to driver crash while access the device resources.

Fixes: 50fe6c02e ("octeontx2-pf: Register and handle link notifications")
Signed-off-by: Hariprasad Kelam <hkelam@marvell.com>
Signed-off-by: Geetha sowjanya <gakula@marvell.com>
Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
index 75a8c407e815..5d620a39ea80 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
@@ -1560,10 +1560,13 @@ int otx2_open(struct net_device *netdev)
 
 	err = otx2_rxtx_enable(pf, true);
 	if (err)
-		goto err_free_cints;
+		goto err_tx_stop_queues;
 
 	return 0;
 
+err_tx_stop_queues:
+	netif_tx_stop_all_queues(netdev);
+	netif_carrier_off(netdev);
 err_free_cints:
 	otx2_free_cints(pf, qidx);
 	vec = pci_irq_vector(pf->pdev,

From 66a5209b53418111757716d71e52727b782eabd4 Mon Sep 17 00:00:00 2001
From: Hariprasad Kelam <hkelam@marvell.com>
Date: Wed, 30 Sep 2020 21:39:35 +0530
Subject: [PATCH 268/322] octeontx2-pf: Fix synchnorization issue in mbox

Mbox implementation in octeontx2 driver has three states
alloc, send and reset in mbox response. VF allocate and
sends message to PF for processing, PF ACKs them back and
reset the mbox memory. In some case we see synchronization
issue where after msgs_acked is incremented and before
mbox_reset API is called, if current execution is scheduled
out and a different thread is scheduled in which checks for
msgs_acked. Since the new thread sees msgs_acked == msgs_sent
it will try to allocate a new message and to send a new mbox
message to PF.Now if mbox_reset is scheduled in, PF will see
'0' in msgs_send.
This patch fixes the issue by calling mbox_reset before
incrementing msgs_acked flag for last processing message and
checks for valid message size.

Fixes: d424b6c02 ("octeontx2-pf: Enable SRIOV and added VF mbox handling")
Signed-off-by: Hariprasad Kelam <hkelam@marvell.com>
Signed-off-by: Geetha sowjanya <gakula@marvell.com>
Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/octeontx2/af/mbox.c     | 12 ++++++++++--
 drivers/net/ethernet/marvell/octeontx2/af/mbox.h     |  1 +
 drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 11 ++++++-----
 drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c |  4 ++--
 4 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.c b/drivers/net/ethernet/marvell/octeontx2/af/mbox.c
index 387e33fa417a..2718fe201c14 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.c
@@ -17,7 +17,7 @@
 
 static const u16 msgs_offset = ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN);
 
-void otx2_mbox_reset(struct otx2_mbox *mbox, int devid)
+void __otx2_mbox_reset(struct otx2_mbox *mbox, int devid)
 {
 	void *hw_mbase = mbox->hwbase + (devid * MBOX_SIZE);
 	struct otx2_mbox_dev *mdev = &mbox->dev[devid];
@@ -26,13 +26,21 @@ void otx2_mbox_reset(struct otx2_mbox *mbox, int devid)
 	tx_hdr = hw_mbase + mbox->tx_start;
 	rx_hdr = hw_mbase + mbox->rx_start;
 
-	spin_lock(&mdev->mbox_lock);
 	mdev->msg_size = 0;
 	mdev->rsp_size = 0;
 	tx_hdr->num_msgs = 0;
 	tx_hdr->msg_size = 0;
 	rx_hdr->num_msgs = 0;
 	rx_hdr->msg_size = 0;
+}
+EXPORT_SYMBOL(__otx2_mbox_reset);
+
+void otx2_mbox_reset(struct otx2_mbox *mbox, int devid)
+{
+	struct otx2_mbox_dev *mdev = &mbox->dev[devid];
+
+	spin_lock(&mdev->mbox_lock);
+	__otx2_mbox_reset(mbox, devid);
 	spin_unlock(&mdev->mbox_lock);
 }
 EXPORT_SYMBOL(otx2_mbox_reset);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
index 6dfd0f90cd70..ab433789d2c3 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
@@ -93,6 +93,7 @@ struct mbox_msghdr {
 };
 
 void otx2_mbox_reset(struct otx2_mbox *mbox, int devid);
+void __otx2_mbox_reset(struct otx2_mbox *mbox, int devid);
 void otx2_mbox_destroy(struct otx2_mbox *mbox);
 int otx2_mbox_init(struct otx2_mbox *mbox, void __force *hwbase,
 		   struct pci_dev *pdev, void __force *reg_base,
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
index 5d620a39ea80..2fb45670aca4 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
@@ -370,8 +370,8 @@ static int otx2_forward_vf_mbox_msgs(struct otx2_nic *pf,
 		dst_mbox = &pf->mbox;
 		dst_size = dst_mbox->mbox.tx_size -
 				ALIGN(sizeof(*mbox_hdr), MBOX_MSG_ALIGN);
-		/* Check if msgs fit into destination area */
-		if (mbox_hdr->msg_size > dst_size)
+		/* Check if msgs fit into destination area and has valid size */
+		if (mbox_hdr->msg_size > dst_size || !mbox_hdr->msg_size)
 			return -EINVAL;
 
 		dst_mdev = &dst_mbox->mbox.dev[0];
@@ -526,10 +526,10 @@ static void otx2_pfvf_mbox_up_handler(struct work_struct *work)
 
 end:
 		offset = mbox->rx_start + msg->next_msgoff;
+		if (mdev->msgs_acked == (vf_mbox->up_num_msgs - 1))
+			__otx2_mbox_reset(mbox, 0);
 		mdev->msgs_acked++;
 	}
-
-	otx2_mbox_reset(mbox, vf_idx);
 }
 
 static irqreturn_t otx2_pfvf_mbox_intr_handler(int irq, void *pf_irq)
@@ -803,10 +803,11 @@ static void otx2_pfaf_mbox_handler(struct work_struct *work)
 		msg = (struct mbox_msghdr *)(mdev->mbase + offset);
 		otx2_process_pfaf_mbox_msg(pf, msg);
 		offset = mbox->rx_start + msg->next_msgoff;
+		if (mdev->msgs_acked == (af_mbox->num_msgs - 1))
+			__otx2_mbox_reset(mbox, 0);
 		mdev->msgs_acked++;
 	}
 
-	otx2_mbox_reset(mbox, 0);
 }
 
 static void otx2_handle_link_event(struct otx2_nic *pf)
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
index 92a3db69a6cd..2f90f1721441 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
@@ -99,10 +99,10 @@ static void otx2vf_vfaf_mbox_handler(struct work_struct *work)
 		msg = (struct mbox_msghdr *)(mdev->mbase + offset);
 		otx2vf_process_vfaf_mbox_msg(af_mbox->pfvf, msg);
 		offset = mbox->rx_start + msg->next_msgoff;
+		if (mdev->msgs_acked == (af_mbox->num_msgs - 1))
+			__otx2_mbox_reset(mbox, 0);
 		mdev->msgs_acked++;
 	}
-
-	otx2_mbox_reset(mbox, 0);
 }
 
 static int otx2vf_process_mbox_msg_up(struct otx2_nic *vf,

From 5d8ff95a52c36740bf4e61202d08549e7a9caf20 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Mon, 14 Sep 2020 12:18:46 +0300
Subject: [PATCH 269/322] pinctrl: qcom: sm8250: correct sdc2_clk

Correct sdc2_clk pin definition (register offset is wrong, verified by
the msm-4.19 driver).

Fixes: 4e3ec9e407ad ("pinctrl: qcom: Add sm8250 pinctrl driver.")
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Acked-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Link: https://lore.kernel.org/r/20200914091846.55204-1-dmitry.baryshkov@linaro.org
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/qcom/pinctrl-sm8250.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pinctrl/qcom/pinctrl-sm8250.c b/drivers/pinctrl/qcom/pinctrl-sm8250.c
index a660f1274b66..826df0d637ea 100644
--- a/drivers/pinctrl/qcom/pinctrl-sm8250.c
+++ b/drivers/pinctrl/qcom/pinctrl-sm8250.c
@@ -1308,7 +1308,7 @@ static const struct msm_pingroup sm8250_groups[] = {
 	[178] = PINGROUP(178, WEST, _, _, _, _, _, _, _, _, _),
 	[179] = PINGROUP(179, WEST, _, _, _, _, _, _, _, _, _),
 	[180] = UFS_RESET(ufs_reset, 0xb8000),
-	[181] = SDC_PINGROUP(sdc2_clk, 0x7000, 14, 6),
+	[181] = SDC_PINGROUP(sdc2_clk, 0xb7000, 14, 6),
 	[182] = SDC_PINGROUP(sdc2_cmd, 0xb7000, 11, 3),
 	[183] = SDC_PINGROUP(sdc2_data, 0xb7000, 9, 0),
 };

From 39c4dbe4cc363accd676162c24b264f44c581490 Mon Sep 17 00:00:00 2001
From: Hanks Chen <hanks.chen@mediatek.com>
Date: Thu, 20 Aug 2020 19:22:25 +0800
Subject: [PATCH 270/322] pinctrl: mediatek: check mtk_is_virt_gpio input
 parameter

check mtk_is_virt_gpio input parameter,
virtual gpio need to support eint mode.

add error handler for the ko case
to fix this boot fail:
pc : mtk_is_virt_gpio+0x20/0x38 [pinctrl_mtk_common_v2]
lr : mtk_gpio_get_direction+0x44/0xb0 [pinctrl_paris]

Fixes: edd546465002 ("pinctrl: mediatek: avoid virtual gpio trying to set reg")
Signed-off-by: Hanks Chen <hanks.chen@mediatek.com>
Acked-by: Sean Wang <sean.wang@kernel.org>
Singed-off-by: Jie Yang <sin_jieyang@mediatek.com>
Link: https://lore.kernel.org/r/1597922546-29633-1-git-send-email-hanks.chen@mediatek.com
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c
index 2f3dfb56c3fa..35bbe5935708 100644
--- a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c
+++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c
@@ -259,6 +259,10 @@ bool mtk_is_virt_gpio(struct mtk_pinctrl *hw, unsigned int gpio_n)
 
 	desc = (const struct mtk_pin_desc *)&hw->soc->pins[gpio_n];
 
+	/* if the GPIO is not supported for eint mode */
+	if (desc->eint.eint_m == NO_EINT_SUPPORT)
+		return virt_gpio;
+
 	if (desc->funcs && !desc->funcs[desc->eint.eint_m].name)
 		virt_gpio = true;
 

From e09e200e07222467ef82367bff7cc6f44ad00397 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 30 Sep 2020 17:20:12 +0300
Subject: [PATCH 271/322] gpio: pca953x: Use bitmap API over implicit GCC
 extension

In IRQ handler we have to clear bitmap before use. Currently
the GCC extension has been used for that. For sake of the consistency
switch to bitmap API. As expected bloat-o-meter shows no difference
in the object size.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Link: https://lore.kernel.org/r/20200930142013.59247-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpio-pca953x.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c
index 29342e5def82..7f64e6948574 100644
--- a/drivers/gpio/gpio-pca953x.c
+++ b/drivers/gpio/gpio-pca953x.c
@@ -814,10 +814,12 @@ static irqreturn_t pca953x_irq_handler(int irq, void *devid)
 {
 	struct pca953x_chip *chip = devid;
 	struct gpio_chip *gc = &chip->gpio_chip;
-	DECLARE_BITMAP(pending, MAX_LINE) = {};
+	DECLARE_BITMAP(pending, MAX_LINE);
 	int level;
 	bool ret;
 
+	bitmap_zero(pending, MAX_LINE);
+
 	mutex_lock(&chip->i2c_lock);
 	ret = pca953x_irq_pending(chip, pending);
 	mutex_unlock(&chip->i2c_lock);

From 8c1f1c34777bddb633d4a068a9c812d29974c6bd Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 30 Sep 2020 17:20:13 +0300
Subject: [PATCH 272/322] gpio: pca953x: Correctly initialize registers 6 and 7
 for PCA957x

When driver has been converted to the bitmap API the non-bitmap functions
started behaving differently on 32-bit BE architectures since the bytes in
two consequent unsigned longs are in different order in comparison to byte
array. Hence if the chip had had more than 32 lines the memset() call over
it would have not set up upper lines correctly.
Although it's currently a theoretical case (no supported chips of this type
has 32+ lines), it's better to provide a clean code to avoid people thinking
this is okay and potentially producing not fully working things.

Fixes: 35d13d94893f ("gpio: pca953x: convert to use bitmap API")
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Link: https://lore.kernel.org/r/20200930142013.59247-2-andriy.shevchenko@linux.intel.com
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpio-pca953x.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c
index 7f64e6948574..fb61f2fc6ed7 100644
--- a/drivers/gpio/gpio-pca953x.c
+++ b/drivers/gpio/gpio-pca953x.c
@@ -942,6 +942,7 @@ out:
 static int device_pca957x_init(struct pca953x_chip *chip, u32 invert)
 {
 	DECLARE_BITMAP(val, MAX_LINE);
+	unsigned int i;
 	int ret;
 
 	ret = device_pca95xx_init(chip, invert);
@@ -949,7 +950,9 @@ static int device_pca957x_init(struct pca953x_chip *chip, u32 invert)
 		goto out;
 
 	/* To enable register 6, 7 to control pull up and pull down */
-	memset(val, 0x02, NBANK(chip));
+	for (i = 0; i < NBANK(chip); i++)
+		bitmap_set_value8(val, 0x02, i * BANK_SZ);
+
 	ret = pca953x_write_regs(chip, PCA957X_BKEN, val);
 	if (ret)
 		goto out;

From 452d62227958e149e5c7b0084a559c0957c3ff2a Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Mon, 13 Jul 2020 15:15:14 +0100
Subject: [PATCH 273/322] KVM: arm64: Restore missing ISB on nVHE
 __tlb_switch_to_guest

Commit a0e50aa3f4a8 ("KVM: arm64: Factor out stage 2 page table
data from struct kvm") dropped the ISB after __load_guest_stage2(),
only leaving the one that is required when the speculative AT
workaround is in effect.

As Andrew points it: "This alternative is 'backwards' to avoid a
double ISB as there is one in __load_guest_stage2 when the workaround
is active."

Restore the missing ISB, conditionned on the AT workaround not being
active.

Fixes: a0e50aa3f4a8 ("KVM: arm64: Factor out stage 2 page table data from struct kvm")
Reported-by: Andrew Scull <ascull@google.com>
Reported-by: Thomas Tai <thomas.tai@oracle.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/hyp/nvhe/tlb.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c
index 69eae608d670..b15d65a42042 100644
--- a/arch/arm64/kvm/hyp/nvhe/tlb.c
+++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
@@ -31,7 +31,14 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
 		isb();
 	}
 
+	/*
+	 * __load_guest_stage2() includes an ISB only when the AT
+	 * workaround is applied. Take care of the opposite condition,
+	 * ensuring that we always have an ISB, but not two ISBs back
+	 * to back.
+	 */
 	__load_guest_stage2(mmu);
+	asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT));
 }
 
 static void __tlb_switch_to_host(struct tlb_inv_context *cxt)

From 0bbe4ced53e36786eafc2ecbf9a1761f55b4a82e Mon Sep 17 00:00:00 2001
From: Adrian Huang <ahuang12@lenovo.com>
Date: Sat, 26 Sep 2020 18:26:02 +0800
Subject: [PATCH 274/322] iommu/amd: Fix the overwritten field in IVMD header

Commit 387caf0b759a ("iommu/amd: Treat per-device exclusion
ranges as r/w unity-mapped regions") accidentally overwrites
the 'flags' field in IVMD (struct ivmd_header) when the I/O
virtualization memory definition is associated with the
exclusion range entry. This leads to the corrupted IVMD table
(incorrect checksum). The kdump kernel reports the invalid checksum:

ACPI BIOS Warning (bug): Incorrect checksum in table [IVRS] - 0x5C, should be 0x60 (20200717/tbprint-177)
AMD-Vi: [Firmware Bug]: IVRS invalid checksum

Fix the above-mentioned issue by modifying the 'struct unity_map_entry'
member instead of the IVMD header.

Cleanup: The *exclusion_range* functions are not used anymore, so
get rid of them.

Fixes: 387caf0b759a ("iommu/amd: Treat per-device exclusion ranges as r/w unity-mapped regions")
Reported-and-tested-by: Baoquan He <bhe@redhat.com>
Signed-off-by: Adrian Huang <ahuang12@lenovo.com>
Cc: Jerry Snitselaar <jsnitsel@redhat.com>
Link: https://lore.kernel.org/r/20200926102602.19177-1-adrianhuang0701@gmail.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/amd/init.c | 56 +++++++---------------------------------
 1 file changed, 10 insertions(+), 46 deletions(-)

diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 445a08d23fed..1ba6b4cc56e8 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -1103,25 +1103,6 @@ static int __init add_early_maps(void)
 	return 0;
 }
 
-/*
- * Reads the device exclusion range from ACPI and initializes the IOMMU with
- * it
- */
-static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m)
-{
-	if (!(m->flags & IVMD_FLAG_EXCL_RANGE))
-		return;
-
-	/*
-	 * Treat per-device exclusion ranges as r/w unity-mapped regions
-	 * since some buggy BIOSes might lead to the overwritten exclusion
-	 * range (exclusion_start and exclusion_length members). This
-	 * happens when there are multiple exclusion ranges (IVMD entries)
-	 * defined in ACPI table.
-	 */
-	m->flags = (IVMD_FLAG_IW | IVMD_FLAG_IR | IVMD_FLAG_UNITY_MAP);
-}
-
 /*
  * Takes a pointer to an AMD IOMMU entry in the ACPI table and
  * initializes the hardware and our data structures with it.
@@ -2073,30 +2054,6 @@ static void __init free_unity_maps(void)
 	}
 }
 
-/* called when we find an exclusion range definition in ACPI */
-static int __init init_exclusion_range(struct ivmd_header *m)
-{
-	int i;
-
-	switch (m->type) {
-	case ACPI_IVMD_TYPE:
-		set_device_exclusion_range(m->devid, m);
-		break;
-	case ACPI_IVMD_TYPE_ALL:
-		for (i = 0; i <= amd_iommu_last_bdf; ++i)
-			set_device_exclusion_range(i, m);
-		break;
-	case ACPI_IVMD_TYPE_RANGE:
-		for (i = m->devid; i <= m->aux; ++i)
-			set_device_exclusion_range(i, m);
-		break;
-	default:
-		break;
-	}
-
-	return 0;
-}
-
 /* called for unity map ACPI definition */
 static int __init init_unity_map_range(struct ivmd_header *m)
 {
@@ -2107,9 +2064,6 @@ static int __init init_unity_map_range(struct ivmd_header *m)
 	if (e == NULL)
 		return -ENOMEM;
 
-	if (m->flags & IVMD_FLAG_EXCL_RANGE)
-		init_exclusion_range(m);
-
 	switch (m->type) {
 	default:
 		kfree(e);
@@ -2133,6 +2087,16 @@ static int __init init_unity_map_range(struct ivmd_header *m)
 	e->address_end = e->address_start + PAGE_ALIGN(m->range_length);
 	e->prot = m->flags >> 1;
 
+	/*
+	 * Treat per-device exclusion ranges as r/w unity-mapped regions
+	 * since some buggy BIOSes might lead to the overwritten exclusion
+	 * range (exclusion_start and exclusion_length members). This
+	 * happens when there are multiple exclusion ranges (IVMD entries)
+	 * defined in ACPI table.
+	 */
+	if (m->flags & IVMD_FLAG_EXCL_RANGE)
+		e->prot = (IVMD_FLAG_IW | IVMD_FLAG_IR) >> 1;
+
 	DUMP_printk("%s devid_start: %02x:%02x.%x devid_end: %02x:%02x.%x"
 		    " range_start: %016llx range_end: %016llx flags: %x\n", s,
 		    PCI_BUS_NUM(e->devid_start), PCI_SLOT(e->devid_start),

From 0891fb39ba67bd7ae023ea0d367297ffff010781 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Wed, 30 Sep 2020 11:16:14 +0200
Subject: [PATCH 275/322] xen/events: don't use chip_data for legacy IRQs

Since commit c330fb1ddc0a ("XEN uses irqdesc::irq_data_common::handler_data to store a per interrupt XEN data pointer which contains XEN specific information.")
Xen is using the chip_data pointer for storing IRQ specific data. When
running as a HVM domain this can result in problems for legacy IRQs, as
those might use chip_data for their own purposes.

Use a local array for this purpose in case of legacy IRQs, avoiding the
double use.

Cc: stable@vger.kernel.org
Fixes: c330fb1ddc0a ("XEN uses irqdesc::irq_data_common::handler_data to store a per interrupt XEN data pointer which contains XEN specific information.")
Signed-off-by: Juergen Gross <jgross@suse.com>
Tested-by: Stefan Bader <stefan.bader@canonical.com>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Link: https://lore.kernel.org/r/20200930091614.13660-1-jgross@suse.com
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 drivers/xen/events/events_base.c | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index 90b8f56fbadb..6f02c18fa65c 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -92,6 +92,8 @@ static bool (*pirq_needs_eoi)(unsigned irq);
 /* Xen will never allocate port zero for any purpose. */
 #define VALID_EVTCHN(chn)	((chn) != 0)
 
+static struct irq_info *legacy_info_ptrs[NR_IRQS_LEGACY];
+
 static struct irq_chip xen_dynamic_chip;
 static struct irq_chip xen_percpu_chip;
 static struct irq_chip xen_pirq_chip;
@@ -156,7 +158,18 @@ int get_evtchn_to_irq(evtchn_port_t evtchn)
 /* Get info for IRQ */
 struct irq_info *info_for_irq(unsigned irq)
 {
-	return irq_get_chip_data(irq);
+	if (irq < nr_legacy_irqs())
+		return legacy_info_ptrs[irq];
+	else
+		return irq_get_chip_data(irq);
+}
+
+static void set_info_for_irq(unsigned int irq, struct irq_info *info)
+{
+	if (irq < nr_legacy_irqs())
+		legacy_info_ptrs[irq] = info;
+	else
+		irq_set_chip_data(irq, info);
 }
 
 /* Constructors for packed IRQ information. */
@@ -377,7 +390,7 @@ static void xen_irq_init(unsigned irq)
 	info->type = IRQT_UNBOUND;
 	info->refcnt = -1;
 
-	irq_set_chip_data(irq, info);
+	set_info_for_irq(irq, info);
 
 	list_add_tail(&info->list, &xen_irq_list_head);
 }
@@ -426,14 +439,14 @@ static int __must_check xen_allocate_irq_gsi(unsigned gsi)
 
 static void xen_free_irq(unsigned irq)
 {
-	struct irq_info *info = irq_get_chip_data(irq);
+	struct irq_info *info = info_for_irq(irq);
 
 	if (WARN_ON(!info))
 		return;
 
 	list_del(&info->list);
 
-	irq_set_chip_data(irq, NULL);
+	set_info_for_irq(irq, NULL);
 
 	WARN_ON(info->refcnt > 0);
 
@@ -603,7 +616,7 @@ EXPORT_SYMBOL_GPL(xen_irq_from_gsi);
 static void __unbind_from_irq(unsigned int irq)
 {
 	evtchn_port_t evtchn = evtchn_from_irq(irq);
-	struct irq_info *info = irq_get_chip_data(irq);
+	struct irq_info *info = info_for_irq(irq);
 
 	if (info->refcnt > 0) {
 		info->refcnt--;
@@ -1108,7 +1121,7 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi,
 
 void unbind_from_irqhandler(unsigned int irq, void *dev_id)
 {
-	struct irq_info *info = irq_get_chip_data(irq);
+	struct irq_info *info = info_for_irq(irq);
 
 	if (WARN_ON(!info))
 		return;
@@ -1142,7 +1155,7 @@ int evtchn_make_refcounted(evtchn_port_t evtchn)
 	if (irq == -1)
 		return -ENOENT;
 
-	info = irq_get_chip_data(irq);
+	info = info_for_irq(irq);
 
 	if (!info)
 		return -ENOENT;
@@ -1170,7 +1183,7 @@ int evtchn_get(evtchn_port_t evtchn)
 	if (irq == -1)
 		goto done;
 
-	info = irq_get_chip_data(irq);
+	info = info_for_irq(irq);
 
 	if (!info)
 		goto done;

From 1a3f2fd7fc4e8f24510830e265de2ffb8e3300d2 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Sun, 27 Sep 2020 14:24:28 +0800
Subject: [PATCH 276/322] iommu/vt-d: Fix lockdep splat in
 iommu_flush_dev_iotlb()

Lock(&iommu->lock) without disabling irq causes lockdep warnings.

[   12.703950] ========================================================
[   12.703962] WARNING: possible irq lock inversion dependency detected
[   12.703975] 5.9.0-rc6+ #659 Not tainted
[   12.703983] --------------------------------------------------------
[   12.703995] systemd-udevd/284 just changed the state of lock:
[   12.704007] ffffffffbd6ff4d8 (device_domain_lock){..-.}-{2:2}, at:
               iommu_flush_dev_iotlb.part.57+0x2e/0x90
[   12.704031] but this lock took another, SOFTIRQ-unsafe lock in the past:
[   12.704043]  (&iommu->lock){+.+.}-{2:2}
[   12.704045]

               and interrupts could create inverse lock ordering between
               them.

[   12.704073]
               other info that might help us debug this:
[   12.704085]  Possible interrupt unsafe locking scenario:

[   12.704097]        CPU0                    CPU1
[   12.704106]        ----                    ----
[   12.704115]   lock(&iommu->lock);
[   12.704123]                                local_irq_disable();
[   12.704134]                                lock(device_domain_lock);
[   12.704146]                                lock(&iommu->lock);
[   12.704158]   <Interrupt>
[   12.704164]     lock(device_domain_lock);
[   12.704174]
                *** DEADLOCK ***

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Link: https://lore.kernel.org/r/20200927062428.13713-1-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel/iommu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 87b17bac04c2..2239c211178b 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -2664,7 +2664,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
 		}
 
 		/* Setup the PASID entry for requests without PASID: */
-		spin_lock(&iommu->lock);
+		spin_lock_irqsave(&iommu->lock, flags);
 		if (hw_pass_through && domain_type_is_si(domain))
 			ret = intel_pasid_setup_pass_through(iommu, domain,
 					dev, PASID_RID2PASID);
@@ -2674,7 +2674,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
 		else
 			ret = intel_pasid_setup_second_level(iommu, domain,
 					dev, PASID_RID2PASID);
-		spin_unlock(&iommu->lock);
+		spin_unlock_irqrestore(&iommu->lock, flags);
 		if (ret) {
 			dev_err(dev, "Setup RID2PASID failed\n");
 			dmar_remove_one_dev_info(dev);

From bb13a800620c2b9046187facdf7dfd45699f5a74 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Thu, 1 Oct 2020 08:44:19 +0200
Subject: [PATCH 277/322] r8169: fix handling ether_clk

Petr reported that system freezes on r8169 driver load on a system
using ether_clk. The original change was done under the assumption
that the clock isn't needed for basic operations like chip register
access. But obviously that was wrong.
Therefore effectively revert the original change, and in addition
leave the clock active when suspending and WoL is enabled. Chip may
not be able to process incoming packets otherwise.

Fixes: 9f0b54cd1672 ("r8169: move switching optional clock on/off to pll power functions")
Reported-by: Petr Tesarik <ptesarik@suse.cz>
Tested-by: Petr Tesarik <ptesarik@suse.cz>
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 44 +++++++++++++----------
 1 file changed, 25 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 5e867da1aad3..4ae1a6fbf0ba 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -2239,14 +2239,10 @@ static void rtl_pll_power_down(struct rtl8169_private *tp)
 	default:
 		break;
 	}
-
-	clk_disable_unprepare(tp->clk);
 }
 
 static void rtl_pll_power_up(struct rtl8169_private *tp)
 {
-	clk_prepare_enable(tp->clk);
-
 	switch (tp->mac_version) {
 	case RTL_GIGA_MAC_VER_25 ... RTL_GIGA_MAC_VER_33:
 	case RTL_GIGA_MAC_VER_37:
@@ -4826,21 +4822,8 @@ static void rtl8169_net_suspend(struct rtl8169_private *tp)
 
 #ifdef CONFIG_PM
 
-static int __maybe_unused rtl8169_suspend(struct device *device)
+static int rtl8169_net_resume(struct rtl8169_private *tp)
 {
-	struct rtl8169_private *tp = dev_get_drvdata(device);
-
-	rtnl_lock();
-	rtl8169_net_suspend(tp);
-	rtnl_unlock();
-
-	return 0;
-}
-
-static int rtl8169_resume(struct device *device)
-{
-	struct rtl8169_private *tp = dev_get_drvdata(device);
-
 	rtl_rar_set(tp, tp->dev->dev_addr);
 
 	if (tp->TxDescArray)
@@ -4851,6 +4834,29 @@ static int rtl8169_resume(struct device *device)
 	return 0;
 }
 
+static int __maybe_unused rtl8169_suspend(struct device *device)
+{
+	struct rtl8169_private *tp = dev_get_drvdata(device);
+
+	rtnl_lock();
+	rtl8169_net_suspend(tp);
+	if (!device_may_wakeup(tp_to_dev(tp)))
+		clk_disable_unprepare(tp->clk);
+	rtnl_unlock();
+
+	return 0;
+}
+
+static int __maybe_unused rtl8169_resume(struct device *device)
+{
+	struct rtl8169_private *tp = dev_get_drvdata(device);
+
+	if (!device_may_wakeup(tp_to_dev(tp)))
+		clk_prepare_enable(tp->clk);
+
+	return rtl8169_net_resume(tp);
+}
+
 static int rtl8169_runtime_suspend(struct device *device)
 {
 	struct rtl8169_private *tp = dev_get_drvdata(device);
@@ -4874,7 +4880,7 @@ static int rtl8169_runtime_resume(struct device *device)
 
 	__rtl8169_set_wol(tp, tp->saved_wolopts);
 
-	return rtl8169_resume(device);
+	return rtl8169_net_resume(tp);
 }
 
 static int rtl8169_runtime_idle(struct device *device)

From ef9da46ddef071e1bbb943afbbe9b38771855554 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Thu, 1 Oct 2020 09:23:02 +0200
Subject: [PATCH 278/322] r8169: fix data corruption issue on RTL8402

Petr reported that after resume from suspend RTL8402 partially
truncates incoming packets, and re-initializing register RxConfig
before the actual chip re-initialization sequence is needed to avoid
the issue.

Reported-by: Petr Tesarik <ptesarik@suse.cz>
Proposed-by: Petr Tesarik <ptesarik@suse.cz>
Tested-by: Petr Tesarik <ptesarik@suse.cz>
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 4ae1a6fbf0ba..256bc4878ef1 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -4854,6 +4854,10 @@ static int __maybe_unused rtl8169_resume(struct device *device)
 	if (!device_may_wakeup(tp_to_dev(tp)))
 		clk_prepare_enable(tp->clk);
 
+	/* Reportedly at least Asus X453MA truncates packets otherwise */
+	if (tp->mac_version == RTL_GIGA_MAC_VER_37)
+		rtl_init_rxcfg(tp);
+
 	return rtl8169_net_resume(tp);
 }
 

From 472e5b056f000a778abb41f1e443de58eb259783 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 1 Oct 2020 19:14:36 -0700
Subject: [PATCH 279/322] pipe: remove pipe_wait() and fix wakeup race with
 splice

The pipe splice code still used the old model of waiting for pipe IO by
using a non-specific "pipe_wait()" that waited for any pipe event to
happen, which depended on all pipe IO being entirely serialized by the
pipe lock.  So by checking the state you were waiting for, and then
adding yourself to the wait queue before dropping the lock, you were
guaranteed to see all the wakeups.

Strictly speaking, the actual wakeups were not done under the lock, but
the pipe_wait() model still worked, because since the waiter held the
lock when checking whether it should sleep, it would always see the
current state, and the wakeup was always done after updating the state.

However, commit 0ddad21d3e99 ("pipe: use exclusive waits when reading or
writing") split the single wait-queue into two, and in the process also
made the "wait for event" code wait for _two_ wait queues, and that then
showed a race with the wakers that were not serialized by the pipe lock.

It's only splice that used that "pipe_wait()" model, so the problem
wasn't obvious, but Josef Bacik reports:

 "I hit a hang with fstest btrfs/187, which does a btrfs send into
  /dev/null. This works by creating a pipe, the write side is given to
  the kernel to write into, and the read side is handed to a thread that
  splices into a file, in this case /dev/null.

  The box that was hung had the write side stuck here [pipe_write] and
  the read side stuck here [splice_from_pipe_next -> pipe_wait].

  [ more details about pipe_wait() scenario ]

  The problem is we're doing the prepare_to_wait, which sets our state
  each time, however we can be woken up either with reads or writes. In
  the case above we race with the WRITER waking us up, and re-set our
  state to INTERRUPTIBLE, and thus never break out of schedule"

Josef had a patch that avoided the issue in pipe_wait() by just making
it set the state only once, but the deeper problem is that pipe_wait()
depends on a level of synchonization by the pipe mutex that it really
shouldn't.  And the whole "wait for any pipe state change" model really
isn't very good to begin with.

So rather than trying to work around things in pipe_wait(), remove that
legacy model of "wait for arbitrary pipe event" entirely, and actually
create functions that wait for the pipe actually being readable or
writable, and can do so without depending on the pipe lock serializing
everything.

Fixes: 0ddad21d3e99 ("pipe: use exclusive waits when reading or writing")
Link: https://lore.kernel.org/linux-fsdevel/bfa88b5ad6f069b2b679316b9e495a970130416c.1601567868.git.josef@toxicpanda.com/
Reported-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-and-tested-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/pipe.c                 | 62 ++++++++++++++++++++++++++-------------
 fs/splice.c               |  8 ++---
 include/linux/pipe_fs_i.h |  5 ++--
 3 files changed, 48 insertions(+), 27 deletions(-)

diff --git a/fs/pipe.c b/fs/pipe.c
index 60dbee457143..117db82b10af 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -106,25 +106,6 @@ void pipe_double_lock(struct pipe_inode_info *pipe1,
 	}
 }
 
-/* Drop the inode semaphore and wait for a pipe event, atomically */
-void pipe_wait(struct pipe_inode_info *pipe)
-{
-	DEFINE_WAIT(rdwait);
-	DEFINE_WAIT(wrwait);
-
-	/*
-	 * Pipes are system-local resources, so sleeping on them
-	 * is considered a noninteractive wait:
-	 */
-	prepare_to_wait(&pipe->rd_wait, &rdwait, TASK_INTERRUPTIBLE);
-	prepare_to_wait(&pipe->wr_wait, &wrwait, TASK_INTERRUPTIBLE);
-	pipe_unlock(pipe);
-	schedule();
-	finish_wait(&pipe->rd_wait, &rdwait);
-	finish_wait(&pipe->wr_wait, &wrwait);
-	pipe_lock(pipe);
-}
-
 static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
 				  struct pipe_buffer *buf)
 {
@@ -1035,12 +1016,52 @@ SYSCALL_DEFINE1(pipe, int __user *, fildes)
 	return do_pipe2(fildes, 0);
 }
 
+/*
+ * This is the stupid "wait for pipe to be readable or writable"
+ * model.
+ *
+ * See pipe_read/write() for the proper kind of exclusive wait,
+ * but that requires that we wake up any other readers/writers
+ * if we then do not end up reading everything (ie the whole
+ * "wake_next_reader/writer" logic in pipe_read/write()).
+ */
+void pipe_wait_readable(struct pipe_inode_info *pipe)
+{
+	pipe_unlock(pipe);
+	wait_event_interruptible(pipe->rd_wait, pipe_readable(pipe));
+	pipe_lock(pipe);
+}
+
+void pipe_wait_writable(struct pipe_inode_info *pipe)
+{
+	pipe_unlock(pipe);
+	wait_event_interruptible(pipe->wr_wait, pipe_writable(pipe));
+	pipe_lock(pipe);
+}
+
+/*
+ * This depends on both the wait (here) and the wakeup (wake_up_partner)
+ * holding the pipe lock, so "*cnt" is stable and we know a wakeup cannot
+ * race with the count check and waitqueue prep.
+ *
+ * Normally in order to avoid races, you'd do the prepare_to_wait() first,
+ * then check the condition you're waiting for, and only then sleep. But
+ * because of the pipe lock, we can check the condition before being on
+ * the wait queue.
+ *
+ * We use the 'rd_wait' waitqueue for pipe partner waiting.
+ */
 static int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt)
 {
+	DEFINE_WAIT(rdwait);
 	int cur = *cnt;
 
 	while (cur == *cnt) {
-		pipe_wait(pipe);
+		prepare_to_wait(&pipe->rd_wait, &rdwait, TASK_INTERRUPTIBLE);
+		pipe_unlock(pipe);
+		schedule();
+		finish_wait(&pipe->rd_wait, &rdwait);
+		pipe_lock(pipe);
 		if (signal_pending(current))
 			break;
 	}
@@ -1050,7 +1071,6 @@ static int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt)
 static void wake_up_partner(struct pipe_inode_info *pipe)
 {
 	wake_up_interruptible_all(&pipe->rd_wait);
-	wake_up_interruptible_all(&pipe->wr_wait);
 }
 
 static int fifo_open(struct inode *inode, struct file *filp)
diff --git a/fs/splice.c b/fs/splice.c
index d7c8a7c4db07..c3d00dfc7344 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -563,7 +563,7 @@ static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_des
 			sd->need_wakeup = false;
 		}
 
-		pipe_wait(pipe);
+		pipe_wait_readable(pipe);
 	}
 
 	return 1;
@@ -1077,7 +1077,7 @@ static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags)
 			return -EAGAIN;
 		if (signal_pending(current))
 			return -ERESTARTSYS;
-		pipe_wait(pipe);
+		pipe_wait_writable(pipe);
 	}
 }
 
@@ -1454,7 +1454,7 @@ static int ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
 			ret = -EAGAIN;
 			break;
 		}
-		pipe_wait(pipe);
+		pipe_wait_readable(pipe);
 	}
 
 	pipe_unlock(pipe);
@@ -1493,7 +1493,7 @@ static int opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
 			ret = -ERESTARTSYS;
 			break;
 		}
-		pipe_wait(pipe);
+		pipe_wait_writable(pipe);
 	}
 
 	pipe_unlock(pipe);
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 50afd0d0084c..5d2705f1d01c 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -240,8 +240,9 @@ extern unsigned int pipe_max_size;
 extern unsigned long pipe_user_pages_hard;
 extern unsigned long pipe_user_pages_soft;
 
-/* Drop the inode semaphore and wait for a pipe event, atomically */
-void pipe_wait(struct pipe_inode_info *pipe);
+/* Wait for a pipe to be readable/writable while dropping the pipe lock */
+void pipe_wait_readable(struct pipe_inode_info *);
+void pipe_wait_writable(struct pipe_inode_info *);
 
 struct pipe_inode_info *alloc_pipe_info(void);
 void free_pipe_info(struct pipe_inode_info *);

From 21d64817c72470ed69483f59279ce557c0658826 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 2 Oct 2020 17:29:59 +0300
Subject: [PATCH 280/322] platform/x86: intel-vbtn: Revert "Fix SW_TABLET_MODE
 always reporting 1 on the HP Pavilion 11 x360"

After discussion, see the Link tag, it appears that this is not good enough.
So, revert it now and apply a better fix.

This reverts commit d823346876a970522ff9e4d2b323c9b734dcc4de.

Link: https://lore.kernel.org/platform-driver-x86/s5hft71klxl.wl-tiwai@suse.de/
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/platform/x86/intel-vbtn.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/drivers/platform/x86/intel-vbtn.c b/drivers/platform/x86/intel-vbtn.c
index f443619e1e7e..e85d8e58320c 100644
--- a/drivers/platform/x86/intel-vbtn.c
+++ b/drivers/platform/x86/intel-vbtn.c
@@ -15,13 +15,9 @@
 #include <linux/platform_device.h>
 #include <linux/suspend.h>
 
-/* Returned when NOT in tablet mode on some HP Stream x360 11 models */
-#define VGBS_TABLET_MODE_FLAG_ALT	0x10
 /* When NOT in tablet mode, VGBS returns with the flag 0x40 */
-#define VGBS_TABLET_MODE_FLAG		0x40
-#define VGBS_DOCK_MODE_FLAG		0x80
-
-#define VGBS_TABLET_MODE_FLAGS (VGBS_TABLET_MODE_FLAG | VGBS_TABLET_MODE_FLAG_ALT)
+#define TABLET_MODE_FLAG 0x40
+#define DOCK_MODE_FLAG   0x80
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("AceLan Kao");
@@ -76,9 +72,9 @@ static void detect_tablet_mode(struct platform_device *device)
 	if (ACPI_FAILURE(status))
 		return;
 
-	m = !(vgbs & VGBS_TABLET_MODE_FLAGS);
+	m = !(vgbs & TABLET_MODE_FLAG);
 	input_report_switch(priv->input_dev, SW_TABLET_MODE, m);
-	m = (vgbs & VGBS_DOCK_MODE_FLAG) ? 1 : 0;
+	m = (vgbs & DOCK_MODE_FLAG) ? 1 : 0;
 	input_report_switch(priv->input_dev, SW_DOCK, m);
 }
 

From 8169bd3e6e193497cab781acddcff8fde5d0c416 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 30 Sep 2020 15:19:05 +0200
Subject: [PATCH 281/322] platform/x86: intel-vbtn: Switch to an allow-list for
 SW_TABLET_MODE reporting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

2 recent commits:
cfae58ed681c ("platform/x86: intel-vbtn: Only blacklist SW_TABLET_MODE
on the 9 / "Laptop" chasis-type")
1fac39fd0316 ("platform/x86: intel-vbtn: Also handle tablet-mode switch on
"Detachable" and "Portable" chassis-types")

Enabled reporting of SW_TABLET_MODE on more devices since the vbtn ACPI
interface is used by the firmware on some of those devices to report this.

Testing has shown that unconditionally enabling SW_TABLET_MODE reporting
on all devices with a chassis type of 8 ("Portable") or 10 ("Notebook")
which support the VGBS method is a very bad idea.

Many of these devices are normal laptops (non 2-in-1) models with a VGBS
which always returns 0, which we translate to SW_TABLET_MODE=1. This in
turn causes userspace (libinput) to suppress events from the builtin
keyboard and touchpad, making the laptop essentially unusable.

Since the problem of wrongly reporting SW_TABLET_MODE=1 in combination
with libinput, leads to a non-usable system. Where as OTOH many people will
not even notice when SW_TABLET_MODE is not being reported, this commit
changes intel_vbtn_has_switches() to use a DMI based allow-list.

The new DMI based allow-list matches on the 31 ("Convertible") and
32 ("Detachable") chassis-types, as these clearly are 2-in-1s and
so far if they support the intel-vbtn ACPI interface they all have
properly working SW_TABLET_MODE reporting.

Besides these 2 generic matches, it also contains model specific matches
for 2-in-1 models which use a different chassis-type and which are known
to have properly working SW_TABLET_MODE reporting.

This has been tested on the following 2-in-1 devices:

Dell Venue 11 Pro 7130 vPro
HP Pavilion X2 10-p002nd
HP Stream x360 Convertible PC 11
Medion E1239T

Fixes: cfae58ed681c ("platform/x86: intel-vbtn: Only blacklist SW_TABLET_MODE on the 9 / "Laptop" chasis-type")
BugLink: https://forum.manjaro.org/t/keyboard-and-touchpad-only-work-on-kernel-5-6/22668
BugLink: https://bugzilla.opensuse.org/show_bug.cgi?id=1175599
Cc: Barnabás Pőcze <pobrn@protonmail.com>
Cc: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/platform/x86/intel-vbtn.c | 52 +++++++++++++++++++++++++------
 1 file changed, 43 insertions(+), 9 deletions(-)

diff --git a/drivers/platform/x86/intel-vbtn.c b/drivers/platform/x86/intel-vbtn.c
index e85d8e58320c..f5901b0b07cd 100644
--- a/drivers/platform/x86/intel-vbtn.c
+++ b/drivers/platform/x86/intel-vbtn.c
@@ -167,20 +167,54 @@ static bool intel_vbtn_has_buttons(acpi_handle handle)
 	return ACPI_SUCCESS(status);
 }
 
+/*
+ * There are several laptops (non 2-in-1) models out there which support VGBS,
+ * but simply always return 0, which we translate to SW_TABLET_MODE=1. This in
+ * turn causes userspace (libinput) to suppress events from the builtin
+ * keyboard and touchpad, making the laptop essentially unusable.
+ *
+ * Since the problem of wrongly reporting SW_TABLET_MODE=1 in combination
+ * with libinput, leads to a non-usable system. Where as OTOH many people will
+ * not even notice when SW_TABLET_MODE is not being reported, a DMI based allow
+ * list is used here. This list mainly matches on the chassis-type of 2-in-1s.
+ *
+ * There are also some 2-in-1s which use the intel-vbtn ACPI interface to report
+ * SW_TABLET_MODE with a chassis-type of 8 ("Portable") or 10 ("Notebook"),
+ * these are matched on a per model basis, since many normal laptops with a
+ * possible broken VGBS ACPI-method also use these chassis-types.
+ */
+static const struct dmi_system_id dmi_switches_allow_list[] = {
+	{
+		.matches = {
+			DMI_EXACT_MATCH(DMI_CHASSIS_TYPE, "31" /* Convertible */),
+		},
+	},
+	{
+		.matches = {
+			DMI_EXACT_MATCH(DMI_CHASSIS_TYPE, "32" /* Detachable */),
+		},
+	},
+	{
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Venue 11 Pro 7130"),
+		},
+	},
+	{
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "HP Stream x360 Convertible PC 11"),
+		},
+	},
+	{} /* Array terminator */
+};
+
 static bool intel_vbtn_has_switches(acpi_handle handle)
 {
-	const char *chassis_type = dmi_get_system_info(DMI_CHASSIS_TYPE);
 	unsigned long long vgbs;
 	acpi_status status;
 
-	/*
-	 * Some normal laptops have a VGBS method despite being non-convertible
-	 * and their VGBS method always returns 0, causing detect_tablet_mode()
-	 * to report SW_TABLET_MODE=1 to userspace, which causes issues.
-	 * These laptops have a DMI chassis_type of 9 ("Laptop"), do not report
-	 * switches on any devices with a DMI chassis_type of 9.
-	 */
-	if (chassis_type && strcmp(chassis_type, "9") == 0)
+	if (!dmi_check_system(dmi_switches_allow_list))
 		return false;
 
 	status = acpi_evaluate_integer(handle, "VGBS", NULL, &vgbs);

From 9fb77799557d1537a059161bf645db7be9e358ae Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 25 Sep 2020 08:58:12 +0200
Subject: [PATCH 282/322] MAINTAINERS: Add Mark Gross and Hans de Goede as x86
 platform drivers maintainers

Darren Hart and Andy Shevchenko lately have not had enough time to
maintain the x86 platform drivers, dropping their status to:
"Odd Fixes".

Mark Gross and Hans de Goede will take over maintainership of
the x86 platform drivers. Replace Darren and Andy's entries with
theirs and change the status to "Maintained".

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Mark Gross <mgross@linux.intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 MAINTAINERS | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index deaafb617361..b1c97a16b6ce 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -18857,10 +18857,10 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/mm
 F:	arch/x86/mm/
 
 X86 PLATFORM DRIVERS
-M:	Darren Hart <dvhart@infradead.org>
-M:	Andy Shevchenko <andy@infradead.org>
+M:	Hans de Goede <hdegoede@redhat.com>
+M:	Mark Gross <mgross@linux.intel.com>
 L:	platform-driver-x86@vger.kernel.org
-S:	Odd Fixes
+S:	Maintained
 T:	git git://git.infradead.org/linux-platform-drivers-x86.git
 F:	drivers/platform/olpc/
 F:	drivers/platform/x86/

From be458311cdbb5d94820ffc4e40c5906085c0a507 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Thu, 1 Oct 2020 13:07:49 -0700
Subject: [PATCH 283/322] mm: memcg/slab: fix slab statistics in !SMP
 configuration

Since commit ea426c2a7de8 ("mm: memcg: prepare for byte-sized vmstat
items") the write side of slab counters accepts a value in bytes and
converts it to pages.  It happens in __mod_node_page_state().

However a non-SMP version of __mod_node_page_state() doesn't perform
this conversion.  It leads to incorrect (unrealistically high) slab
counters values.  Fix this by adding a similar conversion to the non-SMP
version of __mod_node_page_state().

Signed-off-by: Roman Gushchin <guro@fb.com>
Reported-and-tested-by: Bastian Bittorf <bb@npl.de>
Fixes: ea426c2a7de8 ("mm: memcg: prepare for byte-sized vmstat items")
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/vmstat.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 91220ace31da..7557c1070fd7 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -312,6 +312,11 @@ static inline void __mod_zone_page_state(struct zone *zone,
 static inline void __mod_node_page_state(struct pglist_data *pgdat,
 			enum node_stat_item item, int delta)
 {
+	if (vmstat_item_in_bytes(item)) {
+		VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1));
+		delta >>= PAGE_SHIFT;
+	}
+
 	node_page_state_add(delta, pgdat, item);
 }
 

From 09a6b0bc3be793ca8cba580b7992d73e9f68f15d Mon Sep 17 00:00:00 2001
From: Thibaut Sautereau <thibaut.sautereau@ssi.gouv.fr>
Date: Fri, 2 Oct 2020 17:16:11 +0200
Subject: [PATCH 284/322] random32: Restore __latent_entropy attribute on
 net_rand_state

Commit f227e3ec3b5c ("random32: update the net random state on interrupt
and activity") broke compilation and was temporarily fixed by Linus in
83bdc7275e62 ("random32: remove net_rand_state from the latent entropy
gcc plugin") by entirely moving net_rand_state out of the things handled
by the latent_entropy GCC plugin.

From what I understand when reading the plugin code, using the
__latent_entropy attribute on a declaration was the wrong part and
simply keeping the __latent_entropy attribute on the variable definition
was the correct fix.

Fixes: 83bdc7275e62 ("random32: remove net_rand_state from the latent entropy gcc plugin")
Acked-by: Willy Tarreau <w@1wt.eu>
Cc: Emese Revfy <re.emese@gmail.com>
Signed-off-by: Thibaut Sautereau <thibaut.sautereau@ssi.gouv.fr>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/random32.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/random32.c b/lib/random32.c
index 932345323af0..dfb9981ab798 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -49,7 +49,7 @@ static inline void prandom_state_selftest(void)
 }
 #endif
 
-DEFINE_PER_CPU(struct rnd_state, net_rand_state);
+DEFINE_PER_CPU(struct rnd_state, net_rand_state)  __latent_entropy;
 
 /**
  *	prandom_u32_state - seeded pseudo-random number generator.

From 432161ea26d6d5e5c3f7306d9407d26ed1e1953e Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Thu, 13 Aug 2020 16:55:20 +0300
Subject: [PATCH 285/322] net/mlx5: Fix a race when moving command interface to
 polling mode

As part of driver unload, it destroys the commands EQ (via FW command).
As the commands EQ is destroyed, FW will not generate EQEs for any command
that driver sends afterwards. Driver should poll for later commands status.

Driver commands mode metadata is updated before the commands EQ is
actually destroyed. This can lead for double completion handle by the
driver (polling and interrupt), if a command is executed and completed by
FW after the mode was changed, but before the EQ was destroyed.

Fix that by using the mlx5_cmd_allowed_opcode mechanism to guarantee
that only DESTROY_EQ command can be executed during this time period.

Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters")
Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eq.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 31ef9f8420c8..1318d774b18f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -656,8 +656,10 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev)
 
 	cleanup_async_eq(dev, &table->pages_eq, "pages");
 	cleanup_async_eq(dev, &table->async_eq, "async");
+	mlx5_cmd_allowed_opcode(dev, MLX5_CMD_OP_DESTROY_EQ);
 	mlx5_cmd_use_polling(dev);
 	cleanup_async_eq(dev, &table->cmd_eq, "cmd");
+	mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
 	mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
 }
 

From 50b2412b7e7862c5af0cbf4b10d93bc5c712d021 Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Tue, 4 Aug 2020 10:40:21 +0300
Subject: [PATCH 286/322] net/mlx5: Avoid possible free of command entry while
 timeout comp handler

Upon command completion timeout, driver simulates a forced command
completion. In a rare case where real interrupt for that command arrives
simultaneously, it might release the command entry while the forced
handler might still access it.

Fix that by adding an entry refcount, to track current amount of allowed
handlers. Command entry to be released only when this refcount is
decremented to zero.

Command refcount is always initialized to one. For callback commands,
command completion handler is the symmetric flow to decrement it. For
non-callback commands, it is wait_func().

Before ringing the doorbell, increment the refcount for the real completion
handler. Once the real completion handler is called, it will decrement it.

For callback commands, once the delayed work is scheduled, increment the
refcount. Upon callback command completion handler, we will try to cancel
the timeout callback. In case of success, we need to decrement the callback
refcount as it will never run.

In addition, gather the entry index free and the entry free into a one
flow for all command types release.

Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters")
Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 109 ++++++++++++------
 include/linux/mlx5/driver.h                   |   2 +
 2 files changed, 73 insertions(+), 38 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 1d91a0d0ab1d..c0055f5479ce 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -69,12 +69,10 @@ enum {
 	MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR		= 0x10,
 };
 
-static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd,
-					   struct mlx5_cmd_msg *in,
-					   struct mlx5_cmd_msg *out,
-					   void *uout, int uout_size,
-					   mlx5_cmd_cbk_t cbk,
-					   void *context, int page_queue)
+static struct mlx5_cmd_work_ent *
+cmd_alloc_ent(struct mlx5_cmd *cmd, struct mlx5_cmd_msg *in,
+	      struct mlx5_cmd_msg *out, void *uout, int uout_size,
+	      mlx5_cmd_cbk_t cbk, void *context, int page_queue)
 {
 	gfp_t alloc_flags = cbk ? GFP_ATOMIC : GFP_KERNEL;
 	struct mlx5_cmd_work_ent *ent;
@@ -83,6 +81,7 @@ static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd,
 	if (!ent)
 		return ERR_PTR(-ENOMEM);
 
+	ent->idx	= -EINVAL;
 	ent->in		= in;
 	ent->out	= out;
 	ent->uout	= uout;
@@ -91,10 +90,16 @@ static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd,
 	ent->context	= context;
 	ent->cmd	= cmd;
 	ent->page_queue = page_queue;
+	refcount_set(&ent->refcnt, 1);
 
 	return ent;
 }
 
+static void cmd_free_ent(struct mlx5_cmd_work_ent *ent)
+{
+	kfree(ent);
+}
+
 static u8 alloc_token(struct mlx5_cmd *cmd)
 {
 	u8 token;
@@ -109,7 +114,7 @@ static u8 alloc_token(struct mlx5_cmd *cmd)
 	return token;
 }
 
-static int alloc_ent(struct mlx5_cmd *cmd)
+static int cmd_alloc_index(struct mlx5_cmd *cmd)
 {
 	unsigned long flags;
 	int ret;
@@ -123,7 +128,7 @@ static int alloc_ent(struct mlx5_cmd *cmd)
 	return ret < cmd->max_reg_cmds ? ret : -ENOMEM;
 }
 
-static void free_ent(struct mlx5_cmd *cmd, int idx)
+static void cmd_free_index(struct mlx5_cmd *cmd, int idx)
 {
 	unsigned long flags;
 
@@ -132,6 +137,22 @@ static void free_ent(struct mlx5_cmd *cmd, int idx)
 	spin_unlock_irqrestore(&cmd->alloc_lock, flags);
 }
 
+static void cmd_ent_get(struct mlx5_cmd_work_ent *ent)
+{
+	refcount_inc(&ent->refcnt);
+}
+
+static void cmd_ent_put(struct mlx5_cmd_work_ent *ent)
+{
+	if (!refcount_dec_and_test(&ent->refcnt))
+		return;
+
+	if (ent->idx >= 0)
+		cmd_free_index(ent->cmd, ent->idx);
+
+	cmd_free_ent(ent);
+}
+
 static struct mlx5_cmd_layout *get_inst(struct mlx5_cmd *cmd, int idx)
 {
 	return cmd->cmd_buf + (idx << cmd->log_stride);
@@ -219,11 +240,6 @@ static void poll_timeout(struct mlx5_cmd_work_ent *ent)
 	ent->ret = -ETIMEDOUT;
 }
 
-static void free_cmd(struct mlx5_cmd_work_ent *ent)
-{
-	kfree(ent);
-}
-
 static int verify_signature(struct mlx5_cmd_work_ent *ent)
 {
 	struct mlx5_cmd_mailbox *next = ent->out->next;
@@ -842,6 +858,7 @@ static void cb_timeout_handler(struct work_struct *work)
 		       mlx5_command_str(msg_to_opcode(ent->in)),
 		       msg_to_opcode(ent->in));
 	mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
+	cmd_ent_put(ent); /* for the cmd_ent_get() took on schedule delayed work */
 }
 
 static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg);
@@ -873,14 +890,14 @@ static void cmd_work_handler(struct work_struct *work)
 	sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem;
 	down(sem);
 	if (!ent->page_queue) {
-		alloc_ret = alloc_ent(cmd);
+		alloc_ret = cmd_alloc_index(cmd);
 		if (alloc_ret < 0) {
 			mlx5_core_err_rl(dev, "failed to allocate command entry\n");
 			if (ent->callback) {
 				ent->callback(-EAGAIN, ent->context);
 				mlx5_free_cmd_msg(dev, ent->out);
 				free_msg(dev, ent->in);
-				free_cmd(ent);
+				cmd_ent_put(ent);
 			} else {
 				ent->ret = -EAGAIN;
 				complete(&ent->done);
@@ -916,8 +933,8 @@ static void cmd_work_handler(struct work_struct *work)
 	ent->ts1 = ktime_get_ns();
 	cmd_mode = cmd->mode;
 
-	if (ent->callback)
-		schedule_delayed_work(&ent->cb_timeout_work, cb_timeout);
+	if (ent->callback && schedule_delayed_work(&ent->cb_timeout_work, cb_timeout))
+		cmd_ent_get(ent);
 	set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state);
 
 	/* Skip sending command to fw if internal error */
@@ -933,13 +950,10 @@ static void cmd_work_handler(struct work_struct *work)
 		MLX5_SET(mbox_out, ent->out, syndrome, drv_synd);
 
 		mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
-		/* no doorbell, no need to keep the entry */
-		free_ent(cmd, ent->idx);
-		if (ent->callback)
-			free_cmd(ent);
 		return;
 	}
 
+	cmd_ent_get(ent); /* for the _real_ FW event on completion */
 	/* ring doorbell after the descriptor is valid */
 	mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx);
 	wmb();
@@ -1039,11 +1053,16 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
 	if (callback && page_queue)
 		return -EINVAL;
 
-	ent = alloc_cmd(cmd, in, out, uout, uout_size, callback, context,
-			page_queue);
+	ent = cmd_alloc_ent(cmd, in, out, uout, uout_size,
+			    callback, context, page_queue);
 	if (IS_ERR(ent))
 		return PTR_ERR(ent);
 
+	/* put for this ent is when consumed, depending on the use case
+	 * 1) (!callback) blocking flow: by caller after wait_func completes
+	 * 2) (callback) flow: by mlx5_cmd_comp_handler() when ent is handled
+	 */
+
 	ent->token = token;
 	ent->polling = force_polling;
 
@@ -1062,12 +1081,10 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
 	}
 
 	if (callback)
-		goto out;
+		goto out; /* mlx5_cmd_comp_handler() will put(ent) */
 
 	err = wait_func(dev, ent);
-	if (err == -ETIMEDOUT)
-		goto out;
-	if (err == -ECANCELED)
+	if (err == -ETIMEDOUT || err == -ECANCELED)
 		goto out_free;
 
 	ds = ent->ts2 - ent->ts1;
@@ -1085,7 +1102,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
 	*status = ent->status;
 
 out_free:
-	free_cmd(ent);
+	cmd_ent_put(ent);
 out:
 	return err;
 }
@@ -1516,14 +1533,19 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
 				if (!forced) {
 					mlx5_core_err(dev, "Command completion arrived after timeout (entry idx = %d).\n",
 						      ent->idx);
-					free_ent(cmd, ent->idx);
-					free_cmd(ent);
+					cmd_ent_put(ent);
 				}
 				continue;
 			}
 
-			if (ent->callback)
-				cancel_delayed_work(&ent->cb_timeout_work);
+			if (ent->callback && cancel_delayed_work(&ent->cb_timeout_work))
+				cmd_ent_put(ent); /* timeout work was canceled */
+
+			if (!forced || /* Real FW completion */
+			    pci_channel_offline(dev->pdev) || /* FW is inaccessible */
+			    dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
+				cmd_ent_put(ent);
+
 			if (ent->page_queue)
 				sem = &cmd->pages_sem;
 			else
@@ -1545,10 +1567,6 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
 					      ent->ret, deliv_status_to_str(ent->status), ent->status);
 			}
 
-			/* only real completion will free the entry slot */
-			if (!forced)
-				free_ent(cmd, ent->idx);
-
 			if (ent->callback) {
 				ds = ent->ts2 - ent->ts1;
 				if (ent->op < MLX5_CMD_OP_MAX) {
@@ -1576,10 +1594,13 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
 				free_msg(dev, ent->in);
 
 				err = err ? err : ent->status;
-				if (!forced)
-					free_cmd(ent);
+				/* final consumer is done, release ent */
+				cmd_ent_put(ent);
 				callback(err, context);
 			} else {
+				/* release wait_func() so mlx5_cmd_invoke()
+				 * can make the final ent_put()
+				 */
 				complete(&ent->done);
 			}
 			up(sem);
@@ -1589,8 +1610,11 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
 
 void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev)
 {
+	struct mlx5_cmd *cmd = &dev->cmd;
+	unsigned long bitmask;
 	unsigned long flags;
 	u64 vector;
+	int i;
 
 	/* wait for pending handlers to complete */
 	mlx5_eq_synchronize_cmd_irq(dev);
@@ -1599,11 +1623,20 @@ void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev)
 	if (!vector)
 		goto no_trig;
 
+	bitmask = vector;
+	/* we must increment the allocated entries refcount before triggering the completions
+	 * to guarantee pending commands will not get freed in the meanwhile.
+	 * For that reason, it also has to be done inside the alloc_lock.
+	 */
+	for_each_set_bit(i, &bitmask, (1 << cmd->log_sz))
+		cmd_ent_get(cmd->ent_arr[i]);
 	vector |= MLX5_TRIGGERED_CMD_COMP;
 	spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
 
 	mlx5_core_dbg(dev, "vector 0x%llx\n", vector);
 	mlx5_cmd_comp_handler(dev, vector, true);
+	for_each_set_bit(i, &bitmask, (1 << cmd->log_sz))
+		cmd_ent_put(cmd->ent_arr[i]);
 	return;
 
 no_trig:
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index c145de0473bc..897156822f0d 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -767,6 +767,8 @@ struct mlx5_cmd_work_ent {
 	u64			ts2;
 	u16			op;
 	bool			polling;
+	/* Track the max comp handlers */
+	refcount_t              refcnt;
 };
 
 struct mlx5_pas {

From 1d5558b1f0de81f54ddee05f3793acc5260d107f Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Tue, 21 Jul 2020 10:25:52 +0300
Subject: [PATCH 287/322] net/mlx5: poll cmd EQ in case of command timeout

Once driver detects a command interface command timeout, it warns the
user and returns timeout error to the caller. In such case, the entry of
the command is not evacuated (because only real event interrupt is allowed
to clear command interface entry). If the HW event interrupt
of this entry will never arrive, this entry will be left unused forever.
Command interface entries are limited and eventually we can end up without
the ability to post a new command.

In addition, if driver will not consume the EQE of the lost interrupt and
rearm the EQ, no new interrupts will arrive for other commands.

Add a resiliency mechanism for manually polling the command EQ in case of
a command timeout. In case resiliency mechanism will find non-handled EQE,
it will consume it, and the command interface will be fully functional
again. Once the resiliency flow finished, wait another 5 seconds for the
command interface to complete for this command entry.

Define mlx5_cmd_eq_recover() to manage the cmd EQ polling resiliency flow.
Add an async EQ spinlock to avoid races between resiliency flows and real
interrupts that might run simultaneously.

Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters")
Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 53 ++++++++++++++++---
 drivers/net/ethernet/mellanox/mlx5/core/eq.c  | 40 +++++++++++++-
 .../net/ethernet/mellanox/mlx5/core/lib/eq.h  |  2 +
 3 files changed, 86 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index c0055f5479ce..37dae95e61d5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -853,11 +853,21 @@ static void cb_timeout_handler(struct work_struct *work)
 	struct mlx5_core_dev *dev = container_of(ent->cmd, struct mlx5_core_dev,
 						 cmd);
 
+	mlx5_cmd_eq_recover(dev);
+
+	/* Maybe got handled by eq recover ? */
+	if (!test_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state)) {
+		mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) Async, recovered after timeout\n", ent->idx,
+			       mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in));
+		goto out; /* phew, already handled */
+	}
+
 	ent->ret = -ETIMEDOUT;
-	mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n",
-		       mlx5_command_str(msg_to_opcode(ent->in)),
-		       msg_to_opcode(ent->in));
+	mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) Async, timeout. Will cause a leak of a command resource\n",
+		       ent->idx, mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in));
 	mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
+
+out:
 	cmd_ent_put(ent); /* for the cmd_ent_get() took on schedule delayed work */
 }
 
@@ -997,6 +1007,35 @@ static const char *deliv_status_to_str(u8 status)
 	}
 }
 
+enum {
+	MLX5_CMD_TIMEOUT_RECOVER_MSEC   = 5 * 1000,
+};
+
+static void wait_func_handle_exec_timeout(struct mlx5_core_dev *dev,
+					  struct mlx5_cmd_work_ent *ent)
+{
+	unsigned long timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_RECOVER_MSEC);
+
+	mlx5_cmd_eq_recover(dev);
+
+	/* Re-wait on the ent->done after executing the recovery flow. If the
+	 * recovery flow (or any other recovery flow running simultaneously)
+	 * has recovered an EQE, it should cause the entry to be completed by
+	 * the command interface.
+	 */
+	if (wait_for_completion_timeout(&ent->done, timeout)) {
+		mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) recovered after timeout\n", ent->idx,
+			       mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in));
+		return;
+	}
+
+	mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) No done completion\n", ent->idx,
+		       mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in));
+
+	ent->ret = -ETIMEDOUT;
+	mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
+}
+
 static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
 {
 	unsigned long timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC);
@@ -1008,12 +1047,10 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent)
 		ent->ret = -ECANCELED;
 		goto out_err;
 	}
-	if (cmd->mode == CMD_MODE_POLLING || ent->polling) {
+	if (cmd->mode == CMD_MODE_POLLING || ent->polling)
 		wait_for_completion(&ent->done);
-	} else if (!wait_for_completion_timeout(&ent->done, timeout)) {
-		ent->ret = -ETIMEDOUT;
-		mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true);
-	}
+	else if (!wait_for_completion_timeout(&ent->done, timeout))
+		wait_func_handle_exec_timeout(dev, ent);
 
 out_err:
 	err = ent->ret;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 1318d774b18f..22a19d391e17 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -189,6 +189,29 @@ u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq)
 	return count_eqe;
 }
 
+static void mlx5_eq_async_int_lock(struct mlx5_eq_async *eq, unsigned long *flags)
+	__acquires(&eq->lock)
+{
+	if (in_irq())
+		spin_lock(&eq->lock);
+	else
+		spin_lock_irqsave(&eq->lock, *flags);
+}
+
+static void mlx5_eq_async_int_unlock(struct mlx5_eq_async *eq, unsigned long *flags)
+	__releases(&eq->lock)
+{
+	if (in_irq())
+		spin_unlock(&eq->lock);
+	else
+		spin_unlock_irqrestore(&eq->lock, *flags);
+}
+
+enum async_eq_nb_action {
+	ASYNC_EQ_IRQ_HANDLER = 0,
+	ASYNC_EQ_RECOVER = 1,
+};
+
 static int mlx5_eq_async_int(struct notifier_block *nb,
 			     unsigned long action, void *data)
 {
@@ -198,11 +221,14 @@ static int mlx5_eq_async_int(struct notifier_block *nb,
 	struct mlx5_eq_table *eqt;
 	struct mlx5_core_dev *dev;
 	struct mlx5_eqe *eqe;
+	unsigned long flags;
 	int num_eqes = 0;
 
 	dev = eq->dev;
 	eqt = dev->priv.eq_table;
 
+	mlx5_eq_async_int_lock(eq_async, &flags);
+
 	eqe = next_eqe_sw(eq);
 	if (!eqe)
 		goto out;
@@ -223,8 +249,19 @@ static int mlx5_eq_async_int(struct notifier_block *nb,
 
 out:
 	eq_update_ci(eq, 1);
+	mlx5_eq_async_int_unlock(eq_async, &flags);
 
-	return 0;
+	return unlikely(action == ASYNC_EQ_RECOVER) ? num_eqes : 0;
+}
+
+void mlx5_cmd_eq_recover(struct mlx5_core_dev *dev)
+{
+	struct mlx5_eq_async *eq = &dev->priv.eq_table->cmd_eq;
+	int eqes;
+
+	eqes = mlx5_eq_async_int(&eq->irq_nb, ASYNC_EQ_RECOVER, NULL);
+	if (eqes)
+		mlx5_core_warn(dev, "Recovered %d EQEs on cmd_eq\n", eqes);
 }
 
 static void init_eq_buf(struct mlx5_eq *eq)
@@ -569,6 +606,7 @@ setup_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq_async *eq,
 	int err;
 
 	eq->irq_nb.notifier_call = mlx5_eq_async_int;
+	spin_lock_init(&eq->lock);
 
 	err = create_async_eq(dev, &eq->core, param);
 	if (err) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
index 4aaca7400fb2..5c681e31983b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
@@ -37,6 +37,7 @@ struct mlx5_eq {
 struct mlx5_eq_async {
 	struct mlx5_eq          core;
 	struct notifier_block   irq_nb;
+	spinlock_t              lock; /* To avoid irq EQ handle races with resiliency flows */
 };
 
 struct mlx5_eq_comp {
@@ -81,6 +82,7 @@ void mlx5_cq_tasklet_cb(unsigned long data);
 struct cpumask *mlx5_eq_comp_cpumask(struct mlx5_core_dev *dev, int ix);
 
 u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq);
+void mlx5_cmd_eq_recover(struct mlx5_core_dev *dev);
 void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev);
 void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev);
 

From 410bd754cd73c4a2ac3856d9a03d7b08f9c906bf Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@nvidia.com>
Date: Mon, 31 Aug 2020 15:04:35 +0300
Subject: [PATCH 288/322] net/mlx5: Add retry mechanism to the command entry
 index allocation

It is possible that new command entry index allocation will temporarily
fail. The new command holds the semaphore, so it means that a free entry
should be ready soon. Add one second retry mechanism before returning an
error.

Patch "net/mlx5: Avoid possible free of command entry while timeout comp
handler" increase the possibility to bump into this temporarily failure
as it delays the entry index release for non-callback commands.

Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters")
Signed-off-by: Eran Ben Elisha <eranbe@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 21 ++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 37dae95e61d5..2b597ac365f8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -883,6 +883,25 @@ static bool opcode_allowed(struct mlx5_cmd *cmd, u16 opcode)
 	return cmd->allowed_opcode == opcode;
 }
 
+static int cmd_alloc_index_retry(struct mlx5_cmd *cmd)
+{
+	unsigned long alloc_end = jiffies + msecs_to_jiffies(1000);
+	int idx;
+
+retry:
+	idx = cmd_alloc_index(cmd);
+	if (idx < 0 && time_before(jiffies, alloc_end)) {
+		/* Index allocation can fail on heavy load of commands. This is a temporary
+		 * situation as the current command already holds the semaphore, meaning that
+		 * another command completion is being handled and it is expected to release
+		 * the entry index soon.
+		 */
+		cpu_relax();
+		goto retry;
+	}
+	return idx;
+}
+
 static void cmd_work_handler(struct work_struct *work)
 {
 	struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work);
@@ -900,7 +919,7 @@ static void cmd_work_handler(struct work_struct *work)
 	sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem;
 	down(sem);
 	if (!ent->page_queue) {
-		alloc_ret = cmd_alloc_index(cmd);
+		alloc_ret = cmd_alloc_index_retry(cmd);
 		if (alloc_ret < 0) {
 			mlx5_core_err_rl(dev, "failed to allocate command entry\n");
 			if (ent->callback) {

From b898ce7bccf13087719c021d829dab607c175246 Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@nvidia.com>
Date: Fri, 11 Sep 2020 11:48:55 -0700
Subject: [PATCH 289/322] net/mlx5: cmdif, Avoid skipping reclaim pages if FW
 is not accessible

In case of pci is offline reclaim_pages_cmd() will still try to call
the FW to release FW pages, cmd_exec() in this case will return a silent
success without actually calling the FW.

This is wrong and will cause page leaks, what we should do is to detect
pci offline or command interface un-available before tying to access the
FW and manually release the FW pages in the driver.

In this patch we share the code to check for FW command interface
availability and we call it in sensitive places e.g. reclaim_pages_cmd().

Alternative fix:
 1. Remove MLX5_CMD_OP_MANAGE_PAGES form mlx5_internal_err_ret_value,
    command success simulation list.
 2. Always Release FW pages even if cmd_exec fails in reclaim_pages_cmd().

Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c   | 17 +++++++++--------
 .../net/ethernet/mellanox/mlx5/core/pagealloc.c |  2 +-
 include/linux/mlx5/driver.h                     |  1 +
 3 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 2b597ac365f8..2d1f4b3be9bf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -902,6 +902,13 @@ retry:
 	return idx;
 }
 
+bool mlx5_cmd_is_down(struct mlx5_core_dev *dev)
+{
+	return pci_channel_offline(dev->pdev) ||
+	       dev->cmd.state != MLX5_CMDIF_STATE_UP ||
+	       dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR;
+}
+
 static void cmd_work_handler(struct work_struct *work)
 {
 	struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work);
@@ -967,10 +974,7 @@ static void cmd_work_handler(struct work_struct *work)
 	set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state);
 
 	/* Skip sending command to fw if internal error */
-	if (pci_channel_offline(dev->pdev) ||
-	    dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR ||
-	    cmd->state != MLX5_CMDIF_STATE_UP ||
-	    !opcode_allowed(&dev->cmd, ent->op)) {
+	if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, ent->op)) {
 		u8 status = 0;
 		u32 drv_synd;
 
@@ -1800,10 +1804,7 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
 	u8 token;
 
 	opcode = MLX5_GET(mbox_in, in, opcode);
-	if (pci_channel_offline(dev->pdev) ||
-	    dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR ||
-	    dev->cmd.state != MLX5_CMDIF_STATE_UP ||
-	    !opcode_allowed(&dev->cmd, opcode)) {
+	if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, opcode)) {
 		err = mlx5_internal_err_ret_value(dev, opcode, &drv_synd, &status);
 		MLX5_SET(mbox_out, out, status, status);
 		MLX5_SET(mbox_out, out, syndrome, drv_synd);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index f9b798af6335..c0e18f2ade99 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -432,7 +432,7 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev,
 	u32 npages;
 	u32 i = 0;
 
-	if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR)
+	if (!mlx5_cmd_is_down(dev))
 		return mlx5_cmd_exec(dev, in, in_size, out, out_size);
 
 	/* No hard feelings, we want our pages back! */
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 897156822f0d..372100c755e7 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -935,6 +935,7 @@ int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
 int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size,
 			  void *out, int out_size);
 void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome);
+bool mlx5_cmd_is_down(struct mlx5_core_dev *dev);
 
 int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type);
 int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn);

From 732ebfab7fe96b7ac9a3df3208f14752a4bb6db3 Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@nvidia.com>
Date: Mon, 31 Aug 2020 21:37:31 +0300
Subject: [PATCH 290/322] net/mlx5: Fix request_irqs error flow

Fix error flow handling in request_irqs which try to free irq
that we failed to request.
It fixes the below trace.

WARNING: CPU: 1 PID: 7587 at kernel/irq/manage.c:1684 free_irq+0x4d/0x60
CPU: 1 PID: 7587 Comm: bash Tainted: G        W  OE    4.15.15-1.el7MELLANOXsmp-x86_64 #1
Hardware name: Advantech SKY-6200/SKY-6200, BIOS F2.00 08/06/2020
RIP: 0010:free_irq+0x4d/0x60
RSP: 0018:ffffc9000ef47af0 EFLAGS: 00010282
RAX: ffff88001476ae00 RBX: 0000000000000655 RCX: 0000000000000000
RDX: ffff88001476ae00 RSI: ffffc9000ef47ab8 RDI: ffff8800398bb478
RBP: ffff88001476a838 R08: ffff88001476ae00 R09: 000000000000156d
R10: 0000000000000000 R11: 0000000000000004 R12: ffff88001476a838
R13: 0000000000000006 R14: ffff88001476a888 R15: 00000000ffffffe4
FS:  00007efeadd32740(0000) GS:ffff88047fc40000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007fc9cc010008 CR3: 00000001a2380004 CR4: 00000000007606e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
PKRU: 55555554
Call Trace:
 mlx5_irq_table_create+0x38d/0x400 [mlx5_core]
 ? atomic_notifier_chain_register+0x50/0x60
 mlx5_load_one+0x7ee/0x1130 [mlx5_core]
 init_one+0x4c9/0x650 [mlx5_core]
 pci_device_probe+0xb8/0x120
 driver_probe_device+0x2a1/0x470
 ? driver_allows_async_probing+0x30/0x30
 bus_for_each_drv+0x54/0x80
 __device_attach+0xa3/0x100
 pci_bus_add_device+0x4a/0x90
 pci_iov_add_virtfn+0x2dc/0x2f0
 pci_enable_sriov+0x32e/0x420
 mlx5_core_sriov_configure+0x61/0x1b0 [mlx5_core]
 ? kstrtoll+0x22/0x70
 num_vf_store+0x4b/0x70 [mlx5_core]
 kernfs_fop_write+0x102/0x180
 __vfs_write+0x26/0x140
 ? rcu_all_qs+0x5/0x80
 ? _cond_resched+0x15/0x30
 ? __sb_start_write+0x41/0x80
 vfs_write+0xad/0x1a0
 SyS_write+0x42/0x90
 do_syscall_64+0x60/0x110
 entry_SYSCALL_64_after_hwframe+0x3d/0xa2

Fixes: 24163189da48 ("net/mlx5: Separate IRQ request/free from EQ life cycle")
Signed-off-by: Maor Gottlieb <maorg@nvidia.com>
Reviewed-by: Eran Ben Elisha <eranbe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
index 373981a659c7..6fd974920394 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -115,7 +115,7 @@ static int request_irqs(struct mlx5_core_dev *dev, int nvec)
 	return 0;
 
 err_request_irq:
-	for (; i >= 0; i--) {
+	while (i--) {
 		struct mlx5_irq *irq = mlx5_irq_get(dev, i);
 		int irqn = pci_irq_vector(dev->pdev, i);
 

From 08a762cecc60e19cd64e379df6580f1997408e04 Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@mellanox.com>
Date: Mon, 3 Aug 2020 16:22:42 +0300
Subject: [PATCH 291/322] net/mlx5e: Fix error path for RQ alloc

Increase granularity of the error path to avoid unneeded free/release.
Fix the cleanup to be symmetric to the order of creation.

Fixes: 0ddf543226ac ("xdp/mlx5: setup xdp_rxq_info")
Fixes: 422d4c401edd ("net/mlx5e: RX, Split WQ objects for different RQ types")
Signed-off-by: Aya Levin <ayal@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/en_main.c | 32 ++++++++++---------
 1 file changed, 17 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index b3cda7b6e5e1..1fbd7a0f3ca6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -396,7 +396,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 		rq_xdp_ix += params->num_channels * MLX5E_RQ_GROUP_XSK;
 	err = xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq_xdp_ix);
 	if (err < 0)
-		goto err_rq_wq_destroy;
+		goto err_rq_xdp_prog;
 
 	rq->buff.map_dir = params->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
 	rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params, xsk);
@@ -407,7 +407,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 		err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->mpwqe.wq,
 					&rq->wq_ctrl);
 		if (err)
-			goto err_rq_wq_destroy;
+			goto err_rq_xdp;
 
 		rq->mpwqe.wq.db = &rq->mpwqe.wq.db[MLX5_RCV_DBR];
 
@@ -429,13 +429,13 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 
 		err = mlx5e_rq_alloc_mpwqe_info(rq, c);
 		if (err)
-			goto err_free;
+			goto err_rq_mkey;
 		break;
 	default: /* MLX5_WQ_TYPE_CYCLIC */
 		err = mlx5_wq_cyc_create(mdev, &rqp->wq, rqc_wq, &rq->wqe.wq,
 					 &rq->wq_ctrl);
 		if (err)
-			goto err_rq_wq_destroy;
+			goto err_rq_xdp;
 
 		rq->wqe.wq.db = &rq->wqe.wq.db[MLX5_RCV_DBR];
 
@@ -450,19 +450,19 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 				      GFP_KERNEL, cpu_to_node(c->cpu));
 		if (!rq->wqe.frags) {
 			err = -ENOMEM;
-			goto err_free;
+			goto err_rq_wq_destroy;
 		}
 
 		err = mlx5e_init_di_list(rq, wq_sz, c->cpu);
 		if (err)
-			goto err_free;
+			goto err_rq_frags;
 
 		rq->mkey_be = c->mkey_be;
 	}
 
 	err = mlx5e_rq_set_handlers(rq, params, xsk);
 	if (err)
-		goto err_free;
+		goto err_free_by_rq_type;
 
 	if (xsk) {
 		err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
@@ -486,13 +486,13 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 		if (IS_ERR(rq->page_pool)) {
 			err = PTR_ERR(rq->page_pool);
 			rq->page_pool = NULL;
-			goto err_free;
+			goto err_free_by_rq_type;
 		}
 		err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
 						 MEM_TYPE_PAGE_POOL, rq->page_pool);
 	}
 	if (err)
-		goto err_free;
+		goto err_free_by_rq_type;
 
 	for (i = 0; i < wq_sz; i++) {
 		if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
@@ -542,23 +542,25 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 
 	return 0;
 
-err_free:
+err_free_by_rq_type:
 	switch (rq->wq_type) {
 	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
 		kvfree(rq->mpwqe.info);
+err_rq_mkey:
 		mlx5_core_destroy_mkey(mdev, &rq->umr_mkey);
 		break;
 	default: /* MLX5_WQ_TYPE_CYCLIC */
-		kvfree(rq->wqe.frags);
 		mlx5e_free_di_list(rq);
+err_rq_frags:
+		kvfree(rq->wqe.frags);
 	}
-
 err_rq_wq_destroy:
+	mlx5_wq_destroy(&rq->wq_ctrl);
+err_rq_xdp:
+	xdp_rxq_info_unreg(&rq->xdp_rxq);
+err_rq_xdp_prog:
 	if (params->xdp_prog)
 		bpf_prog_put(params->xdp_prog);
-	xdp_rxq_info_unreg(&rq->xdp_rxq);
-	page_pool_destroy(rq->page_pool);
-	mlx5_wq_destroy(&rq->wq_ctrl);
 
 	return err;
 }

From c3c9402373fe20e2d08c04f437ce4dcd252cffb2 Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@mellanox.com>
Date: Mon, 20 Jul 2020 16:53:18 +0300
Subject: [PATCH 292/322] net/mlx5e: Add resiliency in Striding RQ mode for
 packets larger than MTU

Prior to this fix, in Striding RQ mode the driver was vulnerable when
receiving packets in the range (stride size - headroom, stride size].
Where stride size is calculated by mtu+headroom+tailroom aligned to the
closest power of 2.
Usually, this filtering is performed by the HW, except for a few cases:
- Between 2 VFs over the same PF with different MTUs
- On bluefield, when the host physical function sets a larger MTU than
  the ARM has configured on its representor and uplink representor.

When the HW filtering is not present, packets that are larger than MTU
might be harmful for the RQ's integrity, in the following impacts:
1) Overflow from one WQE to the next, causing a memory corruption that
in most cases is unharmful: as the write happens to the headroom of next
packet, which will be overwritten by build_skb(). In very rare cases,
high stress/load, this is harmful. When the next WQE is not yet reposted
and points to existing SKB head.
2) Each oversize packet overflows to the headroom of the next WQE. On
the last WQE of the WQ, where addresses wrap-around, the address of the
remainder headroom does not belong to the next WQE, but it is out of the
memory region range. This results in a HW CQE error that moves the RQ
into an error state.

Solution:
Add a page buffer at the end of each WQE to absorb the leak. Actually
the maximal overflow size is headroom but since all memory units must be
of the same size, we use page size to comply with UMR WQEs. The increase
in memory consumption is of a single page per RQ. Initialize the mkey
with all MTTs pointing to a default page. When the channels are
activated, UMR WQEs will redirect the RX WQEs to the actual memory from
the RQ's pool, while the overflow MTTs remain mapped to the default page.

Fixes: 73281b78a37a ("net/mlx5e: Derive Striding RQ size from MTU")
Signed-off-by: Aya Levin <ayal@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |  8 ++-
 .../net/ethernet/mellanox/mlx5/core/en_main.c | 55 +++++++++++++++++--
 2 files changed, 58 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 90d5caabd6af..356f5852955f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -91,7 +91,12 @@ struct page_pool;
 #define MLX5_MPWRQ_PAGES_PER_WQE		BIT(MLX5_MPWRQ_WQE_PAGE_ORDER)
 
 #define MLX5_MTT_OCTW(npages) (ALIGN(npages, 8) / 2)
-#define MLX5E_REQUIRED_WQE_MTTS		(ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8))
+/* Add another page to MLX5E_REQUIRED_WQE_MTTS as a buffer between
+ * WQEs, This page will absorb write overflow by the hardware, when
+ * receiving packets larger than MTU. These oversize packets are
+ * dropped by the driver at a later stage.
+ */
+#define MLX5E_REQUIRED_WQE_MTTS		(ALIGN(MLX5_MPWRQ_PAGES_PER_WQE + 1, 8))
 #define MLX5E_LOG_ALIGNED_MPWQE_PPW	(ilog2(MLX5E_REQUIRED_WQE_MTTS))
 #define MLX5E_REQUIRED_MTTS(wqes)	(wqes * MLX5E_REQUIRED_WQE_MTTS)
 #define MLX5E_MAX_RQ_NUM_MTTS	\
@@ -617,6 +622,7 @@ struct mlx5e_rq {
 	u32                    rqn;
 	struct mlx5_core_dev  *mdev;
 	struct mlx5_core_mkey  umr_mkey;
+	struct mlx5e_dma_info  wqe_overflow;
 
 	/* XDP read-mostly */
 	struct xdp_rxq_info    xdp_rxq;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 1fbd7a0f3ca6..b1a16fb9667e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -246,12 +246,17 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq,
 
 static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev,
 				 u64 npages, u8 page_shift,
-				 struct mlx5_core_mkey *umr_mkey)
+				 struct mlx5_core_mkey *umr_mkey,
+				 dma_addr_t filler_addr)
 {
-	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+	struct mlx5_mtt *mtt;
+	int inlen;
 	void *mkc;
 	u32 *in;
 	int err;
+	int i;
+
+	inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + sizeof(*mtt) * npages;
 
 	in = kvzalloc(inlen, GFP_KERNEL);
 	if (!in)
@@ -271,6 +276,18 @@ static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev,
 	MLX5_SET(mkc, mkc, translations_octword_size,
 		 MLX5_MTT_OCTW(npages));
 	MLX5_SET(mkc, mkc, log_page_size, page_shift);
+	MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
+		 MLX5_MTT_OCTW(npages));
+
+	/* Initialize the mkey with all MTTs pointing to a default
+	 * page (filler_addr). When the channels are activated, UMR
+	 * WQEs will redirect the RX WQEs to the actual memory from
+	 * the RQ's pool, while the gaps (wqe_overflow) remain mapped
+	 * to the default page.
+	 */
+	mtt = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
+	for (i = 0 ; i < npages ; i++)
+		mtt[i].ptag = cpu_to_be64(filler_addr);
 
 	err = mlx5_core_create_mkey(mdev, umr_mkey, in, inlen);
 
@@ -282,7 +299,8 @@ static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq
 {
 	u64 num_mtts = MLX5E_REQUIRED_MTTS(mlx5_wq_ll_get_size(&rq->mpwqe.wq));
 
-	return mlx5e_create_umr_mkey(mdev, num_mtts, PAGE_SHIFT, &rq->umr_mkey);
+	return mlx5e_create_umr_mkey(mdev, num_mtts, PAGE_SHIFT, &rq->umr_mkey,
+				     rq->wqe_overflow.addr);
 }
 
 static inline u64 mlx5e_get_mpwqe_offset(struct mlx5e_rq *rq, u16 wqe_ix)
@@ -350,6 +368,28 @@ static void mlx5e_rq_err_cqe_work(struct work_struct *recover_work)
 	mlx5e_reporter_rq_cqe_err(rq);
 }
 
+static int mlx5e_alloc_mpwqe_rq_drop_page(struct mlx5e_rq *rq)
+{
+	rq->wqe_overflow.page = alloc_page(GFP_KERNEL);
+	if (!rq->wqe_overflow.page)
+		return -ENOMEM;
+
+	rq->wqe_overflow.addr = dma_map_page(rq->pdev, rq->wqe_overflow.page, 0,
+					     PAGE_SIZE, rq->buff.map_dir);
+	if (dma_mapping_error(rq->pdev, rq->wqe_overflow.addr)) {
+		__free_page(rq->wqe_overflow.page);
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+static void mlx5e_free_mpwqe_rq_drop_page(struct mlx5e_rq *rq)
+{
+	 dma_unmap_page(rq->pdev, rq->wqe_overflow.addr, PAGE_SIZE,
+			rq->buff.map_dir);
+	 __free_page(rq->wqe_overflow.page);
+}
+
 static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 			  struct mlx5e_params *params,
 			  struct mlx5e_xsk_param *xsk,
@@ -409,6 +449,10 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 		if (err)
 			goto err_rq_xdp;
 
+		err = mlx5e_alloc_mpwqe_rq_drop_page(rq);
+		if (err)
+			goto err_rq_wq_destroy;
+
 		rq->mpwqe.wq.db = &rq->mpwqe.wq.db[MLX5_RCV_DBR];
 
 		wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq);
@@ -424,7 +468,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 
 		err = mlx5e_create_rq_umr_mkey(mdev, rq);
 		if (err)
-			goto err_rq_wq_destroy;
+			goto err_rq_drop_page;
 		rq->mkey_be = cpu_to_be32(rq->umr_mkey.key);
 
 		err = mlx5e_rq_alloc_mpwqe_info(rq, c);
@@ -548,6 +592,8 @@ err_free_by_rq_type:
 		kvfree(rq->mpwqe.info);
 err_rq_mkey:
 		mlx5_core_destroy_mkey(mdev, &rq->umr_mkey);
+err_rq_drop_page:
+		mlx5e_free_mpwqe_rq_drop_page(rq);
 		break;
 	default: /* MLX5_WQ_TYPE_CYCLIC */
 		mlx5e_free_di_list(rq);
@@ -582,6 +628,7 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq)
 	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
 		kvfree(rq->mpwqe.info);
 		mlx5_core_destroy_mkey(rq->mdev, &rq->umr_mkey);
+		mlx5e_free_mpwqe_rq_drop_page(rq);
 		break;
 	default: /* MLX5_WQ_TYPE_CYCLIC */
 		kvfree(rq->wqe.frags);

From 2b0219898b8684075ff66027551fb02ea12f652b Mon Sep 17 00:00:00 2001
From: Maor Dickman <maord@nvidia.com>
Date: Tue, 1 Sep 2020 12:33:18 +0300
Subject: [PATCH 293/322] net/mlx5e: CT, Fix coverity issue

The cited commit introduced the following coverity issue at function
mlx5_tc_ct_rule_to_tuple_nat:
- Memory - corruptions (OVERRUN)
  Overrunning array "tuple->ip.src_v6.in6_u.u6_addr32" of 4 4-byte
  elements at element index 7 (byte offset 31) using index
  "ip6_offset" (which evaluates to 7).

In case of IPv6 destination address rewrite, ip6_offset values are
between 4 to 7, which will cause memory overrun of array
"tuple->ip.src_v6.in6_u.u6_addr32" to array
"tuple->ip.dst_v6.in6_u.u6_addr32".

Fixed by writing the value directly to array
"tuple->ip.dst_v6.in6_u.u6_addr32" in case ip6_offset values are
between 4 to 7.

Fixes: bc562be9674b ("net/mlx5e: CT: Save ct entries tuples in hashtables")
Signed-off-by: Maor Dickman <maord@nvidia.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
index bc5f72ec3623..a8be40cbe325 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -246,8 +246,10 @@ mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple,
 		case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
 			ip6_offset = (offset - offsetof(struct ipv6hdr, saddr));
 			ip6_offset /= 4;
-			if (ip6_offset < 8)
+			if (ip6_offset < 4)
 				tuple->ip.src_v6.s6_addr32[ip6_offset] = cpu_to_be32(val);
+			else if (ip6_offset < 8)
+				tuple->ip.dst_v6.s6_addr32[ip6_offset - 4] = cpu_to_be32(val);
 			else
 				return -EOPNOTSUPP;
 			break;

From 3d093bc2369003b4ce6c3522d9b383e47c40045d Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@mellanox.com>
Date: Sun, 9 Aug 2020 12:34:21 +0300
Subject: [PATCH 294/322] net/mlx5e: Fix driver's declaration to support GRE
 offload

Declare GRE offload support with respect to the inner protocol. Add a
list of supported inner protocols on which the driver can offload
checksum and GSO. For other protocols, inform the stack to do the needed
operations. There is no noticeable impact on GRE performance.

Fixes: 2729984149e6 ("net/mlx5e: Support TSO and TX checksum offloads for GRE tunnels")
Signed-off-by: Aya Levin <ayal@mellanox.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/en_main.c | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index b1a16fb9667e..42ec28e29834 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -4226,6 +4226,21 @@ int mlx5e_get_vf_stats(struct net_device *dev,
 }
 #endif
 
+static bool mlx5e_gre_tunnel_inner_proto_offload_supported(struct mlx5_core_dev *mdev,
+							   struct sk_buff *skb)
+{
+	switch (skb->inner_protocol) {
+	case htons(ETH_P_IP):
+	case htons(ETH_P_IPV6):
+	case htons(ETH_P_TEB):
+		return true;
+	case htons(ETH_P_MPLS_UC):
+	case htons(ETH_P_MPLS_MC):
+		return MLX5_CAP_ETH(mdev, tunnel_stateless_mpls_over_gre);
+	}
+	return false;
+}
+
 static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
 						     struct sk_buff *skb,
 						     netdev_features_t features)
@@ -4248,7 +4263,9 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
 
 	switch (proto) {
 	case IPPROTO_GRE:
-		return features;
+		if (mlx5e_gre_tunnel_inner_proto_offload_supported(priv->mdev, skb))
+			return features;
+		break;
 	case IPPROTO_IPIP:
 	case IPPROTO_IPV6:
 		if (mlx5e_tunnel_proto_supported(priv->mdev, IPPROTO_IPIP))

From 2608a2f831c47dfdf18885a7289be5af97182b05 Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@mellanox.com>
Date: Wed, 12 Aug 2020 10:44:36 +0300
Subject: [PATCH 295/322] net/mlx5e: Fix return status when setting unsupported
 FEC mode

Verify the configured FEC mode is supported by at least a single link
mode before applying the command. Otherwise fail the command and return
"Operation not supported".
Prior to this patch, the command was successful, yet it falsely set all
link modes to FEC auto mode - like configuring FEC mode to auto. Auto
mode is the default configuration if a link mode doesn't support the
configured FEC mode.

Fixes: b5ede32d3329 ("net/mlx5e: Add support for FEC modes based on 50G per lane links")
Signed-off-by: Aya Levin <ayal@mellanox.com>
Reviewed-by: Eran Ben Elisha <eranbe@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/port.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
index 96608dbb9314..308fd279669e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
@@ -569,6 +569,9 @@ int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u16 fec_policy)
 	if (fec_policy >= (1 << MLX5E_FEC_LLRS_272_257_1) && !fec_50g_per_lane)
 		return -EOPNOTSUPP;
 
+	if (fec_policy && !mlx5e_fec_in_caps(dev, fec_policy))
+		return -EOPNOTSUPP;
+
 	MLX5_SET(pplm_reg, in, local_port, 1);
 	err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0);
 	if (err)

From 8c7353b6f716436ad0bfda2b5c5524ab2dde5894 Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Sun, 13 Sep 2020 17:57:23 +0300
Subject: [PATCH 296/322] net/mlx5e: Fix VLAN cleanup flow

Prior to this patch unloading an interface in promiscuous mode with RX
VLAN filtering feature turned off - resulted in a warning. This is due
to a wrong condition in the VLAN rules cleanup flow, which left the
any-vid rules in the VLAN steering table. These rules prevented
destroying the flow group and the flow table.

The any-vid rules are removed in 2 flows, but none of them remove it in
case both promiscuous is set and VLAN filtering is off. Fix the issue by
changing the condition of the VLAN table cleanup flow to clean also in
case of promiscuous mode.

mlx5_core 0000:00:08.0: mlx5_destroy_flow_group:2123:(pid 28729): Flow group 20 wasn't destroyed, refcount > 1
mlx5_core 0000:00:08.0: mlx5_destroy_flow_group:2123:(pid 28729): Flow group 19 wasn't destroyed, refcount > 1
mlx5_core 0000:00:08.0: mlx5_destroy_flow_table:2112:(pid 28729): Flow table 262149 wasn't destroyed, refcount > 1
...
...
------------[ cut here ]------------
FW pages counter is 11560 after reclaiming all pages
WARNING: CPU: 1 PID: 28729 at
drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c:660
mlx5_reclaim_startup_pages+0x178/0x230 [mlx5_core]
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS
rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014
Call Trace:
  mlx5_function_teardown+0x2f/0x90 [mlx5_core]
  mlx5_unload_one+0x71/0x110 [mlx5_core]
  remove_one+0x44/0x80 [mlx5_core]
  pci_device_remove+0x3e/0xc0
  device_release_driver_internal+0xfb/0x1c0
  device_release_driver+0x12/0x20
  pci_stop_bus_device+0x68/0x90
  pci_stop_and_remove_bus_device+0x12/0x20
  hv_eject_device_work+0x6f/0x170 [pci_hyperv]
  ? __schedule+0x349/0x790
  process_one_work+0x206/0x400
  worker_thread+0x34/0x3f0
  ? process_one_work+0x400/0x400
  kthread+0x126/0x140
  ? kthread_park+0x90/0x90
  ret_from_fork+0x22/0x30
   ---[ end trace 6283bde8d26170dc ]---

Fixes: 9df30601c843 ("net/mlx5e: Restore vlan filter after seamless reset")
Signed-off-by: Aya Levin <ayal@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_fs.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
index 64d002d92250..55a4c3adaa05 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -415,8 +415,12 @@ static void mlx5e_del_vlan_rules(struct mlx5e_priv *priv)
 	for_each_set_bit(i, priv->fs.vlan.active_svlans, VLAN_N_VID)
 		mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i);
 
-	if (priv->fs.vlan.cvlan_filter_disabled &&
-	    !(priv->netdev->flags & IFF_PROMISC))
+	WARN_ON_ONCE(!(test_bit(MLX5E_STATE_DESTROYING, &priv->state)));
+
+	/* must be called after DESTROY bit is set and
+	 * set_rx_mode is called and flushed
+	 */
+	if (priv->fs.vlan.cvlan_filter_disabled)
 		mlx5e_del_any_vid_rules(priv);
 }
 

From d4a16052bccdd695982f89d815ca075825115821 Mon Sep 17 00:00:00 2001
From: Aya Levin <ayal@nvidia.com>
Date: Sun, 13 Sep 2020 18:05:40 +0300
Subject: [PATCH 297/322] net/mlx5e: Fix VLAN create flow

When interface is attached while in promiscuous mode and with VLAN
filtering turned off, both configurations are not respected and VLAN
filtering is performed.
There are 2 flows which add the any-vid rules during interface attach:
VLAN creation table and set rx mode. Each is relaying on the other to
add any-vid rules, eventually non of them does.

Fix this by adding any-vid rules on VLAN creation regardless of
promiscuous mode.

Fixes: 9df30601c843 ("net/mlx5e: Restore vlan filter after seamless reset")
Signed-off-by: Aya Levin <ayal@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_fs.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
index 55a4c3adaa05..1f48f99c0997 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -217,6 +217,9 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
 		break;
 	}
 
+	if (WARN_ONCE(*rule_p, "VLAN rule already exists type %d", rule_type))
+		return 0;
+
 	*rule_p = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
 
 	if (IS_ERR(*rule_p)) {
@@ -397,8 +400,7 @@ static void mlx5e_add_vlan_rules(struct mlx5e_priv *priv)
 	for_each_set_bit(i, priv->fs.vlan.active_svlans, VLAN_N_VID)
 		mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i);
 
-	if (priv->fs.vlan.cvlan_filter_disabled &&
-	    !(priv->netdev->flags & IFF_PROMISC))
+	if (priv->fs.vlan.cvlan_filter_disabled)
 		mlx5e_add_any_vid_rules(priv);
 }
 

From 1253935ad801485270194d5651acab04abc97b36 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@nvidia.com>
Date: Sun, 20 Sep 2020 19:59:08 +0300
Subject: [PATCH 298/322] net/mlx5e: Fix race condition on nhe->n pointer in
 neigh update

Current neigh update event handler implementation takes reference to
neighbour structure, assigns it to nhe->n, tries to schedule workqueue task
and releases the reference if task was already enqueued. This results
potentially overwriting existing nhe->n pointer with another neighbour
instance, which causes double release of the instance (once in neigh update
handler that failed to enqueue to workqueue and another one in neigh update
workqueue task that processes updated nhe->n pointer instead of original
one):

[ 3376.512806] ------------[ cut here ]------------
[ 3376.513534] refcount_t: underflow; use-after-free.
[ 3376.521213] Modules linked in: act_skbedit act_mirred act_tunnel_key vxlan ip6_udp_tunnel udp_tunnel nfnetlink act_gact cls_flower sch_ingress openvswitch nsh nf_conncount nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 mlx5_ib mlx5_core mlxfw pci_hyperv_intf ptp pps_core nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd
 grace fscache ib_isert iscsi_target_mod ib_srpt target_core_mod ib_srp rpcrdma rdma_ucm ib_umad ib_ipoib ib_iser rdma_cm ib_cm iw_cm rfkill ib_uverbs ib_core sunrpc kvm_intel kvm iTCO_wdt iTCO_vendor_support virtio_net irqbypass net_failover crc32_pclmul lpc_ich i2c_i801 failover pcspkr i2c_smbus mfd_core ghash_clmulni_intel sch_fq_codel drm i2c
_core ip_tables crc32c_intel serio_raw [last unloaded: mlxfw]
[ 3376.529468] CPU: 8 PID: 22756 Comm: kworker/u20:5 Not tainted 5.9.0-rc5+ #6
[ 3376.530399] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014
[ 3376.531975] Workqueue: mlx5e mlx5e_rep_neigh_update [mlx5_core]
[ 3376.532820] RIP: 0010:refcount_warn_saturate+0xd8/0xe0
[ 3376.533589] Code: ff 48 c7 c7 e0 b8 27 82 c6 05 0b b6 09 01 01 e8 94 93 c1 ff 0f 0b c3 48 c7 c7 88 b8 27 82 c6 05 f7 b5 09 01 01 e8 7e 93 c1 ff <0f> 0b c3 0f 1f 44 00 00 8b 07 3d 00 00 00 c0 74 12 83 f8 01 74 13
[ 3376.536017] RSP: 0018:ffffc90002a97e30 EFLAGS: 00010286
[ 3376.536793] RAX: 0000000000000000 RBX: ffff8882de30d648 RCX: 0000000000000000
[ 3376.537718] RDX: ffff8882f5c28f20 RSI: ffff8882f5c18e40 RDI: ffff8882f5c18e40
[ 3376.538654] RBP: ffff8882cdf56c00 R08: 000000000000c580 R09: 0000000000001a4d
[ 3376.539582] R10: 0000000000000731 R11: ffffc90002a97ccd R12: 0000000000000000
[ 3376.540519] R13: ffff8882de30d600 R14: ffff8882de30d640 R15: ffff88821e000900
[ 3376.541444] FS:  0000000000000000(0000) GS:ffff8882f5c00000(0000) knlGS:0000000000000000
[ 3376.542732] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 3376.543545] CR2: 0000556e5504b248 CR3: 00000002c6f10005 CR4: 0000000000770ee0
[ 3376.544483] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 3376.545419] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 3376.546344] PKRU: 55555554
[ 3376.546911] Call Trace:
[ 3376.547479]  mlx5e_rep_neigh_update.cold+0x33/0xe2 [mlx5_core]
[ 3376.548299]  process_one_work+0x1d8/0x390
[ 3376.548977]  worker_thread+0x4d/0x3e0
[ 3376.549631]  ? rescuer_thread+0x3e0/0x3e0
[ 3376.550295]  kthread+0x118/0x130
[ 3376.550914]  ? kthread_create_worker_on_cpu+0x70/0x70
[ 3376.551675]  ret_from_fork+0x1f/0x30
[ 3376.552312] ---[ end trace d84e8f46d2a77eec ]---

Fix the bug by moving work_struct to dedicated dynamically-allocated
structure. This enabled every event handler to work on its own private
neighbour pointer and removes the need for handling the case when task is
already enqueued.

Fixes: 232c001398ae ("net/mlx5e: Add support to neighbour update flow")
Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../mellanox/mlx5/core/en/rep/neigh.c         | 81 ++++++++++++-------
 .../net/ethernet/mellanox/mlx5/core/en_rep.h  |  6 --
 2 files changed, 50 insertions(+), 37 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c
index 906292035088..58e27038c947 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c
@@ -110,11 +110,25 @@ static void mlx5e_rep_neigh_stats_work(struct work_struct *work)
 	rtnl_unlock();
 }
 
+struct neigh_update_work {
+	struct work_struct work;
+	struct neighbour *n;
+	struct mlx5e_neigh_hash_entry *nhe;
+};
+
+static void mlx5e_release_neigh_update_work(struct neigh_update_work *update_work)
+{
+	neigh_release(update_work->n);
+	mlx5e_rep_neigh_entry_release(update_work->nhe);
+	kfree(update_work);
+}
+
 static void mlx5e_rep_neigh_update(struct work_struct *work)
 {
-	struct mlx5e_neigh_hash_entry *nhe =
-		container_of(work, struct mlx5e_neigh_hash_entry, neigh_update_work);
-	struct neighbour *n = nhe->n;
+	struct neigh_update_work *update_work = container_of(work, struct neigh_update_work,
+							     work);
+	struct mlx5e_neigh_hash_entry *nhe = update_work->nhe;
+	struct neighbour *n = update_work->n;
 	struct mlx5e_encap_entry *e;
 	unsigned char ha[ETH_ALEN];
 	struct mlx5e_priv *priv;
@@ -146,30 +160,42 @@ static void mlx5e_rep_neigh_update(struct work_struct *work)
 		mlx5e_rep_update_flows(priv, e, neigh_connected, ha);
 		mlx5e_encap_put(priv, e);
 	}
-	mlx5e_rep_neigh_entry_release(nhe);
 	rtnl_unlock();
-	neigh_release(n);
+	mlx5e_release_neigh_update_work(update_work);
 }
 
-static void mlx5e_rep_queue_neigh_update_work(struct mlx5e_priv *priv,
-					      struct mlx5e_neigh_hash_entry *nhe,
-					      struct neighbour *n)
+static struct neigh_update_work *mlx5e_alloc_neigh_update_work(struct mlx5e_priv *priv,
+							       struct neighbour *n)
 {
-	/* Take a reference to ensure the neighbour and mlx5 encap
-	 * entry won't be destructed until we drop the reference in
-	 * delayed work.
-	 */
-	neigh_hold(n);
+	struct neigh_update_work *update_work;
+	struct mlx5e_neigh_hash_entry *nhe;
+	struct mlx5e_neigh m_neigh = {};
 
-	/* This assignment is valid as long as the the neigh reference
-	 * is taken
-	 */
-	nhe->n = n;
+	update_work = kzalloc(sizeof(*update_work), GFP_ATOMIC);
+	if (WARN_ON(!update_work))
+		return NULL;
 
-	if (!queue_work(priv->wq, &nhe->neigh_update_work)) {
-		mlx5e_rep_neigh_entry_release(nhe);
-		neigh_release(n);
+	m_neigh.dev = n->dev;
+	m_neigh.family = n->ops->family;
+	memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
+
+	/* Obtain reference to nhe as last step in order not to release it in
+	 * atomic context.
+	 */
+	rcu_read_lock();
+	nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh);
+	rcu_read_unlock();
+	if (!nhe) {
+		kfree(update_work);
+		return NULL;
 	}
+
+	INIT_WORK(&update_work->work, mlx5e_rep_neigh_update);
+	neigh_hold(n);
+	update_work->n = n;
+	update_work->nhe = nhe;
+
+	return update_work;
 }
 
 static int mlx5e_rep_netevent_event(struct notifier_block *nb,
@@ -181,7 +207,7 @@ static int mlx5e_rep_netevent_event(struct notifier_block *nb,
 	struct net_device *netdev = rpriv->netdev;
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 	struct mlx5e_neigh_hash_entry *nhe = NULL;
-	struct mlx5e_neigh m_neigh = {};
+	struct neigh_update_work *update_work;
 	struct neigh_parms *p;
 	struct neighbour *n;
 	bool found = false;
@@ -196,17 +222,11 @@ static int mlx5e_rep_netevent_event(struct notifier_block *nb,
 #endif
 			return NOTIFY_DONE;
 
-		m_neigh.dev = n->dev;
-		m_neigh.family = n->ops->family;
-		memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
-
-		rcu_read_lock();
-		nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh);
-		rcu_read_unlock();
-		if (!nhe)
+		update_work = mlx5e_alloc_neigh_update_work(priv, n);
+		if (!update_work)
 			return NOTIFY_DONE;
 
-		mlx5e_rep_queue_neigh_update_work(priv, nhe, n);
+		queue_work(priv->wq, &update_work->work);
 		break;
 
 	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
@@ -352,7 +372,6 @@ int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
 
 	(*nhe)->priv = priv;
 	memcpy(&(*nhe)->m_neigh, &e->m_neigh, sizeof(e->m_neigh));
-	INIT_WORK(&(*nhe)->neigh_update_work, mlx5e_rep_neigh_update);
 	spin_lock_init(&(*nhe)->encap_list_lock);
 	INIT_LIST_HEAD(&(*nhe)->encap_list);
 	refcount_set(&(*nhe)->refcnt, 1);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index 622c27ae4ac7..0d1562e20118 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -135,12 +135,6 @@ struct mlx5e_neigh_hash_entry {
 	/* encap list sharing the same neigh */
 	struct list_head encap_list;
 
-	/* valid only when the neigh reference is taken during
-	 * neigh_update_work workqueue callback.
-	 */
-	struct neighbour *n;
-	struct work_struct neigh_update_work;
-
 	/* neigh hash entry can be deleted only when the refcount is zero.
 	 * refcount is needed to avoid neigh hash entry removal by TC, while
 	 * it's used by the neigh notification call.

From a95bc734e60449e7b073ff7ff70c35083b290ae9 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 2 Oct 2020 09:46:04 +0200
Subject: [PATCH 299/322] netlink: fix policy dump leak

If userspace doesn't complete the policy dump, we leak the
allocated state. Fix this.

Fixes: d07dcf9aadd6 ("netlink: add infrastructure to expose policies to userspace")
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netlink.h   |  3 ++-
 net/netlink/genetlink.c |  9 ++++++++-
 net/netlink/policy.c    | 24 ++++++++++--------------
 3 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/include/net/netlink.h b/include/net/netlink.h
index 8e0eb2c9c528..271620f6bc7f 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -1934,7 +1934,8 @@ void nla_get_range_signed(const struct nla_policy *pt,
 int netlink_policy_dump_start(const struct nla_policy *policy,
 			      unsigned int maxtype,
 			      unsigned long *state);
-bool netlink_policy_dump_loop(unsigned long *state);
+bool netlink_policy_dump_loop(unsigned long state);
 int netlink_policy_dump_write(struct sk_buff *skb, unsigned long state);
+void netlink_policy_dump_free(unsigned long state);
 
 #endif
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 1eb65a7a27fd..c4b4d3376227 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -1079,7 +1079,7 @@ static int ctrl_dumppolicy(struct sk_buff *skb, struct netlink_callback *cb)
 	if (err)
 		return err;
 
-	while (netlink_policy_dump_loop(&cb->args[1])) {
+	while (netlink_policy_dump_loop(cb->args[1])) {
 		void *hdr;
 		struct nlattr *nest;
 
@@ -1113,6 +1113,12 @@ nla_put_failure:
 	return skb->len;
 }
 
+static int ctrl_dumppolicy_done(struct netlink_callback *cb)
+{
+	netlink_policy_dump_free(cb->args[1]);
+	return 0;
+}
+
 static const struct genl_ops genl_ctrl_ops[] = {
 	{
 		.cmd		= CTRL_CMD_GETFAMILY,
@@ -1123,6 +1129,7 @@ static const struct genl_ops genl_ctrl_ops[] = {
 	{
 		.cmd		= CTRL_CMD_GETPOLICY,
 		.dumpit		= ctrl_dumppolicy,
+		.done		= ctrl_dumppolicy_done,
 	},
 };
 
diff --git a/net/netlink/policy.c b/net/netlink/policy.c
index 641ffbdd977a..0176b59ce530 100644
--- a/net/netlink/policy.c
+++ b/net/netlink/policy.c
@@ -84,7 +84,6 @@ int netlink_policy_dump_start(const struct nla_policy *policy,
 	unsigned int policy_idx;
 	int err;
 
-	/* also returns 0 if "*_state" is our ERR_PTR() end marker */
 	if (*_state)
 		return 0;
 
@@ -140,21 +139,11 @@ static bool netlink_policy_dump_finished(struct nl_policy_dump *state)
 	       !state->policies[state->policy_idx].policy;
 }
 
-bool netlink_policy_dump_loop(unsigned long *_state)
+bool netlink_policy_dump_loop(unsigned long _state)
 {
-	struct nl_policy_dump *state = (void *)*_state;
+	struct nl_policy_dump *state = (void *)_state;
 
-	if (IS_ERR(state))
-		return false;
-
-	if (netlink_policy_dump_finished(state)) {
-		kfree(state);
-		/* store end marker instead of freed state */
-		*_state = (unsigned long)ERR_PTR(-ENOENT);
-		return false;
-	}
-
-	return true;
+	return !netlink_policy_dump_finished(state);
 }
 
 int netlink_policy_dump_write(struct sk_buff *skb, unsigned long _state)
@@ -309,3 +298,10 @@ nla_put_failure:
 	nla_nest_cancel(skb, policy);
 	return -ENOBUFS;
 }
+
+void netlink_policy_dump_free(unsigned long _state)
+{
+	struct nl_policy_dump *state = (void *)_state;
+
+	kfree(state);
+}

From a93bdcb94a0b3ca72046151412c2389dca681d2a Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 2 Oct 2020 07:49:45 +0200
Subject: [PATCH 300/322] net: core: document two new elements of struct
 net_device

As warned by "make htmldocs", there are two new struct elements
that aren't documented:

	../include/linux/netdevice.h:2159: warning: Function parameter or member 'unlink_list' not described in 'net_device'
	../include/linux/netdevice.h:2159: warning: Function parameter or member 'nested_level' not described in 'net_device'

Fixes: 1fc70edb7d7b ("net: core: add nested_level variable in net_device")
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9fdb3ebef306..18dec08439f9 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1851,6 +1851,11 @@ enum netdev_priv_flags {
  *	@udp_tunnel_nic:	UDP tunnel offload state
  *	@xdp_state:		stores info on attached XDP BPF programs
  *
+ *	@nested_level:	Used as as a parameter of spin_lock_nested() of
+ *			dev->addr_list_lock.
+ *	@unlink_list:	As netif_addr_lock() can be called recursively,
+ *			keep a list of interfaces to be deleted.
+ *
  *	FIXME: cleanup struct net_device such that network protocol info
  *	moves out.
  */

From f30e25a9d1b25ac8d40071c4dc2679ad0fcdc55a Mon Sep 17 00:00:00 2001
From: Petko Manolov <petko.manolov@konsulko.com>
Date: Fri, 2 Oct 2020 10:56:04 +0300
Subject: [PATCH 301/322] net: usb: pegasus: Proper error handing when setting
 pegasus' MAC address

v2:

If reading the MAC address from eeprom fail don't throw an error, use randomly
generated MAC instead.  Either way the adapter will soldier on and the return
type of set_ethernet_addr() can be reverted to void.

v1:

Fix a bug in set_ethernet_addr() which does not take into account possible
errors (or partial reads) returned by its helpers.  This can potentially lead to
writing random data into device's MAC address registers.

Signed-off-by: Petko Manolov <petko.manolov@konsulko.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/pegasus.c | 35 +++++++++++++++++++++++++++--------
 1 file changed, 27 insertions(+), 8 deletions(-)

diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c
index e92cb51a2c77..060a8a03e6c4 100644
--- a/drivers/net/usb/pegasus.c
+++ b/drivers/net/usb/pegasus.c
@@ -360,28 +360,47 @@ fail:
 }
 #endif				/* PEGASUS_WRITE_EEPROM */
 
-static inline void get_node_id(pegasus_t *pegasus, __u8 *id)
+static inline int get_node_id(pegasus_t *pegasus, u8 *id)
 {
-	int i;
-	__u16 w16;
+	int i, ret;
+	u16 w16;
 
 	for (i = 0; i < 3; i++) {
-		read_eprom_word(pegasus, i, &w16);
+		ret = read_eprom_word(pegasus, i, &w16);
+		if (ret < 0)
+			return ret;
 		((__le16 *) id)[i] = cpu_to_le16(w16);
 	}
+
+	return 0;
 }
 
 static void set_ethernet_addr(pegasus_t *pegasus)
 {
-	__u8 node_id[6];
+	int ret;
+	u8 node_id[6];
 
 	if (pegasus->features & PEGASUS_II) {
-		get_registers(pegasus, 0x10, sizeof(node_id), node_id);
+		ret = get_registers(pegasus, 0x10, sizeof(node_id), node_id);
+		if (ret < 0)
+			goto err;
 	} else {
-		get_node_id(pegasus, node_id);
-		set_registers(pegasus, EthID, sizeof(node_id), node_id);
+		ret = get_node_id(pegasus, node_id);
+		if (ret < 0)
+			goto err;
+		ret = set_registers(pegasus, EthID, sizeof(node_id), node_id);
+		if (ret < 0)
+			goto err;
 	}
+
 	memcpy(pegasus->net->dev_addr, node_id, sizeof(node_id));
+
+	return;
+err:
+	eth_hw_addr_random(pegasus->net);
+	dev_info(&pegasus->intf->dev, "software assigned MAC address.\n");
+
+	return;
 }
 
 static inline int reset_mac(pegasus_t *pegasus)

From c381b07941adc2274ce552daf86c94701c5e265a Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Fri, 2 Oct 2020 16:27:28 +0800
Subject: [PATCH 302/322] net: introduce helper sendpage_ok() in
 include/linux/net.h

The original problem was from nvme-over-tcp code, who mistakenly uses
kernel_sendpage() to send pages allocated by __get_free_pages() without
__GFP_COMP flag. Such pages don't have refcount (page_count is 0) on
tail pages, sending them by kernel_sendpage() may trigger a kernel panic
from a corrupted kernel heap, because these pages are incorrectly freed
in network stack as page_count 0 pages.

This patch introduces a helper sendpage_ok(), it returns true if the
checking page,
- is not slab page: PageSlab(page) is false.
- has page refcount: page_count(page) is not zero

All drivers who want to send page to remote end by kernel_sendpage()
may use this helper to check whether the page is OK. If the helper does
not return true, the driver should try other non sendpage method (e.g.
sock_no_sendpage()) to handle the page.

Signed-off-by: Coly Li <colyli@suse.de>
Cc: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Jan Kara <jack@suse.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Mikhail Skorzhinskii <mskorzhinskiy@solarflare.com>
Cc: Philipp Reisner <philipp.reisner@linbit.com>
Cc: Sagi Grimberg <sagi@grimberg.me>
Cc: Vlastimil Babka <vbabka@suse.com>
Cc: stable@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/net.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/include/linux/net.h b/include/linux/net.h
index d48ff1180879..ae713c851342 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -21,6 +21,7 @@
 #include <linux/rcupdate.h>
 #include <linux/once.h>
 #include <linux/fs.h>
+#include <linux/mm.h>
 #include <linux/sockptr.h>
 
 #include <uapi/linux/net.h>
@@ -286,6 +287,21 @@ do {									\
 #define net_get_random_once_wait(buf, nbytes)			\
 	get_random_once_wait((buf), (nbytes))
 
+/*
+ * E.g. XFS meta- & log-data is in slab pages, or bcache meta
+ * data pages, or other high order pages allocated by
+ * __get_free_pages() without __GFP_COMP, which have a page_count
+ * of 0 and/or have PageSlab() set. We cannot use send_page for
+ * those, as that does get_page(); put_page(); and would cause
+ * either a VM_BUG directly, or __page_cache_release a page that
+ * would actually still be referenced by someone, leading to some
+ * obscure delayed Oops somewhere else.
+ */
+static inline bool sendpage_ok(struct page *page)
+{
+	return !PageSlab(page) && page_count(page) >= 1;
+}
+
 int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec,
 		   size_t num, size_t len);
 int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,

From 7b62d31d3f399079e7de7cc43e85d6481170970a Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Fri, 2 Oct 2020 16:27:29 +0800
Subject: [PATCH 303/322] net: add WARN_ONCE in kernel_sendpage() for improper
 zero-copy send

If a page sent into kernel_sendpage() is a slab page or it doesn't have
ref_count, this page is improper to send by the zero copy sendpage()
method. Otherwise such page might be unexpected released in network code
path and causes impredictable panic due to kernel memory management data
structure corruption.

This path adds a WARN_ON() on the sending page before sends it into the
concrete zero-copy sendpage() method, if the page is improper for the
zero-copy sendpage() method, a warning message can be observed before
the consequential unpredictable kernel panic.

This patch does not change existing kernel_sendpage() behavior for the
improper page zero-copy send, it just provides hint warning message for
following potential panic due the kernel memory heap corruption.

Signed-off-by: Coly Li <colyli@suse.de>
Cc: Cong Wang <amwang@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: David S. Miller <davem@davemloft.net>
Cc: Sridhar Samudrala <sri@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/socket.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/socket.c b/net/socket.c
index 0c0144604f81..58cac2da5f66 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -3638,9 +3638,11 @@ EXPORT_SYMBOL(kernel_getpeername);
 int kernel_sendpage(struct socket *sock, struct page *page, int offset,
 		    size_t size, int flags)
 {
-	if (sock->ops->sendpage)
+	if (sock->ops->sendpage) {
+		/* Warn in case the improper page to zero-copy send */
+		WARN_ONCE(!sendpage_ok(page), "improper page for zero-copy send");
 		return sock->ops->sendpage(sock, page, offset, size, flags);
-
+	}
 	return sock_no_sendpage(sock, page, offset, size, flags);
 }
 EXPORT_SYMBOL(kernel_sendpage);

From 7d4194abfc4de13a2663c7fee6891de8360f7a52 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Fri, 2 Oct 2020 16:27:30 +0800
Subject: [PATCH 304/322] nvme-tcp: check page by sendpage_ok() before calling
 kernel_sendpage()

Currently nvme_tcp_try_send_data() doesn't use kernel_sendpage() to
send slab pages. But for pages allocated by __get_free_pages() without
__GFP_COMP, which also have refcount as 0, they are still sent by
kernel_sendpage() to remote end, this is problematic.

The new introduced helper sendpage_ok() checks both PageSlab tag and
page_count counter, and returns true if the checking page is OK to be
sent by kernel_sendpage().

This patch fixes the page checking issue of nvme_tcp_try_send_data()
with sendpage_ok(). If sendpage_ok() returns true, send this page by
kernel_sendpage(), otherwise use sock_no_sendpage to handle this page.

Signed-off-by: Coly Li <colyli@suse.de>
Cc: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Jan Kara <jack@suse.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Mikhail Skorzhinskii <mskorzhinskiy@solarflare.com>
Cc: Philipp Reisner <philipp.reisner@linbit.com>
Cc: Sagi Grimberg <sagi@grimberg.me>
Cc: Vlastimil Babka <vbabka@suse.com>
Cc: stable@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/nvme/host/tcp.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 8f4f29f18b8c..d6a3e1487354 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -913,12 +913,11 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
 		else
 			flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
 
-		/* can't zcopy slab pages */
-		if (unlikely(PageSlab(page))) {
-			ret = sock_no_sendpage(queue->sock, page, offset, len,
+		if (sendpage_ok(page)) {
+			ret = kernel_sendpage(queue->sock, page, offset, len,
 					flags);
 		} else {
-			ret = kernel_sendpage(queue->sock, page, offset, len,
+			ret = sock_no_sendpage(queue->sock, page, offset, len,
 					flags);
 		}
 		if (ret <= 0)

From cf83a17edeeb36195596d2dae060a7c381db35f1 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Fri, 2 Oct 2020 16:27:31 +0800
Subject: [PATCH 305/322] tcp: use sendpage_ok() to detect misused .sendpage

commit a10674bf2406 ("tcp: detecting the misuse of .sendpage for Slab
objects") adds the checks for Slab pages, but the pages don't have
page_count are still missing from the check.

Network layer's sendpage method is not designed to send page_count 0
pages neither, therefore both PageSlab() and page_count() should be
both checked for the sending page. This is exactly what sendpage_ok()
does.

This patch uses sendpage_ok() in do_tcp_sendpages() to detect misused
.sendpage, to make the code more robust.

Fixes: a10674bf2406 ("tcp: detecting the misuse of .sendpage for Slab objects")
Suggested-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Coly Li <colyli@suse.de>
Cc: Vasily Averin <vvs@virtuozzo.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: stable@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 31f3b858db81..2135ee7c806d 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -970,7 +970,8 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
 	long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
 
 	if (IS_ENABLED(CONFIG_DEBUG_VM) &&
-	    WARN_ONCE(PageSlab(page), "page must not be a Slab one"))
+	    WARN_ONCE(!sendpage_ok(page),
+		      "page must not be a Slab one and have page_count > 0"))
 		return -EINVAL;
 
 	/* Wait for a connection to finish. One exception is TCP Fast Open

From fb25ebe1b212c37781b23a7f7cd21c0bb2f6eb83 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Fri, 2 Oct 2020 16:27:32 +0800
Subject: [PATCH 306/322] drbd: code cleanup by using sendpage_ok() to check
 page for kernel_sendpage()

In _drbd_send_page() a page is checked by following code before sending
it by kernel_sendpage(),
        (page_count(page) < 1) || PageSlab(page)
If the check is true, this page won't be send by kernel_sendpage() and
handled by sock_no_sendpage().

This kind of check is exactly what macro sendpage_ok() does, which is
introduced into include/linux/net.h to solve a similar send page issue
in nvme-tcp code.

This patch uses macro sendpage_ok() to replace the open coded checks to
page type and refcount in _drbd_send_page(), as a code cleanup.

Signed-off-by: Coly Li <colyli@suse.de>
Cc: Philipp Reisner <philipp.reisner@linbit.com>
Cc: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/block/drbd/drbd_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 04b6bde9419d..573dbf6f0c31 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1553,7 +1553,7 @@ static int _drbd_send_page(struct drbd_peer_device *peer_device, struct page *pa
 	 * put_page(); and would cause either a VM_BUG directly, or
 	 * __page_cache_release a page that would actually still be referenced
 	 * by someone, leading to some obscure delayed Oops somewhere else. */
-	if (drbd_disable_sendpage || (page_count(page) < 1) || PageSlab(page))
+	if (drbd_disable_sendpage || !sendpage_ok(page))
 		return _drbd_no_send_page(peer_device, page, offset, size, msg_flags);
 
 	msg_flags |= MSG_NOSIGNAL;

From 6aa25c737705466020e74ad44679d3758b8961f6 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Fri, 2 Oct 2020 16:27:33 +0800
Subject: [PATCH 307/322] scsi: libiscsi: use sendpage_ok() in
 iscsi_tcp_segment_map()

In iscsci driver, iscsi_tcp_segment_map() uses the following code to
check whether the page should or not be handled by sendpage:
    if (!recv && page_count(sg_page(sg)) >= 1 && !PageSlab(sg_page(sg)))

The "page_count(sg_page(sg)) >= 1 && !PageSlab(sg_page(sg)" part is to
make sure the page can be sent to network layer's zero copy path. This
part is exactly what sendpage_ok() does.

This patch uses  use sendpage_ok() in iscsi_tcp_segment_map() to replace
the original open coded checks.

Signed-off-by: Coly Li <colyli@suse.de>
Reviewed-by: Lee Duncan <lduncan@suse.com>
Acked-by: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Vasily Averin <vvs@virtuozzo.com>
Cc: Cong Wang <amwang@redhat.com>
Cc: Mike Christie <michaelc@cs.wisc.edu>
Cc: Chris Leech <cleech@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/scsi/libiscsi_tcp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c
index 37e5d4e48c2f..83f14b2c8804 100644
--- a/drivers/scsi/libiscsi_tcp.c
+++ b/drivers/scsi/libiscsi_tcp.c
@@ -128,7 +128,7 @@ static void iscsi_tcp_segment_map(struct iscsi_segment *segment, int recv)
 	 * coalescing neighboring slab objects into a single frag which
 	 * triggers one of hardened usercopy checks.
 	 */
-	if (!recv && page_count(sg_page(sg)) >= 1 && !PageSlab(sg_page(sg)))
+	if (!recv && sendpage_ok(sg_page(sg)))
 		return;
 
 	if (recv) {

From 40efc4dc73956e1fab177783e55f30117517c542 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Fri, 2 Oct 2020 16:27:34 +0800
Subject: [PATCH 308/322] libceph: use sendpage_ok() in ceph_tcp_sendpage()

In libceph, ceph_tcp_sendpage() does the following checks before handle
the page by network layer's zero copy sendpage method,
	if (page_count(page) >= 1 && !PageSlab(page))

This check is exactly what sendpage_ok() does. This patch replace the
open coded checks by sendpage_ok() as a code cleanup.

Signed-off-by: Coly Li <colyli@suse.de>
Acked-by: Jeff Layton <jlayton@kernel.org>
Cc: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ceph/messenger.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index bdfd66ba3843..d4d7a0e52491 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -575,7 +575,7 @@ static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
 	 * coalescing neighboring slab objects into a single frag which
 	 * triggers one of hardened usercopy checks.
 	 */
-	if (page_count(page) >= 1 && !PageSlab(page))
+	if (sendpage_ok(page))
 		sendpage = sock->ops->sendpage;
 	else
 		sendpage = sock_no_sendpage;

From 9d8c05ad5627b3a650087c14c67dd1d22a6368ab Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Fri, 2 Oct 2020 12:39:44 +0200
Subject: [PATCH 309/322] tcp: fix syn cookied MPTCP request socket leak

If a syn-cookies request socket don't pass MPTCP-level
validation done in syn_recv_sock(), we need to release
it immediately, or it will be leaked.

Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/89
Fixes: 9466a1ccebbe ("mptcp: enable JOIN requests even if cookies are in use")
Reported-and-tested-by: Geliang Tang <geliangtang@gmail.com>
Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/syncookies.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index f0794f0232ba..e03756631541 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -214,7 +214,7 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb,
 		sock_rps_save_rxhash(child, skb);
 
 		if (rsk_drop_req(req)) {
-			refcount_set(&req->rsk_refcnt, 2);
+			reqsk_put(req);
 			return child;
 		}
 

From b502e6ecdc3b6d381bd72c5f879bc1e00d6fe7db Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 29 Sep 2020 08:31:32 -0400
Subject: [PATCH 310/322] KVM: VMX: update PFEC_MASK/PFEC_MATCH together with
 PF intercept

The PFEC_MASK and PFEC_MATCH fields in the VMCS reverse the meaning of
the #PF intercept bit in the exception bitmap when they do not match.
This means that, if PFEC_MASK and/or PFEC_MATCH are set, the
hypervisor can get a vmexit for #PF exceptions even when the
corresponding bit is clear in the exception bitmap.

This is unexpected and is promptly detected by a WARN_ON_ONCE.
To fix it, reset PFEC_MASK and PFEC_MATCH when the #PF intercept
is disabled (as is common with enable_ept && !allow_smaller_maxphyaddr).

Reported-by: Qian Cai <cai@redhat.com>>
Reported-by: Naresh Kamboju <naresh.kamboju@linaro.org>
Tested-by: Naresh Kamboju <naresh.kamboju@linaro.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/vmx.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index f0384e93548a..f4e9c310032a 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -794,6 +794,18 @@ void update_exception_bitmap(struct kvm_vcpu *vcpu)
 	 */
 	if (is_guest_mode(vcpu))
 		eb |= get_vmcs12(vcpu)->exception_bitmap;
+        else {
+		/*
+		 * If EPT is enabled, #PF is only trapped if MAXPHYADDR is mismatched
+		 * between guest and host.  In that case we only care about present
+		 * faults.  For vmcs02, however, PFEC_MASK and PFEC_MATCH are set in
+		 * prepare_vmcs02_rare.
+		 */
+		bool selective_pf_trap = enable_ept && (eb & (1u << PF_VECTOR));
+		int mask = selective_pf_trap ? PFERR_PRESENT_MASK : 0;
+		vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, mask);
+		vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, mask);
+	}
 
 	vmcs_write32(EXCEPTION_BITMAP, eb);
 }
@@ -4355,16 +4367,6 @@ static void init_vmcs(struct vcpu_vmx *vmx)
 		vmx->pt_desc.guest.output_mask = 0x7F;
 		vmcs_write64(GUEST_IA32_RTIT_CTL, 0);
 	}
-
-	/*
-	 * If EPT is enabled, #PF is only trapped if MAXPHYADDR is mismatched
-	 * between guest and host.  In that case we only care about present
-	 * faults.
-	 */
-	if (enable_ept) {
-		vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, PFERR_PRESENT_MASK);
-		vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, PFERR_PRESENT_MASK);
-	}
 }
 
 static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)

From 484cfaca95925f1a38ded6d0561de06a70409a32 Mon Sep 17 00:00:00 2001
From: Eric Farman <farman@linux.ibm.com>
Date: Fri, 2 Oct 2020 22:21:41 -0700
Subject: [PATCH 311/322] mm, slub: restore initial kmem_cache flags

The routine that applies debug flags to the kmem_cache slabs
inadvertantly prevents non-debug flags from being applied to those
same objects.  That is, if slub_debug=<flag>,<slab> is specified,
non-debugged slabs will end up having flags of zero, and the slabs
may be unusable.

Fix this by including the input flags for non-matching slabs with the
contents of slub_debug, so that the caches are created as expected
alongside any debugging options that may be requested.  With this, we
can remove the check for a NULL slub_debug_string, since it's covered
by the loop itself.

Fixes: e17f1dfba37b ("mm, slub: extend slub_debug syntax for multiple blocks")
Signed-off-by: Eric Farman <farman@linux.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Kees Cook <keescook@chromium.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Link: https://lkml.kernel.org/r/20200930161931.28575-1-farman@linux.ibm.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/slub.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index d4177aecedf6..6d3574013b2f 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1413,10 +1413,6 @@ slab_flags_t kmem_cache_flags(unsigned int object_size,
 	char *next_block;
 	slab_flags_t block_flags;
 
-	/* If slub_debug = 0, it folds into the if conditional. */
-	if (!slub_debug_string)
-		return flags | slub_debug;
-
 	len = strlen(name);
 	next_block = slub_debug_string;
 	/* Go through all blocks of debug options, see if any matches our slab's name */
@@ -1450,7 +1446,7 @@ slab_flags_t kmem_cache_flags(unsigned int object_size,
 		}
 	}
 
-	return slub_debug;
+	return flags | slub_debug;
 }
 #else /* !CONFIG_SLUB_DEBUG */
 static inline void setup_object_debug(struct kmem_cache *s,

From 1d91df85f399adbe4f318f3e74ac5a5d84c0ca7c Mon Sep 17 00:00:00 2001
From: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Date: Fri, 2 Oct 2020 22:21:45 -0700
Subject: [PATCH 312/322] mm/page_alloc: handle a missing case for
 memalloc_nocma_{save/restore} APIs

memalloc_nocma_{save/restore} APIs can be used to skip page allocation
on CMA area, but, there is a missing case and the page on CMA area could
be allocated even if APIs are used.  This patch handles this case to fix
the potential issue.

For now, these APIs are used to prevent long-term pinning on the CMA
page.  When the long-term pinning is requested on the CMA page, it is
migrated to the non-CMA page before pinning.  This non-CMA page is
allocated by using memalloc_nocma_{save/restore} APIs.  If APIs doesn't
work as intended, the CMA page is allocated and it is pinned for a long
time.  This long-term pin for the CMA page causes cma_alloc() failure
and it could result in wrong behaviour on the device driver who uses the
cma_alloc().

Missing case is an allocation from the pcplist.  MIGRATE_MOVABLE pcplist
could have the pages on CMA area so we need to skip it if ALLOC_CMA
isn't specified.

Fixes: 8510e69c8efe (mm/page_alloc: fix memalloc_nocma_{save/restore} APIs)
Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: "Aneesh Kumar K . V" <aneesh.kumar@linux.ibm.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Link: https://lkml.kernel.org/r/1601429472-12599-1-git-send-email-iamjoonsoo.kim@lge.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_alloc.c | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5661fa164f13..6866533de8e6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3367,9 +3367,16 @@ struct page *rmqueue(struct zone *preferred_zone,
 	struct page *page;
 
 	if (likely(order == 0)) {
-		page = rmqueue_pcplist(preferred_zone, zone, gfp_flags,
+		/*
+		 * MIGRATE_MOVABLE pcplist could have the pages on CMA area and
+		 * we need to skip it when CMA area isn't allowed.
+		 */
+		if (!IS_ENABLED(CONFIG_CMA) || alloc_flags & ALLOC_CMA ||
+				migratetype != MIGRATE_MOVABLE) {
+			page = rmqueue_pcplist(preferred_zone, zone, gfp_flags,
 					migratetype, alloc_flags);
-		goto out;
+			goto out;
+		}
 	}
 
 	/*
@@ -3381,7 +3388,13 @@ struct page *rmqueue(struct zone *preferred_zone,
 
 	do {
 		page = NULL;
-		if (alloc_flags & ALLOC_HARDER) {
+		/*
+		 * order-0 request can reach here when the pcplist is skipped
+		 * due to non-CMA allocation context. HIGHATOMIC area is
+		 * reserved for high-order atomic allocation, so order-0
+		 * request should skip it.
+		 */
+		if (order > 0 && alloc_flags & ALLOC_HARDER) {
 			page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
 			if (page)
 				trace_mm_page_alloc_zone_locked(page, order, migratetype);

From d43ca1386bf21b783d618e3a5f61c3b2e8759df2 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Fri, 2 Oct 2020 22:21:48 -0700
Subject: [PATCH 313/322] scripts/spelling.txt: fix malformed entry

One of the entries has three fields "mistake||correction||correction"
rather than the expected two fields "mistake||correction".  Fix it.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Link: https://lkml.kernel.org/r/20200930234359.255295-1-ebiggers@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/spelling.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/spelling.txt b/scripts/spelling.txt
index f253681e7e2a..feb2efaaa5e6 100644
--- a/scripts/spelling.txt
+++ b/scripts/spelling.txt
@@ -589,7 +589,7 @@ explictly||explicitly
 expresion||expression
 exprimental||experimental
 extened||extended
-exteneded||extended||extended
+exteneded||extended
 extensability||extensibility
 extention||extension
 extenstion||extension

From 388e201d41fa1ed8f2dce0f0567f56f8e919ffb0 Mon Sep 17 00:00:00 2001
From: "Vineetha G. Jaya Kumaran" <vineetha.g.jaya.kumaran@intel.com>
Date: Thu, 1 Oct 2020 23:56:09 +0800
Subject: [PATCH 314/322] net: stmmac: Modify configuration method of EEE
 timers

Ethtool manual stated that the tx-timer is the "the amount of time the
device should stay in idle mode prior to asserting its Tx LPI". The
previous implementation for "ethtool --set-eee tx-timer" sets the LPI TW
timer duration which is not correct. Hence, this patch fixes the
"ethtool --set-eee tx-timer" to configure the EEE LPI timer.

The LPI TW Timer will be using the defined default value instead of
"ethtool --set-eee tx-timer" which follows the EEE LS timer implementation.

Changelog V2
*Not removing/modifying the eee_timer.
*EEE LPI timer can be configured through ethtool and also the eee_timer
module param.
*EEE TW Timer will be configured with default value only, not able to be
configured through ethtool or module param. This follows the implementation
of the EEE LS Timer.

Fixes: d765955d2ae0 ("stmmac: add the Energy Efficient Ethernet support")
Signed-off-by: Vineetha G. Jaya Kumaran <vineetha.g.jaya.kumaran@intel.com>
Signed-off-by: Voon Weifeng <weifeng.voon@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac.h  |  2 ++
 .../ethernet/stmicro/stmmac/stmmac_ethtool.c  | 12 +++++++++-
 .../net/ethernet/stmicro/stmmac/stmmac_main.c | 23 ++++++++++++-------
 3 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 9c02fc754bf1..545696971f65 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -203,6 +203,8 @@ struct stmmac_priv {
 	int eee_enabled;
 	int eee_active;
 	int tx_lpi_timer;
+	int tx_lpi_enabled;
+	int eee_tw_timer;
 	unsigned int mode;
 	unsigned int chain_mode;
 	int extend_desc;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index 430a4b32ec1e..814879f91f76 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -665,6 +665,7 @@ static int stmmac_ethtool_op_get_eee(struct net_device *dev,
 	edata->eee_enabled = priv->eee_enabled;
 	edata->eee_active = priv->eee_active;
 	edata->tx_lpi_timer = priv->tx_lpi_timer;
+	edata->tx_lpi_enabled = priv->tx_lpi_enabled;
 
 	return phylink_ethtool_get_eee(priv->phylink, edata);
 }
@@ -678,6 +679,10 @@ static int stmmac_ethtool_op_set_eee(struct net_device *dev,
 	if (!priv->dma_cap.eee)
 		return -EOPNOTSUPP;
 
+	if (priv->tx_lpi_enabled != edata->tx_lpi_enabled)
+		netdev_warn(priv->dev,
+			    "Setting EEE tx-lpi is not supported\n");
+
 	if (!edata->eee_enabled)
 		stmmac_disable_eee_mode(priv);
 
@@ -685,7 +690,12 @@ static int stmmac_ethtool_op_set_eee(struct net_device *dev,
 	if (ret)
 		return ret;
 
-	priv->tx_lpi_timer = edata->tx_lpi_timer;
+	if (edata->eee_enabled &&
+	    priv->tx_lpi_timer != edata->tx_lpi_timer) {
+		priv->tx_lpi_timer = edata->tx_lpi_timer;
+		stmmac_eee_init(priv);
+	}
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 89b2b3472852..b56b13d64ab4 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -94,7 +94,7 @@ static const u32 default_msg_level = (NETIF_MSG_DRV | NETIF_MSG_PROBE |
 static int eee_timer = STMMAC_DEFAULT_LPI_TIMER;
 module_param(eee_timer, int, 0644);
 MODULE_PARM_DESC(eee_timer, "LPI tx expiration time in msec");
-#define STMMAC_LPI_T(x) (jiffies + msecs_to_jiffies(x))
+#define STMMAC_LPI_T(x) (jiffies + usecs_to_jiffies(x))
 
 /* By default the driver will use the ring mode to manage tx and rx descriptors,
  * but allow user to force to use the chain instead of the ring
@@ -370,7 +370,7 @@ static void stmmac_eee_ctrl_timer(struct timer_list *t)
 	struct stmmac_priv *priv = from_timer(priv, t, eee_ctrl_timer);
 
 	stmmac_enable_eee_mode(priv);
-	mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(eee_timer));
+	mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(priv->tx_lpi_timer));
 }
 
 /**
@@ -383,7 +383,7 @@ static void stmmac_eee_ctrl_timer(struct timer_list *t)
  */
 bool stmmac_eee_init(struct stmmac_priv *priv)
 {
-	int tx_lpi_timer = priv->tx_lpi_timer;
+	int eee_tw_timer = priv->eee_tw_timer;
 
 	/* Using PCS we cannot dial with the phy registers at this stage
 	 * so we do not support extra feature like EEE.
@@ -403,7 +403,7 @@ bool stmmac_eee_init(struct stmmac_priv *priv)
 		if (priv->eee_enabled) {
 			netdev_dbg(priv->dev, "disable EEE\n");
 			del_timer_sync(&priv->eee_ctrl_timer);
-			stmmac_set_eee_timer(priv, priv->hw, 0, tx_lpi_timer);
+			stmmac_set_eee_timer(priv, priv->hw, 0, eee_tw_timer);
 		}
 		mutex_unlock(&priv->lock);
 		return false;
@@ -411,11 +411,12 @@ bool stmmac_eee_init(struct stmmac_priv *priv)
 
 	if (priv->eee_active && !priv->eee_enabled) {
 		timer_setup(&priv->eee_ctrl_timer, stmmac_eee_ctrl_timer, 0);
-		mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(eee_timer));
 		stmmac_set_eee_timer(priv, priv->hw, STMMAC_DEFAULT_LIT_LS,
-				     tx_lpi_timer);
+				     eee_tw_timer);
 	}
 
+	mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(priv->tx_lpi_timer));
+
 	mutex_unlock(&priv->lock);
 	netdev_dbg(priv->dev, "Energy-Efficient Ethernet initialized\n");
 	return true;
@@ -930,6 +931,7 @@ static void stmmac_mac_link_down(struct phylink_config *config,
 
 	stmmac_mac_set(priv, priv->ioaddr, false);
 	priv->eee_active = false;
+	priv->tx_lpi_enabled = false;
 	stmmac_eee_init(priv);
 	stmmac_set_eee_pls(priv, priv->hw, false);
 }
@@ -1027,6 +1029,7 @@ static void stmmac_mac_link_up(struct phylink_config *config,
 	if (phy && priv->dma_cap.eee) {
 		priv->eee_active = phy_init_eee(phy, 1) >= 0;
 		priv->eee_enabled = stmmac_eee_init(priv);
+		priv->tx_lpi_enabled = priv->eee_enabled;
 		stmmac_set_eee_pls(priv, priv->hw, true);
 	}
 }
@@ -2061,7 +2064,7 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
 
 	if ((priv->eee_enabled) && (!priv->tx_path_in_lpi_mode)) {
 		stmmac_enable_eee_mode(priv);
-		mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(eee_timer));
+		mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(priv->tx_lpi_timer));
 	}
 
 	/* We still have pending packets, let's call for a new scheduling */
@@ -2694,7 +2697,11 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
 			netdev_warn(priv->dev, "PTP init failed\n");
 	}
 
-	priv->tx_lpi_timer = STMMAC_DEFAULT_TWT_LS;
+	priv->eee_tw_timer = STMMAC_DEFAULT_TWT_LS;
+
+	/* Convert the timer from msec to usec */
+	if (!priv->tx_lpi_timer)
+		priv->tx_lpi_timer = eee_timer * 1000;
 
 	if (priv->use_riwt) {
 		if (!priv->rx_riwt)

From 1f7e877c20517735bceff1535e1b7fa846b2f215 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Thu, 1 Oct 2020 10:54:49 -0700
Subject: [PATCH 315/322] net: hinic: fix DEVLINK build errors

Fix many (lots deleted here) build errors in hinic by selecting NET_DEVLINK.

ld: drivers/net/ethernet/huawei/hinic/hinic_hw_dev.o: in function `mgmt_watchdog_timeout_event_handler':
hinic_hw_dev.c:(.text+0x30a): undefined reference to `devlink_health_report'
ld: drivers/net/ethernet/huawei/hinic/hinic_devlink.o: in function `hinic_fw_reporter_dump':
hinic_devlink.c:(.text+0x1c): undefined reference to `devlink_fmsg_u32_pair_put'
ld: drivers/net/ethernet/huawei/hinic/hinic_devlink.o: in function `hinic_fw_reporter_dump':
hinic_devlink.c:(.text+0x126): undefined reference to `devlink_fmsg_binary_pair_put'
ld: drivers/net/ethernet/huawei/hinic/hinic_devlink.o: in function `hinic_hw_reporter_dump':
hinic_devlink.c:(.text+0x1ba): undefined reference to `devlink_fmsg_string_pair_put'
ld: hinic_devlink.c:(.text+0x227): undefined reference to `devlink_fmsg_u8_pair_put'
ld: drivers/net/ethernet/huawei/hinic/hinic_devlink.o: in function `hinic_devlink_alloc':
hinic_devlink.c:(.text+0xaee): undefined reference to `devlink_alloc'
ld: drivers/net/ethernet/huawei/hinic/hinic_devlink.o: in function `hinic_devlink_free':
hinic_devlink.c:(.text+0xb04): undefined reference to `devlink_free'
ld: drivers/net/ethernet/huawei/hinic/hinic_devlink.o: in function `hinic_devlink_register':
hinic_devlink.c:(.text+0xb26): undefined reference to `devlink_register'
ld: drivers/net/ethernet/huawei/hinic/hinic_devlink.o: in function `hinic_devlink_unregister':
hinic_devlink.c:(.text+0xb46): undefined reference to `devlink_unregister'
ld: drivers/net/ethernet/huawei/hinic/hinic_devlink.o: in function `hinic_health_reporters_create':
hinic_devlink.c:(.text+0xb75): undefined reference to `devlink_health_reporter_create'
ld: hinic_devlink.c:(.text+0xb95): undefined reference to `devlink_health_reporter_create'
ld: hinic_devlink.c:(.text+0xbac): undefined reference to `devlink_health_reporter_destroy'
ld: drivers/net/ethernet/huawei/hinic/hinic_devlink.o: in function `hinic_health_reporters_destroy':

Fixes: 51ba902a16e6 ("net-next/hinic: Initialize hw interface")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Bin Luo <luobin9@huawei.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Aviad Krawczyk <aviad.krawczyk@huawei.com>
Cc: Zhao Chen <zhaochen6@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/huawei/hinic/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/huawei/hinic/Kconfig b/drivers/net/ethernet/huawei/hinic/Kconfig
index 936e2dd3bb13..b47bd5440c5f 100644
--- a/drivers/net/ethernet/huawei/hinic/Kconfig
+++ b/drivers/net/ethernet/huawei/hinic/Kconfig
@@ -6,6 +6,7 @@
 config HINIC
 	tristate "Huawei Intelligent PCIE Network Interface Card"
 	depends on (PCI_MSI && (X86 || ARM64))
+	select NET_DEVLINK
 	help
 	  This driver supports HiNIC PCIE Ethernet cards.
 	  To compile this driver as part of the kernel, choose Y here.

From 790ca79d3e7f76d77291540e1728f400c6ad931f Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Fri, 2 Oct 2020 21:47:43 +0200
Subject: [PATCH 316/322] net: typhoon: Fix a typo Typoon --> Typhoon

s/Typoon/Typhoon/

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/3com/typhoon.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/3com/typhoon.h b/drivers/net/ethernet/3com/typhoon.h
index 2f634c64d5d1..38e6dcab4e94 100644
--- a/drivers/net/ethernet/3com/typhoon.h
+++ b/drivers/net/ethernet/3com/typhoon.h
@@ -33,7 +33,7 @@ struct basic_ring {
 	u32 lastWrite;
 };
 
-/* The Typoon transmit ring -- same as a basic ring, plus:
+/* The Typhoon transmit ring -- same as a basic ring, plus:
  * lastRead:      where we're at in regard to cleaning up the ring
  * writeRegister: register to use for writing (different for Hi & Lo rings)
  */

From 9a9e77495958c7382b2438bc19746dd3aaaabb8e Mon Sep 17 00:00:00 2001
From: Anant Thazhemadam <anant.thazhemadam@gmail.com>
Date: Mon, 5 Oct 2020 02:25:36 +0530
Subject: [PATCH 317/322] net: team: fix memory leak in __team_options_register

The variable "i" isn't initialized back correctly after the first loop
under the label inst_rollback gets executed.

The value of "i" is assigned to be option_count - 1, and the ensuing
loop (under alloc_rollback) begins by initializing i--.
Thus, the value of i when the loop begins execution will now become
i = option_count - 2.

Thus, when kfree(dst_opts[i]) is called in the second loop in this
order, (i.e., inst_rollback followed by alloc_rollback),
dst_optsp[option_count - 2] is the first element freed, and
dst_opts[option_count - 1] does not get freed, and thus, a memory
leak is caused.

This memory leak can be fixed, by assigning i = option_count (instead of
option_count - 1).

Fixes: 80f7c6683fe0 ("team: add support for per-port options")
Reported-by: syzbot+69b804437cfec30deac3@syzkaller.appspotmail.com
Tested-by: syzbot+69b804437cfec30deac3@syzkaller.appspotmail.com
Signed-off-by: Anant Thazhemadam <anant.thazhemadam@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/team/team.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 69dfb1a49cc8..bcc4a4c011f1 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -287,7 +287,7 @@ inst_rollback:
 	for (i--; i >= 0; i--)
 		__team_option_inst_del_option(team, dst_opts[i]);
 
-	i = option_count - 1;
+	i = option_count;
 alloc_rollback:
 	for (i--; i >= 0; i--)
 		kfree(dst_opts[i]);

From 580e4273d7a883ececfefa692c1f96bdbacb99b5 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Fri, 2 Oct 2020 12:13:34 -0700
Subject: [PATCH 318/322] net_sched: check error pointer in tcf_dump_walker()

Although we take RTNL on dump path, it is possible to
skip RTNL on insertion path. So the following race condition
is possible:

rtnl_lock()		// no rtnl lock
			mutex_lock(&idrinfo->lock);
			// insert ERR_PTR(-EBUSY)
			mutex_unlock(&idrinfo->lock);
tc_dump_action()
rtnl_unlock()

So we have to skip those temporary -EBUSY entries on dump path
too.

Reported-and-tested-by: syzbot+b47bc4f247856fb4d9e1@syzkaller.appspotmail.com
Fixes: 0fedc63fadf0 ("net_sched: commit action insertions together")
Cc: Vlad Buslov <vladbu@mellanox.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_api.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 5612b336e18e..798430e1a79f 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -235,6 +235,8 @@ static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
 		index++;
 		if (index < s_i)
 			continue;
+		if (IS_ERR(p))
+			continue;
 
 		if (jiffy_since &&
 		    time_after(jiffy_since,

From f4544e5361da5050ff5c0330ceea095cb5dbdd72 Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Sat, 3 Oct 2020 11:51:21 -0700
Subject: [PATCH 319/322] net: mvneta: fix double free of txq->buf

clang static analysis reports this problem:

drivers/net/ethernet/marvell/mvneta.c:3465:2: warning:
  Attempt to free released memory
        kfree(txq->buf);
        ^~~~~~~~~~~~~~~

When mvneta_txq_sw_init() fails to alloc txq->tso_hdrs,
it frees without poisoning txq->buf.  The error is caught
in the mvneta_setup_txqs() caller which handles the error
by cleaning up all of the txqs with a call to
mvneta_txq_sw_deinit which also frees txq->buf.

Since mvneta_txq_sw_deinit is a general cleaner, all of the
partial cleaning in mvneta_txq_sw_deinit()'s error handling
is not needed.

Fixes: 2adb719d74f6 ("net: mvneta: Implement software TSO")
Signed-off-by: Tom Rix <trix@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvneta.c | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index c4345e3d616f..5bf0409f5d42 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -3400,24 +3400,15 @@ static int mvneta_txq_sw_init(struct mvneta_port *pp,
 	txq->last_desc = txq->size - 1;
 
 	txq->buf = kmalloc_array(txq->size, sizeof(*txq->buf), GFP_KERNEL);
-	if (!txq->buf) {
-		dma_free_coherent(pp->dev->dev.parent,
-				  txq->size * MVNETA_DESC_ALIGNED_SIZE,
-				  txq->descs, txq->descs_phys);
+	if (!txq->buf)
 		return -ENOMEM;
-	}
 
 	/* Allocate DMA buffers for TSO MAC/IP/TCP headers */
 	txq->tso_hdrs = dma_alloc_coherent(pp->dev->dev.parent,
 					   txq->size * TSO_HEADER_SIZE,
 					   &txq->tso_hdrs_phys, GFP_KERNEL);
-	if (!txq->tso_hdrs) {
-		kfree(txq->buf);
-		dma_free_coherent(pp->dev->dev.parent,
-				  txq->size * MVNETA_DESC_ALIGNED_SIZE,
-				  txq->descs, txq->descs_phys);
+	if (!txq->tso_hdrs)
 		return -ENOMEM;
-	}
 
 	/* Setup XPS mapping */
 	if (txq_number > 1)

From 4296adc3e32f5d544a95061160fe7e127be1b9ff Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Fri, 2 Oct 2020 21:53:08 +0200
Subject: [PATCH 320/322] net/core: check length before updating Ethertype in
 skb_mpls_{push,pop}

Openvswitch allows to drop a packet's Ethernet header, therefore
skb_mpls_push() and skb_mpls_pop() might be called with ethernet=true
and mac_len=0. In that case the pointer passed to skb_mod_eth_type()
doesn't point to an Ethernet header and the new Ethertype is written at
unexpected locations.

Fix this by verifying that mac_len is big enough to contain an Ethernet
header.

Fixes: fa4e0f8855fc ("net/sched: fix corrupted L2 header with MPLS 'push' and 'pop' actions")
Signed-off-by: Guillaume Nault <gnault@redhat.com>
Acked-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 6faf73d6a0f7..2b48cb0cc684 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -5622,7 +5622,7 @@ int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto,
 	lse->label_stack_entry = mpls_lse;
 	skb_postpush_rcsum(skb, lse, MPLS_HLEN);
 
-	if (ethernet)
+	if (ethernet && mac_len >= ETH_HLEN)
 		skb_mod_eth_type(skb, eth_hdr(skb), mpls_proto);
 	skb->protocol = mpls_proto;
 
@@ -5662,7 +5662,7 @@ int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto, int mac_len,
 	skb_reset_mac_header(skb);
 	skb_set_network_header(skb, mac_len);
 
-	if (ethernet) {
+	if (ethernet && mac_len >= ETH_HLEN) {
 		struct ethhdr *hdr;
 
 		/* use mpls_hdr() to get ethertype to account for VLANs. */

From 549738f15da0e5a00275977623be199fbbf7df50 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 4 Oct 2020 16:04:34 -0700
Subject: [PATCH 321/322] Linux 5.9-rc8

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 992d24467ca0..f84d7e4ca0be 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 5
 PATCHLEVEL = 9
 SUBLEVEL = 0
-EXTRAVERSION = -rc7
+EXTRAVERSION = -rc8
 NAME = Kleptomaniac Octopus
 
 # *DOCUMENTATION*

From 720ef73d1a239e33c3ad8fac356b9b1348e68aaf Mon Sep 17 00:00:00 2001
From: Aaron Ma <aaron.ma@canonical.com>
Date: Sat, 3 Oct 2020 01:09:16 +0800
Subject: [PATCH 322/322] platform/x86: thinkpad_acpi: re-initialize ACPI
 buffer size when reuse

Evaluating ACPI _BCL could fail, then ACPI buffer size will be set to 0.
When reuse this ACPI buffer, AE_BUFFER_OVERFLOW will be triggered.

Re-initialize buffer size will make ACPI evaluate successfully.

Fixes: 46445b6b896fd ("thinkpad-acpi: fix handle locate for video and query of _BCL")
Signed-off-by: Aaron Ma <aaron.ma@canonical.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/platform/x86/thinkpad_acpi.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index 7cb3b71c67fe..e5a62fa9a879 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -6829,8 +6829,10 @@ static int __init tpacpi_query_bcl_levels(acpi_handle handle)
 	list_for_each_entry(child, &device->children, node) {
 		acpi_status status = acpi_evaluate_object(child->handle, "_BCL",
 							  NULL, &buffer);
-		if (ACPI_FAILURE(status))
+		if (ACPI_FAILURE(status)) {
+			buffer.length = ACPI_ALLOCATE_BUFFER;
 			continue;
+		}
 
 		obj = (union acpi_object *)buffer.pointer;
 		if (!obj || (obj->type != ACPI_TYPE_PACKAGE)) {