From 61ee4137b5744d9c555cd4674aae353a45f9cd2e Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 31 Jul 2020 14:49:52 +0800 Subject: [PATCH 001/322] ip_vti: Fix unused variable warning If CONFIG_INET_XFRM_TUNNEL is set but CONFIG_IPV6 is n, net/ipv4/ip_vti.c:493:27: warning: 'vti_ipip6_handler' defined but not used [-Wunused-variable] Signed-off-by: YueHaibing Signed-off-by: Steffen Klassert --- net/ipv4/ip_vti.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 49daaed89764..f687abb069fa 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -490,6 +490,7 @@ static struct xfrm_tunnel vti_ipip_handler __read_mostly = { .priority = 0, }; +#if IS_ENABLED(CONFIG_IPV6) static struct xfrm_tunnel vti_ipip6_handler __read_mostly = { .handler = vti_rcv_tunnel, .cb_handler = vti_rcv_cb, @@ -497,6 +498,7 @@ static struct xfrm_tunnel vti_ipip6_handler __read_mostly = { .priority = 0, }; #endif +#endif static int __net_init vti_init_net(struct net *net) { From 4eb2e13415757a2bce5bb0d580d22bbeef1f5346 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 13 Aug 2020 16:24:04 +0200 Subject: [PATCH 002/322] espintcp: restore IP CB before handing the packet to xfrm Xiumei reported a bug with espintcp over IPv6 in transport mode, because xfrm6_transport_finish expects to find IP6CB data (struct inet6_skb_cb). Currently, espintcp zeroes the CB, but the relevant part is actually preserved by previous layers (first set up by tcp, then strparser only zeroes a small part of tcp_skb_tb), so we can just relocate it to the start of skb->cb. Fixes: e27cca96cd68 ("xfrm: add espintcp (RFC 8229)") Reported-by: Xiumei Mu Signed-off-by: Sabrina Dubroca Signed-off-by: Steffen Klassert --- net/xfrm/espintcp.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c index 827ccdf2db57..1f08ebf7d80c 100644 --- a/net/xfrm/espintcp.c +++ b/net/xfrm/espintcp.c @@ -29,8 +29,12 @@ static void handle_nonesp(struct espintcp_ctx *ctx, struct sk_buff *skb, static void handle_esp(struct sk_buff *skb, struct sock *sk) { + struct tcp_skb_cb *tcp_cb = (struct tcp_skb_cb *)skb->cb; + skb_reset_transport_header(skb); - memset(skb->cb, 0, sizeof(skb->cb)); + + /* restore IP CB, we need at least IP6CB->nhoff */ + memmove(skb->cb, &tcp_cb->header, sizeof(tcp_cb->header)); rcu_read_lock(); skb->dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif); From d3f99f9183068efb5b931e50e298bad40285f938 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 19 Aug 2020 12:24:45 +0300 Subject: [PATCH 003/322] gpio: omap: Fix warnings if PM is disabled Fix warnings for omap_gpio_resume and omap_gpio_suspend defined but not used when PM is disabled as noticed when doing make randconfig builds. Fixes: f02a03985d06 ("gpio: omap: Add missing PM ops for suspend") Cc: Arnd Bergmann Signed-off-by: Tony Lindgren Acked-by: Grygorii Strashko Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-omap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c index 7fbe0c9e1fc1..0ea640fb636c 100644 --- a/drivers/gpio/gpio-omap.c +++ b/drivers/gpio/gpio-omap.c @@ -1516,7 +1516,7 @@ static int __maybe_unused omap_gpio_runtime_resume(struct device *dev) return 0; } -static int omap_gpio_suspend(struct device *dev) +static int __maybe_unused omap_gpio_suspend(struct device *dev) { struct gpio_bank *bank = dev_get_drvdata(dev); @@ -1528,7 +1528,7 @@ static int omap_gpio_suspend(struct device *dev) return omap_gpio_runtime_suspend(dev); } -static int omap_gpio_resume(struct device *dev) +static int __maybe_unused omap_gpio_resume(struct device *dev) { struct gpio_bank *bank = dev_get_drvdata(dev); From 400d033f5a599120089b5f0c54d14d198499af5a Mon Sep 17 00:00:00 2001 From: Tianjia Zhang Date: Sun, 2 Aug 2020 19:15:41 +0800 Subject: [PATCH 004/322] clocksource/drivers/h8300_timer8: Fix wrong return value in h8300_8timer_init() In the init function, if the call to of_iomap() fails, the return value is ENXIO instead of -ENXIO. Change to the right negative errno. Fixes: 691f8f878290f ("clocksource/drivers/h8300_timer8: Convert init function to return error") Cc: Daniel Lezcano Signed-off-by: Tianjia Zhang Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20200802111541.5429-1-tianjia.zhang@linux.alibaba.com --- drivers/clocksource/h8300_timer8.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/h8300_timer8.c b/drivers/clocksource/h8300_timer8.c index 1d740a8c42ab..47114c2a7cb5 100644 --- a/drivers/clocksource/h8300_timer8.c +++ b/drivers/clocksource/h8300_timer8.c @@ -169,7 +169,7 @@ static int __init h8300_8timer_init(struct device_node *node) return PTR_ERR(clk); } - ret = ENXIO; + ret = -ENXIO; base = of_iomap(node, 0); if (!base) { pr_err("failed to map registers for clockevent\n"); From 164805157f3c6834670afbaff563353c773131f1 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 17 Aug 2020 12:24:28 +0300 Subject: [PATCH 005/322] clocksource/drivers/timer-ti-dm: Do reset before enable Commit 6cfcd5563b4f ("clocksource/drivers/timer-ti-dm: Fix suspend and resume for am3 and am4") exposed a new issue for type2 dual mode timers on at least omap5 where the clockevent will stop when the SoC starts entering idle states during the boot. Turns out we are wrongly first enabling the system timer and then resetting it, while we must also re-enable it after reset. The current sequence leaves the timer module in a partially initialized state. This issue went unnoticed earlier with ti-sysc driver reconfiguring the timer module until we fixed the issue of ti-sysc reconfiguring system timers. Let's fix the issue by calling dmtimer_systimer_enable() from reset for both type1 and type2 timers, and switch the order of reset and enable in dmtimer_systimer_setup(). Let's also move dmtimer_systimer_enable() and dmtimer_systimer_disable() to do this without adding forward declarations. Fixes: 6cfcd5563b4f ("clocksource/drivers/timer-ti-dm: Fix suspend and resume for am3 and am4") Reported-by: H. Nikolaus Schaller" Signed-off-by: Tony Lindgren Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/20200817092428.6176-1-tony@atomide.com --- drivers/clocksource/timer-ti-dm-systimer.c | 44 +++++++++++----------- 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/drivers/clocksource/timer-ti-dm-systimer.c b/drivers/clocksource/timer-ti-dm-systimer.c index f6fd1c1cc527..33b3e8aa2cc5 100644 --- a/drivers/clocksource/timer-ti-dm-systimer.c +++ b/drivers/clocksource/timer-ti-dm-systimer.c @@ -69,12 +69,33 @@ static bool dmtimer_systimer_revision1(struct dmtimer_systimer *t) return !(tidr >> 16); } +static void dmtimer_systimer_enable(struct dmtimer_systimer *t) +{ + u32 val; + + if (dmtimer_systimer_revision1(t)) + val = DMTIMER_TYPE1_ENABLE; + else + val = DMTIMER_TYPE2_ENABLE; + + writel_relaxed(val, t->base + t->sysc); +} + +static void dmtimer_systimer_disable(struct dmtimer_systimer *t) +{ + if (!dmtimer_systimer_revision1(t)) + return; + + writel_relaxed(DMTIMER_TYPE1_DISABLE, t->base + t->sysc); +} + static int __init dmtimer_systimer_type1_reset(struct dmtimer_systimer *t) { void __iomem *syss = t->base + OMAP_TIMER_V1_SYS_STAT_OFFSET; int ret; u32 l; + dmtimer_systimer_enable(t); writel_relaxed(BIT(1) | BIT(2), t->base + t->ifctrl); ret = readl_poll_timeout_atomic(syss, l, l & BIT(0), 100, DMTIMER_RESET_WAIT); @@ -88,6 +109,7 @@ static int __init dmtimer_systimer_type2_reset(struct dmtimer_systimer *t) void __iomem *sysc = t->base + t->sysc; u32 l; + dmtimer_systimer_enable(t); l = readl_relaxed(sysc); l |= BIT(0); writel_relaxed(l, sysc); @@ -336,26 +358,6 @@ static int __init dmtimer_systimer_init_clock(struct dmtimer_systimer *t, return 0; } -static void dmtimer_systimer_enable(struct dmtimer_systimer *t) -{ - u32 val; - - if (dmtimer_systimer_revision1(t)) - val = DMTIMER_TYPE1_ENABLE; - else - val = DMTIMER_TYPE2_ENABLE; - - writel_relaxed(val, t->base + t->sysc); -} - -static void dmtimer_systimer_disable(struct dmtimer_systimer *t) -{ - if (!dmtimer_systimer_revision1(t)) - return; - - writel_relaxed(DMTIMER_TYPE1_DISABLE, t->base + t->sysc); -} - static int __init dmtimer_systimer_setup(struct device_node *np, struct dmtimer_systimer *t) { @@ -409,8 +411,8 @@ static int __init dmtimer_systimer_setup(struct device_node *np, t->wakeup = regbase + _OMAP_TIMER_WAKEUP_EN_OFFSET; t->ifctrl = regbase + _OMAP_TIMER_IF_CTRL_OFFSET; - dmtimer_systimer_enable(t); dmtimer_systimer_reset(t); + dmtimer_systimer_enable(t); pr_debug("dmtimer rev %08x sysc %08x\n", readl_relaxed(t->base), readl_relaxed(t->base + t->sysc)); From bc6717d55d07110d8f3c6d31ec2af50c11b07091 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Tue, 18 Aug 2020 07:31:17 +0000 Subject: [PATCH 006/322] clocksource/drivers/timer-gx6605s: Fixup counter reload When the timer counts to the upper limit, an overflow interrupt is generated, and the count is reset with the value in the TIME_INI register. But the software expects to start counting from 0 when the count overflows, so it forces TIME_INI to 0 to solve the potential interrupt storm problem. Signed-off-by: Guo Ren Tested-by: Xu Kai Cc: Daniel Lezcano Cc: Thomas Gleixner Signed-off-by: Daniel Lezcano Link: https://lore.kernel.org/r/1597735877-71115-1-git-send-email-guoren@kernel.org --- drivers/clocksource/timer-gx6605s.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clocksource/timer-gx6605s.c b/drivers/clocksource/timer-gx6605s.c index 80d0939d040b..8d386adbe800 100644 --- a/drivers/clocksource/timer-gx6605s.c +++ b/drivers/clocksource/timer-gx6605s.c @@ -28,6 +28,7 @@ static irqreturn_t gx6605s_timer_interrupt(int irq, void *dev) void __iomem *base = timer_of_base(to_timer_of(ce)); writel_relaxed(GX6605S_STATUS_CLR, base + TIMER_STATUS); + writel_relaxed(0, base + TIMER_INI); ce->event_handler(ce); From 45a36a18d01907710bad5258d81f76c18882ad88 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 4 Aug 2020 11:37:29 +0200 Subject: [PATCH 007/322] xfrmi: drop ignore_df check before updating pmtu xfrm interfaces currently test for !skb->ignore_df when deciding whether to update the pmtu on the skb's dst. Because of this, no pmtu exception is created when we do something like: ping -s 1438 By dropping this check, the pmtu exception will be created and the next ping attempt will work. Fixes: f203b76d7809 ("xfrm: Add virtual xfrm interfaces") Reported-by: Xiumei Mu Signed-off-by: Sabrina Dubroca Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_interface.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index eb8181987620..a8f66112c52b 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -303,7 +303,7 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) } mtu = dst_mtu(dst); - if (!skb->ignore_df && skb->len > mtu) { + if (skb->len > mtu) { skb_dst_update_pmtu_no_confirm(skb, mtu); if (skb->protocol == htons(ETH_P_IPV6)) { From 5fcface659aab7eac4bd65dd116d98b8f7bb88d5 Mon Sep 17 00:00:00 2001 From: Taiping Lai Date: Mon, 31 Aug 2020 17:09:47 +0800 Subject: [PATCH 008/322] gpio: sprd: Clear interrupt when setting the type as edge The raw interrupt status of GPIO maybe set before the interrupt is enabled, which would trigger the interrupt event once enabled it from user side. This is the case for edge interrupts only. Adding a clear operation when setting interrupt type can avoid that. There're a few considerations for the solution: 1) This issue is for edge interrupt only; The interrupts requested by users are IRQ_TYPE_LEVEL_HIGH as default, so clearing interrupt when request is useless. 2) The interrupt type can be set to edge when request and following up with clearing it though, but the problem is still there once users set the interrupt type to level trggier. 3) We can add a clear operation after each time of setting interrupt enable bit, but it is redundant for level trigger interrupt. Therefore, the solution is this patch seems the best for now. Fixes: 9a3821c2bb47 ("gpio: Add GPIO driver for Spreadtrum SC9860 platform") Signed-off-by: Taiping Lai Signed-off-by: Chunyan Zhang Reviewed-by: Baolin Wang Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-sprd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpio/gpio-sprd.c b/drivers/gpio/gpio-sprd.c index d7314d39ab65..36ea8a3bd451 100644 --- a/drivers/gpio/gpio-sprd.c +++ b/drivers/gpio/gpio-sprd.c @@ -149,17 +149,20 @@ static int sprd_gpio_irq_set_type(struct irq_data *data, sprd_gpio_update(chip, offset, SPRD_GPIO_IS, 0); sprd_gpio_update(chip, offset, SPRD_GPIO_IBE, 0); sprd_gpio_update(chip, offset, SPRD_GPIO_IEV, 1); + sprd_gpio_update(chip, offset, SPRD_GPIO_IC, 1); irq_set_handler_locked(data, handle_edge_irq); break; case IRQ_TYPE_EDGE_FALLING: sprd_gpio_update(chip, offset, SPRD_GPIO_IS, 0); sprd_gpio_update(chip, offset, SPRD_GPIO_IBE, 0); sprd_gpio_update(chip, offset, SPRD_GPIO_IEV, 0); + sprd_gpio_update(chip, offset, SPRD_GPIO_IC, 1); irq_set_handler_locked(data, handle_edge_irq); break; case IRQ_TYPE_EDGE_BOTH: sprd_gpio_update(chip, offset, SPRD_GPIO_IS, 0); sprd_gpio_update(chip, offset, SPRD_GPIO_IBE, 1); + sprd_gpio_update(chip, offset, SPRD_GPIO_IC, 1); irq_set_handler_locked(data, handle_edge_irq); break; case IRQ_TYPE_LEVEL_HIGH: From 545e5c571662b1cd79d9588f9d3b6e36985b8007 Mon Sep 17 00:00:00 2001 From: Antony Antony Date: Fri, 4 Sep 2020 08:49:38 +0200 Subject: [PATCH 009/322] xfrm: clone XFRMA_SET_MARK in xfrm_do_migrate XFRMA_SET_MARK and XFRMA_SET_MARK_MASK was not cloned from the old to the new. Migrate these two attributes during XFRMA_MSG_MIGRATE Fixes: 9b42c1f179a6 ("xfrm: Extend the output_mark to support input direction and masking.") Signed-off-by: Antony Antony Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 69520ad3d83b..3a000f289dcd 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1510,6 +1510,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, } memcpy(&x->mark, &orig->mark, sizeof(x->mark)); + memcpy(&x->props.smark, &orig->props.smark, sizeof(x->props.smark)); if (xfrm_init_state(x) < 0) goto error; From 91a46c6d1b4fcbfa4773df9421b8ad3e58088101 Mon Sep 17 00:00:00 2001 From: Antony Antony Date: Fri, 4 Sep 2020 08:49:55 +0200 Subject: [PATCH 010/322] xfrm: clone XFRMA_REPLAY_ESN_VAL in xfrm_do_migrate XFRMA_REPLAY_ESN_VAL was not cloned completely from the old to the new. Migrate this attribute during XFRMA_MSG_MIGRATE v1->v2: - move curleft cloning to a separate patch Fixes: af2f464e326e ("xfrm: Assign esn pointers when cloning a state") Signed-off-by: Antony Antony Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 2737d24ec244..9e806c781025 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1773,21 +1773,17 @@ static inline unsigned int xfrm_replay_state_esn_len(struct xfrm_replay_state_es static inline int xfrm_replay_clone(struct xfrm_state *x, struct xfrm_state *orig) { - x->replay_esn = kzalloc(xfrm_replay_state_esn_len(orig->replay_esn), + + x->replay_esn = kmemdup(orig->replay_esn, + xfrm_replay_state_esn_len(orig->replay_esn), GFP_KERNEL); if (!x->replay_esn) return -ENOMEM; - - x->replay_esn->bmp_len = orig->replay_esn->bmp_len; - x->replay_esn->replay_window = orig->replay_esn->replay_window; - - x->preplay_esn = kmemdup(x->replay_esn, - xfrm_replay_state_esn_len(x->replay_esn), + x->preplay_esn = kmemdup(orig->preplay_esn, + xfrm_replay_state_esn_len(orig->preplay_esn), GFP_KERNEL); - if (!x->preplay_esn) { - kfree(x->replay_esn); + if (!x->preplay_esn) return -ENOMEM; - } return 0; } From 7aa05d304785204703a67a6aa7f1db402889a172 Mon Sep 17 00:00:00 2001 From: Antony Antony Date: Fri, 4 Sep 2020 08:50:11 +0200 Subject: [PATCH 011/322] xfrm: clone XFRMA_SEC_CTX in xfrm_do_migrate XFRMA_SEC_CTX was not cloned from the old to the new. Migrate this attribute during XFRMA_MSG_MIGRATE v1->v2: - return -ENOMEM on error v2->v3: - fix return type to int Fixes: 80c9abaabf42 ("[XFRM]: Extension for dynamic update of endpoint address(es)") Signed-off-by: Antony Antony Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 3a000f289dcd..5e5ed8108498 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1441,6 +1441,30 @@ out: EXPORT_SYMBOL(xfrm_state_add); #ifdef CONFIG_XFRM_MIGRATE +static inline int clone_security(struct xfrm_state *x, struct xfrm_sec_ctx *security) +{ + struct xfrm_user_sec_ctx *uctx; + int size = sizeof(*uctx) + security->ctx_len; + int err; + + uctx = kmalloc(size, GFP_KERNEL); + if (!uctx) + return -ENOMEM; + + uctx->exttype = XFRMA_SEC_CTX; + uctx->len = size; + uctx->ctx_doi = security->ctx_doi; + uctx->ctx_alg = security->ctx_alg; + uctx->ctx_len = security->ctx_len; + memcpy(uctx + 1, security->ctx_str, security->ctx_len); + err = security_xfrm_state_alloc(x, uctx); + kfree(uctx); + if (err) + return err; + + return 0; +} + static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, struct xfrm_encap_tmpl *encap) { @@ -1497,6 +1521,10 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, goto error; } + if (orig->security) + if (clone_security(x, orig->security)) + goto error; + if (orig->coaddr) { x->coaddr = kmemdup(orig->coaddr, sizeof(*x->coaddr), GFP_KERNEL); From 8366685b2883e523f91e9816d7be371eb1144749 Mon Sep 17 00:00:00 2001 From: Antony Antony Date: Fri, 4 Sep 2020 08:50:29 +0200 Subject: [PATCH 012/322] xfrm: clone whole liftime_cur structure in xfrm_do_migrate When we clone state only add_time was cloned. It missed values like bytes, packets. Now clone the all members of the structure. v1->v3: - use memcpy to copy the entire structure Fixes: 80c9abaabf42 ("[XFRM]: Extension for dynamic update of endpoint address(es)") Signed-off-by: Antony Antony Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 5e5ed8108498..5ff392e6f3c1 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1550,7 +1550,7 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, x->tfcpad = orig->tfcpad; x->replay_maxdiff = orig->replay_maxdiff; x->replay_maxage = orig->replay_maxage; - x->curlft.add_time = orig->curlft.add_time; + memcpy(&x->curlft, &orig->curlft, sizeof(x->curlft)); x->km.state = orig->km.state; x->km.seq = orig->km.seq; x->replay = orig->replay; From 850280156f6421a404f2351bee07a0e7bedfd4c6 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 5 Sep 2020 15:46:48 +0300 Subject: [PATCH 013/322] phy: ti: am654: Fix a leak in serdes_am654_probe() If devm_phy_create() fails then we need to call of_clk_del_provider(node) to undo the call to of_clk_add_provider(). Fixes: 71e2f5c5c224 ("phy: ti: Add a new SERDES driver for TI's AM654x SoC") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/20200905124648.GA183976@mwanda Signed-off-by: Vinod Koul --- drivers/phy/ti/phy-am654-serdes.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/phy/ti/phy-am654-serdes.c b/drivers/phy/ti/phy-am654-serdes.c index a174b3c3f010..819c49af169a 100644 --- a/drivers/phy/ti/phy-am654-serdes.c +++ b/drivers/phy/ti/phy-am654-serdes.c @@ -725,8 +725,10 @@ static int serdes_am654_probe(struct platform_device *pdev) pm_runtime_enable(dev); phy = devm_phy_create(dev, NULL, &ops); - if (IS_ERR(phy)) - return PTR_ERR(phy); + if (IS_ERR(phy)) { + ret = PTR_ERR(phy); + goto clk_err; + } phy_set_drvdata(phy, am654_phy); phy_provider = devm_of_phy_provider_register(dev, serdes_am654_xlate); From e338eecf3fe79054e8a31b8c39a1234b5acfdabe Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Fri, 4 Sep 2020 15:09:04 +0100 Subject: [PATCH 014/322] PCI: rockchip: Fix bus checks in rockchip_pcie_valid_device() The root bus checks rework in d84c572de1a3 ("PCI: rockchip: Use pci_is_root_bus() to check if bus is root bus") caused a regression whereby in rockchip_pcie_valid_device() if the bus parameter is the root bus and the dev value == 0, the function should return 1 (ie true) without checking if the bus->parent pointer is a root bus because that triggers a NULL pointer dereference. Fix this by streamlining the root bus detection. Fixes: d84c572de1a3 ("PCI: rockchip: Use pci_is_root_bus() to check if bus is root bus") Link: https://lore.kernel.org/r/20200904140904.944-1-lorenzo.pieralisi@arm.com Reported-by: Samuel Dionne-Riel Tested-by: Samuel Dionne-Riel Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Reviewed-by: Rob Herring Cc: Shawn Lin --- drivers/pci/controller/pcie-rockchip-host.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/pci/controller/pcie-rockchip-host.c b/drivers/pci/controller/pcie-rockchip-host.c index 0bb2fb3e8a0b..9705059523a6 100644 --- a/drivers/pci/controller/pcie-rockchip-host.c +++ b/drivers/pci/controller/pcie-rockchip-host.c @@ -71,16 +71,13 @@ static void rockchip_pcie_update_txcredit_mui(struct rockchip_pcie *rockchip) static int rockchip_pcie_valid_device(struct rockchip_pcie *rockchip, struct pci_bus *bus, int dev) { - /* access only one slot on each root port */ - if (pci_is_root_bus(bus) && dev > 0) - return 0; - /* - * do not read more than one device on the bus directly attached + * Access only one slot on each root port. + * Do not read more than one device on the bus directly attached * to RC's downstream side. */ - if (pci_is_root_bus(bus->parent) && dev > 0) - return 0; + if (pci_is_root_bus(bus) || pci_is_root_bus(bus->parent)) + return dev == 0; return 1; } From d89a80ddbfd86d04d0901e576ca2016d324bcb85 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Tue, 25 Aug 2020 16:31:05 +0300 Subject: [PATCH 015/322] dt-bindings: crypto: sa2ul: fix a DT binding check warning DT binding check produces a warning about bad cell size: Documentation/devicetree/bindings/crypto/ti,sa2ul.example.dt.yaml: example-0: crypto@4e00000:reg:0: [0, 81788928, 0, 4608] is too long From schema: python3.6/site-packages/dtschema/schemas/reg.yaml Fix this by reducing the address sizes for the example to 1 cell from current 2. Fixes: 2ce9a7299bf6 ("dt-bindings: crypto: Add TI SA2UL crypto accelerator documentation") Reported-by: Rob Herring Cc: Rob Herring Cc: devicetree@vger.kernel.org Signed-off-by: Tero Kristo Link: https://lore.kernel.org/r/20200825133106.21542-2-t-kristo@ti.com Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/crypto/ti,sa2ul.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/crypto/ti,sa2ul.yaml b/Documentation/devicetree/bindings/crypto/ti,sa2ul.yaml index 85ef69ffebed..1465c9ebaf93 100644 --- a/Documentation/devicetree/bindings/crypto/ti,sa2ul.yaml +++ b/Documentation/devicetree/bindings/crypto/ti,sa2ul.yaml @@ -67,7 +67,7 @@ examples: main_crypto: crypto@4e00000 { compatible = "ti,j721-sa2ul"; - reg = <0x0 0x4e00000 0x0 0x1200>; + reg = <0x4e00000 0x1200>; power-domains = <&k3_pds 264 TI_SCI_PD_EXCLUSIVE>; dmas = <&main_udmap 0xc000>, <&main_udmap 0x4000>, <&main_udmap 0x4001>; From 214b0e1ad01abf4c1f6d8d28fa096bf167e47cef Mon Sep 17 00:00:00 2001 From: dillon min Date: Thu, 3 Sep 2020 15:30:21 +0800 Subject: [PATCH 016/322] gpio: tc35894: fix up tc35894 interrupt configuration The offset of regmap is incorrect, j * 8 is move to the wrong register. for example: asume i = 0, j = 1. we want to set KPY5 as interrupt falling edge mode, regmap[0][1] should be TC3589x_GPIOIBE1 0xcd but, regmap[i] + j * 8 = TC3589x_GPIOIBE0 + 8 ,point to 0xd4, this is TC3589x_GPIOIE2 not TC3589x_GPIOIBE1. Fixes: d88b25be3584 ("gpio: Add TC35892 GPIO driver") Cc: Cc: stable@vger.kernel.org Signed-off-by: dillon min Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-tc3589x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-tc3589x.c b/drivers/gpio/gpio-tc3589x.c index 58b0da9eb76f..ea3f68a28fea 100644 --- a/drivers/gpio/gpio-tc3589x.c +++ b/drivers/gpio/gpio-tc3589x.c @@ -212,7 +212,7 @@ static void tc3589x_gpio_irq_sync_unlock(struct irq_data *d) continue; tc3589x_gpio->oldregs[i][j] = new; - tc3589x_reg_write(tc3589x, regmap[i] + j * 8, new); + tc3589x_reg_write(tc3589x, regmap[i] + j, new); } } From 45ccf6556720293323c20cda717756014ff63007 Mon Sep 17 00:00:00 2001 From: Ahmad Fatoum Date: Mon, 7 Sep 2020 17:31:35 +0200 Subject: [PATCH 017/322] gpio: siox: explicitly support only threaded irqs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The gpio-siox driver uses handle_nested_irq() to implement its interrupt support. This is only capable of handling threaded irq actions. For a hardirq action it triggers a NULL pointer oops. (It calls action->thread_fn which is NULL then.) Prevent registration of a hardirq action by setting gpio_irq_chip::threaded to true. Cc: u.kleine-koenig@pengutronix.de Fixes: be8c8facc707 ("gpio: new driver to work with a 8x12 siox") Cc: stable@vger.kernel.org Signed-off-by: Ahmad Fatoum Acked-by: Uwe Kleine-König Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-siox.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-siox.c b/drivers/gpio/gpio-siox.c index 26e1fe092304..f8c5e9fc4bac 100644 --- a/drivers/gpio/gpio-siox.c +++ b/drivers/gpio/gpio-siox.c @@ -245,6 +245,7 @@ static int gpio_siox_probe(struct siox_device *sdevice) girq->chip = &ddata->ichip; girq->default_type = IRQ_TYPE_NONE; girq->handler = handle_level_irq; + girq->threaded = true; ret = devm_gpiochip_add_data(dev, &ddata->gchip, NULL); if (ret) From 1b02d9e770cd7087f34c743f85ccf5ea8372b047 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 8 Sep 2020 15:07:49 +0200 Subject: [PATCH 018/322] gpio: mockup: fix resource leak in error path If the module init function fails after creating the debugs directory, it's never removed. Add proper cleanup calls to avoid this resource leak. Fixes: 9202ba2397d1 ("gpio: mockup: implement event injecting over debugfs") Cc: Signed-off-by: Bartosz Golaszewski Reviewed-by: Andy Shevchenko --- drivers/gpio/gpio-mockup.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpio/gpio-mockup.c b/drivers/gpio/gpio-mockup.c index bc345185db26..1652897fdf90 100644 --- a/drivers/gpio/gpio-mockup.c +++ b/drivers/gpio/gpio-mockup.c @@ -552,6 +552,7 @@ static int __init gpio_mockup_init(void) err = platform_driver_register(&gpio_mockup_driver); if (err) { gpio_mockup_err("error registering platform driver\n"); + debugfs_remove_recursive(gpio_mockup_dbg_dir); return err; } @@ -582,6 +583,7 @@ static int __init gpio_mockup_init(void) gpio_mockup_err("error registering device"); platform_driver_unregister(&gpio_mockup_driver); gpio_mockup_unregister_pdevs(); + debugfs_remove_recursive(gpio_mockup_dbg_dir); return PTR_ERR(pdev); } From ec653df2a0cbc306a4bfcb0e3484d318fa779002 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 1 Jul 2020 09:39:49 +0200 Subject: [PATCH 019/322] drm/vc4/vc4_hdmi: fill ASoC card owner card->owner is a required property and since commit 81033c6b584b ("ALSA: core: Warn on empty module") a warning is issued if it is empty. Fix lack of it. This fixes following warning observed on RaspberryPi 3B board with ARM 32bit kernel and multi_v7_defconfig: ------------[ cut here ]------------ WARNING: CPU: 1 PID: 210 at sound/core/init.c:207 snd_card_new+0x378/0x398 [snd] Modules linked in: vc4(+) snd_soc_core ac97_bus snd_pcm_dmaengine bluetooth snd_pcm snd_timer crc32_arm_ce raspberrypi_hwmon snd soundcore ecdh_generic ecc bcm2835_thermal phy_generic CPU: 1 PID: 210 Comm: systemd-udevd Not tainted 5.8.0-rc1-00027-g81033c6b584b #1087 Hardware name: BCM2835 [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0xd4/0xe8) [] (dump_stack) from [] (__warn+0xdc/0xf4) [] (__warn) from [] (warn_slowpath_fmt+0xb0/0xb8) [] (warn_slowpath_fmt) from [] (snd_card_new+0x378/0x398 [snd]) [] (snd_card_new [snd]) from [] (snd_soc_bind_card+0x280/0x99c [snd_soc_core]) [] (snd_soc_bind_card [snd_soc_core]) from [] (devm_snd_soc_register_card+0x34/0x6c [snd_soc_core]) [] (devm_snd_soc_register_card [snd_soc_core]) from [] (vc4_hdmi_bind+0x43c/0x5f4 [vc4]) [] (vc4_hdmi_bind [vc4]) from [] (component_bind_all+0xec/0x24c) [] (component_bind_all) from [] (vc4_drm_bind+0xd4/0x174 [vc4]) [] (vc4_drm_bind [vc4]) from [] (try_to_bring_up_master+0x160/0x1b0) [] (try_to_bring_up_master) from [] (component_master_add_with_match+0xd0/0x104) [] (component_master_add_with_match) from [] (vc4_platform_drm_probe+0x9c/0xbc [vc4]) [] (vc4_platform_drm_probe [vc4]) from [] (platform_drv_probe+0x6c/0xa4) [] (platform_drv_probe) from [] (really_probe+0x210/0x350) [] (really_probe) from [] (driver_probe_device+0x5c/0xb4) [] (driver_probe_device) from [] (device_driver_attach+0x58/0x60) [] (device_driver_attach) from [] (__driver_attach+0x80/0xbc) [] (__driver_attach) from [] (bus_for_each_dev+0x68/0xb4) [] (bus_for_each_dev) from [] (bus_add_driver+0x130/0x1e8) [] (bus_add_driver) from [] (driver_register+0x78/0x110) [] (driver_register) from [] (do_one_initcall+0x50/0x220) [] (do_one_initcall) from [] (do_init_module+0x60/0x210) [] (do_init_module) from [] (load_module+0x1e34/0x2338) [] (load_module) from [] (sys_finit_module+0xac/0xbc) [] (sys_finit_module) from [] (ret_fast_syscall+0x0/0x54) Exception stack(0xeded9fa8 to 0xeded9ff0) ... ---[ end trace 6414689569c2bc08 ]--- Fixes: bb7d78568814 ("drm/vc4: Add HDMI audio support") Suggested-by: Takashi Iwai Signed-off-by: Marek Szyprowski Tested-by: Stefan Wahren Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20200701073949.28941-1-m.szyprowski@samsung.com --- drivers/gpu/drm/vc4/vc4_hdmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index 15a11cd4de25..6339c6f0f571 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -1117,6 +1117,7 @@ static int vc4_hdmi_audio_init(struct vc4_hdmi *hdmi) card->num_links = 1; card->name = "vc4-hdmi"; card->dev = dev; + card->owner = THIS_MODULE; /* * Be careful, snd_soc_register_card() calls dev_set_drvdata() and From f8d4f44df056c5b504b0d49683fb7279218fd207 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 9 Sep 2020 22:25:06 -0400 Subject: [PATCH 020/322] epoll: do not insert into poll queues until all sanity checks are done Signed-off-by: Al Viro --- fs/eventpoll.c | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 8107e06d7f6f..5207dfc85b78 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1522,6 +1522,22 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, RCU_INIT_POINTER(epi->ws, NULL); } + /* Add the current item to the list of active epoll hook for this file */ + spin_lock(&tfile->f_lock); + list_add_tail_rcu(&epi->fllink, &tfile->f_ep_links); + spin_unlock(&tfile->f_lock); + + /* + * Add the current item to the RB tree. All RB tree operations are + * protected by "mtx", and ep_insert() is called with "mtx" held. + */ + ep_rbtree_insert(ep, epi); + + /* now check if we've created too many backpaths */ + error = -EINVAL; + if (full_check && reverse_path_check()) + goto error_remove_epi; + /* Initialize the poll table using the queue callback */ epq.epi = epi; init_poll_funcptr(&epq.pt, ep_ptable_queue_proc); @@ -1544,22 +1560,6 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, if (epi->nwait < 0) goto error_unregister; - /* Add the current item to the list of active epoll hook for this file */ - spin_lock(&tfile->f_lock); - list_add_tail_rcu(&epi->fllink, &tfile->f_ep_links); - spin_unlock(&tfile->f_lock); - - /* - * Add the current item to the RB tree. All RB tree operations are - * protected by "mtx", and ep_insert() is called with "mtx" held. - */ - ep_rbtree_insert(ep, epi); - - /* now check if we've created too many backpaths */ - error = -EINVAL; - if (full_check && reverse_path_check()) - goto error_remove_epi; - /* We have to drop the new item inside our item list to keep track of it */ write_lock_irq(&ep->lock); @@ -1588,6 +1588,8 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event, return 0; +error_unregister: + ep_unregister_pollwait(ep, epi); error_remove_epi: spin_lock(&tfile->f_lock); list_del_rcu(&epi->fllink); @@ -1595,9 +1597,6 @@ error_remove_epi: rb_erase_cached(&epi->rbn, &ep->rbr); -error_unregister: - ep_unregister_pollwait(ep, epi); - /* * We need to do this because an event could have been arrived on some * allocated wait queue. Note that we don't care about the ep->ovflist From cab4c03b4ba54c8d9378298cacb8bc0fd74ceece Mon Sep 17 00:00:00 2001 From: Martin Cerveny Date: Sun, 6 Sep 2020 18:21:39 +0200 Subject: [PATCH 021/322] drm/sun4i: sun8i-csc: Secondary CSC register correction "Allwinner V3s" has secondary video layer (VI). Decoded video is displayed in wrong colors until secondary CSC registers are programmed correctly. Fixes: 883029390550 ("drm/sun4i: Add DE2 CSC library") Signed-off-by: Martin Cerveny Reviewed-by: Jernej Skrabec Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20200906162140.5584-2-m.cerveny@computer.org --- drivers/gpu/drm/sun4i/sun8i_csc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/sun4i/sun8i_csc.h b/drivers/gpu/drm/sun4i/sun8i_csc.h index f42441b1b14d..a55a38ad849c 100644 --- a/drivers/gpu/drm/sun4i/sun8i_csc.h +++ b/drivers/gpu/drm/sun4i/sun8i_csc.h @@ -12,7 +12,7 @@ struct sun8i_mixer; /* VI channel CSC units offsets */ #define CCSC00_OFFSET 0xAA050 -#define CCSC01_OFFSET 0xFA000 +#define CCSC01_OFFSET 0xFA050 #define CCSC10_OFFSET 0xA0000 #define CCSC11_OFFSET 0xF0000 From 74ea06164cda81dc80e97790164ca533fd7e3087 Mon Sep 17 00:00:00 2001 From: Martin Cerveny Date: Sun, 6 Sep 2020 18:21:40 +0200 Subject: [PATCH 022/322] drm/sun4i: mixer: Extend regmap max_register Better guess. Secondary CSC registers are from 0xF0000. Signed-off-by: Martin Cerveny Reviewed-by: Jernej Skrabec Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20200906162140.5584-3-m.cerveny@computer.org --- drivers/gpu/drm/sun4i/sun8i_mixer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/sun4i/sun8i_mixer.c b/drivers/gpu/drm/sun4i/sun8i_mixer.c index cc4fb916318f..c3304028e3dc 100644 --- a/drivers/gpu/drm/sun4i/sun8i_mixer.c +++ b/drivers/gpu/drm/sun4i/sun8i_mixer.c @@ -307,7 +307,7 @@ static struct regmap_config sun8i_mixer_regmap_config = { .reg_bits = 32, .val_bits = 32, .reg_stride = 4, - .max_register = 0xbfffc, /* guessed */ + .max_register = 0xffffc, /* guessed */ }; static int sun8i_mixer_of_get_id(struct device_node *node) From 288eceb0858323d66bff03cf386630a797b248ad Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Tue, 8 Sep 2020 12:02:53 +0200 Subject: [PATCH 023/322] media: cec-adap.c: don't use flush_scheduled_work() For some inexplicable reason I decided to call flush_scheduled_work() instead of cancel_delayed_work_sync(). The problem with that is that flush_scheduled_work() waits for *all* queued scheduled work to be completed instead of just the work itself. This can cause a deadlock if a CEC driver also schedules work that takes the same lock. See the comments for flush_scheduled_work() in linux/workqueue.h. This is exactly what has been observed a few times. This patch simply replaces flush_scheduled_work() by cancel_delayed_work_sync(). Signed-off-by: Hans Verkuil Cc: # for v5.8 and up Signed-off-by: Mauro Carvalho Chehab --- drivers/media/cec/core/cec-adap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/cec/core/cec-adap.c b/drivers/media/cec/core/cec-adap.c index 4efe8014445e..926d65db6d3e 100644 --- a/drivers/media/cec/core/cec-adap.c +++ b/drivers/media/cec/core/cec-adap.c @@ -1199,7 +1199,7 @@ void cec_received_msg_ts(struct cec_adapter *adap, /* Cancel the pending timeout work */ if (!cancel_delayed_work(&data->work)) { mutex_unlock(&adap->lock); - flush_scheduled_work(); + cancel_delayed_work_sync(&data->work); mutex_lock(&adap->lock); } /* From 18306c404abe18a0972587a6266830583c60c928 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 10 Sep 2020 08:30:05 -0400 Subject: [PATCH 024/322] epoll: replace ->visited/visited_list with generation count removes the need to clear it, along with the races. Signed-off-by: Al Viro --- fs/eventpoll.c | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 5207dfc85b78..82ab9a25f12f 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -218,8 +218,7 @@ struct eventpoll { struct file *file; /* used to optimize loop detection check */ - struct list_head visited_list_link; - int visited; + u64 gen; #ifdef CONFIG_NET_RX_BUSY_POLL /* used to track busy poll napi_id */ @@ -274,6 +273,8 @@ static long max_user_watches __read_mostly; */ static DEFINE_MUTEX(epmutex); +static u64 loop_check_gen = 0; + /* Used to check for epoll file descriptor inclusion loops */ static struct nested_calls poll_loop_ncalls; @@ -283,9 +284,6 @@ static struct kmem_cache *epi_cache __read_mostly; /* Slab cache used to allocate "struct eppoll_entry" */ static struct kmem_cache *pwq_cache __read_mostly; -/* Visited nodes during ep_loop_check(), so we can unset them when we finish */ -static LIST_HEAD(visited_list); - /* * List of files with newly added links, where we may need to limit the number * of emanating paths. Protected by the epmutex. @@ -1971,13 +1969,12 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests) struct epitem *epi; mutex_lock_nested(&ep->mtx, call_nests + 1); - ep->visited = 1; - list_add(&ep->visited_list_link, &visited_list); + ep->gen = loop_check_gen; for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) { epi = rb_entry(rbp, struct epitem, rbn); if (unlikely(is_file_epoll(epi->ffd.file))) { ep_tovisit = epi->ffd.file->private_data; - if (ep_tovisit->visited) + if (ep_tovisit->gen == loop_check_gen) continue; error = ep_call_nested(&poll_loop_ncalls, ep_loop_check_proc, epi->ffd.file, @@ -2018,18 +2015,8 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests) */ static int ep_loop_check(struct eventpoll *ep, struct file *file) { - int ret; - struct eventpoll *ep_cur, *ep_next; - - ret = ep_call_nested(&poll_loop_ncalls, + return ep_call_nested(&poll_loop_ncalls, ep_loop_check_proc, file, ep, current); - /* clear visited list */ - list_for_each_entry_safe(ep_cur, ep_next, &visited_list, - visited_list_link) { - ep_cur->visited = 0; - list_del(&ep_cur->visited_list_link); - } - return ret; } static void clear_tfile_check_list(void) @@ -2199,6 +2186,7 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds, error = epoll_mutex_lock(&epmutex, 0, nonblock); if (error) goto error_tgt_fput; + loop_check_gen++; full_check = 1; if (is_file_epoll(tf.file)) { error = -ELOOP; @@ -2262,6 +2250,7 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds, error_tgt_fput: if (full_check) { clear_tfile_check_list(); + loop_check_gen++; mutex_unlock(&epmutex); } From fe0a916c1eae8e17e86c3753d13919177d63ed7e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 10 Sep 2020 08:33:27 -0400 Subject: [PATCH 025/322] epoll: EPOLL_CTL_ADD: close the race in decision to take fast path Checking for the lack of epitems refering to the epoll we want to insert into is not enough; we might have an insertion of that epoll into another one that has already collected the set of files to recheck for excessive reverse paths, but hasn't gotten to creating/inserting the epitem for it. However, any such insertion in progress can be detected - it will update the generation count in our epoll when it's done looking through it for files to check. That gets done under ->mtx of our epoll and that allows us to detect that safely. We are *not* holding epmutex here, so the generation count is not stable. However, since both the update of ep->gen by loop check and (later) insertion into ->f_ep_link are done with ep->mtx held, we are fine - the sequence is grab epmutex bump loop_check_gen ... grab tep->mtx // 1 tep->gen = loop_check_gen ... drop tep->mtx // 2 ... grab tep->mtx // 3 ... insert into ->f_ep_link ... drop tep->mtx // 4 bump loop_check_gen drop epmutex and if the fastpath check in another thread happens for that eventpoll, it can come * before (1) - in that case fastpath is just fine * after (4) - we'll see non-empty ->f_ep_link, slow path taken * between (2) and (3) - loop_check_gen is stable, with ->mtx providing barriers and we end up taking slow path. Note that ->f_ep_link emptiness check is slightly racy - we are protected against insertions into that list, but removals can happen right under us. Not a problem - in the worst case we'll end up taking a slow path for no good reason. Signed-off-by: Al Viro --- fs/eventpoll.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 82ab9a25f12f..16313180e4c1 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -2181,6 +2181,7 @@ int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds, goto error_tgt_fput; if (op == EPOLL_CTL_ADD) { if (!list_empty(&f.file->f_ep_links) || + ep->gen == loop_check_gen || is_file_epoll(tf.file)) { mutex_unlock(&ep->mtx); error = epoll_mutex_lock(&epmutex, 0, nonblock); From 505623a2be48b36de533951ced130876a76a2d55 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 10 Sep 2020 15:15:32 +0300 Subject: [PATCH 026/322] spi: spi-fsl-dspi: use XSPI mode instead of DMA for DPAA2 SoCs The arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi device tree lacks DMA channels for DSPI, so naturally, the driver fails to probe: [ 2.945302] fsl-dspi 2100000.spi: rx dma channel not available [ 2.951134] fsl-dspi 2100000.spi: can't get dma channels In retrospect, this should have been obvious, because LS2080A, LS2085A LS2088A and LX2160A don't appear to have an eDMA module at all. Looking again at their datasheets, the CTARE register (which is specific to XSPI functionality) seems to be documented, so switch them to XSPI mode instead. Fixes: 0feaf8f5afe0 ("spi: spi-fsl-dspi: Convert the instantiations that support it to DMA") Reported-by: Qiang Zhao Tested-by: Qiang Zhao Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20200910121532.1138596-1-olteanv@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-fsl-dspi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c index 91c6affe139c..283f2468a2f4 100644 --- a/drivers/spi/spi-fsl-dspi.c +++ b/drivers/spi/spi-fsl-dspi.c @@ -174,17 +174,17 @@ static const struct fsl_dspi_devtype_data devtype_data[] = { .fifo_size = 16, }, [LS2080A] = { - .trans_mode = DSPI_DMA_MODE, + .trans_mode = DSPI_XSPI_MODE, .max_clock_factor = 8, .fifo_size = 4, }, [LS2085A] = { - .trans_mode = DSPI_DMA_MODE, + .trans_mode = DSPI_XSPI_MODE, .max_clock_factor = 8, .fifo_size = 4, }, [LX2160A] = { - .trans_mode = DSPI_DMA_MODE, + .trans_mode = DSPI_XSPI_MODE, .max_clock_factor = 8, .fifo_size = 4, }, From 63c3212e7a37d68c89a13bdaebce869f4e064e67 Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Tue, 8 Sep 2020 09:17:10 +1200 Subject: [PATCH 027/322] pinctrl: mvebu: Fix i2c sda definition for 98DX3236 Per the datasheet the i2c functions use MPP_Sel=0x1. They are documented as using MPP_Sel=0x4 as well but mixing 0x1 and 0x4 is clearly wrong. On the board tested 0x4 resulted in a non-functioning i2c bus so stick with 0x1 which works. Fixes: d7ae8f8dee7f ("pinctrl: mvebu: pinctrl driver for 98DX3236 SoC") Signed-off-by: Chris Packham Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20200907211712.9697-2-chris.packham@alliedtelesis.co.nz Signed-off-by: Linus Walleij --- drivers/pinctrl/mvebu/pinctrl-armada-xp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-xp.c b/drivers/pinctrl/mvebu/pinctrl-armada-xp.c index a767a05fa3a0..48e2a6c56a83 100644 --- a/drivers/pinctrl/mvebu/pinctrl-armada-xp.c +++ b/drivers/pinctrl/mvebu/pinctrl-armada-xp.c @@ -414,7 +414,7 @@ static struct mvebu_mpp_mode mv98dx3236_mpp_modes[] = { MPP_VAR_FUNCTION(0x1, "i2c0", "sck", V_98DX3236_PLUS)), MPP_MODE(15, MPP_VAR_FUNCTION(0x0, "gpio", NULL, V_98DX3236_PLUS), - MPP_VAR_FUNCTION(0x4, "i2c0", "sda", V_98DX3236_PLUS)), + MPP_VAR_FUNCTION(0x1, "i2c0", "sda", V_98DX3236_PLUS)), MPP_MODE(16, MPP_VAR_FUNCTION(0x0, "gpo", NULL, V_98DX3236_PLUS), MPP_VAR_FUNCTION(0x4, "dev", "oe", V_98DX3236_PLUS)), From 28284943ac94014767ecc2f7b3c5747c4a5617a0 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Mon, 14 Sep 2020 11:03:02 +0800 Subject: [PATCH 028/322] drm/i915/gvt: Fix port number for BDW on EDID region setup Current BDW virtual display port is initialized as PORT_B, so need to use same port for VFIO EDID region, otherwise invalid EDID blob pointer is assigned which caused kernel null pointer reference. We might evaluate actual display hotplug for BDW to make this function work as expected, anyway this is always required to be fixed first. Reported-by: Alejandro Sior Cc: Alejandro Sior Fixes: 0178f4ce3c3b ("drm/i915/gvt: Enable vfio edid for all GVT supported platform") Reviewed-by: Hang Yuan Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20200914030302.2775505-1-zhenyuw@linux.intel.com --- drivers/gpu/drm/i915/gvt/vgpu.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 7d361623ff67..041f601b07d0 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -367,6 +367,7 @@ void intel_gvt_destroy_idle_vgpu(struct intel_vgpu *vgpu) static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, struct intel_vgpu_creation_params *param) { + struct drm_i915_private *dev_priv = gvt->gt->i915; struct intel_vgpu *vgpu; int ret; @@ -434,7 +435,10 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, if (ret) goto out_clean_sched_policy; - ret = intel_gvt_hypervisor_set_edid(vgpu, PORT_D); + if (IS_BROADWELL(dev_priv)) + ret = intel_gvt_hypervisor_set_edid(vgpu, PORT_B); + else + ret = intel_gvt_hypervisor_set_edid(vgpu, PORT_D); if (ret) goto out_clean_sched_policy; From 14801c624066a55139c2c57963eb1b859d0a316a Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 14 Sep 2020 11:42:43 +0200 Subject: [PATCH 029/322] mmc: mmc_spi: Fix mmc_spi_dma_alloc() return type for !HAS_DMA MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If CONFIG_NO_DMA=y (e.g. Sun-3 allmodconfig): drivers/mmc/host/mmc_spi.c:1323:15: warning: return type defaults to ‘int’ [-Wreturn-type] static inline mmc_spi_dma_alloc(struct mmc_spi_host *host) { return 0; } ^~~~~~~~~~~~~~~~~ Fix this by adding the missing return type. Fixes: a395acf0f6dc6409 ("mmc: mmc_spi: Allow the driver to be built when CONFIG_HAS_DMA is unset") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20200914094243.3912-1-geert@linux-m68k.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/mmc_spi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c index 5055a7eb134a..18a850f37ddc 100644 --- a/drivers/mmc/host/mmc_spi.c +++ b/drivers/mmc/host/mmc_spi.c @@ -1320,7 +1320,7 @@ static void mmc_spi_dma_free(struct mmc_spi_host *host) DMA_BIDIRECTIONAL); } #else -static inline mmc_spi_dma_alloc(struct mmc_spi_host *host) { return 0; } +static inline int mmc_spi_dma_alloc(struct mmc_spi_host *host) { return 0; } static inline void mmc_spi_dma_free(struct mmc_spi_host *host) {} #endif From 129134e5415d46f38b9978b3809af94ed649b57d Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Fri, 11 Sep 2020 05:07:58 +0200 Subject: [PATCH 030/322] media: media/v4l2: remove V4L2_FLAG_MEMORY_NON_CONSISTENT flag The patch partially reverts some of the UAPI bits of the buffer cache management hints. Namely, the queue consistency (memory coherency) user-space hint because, as it turned out, the kernel implementation of this feature was misusing DMA_ATTR_NON_CONSISTENT. The patch reverts both kernel and user space parts: removes the DMA consistency attr functions, rolls back changes to v4l2_requestbuffers, v4l2_create_buffers structures and corresponding UAPI functions (plus compat32 layer) and cleans up the documentation. [hverkuil: fixed a few typos in the commit log] [hverkuil: fixed vb2_core_reqbufs call in drivers/media/dvb-core/dvb_vb2.c] [mchehab: fixed a typo in the commit log: revers->reverts] Signed-off-by: Christoph Hellwig Signed-off-by: Sergey Senozhatsky Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- .../userspace-api/media/v4l/buffer.rst | 17 ------- .../media/v4l/vidioc-create-bufs.rst | 6 +-- .../media/v4l/vidioc-reqbufs.rst | 12 +---- .../media/common/videobuf2/videobuf2-core.c | 46 +++---------------- .../common/videobuf2/videobuf2-dma-contig.c | 19 -------- .../media/common/videobuf2/videobuf2-dma-sg.c | 3 +- .../media/common/videobuf2/videobuf2-v4l2.c | 18 +------- drivers/media/dvb-core/dvb_vb2.c | 2 +- drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 10 +--- drivers/media/v4l2-core/v4l2-ioctl.c | 5 +- include/media/videobuf2-core.h | 7 +-- include/uapi/linux/videodev2.h | 13 +----- 12 files changed, 23 insertions(+), 135 deletions(-) diff --git a/Documentation/userspace-api/media/v4l/buffer.rst b/Documentation/userspace-api/media/v4l/buffer.rst index 57e752aaf414..2044ed13cd9d 100644 --- a/Documentation/userspace-api/media/v4l/buffer.rst +++ b/Documentation/userspace-api/media/v4l/buffer.rst @@ -701,23 +701,6 @@ Memory Consistency Flags :stub-columns: 0 :widths: 3 1 4 - * .. _`V4L2-FLAG-MEMORY-NON-CONSISTENT`: - - - ``V4L2_FLAG_MEMORY_NON_CONSISTENT`` - - 0x00000001 - - A buffer is allocated either in consistent (it will be automatically - coherent between the CPU and the bus) or non-consistent memory. The - latter can provide performance gains, for instance the CPU cache - sync/flush operations can be avoided if the buffer is accessed by the - corresponding device only and the CPU does not read/write to/from that - buffer. However, this requires extra care from the driver -- it must - guarantee memory consistency by issuing a cache flush/sync when - consistency is needed. If this flag is set V4L2 will attempt to - allocate the buffer in non-consistent memory. The flag takes effect - only if the buffer is used for :ref:`memory mapping ` I/O and the - queue reports the :ref:`V4L2_BUF_CAP_SUPPORTS_MMAP_CACHE_HINTS - ` capability. - .. c:type:: v4l2_memory enum v4l2_memory diff --git a/Documentation/userspace-api/media/v4l/vidioc-create-bufs.rst b/Documentation/userspace-api/media/v4l/vidioc-create-bufs.rst index f2a702870fad..12cf6b44f414 100644 --- a/Documentation/userspace-api/media/v4l/vidioc-create-bufs.rst +++ b/Documentation/userspace-api/media/v4l/vidioc-create-bufs.rst @@ -120,13 +120,9 @@ than the number requested. If you want to just query the capabilities without making any other changes, then set ``count`` to 0, ``memory`` to ``V4L2_MEMORY_MMAP`` and ``format.type`` to the buffer type. - * - __u32 - - ``flags`` - - Specifies additional buffer management attributes. - See :ref:`memory-flags`. * - __u32 - - ``reserved``\ [6] + - ``reserved``\ [7] - A place holder for future extensions. Drivers and applications must set the array to zero. diff --git a/Documentation/userspace-api/media/v4l/vidioc-reqbufs.rst b/Documentation/userspace-api/media/v4l/vidioc-reqbufs.rst index 75d894d9c36c..0e3e2fde65e8 100644 --- a/Documentation/userspace-api/media/v4l/vidioc-reqbufs.rst +++ b/Documentation/userspace-api/media/v4l/vidioc-reqbufs.rst @@ -112,17 +112,10 @@ aborting or finishing any DMA in progress, an implicit ``V4L2_MEMORY_MMAP`` and ``type`` set to the buffer type. This will free any previously allocated buffers, so this is typically something that will be done at the start of the application. - * - union { - - (anonymous) - * - __u32 - - ``flags`` - - Specifies additional buffer management attributes. - See :ref:`memory-flags`. * - __u32 - ``reserved``\ [1] - - Kept for backwards compatibility. Use ``flags`` instead. - * - } - - + - A place holder for future extensions. Drivers and applications + must set the array to zero. .. tabularcolumns:: |p{6.1cm}|p{2.2cm}|p{8.7cm}| @@ -169,7 +162,6 @@ aborting or finishing any DMA in progress, an implicit - This capability is set by the driver to indicate that the queue supports cache and memory management hints. However, it's only valid when the queue is used for :ref:`memory mapping ` streaming I/O. See - :ref:`V4L2_FLAG_MEMORY_NON_CONSISTENT `, :ref:`V4L2_BUF_FLAG_NO_CACHE_INVALIDATE ` and :ref:`V4L2_BUF_FLAG_NO_CACHE_CLEAN `. diff --git a/drivers/media/common/videobuf2/videobuf2-core.c b/drivers/media/common/videobuf2/videobuf2-core.c index f544d3393e9d..4eab6d81cce1 100644 --- a/drivers/media/common/videobuf2/videobuf2-core.c +++ b/drivers/media/common/videobuf2/videobuf2-core.c @@ -721,39 +721,14 @@ int vb2_verify_memory_type(struct vb2_queue *q, } EXPORT_SYMBOL(vb2_verify_memory_type); -static void set_queue_consistency(struct vb2_queue *q, bool consistent_mem) -{ - q->dma_attrs &= ~DMA_ATTR_NON_CONSISTENT; - - if (!vb2_queue_allows_cache_hints(q)) - return; - if (!consistent_mem) - q->dma_attrs |= DMA_ATTR_NON_CONSISTENT; -} - -static bool verify_consistency_attr(struct vb2_queue *q, bool consistent_mem) -{ - bool queue_is_consistent = !(q->dma_attrs & DMA_ATTR_NON_CONSISTENT); - - if (consistent_mem != queue_is_consistent) { - dprintk(q, 1, "memory consistency model mismatch\n"); - return false; - } - return true; -} - int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory, - unsigned int flags, unsigned int *count) + unsigned int *count) { unsigned int num_buffers, allocated_buffers, num_planes = 0; unsigned plane_sizes[VB2_MAX_PLANES] = { }; - bool consistent_mem = true; unsigned int i; int ret; - if (flags & V4L2_FLAG_MEMORY_NON_CONSISTENT) - consistent_mem = false; - if (q->streaming) { dprintk(q, 1, "streaming active\n"); return -EBUSY; @@ -765,8 +740,7 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory, } if (*count == 0 || q->num_buffers != 0 || - (q->memory != VB2_MEMORY_UNKNOWN && q->memory != memory) || - !verify_consistency_attr(q, consistent_mem)) { + (q->memory != VB2_MEMORY_UNKNOWN && q->memory != memory)) { /* * We already have buffers allocated, so first check if they * are not in use and can be freed. @@ -803,7 +777,6 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory, num_buffers = min_t(unsigned int, num_buffers, VB2_MAX_FRAME); memset(q->alloc_devs, 0, sizeof(q->alloc_devs)); q->memory = memory; - set_queue_consistency(q, consistent_mem); /* * Ask the driver how many buffers and planes per buffer it requires. @@ -888,18 +861,14 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory, EXPORT_SYMBOL_GPL(vb2_core_reqbufs); int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory, - unsigned int flags, unsigned int *count, + unsigned int *count, unsigned int requested_planes, const unsigned int requested_sizes[]) { unsigned int num_planes = 0, num_buffers, allocated_buffers; unsigned plane_sizes[VB2_MAX_PLANES] = { }; - bool consistent_mem = true; int ret; - if (flags & V4L2_FLAG_MEMORY_NON_CONSISTENT) - consistent_mem = false; - if (q->num_buffers == VB2_MAX_FRAME) { dprintk(q, 1, "maximum number of buffers already allocated\n"); return -ENOBUFS; @@ -912,15 +881,12 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory, } memset(q->alloc_devs, 0, sizeof(q->alloc_devs)); q->memory = memory; - set_queue_consistency(q, consistent_mem); q->waiting_for_buffers = !q->is_output; } else { if (q->memory != memory) { dprintk(q, 1, "memory model mismatch\n"); return -EINVAL; } - if (!verify_consistency_attr(q, consistent_mem)) - return -EINVAL; } num_buffers = min(*count, VB2_MAX_FRAME - q->num_buffers); @@ -2581,7 +2547,7 @@ static int __vb2_init_fileio(struct vb2_queue *q, int read) fileio->memory = VB2_MEMORY_MMAP; fileio->type = q->type; q->fileio = fileio; - ret = vb2_core_reqbufs(q, fileio->memory, 0, &fileio->count); + ret = vb2_core_reqbufs(q, fileio->memory, &fileio->count); if (ret) goto err_kfree; @@ -2638,7 +2604,7 @@ static int __vb2_init_fileio(struct vb2_queue *q, int read) err_reqbufs: fileio->count = 0; - vb2_core_reqbufs(q, fileio->memory, 0, &fileio->count); + vb2_core_reqbufs(q, fileio->memory, &fileio->count); err_kfree: q->fileio = NULL; @@ -2658,7 +2624,7 @@ static int __vb2_cleanup_fileio(struct vb2_queue *q) vb2_core_streamoff(q, q->type); q->fileio = NULL; fileio->count = 0; - vb2_core_reqbufs(q, fileio->memory, 0, &fileio->count); + vb2_core_reqbufs(q, fileio->memory, &fileio->count); kfree(fileio); dprintk(q, 3, "file io emulator closed\n"); } diff --git a/drivers/media/common/videobuf2/videobuf2-dma-contig.c b/drivers/media/common/videobuf2/videobuf2-dma-contig.c index ec3446cc45b8..7b1b86ec942d 100644 --- a/drivers/media/common/videobuf2/videobuf2-dma-contig.c +++ b/drivers/media/common/videobuf2/videobuf2-dma-contig.c @@ -42,11 +42,6 @@ struct vb2_dc_buf { struct dma_buf_attachment *db_attach; }; -static inline bool vb2_dc_buffer_consistent(unsigned long attr) -{ - return !(attr & DMA_ATTR_NON_CONSISTENT); -} - /*********************************************/ /* scatterlist table functions */ /*********************************************/ @@ -341,13 +336,6 @@ static int vb2_dc_dmabuf_ops_begin_cpu_access(struct dma_buf *dbuf, enum dma_data_direction direction) { - struct vb2_dc_buf *buf = dbuf->priv; - struct sg_table *sgt = buf->dma_sgt; - - if (vb2_dc_buffer_consistent(buf->attrs)) - return 0; - - dma_sync_sg_for_cpu(buf->dev, sgt->sgl, sgt->nents, buf->dma_dir); return 0; } @@ -355,13 +343,6 @@ static int vb2_dc_dmabuf_ops_end_cpu_access(struct dma_buf *dbuf, enum dma_data_direction direction) { - struct vb2_dc_buf *buf = dbuf->priv; - struct sg_table *sgt = buf->dma_sgt; - - if (vb2_dc_buffer_consistent(buf->attrs)) - return 0; - - dma_sync_sg_for_device(buf->dev, sgt->sgl, sgt->nents, buf->dma_dir); return 0; } diff --git a/drivers/media/common/videobuf2/videobuf2-dma-sg.c b/drivers/media/common/videobuf2/videobuf2-dma-sg.c index 0a40e00f0d7e..a86fce5d8ea8 100644 --- a/drivers/media/common/videobuf2/videobuf2-dma-sg.c +++ b/drivers/media/common/videobuf2/videobuf2-dma-sg.c @@ -123,8 +123,7 @@ static void *vb2_dma_sg_alloc(struct device *dev, unsigned long dma_attrs, /* * NOTE: dma-sg allocates memory using the page allocator directly, so * there is no memory consistency guarantee, hence dma-sg ignores DMA - * attributes passed from the upper layer. That means that - * V4L2_FLAG_MEMORY_NON_CONSISTENT has no effect on dma-sg buffers. + * attributes passed from the upper layer. */ buf->pages = kvmalloc_array(buf->num_pages, sizeof(struct page *), GFP_KERNEL | __GFP_ZERO); diff --git a/drivers/media/common/videobuf2/videobuf2-v4l2.c b/drivers/media/common/videobuf2/videobuf2-v4l2.c index 30caad27281e..cfe197df970d 100644 --- a/drivers/media/common/videobuf2/videobuf2-v4l2.c +++ b/drivers/media/common/videobuf2/videobuf2-v4l2.c @@ -722,22 +722,12 @@ static void fill_buf_caps(struct vb2_queue *q, u32 *caps) #endif } -static void clear_consistency_attr(struct vb2_queue *q, - int memory, - unsigned int *flags) -{ - if (!q->allow_cache_hints || memory != V4L2_MEMORY_MMAP) - *flags &= ~V4L2_FLAG_MEMORY_NON_CONSISTENT; -} - int vb2_reqbufs(struct vb2_queue *q, struct v4l2_requestbuffers *req) { int ret = vb2_verify_memory_type(q, req->memory, req->type); fill_buf_caps(q, &req->capabilities); - clear_consistency_attr(q, req->memory, &req->flags); - return ret ? ret : vb2_core_reqbufs(q, req->memory, - req->flags, &req->count); + return ret ? ret : vb2_core_reqbufs(q, req->memory, &req->count); } EXPORT_SYMBOL_GPL(vb2_reqbufs); @@ -769,7 +759,6 @@ int vb2_create_bufs(struct vb2_queue *q, struct v4l2_create_buffers *create) unsigned i; fill_buf_caps(q, &create->capabilities); - clear_consistency_attr(q, create->memory, &create->flags); create->index = q->num_buffers; if (create->count == 0) return ret != -EBUSY ? ret : 0; @@ -813,7 +802,6 @@ int vb2_create_bufs(struct vb2_queue *q, struct v4l2_create_buffers *create) if (requested_sizes[i] == 0) return -EINVAL; return ret ? ret : vb2_core_create_bufs(q, create->memory, - create->flags, &create->count, requested_planes, requested_sizes); @@ -998,12 +986,11 @@ int vb2_ioctl_reqbufs(struct file *file, void *priv, int res = vb2_verify_memory_type(vdev->queue, p->memory, p->type); fill_buf_caps(vdev->queue, &p->capabilities); - clear_consistency_attr(vdev->queue, p->memory, &p->flags); if (res) return res; if (vb2_queue_is_busy(vdev, file)) return -EBUSY; - res = vb2_core_reqbufs(vdev->queue, p->memory, p->flags, &p->count); + res = vb2_core_reqbufs(vdev->queue, p->memory, &p->count); /* If count == 0, then the owner has released all buffers and he is no longer owner of the queue. Otherwise we have a new owner. */ if (res == 0) @@ -1021,7 +1008,6 @@ int vb2_ioctl_create_bufs(struct file *file, void *priv, p->index = vdev->queue->num_buffers; fill_buf_caps(vdev->queue, &p->capabilities); - clear_consistency_attr(vdev->queue, p->memory, &p->flags); /* * If count == 0, then just check if memory and type are valid. * Any -EBUSY result from vb2_verify_memory_type can be mapped to 0. diff --git a/drivers/media/dvb-core/dvb_vb2.c b/drivers/media/dvb-core/dvb_vb2.c index 959d110407a4..6974f1731529 100644 --- a/drivers/media/dvb-core/dvb_vb2.c +++ b/drivers/media/dvb-core/dvb_vb2.c @@ -342,7 +342,7 @@ int dvb_vb2_reqbufs(struct dvb_vb2_ctx *ctx, struct dmx_requestbuffers *req) ctx->buf_siz = req->size; ctx->buf_cnt = req->count; - ret = vb2_core_reqbufs(&ctx->vb_q, VB2_MEMORY_MMAP, 0, &req->count); + ret = vb2_core_reqbufs(&ctx->vb_q, VB2_MEMORY_MMAP, &req->count); if (ret) { ctx->state = DVB_VB2_STATE_NONE; dprintk(1, "[%s] count=%d size=%d errno=%d\n", ctx->name, diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index 593bcf6c3735..a99e82ec9ab6 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -246,9 +246,6 @@ struct v4l2_format32 { * @memory: buffer memory type * @format: frame format, for which buffers are requested * @capabilities: capabilities of this buffer type. - * @flags: additional buffer management attributes (ignored unless the - * queue has V4L2_BUF_CAP_SUPPORTS_MMAP_CACHE_HINTS capability and - * configured for MMAP streaming I/O). * @reserved: future extensions */ struct v4l2_create_buffers32 { @@ -257,8 +254,7 @@ struct v4l2_create_buffers32 { __u32 memory; /* enum v4l2_memory */ struct v4l2_format32 format; __u32 capabilities; - __u32 flags; - __u32 reserved[6]; + __u32 reserved[7]; }; static int __bufsize_v4l2_format(struct v4l2_format32 __user *p32, u32 *size) @@ -359,8 +355,7 @@ static int get_v4l2_create32(struct v4l2_create_buffers __user *p64, { if (!access_ok(p32, sizeof(*p32)) || copy_in_user(p64, p32, - offsetof(struct v4l2_create_buffers32, format)) || - assign_in_user(&p64->flags, &p32->flags)) + offsetof(struct v4l2_create_buffers32, format))) return -EFAULT; return __get_v4l2_format32(&p64->format, &p32->format, aux_buf, aux_space); @@ -422,7 +417,6 @@ static int put_v4l2_create32(struct v4l2_create_buffers __user *p64, copy_in_user(p32, p64, offsetof(struct v4l2_create_buffers32, format)) || assign_in_user(&p32->capabilities, &p64->capabilities) || - assign_in_user(&p32->flags, &p64->flags) || copy_in_user(p32->reserved, p64->reserved, sizeof(p64->reserved))) return -EFAULT; return __put_v4l2_format32(&p64->format, &p32->format); diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c index e3a25ea913ac..ccf947632a3b 100644 --- a/drivers/media/v4l2-core/v4l2-ioctl.c +++ b/drivers/media/v4l2-core/v4l2-ioctl.c @@ -2044,6 +2044,9 @@ static int v4l_reqbufs(const struct v4l2_ioctl_ops *ops, if (ret) return ret; + + CLEAR_AFTER_FIELD(p, capabilities); + return ops->vidioc_reqbufs(file, fh, p); } @@ -2083,7 +2086,7 @@ static int v4l_create_bufs(const struct v4l2_ioctl_ops *ops, if (ret) return ret; - CLEAR_AFTER_FIELD(create, flags); + CLEAR_AFTER_FIELD(create, capabilities); v4l_sanitize_format(&create->format); diff --git a/include/media/videobuf2-core.h b/include/media/videobuf2-core.h index 52ef92049073..bbb3f26fbde9 100644 --- a/include/media/videobuf2-core.h +++ b/include/media/videobuf2-core.h @@ -744,8 +744,6 @@ void vb2_core_querybuf(struct vb2_queue *q, unsigned int index, void *pb); * vb2_core_reqbufs() - Initiate streaming. * @q: pointer to &struct vb2_queue with videobuf2 queue. * @memory: memory type, as defined by &enum vb2_memory. - * @flags: auxiliary queue/buffer management flags. Currently, the only - * used flag is %V4L2_FLAG_MEMORY_NON_CONSISTENT. * @count: requested buffer count. * * Videobuf2 core helper to implement VIDIOC_REQBUF() operation. It is called @@ -770,13 +768,12 @@ void vb2_core_querybuf(struct vb2_queue *q, unsigned int index, void *pb); * Return: returns zero on success; an error code otherwise. */ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory, - unsigned int flags, unsigned int *count); + unsigned int *count); /** * vb2_core_create_bufs() - Allocate buffers and any required auxiliary structs * @q: pointer to &struct vb2_queue with videobuf2 queue. * @memory: memory type, as defined by &enum vb2_memory. - * @flags: auxiliary queue/buffer management flags. * @count: requested buffer count. * @requested_planes: number of planes requested. * @requested_sizes: array with the size of the planes. @@ -794,7 +791,7 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory, * Return: returns zero on success; an error code otherwise. */ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory, - unsigned int flags, unsigned int *count, + unsigned int *count, unsigned int requested_planes, const unsigned int requested_sizes[]); diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index c7b70ff53bc1..235db7754606 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -191,8 +191,6 @@ enum v4l2_memory { V4L2_MEMORY_DMABUF = 4, }; -#define V4L2_FLAG_MEMORY_NON_CONSISTENT (1 << 0) - /* see also http://vektor.theorem.ca/graphics/ycbcr/ */ enum v4l2_colorspace { /* @@ -949,10 +947,7 @@ struct v4l2_requestbuffers { __u32 type; /* enum v4l2_buf_type */ __u32 memory; /* enum v4l2_memory */ __u32 capabilities; - union { - __u32 flags; - __u32 reserved[1]; - }; + __u32 reserved[1]; }; /* capabilities for struct v4l2_requestbuffers and v4l2_create_buffers */ @@ -2456,9 +2451,6 @@ struct v4l2_dbg_chip_info { * @memory: enum v4l2_memory; buffer memory type * @format: frame format, for which buffers are requested * @capabilities: capabilities of this buffer type. - * @flags: additional buffer management attributes (ignored unless the - * queue has V4L2_BUF_CAP_SUPPORTS_MMAP_CACHE_HINTS capability - * and configured for MMAP streaming I/O). * @reserved: future extensions */ struct v4l2_create_buffers { @@ -2467,8 +2459,7 @@ struct v4l2_create_buffers { __u32 memory; struct v4l2_format format; __u32 capabilities; - __u32 flags; - __u32 reserved[6]; + __u32 reserved[7]; }; /* From 4aa1615268a8ac2b20096211d3f9ac53874067d7 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 14 Sep 2020 15:20:18 -0300 Subject: [PATCH 031/322] RDMA/core: Fix ordering of CQ pool destruction rxe will hold a refcount on the IB device as long as CQ objects exist, this causes destruction of a rxe device to hang if the CQ pool has any cached CQs since they are being destroyed after the refcount must go to zero. Treat the CQ pool like a client and create/destroy it before/after all other clients. No users of CQ pool can exist past a client remove call. Link: https://lore.kernel.org/r/e8a240aa-9e9b-3dca-062f-9130b787f29b@acm.org Fixes: c7ff819aefea ("RDMA/core: Introduce shared CQ pool API") Tested-by: Bart Van Assche Tested-by: Yi Zhang Signed-off-by: Bart Van Assche Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/device.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index c36b4d2b61e0..23ee65a9185f 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1285,6 +1285,8 @@ static void disable_device(struct ib_device *device) remove_client_context(device, cid); } + ib_cq_pool_destroy(device); + /* Pairs with refcount_set in enable_device */ ib_device_put(device); wait_for_completion(&device->unreg_completion); @@ -1328,6 +1330,8 @@ static int enable_device_and_get(struct ib_device *device) goto out; } + ib_cq_pool_init(device); + down_read(&clients_rwsem); xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED) { ret = add_client_context(device, client); @@ -1400,7 +1404,6 @@ int ib_register_device(struct ib_device *device, const char *name) goto dev_cleanup; } - ib_cq_pool_init(device); ret = enable_device_and_get(device); dev_set_uevent_suppress(&device->dev, false); /* Mark for userspace that device is ready */ @@ -1455,7 +1458,6 @@ static void __ib_unregister_device(struct ib_device *ib_dev) goto out; disable_device(ib_dev); - ib_cq_pool_destroy(ib_dev); /* Expedite removing unregistered pointers from the hash table */ free_netdevs(ib_dev); From 0212a0483b0a36cc94cfab882b3edbb41fcfe1cd Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 7 Aug 2020 15:31:43 +0200 Subject: [PATCH 032/322] clk: samsung: Keep top BPLL mux on Exynos542x enabled BPLL clock must not be disabled because it is needed for proper DRAM operation. This is normally handled by respective memory devfreq driver, but when that driver is not yet probed or its probe has been deferred the clock might get disabled what causes board hang. Fix this by calling clk_prepare_enable() directly from the clock provider driver. Cc: stable@vger.kernel.org Signed-off-by: Marek Szyprowski Reviewed-by: Lukasz Luba Tested-by: Lukasz Luba Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20200807133143.22748-1-m.szyprowski@samsung.com Fixes: 6e7674c3c6df ("memory: Add DMC driver for Exynos5422") Signed-off-by: Sylwester Nawrocki --- drivers/clk/samsung/clk-exynos5420.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/clk/samsung/clk-exynos5420.c b/drivers/clk/samsung/clk-exynos5420.c index fea33399a632..bd620876544d 100644 --- a/drivers/clk/samsung/clk-exynos5420.c +++ b/drivers/clk/samsung/clk-exynos5420.c @@ -1655,6 +1655,11 @@ static void __init exynos5x_clk_init(struct device_node *np, * main G3D clock enablement status. */ clk_prepare_enable(__clk_lookup("mout_sw_aclk_g3d")); + /* + * Keep top BPLL mux enabled permanently to ensure that DRAM operates + * properly. + */ + clk_prepare_enable(__clk_lookup("mout_bpll")); samsung_clk_of_add_provider(np, ctx); } From 50851f5033a34fb9f2ecefa57fc9efd862634416 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Sat, 12 Sep 2020 15:22:11 +0800 Subject: [PATCH 033/322] spi: bcm2835: Make polling_limit_us static This eliminates the following sparse warning: drivers/spi/spi-bcm2835.c:78:14: warning: symbol 'polling_limit_us' was not declared. Should it be static? Reported-by: Hulk Robot Signed-off-by: Jason Yan Link: https://lore.kernel.org/r/20200912072211.602735-1-yanaijie@huawei.com Signed-off-by: Mark Brown --- drivers/spi/spi-bcm2835.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-bcm2835.c b/drivers/spi/spi-bcm2835.c index c45d76c848c8..41986ac0fbfb 100644 --- a/drivers/spi/spi-bcm2835.c +++ b/drivers/spi/spi-bcm2835.c @@ -75,7 +75,7 @@ #define DRV_NAME "spi-bcm2835" /* define polling limits */ -unsigned int polling_limit_us = 30; +static unsigned int polling_limit_us = 30; module_param(polling_limit_us, uint, 0664); MODULE_PARM_DESC(polling_limit_us, "time in us to run a transfer in polling mode\n"); From 7f04839ec4483563f38062b4dd90253e45447198 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 11 Sep 2020 13:01:47 -0700 Subject: [PATCH 034/322] scsi: lpfc: Fix initial FLOGI failure due to BBSCN not supported Initial FLOGIs are failing with the following message: lpfc 0000:13:00.1: 1:(0):0820 FLOGI Failed (x300). BBCredit Not Supported In a prior patch, the READ_SPARAM command was re-ordered to post after CONFIG_LINK as the driver is expected to update the driver's copy of the service parameters for the FLOGI payload. If the bb-credit recovery feature is enabled, this is fine. But on adapters were bb-credit recovery isn't enabled, it would cause the FLOGI to fail. Fix by restoring the original command order (READ_SPARAM before CONFIG_LINK), and after issuing CONFIG_LINK, detect bb-credit recovery support and reissuing READ_SPARAM to obtain the updated service parameters (effectively adding in the fix command order). [mkp: corrected SHA] Link: https://lore.kernel.org/r/20200911200147.110826-1-james.smart@broadcom.com Fixes: 835214f5d5f5 ("scsi: lpfc: Fix broken Credit Recovery after driver load") CC: # v5.7+ Co-developed-by: Dick Kennedy Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Martin K. Petersen --- drivers/scsi/lpfc/lpfc_hbadisc.c | 76 ++++++++++++++++++++++---------- 1 file changed, 52 insertions(+), 24 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 142a02114479..bf06d8549bd3 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -71,6 +71,7 @@ static void lpfc_disc_timeout_handler(struct lpfc_vport *); static void lpfc_disc_flush_list(struct lpfc_vport *vport); static void lpfc_unregister_fcfi_cmpl(struct lpfc_hba *, LPFC_MBOXQ_t *); static int lpfc_fcf_inuse(struct lpfc_hba *); +static void lpfc_mbx_cmpl_read_sparam(struct lpfc_hba *, LPFC_MBOXQ_t *); void lpfc_terminate_rport_io(struct fc_rport *rport) @@ -1138,11 +1139,13 @@ out: return; } - void lpfc_mbx_cmpl_local_config_link(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) { struct lpfc_vport *vport = pmb->vport; + LPFC_MBOXQ_t *sparam_mb; + struct lpfc_dmabuf *sparam_mp; + int rc; if (pmb->u.mb.mbxStatus) goto out; @@ -1167,12 +1170,42 @@ lpfc_mbx_cmpl_local_config_link(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) } /* Start discovery by sending a FLOGI. port_state is identically - * LPFC_FLOGI while waiting for FLOGI cmpl. Check if sending - * the FLOGI is being deferred till after MBX_READ_SPARAM completes. + * LPFC_FLOGI while waiting for FLOGI cmpl. */ if (vport->port_state != LPFC_FLOGI) { - if (!(phba->hba_flag & HBA_DEFER_FLOGI)) + /* Issue MBX_READ_SPARAM to update CSPs before FLOGI if + * bb-credit recovery is in place. + */ + if (phba->bbcredit_support && phba->cfg_enable_bbcr && + !(phba->link_flag & LS_LOOPBACK_MODE)) { + sparam_mb = mempool_alloc(phba->mbox_mem_pool, + GFP_KERNEL); + if (!sparam_mb) + goto sparam_out; + + rc = lpfc_read_sparam(phba, sparam_mb, 0); + if (rc) { + mempool_free(sparam_mb, phba->mbox_mem_pool); + goto sparam_out; + } + sparam_mb->vport = vport; + sparam_mb->mbox_cmpl = lpfc_mbx_cmpl_read_sparam; + rc = lpfc_sli_issue_mbox(phba, sparam_mb, MBX_NOWAIT); + if (rc == MBX_NOT_FINISHED) { + sparam_mp = (struct lpfc_dmabuf *) + sparam_mb->ctx_buf; + lpfc_mbuf_free(phba, sparam_mp->virt, + sparam_mp->phys); + kfree(sparam_mp); + sparam_mb->ctx_buf = NULL; + mempool_free(sparam_mb, phba->mbox_mem_pool); + goto sparam_out; + } + + phba->hba_flag |= HBA_DEFER_FLOGI; + } else { lpfc_initial_flogi(vport); + } } else { if (vport->fc_flag & FC_PT2PT) lpfc_disc_start(vport); @@ -1184,6 +1217,7 @@ out: "0306 CONFIG_LINK mbxStatus error x%x " "HBA state x%x\n", pmb->u.mb.mbxStatus, vport->port_state); +sparam_out: mempool_free(pmb, phba->mbox_mem_pool); lpfc_linkdown(phba); @@ -3239,21 +3273,6 @@ lpfc_mbx_process_link_up(struct lpfc_hba *phba, struct lpfc_mbx_read_top *la) lpfc_linkup(phba); sparam_mbox = NULL; - if (!(phba->hba_flag & HBA_FCOE_MODE)) { - cfglink_mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); - if (!cfglink_mbox) - goto out; - vport->port_state = LPFC_LOCAL_CFG_LINK; - lpfc_config_link(phba, cfglink_mbox); - cfglink_mbox->vport = vport; - cfglink_mbox->mbox_cmpl = lpfc_mbx_cmpl_local_config_link; - rc = lpfc_sli_issue_mbox(phba, cfglink_mbox, MBX_NOWAIT); - if (rc == MBX_NOT_FINISHED) { - mempool_free(cfglink_mbox, phba->mbox_mem_pool); - goto out; - } - } - sparam_mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); if (!sparam_mbox) goto out; @@ -3274,7 +3293,20 @@ lpfc_mbx_process_link_up(struct lpfc_hba *phba, struct lpfc_mbx_read_top *la) goto out; } - if (phba->hba_flag & HBA_FCOE_MODE) { + if (!(phba->hba_flag & HBA_FCOE_MODE)) { + cfglink_mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); + if (!cfglink_mbox) + goto out; + vport->port_state = LPFC_LOCAL_CFG_LINK; + lpfc_config_link(phba, cfglink_mbox); + cfglink_mbox->vport = vport; + cfglink_mbox->mbox_cmpl = lpfc_mbx_cmpl_local_config_link; + rc = lpfc_sli_issue_mbox(phba, cfglink_mbox, MBX_NOWAIT); + if (rc == MBX_NOT_FINISHED) { + mempool_free(cfglink_mbox, phba->mbox_mem_pool); + goto out; + } + } else { vport->port_state = LPFC_VPORT_UNKNOWN; /* * Add the driver's default FCF record at FCF index 0 now. This @@ -3331,10 +3363,6 @@ lpfc_mbx_process_link_up(struct lpfc_hba *phba, struct lpfc_mbx_read_top *la) } /* Reset FCF roundrobin bmask for new discovery */ lpfc_sli4_clear_fcf_rr_bmask(phba); - } else { - if (phba->bbcredit_support && phba->cfg_enable_bbcr && - !(phba->link_flag & LS_LOOPBACK_MODE)) - phba->hba_flag |= HBA_DEFER_FLOGI; } /* Prepare for LINK up registrations */ From 27ba3e8ff3ab86449e63d38a8d623053591e65fa Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 15 Sep 2020 16:33:46 +0900 Subject: [PATCH 035/322] scsi: sd: sd_zbc: Fix handling of host-aware ZBC disks When CONFIG_BLK_DEV_ZONED is disabled, allow using host-aware ZBC disks as regular disks. In this case, ensure that command completion is correctly executed by changing sd_zbc_complete() to return good_bytes instead of 0 and causing a hang during device probe (endless retries). When CONFIG_BLK_DEV_ZONED is enabled and a host-aware disk is detected to have partitions, it will be used as a regular disk. In this case, make sure to not do anything in sd_zbc_revalidate_zones() as that triggers warnings. Since all these different cases result in subtle settings of the disk queue zoned model, introduce the block layer helper function blk_queue_set_zoned() to generically implement setting up the effective zoned model according to the disk type, the presence of partitions on the disk and CONFIG_BLK_DEV_ZONED configuration. Link: https://lore.kernel.org/r/20200915073347.832424-2-damien.lemoal@wdc.com Fixes: b72053072c0b ("block: allow partitions on host aware zone devices") Cc: Reported-by: Borislav Petkov Suggested-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Signed-off-by: Damien Le Moal Signed-off-by: Martin K. Petersen --- block/blk-settings.c | 46 ++++++++++++++++++++++++++++++++++++++++++ drivers/scsi/sd.c | 30 ++++++++++++++++----------- drivers/scsi/sd.h | 2 +- drivers/scsi/sd_zbc.c | 6 +++++- include/linux/blkdev.h | 2 ++ 5 files changed, 72 insertions(+), 14 deletions(-) diff --git a/block/blk-settings.c b/block/blk-settings.c index 76a7e03bcd6c..34b721a2743a 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -801,6 +801,52 @@ bool blk_queue_can_use_dma_map_merging(struct request_queue *q, } EXPORT_SYMBOL_GPL(blk_queue_can_use_dma_map_merging); +/** + * blk_queue_set_zoned - configure a disk queue zoned model. + * @disk: the gendisk of the queue to configure + * @model: the zoned model to set + * + * Set the zoned model of the request queue of @disk according to @model. + * When @model is BLK_ZONED_HM (host managed), this should be called only + * if zoned block device support is enabled (CONFIG_BLK_DEV_ZONED option). + * If @model specifies BLK_ZONED_HA (host aware), the effective model used + * depends on CONFIG_BLK_DEV_ZONED settings and on the existence of partitions + * on the disk. + */ +void blk_queue_set_zoned(struct gendisk *disk, enum blk_zoned_model model) +{ + switch (model) { + case BLK_ZONED_HM: + /* + * Host managed devices are supported only if + * CONFIG_BLK_DEV_ZONED is enabled. + */ + WARN_ON_ONCE(!IS_ENABLED(CONFIG_BLK_DEV_ZONED)); + break; + case BLK_ZONED_HA: + /* + * Host aware devices can be treated either as regular block + * devices (similar to drive managed devices) or as zoned block + * devices to take advantage of the zone command set, similarly + * to host managed devices. We try the latter if there are no + * partitions and zoned block device support is enabled, else + * we do nothing special as far as the block layer is concerned. + */ + if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED) || + disk_has_partitions(disk)) + model = BLK_ZONED_NONE; + break; + case BLK_ZONED_NONE: + default: + if (WARN_ON_ONCE(model != BLK_ZONED_NONE)) + model = BLK_ZONED_NONE; + break; + } + + disk->queue->limits.zoned = model; +} +EXPORT_SYMBOL_GPL(blk_queue_set_zoned); + static int __init blk_settings_init(void) { blk_max_low_pfn = max_low_pfn - 1; diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 95018e650f2d..06286b6aeaec 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2964,26 +2964,32 @@ static void sd_read_block_characteristics(struct scsi_disk *sdkp) if (sdkp->device->type == TYPE_ZBC) { /* Host-managed */ - q->limits.zoned = BLK_ZONED_HM; + blk_queue_set_zoned(sdkp->disk, BLK_ZONED_HM); } else { sdkp->zoned = (buffer[8] >> 4) & 3; - if (sdkp->zoned == 1 && !disk_has_partitions(sdkp->disk)) { + if (sdkp->zoned == 1) { /* Host-aware */ - q->limits.zoned = BLK_ZONED_HA; + blk_queue_set_zoned(sdkp->disk, BLK_ZONED_HA); } else { - /* - * Treat drive-managed devices and host-aware devices - * with partitions as regular block devices. - */ - q->limits.zoned = BLK_ZONED_NONE; - if (sdkp->zoned == 2 && sdkp->first_scan) - sd_printk(KERN_NOTICE, sdkp, - "Drive-managed SMR disk\n"); + /* Regular disk or drive managed disk */ + blk_queue_set_zoned(sdkp->disk, BLK_ZONED_NONE); } } - if (blk_queue_is_zoned(q) && sdkp->first_scan) + + if (!sdkp->first_scan) + goto out; + + if (blk_queue_is_zoned(q)) { sd_printk(KERN_NOTICE, sdkp, "Host-%s zoned block device\n", q->limits.zoned == BLK_ZONED_HM ? "managed" : "aware"); + } else { + if (sdkp->zoned == 1) + sd_printk(KERN_NOTICE, sdkp, + "Host-aware SMR disk used as regular disk\n"); + else if (sdkp->zoned == 2) + sd_printk(KERN_NOTICE, sdkp, + "Drive-managed SMR disk\n"); + } out: kfree(buffer); diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h index 4933e7daf17d..7251434100e6 100644 --- a/drivers/scsi/sd.h +++ b/drivers/scsi/sd.h @@ -259,7 +259,7 @@ static inline blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd, static inline unsigned int sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes, struct scsi_sense_hdr *sshdr) { - return 0; + return good_bytes; } static inline blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd, diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index 0e94ff056bff..a739456dea02 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -667,7 +667,11 @@ int sd_zbc_revalidate_zones(struct scsi_disk *sdkp) u32 max_append; int ret = 0; - if (!sd_is_zoned(sdkp)) + /* + * There is nothing to do for regular disks, including host-aware disks + * that have partitions. + */ + if (!blk_queue_is_zoned(q)) return 0; /* diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index bb5636cc17b9..868e11face00 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -352,6 +352,8 @@ struct queue_limits { typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx, void *data); +void blk_queue_set_zoned(struct gendisk *disk, enum blk_zoned_model model); + #ifdef CONFIG_BLK_DEV_ZONED #define BLK_ALL_ZONES ((unsigned int)-1) From 6c5dee18756b4721ac8518c69b22ee8ac0c9c442 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 15 Sep 2020 16:33:47 +0900 Subject: [PATCH 036/322] scsi: sd: sd_zbc: Fix ZBC disk initialization Make sure to call sd_zbc_init_disk() when the sdkp->zoned field is known, that is, once sd_read_block_characteristics() is executed in sd_revalidate_disk(), so that host-aware disks also get initialized. To do so, move sd_zbc_init_disk() call in sd_zbc_revalidate_zones() and make sure to execute it for all zoned disks, including for host-aware disks used as regular disks as these disk zoned model may be changed back to BLK_ZONED_HA when partitions are deleted. Link: https://lore.kernel.org/r/20200915073347.832424-3-damien.lemoal@wdc.com Fixes: 5795eb443060 ("scsi: sd_zbc: emulate ZONE_APPEND commands") Cc: # v5.8+ Reported-by: Borislav Petkov Tested-by: Borislav Petkov Reviewed-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Signed-off-by: Damien Le Moal Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 4 --- drivers/scsi/sd.h | 6 ----- drivers/scsi/sd_zbc.c | 60 +++++++++++++++++++++++++------------------ 3 files changed, 35 insertions(+), 35 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 06286b6aeaec..16503e22691e 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3410,10 +3410,6 @@ static int sd_probe(struct device *dev) sdkp->first_scan = 1; sdkp->max_medium_access_timeouts = SD_MAX_MEDIUM_TIMEOUTS; - error = sd_zbc_init_disk(sdkp); - if (error) - goto out_free_index; - sd_revalidate_disk(gd); gd->flags = GENHD_FL_EXT_DEVT; diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h index 7251434100e6..a3aad608bc38 100644 --- a/drivers/scsi/sd.h +++ b/drivers/scsi/sd.h @@ -215,7 +215,6 @@ static inline int sd_is_zoned(struct scsi_disk *sdkp) #ifdef CONFIG_BLK_DEV_ZONED -int sd_zbc_init_disk(struct scsi_disk *sdkp); void sd_zbc_release_disk(struct scsi_disk *sdkp); int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buffer); int sd_zbc_revalidate_zones(struct scsi_disk *sdkp); @@ -231,11 +230,6 @@ blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd, sector_t *lba, #else /* CONFIG_BLK_DEV_ZONED */ -static inline int sd_zbc_init_disk(struct scsi_disk *sdkp) -{ - return 0; -} - static inline void sd_zbc_release_disk(struct scsi_disk *sdkp) {} static inline int sd_zbc_read_zones(struct scsi_disk *sdkp, diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index a739456dea02..cf07b7f93579 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -651,6 +651,28 @@ static void sd_zbc_print_zones(struct scsi_disk *sdkp) sdkp->zone_blocks); } +static int sd_zbc_init_disk(struct scsi_disk *sdkp) +{ + sdkp->zones_wp_offset = NULL; + spin_lock_init(&sdkp->zones_wp_offset_lock); + sdkp->rev_wp_offset = NULL; + mutex_init(&sdkp->rev_mutex); + INIT_WORK(&sdkp->zone_wp_offset_work, sd_zbc_update_wp_offset_workfn); + sdkp->zone_wp_update_buf = kzalloc(SD_BUF_SIZE, GFP_KERNEL); + if (!sdkp->zone_wp_update_buf) + return -ENOMEM; + + return 0; +} + +void sd_zbc_release_disk(struct scsi_disk *sdkp) +{ + kvfree(sdkp->zones_wp_offset); + sdkp->zones_wp_offset = NULL; + kfree(sdkp->zone_wp_update_buf); + sdkp->zone_wp_update_buf = NULL; +} + static void sd_zbc_revalidate_zones_cb(struct gendisk *disk) { struct scsi_disk *sdkp = scsi_disk(disk); @@ -667,6 +689,19 @@ int sd_zbc_revalidate_zones(struct scsi_disk *sdkp) u32 max_append; int ret = 0; + /* + * For all zoned disks, initialize zone append emulation data if not + * already done. This is necessary also for host-aware disks used as + * regular disks due to the presence of partitions as these partitions + * may be deleted and the disk zoned model changed back from + * BLK_ZONED_NONE to BLK_ZONED_HA. + */ + if (sd_is_zoned(sdkp) && !sdkp->zone_wp_update_buf) { + ret = sd_zbc_init_disk(sdkp); + if (ret) + return ret; + } + /* * There is nothing to do for regular disks, including host-aware disks * that have partitions. @@ -768,28 +803,3 @@ err: return ret; } - -int sd_zbc_init_disk(struct scsi_disk *sdkp) -{ - if (!sd_is_zoned(sdkp)) - return 0; - - sdkp->zones_wp_offset = NULL; - spin_lock_init(&sdkp->zones_wp_offset_lock); - sdkp->rev_wp_offset = NULL; - mutex_init(&sdkp->rev_mutex); - INIT_WORK(&sdkp->zone_wp_offset_work, sd_zbc_update_wp_offset_workfn); - sdkp->zone_wp_update_buf = kzalloc(SD_BUF_SIZE, GFP_KERNEL); - if (!sdkp->zone_wp_update_buf) - return -ENOMEM; - - return 0; -} - -void sd_zbc_release_disk(struct scsi_disk *sdkp) -{ - kvfree(sdkp->zones_wp_offset); - sdkp->zones_wp_offset = NULL; - kfree(sdkp->zone_wp_update_buf); - sdkp->zone_wp_update_buf = NULL; -} From e5467b672bd99f0c1904900013e778e3a5ce30b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Tue, 15 Sep 2020 02:54:26 +0200 Subject: [PATCH 037/322] dt-bindings: leds: cznic,turris-omnia-leds: fix error in binding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a bug in the device tree binding for cznic,turris-omnia-leds which causes make dt_binding_check to complain. The reason is that the multi-led property binding's regular expression does not contain the `@` character, while the example nodes do. Fix this, and also adjust the maximum address to 'b' as there are 12 LEDs. Cc: Rob Herring Cc: devicetree@vger.kernel.org Cc: Pavel Machek Signed-off-by: Marek Behún Link: https://lore.kernel.org/r/20200915005426.15957-1-marek.behun@nic.cz Signed-off-by: Rob Herring --- .../devicetree/bindings/leds/cznic,turris-omnia-leds.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/leds/cznic,turris-omnia-leds.yaml b/Documentation/devicetree/bindings/leds/cznic,turris-omnia-leds.yaml index 24ad1446445e..fe7fa25877fd 100644 --- a/Documentation/devicetree/bindings/leds/cznic,turris-omnia-leds.yaml +++ b/Documentation/devicetree/bindings/leds/cznic,turris-omnia-leds.yaml @@ -30,7 +30,7 @@ properties: const: 0 patternProperties: - "^multi-led[0-9a-f]$": + "^multi-led@[0-9a-b]$": type: object allOf: - $ref: leds-class-multicolor.yaml# From d33030e2ee3508d65db5644551435310df86010e Mon Sep 17 00:00:00 2001 From: Jeffrey Mitchell Date: Tue, 15 Sep 2020 16:42:52 -0500 Subject: [PATCH 038/322] nfs: Fix security label length not being reset nfs_readdir_page_filler() iterates over entries in a directory, reusing the same security label buffer, but does not reset the buffer's length. This causes decode_attr_security_label() to return -ERANGE if an entry's security label is longer than the previous one's. This error, in nfs4_decode_dirent(), only gets passed up as -EAGAIN, which causes another failed attempt to copy into the buffer. The second error is ignored and the remaining entries do not show up in ls, specifically the getdents64() syscall. Reproduce by creating multiple files in NFS and giving one of the later files a longer security label. ls will not see that file nor any that are added afterwards, though they will exist on the backend. In nfs_readdir_page_filler(), reset security label buffer length before every reuse Signed-off-by: Jeffrey Mitchell Fixes: b4487b935452 ("nfs: Fix getxattr kernel panic and memory overflow") Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index e732580fe47b..cb52db9a0cfb 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -579,6 +579,9 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); do { + if (entry->label) + entry->label->len = NFS4_MAXLABELLEN; + status = xdr_decode(desc, entry, &stream); if (status != 0) { if (status == -EAGAIN) From 16abd2a0c124a6c3543c88ca4c53c997c9fb4114 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Wed, 16 Sep 2020 09:07:54 -0400 Subject: [PATCH 039/322] NFSv4.2: fix client's attribute cache management for copy_file_range After client is done with the COPY operation, it needs to invalidate its pagecache (as it did no reading or writing of the data locally) and it needs to invalidate it's attributes just like it would have for a read on the source file and write on the destination file. Once the linux server started giving out read delegations to read+write opens, the destination file of the copy_file range started having delegations and not doing syncup on close of the file leading to xfstest failures for generic/430,431,432,433,565. v2: changing cache_validity needs to be protected by the i_lock. Reported-by: Murphy Zhou Fixes: 2e72448b07dc ("NFS: Add COPY nfs operation") Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- fs/nfs/nfs42proc.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 142225f0af59..2b2211d1234e 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -356,7 +356,15 @@ static ssize_t _nfs42_proc_copy(struct file *src, truncate_pagecache_range(dst_inode, pos_dst, pos_dst + res->write_res.count); - + spin_lock(&dst_inode->i_lock); + NFS_I(dst_inode)->cache_validity |= (NFS_INO_REVAL_PAGECACHE | + NFS_INO_REVAL_FORCED | NFS_INO_INVALID_SIZE | + NFS_INO_INVALID_ATTR | NFS_INO_INVALID_DATA); + spin_unlock(&dst_inode->i_lock); + spin_lock(&src_inode->i_lock); + NFS_I(src_inode)->cache_validity |= (NFS_INO_REVAL_PAGECACHE | + NFS_INO_REVAL_FORCED | NFS_INO_INVALID_ATIME); + spin_unlock(&src_inode->i_lock); status = res->write_res.count; out: if (args->sync) From 642e450b6b5955f2059d0ae372183f7c6323f951 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Wed, 16 Sep 2020 14:00:25 +0200 Subject: [PATCH 040/322] xsk: Do not discard packet when NETDEV_TX_BUSY In the skb Tx path, transmission of a packet is performed with dev_direct_xmit(). When NETDEV_TX_BUSY is set in the drivers, it signifies that it was not possible to send the packet right now, please try later. Unfortunately, the xsk transmit code discarded the packet and returned EBUSY to the application. Fix this unnecessary packet loss, by not discarding the packet in the Tx ring and return EAGAIN. As EAGAIN is returned to the application, it can then retry the send operation later and the packet will then likely be sent as the driver will then likely have space/resources to send the packet. In summary, EAGAIN tells the application that the packet was not discarded from the Tx ring and that it needs to call send() again. EBUSY, on the other hand, signifies that the packet was not sent and discarded from the Tx ring. The application needs to put the packet on the Tx ring again if it wants it to be sent. Fixes: 35fcde7f8deb ("xsk: support for Tx") Reported-by: Arkadiusz Zema Suggested-by: Arkadiusz Zema Suggested-by: Daniel Borkmann Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann Reviewed-by: Jesse Brandeburg Link: https://lore.kernel.org/bpf/1600257625-2353-1-git-send-email-magnus.karlsson@gmail.com --- net/xdp/xsk.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index c3231620d210..6c5e09e7440a 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -377,15 +377,30 @@ static int xsk_generic_xmit(struct sock *sk) skb_shinfo(skb)->destructor_arg = (void *)(long)desc.addr; skb->destructor = xsk_destruct_skb; + /* Hinder dev_direct_xmit from freeing the packet and + * therefore completing it in the destructor + */ + refcount_inc(&skb->users); err = dev_direct_xmit(skb, xs->queue_id); + if (err == NETDEV_TX_BUSY) { + /* Tell user-space to retry the send */ + skb->destructor = sock_wfree; + /* Free skb without triggering the perf drop trace */ + consume_skb(skb); + err = -EAGAIN; + goto out; + } + xskq_cons_release(xs->tx); /* Ignore NET_XMIT_CN as packet might have been sent */ - if (err == NET_XMIT_DROP || err == NETDEV_TX_BUSY) { + if (err == NET_XMIT_DROP) { /* SKB completed but not sent */ + kfree_skb(skb); err = -EBUSY; goto out; } + consume_skb(skb); sent_frame = true; } From 5ad284ab3a01e2d6a89be2a8663ae76f4e617549 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 15 Sep 2020 15:58:16 +0300 Subject: [PATCH 041/322] gpiolib: Fix line event handling in syscall compatible mode The introduced line event handling ABI in the commit 61f922db7221 ("gpio: userspace ABI for reading GPIO line events") missed the fact that 64-bit kernel may serve for 32-bit applications. In such case the very first check in the lineevent_read() will fail due to alignment differences. To workaround this introduce lineevent_get_size() helper which returns actual size of the structure in user space. Fixes: 61f922db7221 ("gpio: userspace ABI for reading GPIO line events") Suggested-by: Arnd Bergmann Signed-off-by: Andy Shevchenko Acked-by: Arnd Bergmann Tested-by: Kent Gibson Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-cdev.c | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c index e6c9b78adfc2..76c36b05aef6 100644 --- a/drivers/gpio/gpiolib-cdev.c +++ b/drivers/gpio/gpiolib-cdev.c @@ -423,6 +423,21 @@ static __poll_t lineevent_poll(struct file *file, return events; } +static ssize_t lineevent_get_size(void) +{ +#ifdef __x86_64__ + /* i386 has no padding after 'id' */ + if (in_ia32_syscall()) { + struct compat_gpioeevent_data { + compat_u64 timestamp; + u32 id; + }; + + return sizeof(struct compat_gpioeevent_data); + } +#endif + return sizeof(struct gpioevent_data); +} static ssize_t lineevent_read(struct file *file, char __user *buf, @@ -432,9 +447,20 @@ static ssize_t lineevent_read(struct file *file, struct lineevent_state *le = file->private_data; struct gpioevent_data ge; ssize_t bytes_read = 0; + ssize_t ge_size; int ret; - if (count < sizeof(ge)) + /* + * When compatible system call is being used the struct gpioevent_data, + * in case of at least ia32, has different size due to the alignment + * differences. Because we have first member 64 bits followed by one of + * 32 bits there is no gap between them. The only difference is the + * padding at the end of the data structure. Hence, we calculate the + * actual sizeof() and pass this as an argument to copy_to_user() to + * drop unneeded bytes from the output. + */ + ge_size = lineevent_get_size(); + if (count < ge_size) return -EINVAL; do { @@ -470,10 +496,10 @@ static ssize_t lineevent_read(struct file *file, break; } - if (copy_to_user(buf + bytes_read, &ge, sizeof(ge))) + if (copy_to_user(buf + bytes_read, &ge, ge_size)) return -EFAULT; - bytes_read += sizeof(ge); - } while (count >= bytes_read + sizeof(ge)); + bytes_read += ge_size; + } while (count >= bytes_read + ge_size); return bytes_read; } From 94cc89eb8fa5039fcb6e3e3d50f929ddcccee095 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Thu, 17 Sep 2020 13:08:28 +0100 Subject: [PATCH 042/322] regmap: debugfs: Fix handling of name string for debugfs init delays In regmap_debugfs_init the initialisation of the debugfs is delayed if the root node isn't ready yet. Most callers of regmap_debugfs_init pass the name from the regmap_config, which is considered temporary ie. may be unallocated after the regmap_init call returns. This leads to a potential use after free, where config->name has been freed by the time it is used in regmap_debugfs_initcall. This situation can be seen on Zynq, where the architecture init_irq callback registers a syscon device, using a local variable for the regmap_config. As init_irq is very early in the platform bring up the regmap debugfs root isn't ready yet. Although this doesn't crash it does result in the debugfs entry not having the correct name. Regmap already sets map->name from config->name on the regmap_init path and the fact that a separate field is used to pass the name to regmap_debugfs_init appears to be an artifact of the debugfs name being added before the map name. As such this patch updates regmap_debugfs_init to use map->name, which is already duplicated from the config avoiding the issue. This does however leave two lose ends, both regmap_attach_dev and regmap_reinit_cache can be called after a regmap is registered and would have had the effect of applying a new name to the debugfs entries. In both of these cases it was chosen to update the map name. In the case of regmap_attach_dev there are 3 users that currently use this function to update the name, thus doing so avoids changes for those users and it seems reasonable that attaching a device would want to set the name of the map. In the case of regmap_reinit_cache the primary use-case appears to be devices that need some register access to identify the device (for example devices in the same family) and then update the cache to match the exact hardware. Whilst no users do currently update the name here, given the use-case it seemed reasonable the name might want to be updated once the device is better identified. Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20200917120828.12987-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- drivers/base/regmap/internal.h | 4 +-- drivers/base/regmap/regmap-debugfs.c | 7 ++--- drivers/base/regmap/regmap.c | 44 +++++++++++++++++++++------- 3 files changed, 38 insertions(+), 17 deletions(-) diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h index 3d80c4b43f72..e0ff8e90ebdc 100644 --- a/drivers/base/regmap/internal.h +++ b/drivers/base/regmap/internal.h @@ -217,7 +217,7 @@ struct regmap_field { #ifdef CONFIG_DEBUG_FS extern void regmap_debugfs_initcall(void); -extern void regmap_debugfs_init(struct regmap *map, const char *name); +extern void regmap_debugfs_init(struct regmap *map); extern void regmap_debugfs_exit(struct regmap *map); static inline void regmap_debugfs_disable(struct regmap *map) @@ -227,7 +227,7 @@ static inline void regmap_debugfs_disable(struct regmap *map) #else static inline void regmap_debugfs_initcall(void) { } -static inline void regmap_debugfs_init(struct regmap *map, const char *name) { } +static inline void regmap_debugfs_init(struct regmap *map) { } static inline void regmap_debugfs_exit(struct regmap *map) { } static inline void regmap_debugfs_disable(struct regmap *map) { } #endif diff --git a/drivers/base/regmap/regmap-debugfs.c b/drivers/base/regmap/regmap-debugfs.c index f58baff2be0a..b6d63ef16b44 100644 --- a/drivers/base/regmap/regmap-debugfs.c +++ b/drivers/base/regmap/regmap-debugfs.c @@ -17,7 +17,6 @@ struct regmap_debugfs_node { struct regmap *map; - const char *name; struct list_head link; }; @@ -544,11 +543,12 @@ static const struct file_operations regmap_cache_bypass_fops = { .write = regmap_cache_bypass_write_file, }; -void regmap_debugfs_init(struct regmap *map, const char *name) +void regmap_debugfs_init(struct regmap *map) { struct rb_node *next; struct regmap_range_node *range_node; const char *devname = "dummy"; + const char *name = map->name; /* * Userspace can initiate reads from the hardware over debugfs. @@ -569,7 +569,6 @@ void regmap_debugfs_init(struct regmap *map, const char *name) if (!node) return; node->map = map; - node->name = name; mutex_lock(®map_debugfs_early_lock); list_add(&node->link, ®map_debugfs_early_list); mutex_unlock(®map_debugfs_early_lock); @@ -679,7 +678,7 @@ void regmap_debugfs_initcall(void) mutex_lock(®map_debugfs_early_lock); list_for_each_entry_safe(node, tmp, ®map_debugfs_early_list, link) { - regmap_debugfs_init(node->map, node->name); + regmap_debugfs_init(node->map); list_del(&node->link); kfree(node); } diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index e93700af7e6e..e22c21affebd 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -581,14 +581,34 @@ static void regmap_range_exit(struct regmap *map) kfree(map->selector_work_buf); } +static int regmap_set_name(struct regmap *map, const struct regmap_config *config) +{ + if (config->name) { + const char *name = kstrdup_const(config->name, GFP_KERNEL); + + if (!name) + return -ENOMEM; + + kfree_const(map->name); + map->name = name; + } + + return 0; +} + int regmap_attach_dev(struct device *dev, struct regmap *map, const struct regmap_config *config) { struct regmap **m; + int ret; map->dev = dev; - regmap_debugfs_init(map, config->name); + ret = regmap_set_name(map, config); + if (ret) + return ret; + + regmap_debugfs_init(map); /* Add a devres resource for dev_get_regmap() */ m = devres_alloc(dev_get_regmap_release, sizeof(*m), GFP_KERNEL); @@ -674,9 +694,9 @@ struct regmap *__regmap_init(struct device *dev, const char *lock_name) { struct regmap *map; - int ret = -EINVAL; enum regmap_endian reg_endian, val_endian; int i, j; + int ret; if (!config) goto err; @@ -687,13 +707,9 @@ struct regmap *__regmap_init(struct device *dev, goto err; } - if (config->name) { - map->name = kstrdup_const(config->name, GFP_KERNEL); - if (!map->name) { - ret = -ENOMEM; - goto err_map; - } - } + ret = regmap_set_name(map, config); + if (ret) + goto err_map; if (config->disable_locking) { map->lock = map->unlock = regmap_lock_unlock_none; @@ -1137,7 +1153,7 @@ skip_format_initialization: if (ret != 0) goto err_regcache; } else { - regmap_debugfs_init(map, config->name); + regmap_debugfs_init(map); } return map; @@ -1297,6 +1313,8 @@ EXPORT_SYMBOL_GPL(regmap_field_free); */ int regmap_reinit_cache(struct regmap *map, const struct regmap_config *config) { + int ret; + regcache_exit(map); regmap_debugfs_exit(map); @@ -1309,7 +1327,11 @@ int regmap_reinit_cache(struct regmap *map, const struct regmap_config *config) map->readable_noinc_reg = config->readable_noinc_reg; map->cache_type = config->cache_type; - regmap_debugfs_init(map, config->name); + ret = regmap_set_name(map, config); + if (ret) + return ret; + + regmap_debugfs_init(map); map->cache_bypass = false; map->cache_only = false; From 1d0e16ac1a9e800598dcfa5b6bc53b704a103390 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Tue, 15 Sep 2020 17:07:35 -0400 Subject: [PATCH 043/322] drm/amdgpu: prevent double kfree ttm->sg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Set ttm->sg to NULL after kfree, to avoid memory corruption backtrace: [ 420.932812] kernel BUG at /build/linux-do9eLF/linux-4.15.0/mm/slub.c:295! [ 420.934182] invalid opcode: 0000 [#1] SMP NOPTI [ 420.935445] Modules linked in: xt_conntrack ipt_MASQUERADE [ 420.951332] Hardware name: Dell Inc. PowerEdge R7525/0PYVT1, BIOS 1.5.4 07/09/2020 [ 420.952887] RIP: 0010:__slab_free+0x180/0x2d0 [ 420.954419] RSP: 0018:ffffbe426291fa60 EFLAGS: 00010246 [ 420.955963] RAX: ffff9e29263e9c30 RBX: ffff9e29263e9c30 RCX: 000000018100004b [ 420.957512] RDX: ffff9e29263e9c30 RSI: fffff3d33e98fa40 RDI: ffff9e297e407a80 [ 420.959055] RBP: ffffbe426291fb00 R08: 0000000000000001 R09: ffffffffc0d39ade [ 420.960587] R10: ffffbe426291fb20 R11: ffff9e49ffdd4000 R12: ffff9e297e407a80 [ 420.962105] R13: fffff3d33e98fa40 R14: ffff9e29263e9c30 R15: ffff9e2954464fd8 [ 420.963611] FS: 00007fa2ea097780(0000) GS:ffff9e297e840000(0000) knlGS:0000000000000000 [ 420.965144] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 420.966663] CR2: 00007f16bfffefb8 CR3: 0000001ff0c62000 CR4: 0000000000340ee0 [ 420.968193] Call Trace: [ 420.969703] ? __page_cache_release+0x3c/0x220 [ 420.971294] ? amdgpu_ttm_tt_unpopulate+0x5e/0x80 [amdgpu] [ 420.972789] kfree+0x168/0x180 [ 420.974353] ? amdgpu_ttm_tt_set_user_pages+0x64/0xc0 [amdgpu] [ 420.975850] ? kfree+0x168/0x180 [ 420.977403] amdgpu_ttm_tt_unpopulate+0x5e/0x80 [amdgpu] [ 420.978888] ttm_tt_unpopulate.part.10+0x53/0x60 [amdttm] [ 420.980357] ttm_tt_destroy.part.11+0x4f/0x60 [amdttm] [ 420.981814] ttm_tt_destroy+0x13/0x20 [amdttm] [ 420.983273] ttm_bo_cleanup_memtype_use+0x36/0x80 [amdttm] [ 420.984725] ttm_bo_release+0x1c9/0x360 [amdttm] [ 420.986167] amdttm_bo_put+0x24/0x30 [amdttm] [ 420.987663] amdgpu_bo_unref+0x1e/0x30 [amdgpu] [ 420.989165] amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu+0x9ca/0xb10 [amdgpu] [ 420.990666] kfd_ioctl_alloc_memory_of_gpu+0xef/0x2c0 [amdgpu] Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index e11c5d69843d..978bae731398 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1076,6 +1076,7 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) release_sg: kfree(ttm->sg); + ttm->sg = NULL; return r; } From 5d5b71e8a0f2f5649991d651d005181dc087981c Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 1 May 2020 16:42:56 -0400 Subject: [PATCH 044/322] drm/amdgpu: add the GC 10.3 VRS registers Add the VRS registers. Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- .../include/asic_reg/gc/gc_10_3_0_default.h | 2 + .../include/asic_reg/gc/gc_10_3_0_offset.h | 4 ++ .../include/asic_reg/gc/gc_10_3_0_sh_mask.h | 50 +++++++++++++++++++ 3 files changed, 56 insertions(+) diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_default.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_default.h index 1116779252e6..e245e912535e 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_default.h +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_default.h @@ -2727,6 +2727,7 @@ #define mmDB_STENCIL_WRITE_BASE_DEFAULT 0x00000000 #define mmDB_RESERVED_REG_1_DEFAULT 0x00000000 #define mmDB_RESERVED_REG_3_DEFAULT 0x00000000 +#define mmDB_VRS_OVERRIDE_CNTL_DEFAULT 0x00000000 #define mmDB_Z_READ_BASE_HI_DEFAULT 0x00000000 #define mmDB_STENCIL_READ_BASE_HI_DEFAULT 0x00000000 #define mmDB_Z_WRITE_BASE_HI_DEFAULT 0x00000000 @@ -3062,6 +3063,7 @@ #define mmPA_SU_OVER_RASTERIZATION_CNTL_DEFAULT 0x00000000 #define mmPA_STEREO_CNTL_DEFAULT 0x00000000 #define mmPA_STATE_STEREO_X_DEFAULT 0x00000000 +#define mmPA_CL_VRS_CNTL_DEFAULT 0x00000000 #define mmPA_SU_POINT_SIZE_DEFAULT 0x00000000 #define mmPA_SU_POINT_MINMAX_DEFAULT 0x00000000 #define mmPA_SU_LINE_CNTL_DEFAULT 0x00000000 diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h index 05d1b0a5f6d2..644a9fa71bb2 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h @@ -5379,6 +5379,8 @@ #define mmDB_RESERVED_REG_1_BASE_IDX 1 #define mmDB_RESERVED_REG_3 0x0017 #define mmDB_RESERVED_REG_3_BASE_IDX 1 +#define mmDB_VRS_OVERRIDE_CNTL 0x0019 +#define mmDB_VRS_OVERRIDE_CNTL_BASE_IDX 1 #define mmDB_Z_READ_BASE_HI 0x001a #define mmDB_Z_READ_BASE_HI_BASE_IDX 1 #define mmDB_STENCIL_READ_BASE_HI 0x001b @@ -6049,6 +6051,8 @@ #define mmPA_STEREO_CNTL_BASE_IDX 1 #define mmPA_STATE_STEREO_X 0x0211 #define mmPA_STATE_STEREO_X_BASE_IDX 1 +#define mmPA_CL_VRS_CNTL 0x0212 +#define mmPA_CL_VRS_CNTL_BASE_IDX 1 #define mmPA_SU_POINT_SIZE 0x0280 #define mmPA_SU_POINT_SIZE_BASE_IDX 1 #define mmPA_SU_POINT_MINMAX 0x0281 diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_sh_mask.h index aac57f714cf1..2e449fcff893 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_sh_mask.h @@ -9777,6 +9777,7 @@ #define DB_EXCEPTION_CONTROL__AUTO_FLUSH_HTILE__SHIFT 0x3 #define DB_EXCEPTION_CONTROL__AUTO_FLUSH_QUAD__SHIFT 0x4 #define DB_EXCEPTION_CONTROL__FORCE_SUMMARIZE__SHIFT 0x8 +#define DB_EXCEPTION_CONTROL__FORCE_VRS_RATE_FINE__SHIFT 0x10 #define DB_EXCEPTION_CONTROL__DTAG_WATERMARK__SHIFT 0x18 #define DB_EXCEPTION_CONTROL__EARLY_Z_PANIC_DISABLE_MASK 0x00000001L #define DB_EXCEPTION_CONTROL__LATE_Z_PANIC_DISABLE_MASK 0x00000002L @@ -9784,6 +9785,7 @@ #define DB_EXCEPTION_CONTROL__AUTO_FLUSH_HTILE_MASK 0x00000008L #define DB_EXCEPTION_CONTROL__AUTO_FLUSH_QUAD_MASK 0x00000010L #define DB_EXCEPTION_CONTROL__FORCE_SUMMARIZE_MASK 0x00000F00L +#define DB_EXCEPTION_CONTROL__FORCE_VRS_RATE_FINE_MASK 0x00FF0000L #define DB_EXCEPTION_CONTROL__DTAG_WATERMARK_MASK 0x7F000000L //DB_DFSM_CONFIG #define DB_DFSM_CONFIG__BYPASS_DFSM__SHIFT 0x0 @@ -10076,6 +10078,7 @@ #define CB_HW_CONTROL_3__DISABLE_NACK_PROCESSING_CM__SHIFT 0x18 #define CB_HW_CONTROL_3__DISABLE_NACK_COLOR_RD_WR_OPT__SHIFT 0x19 #define CB_HW_CONTROL_3__DISABLE_BLENDER_CLOCK_GATING__SHIFT 0x1a +#define CB_HW_CONTROL_3__DISABLE_DCC_VRS_OPT__SHIFT 0x1c #define CB_HW_CONTROL_3__DISABLE_FMASK_NOFETCH_OPT__SHIFT 0x1e #define CB_HW_CONTROL_3__DISABLE_FMASK_NOFETCH_OPT_BC__SHIFT 0x1f #define CB_HW_CONTROL_3__DISABLE_SLOW_MODE_EMPTY_HALF_QUAD_KILL_MASK 0x00000001L @@ -10103,12 +10106,15 @@ #define CB_HW_CONTROL_3__DISABLE_NACK_PROCESSING_CM_MASK 0x01000000L #define CB_HW_CONTROL_3__DISABLE_NACK_COLOR_RD_WR_OPT_MASK 0x02000000L #define CB_HW_CONTROL_3__DISABLE_BLENDER_CLOCK_GATING_MASK 0x04000000L +#define CB_HW_CONTROL_3__DISABLE_DCC_VRS_OPT_MASK 0x10000000L #define CB_HW_CONTROL_3__DISABLE_FMASK_NOFETCH_OPT_MASK 0x40000000L #define CB_HW_CONTROL_3__DISABLE_FMASK_NOFETCH_OPT_BC_MASK 0x80000000L //CB_HW_CONTROL #define CB_HW_CONTROL__ALLOW_MRT_WITH_DUAL_SOURCE__SHIFT 0x0 +#define CB_HW_CONTROL__DISABLE_VRS_FILLRATE_OPTIMIZATION__SHIFT 0x1 #define CB_HW_CONTROL__DISABLE_FILLRATE_OPT_FIX_WITH_CFC__SHIFT 0x3 #define CB_HW_CONTROL__DISABLE_POST_DCC_WITH_CFC_FIX__SHIFT 0x4 +#define CB_HW_CONTROL__DISABLE_COMPRESS_1FRAG_WHEN_VRS_RATE_HINT_EN__SHIFT 0x5 #define CB_HW_CONTROL__RMI_CREDITS__SHIFT 0x6 #define CB_HW_CONTROL__CHICKEN_BITS__SHIFT 0xc #define CB_HW_CONTROL__DISABLE_FMASK_MULTI_MGCG_DOMAINS__SHIFT 0xf @@ -10129,8 +10135,10 @@ #define CB_HW_CONTROL__DISABLE_CC_IB_SERIALIZER_STATE_OPT__SHIFT 0x1e #define CB_HW_CONTROL__DISABLE_PIXEL_IN_QUAD_FIX_FOR_LINEAR_SURFACE__SHIFT 0x1f #define CB_HW_CONTROL__ALLOW_MRT_WITH_DUAL_SOURCE_MASK 0x00000001L +#define CB_HW_CONTROL__DISABLE_VRS_FILLRATE_OPTIMIZATION_MASK 0x00000002L #define CB_HW_CONTROL__DISABLE_FILLRATE_OPT_FIX_WITH_CFC_MASK 0x00000008L #define CB_HW_CONTROL__DISABLE_POST_DCC_WITH_CFC_FIX_MASK 0x00000010L +#define CB_HW_CONTROL__DISABLE_COMPRESS_1FRAG_WHEN_VRS_RATE_HINT_EN_MASK 0x00000020L #define CB_HW_CONTROL__RMI_CREDITS_MASK 0x00000FC0L #define CB_HW_CONTROL__CHICKEN_BITS_MASK 0x00007000L #define CB_HW_CONTROL__DISABLE_FMASK_MULTI_MGCG_DOMAINS_MASK 0x00008000L @@ -19881,6 +19889,7 @@ #define DB_RENDER_OVERRIDE2__PRESERVE_SRESULTS__SHIFT 0x16 #define DB_RENDER_OVERRIDE2__DISABLE_FAST_PASS__SHIFT 0x17 #define DB_RENDER_OVERRIDE2__ALLOW_PARTIAL_RES_HIER_KILL__SHIFT 0x19 +#define DB_RENDER_OVERRIDE2__FORCE_VRS_RATE_FINE__SHIFT 0x1a #define DB_RENDER_OVERRIDE2__CENTROID_COMPUTATION_MODE__SHIFT 0x1b #define DB_RENDER_OVERRIDE2__PARTIAL_SQUAD_LAUNCH_CONTROL_MASK 0x00000003L #define DB_RENDER_OVERRIDE2__PARTIAL_SQUAD_LAUNCH_COUNTDOWN_MASK 0x0000001CL @@ -19898,6 +19907,7 @@ #define DB_RENDER_OVERRIDE2__PRESERVE_SRESULTS_MASK 0x00400000L #define DB_RENDER_OVERRIDE2__DISABLE_FAST_PASS_MASK 0x00800000L #define DB_RENDER_OVERRIDE2__ALLOW_PARTIAL_RES_HIER_KILL_MASK 0x02000000L +#define DB_RENDER_OVERRIDE2__FORCE_VRS_RATE_FINE_MASK 0x04000000L #define DB_RENDER_OVERRIDE2__CENTROID_COMPUTATION_MODE_MASK 0x18000000L //DB_HTILE_DATA_BASE #define DB_HTILE_DATA_BASE__BASE_256B__SHIFT 0x0 @@ -20021,6 +20031,13 @@ //DB_RESERVED_REG_3 #define DB_RESERVED_REG_3__FIELD_1__SHIFT 0x0 #define DB_RESERVED_REG_3__FIELD_1_MASK 0x003FFFFFL +//DB_VRS_OVERRIDE_CNTL +#define DB_VRS_OVERRIDE_CNTL__VRS_OVERRIDE_RATE_COMBINER_MODE__SHIFT 0x0 +#define DB_VRS_OVERRIDE_CNTL__VRS_OVERRIDE_RATE_X__SHIFT 0x4 +#define DB_VRS_OVERRIDE_CNTL__VRS_OVERRIDE_RATE_Y__SHIFT 0x6 +#define DB_VRS_OVERRIDE_CNTL__VRS_OVERRIDE_RATE_COMBINER_MODE_MASK 0x00000007L +#define DB_VRS_OVERRIDE_CNTL__VRS_OVERRIDE_RATE_X_MASK 0x00000030L +#define DB_VRS_OVERRIDE_CNTL__VRS_OVERRIDE_RATE_Y_MASK 0x000000C0L //DB_Z_READ_BASE_HI #define DB_Z_READ_BASE_HI__BASE_HI__SHIFT 0x0 #define DB_Z_READ_BASE_HI__BASE_HI_MASK 0x000000FFL @@ -22598,6 +22615,7 @@ #define PA_CL_VS_OUT_CNTL__VS_OUT_MISC_SIDE_BUS_ENA__SHIFT 0x18 #define PA_CL_VS_OUT_CNTL__USE_VTX_GS_CUT_FLAG__SHIFT 0x19 #define PA_CL_VS_OUT_CNTL__USE_VTX_LINE_WIDTH__SHIFT 0x1b +#define PA_CL_VS_OUT_CNTL__USE_VTX_VRS_RATE__SHIFT 0x1c #define PA_CL_VS_OUT_CNTL__BYPASS_VTX_RATE_COMBINER__SHIFT 0x1d #define PA_CL_VS_OUT_CNTL__BYPASS_PRIM_RATE_COMBINER__SHIFT 0x1e #define PA_CL_VS_OUT_CNTL__CLIP_DIST_ENA_0_MASK 0x00000001L @@ -22627,6 +22645,7 @@ #define PA_CL_VS_OUT_CNTL__VS_OUT_MISC_SIDE_BUS_ENA_MASK 0x01000000L #define PA_CL_VS_OUT_CNTL__USE_VTX_GS_CUT_FLAG_MASK 0x02000000L #define PA_CL_VS_OUT_CNTL__USE_VTX_LINE_WIDTH_MASK 0x08000000L +#define PA_CL_VS_OUT_CNTL__USE_VTX_VRS_RATE_MASK 0x10000000L #define PA_CL_VS_OUT_CNTL__BYPASS_VTX_RATE_COMBINER_MASK 0x20000000L #define PA_CL_VS_OUT_CNTL__BYPASS_PRIM_RATE_COMBINER_MASK 0x40000000L //PA_CL_NANINF_CNTL @@ -22740,6 +22759,19 @@ //PA_STATE_STEREO_X #define PA_STATE_STEREO_X__STEREO_X_OFFSET__SHIFT 0x0 #define PA_STATE_STEREO_X__STEREO_X_OFFSET_MASK 0xFFFFFFFFL +//PA_CL_VRS_CNTL +#define PA_CL_VRS_CNTL__VERTEX_RATE_COMBINER_MODE__SHIFT 0x0 +#define PA_CL_VRS_CNTL__PRIMITIVE_RATE_COMBINER_MODE__SHIFT 0x3 +#define PA_CL_VRS_CNTL__HTILE_RATE_COMBINER_MODE__SHIFT 0x6 +#define PA_CL_VRS_CNTL__SAMPLE_ITER_COMBINER_MODE__SHIFT 0x9 +#define PA_CL_VRS_CNTL__EXPOSE_VRS_PIXELS_MASK__SHIFT 0xd +#define PA_CL_VRS_CNTL__CMASK_RATE_HINT_FORCE_ZERO__SHIFT 0xe +#define PA_CL_VRS_CNTL__VERTEX_RATE_COMBINER_MODE_MASK 0x00000007L +#define PA_CL_VRS_CNTL__PRIMITIVE_RATE_COMBINER_MODE_MASK 0x00000038L +#define PA_CL_VRS_CNTL__HTILE_RATE_COMBINER_MODE_MASK 0x000001C0L +#define PA_CL_VRS_CNTL__SAMPLE_ITER_COMBINER_MODE_MASK 0x00000E00L +#define PA_CL_VRS_CNTL__EXPOSE_VRS_PIXELS_MASK_MASK 0x00002000L +#define PA_CL_VRS_CNTL__CMASK_RATE_HINT_FORCE_ZERO_MASK 0x00004000L //PA_SU_POINT_SIZE #define PA_SU_POINT_SIZE__HEIGHT__SHIFT 0x0 #define PA_SU_POINT_SIZE__WIDTH__SHIFT 0x10 @@ -23088,6 +23120,7 @@ #define DB_HTILE_SURFACE__DST_OUTSIDE_ZERO_TO_ONE__SHIFT 0x10 #define DB_HTILE_SURFACE__RESERVED_FIELD_6__SHIFT 0x11 #define DB_HTILE_SURFACE__PIPE_ALIGNED__SHIFT 0x12 +#define DB_HTILE_SURFACE__VRS_HTILE_ENCODING__SHIFT 0x13 #define DB_HTILE_SURFACE__RESERVED_FIELD_1_MASK 0x00000001L #define DB_HTILE_SURFACE__FULL_CACHE_MASK 0x00000002L #define DB_HTILE_SURFACE__RESERVED_FIELD_2_MASK 0x00000004L @@ -23097,6 +23130,7 @@ #define DB_HTILE_SURFACE__DST_OUTSIDE_ZERO_TO_ONE_MASK 0x00010000L #define DB_HTILE_SURFACE__RESERVED_FIELD_6_MASK 0x00020000L #define DB_HTILE_SURFACE__PIPE_ALIGNED_MASK 0x00040000L +#define DB_HTILE_SURFACE__VRS_HTILE_ENCODING_MASK 0x00180000L //DB_SRESULTS_COMPARE_STATE0 #define DB_SRESULTS_COMPARE_STATE0__COMPAREFUNC0__SHIFT 0x0 #define DB_SRESULTS_COMPARE_STATE0__COMPAREVALUE0__SHIFT 0x4 @@ -24954,6 +24988,7 @@ #define CB_COLOR0_ATTRIB3__CMASK_PIPE_ALIGNED__SHIFT 0x1a #define CB_COLOR0_ATTRIB3__RESOURCE_LEVEL__SHIFT 0x1b #define CB_COLOR0_ATTRIB3__DCC_PIPE_ALIGNED__SHIFT 0x1e +#define CB_COLOR0_ATTRIB3__VRS_RATE_HINT_ENABLE__SHIFT 0x1f #define CB_COLOR0_ATTRIB3__MIP0_DEPTH_MASK 0x00001FFFL #define CB_COLOR0_ATTRIB3__META_LINEAR_MASK 0x00002000L #define CB_COLOR0_ATTRIB3__COLOR_SW_MODE_MASK 0x0007C000L @@ -24962,6 +24997,7 @@ #define CB_COLOR0_ATTRIB3__CMASK_PIPE_ALIGNED_MASK 0x04000000L #define CB_COLOR0_ATTRIB3__RESOURCE_LEVEL_MASK 0x38000000L #define CB_COLOR0_ATTRIB3__DCC_PIPE_ALIGNED_MASK 0x40000000L +#define CB_COLOR0_ATTRIB3__VRS_RATE_HINT_ENABLE_MASK 0x80000000L //CB_COLOR1_ATTRIB3 #define CB_COLOR1_ATTRIB3__MIP0_DEPTH__SHIFT 0x0 #define CB_COLOR1_ATTRIB3__META_LINEAR__SHIFT 0xd @@ -24971,6 +25007,7 @@ #define CB_COLOR1_ATTRIB3__CMASK_PIPE_ALIGNED__SHIFT 0x1a #define CB_COLOR1_ATTRIB3__RESOURCE_LEVEL__SHIFT 0x1b #define CB_COLOR1_ATTRIB3__DCC_PIPE_ALIGNED__SHIFT 0x1e +#define CB_COLOR1_ATTRIB3__VRS_RATE_HINT_ENABLE__SHIFT 0x1f #define CB_COLOR1_ATTRIB3__MIP0_DEPTH_MASK 0x00001FFFL #define CB_COLOR1_ATTRIB3__META_LINEAR_MASK 0x00002000L #define CB_COLOR1_ATTRIB3__COLOR_SW_MODE_MASK 0x0007C000L @@ -24979,6 +25016,7 @@ #define CB_COLOR1_ATTRIB3__CMASK_PIPE_ALIGNED_MASK 0x04000000L #define CB_COLOR1_ATTRIB3__RESOURCE_LEVEL_MASK 0x38000000L #define CB_COLOR1_ATTRIB3__DCC_PIPE_ALIGNED_MASK 0x40000000L +#define CB_COLOR1_ATTRIB3__VRS_RATE_HINT_ENABLE_MASK 0x80000000L //CB_COLOR2_ATTRIB3 #define CB_COLOR2_ATTRIB3__MIP0_DEPTH__SHIFT 0x0 #define CB_COLOR2_ATTRIB3__META_LINEAR__SHIFT 0xd @@ -24988,6 +25026,7 @@ #define CB_COLOR2_ATTRIB3__CMASK_PIPE_ALIGNED__SHIFT 0x1a #define CB_COLOR2_ATTRIB3__RESOURCE_LEVEL__SHIFT 0x1b #define CB_COLOR2_ATTRIB3__DCC_PIPE_ALIGNED__SHIFT 0x1e +#define CB_COLOR2_ATTRIB3__VRS_RATE_HINT_ENABLE__SHIFT 0x1f #define CB_COLOR2_ATTRIB3__MIP0_DEPTH_MASK 0x00001FFFL #define CB_COLOR2_ATTRIB3__META_LINEAR_MASK 0x00002000L #define CB_COLOR2_ATTRIB3__COLOR_SW_MODE_MASK 0x0007C000L @@ -24996,6 +25035,7 @@ #define CB_COLOR2_ATTRIB3__CMASK_PIPE_ALIGNED_MASK 0x04000000L #define CB_COLOR2_ATTRIB3__RESOURCE_LEVEL_MASK 0x38000000L #define CB_COLOR2_ATTRIB3__DCC_PIPE_ALIGNED_MASK 0x40000000L +#define CB_COLOR2_ATTRIB3__VRS_RATE_HINT_ENABLE_MASK 0x80000000L //CB_COLOR3_ATTRIB3 #define CB_COLOR3_ATTRIB3__MIP0_DEPTH__SHIFT 0x0 #define CB_COLOR3_ATTRIB3__META_LINEAR__SHIFT 0xd @@ -25005,6 +25045,7 @@ #define CB_COLOR3_ATTRIB3__CMASK_PIPE_ALIGNED__SHIFT 0x1a #define CB_COLOR3_ATTRIB3__RESOURCE_LEVEL__SHIFT 0x1b #define CB_COLOR3_ATTRIB3__DCC_PIPE_ALIGNED__SHIFT 0x1e +#define CB_COLOR3_ATTRIB3__VRS_RATE_HINT_ENABLE__SHIFT 0x1f #define CB_COLOR3_ATTRIB3__MIP0_DEPTH_MASK 0x00001FFFL #define CB_COLOR3_ATTRIB3__META_LINEAR_MASK 0x00002000L #define CB_COLOR3_ATTRIB3__COLOR_SW_MODE_MASK 0x0007C000L @@ -25013,6 +25054,7 @@ #define CB_COLOR3_ATTRIB3__CMASK_PIPE_ALIGNED_MASK 0x04000000L #define CB_COLOR3_ATTRIB3__RESOURCE_LEVEL_MASK 0x38000000L #define CB_COLOR3_ATTRIB3__DCC_PIPE_ALIGNED_MASK 0x40000000L +#define CB_COLOR3_ATTRIB3__VRS_RATE_HINT_ENABLE_MASK 0x80000000L //CB_COLOR4_ATTRIB3 #define CB_COLOR4_ATTRIB3__MIP0_DEPTH__SHIFT 0x0 #define CB_COLOR4_ATTRIB3__META_LINEAR__SHIFT 0xd @@ -25022,6 +25064,7 @@ #define CB_COLOR4_ATTRIB3__CMASK_PIPE_ALIGNED__SHIFT 0x1a #define CB_COLOR4_ATTRIB3__RESOURCE_LEVEL__SHIFT 0x1b #define CB_COLOR4_ATTRIB3__DCC_PIPE_ALIGNED__SHIFT 0x1e +#define CB_COLOR4_ATTRIB3__VRS_RATE_HINT_ENABLE__SHIFT 0x1f #define CB_COLOR4_ATTRIB3__MIP0_DEPTH_MASK 0x00001FFFL #define CB_COLOR4_ATTRIB3__META_LINEAR_MASK 0x00002000L #define CB_COLOR4_ATTRIB3__COLOR_SW_MODE_MASK 0x0007C000L @@ -25030,6 +25073,7 @@ #define CB_COLOR4_ATTRIB3__CMASK_PIPE_ALIGNED_MASK 0x04000000L #define CB_COLOR4_ATTRIB3__RESOURCE_LEVEL_MASK 0x38000000L #define CB_COLOR4_ATTRIB3__DCC_PIPE_ALIGNED_MASK 0x40000000L +#define CB_COLOR4_ATTRIB3__VRS_RATE_HINT_ENABLE_MASK 0x80000000L //CB_COLOR5_ATTRIB3 #define CB_COLOR5_ATTRIB3__MIP0_DEPTH__SHIFT 0x0 #define CB_COLOR5_ATTRIB3__META_LINEAR__SHIFT 0xd @@ -25039,6 +25083,7 @@ #define CB_COLOR5_ATTRIB3__CMASK_PIPE_ALIGNED__SHIFT 0x1a #define CB_COLOR5_ATTRIB3__RESOURCE_LEVEL__SHIFT 0x1b #define CB_COLOR5_ATTRIB3__DCC_PIPE_ALIGNED__SHIFT 0x1e +#define CB_COLOR5_ATTRIB3__VRS_RATE_HINT_ENABLE__SHIFT 0x1f #define CB_COLOR5_ATTRIB3__MIP0_DEPTH_MASK 0x00001FFFL #define CB_COLOR5_ATTRIB3__META_LINEAR_MASK 0x00002000L #define CB_COLOR5_ATTRIB3__COLOR_SW_MODE_MASK 0x0007C000L @@ -25047,6 +25092,7 @@ #define CB_COLOR5_ATTRIB3__CMASK_PIPE_ALIGNED_MASK 0x04000000L #define CB_COLOR5_ATTRIB3__RESOURCE_LEVEL_MASK 0x38000000L #define CB_COLOR5_ATTRIB3__DCC_PIPE_ALIGNED_MASK 0x40000000L +#define CB_COLOR5_ATTRIB3__VRS_RATE_HINT_ENABLE_MASK 0x80000000L //CB_COLOR6_ATTRIB3 #define CB_COLOR6_ATTRIB3__MIP0_DEPTH__SHIFT 0x0 #define CB_COLOR6_ATTRIB3__META_LINEAR__SHIFT 0xd @@ -25056,6 +25102,7 @@ #define CB_COLOR6_ATTRIB3__CMASK_PIPE_ALIGNED__SHIFT 0x1a #define CB_COLOR6_ATTRIB3__RESOURCE_LEVEL__SHIFT 0x1b #define CB_COLOR6_ATTRIB3__DCC_PIPE_ALIGNED__SHIFT 0x1e +#define CB_COLOR6_ATTRIB3__VRS_RATE_HINT_ENABLE__SHIFT 0x1f #define CB_COLOR6_ATTRIB3__MIP0_DEPTH_MASK 0x00001FFFL #define CB_COLOR6_ATTRIB3__META_LINEAR_MASK 0x00002000L #define CB_COLOR6_ATTRIB3__COLOR_SW_MODE_MASK 0x0007C000L @@ -25064,6 +25111,7 @@ #define CB_COLOR6_ATTRIB3__CMASK_PIPE_ALIGNED_MASK 0x04000000L #define CB_COLOR6_ATTRIB3__RESOURCE_LEVEL_MASK 0x38000000L #define CB_COLOR6_ATTRIB3__DCC_PIPE_ALIGNED_MASK 0x40000000L +#define CB_COLOR6_ATTRIB3__VRS_RATE_HINT_ENABLE_MASK 0x80000000L //CB_COLOR7_ATTRIB3 #define CB_COLOR7_ATTRIB3__MIP0_DEPTH__SHIFT 0x0 #define CB_COLOR7_ATTRIB3__META_LINEAR__SHIFT 0xd @@ -25073,6 +25121,7 @@ #define CB_COLOR7_ATTRIB3__CMASK_PIPE_ALIGNED__SHIFT 0x1a #define CB_COLOR7_ATTRIB3__RESOURCE_LEVEL__SHIFT 0x1b #define CB_COLOR7_ATTRIB3__DCC_PIPE_ALIGNED__SHIFT 0x1e +#define CB_COLOR7_ATTRIB3__VRS_RATE_HINT_ENABLE__SHIFT 0x1f #define CB_COLOR7_ATTRIB3__MIP0_DEPTH_MASK 0x00001FFFL #define CB_COLOR7_ATTRIB3__META_LINEAR_MASK 0x00002000L #define CB_COLOR7_ATTRIB3__COLOR_SW_MODE_MASK 0x0007C000L @@ -25081,6 +25130,7 @@ #define CB_COLOR7_ATTRIB3__CMASK_PIPE_ALIGNED_MASK 0x04000000L #define CB_COLOR7_ATTRIB3__RESOURCE_LEVEL_MASK 0x38000000L #define CB_COLOR7_ATTRIB3__DCC_PIPE_ALIGNED_MASK 0x40000000L +#define CB_COLOR7_ATTRIB3__VRS_RATE_HINT_ENABLE_MASK 0x80000000L // addressBlock: gc_gfxudec From 1b51916b9734acd609c4f552e582cdc59ac254d6 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 1 May 2020 16:45:09 -0400 Subject: [PATCH 045/322] drm/amdgpu: add VCN 3.0 AV1 registers This adds the AV1 registers. Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- .../include/asic_reg/vcn/vcn_3_0_0_sh_mask.h | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_3_0_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_3_0_0_sh_mask.h index c0efd90808f2..58cf7adb9d54 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_3_0_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_3_0_0_sh_mask.h @@ -2393,6 +2393,7 @@ #define VCN_FEATURES__HAS_MJPEG2_IDCT_DEC__SHIFT 0x7 #define VCN_FEATURES__HAS_SCLR_DEC__SHIFT 0x8 #define VCN_FEATURES__HAS_VP9_DEC__SHIFT 0x9 +#define VCN_FEATURES__HAS_AV1_DEC__SHIFT 0xa #define VCN_FEATURES__HAS_EFC_ENC__SHIFT 0xb #define VCN_FEATURES__HAS_EFC_HDR2SDR_ENC__SHIFT 0xc #define VCN_FEATURES__HAS_DUAL_MJPEG_DEC__SHIFT 0xd @@ -2407,6 +2408,7 @@ #define VCN_FEATURES__HAS_MJPEG2_IDCT_DEC_MASK 0x00000080L #define VCN_FEATURES__HAS_SCLR_DEC_MASK 0x00000100L #define VCN_FEATURES__HAS_VP9_DEC_MASK 0x00000200L +#define VCN_FEATURES__HAS_AV1_DEC_MASK 0x00000400L #define VCN_FEATURES__HAS_EFC_ENC_MASK 0x00000800L #define VCN_FEATURES__HAS_EFC_HDR2SDR_ENC_MASK 0x00001000L #define VCN_FEATURES__HAS_DUAL_MJPEG_DEC_MASK 0x00002000L @@ -2809,8 +2811,10 @@ #define UVD_SUVD_CGC_GATE__IME_HEVC__SHIFT 0x18 #define UVD_SUVD_CGC_GATE__EFC__SHIFT 0x19 #define UVD_SUVD_CGC_GATE__SAOE__SHIFT 0x1a +#define UVD_SUVD_CGC_GATE__SRE_AV1__SHIFT 0x1b #define UVD_SUVD_CGC_GATE__FBC_PCLK__SHIFT 0x1c #define UVD_SUVD_CGC_GATE__FBC_CCLK__SHIFT 0x1d +#define UVD_SUVD_CGC_GATE__SCM_AV1__SHIFT 0x1e #define UVD_SUVD_CGC_GATE__SMPA__SHIFT 0x1f #define UVD_SUVD_CGC_GATE__SRE_MASK 0x00000001L #define UVD_SUVD_CGC_GATE__SIT_MASK 0x00000002L @@ -2839,8 +2843,10 @@ #define UVD_SUVD_CGC_GATE__IME_HEVC_MASK 0x01000000L #define UVD_SUVD_CGC_GATE__EFC_MASK 0x02000000L #define UVD_SUVD_CGC_GATE__SAOE_MASK 0x04000000L +#define UVD_SUVD_CGC_GATE__SRE_AV1_MASK 0x08000000L #define UVD_SUVD_CGC_GATE__FBC_PCLK_MASK 0x10000000L #define UVD_SUVD_CGC_GATE__FBC_CCLK_MASK 0x20000000L +#define UVD_SUVD_CGC_GATE__SCM_AV1_MASK 0x40000000L #define UVD_SUVD_CGC_GATE__SMPA_MASK 0x80000000L //UVD_SUVD_CGC_STATUS #define UVD_SUVD_CGC_STATUS__SRE_VCLK__SHIFT 0x0 @@ -2873,6 +2879,8 @@ #define UVD_SUVD_CGC_STATUS__IME_HEVC_DCLK__SHIFT 0x1b #define UVD_SUVD_CGC_STATUS__EFC_DCLK__SHIFT 0x1c #define UVD_SUVD_CGC_STATUS__SAOE_DCLK__SHIFT 0x1d +#define UVD_SUVD_CGC_STATUS__SRE_AV1_VCLK__SHIFT 0x1e +#define UVD_SUVD_CGC_STATUS__SCM_AV1_DCLK__SHIFT 0x1f #define UVD_SUVD_CGC_STATUS__SRE_VCLK_MASK 0x00000001L #define UVD_SUVD_CGC_STATUS__SRE_DCLK_MASK 0x00000002L #define UVD_SUVD_CGC_STATUS__SIT_DCLK_MASK 0x00000004L @@ -2903,6 +2911,8 @@ #define UVD_SUVD_CGC_STATUS__IME_HEVC_DCLK_MASK 0x08000000L #define UVD_SUVD_CGC_STATUS__EFC_DCLK_MASK 0x10000000L #define UVD_SUVD_CGC_STATUS__SAOE_DCLK_MASK 0x20000000L +#define UVD_SUVD_CGC_STATUS__SRE_AV1_VCLK_MASK 0x40000000L +#define UVD_SUVD_CGC_STATUS__SCM_AV1_DCLK_MASK 0x80000000L //UVD_SUVD_CGC_CTRL #define UVD_SUVD_CGC_CTRL__SRE_MODE__SHIFT 0x0 #define UVD_SUVD_CGC_CTRL__SIT_MODE__SHIFT 0x1 @@ -2919,6 +2929,8 @@ #define UVD_SUVD_CGC_CTRL__SMPA_MODE__SHIFT 0xc #define UVD_SUVD_CGC_CTRL__MPBE0_MODE__SHIFT 0xd #define UVD_SUVD_CGC_CTRL__MPBE1_MODE__SHIFT 0xe +#define UVD_SUVD_CGC_CTRL__SIT_AV1_MODE__SHIFT 0xf +#define UVD_SUVD_CGC_CTRL__SDB_AV1_MODE__SHIFT 0x10 #define UVD_SUVD_CGC_CTRL__MPC1_MODE__SHIFT 0x11 #define UVD_SUVD_CGC_CTRL__FBC_PCLK__SHIFT 0x1c #define UVD_SUVD_CGC_CTRL__FBC_CCLK__SHIFT 0x1d @@ -2937,6 +2949,8 @@ #define UVD_SUVD_CGC_CTRL__SMPA_MODE_MASK 0x00001000L #define UVD_SUVD_CGC_CTRL__MPBE0_MODE_MASK 0x00002000L #define UVD_SUVD_CGC_CTRL__MPBE1_MODE_MASK 0x00004000L +#define UVD_SUVD_CGC_CTRL__SIT_AV1_MODE_MASK 0x00008000L +#define UVD_SUVD_CGC_CTRL__SDB_AV1_MODE_MASK 0x00010000L #define UVD_SUVD_CGC_CTRL__MPC1_MODE_MASK 0x00020000L #define UVD_SUVD_CGC_CTRL__FBC_PCLK_MASK 0x10000000L #define UVD_SUVD_CGC_CTRL__FBC_CCLK_MASK 0x20000000L @@ -3658,6 +3672,8 @@ #define UVD_SUVD_CGC_STATUS2__SMPA_VCLK__SHIFT 0x0 #define UVD_SUVD_CGC_STATUS2__SMPA_DCLK__SHIFT 0x1 #define UVD_SUVD_CGC_STATUS2__MPBE1_DCLK__SHIFT 0x3 +#define UVD_SUVD_CGC_STATUS2__SIT_AV1_DCLK__SHIFT 0x4 +#define UVD_SUVD_CGC_STATUS2__SDB_AV1_DCLK__SHIFT 0x5 #define UVD_SUVD_CGC_STATUS2__MPC1_DCLK__SHIFT 0x6 #define UVD_SUVD_CGC_STATUS2__MPC1_SCLK__SHIFT 0x7 #define UVD_SUVD_CGC_STATUS2__MPC1_VCLK__SHIFT 0x8 @@ -3666,6 +3682,8 @@ #define UVD_SUVD_CGC_STATUS2__SMPA_VCLK_MASK 0x00000001L #define UVD_SUVD_CGC_STATUS2__SMPA_DCLK_MASK 0x00000002L #define UVD_SUVD_CGC_STATUS2__MPBE1_DCLK_MASK 0x00000008L +#define UVD_SUVD_CGC_STATUS2__SIT_AV1_DCLK_MASK 0x00000010L +#define UVD_SUVD_CGC_STATUS2__SDB_AV1_DCLK_MASK 0x00000020L #define UVD_SUVD_CGC_STATUS2__MPC1_DCLK_MASK 0x00000040L #define UVD_SUVD_CGC_STATUS2__MPC1_SCLK_MASK 0x00000080L #define UVD_SUVD_CGC_STATUS2__MPC1_VCLK_MASK 0x00000100L @@ -3674,25 +3692,41 @@ //UVD_SUVD_CGC_GATE2 #define UVD_SUVD_CGC_GATE2__MPBE0__SHIFT 0x0 #define UVD_SUVD_CGC_GATE2__MPBE1__SHIFT 0x1 +#define UVD_SUVD_CGC_GATE2__SIT_AV1__SHIFT 0x2 +#define UVD_SUVD_CGC_GATE2__SDB_AV1__SHIFT 0x3 #define UVD_SUVD_CGC_GATE2__MPC1__SHIFT 0x4 #define UVD_SUVD_CGC_GATE2__MPBE0_MASK 0x00000001L #define UVD_SUVD_CGC_GATE2__MPBE1_MASK 0x00000002L +#define UVD_SUVD_CGC_GATE2__SIT_AV1_MASK 0x00000004L +#define UVD_SUVD_CGC_GATE2__SDB_AV1_MASK 0x00000008L #define UVD_SUVD_CGC_GATE2__MPC1_MASK 0x00000010L //UVD_SUVD_INT_STATUS2 #define UVD_SUVD_INT_STATUS2__SMPA_FUNC_INT__SHIFT 0x0 #define UVD_SUVD_INT_STATUS2__SMPA_ERR_INT__SHIFT 0x5 +#define UVD_SUVD_INT_STATUS2__SDB_AV1_FUNC_INT__SHIFT 0x6 +#define UVD_SUVD_INT_STATUS2__SDB_AV1_ERR_INT__SHIFT 0xb #define UVD_SUVD_INT_STATUS2__SMPA_FUNC_INT_MASK 0x0000001FL #define UVD_SUVD_INT_STATUS2__SMPA_ERR_INT_MASK 0x00000020L +#define UVD_SUVD_INT_STATUS2__SDB_AV1_FUNC_INT_MASK 0x000007C0L +#define UVD_SUVD_INT_STATUS2__SDB_AV1_ERR_INT_MASK 0x00000800L //UVD_SUVD_INT_EN2 #define UVD_SUVD_INT_EN2__SMPA_FUNC_INT_EN__SHIFT 0x0 #define UVD_SUVD_INT_EN2__SMPA_ERR_INT_EN__SHIFT 0x5 +#define UVD_SUVD_INT_EN2__SDB_AV1_FUNC_INT_EN__SHIFT 0x6 +#define UVD_SUVD_INT_EN2__SDB_AV1_ERR_INT_EN__SHIFT 0xb #define UVD_SUVD_INT_EN2__SMPA_FUNC_INT_EN_MASK 0x0000001FL #define UVD_SUVD_INT_EN2__SMPA_ERR_INT_EN_MASK 0x00000020L +#define UVD_SUVD_INT_EN2__SDB_AV1_FUNC_INT_EN_MASK 0x000007C0L +#define UVD_SUVD_INT_EN2__SDB_AV1_ERR_INT_EN_MASK 0x00000800L //UVD_SUVD_INT_ACK2 #define UVD_SUVD_INT_ACK2__SMPA_FUNC_INT_ACK__SHIFT 0x0 #define UVD_SUVD_INT_ACK2__SMPA_ERR_INT_ACK__SHIFT 0x5 +#define UVD_SUVD_INT_ACK2__SDB_AV1_FUNC_INT_ACK__SHIFT 0x6 +#define UVD_SUVD_INT_ACK2__SDB_AV1_ERR_INT_ACK__SHIFT 0xb #define UVD_SUVD_INT_ACK2__SMPA_FUNC_INT_ACK_MASK 0x0000001FL #define UVD_SUVD_INT_ACK2__SMPA_ERR_INT_ACK_MASK 0x00000020L +#define UVD_SUVD_INT_ACK2__SDB_AV1_FUNC_INT_ACK_MASK 0x000007C0L +#define UVD_SUVD_INT_ACK2__SDB_AV1_ERR_INT_ACK_MASK 0x00000800L // addressBlock: uvd0_ecpudec From 8a410da6aaf6e9a1903aeefc6353f62dfcf0065a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 1 May 2020 16:46:11 -0400 Subject: [PATCH 046/322] drm/amdgpu: use the AV1 defines for VCN 3.0 Switch from magic numbers to defines for AV1 clockgating. Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 63e5547cfb16..3a805eaf6f11 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -746,18 +746,18 @@ static void vcn_v3_0_disable_clock_gating(struct amdgpu_device *adev, int inst) | UVD_SUVD_CGC_GATE__IME_HEVC_MASK | UVD_SUVD_CGC_GATE__EFC_MASK | UVD_SUVD_CGC_GATE__SAOE_MASK - | 0x08000000 + | UVD_SUVD_CGC_GATE__SRE_AV1_MASK | UVD_SUVD_CGC_GATE__FBC_PCLK_MASK | UVD_SUVD_CGC_GATE__FBC_CCLK_MASK - | 0x40000000 + | UVD_SUVD_CGC_GATE__SCM_AV1_MASK | UVD_SUVD_CGC_GATE__SMPA_MASK); WREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_GATE, data); data = RREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_GATE2); data |= (UVD_SUVD_CGC_GATE2__MPBE0_MASK | UVD_SUVD_CGC_GATE2__MPBE1_MASK - | 0x00000004 - | 0x00000008 + | UVD_SUVD_CGC_GATE2__SIT_AV1_MASK + | UVD_SUVD_CGC_GATE2__SDB_AV1_MASK | UVD_SUVD_CGC_GATE2__MPC1_MASK); WREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_GATE2, data); @@ -776,8 +776,8 @@ static void vcn_v3_0_disable_clock_gating(struct amdgpu_device *adev, int inst) | UVD_SUVD_CGC_CTRL__SMPA_MODE_MASK | UVD_SUVD_CGC_CTRL__MPBE0_MODE_MASK | UVD_SUVD_CGC_CTRL__MPBE1_MODE_MASK - | 0x00008000 - | 0x00010000 + | UVD_SUVD_CGC_CTRL__SIT_AV1_MODE_MASK + | UVD_SUVD_CGC_CTRL__SDB_AV1_MODE_MASK | UVD_SUVD_CGC_CTRL__MPC1_MODE_MASK | UVD_SUVD_CGC_CTRL__FBC_PCLK_MASK | UVD_SUVD_CGC_CTRL__FBC_CCLK_MASK); @@ -892,8 +892,8 @@ static void vcn_v3_0_enable_clock_gating(struct amdgpu_device *adev, int inst) | UVD_SUVD_CGC_CTRL__SMPA_MODE_MASK | UVD_SUVD_CGC_CTRL__MPBE0_MODE_MASK | UVD_SUVD_CGC_CTRL__MPBE1_MODE_MASK - | 0x00008000 - | 0x00010000 + | UVD_SUVD_CGC_CTRL__SIT_AV1_MODE_MASK + | UVD_SUVD_CGC_CTRL__SDB_AV1_MODE_MASK | UVD_SUVD_CGC_CTRL__MPC1_MODE_MASK | UVD_SUVD_CGC_CTRL__FBC_PCLK_MASK | UVD_SUVD_CGC_CTRL__FBC_CCLK_MASK); From fc08ce66c0f3776b185b099b8093df2fd7f81151 Mon Sep 17 00:00:00 2001 From: Likun Gao Date: Thu, 16 Jan 2020 10:45:38 +0800 Subject: [PATCH 047/322] drm/amdgpu: add device ID for sienna_cichlid (v2) Add device ID for sienna_cichlid. v2: squash in additional device ids. Reviewed-by: Hawking Zhang Signed-off-by: Likun Gao Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 26127c7d2f32..eb11149c3fd2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1047,6 +1047,14 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x7360, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12|AMD_EXP_HW_SUPPORT}, {0x1002, 0x7362, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12|AMD_EXP_HW_SUPPORT}, + /* Sienna_Cichlid */ + {0x1002, 0x73A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID}, + {0x1002, 0x73A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID}, + {0x1002, 0x73A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID}, + {0x1002, 0x73AB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID}, + {0x1002, 0x73AE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID}, + {0x1002, 0x73BF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID}, + {0, 0, 0} }; From b4ebd0827fd024d0d23c1c9ee33be39c57cf7ad0 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 15 Sep 2020 13:36:20 -0400 Subject: [PATCH 048/322] drm/amdgpu: remove experimental flag from navi12 Navi12 has worked fine for a while now. Reviewed-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index eb11149c3fd2..321032d3a51a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1044,8 +1044,8 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x1636, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU}, /* Navi12 */ - {0x1002, 0x7360, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12|AMD_EXP_HW_SUPPORT}, - {0x1002, 0x7362, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x7360, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12}, + {0x1002, 0x7362, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI12}, /* Sienna_Cichlid */ {0x1002, 0x73A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID}, From d36cb0205f034e943aa29e35b59c6a441f0056b5 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Fri, 18 Sep 2020 12:20:02 +0100 Subject: [PATCH 049/322] regmap: debugfs: Add back in erroneously removed initialisation of ret Fixes: 94cc89eb8fa5 ("regmap: debugfs: Fix handling of name string for debugfs init delays") Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20200918112002.15216-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index e22c21affebd..388ff8a6816c 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -694,9 +694,9 @@ struct regmap *__regmap_init(struct device *dev, const char *lock_name) { struct regmap *map; + int ret = -EINVAL; enum regmap_endian reg_endian, val_endian; int i, j; - int ret; if (!config) goto err; From ee15c7b53e52fb04583f734461244c4dcca828fa Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 18 Sep 2020 08:58:58 -0400 Subject: [PATCH 050/322] pNFS/flexfiles: Ensure we initialise the mirror bsizes correctly on read While it is true that reading from an unmirrored source always uses index 0, that is no longer true for mirrored sources when we fail over. Fixes: 563c53e73b8b ("NFS: Fix flexfiles read failover") Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index ff8965d1a4d4..1edeebd51937 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -838,6 +838,7 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs4_ff_layout_mirror *mirror; struct nfs4_pnfs_ds *ds; int ds_idx; + u32 i; retry: ff_layout_pg_check_layout(pgio, req); @@ -863,14 +864,14 @@ retry: goto retry; } - mirror = FF_LAYOUT_COMP(pgio->pg_lseg, ds_idx); + for (i = 0; i < pgio->pg_mirror_count; i++) { + mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i); + pgm = &pgio->pg_mirrors[i]; + pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize; + } pgio->pg_mirror_idx = ds_idx; - /* read always uses only one mirror - idx 0 for pgio layer */ - pgm = &pgio->pg_mirrors[0]; - pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize; - if (NFS_SERVER(pgio->pg_inode)->flags & (NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR)) pgio->pg_maxretrans = io_maxretrans; From b9df46d08a8d098ea2124cb9e3b84458a474b4d4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 18 Sep 2020 09:19:43 -0400 Subject: [PATCH 051/322] pNFS/flexfiles: Be consistent about mirror index types A mirror index is always of type u32. Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 34 +++++++++++++------------- include/linux/nfs_xdr.h | 4 +-- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 1edeebd51937..a163533446fa 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -715,7 +715,7 @@ nfs4_ff_layout_stat_io_end_write(struct rpc_task *task, } static void -ff_layout_mark_ds_unreachable(struct pnfs_layout_segment *lseg, int idx) +ff_layout_mark_ds_unreachable(struct pnfs_layout_segment *lseg, u32 idx) { struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx); @@ -724,7 +724,7 @@ ff_layout_mark_ds_unreachable(struct pnfs_layout_segment *lseg, int idx) } static void -ff_layout_mark_ds_reachable(struct pnfs_layout_segment *lseg, int idx) +ff_layout_mark_ds_reachable(struct pnfs_layout_segment *lseg, u32 idx) { struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx); @@ -734,14 +734,14 @@ ff_layout_mark_ds_reachable(struct pnfs_layout_segment *lseg, int idx) static struct nfs4_pnfs_ds * ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg, - int start_idx, int *best_idx, + u32 start_idx, u32 *best_idx, bool check_device) { struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg); struct nfs4_ff_layout_mirror *mirror; struct nfs4_pnfs_ds *ds; bool fail_return = false; - int idx; + u32 idx; /* mirrors are initially sorted by efficiency */ for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) { @@ -766,21 +766,21 @@ ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg, static struct nfs4_pnfs_ds * ff_layout_choose_any_ds_for_read(struct pnfs_layout_segment *lseg, - int start_idx, int *best_idx) + u32 start_idx, u32 *best_idx) { return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, false); } static struct nfs4_pnfs_ds * ff_layout_choose_valid_ds_for_read(struct pnfs_layout_segment *lseg, - int start_idx, int *best_idx) + u32 start_idx, u32 *best_idx) { return ff_layout_choose_ds_for_read(lseg, start_idx, best_idx, true); } static struct nfs4_pnfs_ds * ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg, - int start_idx, int *best_idx) + u32 start_idx, u32 *best_idx) { struct nfs4_pnfs_ds *ds; @@ -791,7 +791,8 @@ ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg, } static struct nfs4_pnfs_ds * -ff_layout_get_ds_for_read(struct nfs_pageio_descriptor *pgio, int *best_idx) +ff_layout_get_ds_for_read(struct nfs_pageio_descriptor *pgio, + u32 *best_idx) { struct pnfs_layout_segment *lseg = pgio->pg_lseg; struct nfs4_pnfs_ds *ds; @@ -837,8 +838,7 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_pgio_mirror *pgm; struct nfs4_ff_layout_mirror *mirror; struct nfs4_pnfs_ds *ds; - int ds_idx; - u32 i; + u32 ds_idx, i; retry: ff_layout_pg_check_layout(pgio, req); @@ -895,7 +895,7 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs4_ff_layout_mirror *mirror; struct nfs_pgio_mirror *pgm; struct nfs4_pnfs_ds *ds; - int i; + u32 i; retry: ff_layout_pg_check_layout(pgio, req); @@ -1039,7 +1039,7 @@ static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs) static void ff_layout_resend_pnfs_read(struct nfs_pgio_header *hdr) { u32 idx = hdr->pgio_mirror_idx + 1; - int new_idx = 0; + u32 new_idx = 0; if (ff_layout_choose_any_ds_for_read(hdr->lseg, idx + 1, &new_idx)) ff_layout_send_layouterror(hdr->lseg); @@ -1076,7 +1076,7 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, struct nfs4_state *state, struct nfs_client *clp, struct pnfs_layout_segment *lseg, - int idx) + u32 idx) { struct pnfs_layout_hdr *lo = lseg->pls_layout; struct inode *inode = lo->plh_inode; @@ -1150,7 +1150,7 @@ reset: /* Retry all errors through either pNFS or MDS except for -EJUKEBOX */ static int ff_layout_async_handle_error_v3(struct rpc_task *task, struct pnfs_layout_segment *lseg, - int idx) + u32 idx) { struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx); @@ -1185,7 +1185,7 @@ static int ff_layout_async_handle_error(struct rpc_task *task, struct nfs4_state *state, struct nfs_client *clp, struct pnfs_layout_segment *lseg, - int idx) + u32 idx) { int vers = clp->cl_nfs_mod->rpc_vers->number; @@ -1212,7 +1212,7 @@ static int ff_layout_async_handle_error(struct rpc_task *task, } static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg, - int idx, u64 offset, u64 length, + u32 idx, u64 offset, u64 length, u32 *op_status, int opnum, int error) { struct nfs4_ff_layout_mirror *mirror; @@ -1810,7 +1810,7 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync) loff_t offset = hdr->args.offset; int vers; struct nfs_fh *fh; - int idx = hdr->pgio_mirror_idx; + u32 idx = hdr->pgio_mirror_idx; mirror = FF_LAYOUT_COMP(lseg, idx); ds = nfs4_ff_layout_prepare_ds(lseg, mirror, true); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 9408f3252c8e..69cb46f7b8d2 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1611,8 +1611,8 @@ struct nfs_pgio_header { __u64 mds_offset; /* Filelayout dense stripe */ struct nfs_page_array page_array; struct nfs_client *ds_clp; /* pNFS data server */ - int ds_commit_idx; /* ds index if ds_clp is set */ - int pgio_mirror_idx;/* mirror index in pgio layer */ + u32 ds_commit_idx; /* ds index if ds_clp is set */ + u32 pgio_mirror_idx;/* mirror index in pgio layer */ }; struct nfs_mds_commit_info { From ba2fd563b7408b8dd9bbd72fcae8bcb3c14e8ec8 Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Thu, 17 Sep 2020 04:58:33 -0700 Subject: [PATCH 052/322] tools/bpftool: Support passing BPFTOOL_VERSION to make This change facilitates out-of-tree builds, packaging, and versioning for test and debug purposes. Defining BPFTOOL_VERSION allows self-contained builds within the tools tree, since it avoids use of the 'kernelversion' target in the top-level makefile, which would otherwise pull in several other includes from outside the tools tree. Signed-off-by: Tony Ambardar Signed-off-by: Daniel Borkmann Acked-by: Quentin Monnet Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200917115833.1235518-1-Tony.Ambardar@gmail.com --- tools/bpf/bpftool/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 8462690a039b..4828913703b6 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -25,7 +25,7 @@ endif LIBBPF = $(LIBBPF_PATH)libbpf.a -BPFTOOL_VERSION := $(shell make -rR --no-print-directory -sC ../../.. kernelversion) +BPFTOOL_VERSION ?= $(shell make -rR --no-print-directory -sC ../../.. kernelversion) $(LIBBPF): FORCE $(if $(LIBBPF_OUTPUT),@mkdir -p $(LIBBPF_OUTPUT)) From 62fe3e42ba4fa8156ac6f9096f796dd721489e0a Mon Sep 17 00:00:00 2001 From: Mircea Caprioru Date: Wed, 2 Sep 2020 16:42:22 +0300 Subject: [PATCH 053/322] iio: adc: ad7124: Fix typo in device name This patch fixes the device name typo. Fixes: 951ad4700313 ("iio: adc: ad7124: move chip ID & name on the chip_info table") Signed-off-by: Mircea Caprioru Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20200902134222.28357-1-mircea.caprioru@analog.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7124.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iio/adc/ad7124.c b/drivers/iio/adc/ad7124.c index 8dce06e9e69c..766c73333604 100644 --- a/drivers/iio/adc/ad7124.c +++ b/drivers/iio/adc/ad7124.c @@ -177,12 +177,12 @@ static const struct iio_chan_spec ad7124_channel_template = { static struct ad7124_chip_info ad7124_chip_info_tbl[] = { [ID_AD7124_4] = { - .name = "ad7127-4", + .name = "ad7124-4", .chip_id = CHIPID_AD7124_4, .num_inputs = 8, }, [ID_AD7124_8] = { - .name = "ad7127-8", + .name = "ad7124-8", .chip_id = CHIPID_AD7124_8, .num_inputs = 16, }, From 472eb39103e885f302fd8fd6eff104fcf5503f1b Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Sun, 13 Sep 2020 09:52:30 -0700 Subject: [PATCH 054/322] ALSA: asihpi: fix iounmap in error handler clang static analysis flags this problem hpioctl.c:513:7: warning: Branch condition evaluates to a garbage value if (pci.ap_mem_base[idx]) { ^~~~~~~~~~~~~~~~~~~~ If there is a failure in the middle of the memory space loop, only some of the memory spaces need to be cleaned up. At the error handler, idx holds the number of successful memory spaces mapped. So rework the handler loop to use the old idx. There is a second problem, the memory space loop conditionally iomaps()/sets the mem_base so it is necessay to initize pci. Fixes: 719f82d3987a ("ALSA: Add support of AudioScience ASI boards") Signed-off-by: Tom Rix Link: https://lore.kernel.org/r/20200913165230.17166-1-trix@redhat.com Signed-off-by: Takashi Iwai --- sound/pci/asihpi/hpioctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/pci/asihpi/hpioctl.c b/sound/pci/asihpi/hpioctl.c index 496dcde9715d..9790f5108a16 100644 --- a/sound/pci/asihpi/hpioctl.c +++ b/sound/pci/asihpi/hpioctl.c @@ -343,7 +343,7 @@ int asihpi_adapter_probe(struct pci_dev *pci_dev, struct hpi_message hm; struct hpi_response hr; struct hpi_adapter adapter; - struct hpi_pci pci; + struct hpi_pci pci = { 0 }; memset(&adapter, 0, sizeof(adapter)); @@ -499,7 +499,7 @@ int asihpi_adapter_probe(struct pci_dev *pci_dev, return 0; err: - for (idx = 0; idx < HPI_MAX_ADAPTER_MEM_SPACES; idx++) { + while (--idx >= 0) { if (pci.ap_mem_base[idx]) { iounmap(pci.ap_mem_base[idx]); pci.ap_mem_base[idx] = NULL; From 3f74249057827c5f6676c41c18f6be12ce1469ce Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Mon, 14 Sep 2020 14:51:18 +0800 Subject: [PATCH 055/322] ALSA: hda/realtek - Couldn't detect Mic if booting with headset plugged We found a Mic detection issue on many Lenovo laptops, those laptops belong to differnt models and they have different audio design like internal mic connects to the codec or PCH, they all have this problem, the problem is if plugging a headset before powerup/reboot the machine, after booting up, the headphone could be detected but Mic couldn't. If we plug out and plug in the headset, both headphone and Mic could be detected then. Through debugging we found the codec on those laptops are same, it is alc257, and if we don't disable the 3k pulldown in alc256_shutup(), the issue will be fixed. So far there is no pop noise or power consumption regression on those laptops after this change. Cc: Kailang Yang Cc: Signed-off-by: Hui Wang Link: https://lore.kernel.org/r/20200914065118.19238-1-hui.wang@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 85e207173f5d..b6dc47da1d7b 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3428,7 +3428,11 @@ static void alc256_shutup(struct hda_codec *codec) /* 3k pull low control for Headset jack. */ /* NOTE: call this before clearing the pin, otherwise codec stalls */ - alc_update_coef_idx(codec, 0x46, 0, 3 << 12); + /* If disable 3k pulldown control for alc257, the Mic detection will not work correctly + * when booting with headset plugged. So skip setting it for the codec alc257 + */ + if (codec->core.vendor_id != 0x10ec0257) + alc_update_coef_idx(codec, 0x46, 0, 3 << 12); if (!spec->no_shutup_pins) snd_hda_codec_write(codec, hp_pin, 0, From f73bbf639b32acb6b409e188fdde5644b301978f Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Mon, 14 Sep 2020 15:02:29 +0800 Subject: [PATCH 056/322] ALSA: hda/realtek: Enable front panel headset LED on Lenovo ThinkStation P520 On Lenovo P520, the front panel headset LED isn't lit up right now. Realtek states that the LED needs to be enabled by ALC233's GPIO2, so let's do it accordingly to light the LED up. Signed-off-by: Kai-Heng Feng Acked-by: Hui Wang Cc: Link: https://lore.kernel.org/r/20200914070231.13192-1-kai.heng.feng@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index b6dc47da1d7b..c4d615cf8de1 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6055,6 +6055,7 @@ static void alc_fixup_thinkpad_acpi(struct hda_codec *codec, #include "hp_x360_helper.c" enum { + ALC269_FIXUP_GPIO2, ALC269_FIXUP_SONY_VAIO, ALC275_FIXUP_SONY_VAIO_GPIO2, ALC269_FIXUP_DELL_M101Z, @@ -6236,6 +6237,10 @@ enum { }; static const struct hda_fixup alc269_fixups[] = { + [ALC269_FIXUP_GPIO2] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_gpio2, + }, [ALC269_FIXUP_SONY_VAIO] = { .type = HDA_FIXUP_PINCTLS, .v.pins = (const struct hda_pintbl[]) { @@ -7055,6 +7060,8 @@ static const struct hda_fixup alc269_fixups[] = { [ALC233_FIXUP_LENOVO_MULTI_CODECS] = { .type = HDA_FIXUP_FUNC, .v.func = alc233_alc662_fixup_lenovo_dual_codecs, + .chained = true, + .chain_id = ALC269_FIXUP_GPIO2 }, [ALC233_FIXUP_ACER_HEADSET_MIC] = { .type = HDA_FIXUP_VERBS, From 315c7ad7a701baba28c628c4c5426b3d9617ceed Mon Sep 17 00:00:00 2001 From: Joakim Tjernlund Date: Thu, 10 Sep 2020 10:53:28 +0200 Subject: [PATCH 057/322] ALSA: usb-audio: Add delay quirk for H570e USB headsets Needs the same delay as H650e Signed-off-by: Joakim Tjernlund Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200910085328.19188-1-joakim.tjernlund@infinera.com Signed-off-by: Takashi Iwai --- sound/usb/quirks.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 75bbdc691243..892296df131d 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1678,12 +1678,13 @@ void snd_usb_ctl_msg_quirk(struct usb_device *dev, unsigned int pipe, && (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS) msleep(20); - /* Zoom R16/24, Logitech H650e, Jabra 550a, Kingston HyperX needs a tiny - * delay here, otherwise requests like get/set frequency return as - * failed despite actually succeeding. + /* Zoom R16/24, Logitech H650e/H570e, Jabra 550a, Kingston HyperX + * needs a tiny delay here, otherwise requests like get/set + * frequency return as failed despite actually succeeding. */ if ((chip->usb_id == USB_ID(0x1686, 0x00dd) || chip->usb_id == USB_ID(0x046d, 0x0a46) || + chip->usb_id == USB_ID(0x046d, 0x0a56) || chip->usb_id == USB_ID(0x0b0e, 0x0349) || chip->usb_id == USB_ID(0x0951, 0x16ad)) && (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS) From 19a508bd1ad8e444de86873bf2f2b2ab8edd6552 Mon Sep 17 00:00:00 2001 From: Charan Teja Reddy Date: Fri, 18 Sep 2020 16:02:31 +0530 Subject: [PATCH 058/322] dmabuf: fix NULL pointer dereference in dma_buf_release() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NULL pointer dereference is observed while exporting the dmabuf but failed to allocate the 'struct file' which results into the dropping of the allocated dentry corresponding to this file in the dmabuf fs, which is ending up in dma_buf_release() and accessing the uninitialzed dentry->d_fsdata. Call stack on 5.4 is below: dma_buf_release+0x2c/0x254 drivers/dma-buf/dma-buf.c:88 __dentry_kill+0x294/0x31c fs/dcache.c:584 dentry_kill fs/dcache.c:673 [inline] dput+0x250/0x380 fs/dcache.c:859 path_put+0x24/0x40 fs/namei.c:485 alloc_file_pseudo+0x1a4/0x200 fs/file_table.c:235 dma_buf_getfile drivers/dma-buf/dma-buf.c:473 [inline] dma_buf_export+0x25c/0x3ec drivers/dma-buf/dma-buf.c:585 Fix this by checking for the valid pointer in the dentry->d_fsdata. Fixes: 4ab59c3c638c ("dma-buf: Move dma_buf_release() from fops to dentry_ops") Cc: [5.7+] Signed-off-by: Charan Teja Reddy Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/391319/ Signed-off-by: Christian König --- drivers/dma-buf/dma-buf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index 58564d82a3a2..844967f98866 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -59,6 +59,8 @@ static void dma_buf_release(struct dentry *dentry) struct dma_buf *dmabuf; dmabuf = dentry->d_fsdata; + if (unlikely(!dmabuf)) + return; BUG_ON(dmabuf->vmapping_counter); From 845b89127bc5458d0152a4d63f165c62a22fcb70 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Thu, 10 Sep 2020 11:57:08 +0200 Subject: [PATCH 059/322] i2c: i801: Exclude device from suspend direct complete optimization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By default, PCI drivers with runtime PM enabled will skip the calls to suspend and resume on system PM. For this driver, we don't want that, as we need to perform additional steps for system PM to work properly on all systems. So instruct the PM core to not skip these calls. Fixes: a9c8088c7988 ("i2c: i801: Don't restore config registers on runtime PM") Reported-by: Volker Rümelin Signed-off-by: Jean Delvare Cc: stable@vger.kernel.org Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-i801.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index ebb4c0b03057..bffca729e1c7 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -1917,6 +1917,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) pci_set_drvdata(dev, priv); + dev_pm_set_driver_flags(&dev->dev, DPM_FLAG_NO_DIRECT_COMPLETE); pm_runtime_set_autosuspend_delay(&dev->dev, 1000); pm_runtime_use_autosuspend(&dev->dev); pm_runtime_put_autosuspend(&dev->dev); From 8f8bf00b1c00b37ef5c32a642e5c3e0a364182b6 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 21 Sep 2020 12:26:32 +0200 Subject: [PATCH 060/322] Revert "ALSA: hda - Fix silent audio output and corrupted input on MSI X570-A PRO" This reverts commit 15cbff3fbbc6 ("ALSA: hda - Fix silent audio output and corrupted input on MSI X570-A PRO"). A regression reported by a Fedora user for MSI X570-A PRO mobo. Until the correct solution is found out, let's revert the quirk as a quick workaround. Fixes: 15cbff3fbbc6 ("ALSA: hda - Fix silent audio output and corrupted input on MSI X570-A PRO") Reported-by: Hans de Goede Cc: Cc: Dan Crawford BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1879277 Link: https://lore.kernel.org/r/7efd2fe5-bf38-7f85-891a-eee3845d1493@redhat.com Link: https://lore.kernel.org/r/20200921102632.31139-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index c4d615cf8de1..d4f17b465892 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2475,7 +2475,6 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x1462, 0x1276, "MSI-GL73", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1462, 0x1293, "MSI-GP65", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1462, 0x7350, "MSI-7350", ALC889_FIXUP_CD), - SND_PCI_QUIRK(0x1462, 0x9c37, "MSI X570-A PRO", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1462, 0xda57, "MSI Z270-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS), SND_PCI_QUIRK_VENDOR(0x1462, "MSI", ALC882_FIXUP_GPIO3), SND_PCI_QUIRK(0x147b, 0x107a, "Abit AW9D-MAX", ALC882_FIXUP_ABIT_AW9D_MAX), From 35be8851d172c6e3db836c0f28c19087b10c9e00 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Mon, 21 Sep 2020 16:57:14 +0900 Subject: [PATCH 061/322] btrfs: fix overflow when copying corrupt csums for a message Syzkaller reported a buffer overflow in btree_readpage_end_io_hook() when loop mounting a crafted image: detected buffer overflow in memcpy ------------[ cut here ]------------ kernel BUG at lib/string.c:1129! invalid opcode: 0000 [#1] PREEMPT SMP KASAN CPU: 1 PID: 26 Comm: kworker/u4:2 Not tainted 5.9.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: btrfs-endio-meta btrfs_work_helper RIP: 0010:fortify_panic+0xf/0x20 lib/string.c:1129 RSP: 0018:ffffc90000e27980 EFLAGS: 00010286 RAX: 0000000000000022 RBX: ffff8880a80dca64 RCX: 0000000000000000 RDX: ffff8880a90860c0 RSI: ffffffff815dba07 RDI: fffff520001c4f22 RBP: ffff8880a80dca00 R08: 0000000000000022 R09: ffff8880ae7318e7 R10: 0000000000000000 R11: 0000000000077578 R12: 00000000ffffff6e R13: 0000000000000008 R14: ffffc90000e27a40 R15: 1ffff920001c4f3c FS: 0000000000000000(0000) GS:ffff8880ae700000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000557335f440d0 CR3: 000000009647d000 CR4: 00000000001506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: memcpy include/linux/string.h:405 [inline] btree_readpage_end_io_hook.cold+0x206/0x221 fs/btrfs/disk-io.c:642 end_bio_extent_readpage+0x4de/0x10c0 fs/btrfs/extent_io.c:2854 bio_endio+0x3cf/0x7f0 block/bio.c:1449 end_workqueue_fn+0x114/0x170 fs/btrfs/disk-io.c:1695 btrfs_work_helper+0x221/0xe20 fs/btrfs/async-thread.c:318 process_one_work+0x94c/0x1670 kernel/workqueue.c:2269 worker_thread+0x64c/0x1120 kernel/workqueue.c:2415 kthread+0x3b5/0x4a0 kernel/kthread.c:292 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:294 Modules linked in: ---[ end trace b68924293169feef ]--- RIP: 0010:fortify_panic+0xf/0x20 lib/string.c:1129 RSP: 0018:ffffc90000e27980 EFLAGS: 00010286 RAX: 0000000000000022 RBX: ffff8880a80dca64 RCX: 0000000000000000 RDX: ffff8880a90860c0 RSI: ffffffff815dba07 RDI: fffff520001c4f22 RBP: ffff8880a80dca00 R08: 0000000000000022 R09: ffff8880ae7318e7 R10: 0000000000000000 R11: 0000000000077578 R12: 00000000ffffff6e R13: 0000000000000008 R14: ffffc90000e27a40 R15: 1ffff920001c4f3c FS: 0000000000000000(0000) GS:ffff8880ae700000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f95b7c4d008 CR3: 000000009647d000 CR4: 00000000001506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 The overflow happens, because in btree_readpage_end_io_hook() we assume that we have found a 4 byte checksum instead of the real possible 32 bytes we have for the checksums. With the fix applied: [ 35.726623] BTRFS: device fsid 815caf9a-dc43-4d2a-ac54-764b8333d765 devid 1 transid 5 /dev/loop0 scanned by syz-repro (215) [ 35.738994] BTRFS info (device loop0): disk space caching is enabled [ 35.738998] BTRFS info (device loop0): has skinny extents [ 35.743337] BTRFS warning (device loop0): loop0 checksum verify failed on 1052672 wanted 0xf9c035fc8d239a54 found 0x67a25c14b7eabcf9 level 0 [ 35.743420] BTRFS error (device loop0): failed to read chunk root [ 35.745899] BTRFS error (device loop0): open_ctree failed Reported-by: syzbot+e864a35d361e1d4e29a5@syzkaller.appspotmail.com Fixes: d5178578bcd4 ("btrfs: directly call into crypto framework for checksumming") CC: stable@vger.kernel.org # 5.4+ Signed-off-by: Johannes Thumshirn Signed-off-by: David Sterba --- fs/btrfs/disk-io.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9fe9ec6aab71..93d3bb8989df 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -636,16 +636,15 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, csum_tree_block(eb, result); if (memcmp_extent_buffer(eb, result, 0, csum_size)) { - u32 val; - u32 found = 0; - - memcpy(&found, result, csum_size); + u8 val[BTRFS_CSUM_SIZE] = { 0 }; read_extent_buffer(eb, &val, 0, csum_size); btrfs_warn_rl(fs_info, - "%s checksum verify failed on %llu wanted %x found %x level %d", + "%s checksum verify failed on %llu wanted " CSUM_FMT " found " CSUM_FMT " level %d", fs_info->sb->s_id, eb->start, - val, found, btrfs_header_level(eb)); + CSUM_FMT_VALUE(csum_size, val), + CSUM_FMT_VALUE(csum_size, result), + btrfs_header_level(eb)); ret = -EUCLEAN; goto err; } From 6402e780e9434246a4c221034e1f8a0fd67f20bd Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Wed, 3 Jun 2020 13:12:12 +0200 Subject: [PATCH 062/322] clk: tegra: Capitalization fixes HW, XUSB and PLL are abbreviations and should be all-uppercase. Signed-off-by: Thierry Reding --- drivers/clk/tegra/clk-pll.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/tegra/clk-pll.c b/drivers/clk/tegra/clk-pll.c index f180c055d33f..52ccb13d0a88 100644 --- a/drivers/clk/tegra/clk-pll.c +++ b/drivers/clk/tegra/clk-pll.c @@ -1673,7 +1673,7 @@ static int clk_plle_tegra114_enable(struct clk_hw *hw) pll_writel(val, PLLE_SS_CTRL, pll); udelay(1); - /* Enable hw control of xusb brick pll */ + /* Enable HW control of XUSB brick PLL */ val = pll_readl_misc(pll); val &= ~PLLE_MISC_IDDQ_SW_CTRL; pll_writel_misc(val, pll); @@ -1696,7 +1696,7 @@ static int clk_plle_tegra114_enable(struct clk_hw *hw) val |= XUSBIO_PLL_CFG0_SEQ_ENABLE; pll_writel(val, XUSBIO_PLL_CFG0, pll); - /* Enable hw control of SATA pll */ + /* Enable HW control of SATA PLL */ val = pll_readl(SATA_PLL_CFG0, pll); val &= ~SATA_PLL_CFG0_PADPLL_RESET_SWCTL; val |= SATA_PLL_CFG0_PADPLL_USE_LOCKDET; From 5105660ee80862b85f7769626d0f936c18ce1885 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Wed, 3 Jun 2020 13:13:07 +0200 Subject: [PATCH 063/322] clk: tegra: Always program PLL_E when enabled Commit bff1cef5f23a ("clk: tegra: Don't enable already enabled PLLs") added checks to avoid enabling PLLs that have already been enabled by the bootloader. However, the PLL_E configuration inherited from the bootloader isn't necessarily the one that is needed for the kernel. This can cause SATA to fail like this: [ 5.310270] phy phy-sata.6: phy poweron failed --> -110 [ 5.315604] tegra-ahci 70027000.sata: failed to power on AHCI controller: -110 [ 5.323022] tegra-ahci: probe of 70027000.sata failed with error -110 Fix this by always programming the PLL_E. This ensures that any mis- configuration by the bootloader will be overwritten by the kernel. Fixes: bff1cef5f23a ("clk: tegra: Don't enable already enabled PLLs") Reported-by: LABBE Corentin Tested-by: Corentin Labbe Reviewed-by: Dmitry Osipenko Signed-off-by: Thierry Reding --- drivers/clk/tegra/clk-pll.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/clk/tegra/clk-pll.c b/drivers/clk/tegra/clk-pll.c index 52ccb13d0a88..c5cc0a2dac6f 100644 --- a/drivers/clk/tegra/clk-pll.c +++ b/drivers/clk/tegra/clk-pll.c @@ -1611,9 +1611,6 @@ static int clk_plle_tegra114_enable(struct clk_hw *hw) unsigned long flags = 0; unsigned long input_rate; - if (clk_pll_is_enabled(hw)) - return 0; - input_rate = clk_hw_get_rate(clk_hw_get_parent(hw)); if (_get_table_rate(hw, &sel, pll->params->fixed_rate, input_rate)) From 2f878d04218c8b26f6d0ab26955ca6b03848a1ad Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Mon, 22 Jun 2020 08:08:26 +0200 Subject: [PATCH 064/322] clk: tegra: Fix missing prototype for tegra210_clk_register_emc() Include the Tegra driver's clk.h to pull in the prototype definition for this function so that compilers don't warn about it being missing. Fixes: 0ac65fc946d3 ("clk: tegra: Implement Tegra210 EMC clock") Reported-by: kernel test robot Signed-off-by: Thierry Reding --- drivers/clk/tegra/clk-tegra210-emc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/tegra/clk-tegra210-emc.c b/drivers/clk/tegra/clk-tegra210-emc.c index 352a2c3fc374..51fd0ec2a2d0 100644 --- a/drivers/clk/tegra/clk-tegra210-emc.c +++ b/drivers/clk/tegra/clk-tegra210-emc.c @@ -12,6 +12,8 @@ #include #include +#include "clk.h" + #define CLK_SOURCE_EMC 0x19c #define CLK_SOURCE_EMC_2X_CLK_SRC GENMASK(31, 29) #define CLK_SOURCE_EMC_MC_EMC_SAME_FREQ BIT(16) From 3ad1c8ef083bef96ec922688966484be1039e6b5 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 21 Sep 2020 12:31:36 +0200 Subject: [PATCH 065/322] rcu/tree: Export rcu_idle_{enter,exit} to modules Fix this link error: ERROR: modpost: "rcu_idle_enter" [drivers/acpi/processor.ko] undefined! ERROR: modpost: "rcu_idle_exit" [drivers/acpi/processor.ko] undefined! when CONFIG_ACPI_PROCESSOR is built as module. PeterZ says that in light of ARM needing those soon too, they should simply be exported. Fixes: 1fecfdbb7acc ("ACPI: processor: Take over RCU-idle for C3-BM idle") Reported-by: Sven Joachim Suggested-by: Peter Zijlstra Signed-off-by: Borislav Petkov Reviewed-by: Paul E. McKenney Signed-off-by: Rafael J. Wysocki --- kernel/rcu/tree.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 8ce77d9ac716..f78ee759af9c 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -673,6 +673,7 @@ void rcu_idle_enter(void) lockdep_assert_irqs_disabled(); rcu_eqs_enter(false); } +EXPORT_SYMBOL_GPL(rcu_idle_enter); #ifdef CONFIG_NO_HZ_FULL /** @@ -886,6 +887,7 @@ void rcu_idle_exit(void) rcu_eqs_exit(false); local_irq_restore(flags); } +EXPORT_SYMBOL_GPL(rcu_idle_exit); #ifdef CONFIG_NO_HZ_FULL /** From 36050d8984ab743f9990a2eb97a0062fdc3d7bbd Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 4 Sep 2020 08:47:05 +0200 Subject: [PATCH 066/322] cpuidle: psci: Fix suspicious RCU usage The commit eb1f00237aca ("lockdep,trace: Expose tracepoints"), started to expose us for tracepoints. This lead to the following RCU splat on an ARM64 Qcom board. [ 5.529634] WARNING: suspicious RCU usage [ 5.537307] sdhci-pltfm: SDHCI platform and OF driver helper [ 5.541092] 5.9.0-rc3 #86 Not tainted [ 5.541098] ----------------------------- [ 5.541105] ../include/trace/events/lock.h:37 suspicious rcu_dereference_check() usage! [ 5.541110] [ 5.541110] other info that might help us debug this: [ 5.541110] [ 5.541116] [ 5.541116] rcu_scheduler_active = 2, debug_locks = 1 [ 5.541122] RCU used illegally from extended quiescent state! [ 5.541129] no locks held by swapper/0/0. [ 5.541134] [ 5.541134] stack backtrace: [ 5.541143] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.9.0-rc3 #86 [ 5.541149] Hardware name: Qualcomm Technologies, Inc. APQ 8016 SBC (DT) [ 5.541157] Call trace: [ 5.568185] sdhci_msm 7864900.sdhci: Got CD GPIO [ 5.574186] dump_backtrace+0x0/0x1c8 [ 5.574206] show_stack+0x14/0x20 [ 5.574229] dump_stack+0xe8/0x154 [ 5.574250] lockdep_rcu_suspicious+0xd4/0xf8 [ 5.574269] lock_acquire+0x3f0/0x460 [ 5.574292] _raw_spin_lock_irqsave+0x80/0xb0 [ 5.574314] __pm_runtime_suspend+0x4c/0x188 [ 5.574341] psci_enter_domain_idle_state+0x40/0xa0 [ 5.574362] cpuidle_enter_state+0xc0/0x610 [ 5.646487] cpuidle_enter+0x38/0x50 [ 5.650651] call_cpuidle+0x18/0x40 [ 5.654467] do_idle+0x228/0x278 [ 5.657678] cpu_startup_entry+0x24/0x70 [ 5.661153] rest_init+0x1a4/0x278 [ 5.665061] arch_call_rest_init+0xc/0x14 [ 5.668272] start_kernel+0x508/0x540 Following the path in pm_runtime_put_sync_suspend() from psci_enter_domain_idle_state(), it seems like we end up using the RCU. Therefore, let's simply silence the splat by informing the RCU about it with RCU_NONIDLE. Note that, this is a temporary solution. Instead we should strive to avoid using RCU_NONIDLE (and similar), but rather push rcu_idle_enter|exit() further down, closer to the arch specific code. However, as the CPU PM notifiers are also using the RCU, additional rework is needed. Reported-by: Naresh Kamboju Signed-off-by: Ulf Hansson Acked-by: Paul E. McKenney Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle-psci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cpuidle/cpuidle-psci.c b/drivers/cpuidle/cpuidle-psci.c index 74463841805f..d928b37718bd 100644 --- a/drivers/cpuidle/cpuidle-psci.c +++ b/drivers/cpuidle/cpuidle-psci.c @@ -66,7 +66,7 @@ static int psci_enter_domain_idle_state(struct cpuidle_device *dev, return -1; /* Do runtime PM to manage a hierarchical CPU toplogy. */ - pm_runtime_put_sync_suspend(pd_dev); + RCU_NONIDLE(pm_runtime_put_sync_suspend(pd_dev)); state = psci_get_domain_state(); if (!state) @@ -74,7 +74,7 @@ static int psci_enter_domain_idle_state(struct cpuidle_device *dev, ret = psci_cpu_suspend_enter(state) ? -1 : idx; - pm_runtime_get_sync(pd_dev); + RCU_NONIDLE(pm_runtime_get_sync(pd_dev)); cpu_pm_exit(); From 13a9a9d74d4d9689ad65938966dbc66386063648 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 20 Sep 2020 13:46:25 -0400 Subject: [PATCH 067/322] SUNRPC: Fix svc_flush_dcache() On platforms that implement flush_dcache_page(), a large NFS WRITE triggers the WARN_ONCE in bvec_iter_advance(): Sep 20 14:01:05 klimt.1015granger.net kernel: Attempted to advance past end of bvec iter Sep 20 14:01:05 klimt.1015granger.net kernel: WARNING: CPU: 0 PID: 1032 at include/linux/bvec.h:101 bvec_iter_advance.isra.0+0xa7/0x158 [sunrpc] Sep 20 14:01:05 klimt.1015granger.net kernel: Call Trace: Sep 20 14:01:05 klimt.1015granger.net kernel: svc_tcp_recvfrom+0x60c/0x12c7 [sunrpc] Sep 20 14:01:05 klimt.1015granger.net kernel: ? bvec_iter_advance.isra.0+0x158/0x158 [sunrpc] Sep 20 14:01:05 klimt.1015granger.net kernel: ? del_timer_sync+0x4b/0x55 Sep 20 14:01:05 klimt.1015granger.net kernel: ? test_bit+0x1d/0x27 [sunrpc] Sep 20 14:01:05 klimt.1015granger.net kernel: svc_recv+0x1193/0x15e4 [sunrpc] Sep 20 14:01:05 klimt.1015granger.net kernel: ? try_to_freeze.isra.0+0x6f/0x6f [sunrpc] Sep 20 14:01:05 klimt.1015granger.net kernel: ? refcount_sub_and_test.constprop.0+0x13/0x40 [sunrpc] Sep 20 14:01:05 klimt.1015granger.net kernel: ? svc_xprt_put+0x1e/0x29f [sunrpc] Sep 20 14:01:05 klimt.1015granger.net kernel: ? svc_send+0x39f/0x3c1 [sunrpc] Sep 20 14:01:05 klimt.1015granger.net kernel: nfsd+0x282/0x345 [nfsd] Sep 20 14:01:05 klimt.1015granger.net kernel: ? __kthread_parkme+0x74/0xba Sep 20 14:01:05 klimt.1015granger.net kernel: kthread+0x2ad/0x2bc Sep 20 14:01:05 klimt.1015granger.net kernel: ? nfsd_destroy+0x124/0x124 [nfsd] Sep 20 14:01:05 klimt.1015granger.net kernel: ? test_bit+0x1d/0x27 Sep 20 14:01:05 klimt.1015granger.net kernel: ? kthread_mod_delayed_work+0x115/0x115 Sep 20 14:01:05 klimt.1015granger.net kernel: ret_from_fork+0x22/0x30 Reported-by: He Zhe Fixes: ca07eda33e01 ("SUNRPC: Refactor svc_recvfrom()") Signed-off-by: Chuck Lever --- net/sunrpc/svcsock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index d5805fa1d066..c2752e2b9ce3 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -228,7 +228,7 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining) static void svc_flush_bvec(const struct bio_vec *bvec, size_t size, size_t seek) { struct bvec_iter bi = { - .bi_size = size, + .bi_size = size + seek, }; struct bio_vec bv; From e23bb04b0c938588eae41b7f4712b722290ed2b8 Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Sat, 19 Sep 2020 22:01:33 -0700 Subject: [PATCH 068/322] bpf: Fix sysfs export of empty BTF section If BTF data is missing or removed from the ELF section it is still exported via sysfs as a zero-length file: root@OpenWrt:/# ls -l /sys/kernel/btf/vmlinux -r--r--r-- 1 root root 0 Jul 18 02:59 /sys/kernel/btf/vmlinux Moreover, reads from this file succeed and leak kernel data: root@OpenWrt:/# hexdump -C /sys/kernel/btf/vmlinux|head -10 000000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| * 000cc0 00 00 00 00 00 00 00 00 00 00 00 00 80 83 b0 80 |................| 000cd0 00 10 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| 000ce0 00 00 00 00 00 00 00 00 00 00 00 00 57 ac 6e 9d |............W.n.| 000cf0 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| * 002650 00 00 00 00 00 00 00 10 00 00 00 01 00 00 00 01 |................| 002660 80 82 9a c4 80 85 97 80 81 a9 51 68 00 00 00 02 |..........Qh....| 002670 80 25 44 dc 80 85 97 80 81 a9 50 24 81 ab c4 60 |.%D.......P$...`| This situation was first observed with kernel 5.4.x, cross-compiled for a MIPS target system. Fix by adding a sanity-check for export of zero-length data sections. Fixes: 341dfcf8d78e ("btf: expose BTF info through sysfs") Signed-off-by: Tony Ambardar Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/b38db205a66238f70823039a8c531535864eaac5.1600417359.git.Tony.Ambardar@gmail.com --- kernel/bpf/sysfs_btf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/sysfs_btf.c b/kernel/bpf/sysfs_btf.c index 3b495773de5a..11b3380887fa 100644 --- a/kernel/bpf/sysfs_btf.c +++ b/kernel/bpf/sysfs_btf.c @@ -30,15 +30,15 @@ static struct kobject *btf_kobj; static int __init btf_vmlinux_init(void) { - if (!__start_BTF) + bin_attr_btf_vmlinux.size = __stop_BTF - __start_BTF; + + if (!__start_BTF || bin_attr_btf_vmlinux.size == 0) return 0; btf_kobj = kobject_create_and_add("btf", kernel_kobj); if (!btf_kobj) return -ENOMEM; - bin_attr_btf_vmlinux.size = __stop_BTF - __start_BTF; - return sysfs_create_bin_file(btf_kobj, &bin_attr_btf_vmlinux); } From 65c204398928f9c79f1a29912b410439f7052635 Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Sat, 19 Sep 2020 22:01:34 -0700 Subject: [PATCH 069/322] bpf: Prevent .BTF section elimination Systems with memory or disk constraints often reduce the kernel footprint by configuring LD_DEAD_CODE_DATA_ELIMINATION. However, this can result in removal of any BTF information. Use the KEEP() macro to preserve the BTF data as done with other important sections, while still allowing for smaller kernels. Fixes: 90ceddcb4950 ("bpf: Support llvm-objcopy for vmlinux BTF") Signed-off-by: Tony Ambardar Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/a635b5d3e2da044e7b51ec1315e8910fbce0083f.1600417359.git.Tony.Ambardar@gmail.com --- include/asm-generic/vmlinux.lds.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 5430febd34be..7636bc71c71f 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -661,7 +661,7 @@ #define BTF \ .BTF : AT(ADDR(.BTF) - LOAD_OFFSET) { \ __start_BTF = .; \ - *(.BTF) \ + KEEP(*(.BTF)) \ __stop_BTF = .; \ } \ . = ALIGN(4); \ From 1245008122d7311683d70c05b2eea167a314fb5f Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Sat, 19 Sep 2020 22:01:35 -0700 Subject: [PATCH 070/322] libbpf: Fix native endian assumption when parsing BTF Code in btf__parse_raw() fails to detect raw BTF of non-native endianness and assumes it must be ELF data, which then fails to parse as ELF and yields a misleading error message: root:/# bpftool btf dump file /sys/kernel/btf/vmlinux libbpf: failed to get EHDR from /sys/kernel/btf/vmlinux For example, this could occur after cross-compiling a BTF-enabled kernel for a target with non-native endianness, which is currently unsupported. Check for correct endianness and emit a clearer error message: root:/# bpftool btf dump file /sys/kernel/btf/vmlinux libbpf: non-native BTF endianness is not supported Fixes: 94a1fedd63ed ("libbpf: Add btf__parse_raw() and generic btf__parse() APIs") Signed-off-by: Tony Ambardar Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/90f81508ecc57bc0da318e0fe0f45cfe49b17ea7.1600417359.git.Tony.Ambardar@gmail.com --- tools/lib/bpf/btf.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 7dfca7016aaa..6bdbc389b493 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -659,6 +659,12 @@ struct btf *btf__parse_raw(const char *path) err = -EIO; goto err_out; } + if (magic == __bswap_16(BTF_MAGIC)) { + /* non-native endian raw BTF */ + pr_warn("non-native BTF endianness is not supported\n"); + err = -LIBBPF_ERRNO__ENDIAN; + goto err_out; + } if (magic != BTF_MAGIC) { /* definitely not a raw BTF */ err = -EPROTO; From 4003324856311faebb46cbd56a1616bd3f3b67c2 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 17 Sep 2020 18:34:04 +0300 Subject: [PATCH 071/322] regmap: fix page selection for noinc reads Non-incrementing reads can fail if register + length crosses page border. However for non-incrementing reads we should not check for page border crossing. Fix this by passing additional flag to _regmap_raw_read and passing length to _regmap_select_page basing on the flag. Signed-off-by: Dmitry Baryshkov Fixes: 74fe7b551f33 ("regmap: Add regmap_noinc_read API") Link: https://lore.kernel.org/r/20200917153405.3139200-1-dmitry.baryshkov@linaro.org Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 388ff8a6816c..e2822dc21022 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -2472,7 +2472,7 @@ int regmap_raw_write_async(struct regmap *map, unsigned int reg, EXPORT_SYMBOL_GPL(regmap_raw_write_async); static int _regmap_raw_read(struct regmap *map, unsigned int reg, void *val, - unsigned int val_len) + unsigned int val_len, bool noinc) { struct regmap_range_node *range; int ret; @@ -2485,7 +2485,7 @@ static int _regmap_raw_read(struct regmap *map, unsigned int reg, void *val, range = _regmap_range_lookup(map, reg); if (range) { ret = _regmap_select_page(map, ®, range, - val_len / map->format.val_bytes); + noinc ? 1 : val_len / map->format.val_bytes); if (ret != 0) return ret; } @@ -2523,7 +2523,7 @@ static int _regmap_bus_read(void *context, unsigned int reg, if (!map->format.parse_val) return -EINVAL; - ret = _regmap_raw_read(map, reg, work_val, map->format.val_bytes); + ret = _regmap_raw_read(map, reg, work_val, map->format.val_bytes, false); if (ret == 0) *val = map->format.parse_val(work_val); @@ -2639,7 +2639,7 @@ int regmap_raw_read(struct regmap *map, unsigned int reg, void *val, /* Read bytes that fit into whole chunks */ for (i = 0; i < chunk_count; i++) { - ret = _regmap_raw_read(map, reg, val, chunk_bytes); + ret = _regmap_raw_read(map, reg, val, chunk_bytes, false); if (ret != 0) goto out; @@ -2650,7 +2650,7 @@ int regmap_raw_read(struct regmap *map, unsigned int reg, void *val, /* Read remaining bytes */ if (val_len) { - ret = _regmap_raw_read(map, reg, val, val_len); + ret = _regmap_raw_read(map, reg, val, val_len, false); if (ret != 0) goto out; } @@ -2725,7 +2725,7 @@ int regmap_noinc_read(struct regmap *map, unsigned int reg, read_len = map->max_raw_read; else read_len = val_len; - ret = _regmap_raw_read(map, reg, val, read_len); + ret = _regmap_raw_read(map, reg, val, read_len, true); if (ret) goto out_unlock; val = ((u8 *)val) + read_len; From 05669b63170771d554854c0e465b76dc98fc7c84 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 17 Sep 2020 18:34:05 +0300 Subject: [PATCH 072/322] regmap: fix page selection for noinc writes Non-incrementing writes can fail if register + length crosses page border. However for non-incrementing writes we should not check for page border crossing. Fix this by passing additional flag to _regmap_raw_write and passing length to _regmap_select_page basing on the flag. Signed-off-by: Dmitry Baryshkov Fixes: cdf6b11daa77 ("regmap: Add regmap_noinc_write API") Link: https://lore.kernel.org/r/20200917153405.3139200-2-dmitry.baryshkov@linaro.org Signed-off-by: Mark Brown --- drivers/base/regmap/internal.h | 2 +- drivers/base/regmap/regcache.c | 2 +- drivers/base/regmap/regmap.c | 21 +++++++++++---------- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h index e0ff8e90ebdc..7be2fcfeea52 100644 --- a/drivers/base/regmap/internal.h +++ b/drivers/base/regmap/internal.h @@ -259,7 +259,7 @@ bool regcache_set_val(struct regmap *map, void *base, unsigned int idx, int regcache_lookup_reg(struct regmap *map, unsigned int reg); int _regmap_raw_write(struct regmap *map, unsigned int reg, - const void *val, size_t val_len); + const void *val, size_t val_len, bool noinc); void regmap_async_complete_cb(struct regmap_async *async, int ret); diff --git a/drivers/base/regmap/regcache.c b/drivers/base/regmap/regcache.c index a93cafd7be4f..7f4b3b62492c 100644 --- a/drivers/base/regmap/regcache.c +++ b/drivers/base/regmap/regcache.c @@ -717,7 +717,7 @@ static int regcache_sync_block_raw_flush(struct regmap *map, const void **data, map->cache_bypass = true; - ret = _regmap_raw_write(map, base, *data, count * val_bytes); + ret = _regmap_raw_write(map, base, *data, count * val_bytes, false); if (ret) dev_err(map->dev, "Unable to sync registers %#x-%#x. %d\n", base, cur - map->reg_stride, ret); diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index e2822dc21022..b71f9ecddff5 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -1486,7 +1486,7 @@ static void regmap_set_work_buf_flag_mask(struct regmap *map, int max_bytes, } static int _regmap_raw_write_impl(struct regmap *map, unsigned int reg, - const void *val, size_t val_len) + const void *val, size_t val_len, bool noinc) { struct regmap_range_node *range; unsigned long flags; @@ -1545,7 +1545,7 @@ static int _regmap_raw_write_impl(struct regmap *map, unsigned int reg, win_residue, val_len / map->format.val_bytes); ret = _regmap_raw_write_impl(map, reg, val, win_residue * - map->format.val_bytes); + map->format.val_bytes, noinc); if (ret != 0) return ret; @@ -1559,7 +1559,7 @@ static int _regmap_raw_write_impl(struct regmap *map, unsigned int reg, win_residue = range->window_len - win_offset; } - ret = _regmap_select_page(map, ®, range, val_num); + ret = _regmap_select_page(map, ®, range, noinc ? 1 : val_num); if (ret != 0) return ret; } @@ -1767,7 +1767,8 @@ static int _regmap_bus_raw_write(void *context, unsigned int reg, map->work_buf + map->format.reg_bytes + map->format.pad_bytes, - map->format.val_bytes); + map->format.val_bytes, + false); } static inline void *_regmap_map_get_context(struct regmap *map) @@ -1861,7 +1862,7 @@ int regmap_write_async(struct regmap *map, unsigned int reg, unsigned int val) EXPORT_SYMBOL_GPL(regmap_write_async); int _regmap_raw_write(struct regmap *map, unsigned int reg, - const void *val, size_t val_len) + const void *val, size_t val_len, bool noinc) { size_t val_bytes = map->format.val_bytes; size_t val_count = val_len / val_bytes; @@ -1882,7 +1883,7 @@ int _regmap_raw_write(struct regmap *map, unsigned int reg, /* Write as many bytes as possible with chunk_size */ for (i = 0; i < chunk_count; i++) { - ret = _regmap_raw_write_impl(map, reg, val, chunk_bytes); + ret = _regmap_raw_write_impl(map, reg, val, chunk_bytes, noinc); if (ret) return ret; @@ -1893,7 +1894,7 @@ int _regmap_raw_write(struct regmap *map, unsigned int reg, /* Write remaining bytes */ if (val_len) - ret = _regmap_raw_write_impl(map, reg, val, val_len); + ret = _regmap_raw_write_impl(map, reg, val, val_len, noinc); return ret; } @@ -1926,7 +1927,7 @@ int regmap_raw_write(struct regmap *map, unsigned int reg, map->lock(map->lock_arg); - ret = _regmap_raw_write(map, reg, val, val_len); + ret = _regmap_raw_write(map, reg, val, val_len, false); map->unlock(map->lock_arg); @@ -1984,7 +1985,7 @@ int regmap_noinc_write(struct regmap *map, unsigned int reg, write_len = map->max_raw_write; else write_len = val_len; - ret = _regmap_raw_write(map, reg, val, write_len); + ret = _regmap_raw_write(map, reg, val, write_len, true); if (ret) goto out_unlock; val = ((u8 *)val) + write_len; @@ -2461,7 +2462,7 @@ int regmap_raw_write_async(struct regmap *map, unsigned int reg, map->async = true; - ret = _regmap_raw_write(map, reg, val, val_len); + ret = _regmap_raw_write(map, reg, val, val_len, false); map->async = false; From b867eef4cf548cd9541225aadcdcee644669b9e1 Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Fri, 4 Sep 2020 12:28:12 +1200 Subject: [PATCH 073/322] spi: fsl-espi: Only process interrupts for expected events The SPIE register contains counts for the TX FIFO so any time the irq handler was invoked we would attempt to process the RX/TX fifos. Use the SPIM value to mask the events so that we only process interrupts that were expected. This was a latent issue exposed by commit 3282a3da25bd ("powerpc/64: Implement soft interrupt replay in C"). Signed-off-by: Chris Packham Link: https://lore.kernel.org/r/20200904002812.7300-1-chris.packham@alliedtelesis.co.nz Signed-off-by: Mark Brown --- drivers/spi/spi-fsl-espi.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-fsl-espi.c b/drivers/spi/spi-fsl-espi.c index e60581283a24..6d148ab70b93 100644 --- a/drivers/spi/spi-fsl-espi.c +++ b/drivers/spi/spi-fsl-espi.c @@ -564,13 +564,14 @@ static void fsl_espi_cpu_irq(struct fsl_espi *espi, u32 events) static irqreturn_t fsl_espi_irq(s32 irq, void *context_data) { struct fsl_espi *espi = context_data; - u32 events; + u32 events, mask; spin_lock(&espi->lock); /* Get interrupt events(tx/rx) */ events = fsl_espi_read_reg(espi, ESPI_SPIE); - if (!events) { + mask = fsl_espi_read_reg(espi, ESPI_SPIM); + if (!(events & mask)) { spin_unlock(&espi->lock); return IRQ_NONE; } From 921daeeca91bda2dfb57fcba9f69ff368083be4b Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 4 Sep 2020 19:21:41 +0200 Subject: [PATCH 074/322] pinctrl: cherryview: Preserve CHV_PADCTRL1_INVRXTX_TXDATA flag on GPIOs One some devices the GPIO should output the inverted value from what device-drivers / ACPI code expects. The reason for this is unknown, perhaps these systems use an external buffer chip on the GPIO which inverts the signal. The BIOS makes this work by setting the CHV_PADCTRL1_INVRXTX_TXDATA flag. Before this commit we would unconditionally clear all INVRXTX flags, including the CHV_PADCTRL1_INVRXTX_TXDATA flag when a GPIO is requested by a driver (from chv_gpio_request_enable()). This breaks systems using this setup. Specifically it is causing problems for systems with a goodix touchscreen, where the BIOS sets the INVRXTX_TXDATA flag on the GPIO used for the touchscreen's reset pin. The goodix touchscreen driver by defaults configures this pin as input (relying on the pull-up to keep it high), but the clearing of the INVRXTX_TXDATA flag done by chv_gpio_request_enable() causes it to be driven low for a brief time before the GPIO gets set to input mode. This causes the touchscreen controller to get reset. On most CHT devs with this touchscreen this leads to: [ 31.596534] Goodix-TS i2c-GDIX1001:00: i2c test failed attempt 1: -121 The driver retries this though and then everything is fine. But during reset the touchscreen uses its interrupt pin as bootstrap to determine which i2c address to use and on the Acer One S1003 the spurious reset caused by the clearing of the INVRXTX_TXDATA flag causes the controller to come back up again on the wrong i2c address, breaking things. This commit fixes both the -121 errors, as well as the total breakage on the Acer One S1003, by making chv_gpio_clear_triggering() not clear the INVRXTX_TXDATA flag if the pin is already configured as a GPIO. Note that chv_pinmux_set_mux() does still unconditionally clear the flag, so this only affects GPIO usage. Fixes: a7d4b171660c ("Input: goodix - add support for getting IRQ + reset GPIOs on Cherry Trail devices") Signed-off-by: Hans de Goede Acked-by: Mika Westerberg Signed-off-by: Andy Shevchenko Signed-off-by: Linus Walleij --- drivers/pinctrl/intel/pinctrl-cherryview.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c index 9ef246145bde..06521097513a 100644 --- a/drivers/pinctrl/intel/pinctrl-cherryview.c +++ b/drivers/pinctrl/intel/pinctrl-cherryview.c @@ -58,6 +58,7 @@ #define CHV_PADCTRL1_CFGLOCK BIT(31) #define CHV_PADCTRL1_INVRXTX_SHIFT 4 #define CHV_PADCTRL1_INVRXTX_MASK GENMASK(7, 4) +#define CHV_PADCTRL1_INVRXTX_TXDATA BIT(7) #define CHV_PADCTRL1_INVRXTX_RXDATA BIT(6) #define CHV_PADCTRL1_INVRXTX_TXENABLE BIT(5) #define CHV_PADCTRL1_ODEN BIT(3) @@ -792,11 +793,22 @@ static int chv_pinmux_set_mux(struct pinctrl_dev *pctldev, static void chv_gpio_clear_triggering(struct chv_pinctrl *pctrl, unsigned int offset) { + u32 invrxtx_mask = CHV_PADCTRL1_INVRXTX_MASK; u32 value; + /* + * One some devices the GPIO should output the inverted value from what + * device-drivers / ACPI code expects (inverted external buffer?). The + * BIOS makes this work by setting the CHV_PADCTRL1_INVRXTX_TXDATA flag, + * preserve this flag if the pin is already setup as GPIO. + */ + value = chv_readl(pctrl, offset, CHV_PADCTRL0); + if (value & CHV_PADCTRL0_GPIOEN) + invrxtx_mask &= ~CHV_PADCTRL1_INVRXTX_TXDATA; + value = chv_readl(pctrl, offset, CHV_PADCTRL1); value &= ~CHV_PADCTRL1_INTWAKECFG_MASK; - value &= ~CHV_PADCTRL1_INVRXTX_MASK; + value &= ~invrxtx_mask; chv_writel(pctrl, offset, CHV_PADCTRL1, value); } From ee1dfad5325ff1cfb2239e564cd411b3bfe8667a Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 14 Sep 2020 13:04:19 -0400 Subject: [PATCH 075/322] dm: fix bio splitting and its bio completion order for regular IO dm_queue_split() is removed because __split_and_process_bio() _must_ handle splitting bios to ensure proper bio submission and completion ordering as a bio is split. Otherwise, multiple recursive calls to ->submit_bio will cause multiple split bios to be allocated from the same ->bio_split mempool at the same time. This would result in deadlock in low memory conditions because no progress could be made (only one bio is available in ->bio_split mempool). This fix has been verified to still fix the loss of performance, due to excess splitting, that commit 120c9257f5f1 provided. Fixes: 120c9257f5f1 ("Revert "dm: always call blk_queue_split() in dm_process_bio()"") Cc: stable@vger.kernel.org # 5.0+, requires custom backport due to 5.9 changes Reported-by: Ming Lei Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 23 ++--------------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 4a40df8af7d3..d948cd522431 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1724,23 +1724,6 @@ out: return ret; } -static void dm_queue_split(struct mapped_device *md, struct dm_target *ti, struct bio **bio) -{ - unsigned len, sector_count; - - sector_count = bio_sectors(*bio); - len = min_t(sector_t, max_io_len((*bio)->bi_iter.bi_sector, ti), sector_count); - - if (sector_count > len) { - struct bio *split = bio_split(*bio, len, GFP_NOIO, &md->queue->bio_split); - - bio_chain(split, *bio); - trace_block_split(md->queue, split, (*bio)->bi_iter.bi_sector); - submit_bio_noacct(*bio); - *bio = split; - } -} - static blk_qc_t dm_process_bio(struct mapped_device *md, struct dm_table *map, struct bio *bio) { @@ -1768,14 +1751,12 @@ static blk_qc_t dm_process_bio(struct mapped_device *md, if (current->bio_list) { if (is_abnormal_io(bio)) blk_queue_split(&bio); - else - dm_queue_split(md, ti, &bio); + /* regular IO is split by __split_and_process_bio */ } if (dm_get_md_type(md) == DM_TYPE_NVME_BIO_BASED) return __process_bio(md, map, bio, ti); - else - return __split_and_process_bio(md, map, bio); + return __split_and_process_bio(md, map, bio); } static blk_qc_t dm_submit_bio(struct bio *bio) From cf9c37865557d39292d82da29e9ebda1dbc584b3 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 21 Sep 2020 19:08:30 -0400 Subject: [PATCH 076/322] dm: fix comment in dm_process_bio() Refer to the correct function (->submit_bio instead of ->queue_bio). Also, add details about why using blk_queue_split() isn't needed for dm_wq_work()'s call to dm_process_bio(). Fixes: c62b37d96b6eb ("block: move ->make_request_fn to struct block_device_operations") Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index d948cd522431..6ed05ca65a0f 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1744,9 +1744,11 @@ static blk_qc_t dm_process_bio(struct mapped_device *md, } /* - * If in ->queue_bio we need to use blk_queue_split(), otherwise + * If in ->submit_bio we need to use blk_queue_split(), otherwise * queue_limits for abnormal requests (e.g. discard, writesame, etc) * won't be imposed. + * If called from dm_wq_work() for deferred bio processing, bio + * was already handled by following code with previous ->submit_bio. */ if (current->bio_list) { if (is_abnormal_io(bio)) From ead1e19ad905b97261f0ad7a98bb64abb9323b2b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 21 Sep 2020 18:44:42 +0900 Subject: [PATCH 077/322] lib/bootconfig: Fix a bug of breaking existing tree nodes Fix a bug of breaking existing tree nodes by parsing the second and subsequent braces. Since the bootconfig parser uses the node.next field as a flag of current parent node, but this will break the existing tree if the same key node is specified again in the bootconfig. For example, the following bootconfig should be foo.buz and bar. foo bar foo { buz } However, when parsing the brace "{", it breaks foo->bar link by marking open-brace node. So the bootconfig unlinks bar from the bootconfig internal tree. This introduces a stack outside of the tree and record the last open-brace on the stack instead of using node.next field. Link: https://lkml.kernel.org/r/160068148267.1088739.8264704338030168660.stgit@devnote2 Fixes: 76db5a27a827 ("bootconfig: Add Extra Boot Config support") Cc: Ingo Molnar Cc: stable@vger.kernel.org Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- lib/bootconfig.c | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/lib/bootconfig.c b/lib/bootconfig.c index 2c905a91d4eb..b44bba0f1583 100644 --- a/lib/bootconfig.c +++ b/lib/bootconfig.c @@ -31,6 +31,8 @@ static size_t xbc_data_size __initdata; static struct xbc_node *last_parent __initdata; static const char *xbc_err_msg __initdata; static int xbc_err_pos __initdata; +static int open_brace[XBC_DEPTH_MAX] __initdata; +static int brace_index __initdata; static int __init xbc_parse_error(const char *msg, const char *p) { @@ -431,27 +433,27 @@ static char *skip_spaces_until_newline(char *p) return p; } -static int __init __xbc_open_brace(void) +static int __init __xbc_open_brace(char *p) { - /* Mark the last key as open brace */ - last_parent->next = XBC_NODE_MAX; + /* Push the last key as open brace */ + open_brace[brace_index++] = xbc_node_index(last_parent); + if (brace_index >= XBC_DEPTH_MAX) + return xbc_parse_error("Exceed max depth of braces", p); return 0; } static int __init __xbc_close_brace(char *p) { - struct xbc_node *node; - - if (!last_parent || last_parent->next != XBC_NODE_MAX) + brace_index--; + if (!last_parent || brace_index < 0 || + (open_brace[brace_index] != xbc_node_index(last_parent))) return xbc_parse_error("Unexpected closing brace", p); - node = last_parent; - node->next = 0; - do { - node = xbc_node_get_parent(node); - } while (node && node->next != XBC_NODE_MAX); - last_parent = node; + if (brace_index == 0) + last_parent = NULL; + else + last_parent = &xbc_nodes[open_brace[brace_index - 1]]; return 0; } @@ -661,7 +663,7 @@ static int __init xbc_open_brace(char **k, char *n) return ret; *k = n; - return __xbc_open_brace(); + return __xbc_open_brace(n - 1); } static int __init xbc_close_brace(char **k, char *n) @@ -681,6 +683,13 @@ static int __init xbc_verify_tree(void) int i, depth, len, wlen; struct xbc_node *n, *m; + /* Brace closing */ + if (brace_index) { + n = &xbc_nodes[open_brace[brace_index]]; + return xbc_parse_error("Brace is not closed", + xbc_node_get_data(n)); + } + /* Empty tree */ if (xbc_node_num == 0) { xbc_parse_error("Empty config", xbc_data); @@ -745,6 +754,7 @@ void __init xbc_destroy_all(void) xbc_node_num = 0; memblock_free(__pa(xbc_nodes), sizeof(struct xbc_node) * XBC_NODE_MAX); xbc_nodes = NULL; + brace_index = 0; } /** From c7af4ecdffe1537ba8aeed0ac12c3326f908df43 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 21 Sep 2020 18:44:51 +0900 Subject: [PATCH 078/322] lib/bootconfig: Fix to remove tailing spaces after value Fix to remove tailing spaces after value. If there is a space after value, the bootconfig failed to remove it because it applies strim() before replacing the delimiter with null. For example, foo = var # comment was parsed as below. foo="var " but user will expect foo="var" This fixes it by applying strim() after removing the delimiter. Link: https://lkml.kernel.org/r/160068149134.1088739.8868306567670058853.stgit@devnote2 Fixes: 76db5a27a827 ("bootconfig: Add Extra Boot Config support") Cc: Ingo Molnar Cc: stable@vger.kernel.org Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- lib/bootconfig.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/bootconfig.c b/lib/bootconfig.c index b44bba0f1583..649ed44f199c 100644 --- a/lib/bootconfig.c +++ b/lib/bootconfig.c @@ -494,8 +494,8 @@ static int __init __xbc_parse_value(char **__v, char **__n) break; } if (strchr(",;\n#}", c)) { - v = strim(v); *p++ = '\0'; + v = strim(v); break; } } From 1d210c166b693c3a659abbac4b58385c2d4cf887 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 21 Sep 2020 18:45:02 +0900 Subject: [PATCH 079/322] tools/bootconfig: Add testcases for repeated key with brace Add a testcase for repeated key with brace parsing issue. Link: https://lkml.kernel.org/r/160068150176.1088739.409481347784771987.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- tools/bootconfig/test-bootconfig.sh | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tools/bootconfig/test-bootconfig.sh b/tools/bootconfig/test-bootconfig.sh index 56284b98d8f0..95eec768b503 100755 --- a/tools/bootconfig/test-bootconfig.sh +++ b/tools/bootconfig/test-bootconfig.sh @@ -137,6 +137,18 @@ $BOOTCONF $INITRD > $TEMPCONF cat $TEMPCONF xpass grep \'\"string\"\' $TEMPCONF +echo "Repeat same-key tree" +cat > $TEMPCONF << EOF +foo +bar +foo { buz } +EOF +echo > $INITRD + +xpass $BOOTCONF -a $TEMPCONF $INITRD +$BOOTCONF $INITRD > $OUTFILE +xpass grep -q "bar" $OUTFILE + echo "=== expected failure cases ===" for i in samples/bad-* ; do xfail $BOOTCONF -a $i $INITRD From 2f5fb555637eff4a3e5579f2323cd358c77efdc8 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 21 Sep 2020 18:45:11 +0900 Subject: [PATCH 080/322] tools/bootconfig: Add testcase for tailing space Add testcases for removing/keeping tailing space in the value. Link: https://lkml.kernel.org/r/160068151151.1088739.3469541807296024227.stgit@devnote2 Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- tools/bootconfig/test-bootconfig.sh | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tools/bootconfig/test-bootconfig.sh b/tools/bootconfig/test-bootconfig.sh index 95eec768b503..d295e406a756 100755 --- a/tools/bootconfig/test-bootconfig.sh +++ b/tools/bootconfig/test-bootconfig.sh @@ -149,6 +149,19 @@ xpass $BOOTCONF -a $TEMPCONF $INITRD $BOOTCONF $INITRD > $OUTFILE xpass grep -q "bar" $OUTFILE + +echo "Remove/keep tailing spaces" +cat > $TEMPCONF << EOF +foo = val # comment +bar = "val2 " # comment +EOF +echo > $INITRD + +xpass $BOOTCONF -a $TEMPCONF $INITRD +$BOOTCONF $INITRD > $OUTFILE +xfail grep -q val[[:space:]] $OUTFILE +xpass grep -q val2[[:space:]] $OUTFILE + echo "=== expected failure cases ===" for i in samples/bad-* ; do xfail $BOOTCONF -a $i $INITRD From d0254f82d702a1d0d92e42f87676111de88846cf Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Sat, 12 Sep 2020 12:30:45 +0200 Subject: [PATCH 081/322] media: dt-bindings: media: imx274: Convert to json-schema Convert the imx274 bindings document to json-schema and update the MAINTAINERS file accordingly. Reviewed-by: Luca Ceresoli Reviewed-by: Laurent Pinchart Signed-off-by: Jacopo Mondi Reviewed-by: Rob Herring Signed-off-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab --- .../devicetree/bindings/media/i2c/imx274.txt | 38 ---------- .../bindings/media/i2c/sony,imx274.yaml | 76 +++++++++++++++++++ MAINTAINERS | 2 +- 3 files changed, 77 insertions(+), 39 deletions(-) delete mode 100644 Documentation/devicetree/bindings/media/i2c/imx274.txt create mode 100644 Documentation/devicetree/bindings/media/i2c/sony,imx274.yaml diff --git a/Documentation/devicetree/bindings/media/i2c/imx274.txt b/Documentation/devicetree/bindings/media/i2c/imx274.txt deleted file mode 100644 index 0727079d2410..000000000000 --- a/Documentation/devicetree/bindings/media/i2c/imx274.txt +++ /dev/null @@ -1,38 +0,0 @@ -* Sony 1/2.5-Inch 8.51Mp CMOS Digital Image Sensor - -The Sony imx274 is a 1/2.5-inch CMOS active pixel digital image sensor with -an active array size of 3864H x 2202V. It is programmable through I2C -interface. The I2C address is fixed to 0x1a as per sensor data sheet. -Image data is sent through MIPI CSI-2, which is configured as 4 lanes -at 1440 Mbps. - - -Required Properties: -- compatible: value should be "sony,imx274" for imx274 sensor -- reg: I2C bus address of the device - -Optional Properties: -- reset-gpios: Sensor reset GPIO -- clocks: Reference to the input clock. -- clock-names: Should be "inck". -- VANA-supply: Sensor 2.8v analog supply. -- VDIG-supply: Sensor 1.8v digital core supply. -- VDDL-supply: Sensor digital IO 1.2v supply. - -The imx274 device node should contain one 'port' child node with -an 'endpoint' subnode. For further reading on port node refer to -Documentation/devicetree/bindings/media/video-interfaces.txt. - -Example: - sensor@1a { - compatible = "sony,imx274"; - reg = <0x1a>; - #address-cells = <1>; - #size-cells = <0>; - reset-gpios = <&gpio_sensor 0 0>; - port { - sensor_out: endpoint { - remote-endpoint = <&csiss_in>; - }; - }; - }; diff --git a/Documentation/devicetree/bindings/media/i2c/sony,imx274.yaml b/Documentation/devicetree/bindings/media/i2c/sony,imx274.yaml new file mode 100644 index 000000000000..f697e1a20beb --- /dev/null +++ b/Documentation/devicetree/bindings/media/i2c/sony,imx274.yaml @@ -0,0 +1,76 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/media/i2c/sony,imx274.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Sony 1/2.5-Inch 8.51MP CMOS Digital Image Sensor + +maintainers: + - Leon Luo + +description: | + The Sony IMX274 is a 1/2.5-inch CMOS active pixel digital image sensor with an + active array size of 3864H x 2202V. It is programmable through I2C interface. + Image data is sent through MIPI CSI-2, which is configured as 4 lanes at 1440 + Mbps. + +properties: + compatible: + const: sony,imx274 + + reg: + const: 0x1a + + reset-gpios: + maxItems: 1 + + clocks: + maxItems: 1 + + clock-names: + const: inck + + vana-supply: + description: Sensor 2.8 V analog supply. + maxItems: 1 + + vdig-supply: + description: Sensor 1.8 V digital core supply. + maxItems: 1 + + vddl-supply: + description: Sensor digital IO 1.2 V supply. + maxItems: 1 + + port: + type: object + description: Output video port. See ../video-interfaces.txt. + +required: + - compatible + - reg + - port + +additionalProperties: false + +examples: + - | + i2c0 { + #address-cells = <1>; + #size-cells = <0>; + + imx274: camera-sensor@1a { + compatible = "sony,imx274"; + reg = <0x1a>; + reset-gpios = <&gpio_sensor 0 0>; + + port { + sensor_out: endpoint { + remote-endpoint = <&csiss_in>; + }; + }; + }; + }; + +... diff --git a/MAINTAINERS b/MAINTAINERS index f0068bceeb61..ba1a4d2238cc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16125,7 +16125,7 @@ M: Leon Luo L: linux-media@vger.kernel.org S: Maintained T: git git://linuxtv.org/media_tree.git -F: Documentation/devicetree/bindings/media/i2c/imx274.txt +F: Documentation/devicetree/bindings/media/i2c/sony,imx274.yaml F: drivers/media/i2c/imx274.c SONY IMX290 SENSOR DRIVER From 01ce6d4d2c8157b076425e3dd8319948652583c5 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Mon, 24 Aug 2020 15:44:03 +0800 Subject: [PATCH 082/322] MIPS: Loongson-3: Fix fp register access if MSA enabled If MSA is enabled, FPU_REG_WIDTH is 128 rather than 64, then get_fpr64() /set_fpr64() in the original unaligned instruction emulation code access the wrong fp registers. This is because the current code doesn't specify the correct index field, so fix it. Fixes: f83e4f9896eff614d0f2547a ("MIPS: Loongson-3: Add some unaligned instructions emulation") Signed-off-by: Huacai Chen Signed-off-by: Pei Huang Signed-off-by: Thomas Bogendoerfer --- arch/mips/loongson64/cop2-ex.c | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/arch/mips/loongson64/cop2-ex.c b/arch/mips/loongson64/cop2-ex.c index f130f62129b8..00055d4b6042 100644 --- a/arch/mips/loongson64/cop2-ex.c +++ b/arch/mips/loongson64/cop2-ex.c @@ -95,10 +95,8 @@ static int loongson_cu2_call(struct notifier_block *nfb, unsigned long action, if (res) goto fault; - set_fpr64(current->thread.fpu.fpr, - insn.loongson3_lswc2_format.rt, value); - set_fpr64(current->thread.fpu.fpr, - insn.loongson3_lswc2_format.rq, value_next); + set_fpr64(¤t->thread.fpu.fpr[insn.loongson3_lswc2_format.rt], 0, value); + set_fpr64(¤t->thread.fpu.fpr[insn.loongson3_lswc2_format.rq], 0, value_next); compute_return_epc(regs); own_fpu(1); } @@ -130,15 +128,13 @@ static int loongson_cu2_call(struct notifier_block *nfb, unsigned long action, goto sigbus; lose_fpu(1); - value_next = get_fpr64(current->thread.fpu.fpr, - insn.loongson3_lswc2_format.rq); + value_next = get_fpr64(¤t->thread.fpu.fpr[insn.loongson3_lswc2_format.rq], 0); StoreDW(addr + 8, value_next, res); if (res) goto fault; - value = get_fpr64(current->thread.fpu.fpr, - insn.loongson3_lswc2_format.rt); + value = get_fpr64(¤t->thread.fpu.fpr[insn.loongson3_lswc2_format.rt], 0); StoreDW(addr, value, res); if (res) @@ -204,8 +200,7 @@ static int loongson_cu2_call(struct notifier_block *nfb, unsigned long action, if (res) goto fault; - set_fpr64(current->thread.fpu.fpr, - insn.loongson3_lsdc2_format.rt, value); + set_fpr64(¤t->thread.fpu.fpr[insn.loongson3_lsdc2_format.rt], 0, value); compute_return_epc(regs); own_fpu(1); @@ -221,8 +216,7 @@ static int loongson_cu2_call(struct notifier_block *nfb, unsigned long action, if (res) goto fault; - set_fpr64(current->thread.fpu.fpr, - insn.loongson3_lsdc2_format.rt, value); + set_fpr64(¤t->thread.fpu.fpr[insn.loongson3_lsdc2_format.rt], 0, value); compute_return_epc(regs); own_fpu(1); break; @@ -286,8 +280,7 @@ static int loongson_cu2_call(struct notifier_block *nfb, unsigned long action, goto sigbus; lose_fpu(1); - value = get_fpr64(current->thread.fpu.fpr, - insn.loongson3_lsdc2_format.rt); + value = get_fpr64(¤t->thread.fpu.fpr[insn.loongson3_lsdc2_format.rt], 0); StoreW(addr, value, res); if (res) @@ -305,8 +298,7 @@ static int loongson_cu2_call(struct notifier_block *nfb, unsigned long action, goto sigbus; lose_fpu(1); - value = get_fpr64(current->thread.fpu.fpr, - insn.loongson3_lsdc2_format.rt); + value = get_fpr64(¤t->thread.fpu.fpr[insn.loongson3_lsdc2_format.rt], 0); StoreDW(addr, value, res); if (res) From fdb29f4de1374483291232ae7515e5e7bb464762 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Thu, 10 Sep 2020 16:59:51 +0300 Subject: [PATCH 083/322] iio: adc: qcom-spmi-adc5: fix driver name Remove superfluous '.c' from qcom-spmi-adc5 device driver name. Fixes: e13d757279bb ("iio: adc: Add QCOM SPMI PMIC5 ADC driver") Signed-off-by: Dmitry Baryshkov Acked-by: Manivannan Sadhasivam Cc: Link: https://lore.kernel.org/r/20200910140000.324091-2-dmitry.baryshkov@linaro.org Signed-off-by: Jonathan Cameron --- drivers/iio/adc/qcom-spmi-adc5.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/adc/qcom-spmi-adc5.c b/drivers/iio/adc/qcom-spmi-adc5.c index b4b73c9920b4..c10aa28be70a 100644 --- a/drivers/iio/adc/qcom-spmi-adc5.c +++ b/drivers/iio/adc/qcom-spmi-adc5.c @@ -982,7 +982,7 @@ static int adc5_probe(struct platform_device *pdev) static struct platform_driver adc5_driver = { .driver = { - .name = "qcom-spmi-adc5.c", + .name = "qcom-spmi-adc5", .of_match_table = adc5_match_table, }, .probe = adc5_probe, From b5ddcffa37778244d5e786fe32f778edf2bfc93e Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Sat, 5 Sep 2020 01:34:21 +0800 Subject: [PATCH 084/322] btrfs: fix put of uninitialized kobject after seed device delete The following test case leads to NULL kobject free error: mount seed /mnt add sprout to /mnt umount /mnt mount sprout to /mnt delete seed kobject: '(null)' (00000000dd2b87e4): is not initialized, yet kobject_put() is being called. WARNING: CPU: 1 PID: 15784 at lib/kobject.c:736 kobject_put+0x80/0x350 RIP: 0010:kobject_put+0x80/0x350 :: Call Trace: btrfs_sysfs_remove_devices_dir+0x6e/0x160 [btrfs] btrfs_rm_device.cold+0xa8/0x298 [btrfs] btrfs_ioctl+0x206c/0x22a0 [btrfs] ksys_ioctl+0xe2/0x140 __x64_sys_ioctl+0x1e/0x29 do_syscall_64+0x96/0x150 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f4047c6288b :: This is because, at the end of the seed device-delete, we try to remove the seed's devid sysfs entry. But for the seed devices under the sprout fs, we don't initialize the devid kobject yet. So add a kobject state check, which takes care of the bug. Fixes: 668e48af7a94 ("btrfs: sysfs, add devid/dev_state kobject and device attributes") CC: stable@vger.kernel.org # 5.6+ Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/sysfs.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index c8df2edafd85..5be30066563c 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -1170,10 +1170,12 @@ int btrfs_sysfs_remove_devices_dir(struct btrfs_fs_devices *fs_devices, disk_kobj->name); } - kobject_del(&one_device->devid_kobj); - kobject_put(&one_device->devid_kobj); + if (one_device->devid_kobj.state_initialized) { + kobject_del(&one_device->devid_kobj); + kobject_put(&one_device->devid_kobj); - wait_for_completion(&one_device->kobj_unregister); + wait_for_completion(&one_device->kobj_unregister); + } return 0; } @@ -1186,10 +1188,12 @@ int btrfs_sysfs_remove_devices_dir(struct btrfs_fs_devices *fs_devices, sysfs_remove_link(fs_devices->devices_kobj, disk_kobj->name); } - kobject_del(&one_device->devid_kobj); - kobject_put(&one_device->devid_kobj); + if (one_device->devid_kobj.state_initialized) { + kobject_del(&one_device->devid_kobj); + kobject_put(&one_device->devid_kobj); - wait_for_completion(&one_device->kobj_unregister); + wait_for_completion(&one_device->kobj_unregister); + } } return 0; From ce65d55f92a67e247f4d799e581cf9fed677871c Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Tue, 22 Sep 2020 14:58:45 +0300 Subject: [PATCH 085/322] dmaengine: dmatest: Prevent to run on misconfigured channel Andy reported that commit 6b41030fdc79 ("dmaengine: dmatest: Restore default for channel") broke his scripts for the case where "busy" channel is used for configuration with expectation that run command would do nothing. Instead, behavior was (unintentionally) changed to treat such case as under-configuration and progress with defaults, i.e. run command would start a test with default setting for channel (which would use all channels). Restore original behavior with tracking status of channel setter so we can distinguish between misconfigured and under-configured cases in run command and act accordingly. Fixes: 6b41030fdc79 ("dmaengine: dmatest: Restore default for channel") Reported-by: Andy Shevchenko Tested-by: Andy Shevchenko Signed-off-by: Vladimir Murzin Tested-by: Peter Ujfalusi Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20200922115847.30100-1-andriy.shevchenko@linux.intel.com Signed-off-by: Vinod Koul --- drivers/dma/dmatest.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index 45d4d92e91db..a819611b8892 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -129,6 +129,7 @@ struct dmatest_params { * @nr_channels: number of channels under test * @lock: access protection to the fields of this structure * @did_init: module has been initialized completely + * @last_error: test has faced configuration issues */ static struct dmatest_info { /* Test parameters */ @@ -137,6 +138,7 @@ static struct dmatest_info { /* Internal state */ struct list_head channels; unsigned int nr_channels; + int last_error; struct mutex lock; bool did_init; } test_info = { @@ -1184,10 +1186,22 @@ static int dmatest_run_set(const char *val, const struct kernel_param *kp) return ret; } else if (dmatest_run) { if (!is_threaded_test_pending(info)) { - pr_info("No channels configured, continue with any\n"); - if (!is_threaded_test_run(info)) - stop_threaded_test(info); - add_threaded_test(info); + /* + * We have nothing to run. This can be due to: + */ + ret = info->last_error; + if (ret) { + /* 1) Misconfiguration */ + pr_err("Channel misconfigured, can't continue\n"); + mutex_unlock(&info->lock); + return ret; + } else { + /* 2) We rely on defaults */ + pr_info("No channels configured, continue with any\n"); + if (!is_threaded_test_run(info)) + stop_threaded_test(info); + add_threaded_test(info); + } } start_threaded_tests(info); } else { @@ -1204,7 +1218,7 @@ static int dmatest_chan_set(const char *val, const struct kernel_param *kp) struct dmatest_info *info = &test_info; struct dmatest_chan *dtc; char chan_reset_val[20]; - int ret = 0; + int ret; mutex_lock(&info->lock); ret = param_set_copystring(val, kp); @@ -1259,12 +1273,14 @@ static int dmatest_chan_set(const char *val, const struct kernel_param *kp) goto add_chan_err; } + info->last_error = ret; mutex_unlock(&info->lock); return ret; add_chan_err: param_set_copystring(chan_reset_val, kp); + info->last_error = ret; mutex_unlock(&info->lock); return ret; From c413c3102703a453c1312ce2160c6b7ffd55403e Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Tue, 15 Sep 2020 18:39:23 +0800 Subject: [PATCH 086/322] Revert "ALSA: usb-audio: Disable Lenovo P620 Rear line-in volume control" This reverts commit 34dedd2a83b241ba6aeb290260313c65dc58660e. According to Realtek, volume FU works for line-in. I can confirm volume control works after device firmware is updated. Fixes: 34dedd2a83b2 ("ALSA: usb-audio: Disable Lenovo P620 Rear line-in volume control") Signed-off-by: Kai-Heng Feng Link: https://lore.kernel.org/r/20200915103925.12777-1-kai.heng.feng@canonical.com Signed-off-by: Takashi Iwai --- sound/usb/mixer_maps.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/usb/mixer_maps.c b/sound/usb/mixer_maps.c index 5b43e9e40e49..c369c81e74c4 100644 --- a/sound/usb/mixer_maps.c +++ b/sound/usb/mixer_maps.c @@ -371,7 +371,6 @@ static const struct usbmix_name_map asus_rog_map[] = { }; static const struct usbmix_name_map lenovo_p620_rear_map[] = { - { 19, NULL, 2 }, /* FU, Volume */ { 19, NULL, 12 }, /* FU, Input Gain Pad */ {} }; From 59e330f8ff7ada7aa64fa422f6adf22a45152a7e Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 17 Sep 2020 08:50:25 -0700 Subject: [PATCH 087/322] nvme: return errors for hwmon init Initializing the nvme hwmon retrieves a log from the controller. If the controller is broken, we need to return the appropriate error so that subsequent initialization doesn't attempt to continue. Reported-by: Tong Zhang Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 7 +++++-- drivers/nvme/host/hwmon.c | 14 ++++++-------- drivers/nvme/host/nvme.h | 7 +++++-- 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index c013eb52fdc8..4cea14c18a6d 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3236,8 +3236,11 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) if (ret < 0) return ret; - if (!ctrl->identified) - nvme_hwmon_init(ctrl); + if (!ctrl->identified) { + ret = nvme_hwmon_init(ctrl); + if (ret < 0) + return ret; + } ctrl->identified = true; diff --git a/drivers/nvme/host/hwmon.c b/drivers/nvme/host/hwmon.c index 412a6c97c0d8..552dbc04567b 100644 --- a/drivers/nvme/host/hwmon.c +++ b/drivers/nvme/host/hwmon.c @@ -59,12 +59,8 @@ static int nvme_set_temp_thresh(struct nvme_ctrl *ctrl, int sensor, bool under, static int nvme_hwmon_get_smart_log(struct nvme_hwmon_data *data) { - int ret; - - ret = nvme_get_log(data->ctrl, NVME_NSID_ALL, NVME_LOG_SMART, 0, + return nvme_get_log(data->ctrl, NVME_NSID_ALL, NVME_LOG_SMART, 0, NVME_CSI_NVM, &data->log, sizeof(data->log), 0); - - return ret <= 0 ? ret : -EIO; } static int nvme_hwmon_read(struct device *dev, enum hwmon_sensor_types type, @@ -225,7 +221,7 @@ static const struct hwmon_chip_info nvme_hwmon_chip_info = { .info = nvme_hwmon_info, }; -void nvme_hwmon_init(struct nvme_ctrl *ctrl) +int nvme_hwmon_init(struct nvme_ctrl *ctrl) { struct device *dev = ctrl->dev; struct nvme_hwmon_data *data; @@ -234,7 +230,7 @@ void nvme_hwmon_init(struct nvme_ctrl *ctrl) data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); if (!data) - return; + return 0; data->ctrl = ctrl; mutex_init(&data->read_lock); @@ -244,7 +240,7 @@ void nvme_hwmon_init(struct nvme_ctrl *ctrl) dev_warn(ctrl->device, "Failed to read smart log (error %d)\n", err); devm_kfree(dev, data); - return; + return err; } hwmon = devm_hwmon_device_register_with_info(dev, "nvme", data, @@ -254,4 +250,6 @@ void nvme_hwmon_init(struct nvme_ctrl *ctrl) dev_warn(dev, "Failed to instantiate hwmon device\n"); devm_kfree(dev, data); } + + return 0; } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 9fd45ff656da..2aaedfa43ed8 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -827,9 +827,12 @@ static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev) } #ifdef CONFIG_NVME_HWMON -void nvme_hwmon_init(struct nvme_ctrl *ctrl); +int nvme_hwmon_init(struct nvme_ctrl *ctrl); #else -static inline void nvme_hwmon_init(struct nvme_ctrl *ctrl) { } +static inline int nvme_hwmon_init(struct nvme_ctrl *ctrl) +{ + return 0; +} #endif u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, From 50b7c24390a53c78de546215282fb52980f1d7b7 Mon Sep 17 00:00:00 2001 From: Xianting Tian Date: Tue, 22 Sep 2020 14:25:17 +0800 Subject: [PATCH 088/322] nvme-pci: fix NULL req in completion handler Currently, we use nvmeq->q_depth as the upper limit for a valid tag in nvme_handle_cqe(), it is not correct. Because the available tag number is recorded in tagset, which is not equal to nvmeq->q_depth. The nvme driver registers interrupts for queues before initializing the tagset, because it uses the number of successful request_irq() calls to configure the tagset parameters. This allows a race condition with the current tag validity check if the controller happens to produce an interrupt with a corrupted CQE before the tagset is initialized. Replace the driver's indirect tag check with the one already provided by the block layer. Signed-off-by: Xianting Tian Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index d31e298669a9..d286c267869d 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -940,13 +940,6 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx) struct nvme_completion *cqe = &nvmeq->cqes[idx]; struct request *req; - if (unlikely(cqe->command_id >= nvmeq->q_depth)) { - dev_warn(nvmeq->dev->ctrl.device, - "invalid id %d completed on queue %d\n", - cqe->command_id, le16_to_cpu(cqe->sq_id)); - return; - } - /* * AEN requests are special as they don't time out and can * survive any kind of queue freeze and often don't respond to @@ -960,6 +953,13 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx) } req = blk_mq_tag_to_rq(nvme_queue_tagset(nvmeq), cqe->command_id); + if (unlikely(!req)) { + dev_warn(nvmeq->dev->ctrl.device, + "invalid id %d completed on queue %d\n", + cqe->command_id, le16_to_cpu(cqe->sq_id)); + return; + } + trace_nvme_sq(req, cqe->sq_head, nvmeq->sq_tail); if (!nvme_try_complete_req(req, cqe->status, cqe->result)) nvme_pci_complete_rq(req); From 9e0e8dac985d4bd07d9e62922b9d189d3ca2fccf Mon Sep 17 00:00:00 2001 From: James Smart Date: Thu, 17 Sep 2020 13:33:22 -0700 Subject: [PATCH 089/322] nvme-fc: fail new connections to a deleted host or remote port The lldd may have made calls to delete a remote port or local port and the delete is in progress when the cli then attempts to create a new controller. Currently, this proceeds without error although it can't be very successful. Fix this by validating that both the host port and remote port are present when a new controller is to be created. Signed-off-by: James Smart Reviewed-by: Himanshu Madhani Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index e8ef42b9d50c..e2e09e25c056 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -3671,12 +3671,14 @@ nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) spin_lock_irqsave(&nvme_fc_lock, flags); list_for_each_entry(lport, &nvme_fc_lport_list, port_list) { if (lport->localport.node_name != laddr.nn || - lport->localport.port_name != laddr.pn) + lport->localport.port_name != laddr.pn || + lport->localport.port_state != FC_OBJSTATE_ONLINE) continue; list_for_each_entry(rport, &lport->endp_list, endp_list) { if (rport->remoteport.node_name != raddr.nn || - rport->remoteport.port_name != raddr.pn) + rport->remoteport.port_name != raddr.pn || + rport->remoteport.port_state != FC_OBJSTATE_ONLINE) continue; /* if fail to get reference fall through. Will error */ From 2b405533c2560d7878199c57d95a39151351df72 Mon Sep 17 00:00:00 2001 From: Bryan O'Donoghue Date: Sun, 20 Sep 2020 18:01:58 +0100 Subject: [PATCH 090/322] USB: gadget: f_ncm: Fix NDP16 datagram validation commit 2b74b0a04d3e ("USB: gadget: f_ncm: add bounds checks to ncm_unwrap_ntb()") adds important bounds checking however it unfortunately also introduces a bug with respect to section 3.3.1 of the NCM specification. wDatagramIndex[1] : "Byte index, in little endian, of the second datagram described by this NDP16. If zero, then this marks the end of the sequence of datagrams in this NDP16." wDatagramLength[1]: "Byte length, in little endian, of the second datagram described by this NDP16. If zero, then this marks the end of the sequence of datagrams in this NDP16." wDatagramIndex[1] and wDatagramLength[1] respectively then may be zero but that does not mean we should throw away the data referenced by wDatagramIndex[0] and wDatagramLength[0] as is currently the case. Breaking the loop on (index2 == 0 || dg_len2 == 0) should come at the end as was previously the case and checks for index2 and dg_len2 should be removed since zero is valid. I'm not sure how much testing the above patch received but for me right now after enumeration ping doesn't work. Reverting the commit restores ping, scp, etc. The extra validation associated with wDatagramIndex[0] and wDatagramLength[0] appears to be valid so, this change removes the incorrect restriction on wDatagramIndex[1] and wDatagramLength[1] restoring data processing between host and device. Fixes: 2b74b0a04d3e ("USB: gadget: f_ncm: add bounds checks to ncm_unwrap_ntb()") Cc: Ilja Van Sprundel Cc: Brooke Basile Cc: stable Signed-off-by: Bryan O'Donoghue Link: https://lore.kernel.org/r/20200920170158.1217068-1-bryan.odonoghue@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_ncm.c | 30 ++--------------------------- 1 file changed, 2 insertions(+), 28 deletions(-) diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c index b4206b0dede5..1f638759a953 100644 --- a/drivers/usb/gadget/function/f_ncm.c +++ b/drivers/usb/gadget/function/f_ncm.c @@ -1189,7 +1189,6 @@ static int ncm_unwrap_ntb(struct gether *port, const struct ndp_parser_opts *opts = ncm->parser_opts; unsigned crc_len = ncm->is_crc ? sizeof(uint32_t) : 0; int dgram_counter; - bool ndp_after_header; /* dwSignature */ if (get_unaligned_le32(tmp) != opts->nth_sign) { @@ -1216,7 +1215,6 @@ static int ncm_unwrap_ntb(struct gether *port, } ndp_index = get_ncm(&tmp, opts->ndp_index); - ndp_after_header = false; /* Run through all the NDP's in the NTB */ do { @@ -1232,8 +1230,6 @@ static int ncm_unwrap_ntb(struct gether *port, ndp_index); goto err; } - if (ndp_index == opts->nth_size) - ndp_after_header = true; /* * walk through NDP @@ -1312,37 +1308,13 @@ static int ncm_unwrap_ntb(struct gether *port, index2 = get_ncm(&tmp, opts->dgram_item_len); dg_len2 = get_ncm(&tmp, opts->dgram_item_len); - if (index2 == 0 || dg_len2 == 0) - break; - /* wDatagramIndex[1] */ - if (ndp_after_header) { - if (index2 < opts->nth_size + opts->ndp_size) { - INFO(port->func.config->cdev, - "Bad index: %#X\n", index2); - goto err; - } - } else { - if (index2 < opts->nth_size + opts->dpe_size) { - INFO(port->func.config->cdev, - "Bad index: %#X\n", index2); - goto err; - } - } if (index2 > block_len - opts->dpe_size) { INFO(port->func.config->cdev, "Bad index: %#X\n", index2); goto err; } - /* wDatagramLength[1] */ - if ((dg_len2 < 14 + crc_len) || - (dg_len2 > frame_max)) { - INFO(port->func.config->cdev, - "Bad dgram length: %#X\n", dg_len); - goto err; - } - /* * Copy the data into a new skb. * This ensures the truesize is correct @@ -1359,6 +1331,8 @@ static int ncm_unwrap_ntb(struct gether *port, ndp_len -= 2 * (opts->dgram_item_len * 2); dgram_counter++; + if (index2 == 0 || dg_len2 == 0) + break; } while (ndp_len > 2 * (opts->dgram_item_len * 2)); } while (ndp_index); From 4a5caa4af0df5cd8f1c7f0c2f871d382662b022e Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Thu, 20 Aug 2020 19:45:38 +0200 Subject: [PATCH 091/322] dm crypt: document new no_workqueue flags Commit 39d42fa96ba1 ("dm crypt: add flags to optionally bypass kcryptd workqueues") introduced new dm-crypt 'no_read_workqueue' and 'no_write_workqueue' flags. Add documentation to admin guide for them. Fixes: 39d42fa96ba1 ("dm crypt: add flags to optionally bypass kcryptd workqueues") Signed-off-by: Milan Broz Signed-off-by: Mike Snitzer --- Documentation/admin-guide/device-mapper/dm-crypt.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Documentation/admin-guide/device-mapper/dm-crypt.rst b/Documentation/admin-guide/device-mapper/dm-crypt.rst index 8f4a3f889d43..40dc2df58cd5 100644 --- a/Documentation/admin-guide/device-mapper/dm-crypt.rst +++ b/Documentation/admin-guide/device-mapper/dm-crypt.rst @@ -121,6 +121,14 @@ submit_from_crypt_cpus thread because it benefits CFQ to have writes submitted using the same context. +no_read_workqueue + Bypass dm-crypt internal workqueue and process read requests synchronously. + +no_write_workqueue + Bypass dm-crypt internal workqueue and process write requests synchronously. + This option is automatically enabled for host-managed zoned block devices + (e.g. host-managed SMR hard-disks). + integrity:: The device requires additional metadata per-sector stored in per-bio integrity structure. This metadata must by provided From 4c07ae0ad493b7b2d3dd3e53870e594f136ce8a5 Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Thu, 20 Aug 2020 21:20:26 +0200 Subject: [PATCH 092/322] dm crypt: document encrypted keyring key option Commit 27f5411a718c4 ("dm crypt: support using encrypted keys") introduced support for encrypted keyring type. Fix documentation in admin guide to mention this type. Fixes: 27f5411a718c4 ("dm crypt: support using encrypted keys") Signed-off-by: Milan Broz Signed-off-by: Mike Snitzer --- Documentation/admin-guide/device-mapper/dm-crypt.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/admin-guide/device-mapper/dm-crypt.rst b/Documentation/admin-guide/device-mapper/dm-crypt.rst index 40dc2df58cd5..bc28a9527ee5 100644 --- a/Documentation/admin-guide/device-mapper/dm-crypt.rst +++ b/Documentation/admin-guide/device-mapper/dm-crypt.rst @@ -67,7 +67,7 @@ Parameters:: the value passed in . - Either 'logon' or 'user' kernel key type. + Either 'logon', 'user' or 'encrypted' kernel key type. The kernel keyring key description crypt target should look for From bd805274577457f301bf826f86543f550cf5c1ef Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 22 Sep 2020 11:15:50 +0200 Subject: [PATCH 093/322] cpuidle: Drop misleading comments about RCU usage The commit 1098582a0f6c ("sched,idle,rcu: Push rcu_idle deeper into the idle path"), moved the calls rcu_idle_enter|exit() into the cpuidle core. However, it forgot to remove a couple of comments in enter_s2idle_proper() about why RCU_NONIDLE earlier was needed. So, let's drop them as they have become a bit misleading. Fixes: 1098582a0f6c ("sched,idle,rcu: Push rcu_idle deeper into the idle path") Signed-off-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/cpuidle.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 6c7e5621cf9a..29e84687f3c3 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -142,11 +142,6 @@ static void enter_s2idle_proper(struct cpuidle_driver *drv, time_start = ns_to_ktime(local_clock()); - /* - * trace_suspend_resume() called by tick_freeze() for the last CPU - * executing it contains RCU usage regarded as invalid in the idle - * context, so tell RCU about that. - */ tick_freeze(); /* * The state used here cannot be a "coupled" one, because the "coupled" @@ -159,11 +154,6 @@ static void enter_s2idle_proper(struct cpuidle_driver *drv, target_state->enter_s2idle(dev, drv, index); if (WARN_ON_ONCE(!irqs_disabled())) local_irq_disable(); - /* - * timekeeping_resume() that will be called by tick_unfreeze() for the - * first CPU executing it calls functions containing RCU read-side - * critical sections, so tell RCU about that. - */ if (!(target_state->flags & CPUIDLE_FLAG_RCU_IDLE)) rcu_idle_exit(); tick_unfreeze(); From f3bb0f796f5ffe32f0fbdce5b1b12eb85511158f Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Tue, 22 Sep 2020 14:40:46 +0200 Subject: [PATCH 094/322] clk: samsung: exynos4: mark 'chipid' clock as CLK_IGNORE_UNUSED The ChipID IO region has it's own clock, which is being disabled while scanning for unused clocks. It turned out that some CPU hotplug, CPU idle or even SOC firmware code depends on the reads from that area. Fix the mysterious hang caused by entering deep CPU idle state by ignoring the 'chipid' clock during unused clocks scan, as there are no direct clients for it which will keep it enabled. Fixes: e062b571777f ("clk: exynos4: register clocks using common clock framework") Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/20200922124046.10496-1-m.szyprowski@samsung.com Reviewed-by: Krzysztof Kozlowski Acked-by: Sylwester Nawrocki Signed-off-by: Stephen Boyd --- drivers/clk/samsung/clk-exynos4.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/samsung/clk-exynos4.c b/drivers/clk/samsung/clk-exynos4.c index 51564fc23c63..f4086287bb71 100644 --- a/drivers/clk/samsung/clk-exynos4.c +++ b/drivers/clk/samsung/clk-exynos4.c @@ -927,7 +927,7 @@ static const struct samsung_gate_clock exynos4210_gate_clks[] __initconst = { GATE(CLK_PCIE, "pcie", "aclk133", GATE_IP_FSYS, 14, 0, 0), GATE(CLK_SMMU_PCIE, "smmu_pcie", "aclk133", GATE_IP_FSYS, 18, 0, 0), GATE(CLK_MODEMIF, "modemif", "aclk100", GATE_IP_PERIL, 28, 0, 0), - GATE(CLK_CHIPID, "chipid", "aclk100", E4210_GATE_IP_PERIR, 0, 0, 0), + GATE(CLK_CHIPID, "chipid", "aclk100", E4210_GATE_IP_PERIR, 0, CLK_IGNORE_UNUSED, 0), GATE(CLK_SYSREG, "sysreg", "aclk100", E4210_GATE_IP_PERIR, 0, CLK_IGNORE_UNUSED, 0), GATE(CLK_HDMI_CEC, "hdmi_cec", "aclk100", E4210_GATE_IP_PERIR, 11, 0, @@ -969,7 +969,7 @@ static const struct samsung_gate_clock exynos4x12_gate_clks[] __initconst = { 0), GATE(CLK_TSADC, "tsadc", "aclk133", E4X12_GATE_BUS_FSYS1, 16, 0, 0), GATE(CLK_MIPI_HSI, "mipi_hsi", "aclk133", GATE_IP_FSYS, 10, 0, 0), - GATE(CLK_CHIPID, "chipid", "aclk100", E4X12_GATE_IP_PERIR, 0, 0, 0), + GATE(CLK_CHIPID, "chipid", "aclk100", E4X12_GATE_IP_PERIR, 0, CLK_IGNORE_UNUSED, 0), GATE(CLK_SYSREG, "sysreg", "aclk100", E4X12_GATE_IP_PERIR, 1, CLK_IGNORE_UNUSED, 0), GATE(CLK_HDMI_CEC, "hdmi_cec", "aclk100", E4X12_GATE_IP_PERIR, 11, 0, From b02cf0c4736c65c6667f396efaae6b5521e82abf Mon Sep 17 00:00:00 2001 From: Dinh Nguyen Date: Mon, 31 Aug 2020 15:26:57 -0500 Subject: [PATCH 095/322] clk: socfpga: stratix10: fix the divider for the emac_ptp_free_clk The fixed divider the emac_ptp_free_clk should be 2, not 4. Fixes: 07afb8db7340 ("clk: socfpga: stratix10: add clock driver for Stratix10 platform") Cc: stable@vger.kernel.org Signed-off-by: Dinh Nguyen Link: https://lore.kernel.org/r/20200831202657.8224-1-dinguyen@kernel.org Signed-off-by: Stephen Boyd --- drivers/clk/socfpga/clk-s10.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/socfpga/clk-s10.c b/drivers/clk/socfpga/clk-s10.c index c1dfc9b34e4e..661a8e9bfb9b 100644 --- a/drivers/clk/socfpga/clk-s10.c +++ b/drivers/clk/socfpga/clk-s10.c @@ -209,7 +209,7 @@ static const struct stratix10_perip_cnt_clock s10_main_perip_cnt_clks[] = { { STRATIX10_EMAC_B_FREE_CLK, "emacb_free_clk", NULL, emacb_free_mux, ARRAY_SIZE(emacb_free_mux), 0, 0, 2, 0xB0, 1}, { STRATIX10_EMAC_PTP_FREE_CLK, "emac_ptp_free_clk", NULL, emac_ptp_free_mux, - ARRAY_SIZE(emac_ptp_free_mux), 0, 0, 4, 0xB0, 2}, + ARRAY_SIZE(emac_ptp_free_mux), 0, 0, 2, 0xB0, 2}, { STRATIX10_GPIO_DB_FREE_CLK, "gpio_db_free_clk", NULL, gpio_db_free_mux, ARRAY_SIZE(gpio_db_free_mux), 0, 0, 0, 0xB0, 3}, { STRATIX10_SDMMC_FREE_CLK, "sdmmc_free_clk", NULL, sdmmc_free_mux, From a7b3474cbb2864d5500d5e4f48dd57c903975cab Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 22 Sep 2020 09:58:52 +0200 Subject: [PATCH 096/322] x86/irq: Make run_on_irqstack_cond() typesafe Sami reported that run_on_irqstack_cond() requires the caller to cast functions to mismatching types, which trips indirect call Control-Flow Integrity (CFI) in Clang. Instead of disabling CFI on that function, provide proper helpers for the three call variants. The actual ASM code stays the same as that is out of reach. [ bp: Fix __run_on_irqstack() prototype to match. ] Fixes: 931b94145981 ("x86/entry: Provide helpers for executing on the irqstack") Reported-by: Nathan Chancellor Reported-by: Sami Tolvanen Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Tested-by: Sami Tolvanen Cc: Link: https://github.com/ClangBuiltLinux/linux/issues/1052 Link: https://lkml.kernel.org/r/87pn6eb5tv.fsf@nanos.tec.linutronix.de --- arch/x86/entry/common.c | 2 +- arch/x86/entry/entry_64.S | 2 + arch/x86/include/asm/idtentry.h | 2 +- arch/x86/include/asm/irq_stack.h | 71 ++++++++++++++++++++++++++++---- arch/x86/kernel/irq.c | 2 +- arch/x86/kernel/irq_64.c | 2 +- 6 files changed, 68 insertions(+), 13 deletions(-) diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 2f84c7ca74ea..870efeec8bda 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -299,7 +299,7 @@ __visible noinstr void xen_pv_evtchn_do_upcall(struct pt_regs *regs) old_regs = set_irq_regs(regs); instrumentation_begin(); - run_on_irqstack_cond(__xen_pv_evtchn_do_upcall, NULL, regs); + run_on_irqstack_cond(__xen_pv_evtchn_do_upcall, regs); instrumentation_begin(); set_irq_regs(old_regs); diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 70dea9337816..d977079a7d02 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -682,6 +682,8 @@ SYM_CODE_END(.Lbad_gs) * rdx: Function argument (can be NULL if none) */ SYM_FUNC_START(asm_call_on_stack) +SYM_INNER_LABEL(asm_call_sysvec_on_stack, SYM_L_GLOBAL) +SYM_INNER_LABEL(asm_call_irq_on_stack, SYM_L_GLOBAL) /* * Save the frame pointer unconditionally. This allows the ORC * unwinder to handle the stack switch. diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index a43366191212..a0638640f1ed 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -242,7 +242,7 @@ __visible noinstr void func(struct pt_regs *regs) \ instrumentation_begin(); \ irq_enter_rcu(); \ kvm_set_cpu_l1tf_flush_l1d(); \ - run_on_irqstack_cond(__##func, regs, regs); \ + run_sysvec_on_irqstack_cond(__##func, regs); \ irq_exit_rcu(); \ instrumentation_end(); \ irqentry_exit(regs, state); \ diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h index 4ae66f097101..775816965c6a 100644 --- a/arch/x86/include/asm/irq_stack.h +++ b/arch/x86/include/asm/irq_stack.h @@ -12,20 +12,50 @@ static __always_inline bool irqstack_active(void) return __this_cpu_read(irq_count) != -1; } -void asm_call_on_stack(void *sp, void *func, void *arg); +void asm_call_on_stack(void *sp, void (*func)(void), void *arg); +void asm_call_sysvec_on_stack(void *sp, void (*func)(struct pt_regs *regs), + struct pt_regs *regs); +void asm_call_irq_on_stack(void *sp, void (*func)(struct irq_desc *desc), + struct irq_desc *desc); -static __always_inline void __run_on_irqstack(void *func, void *arg) +static __always_inline void __run_on_irqstack(void (*func)(void)) { void *tos = __this_cpu_read(hardirq_stack_ptr); __this_cpu_add(irq_count, 1); - asm_call_on_stack(tos - 8, func, arg); + asm_call_on_stack(tos - 8, func, NULL); + __this_cpu_sub(irq_count, 1); +} + +static __always_inline void +__run_sysvec_on_irqstack(void (*func)(struct pt_regs *regs), + struct pt_regs *regs) +{ + void *tos = __this_cpu_read(hardirq_stack_ptr); + + __this_cpu_add(irq_count, 1); + asm_call_sysvec_on_stack(tos - 8, func, regs); + __this_cpu_sub(irq_count, 1); +} + +static __always_inline void +__run_irq_on_irqstack(void (*func)(struct irq_desc *desc), + struct irq_desc *desc) +{ + void *tos = __this_cpu_read(hardirq_stack_ptr); + + __this_cpu_add(irq_count, 1); + asm_call_irq_on_stack(tos - 8, func, desc); __this_cpu_sub(irq_count, 1); } #else /* CONFIG_X86_64 */ static inline bool irqstack_active(void) { return false; } -static inline void __run_on_irqstack(void *func, void *arg) { } +static inline void __run_on_irqstack(void (*func)(void)) { } +static inline void __run_sysvec_on_irqstack(void (*func)(struct pt_regs *regs), + struct pt_regs *regs) { } +static inline void __run_irq_on_irqstack(void (*func)(struct irq_desc *desc), + struct irq_desc *desc) { } #endif /* !CONFIG_X86_64 */ static __always_inline bool irq_needs_irq_stack(struct pt_regs *regs) @@ -37,17 +67,40 @@ static __always_inline bool irq_needs_irq_stack(struct pt_regs *regs) return !user_mode(regs) && !irqstack_active(); } -static __always_inline void run_on_irqstack_cond(void *func, void *arg, + +static __always_inline void run_on_irqstack_cond(void (*func)(void), struct pt_regs *regs) { - void (*__func)(void *arg) = func; - lockdep_assert_irqs_disabled(); if (irq_needs_irq_stack(regs)) - __run_on_irqstack(__func, arg); + __run_on_irqstack(func); else - __func(arg); + func(); +} + +static __always_inline void +run_sysvec_on_irqstack_cond(void (*func)(struct pt_regs *regs), + struct pt_regs *regs) +{ + lockdep_assert_irqs_disabled(); + + if (irq_needs_irq_stack(regs)) + __run_sysvec_on_irqstack(func, regs); + else + func(regs); +} + +static __always_inline void +run_irq_on_irqstack_cond(void (*func)(struct irq_desc *desc), struct irq_desc *desc, + struct pt_regs *regs) +{ + lockdep_assert_irqs_disabled(); + + if (irq_needs_irq_stack(regs)) + __run_irq_on_irqstack(func, desc); + else + func(desc); } #endif diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 181060247e3c..c5dd50369e2f 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -227,7 +227,7 @@ static __always_inline void handle_irq(struct irq_desc *desc, struct pt_regs *regs) { if (IS_ENABLED(CONFIG_X86_64)) - run_on_irqstack_cond(desc->handle_irq, desc, regs); + run_irq_on_irqstack_cond(desc->handle_irq, desc, regs); else __handle_irq(desc, regs); } diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 1b4fe93a86c5..440eed558558 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -74,5 +74,5 @@ int irq_init_percpu_irqstack(unsigned int cpu) void do_softirq_own_stack(void) { - run_on_irqstack_cond(__do_softirq, NULL, NULL); + run_on_irqstack_cond(__do_softirq, NULL); } From 149415586243bd0ea729760fb6dd7b3c50601871 Mon Sep 17 00:00:00 2001 From: Sudhakar Panneerselvam Date: Wed, 16 Sep 2020 23:54:31 +0000 Subject: [PATCH 097/322] scsi: target: Fix lun lookup for TARGET_SCF_LOOKUP_LUN_FROM_TAG case transport_lookup_tmr_lun() uses "orig_fe_lun" member of struct se_cmd for the lookup. Hence, update this field directly for the TARGET_SCF_LOOKUP_LUN_FROM_TAG case. Link: https://lore.kernel.org/r/1600300471-26135-1-git-send-email-sudhakar.panneerselvam@oracle.com Fixes: a36840d80027 ("target: Initialize LUN in transport_init_se_cmd()") Reported-by: Martin Wilck Reviewed-by: Mike Christie Signed-off-by: Sudhakar Panneerselvam Signed-off-by: Martin K. Petersen --- drivers/target/target_core_transport.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 9fb0be0aa620..8dd289214dd8 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1840,7 +1840,8 @@ int target_submit_tmr(struct se_cmd *se_cmd, struct se_session *se_sess, * out unpacked_lun for the original se_cmd. */ if (tm_type == TMR_ABORT_TASK && (flags & TARGET_SCF_LOOKUP_LUN_FROM_TAG)) { - if (!target_lookup_lun_from_tag(se_sess, tag, &unpacked_lun)) + if (!target_lookup_lun_from_tag(se_sess, tag, + &se_cmd->orig_fe_lun)) goto failure; } From 0c309ed17c50e7eeaf3c66102b475198064e80da Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Tue, 8 Sep 2020 19:57:07 +0900 Subject: [PATCH 098/322] PM / devfreq: Add timer type to devfreq_summary debugfs The commit 4dc3bab8687f ("PM / devfreq: Add support delayed timer for polling mode") supports the delayed timer but this commit missed the adding the timer type to devfreq_summary debugfs node. Add the timer type to devfreq_summary debugfs. Fixes: 4dc3bab8687f ("PM / devfreq: Add support delayed timer for polling mode") Signed-off-by: Chanwoo Choi --- drivers/devfreq/devfreq.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 561d91b2d3bf..071b59fe84d2 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -1766,20 +1766,23 @@ static int devfreq_summary_show(struct seq_file *s, void *data) struct devfreq *p_devfreq = NULL; unsigned long cur_freq, min_freq, max_freq; unsigned int polling_ms; + unsigned int timer; - seq_printf(s, "%-30s %-30s %-15s %10s %12s %12s %12s\n", + seq_printf(s, "%-30s %-30s %-15s %-10s %10s %12s %12s %12s\n", "dev", "parent_dev", "governor", + "timer", "polling_ms", "cur_freq_Hz", "min_freq_Hz", "max_freq_Hz"); - seq_printf(s, "%30s %30s %15s %10s %12s %12s %12s\n", + seq_printf(s, "%30s %30s %15s %10s %10s %12s %12s %12s\n", "------------------------------", "------------------------------", "---------------", "----------", + "----------", "------------", "------------", "------------"); @@ -1803,13 +1806,15 @@ static int devfreq_summary_show(struct seq_file *s, void *data) cur_freq = devfreq->previous_freq; get_freq_range(devfreq, &min_freq, &max_freq); polling_ms = devfreq->profile->polling_ms; + timer = devfreq->profile->timer; mutex_unlock(&devfreq->lock); seq_printf(s, - "%-30s %-30s %-15s %10d %12ld %12ld %12ld\n", + "%-30s %-30s %-15s %-10s %10d %12ld %12ld %12ld\n", dev_name(&devfreq->dev), p_devfreq ? dev_name(&p_devfreq->dev) : "null", devfreq->governor_name, + polling_ms ? timer_name[timer] : "null", polling_ms, cur_freq, min_freq, From 6bf560766a8ef5afe4faa3244220cf5b3a934549 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 8 Sep 2020 10:25:57 +0300 Subject: [PATCH 099/322] PM / devfreq: tegra30: Disable clock on error in probe This error path needs to call clk_disable_unprepare(). Fixes: 7296443b900e ("PM / devfreq: tegra30: Handle possible round-rate error") Signed-off-by: Dan Carpenter Reviewed-by: Dmitry Osipenko Signed-off-by: Dan Carpenter Signed-off-by: Chanwoo Choi --- drivers/devfreq/tegra30-devfreq.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/devfreq/tegra30-devfreq.c b/drivers/devfreq/tegra30-devfreq.c index e94a27804c20..dedd39de7367 100644 --- a/drivers/devfreq/tegra30-devfreq.c +++ b/drivers/devfreq/tegra30-devfreq.c @@ -836,7 +836,8 @@ static int tegra_devfreq_probe(struct platform_device *pdev) rate = clk_round_rate(tegra->emc_clock, ULONG_MAX); if (rate < 0) { dev_err(&pdev->dev, "Failed to round clock rate: %ld\n", rate); - return rate; + err = rate; + goto disable_clk; } tegra->max_freq = rate / KHZ; @@ -897,6 +898,7 @@ remove_opps: dev_pm_opp_remove_all_dynamic(&pdev->dev); reset_control_reset(tegra->reset); +disable_clk: clk_disable_unprepare(tegra->clock); return err; From 16cce04cdb200ba905d1241b425ac48da5a9ace5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 16 Sep 2020 11:50:22 +0100 Subject: [PATCH 100/322] drm/i915/selftests: Push the fake iommu device from the stack to data Since we store a pointer to the fake iommu device that is allocated on the stack, as soon as we leave the function it goes out of scope and any future dereference is undefined behaviour. Just in case we may need to look at the fake iommu device after initialiation, move the allocation from the stack into the data. Fixes: 01b9d4e21148 ("iommu/vt-d: Use dev_iommu_priv_get/set()") Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20200916105022.28316-2-chris@chris-wilson.co.uk (cherry picked from commit 9f9f4101fc98db56714e71676d5a1e2d27e01f7e) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/selftests/mock_gem_device.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index f127e633f7ca..397c313a8b69 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -118,11 +118,11 @@ static struct dev_pm_domain pm_domain = { struct drm_i915_private *mock_gem_device(void) { +#if IS_ENABLED(CONFIG_IOMMU_API) && defined(CONFIG_INTEL_IOMMU) + static struct dev_iommu fake_iommu = { .priv = (void *)-1 }; +#endif struct drm_i915_private *i915; struct pci_dev *pdev; -#if IS_ENABLED(CONFIG_IOMMU_API) && defined(CONFIG_INTEL_IOMMU) - struct dev_iommu iommu; -#endif int err; pdev = kzalloc(sizeof(*pdev), GFP_KERNEL); @@ -141,10 +141,8 @@ struct drm_i915_private *mock_gem_device(void) dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); #if IS_ENABLED(CONFIG_IOMMU_API) && defined(CONFIG_INTEL_IOMMU) - /* HACK HACK HACK to disable iommu for the fake device; force identity mapping */ - memset(&iommu, 0, sizeof(iommu)); - iommu.priv = (void *)-1; - pdev->dev.iommu = &iommu; + /* HACK to disable iommu for the fake device; force identity mapping */ + pdev->dev.iommu = &fake_iommu; #endif pci_set_drvdata(pdev, i915); From 955921289dcdbc49b46a731ec07978fb7d4e1fc7 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 23 Sep 2020 13:50:12 +0200 Subject: [PATCH 101/322] ACPI: processor: Fix build for ARCH_APICTIMER_STOPS_ON_C3 unset Fix the lapic_timer_needs_broadcast() stub for ARCH_APICTIMER_STOPS_ON_C3 unset to actually return a value. Fixes: aa6b43d57f99 ("ACPI: processor: Use CPUIDLE_FLAG_TIMER_STOP") Reported-by: Guenter Roeck Signed-off-by: Rafael J. Wysocki --- drivers/acpi/processor_idle.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 7ecb90e90afd..f66236cff69b 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -176,6 +176,7 @@ static void lapic_timer_propagate_broadcast(struct acpi_processor *pr) { } static bool lapic_timer_needs_broadcast(struct acpi_processor *pr, struct acpi_processor_cx *cx) { + return false; } #endif From b13812ddea615b6507beef24f76540c0c1143c5c Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Wed, 23 Sep 2020 18:33:12 +0800 Subject: [PATCH 102/322] MIPS: Loongson2ef: Disable Loongson MMI instructions It was missed when I was forking Loongson2ef from Loongson64 but should be applied to Loongson2ef as march=loongson2f will also enable Loongson MMI in GCC-9+. Signed-off-by: Jiaxun Yang Fixes: 71e2f4dd5a65 ("MIPS: Fork loongson2ef from loongson64") Reported-by: Thomas Bogendoerfer Cc: stable@vger.kernel.org # v5.8+ Signed-off-by: Thomas Bogendoerfer --- arch/mips/loongson2ef/Platform | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/mips/loongson2ef/Platform b/arch/mips/loongson2ef/Platform index 4ab55f1123a0..ae023b9a1c51 100644 --- a/arch/mips/loongson2ef/Platform +++ b/arch/mips/loongson2ef/Platform @@ -44,6 +44,10 @@ ifdef CONFIG_CPU_LOONGSON2F_WORKAROUNDS endif endif +# Some -march= flags enable MMI instructions, and GCC complains about that +# support being enabled alongside -msoft-float. Thus explicitly disable MMI. +cflags-y += $(call cc-option,-mno-loongson-mmi) + # # Loongson Machines' Support # From e393fbe6fa27af23f78df6e16a8fd2963578a8c4 Mon Sep 17 00:00:00 2001 From: Wei Li Date: Wed, 23 Sep 2020 14:53:12 +0800 Subject: [PATCH 103/322] MIPS: Add the missing 'CPU_1074K' into __get_cpu_type() Commit 442e14a2c55e ("MIPS: Add 1074K CPU support explicitly.") split 1074K from the 74K as an unique CPU type, while it missed to add the 'CPU_1074K' in __get_cpu_type(). So let's add it back. Fixes: 442e14a2c55e ("MIPS: Add 1074K CPU support explicitly.") Signed-off-by: Wei Li Signed-off-by: Thomas Bogendoerfer --- arch/mips/include/asm/cpu-type.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/include/asm/cpu-type.h b/arch/mips/include/asm/cpu-type.h index 75a7a382da09..3288cef4b168 100644 --- a/arch/mips/include/asm/cpu-type.h +++ b/arch/mips/include/asm/cpu-type.h @@ -47,6 +47,7 @@ static inline int __pure __get_cpu_type(const int cpu_type) case CPU_34K: case CPU_1004K: case CPU_74K: + case CPU_1074K: case CPU_M14KC: case CPU_M14KEC: case CPU_INTERAPTIV: From be090fa62080d8501a5651a73cb954721966b125 Mon Sep 17 00:00:00 2001 From: Wei Li Date: Wed, 23 Sep 2020 14:53:26 +0800 Subject: [PATCH 104/322] MIPS: BCM47XX: Remove the needless check with the 1074K MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As there is no known soc powered by mips 1074K in bcm47xx series, the check with 1074K is needless. So just remove it. Link: https://wireless.wiki.kernel.org/en/users/Drivers/b43/soc Fixes: 442e14a2c55e ("MIPS: Add 1074K CPU support explicitly.") Signed-off-by: Wei Li Acked-by: Rafał Miłecki Signed-off-by: Thomas Bogendoerfer --- arch/mips/bcm47xx/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/bcm47xx/setup.c b/arch/mips/bcm47xx/setup.c index 82627c264964..01427bde2397 100644 --- a/arch/mips/bcm47xx/setup.c +++ b/arch/mips/bcm47xx/setup.c @@ -148,7 +148,7 @@ void __init plat_mem_setup(void) { struct cpuinfo_mips *c = ¤t_cpu_data; - if ((c->cputype == CPU_74K) || (c->cputype == CPU_1074K)) { + if (c->cputype == CPU_74K) { pr_info("Using bcma bus\n"); #ifdef CONFIG_BCM47XX_BCMA bcm47xx_bus_type = BCM47XX_BUS_TYPE_BCMA; From b96e6506c2ea7c8e3ef0860169b6d0ba2f1aca9f Mon Sep 17 00:00:00 2001 From: Mohammed Gamal Date: Thu, 3 Sep 2020 16:11:22 +0200 Subject: [PATCH 105/322] KVM: x86: VMX: Make smaller physical guest address space support user-configurable This patch exposes allow_smaller_maxphyaddr to the user as a module parameter. Since smaller physical address spaces are only supported on VMX, the parameter is only exposed in the kvm_intel module. For now disable support by default, and let the user decide if they want to enable it. Modifications to VMX page fault and EPT violation handling will depend on whether that parameter is enabled. Signed-off-by: Mohammed Gamal Message-Id: <20200903141122.72908-1-mgamal@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 15 ++++++++++----- arch/x86/kvm/vmx/vmx.h | 5 ++++- arch/x86/kvm/x86.c | 2 +- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 75cd720c9e8c..f0384e93548a 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -129,6 +129,9 @@ static bool __read_mostly enable_preemption_timer = 1; module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO); #endif +extern bool __read_mostly allow_smaller_maxphyaddr; +module_param(allow_smaller_maxphyaddr, bool, S_IRUGO); + #define KVM_VM_CR0_ALWAYS_OFF (X86_CR0_NW | X86_CR0_CD) #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR0_NE #define KVM_VM_CR0_ALWAYS_ON \ @@ -4803,6 +4806,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) * EPT will cause page fault only if we need to * detect illegal GPAs. */ + WARN_ON_ONCE(!allow_smaller_maxphyaddr); kvm_fixup_and_inject_pf_error(vcpu, cr2, error_code); return 1; } else @@ -5331,7 +5335,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) * would also use advanced VM-exit information for EPT violations to * reconstruct the page fault error code. */ - if (unlikely(kvm_mmu_is_illegal_gpa(vcpu, gpa))) + if (unlikely(allow_smaller_maxphyaddr && kvm_mmu_is_illegal_gpa(vcpu, gpa))) return kvm_emulate_instruction(vcpu, 0); return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0); @@ -8305,11 +8309,12 @@ static int __init vmx_init(void) vmx_check_vmcs12_offsets(); /* - * Intel processors don't have problems with - * GUEST_MAXPHYADDR < HOST_MAXPHYADDR so enable - * it for VMX by default + * Shadow paging doesn't have a (further) performance penalty + * from GUEST_MAXPHYADDR < HOST_MAXPHYADDR so enable it + * by default */ - allow_smaller_maxphyaddr = true; + if (!enable_ept) + allow_smaller_maxphyaddr = true; return 0; } diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index a2f82127c170..a0e47720f60c 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -552,7 +552,10 @@ static inline bool vmx_has_waitpkg(struct vcpu_vmx *vmx) static inline bool vmx_need_pf_intercept(struct kvm_vcpu *vcpu) { - return !enable_ept || cpuid_maxphyaddr(vcpu) < boot_cpu_data.x86_phys_bits; + if (!enable_ept) + return true; + + return allow_smaller_maxphyaddr && cpuid_maxphyaddr(vcpu) < boot_cpu_data.x86_phys_bits; } void dump_vmcs(void); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e3de0fe5af37..67362607e396 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -188,7 +188,7 @@ static struct kvm_shared_msrs __percpu *shared_msrs; u64 __read_mostly host_efer; EXPORT_SYMBOL_GPL(host_efer); -bool __read_mostly allow_smaller_maxphyaddr; +bool __read_mostly allow_smaller_maxphyaddr = 0; EXPORT_SYMBOL_GPL(allow_smaller_maxphyaddr); static u64 __read_mostly host_xss; From 18391e5e9cf59226354a864e5befa3c764eb8903 Mon Sep 17 00:00:00 2001 From: Yang Weijiang Date: Wed, 26 Aug 2020 09:55:24 +0800 Subject: [PATCH 106/322] selftests: kvm: Fix assert failure in single-step test This is a follow-up patch to fix an issue left in commit: 98b0bf02738004829d7e26d6cb47b2e469aaba86 selftests: kvm: Use a shorter encoding to clear RAX With the change in the commit, we also need to modify "xor" instruction length from 3 to 2 in array ss_size accordingly to pass below check: for (i = 0; i < (sizeof(ss_size) / sizeof(ss_size[0])); i++) { target_rip += ss_size[i]; CLEAR_DEBUG(); debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP; debug.arch.debugreg[7] = 0x00000400; APPLY_DEBUG(); vcpu_run(vm, VCPU_ID); TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG && run->debug.arch.exception == DB_VECTOR && run->debug.arch.pc == target_rip && run->debug.arch.dr6 == target_dr6, "SINGLE_STEP[%d]: exit %d exception %d rip 0x%llx " "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)", i, run->exit_reason, run->debug.arch.exception, run->debug.arch.pc, target_rip, run->debug.arch.dr6, target_dr6); } Reported-by: kernel test robot Signed-off-by: Yang Weijiang Message-Id: <20200826015524.13251-1-weijiang.yang@intel.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/x86_64/debug_regs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/x86_64/debug_regs.c b/tools/testing/selftests/kvm/x86_64/debug_regs.c index b8d14f9db5f9..2fc6b3af81a1 100644 --- a/tools/testing/selftests/kvm/x86_64/debug_regs.c +++ b/tools/testing/selftests/kvm/x86_64/debug_regs.c @@ -73,7 +73,7 @@ int main(void) int i; /* Instruction lengths starting at ss_start */ int ss_size[4] = { - 3, /* xor */ + 2, /* xor */ 2, /* cpuid */ 5, /* mov */ 2, /* rdmsr */ From fbb5a79d2fe7b01c6424fbbc04368373b1672d61 Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Wed, 23 Sep 2020 08:51:42 +0800 Subject: [PATCH 107/322] regulator: axp20x: fix LDO2/4 description Currently we wrongly set the mask of value of LDO2/4 both to the mask of LDO2, and the LDO4 voltage configuration is left untouched. This leads to conflict when LDO2/4 are both in use. Fix this issue by setting different vsel_mask to both regulators. Fixes: db4a555f7c4c ("regulator: axp20x: use defines for masks") Signed-off-by: Icenowy Zheng Link: https://lore.kernel.org/r/20200923005142.147135-1-icenowy@aosc.io Signed-off-by: Mark Brown --- drivers/regulator/axp20x-regulator.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/regulator/axp20x-regulator.c b/drivers/regulator/axp20x-regulator.c index fbc95cadaf53..126649c172e1 100644 --- a/drivers/regulator/axp20x-regulator.c +++ b/drivers/regulator/axp20x-regulator.c @@ -42,8 +42,9 @@ #define AXP20X_DCDC2_V_OUT_MASK GENMASK(5, 0) #define AXP20X_DCDC3_V_OUT_MASK GENMASK(7, 0) -#define AXP20X_LDO24_V_OUT_MASK GENMASK(7, 4) +#define AXP20X_LDO2_V_OUT_MASK GENMASK(7, 4) #define AXP20X_LDO3_V_OUT_MASK GENMASK(6, 0) +#define AXP20X_LDO4_V_OUT_MASK GENMASK(3, 0) #define AXP20X_LDO5_V_OUT_MASK GENMASK(7, 4) #define AXP20X_PWR_OUT_EXTEN_MASK BIT_MASK(0) @@ -542,14 +543,14 @@ static const struct regulator_desc axp20x_regulators[] = { AXP20X_PWR_OUT_CTRL, AXP20X_PWR_OUT_DCDC3_MASK), AXP_DESC_FIXED(AXP20X, LDO1, "ldo1", "acin", 1300), AXP_DESC(AXP20X, LDO2, "ldo2", "ldo24in", 1800, 3300, 100, - AXP20X_LDO24_V_OUT, AXP20X_LDO24_V_OUT_MASK, + AXP20X_LDO24_V_OUT, AXP20X_LDO2_V_OUT_MASK, AXP20X_PWR_OUT_CTRL, AXP20X_PWR_OUT_LDO2_MASK), AXP_DESC(AXP20X, LDO3, "ldo3", "ldo3in", 700, 3500, 25, AXP20X_LDO3_V_OUT, AXP20X_LDO3_V_OUT_MASK, AXP20X_PWR_OUT_CTRL, AXP20X_PWR_OUT_LDO3_MASK), AXP_DESC_RANGES(AXP20X, LDO4, "ldo4", "ldo24in", axp20x_ldo4_ranges, AXP20X_LDO4_V_OUT_NUM_VOLTAGES, - AXP20X_LDO24_V_OUT, AXP20X_LDO24_V_OUT_MASK, + AXP20X_LDO24_V_OUT, AXP20X_LDO4_V_OUT_MASK, AXP20X_PWR_OUT_CTRL, AXP20X_PWR_OUT_LDO4_MASK), AXP_DESC_IO(AXP20X, LDO5, "ldo5", "ldo5in", 1800, 3300, 100, AXP20X_LDO5_V_OUT, AXP20X_LDO5_V_OUT_MASK, From 530b5affc675ade5db4a03f04ed7cd66806c8a1a Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Wed, 23 Sep 2020 15:10:26 +0200 Subject: [PATCH 108/322] spi: fsl-dspi: fix use-after-free in remove path spi_unregister_controller() not only unregisters the controller, but also frees the controller. This will free the driver data with it, so we must not access it later dspi_remove(). Solve this by allocating the driver data separately from the SPI controller. Signed-off-by: Sascha Hauer Link: https://lore.kernel.org/r/20200923131026.20707-1-s.hauer@pengutronix.de Signed-off-by: Mark Brown --- drivers/spi/spi-fsl-dspi.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c index 283f2468a2f4..127323a4b27c 100644 --- a/drivers/spi/spi-fsl-dspi.c +++ b/drivers/spi/spi-fsl-dspi.c @@ -1273,11 +1273,14 @@ static int dspi_probe(struct platform_device *pdev) void __iomem *base; bool big_endian; - ctlr = spi_alloc_master(&pdev->dev, sizeof(struct fsl_dspi)); + dspi = devm_kzalloc(&pdev->dev, sizeof(*dspi), GFP_KERNEL); + if (!dspi) + return -ENOMEM; + + ctlr = spi_alloc_master(&pdev->dev, 0); if (!ctlr) return -ENOMEM; - dspi = spi_controller_get_devdata(ctlr); dspi->pdev = pdev; dspi->ctlr = ctlr; @@ -1414,7 +1417,7 @@ poll_mode: if (dspi->devtype_data->trans_mode != DSPI_DMA_MODE) ctlr->ptp_sts_supported = true; - platform_set_drvdata(pdev, ctlr); + platform_set_drvdata(pdev, dspi); ret = spi_register_controller(ctlr); if (ret != 0) { @@ -1437,8 +1440,7 @@ out_ctlr_put: static int dspi_remove(struct platform_device *pdev) { - struct spi_controller *ctlr = platform_get_drvdata(pdev); - struct fsl_dspi *dspi = spi_controller_get_devdata(ctlr); + struct fsl_dspi *dspi = platform_get_drvdata(pdev); /* Disconnect from the SPI framework */ spi_unregister_controller(dspi->ctlr); From df3a57d1f6072d07978bafa7dbd9904cdf8f3e13 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 23 Sep 2020 09:56:59 -0700 Subject: [PATCH 109/322] mm: split out the non-present case from copy_one_pte() This is a purely mechanical split of the copy_one_pte() function. It's not immediately obvious when looking at the diff because of the indentation change, but the way to see what is going on in this commit is to use the "-w" flag to not show pure whitespace changes, and you see how the first part of copy_one_pte() is simply lifted out into a separate function. And since the non-present case is marked unlikely, don't make the new function be inlined. Not that gcc really seems to care, since it looks like it will inline it anyway due to the whole "single callsite for static function" logic. In fact, code generation with the function split is almost identical to before. But not marking it inline is the right thing to do. This is pure prep-work and cleanup for subsequent changes. Signed-off-by: Linus Torvalds --- mm/memory.c | 152 ++++++++++++++++++++++++++++------------------------ 1 file changed, 82 insertions(+), 70 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 469af373ae76..31a3ab7d9aa3 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -695,6 +695,84 @@ out: * covered by this vma. */ +static unsigned long +copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, + pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma, + unsigned long addr, int *rss) +{ + unsigned long vm_flags = vma->vm_flags; + pte_t pte = *src_pte; + struct page *page; + swp_entry_t entry = pte_to_swp_entry(pte); + + if (likely(!non_swap_entry(entry))) { + if (swap_duplicate(entry) < 0) + return entry.val; + + /* make sure dst_mm is on swapoff's mmlist. */ + if (unlikely(list_empty(&dst_mm->mmlist))) { + spin_lock(&mmlist_lock); + if (list_empty(&dst_mm->mmlist)) + list_add(&dst_mm->mmlist, + &src_mm->mmlist); + spin_unlock(&mmlist_lock); + } + rss[MM_SWAPENTS]++; + } else if (is_migration_entry(entry)) { + page = migration_entry_to_page(entry); + + rss[mm_counter(page)]++; + + if (is_write_migration_entry(entry) && + is_cow_mapping(vm_flags)) { + /* + * COW mappings require pages in both + * parent and child to be set to read. + */ + make_migration_entry_read(&entry); + pte = swp_entry_to_pte(entry); + if (pte_swp_soft_dirty(*src_pte)) + pte = pte_swp_mksoft_dirty(pte); + if (pte_swp_uffd_wp(*src_pte)) + pte = pte_swp_mkuffd_wp(pte); + set_pte_at(src_mm, addr, src_pte, pte); + } + } else if (is_device_private_entry(entry)) { + page = device_private_entry_to_page(entry); + + /* + * Update rss count even for unaddressable pages, as + * they should treated just like normal pages in this + * respect. + * + * We will likely want to have some new rss counters + * for unaddressable pages, at some point. But for now + * keep things as they are. + */ + get_page(page); + rss[mm_counter(page)]++; + page_dup_rmap(page, false); + + /* + * We do not preserve soft-dirty information, because so + * far, checkpoint/restore is the only feature that + * requires that. And checkpoint/restore does not work + * when a device driver is involved (you cannot easily + * save and restore device driver state). + */ + if (is_write_device_private_entry(entry) && + is_cow_mapping(vm_flags)) { + make_device_private_entry_read(&entry); + pte = swp_entry_to_pte(entry); + if (pte_swp_uffd_wp(*src_pte)) + pte = pte_swp_mkuffd_wp(pte); + set_pte_at(src_mm, addr, src_pte, pte); + } + } + set_pte_at(dst_mm, addr, dst_pte, pte); + return 0; +} + static inline unsigned long copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma, @@ -705,75 +783,10 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, struct page *page; /* pte contains position in swap or file, so copy. */ - if (unlikely(!pte_present(pte))) { - swp_entry_t entry = pte_to_swp_entry(pte); - - if (likely(!non_swap_entry(entry))) { - if (swap_duplicate(entry) < 0) - return entry.val; - - /* make sure dst_mm is on swapoff's mmlist. */ - if (unlikely(list_empty(&dst_mm->mmlist))) { - spin_lock(&mmlist_lock); - if (list_empty(&dst_mm->mmlist)) - list_add(&dst_mm->mmlist, - &src_mm->mmlist); - spin_unlock(&mmlist_lock); - } - rss[MM_SWAPENTS]++; - } else if (is_migration_entry(entry)) { - page = migration_entry_to_page(entry); - - rss[mm_counter(page)]++; - - if (is_write_migration_entry(entry) && - is_cow_mapping(vm_flags)) { - /* - * COW mappings require pages in both - * parent and child to be set to read. - */ - make_migration_entry_read(&entry); - pte = swp_entry_to_pte(entry); - if (pte_swp_soft_dirty(*src_pte)) - pte = pte_swp_mksoft_dirty(pte); - if (pte_swp_uffd_wp(*src_pte)) - pte = pte_swp_mkuffd_wp(pte); - set_pte_at(src_mm, addr, src_pte, pte); - } - } else if (is_device_private_entry(entry)) { - page = device_private_entry_to_page(entry); - - /* - * Update rss count even for unaddressable pages, as - * they should treated just like normal pages in this - * respect. - * - * We will likely want to have some new rss counters - * for unaddressable pages, at some point. But for now - * keep things as they are. - */ - get_page(page); - rss[mm_counter(page)]++; - page_dup_rmap(page, false); - - /* - * We do not preserve soft-dirty information, because so - * far, checkpoint/restore is the only feature that - * requires that. And checkpoint/restore does not work - * when a device driver is involved (you cannot easily - * save and restore device driver state). - */ - if (is_write_device_private_entry(entry) && - is_cow_mapping(vm_flags)) { - make_device_private_entry_read(&entry); - pte = swp_entry_to_pte(entry); - if (pte_swp_uffd_wp(*src_pte)) - pte = pte_swp_mkuffd_wp(pte); - set_pte_at(src_mm, addr, src_pte, pte); - } - } - goto out_set_pte; - } + if (unlikely(!pte_present(pte))) + return copy_nonpresent_pte(dst_mm, src_mm, + dst_pte, src_pte, vma, + addr, rss); /* * If it's a COW mapping, write protect it both @@ -807,7 +820,6 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, rss[mm_counter(page)]++; } -out_set_pte: set_pte_at(dst_mm, addr, dst_pte, pte); return 0; } From 79a1971c5f14ea3a6e2b0c4caf73a1760db7cab8 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 23 Sep 2020 10:04:16 -0700 Subject: [PATCH 110/322] mm: move the copy_one_pte() pte_present check into the caller This completes the split of the non-present and present pte cases by moving the check for the source pte being present into the single caller, which also means that we clearly separate out the very different return value case for a non-present pte. The present pte case currently always succeeds. This is a pure code re-organization with no semantic change: the intent is to make it much easier to add a new return case to the present pte case for when we do early COW at page table copy time. This was split out from the previous commit simply to make it easy to visually see that there were no semantic changes from this code re-organization. Signed-off-by: Linus Torvalds --- mm/memory.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 31a3ab7d9aa3..e315b1f1ef08 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -773,8 +773,8 @@ copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, return 0; } -static inline unsigned long -copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, +static inline void +copy_present_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma, unsigned long addr, int *rss) { @@ -782,12 +782,6 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, pte_t pte = *src_pte; struct page *page; - /* pte contains position in swap or file, so copy. */ - if (unlikely(!pte_present(pte))) - return copy_nonpresent_pte(dst_mm, src_mm, - dst_pte, src_pte, vma, - addr, rss); - /* * If it's a COW mapping, write protect it both * in the parent and the child @@ -821,7 +815,6 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, } set_pte_at(dst_mm, addr, dst_pte, pte); - return 0; } static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, @@ -863,10 +856,17 @@ again: progress++; continue; } - entry.val = copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, + if (unlikely(!pte_present(*src_pte))) { + entry.val = copy_nonpresent_pte(dst_mm, src_mm, + dst_pte, src_pte, vma, addr, rss); - if (entry.val) - break; + if (entry.val) + break; + progress += 8; + continue; + } + copy_present_pte(dst_mm, src_mm, dst_pte, src_pte, + vma, addr, rss); progress += 8; } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end); From 46d2613eae51d527ecaf0e8248a9bfcc0b92aa7e Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 22 Sep 2020 12:49:38 -0700 Subject: [PATCH 111/322] nvme-core: don't use NVME_NSID_ALL for command effects and supported log In the function nvme_get_effects_log() it uses NVME_NSID_ALL which has namespace scope. The command effect log page is controller specific. Replace NVME_NSID_ALL with 0x00 which specifies the controller scope instead of namespace scope. Fixes: 84fef62d135b ("nvme: check admin passthru command effects") Link: https://bugzilla.kernel.org/show_bug.cgi?id=209287 Reported-by: Huai-Cheng Kuo Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 4cea14c18a6d..53c93836c7c6 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3041,7 +3041,7 @@ static int nvme_get_effects_log(struct nvme_ctrl *ctrl, u8 csi, if (!cel) return -ENOMEM; - ret = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_CMD_EFFECTS, 0, csi, + ret = nvme_get_log(ctrl, 0x00, NVME_LOG_CMD_EFFECTS, 0, csi, &cel->log, sizeof(cel->log), 0); if (ret) { kfree(cel); From 86a82ae0b5095ea24c55898a3f025791e7958b21 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 23 Sep 2020 17:46:20 +0200 Subject: [PATCH 112/322] x86/ioapic: Unbreak check_timer() Several people reported in the kernel bugzilla that between v4.12 and v4.13 the magic which works around broken hardware and BIOSes to find the proper timer interrupt delivery mode stopped working for some older affected platforms which need to fall back to ExtINT delivery mode. The reason is that the core code changed to keep track of the masked and disabled state of an interrupt line more accurately to avoid the expensive hardware operations. That broke an assumption in i8259_make_irq() which invokes disable_irq_nosync(); irq_set_chip_and_handler(); enable_irq(); Up to v4.12 this worked because enable_irq() unconditionally unmasked the interrupt line, but after the state tracking improvements this is not longer the case because the IO/APIC uses lazy disabling. So the line state is unmasked which means that enable_irq() does not call into the new irq chip to unmask it. In principle this is a shortcoming of the core code, but it's more than unclear whether the core code should try to reset state. At least this cannot be done unconditionally as that would break other existing use cases where the chip type is changed, e.g. when changing the trigger type, but the callers expect the state to be preserved. As the way how check_timer() is switching the delivery modes is truly unique, the obvious fix is to simply unmask the i8259 manually after changing the mode to ExtINT delivery and switching the irq chip to the legacy PIC. Note, that the fixes tag is not really precise, but identifies the commit which broke the assumptions in the IO/APIC and i8259 code and that's the kernel version to which this needs to be backported. Fixes: bf22ff45bed6 ("genirq: Avoid unnecessary low level irq function calls") Reported-by: p_c_chan@hotmail.com Reported-by: ecm4@mail.com Reported-by: perdigao1@yahoo.com Reported-by: matzes@users.sourceforge.net Reported-by: rvelascog@gmail.com Signed-off-by: Thomas Gleixner Tested-by: p_c_chan@hotmail.com Tested-by: matzes@users.sourceforge.net Cc: stable@vger.kernel.org Link: https://bugzilla.kernel.org/show_bug.cgi?id=197769 --- arch/x86/kernel/apic/io_apic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 779a89e31c4c..21f9c7f11779 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2243,6 +2243,7 @@ static inline void __init check_timer(void) legacy_pic->init(0); legacy_pic->make_irq(0); apic_write(APIC_LVT0, APIC_DM_EXTINT); + legacy_pic->unmask(0); unlock_ExtINT_logic(); From ef59b1953c26130ef23e6986b2ceea1efaa472e9 Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Mon, 21 Sep 2020 16:57:58 +0200 Subject: [PATCH 113/322] mptcp: Wake up MPTCP worker when DATA_FIN found on a TCP FIN packet When receiving a DATA_FIN MPTCP option on a TCP FIN packet, the DATA_FIN information would be stored but the MPTCP worker did not get scheduled. In turn, the MPTCP socket state would remain in TCP_ESTABLISHED and no blocked operations would be awakened. TCP FIN packets are seen by the MPTCP socket when moving skbs out of the subflow receive queues, so schedule the MPTCP worker when a skb with DATA_FIN but no data payload is moved from a subflow queue. Other cases (DATA_FIN on a bare TCP ACK or on a packet with data payload) are already handled. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/84 Fixes: 43b54c6ee382 ("mptcp: Use full MPTCP-level disconnect state machine") Acked-by: Paolo Abeni Signed-off-by: Mat Martineau Signed-off-by: Matthieu Baerts Signed-off-by: David S. Miller --- net/mptcp/subflow.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 9ead43f79023..8cbeb68f3775 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -731,7 +731,7 @@ static enum mapping_status get_mapping_status(struct sock *ssk, if (mpext->data_fin == 1) { if (data_len == 1) { - mptcp_update_rcv_data_fin(msk, mpext->data_seq); + bool updated = mptcp_update_rcv_data_fin(msk, mpext->data_seq); pr_debug("DATA_FIN with no payload seq=%llu", mpext->data_seq); if (subflow->map_valid) { /* A DATA_FIN might arrive in a DSS @@ -742,6 +742,9 @@ static enum mapping_status get_mapping_status(struct sock *ssk, skb_ext_del(skb, SKB_EXT_MPTCP); return MAPPING_OK; } else { + if (updated && schedule_work(&msk->work)) + sock_hold((struct sock *)msk); + return MAPPING_DATA_FIN; } } else { From 77972b55fb9d35d4a6b0abca99abffaa4ec6a85b Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 22 Sep 2020 09:29:31 +0200 Subject: [PATCH 114/322] Revert "ravb: Fixed to be able to unload modules" This reverts commit 1838d6c62f57836639bd3d83e7855e0ee4f6defc. This commit moved the ravb_mdio_init() call (and thus the of_mdiobus_register() call) from the ravb_probe() to the ravb_open() call. This causes a regression during system resume (s2idle/s2ram), as new PHY devices cannot be bound while suspended. During boot, the Micrel PHY is detected like this: Micrel KSZ9031 Gigabit PHY e6800000.ethernet-ffffffff:00: attached PHY driver [Micrel KSZ9031 Gigabit PHY] (mii_bus:phy_addr=e6800000.ethernet-ffffffff:00, irq=228) ravb e6800000.ethernet eth0: Link is Up - 1Gbps/Full - flow control off During system suspend, (A) defer_all_probes is set to true, and (B) usermodehelper_disabled is set to UMH_DISABLED, to avoid drivers being probed while suspended. A. If CONFIG_MODULES=n, phy_device_register() calling device_add() merely adds the device, but does not probe it yet, as really_probe() returns early due to defer_all_probes being set: dpm_resume+0x128/0x4f8 device_resume+0xcc/0x1b0 dpm_run_callback+0x74/0x340 ravb_resume+0x190/0x1b8 ravb_open+0x84/0x770 of_mdiobus_register+0x1e0/0x468 of_mdiobus_register_phy+0x1b8/0x250 of_mdiobus_phy_device_register+0x178/0x1e8 phy_device_register+0x114/0x1b8 device_add+0x3d4/0x798 bus_probe_device+0x98/0xa0 device_initial_probe+0x10/0x18 __device_attach+0xe4/0x140 bus_for_each_drv+0x64/0xc8 __device_attach_driver+0xb8/0xe0 driver_probe_device.part.11+0xc4/0xd8 really_probe+0x32c/0x3b8 Later, phy_attach_direct() notices no PHY driver has been bound, and falls back to the Generic PHY, leading to degraded operation: Generic PHY e6800000.ethernet-ffffffff:00: attached PHY driver [Generic PHY] (mii_bus:phy_addr=e6800000.ethernet-ffffffff:00, irq=POLL) ravb e6800000.ethernet eth0: Link is Up - 1Gbps/Full - flow control off B. If CONFIG_MODULES=y, request_module() returns early with -EBUSY due to UMH_DISABLED, and MDIO initialization fails completely: mdio_bus e6800000.ethernet-ffffffff:00: error -16 loading PHY driver module for ID 0x00221622 ravb e6800000.ethernet eth0: failed to initialize MDIO PM: dpm_run_callback(): ravb_resume+0x0/0x1b8 returns -16 PM: Device e6800000.ethernet failed to resume: error -16 Ignoring -EBUSY in phy_request_driver_module(), like was done for -ENOENT in commit 21e194425abd65b5 ("net: phy: fix issue with loading PHY driver w/o initramfs"), would makes it fall back to the Generic PHY, like in the CONFIG_MODULES=n case. Signed-off-by: Geert Uytterhoeven Cc: stable@vger.kernel.org Reviewed-by: Sergei Shtylyov Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/ravb_main.c | 110 +++++++++++------------ 1 file changed, 55 insertions(+), 55 deletions(-) diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index df89d09b253e..99f7aae102ce 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1342,51 +1342,6 @@ static inline int ravb_hook_irq(unsigned int irq, irq_handler_t handler, return error; } -/* MDIO bus init function */ -static int ravb_mdio_init(struct ravb_private *priv) -{ - struct platform_device *pdev = priv->pdev; - struct device *dev = &pdev->dev; - int error; - - /* Bitbang init */ - priv->mdiobb.ops = &bb_ops; - - /* MII controller setting */ - priv->mii_bus = alloc_mdio_bitbang(&priv->mdiobb); - if (!priv->mii_bus) - return -ENOMEM; - - /* Hook up MII support for ethtool */ - priv->mii_bus->name = "ravb_mii"; - priv->mii_bus->parent = dev; - snprintf(priv->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x", - pdev->name, pdev->id); - - /* Register MDIO bus */ - error = of_mdiobus_register(priv->mii_bus, dev->of_node); - if (error) - goto out_free_bus; - - return 0; - -out_free_bus: - free_mdio_bitbang(priv->mii_bus); - return error; -} - -/* MDIO bus release function */ -static int ravb_mdio_release(struct ravb_private *priv) -{ - /* Unregister mdio bus */ - mdiobus_unregister(priv->mii_bus); - - /* Free bitbang info */ - free_mdio_bitbang(priv->mii_bus); - - return 0; -} - /* Network device open function for Ethernet AVB */ static int ravb_open(struct net_device *ndev) { @@ -1395,13 +1350,6 @@ static int ravb_open(struct net_device *ndev) struct device *dev = &pdev->dev; int error; - /* MDIO bus init */ - error = ravb_mdio_init(priv); - if (error) { - netdev_err(ndev, "failed to initialize MDIO\n"); - return error; - } - napi_enable(&priv->napi[RAVB_BE]); napi_enable(&priv->napi[RAVB_NC]); @@ -1479,7 +1427,6 @@ out_free_irq: out_napi_off: napi_disable(&priv->napi[RAVB_NC]); napi_disable(&priv->napi[RAVB_BE]); - ravb_mdio_release(priv); return error; } @@ -1789,8 +1736,6 @@ static int ravb_close(struct net_device *ndev) ravb_ring_free(ndev, RAVB_BE); ravb_ring_free(ndev, RAVB_NC); - ravb_mdio_release(priv); - return 0; } @@ -1942,6 +1887,51 @@ static const struct net_device_ops ravb_netdev_ops = { .ndo_set_features = ravb_set_features, }; +/* MDIO bus init function */ +static int ravb_mdio_init(struct ravb_private *priv) +{ + struct platform_device *pdev = priv->pdev; + struct device *dev = &pdev->dev; + int error; + + /* Bitbang init */ + priv->mdiobb.ops = &bb_ops; + + /* MII controller setting */ + priv->mii_bus = alloc_mdio_bitbang(&priv->mdiobb); + if (!priv->mii_bus) + return -ENOMEM; + + /* Hook up MII support for ethtool */ + priv->mii_bus->name = "ravb_mii"; + priv->mii_bus->parent = dev; + snprintf(priv->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x", + pdev->name, pdev->id); + + /* Register MDIO bus */ + error = of_mdiobus_register(priv->mii_bus, dev->of_node); + if (error) + goto out_free_bus; + + return 0; + +out_free_bus: + free_mdio_bitbang(priv->mii_bus); + return error; +} + +/* MDIO bus release function */ +static int ravb_mdio_release(struct ravb_private *priv) +{ + /* Unregister mdio bus */ + mdiobus_unregister(priv->mii_bus); + + /* Free bitbang info */ + free_mdio_bitbang(priv->mii_bus); + + return 0; +} + static const struct of_device_id ravb_match_table[] = { { .compatible = "renesas,etheravb-r8a7790", .data = (void *)RCAR_GEN2 }, { .compatible = "renesas,etheravb-r8a7794", .data = (void *)RCAR_GEN2 }, @@ -2184,6 +2174,13 @@ static int ravb_probe(struct platform_device *pdev) eth_hw_addr_random(ndev); } + /* MDIO bus init */ + error = ravb_mdio_init(priv); + if (error) { + dev_err(&pdev->dev, "failed to initialize MDIO\n"); + goto out_dma_free; + } + netif_napi_add(ndev, &priv->napi[RAVB_BE], ravb_poll, 64); netif_napi_add(ndev, &priv->napi[RAVB_NC], ravb_poll, 64); @@ -2205,6 +2202,8 @@ static int ravb_probe(struct platform_device *pdev) out_napi_del: netif_napi_del(&priv->napi[RAVB_NC]); netif_napi_del(&priv->napi[RAVB_BE]); + ravb_mdio_release(priv); +out_dma_free: dma_free_coherent(ndev->dev.parent, priv->desc_bat_size, priv->desc_bat, priv->desc_bat_dma); @@ -2236,6 +2235,7 @@ static int ravb_remove(struct platform_device *pdev) unregister_netdev(ndev); netif_napi_del(&priv->napi[RAVB_NC]); netif_napi_del(&priv->napi[RAVB_BE]); + ravb_mdio_release(priv); pm_runtime_disable(&pdev->dev); free_netdev(ndev); platform_set_drvdata(pdev, NULL); From ea6754aef2449e2cadfeb28741a199d35da53e28 Mon Sep 17 00:00:00 2001 From: Tian Tao Date: Tue, 22 Sep 2020 21:32:19 +0800 Subject: [PATCH 115/322] net: switchdev: Fixed kerneldoc warning Update kernel-doc line comments to fix warnings reported by make W=1. net/switchdev/switchdev.c:413: warning: Function parameter or member 'extack' not described in 'call_switchdev_notifiers' Signed-off-by: Tian Tao Acked-by: Ivan Vecera Signed-off-by: David S. Miller --- net/switchdev/switchdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 865f3e037425..23d868545362 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -404,7 +404,7 @@ EXPORT_SYMBOL_GPL(unregister_switchdev_notifier); * @val: value passed unmodified to notifier function * @dev: port device * @info: notifier information data - * + * @extack: netlink extended ack * Call all network notifier blocks. */ int call_switchdev_notifiers(unsigned long val, struct net_device *dev, From f9317ae5523f99999fb54c513ebabbb2bc887ddf Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Tue, 22 Sep 2020 23:41:12 +0200 Subject: [PATCH 116/322] net: lantiq: Add locking for TX DMA channel The TX DMA channel data is accessed by the xrx200_start_xmit() and the xrx200_tx_housekeeping() function from different threads. Make sure the accesses are synchronized by acquiring the netif_tx_lock() in the xrx200_tx_housekeeping() function too. This lock is acquired by the kernel before calling xrx200_start_xmit(). Signed-off-by: Hauke Mehrtens Signed-off-by: David S. Miller --- drivers/net/ethernet/lantiq_xrx200.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c index 635ff3a5dcfb..51ed8a54d380 100644 --- a/drivers/net/ethernet/lantiq_xrx200.c +++ b/drivers/net/ethernet/lantiq_xrx200.c @@ -245,6 +245,7 @@ static int xrx200_tx_housekeeping(struct napi_struct *napi, int budget) int pkts = 0; int bytes = 0; + netif_tx_lock(net_dev); while (pkts < budget) { struct ltq_dma_desc *desc = &ch->dma.desc_base[ch->tx_free]; @@ -268,6 +269,7 @@ static int xrx200_tx_housekeeping(struct napi_struct *napi, int budget) net_dev->stats.tx_bytes += bytes; netdev_completed_queue(ch->priv->net_dev, pkts, bytes); + netif_tx_unlock(net_dev); if (netif_queue_stopped(net_dev)) netif_wake_queue(net_dev); From 7241c5a697479c7d0c5a96595822cdab750d41ae Mon Sep 17 00:00:00 2001 From: Voon Weifeng Date: Wed, 23 Sep 2020 16:56:14 +0800 Subject: [PATCH 117/322] net: stmmac: removed enabling eee in EEE set callback EEE should be only be enabled during stmmac_mac_link_up() when the link are up and being set up properly. set_eee should only do settings configuration and disabling the eee. Without this fix, turning on EEE using ethtool will return "Operation not supported". This is due to the driver is in a dead loop waiting for eee to be advertised in the for eee to be activated but the driver will only configure the EEE advertisement after the eee is activated. Ethtool should only return "Operation not supported" if there is no EEE capbility in the MAC controller. Fixes: 8a7493e58ad6 ("net: stmmac: Fix a race in EEE enable callback") Signed-off-by: Voon Weifeng Acked-by: Mark Gross Signed-off-by: David S. Miller --- .../net/ethernet/stmicro/stmmac/stmmac_ethtool.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index ac5e8cc5fb9f..430a4b32ec1e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -675,23 +675,16 @@ static int stmmac_ethtool_op_set_eee(struct net_device *dev, struct stmmac_priv *priv = netdev_priv(dev); int ret; - if (!edata->eee_enabled) { + if (!priv->dma_cap.eee) + return -EOPNOTSUPP; + + if (!edata->eee_enabled) stmmac_disable_eee_mode(priv); - } else { - /* We are asking for enabling the EEE but it is safe - * to verify all by invoking the eee_init function. - * In case of failure it will return an error. - */ - edata->eee_enabled = stmmac_eee_init(priv); - if (!edata->eee_enabled) - return -EOPNOTSUPP; - } ret = phylink_ethtool_set_eee(priv->phylink, edata); if (ret) return ret; - priv->eee_enabled = edata->eee_enabled; priv->tx_lpi_timer = edata->tx_lpi_timer; return 0; } From f7e80983f0cf470bb82036e73bff4d5a7daf8fc2 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Mon, 21 Sep 2020 12:48:36 +0200 Subject: [PATCH 118/322] s390/zcrypt: Fix ZCRYPT_PERDEV_REQCNT ioctl reqcnt is an u32 pointer but we do copy sizeof(reqcnt) which is the size of the pointer. This means we only copy 8 byte. Let us copy the full monty. Signed-off-by: Christian Borntraeger Cc: Harald Freudenberger Cc: stable@vger.kernel.org Fixes: af4a72276d49 ("s390/zcrypt: Support up to 256 crypto adapters.") Reviewed-by: Harald Freudenberger Signed-off-by: Vasily Gorbik --- drivers/s390/crypto/zcrypt_api.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index 4dbbfd88262c..f314936b5462 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -1449,7 +1449,8 @@ static long zcrypt_unlocked_ioctl(struct file *filp, unsigned int cmd, if (!reqcnt) return -ENOMEM; zcrypt_perdev_reqcnt(reqcnt, AP_DEVICES); - if (copy_to_user((int __user *) arg, reqcnt, sizeof(reqcnt))) + if (copy_to_user((int __user *) arg, reqcnt, + sizeof(u32) * AP_DEVICES)) rc = -EFAULT; kfree(reqcnt); return rc; From 1a26044954a6d1f4d375d5e62392446af663be7a Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Fri, 18 Sep 2020 09:13:35 +0800 Subject: [PATCH 119/322] iommu/exynos: add missing put_device() call in exynos_iommu_of_xlate() if of_find_device_by_node() succeed, exynos_iommu_of_xlate() doesn't have a corresponding put_device(). Thus add put_device() to fix the exception handling for this function implementation. Fixes: aa759fd376fb ("iommu/exynos: Add callback for initializing devices from device tree") Signed-off-by: Yu Kuai Acked-by: Marek Szyprowski Link: https://lore.kernel.org/r/20200918011335.909141-1-yukuai3@huawei.com Signed-off-by: Joerg Roedel --- drivers/iommu/exynos-iommu.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index bad3c0ce10cb..de324b4eedfe 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -1295,13 +1295,17 @@ static int exynos_iommu_of_xlate(struct device *dev, return -ENODEV; data = platform_get_drvdata(sysmmu); - if (!data) + if (!data) { + put_device(&sysmmu->dev); return -ENODEV; + } if (!owner) { owner = kzalloc(sizeof(*owner), GFP_KERNEL); - if (!owner) + if (!owner) { + put_device(&sysmmu->dev); return -ENOMEM; + } INIT_LIST_HEAD(&owner->controllers); mutex_init(&owner->rpm_lock); From 4fd9ac6bd3044734a7028bd993944c3617d1eede Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Sun, 23 Aug 2020 19:12:11 +0800 Subject: [PATCH 120/322] Platform: OLPC: Fix memleak in olpc_ec_probe When devm_regulator_register() fails, ec should be freed just like when olpc_ec_cmd() fails. Fixes: 231c0c216172a ("Platform: OLPC: Add a regulator for the DCON") Signed-off-by: Dinghao Liu Signed-off-by: Andy Shevchenko --- drivers/platform/olpc/olpc-ec.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/platform/olpc/olpc-ec.c b/drivers/platform/olpc/olpc-ec.c index 190e4a6186ef..f64b82824db2 100644 --- a/drivers/platform/olpc/olpc-ec.c +++ b/drivers/platform/olpc/olpc-ec.c @@ -439,7 +439,9 @@ static int olpc_ec_probe(struct platform_device *pdev) &config); if (IS_ERR(ec->dcon_rdev)) { dev_err(&pdev->dev, "failed to register DCON regulator\n"); - return PTR_ERR(ec->dcon_rdev); + err = PTR_ERR(ec->dcon_rdev); + kfree(ec); + return err; } ec->dbgfs_dir = olpc_ec_setup_debugfs(); From d41ec792edf89858a77ad485f1a4c10e3d26a10f Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 23 Aug 2020 15:55:16 -0700 Subject: [PATCH 121/322] Documentation: laptops: thinkpad-acpi: fix underline length build warning Fix underline length build warning in thinkpad-acpi.rst documentation: Documentation/admin-guide/laptops/thinkpad-acpi.rst:1437: WARNING: Title underline too short. DYTC Lapmode sensor ------------------ Fixes: acf7f4a59114 ("platform/x86: thinkpad_acpi: lap or desk mode interface") Signed-off-by: Randy Dunlap Cc: Nitin Joshi Cc: Sugumaran Cc: Bastien Nocera Cc: Mark Pearson Cc: Andy Shevchenko Cc: Darren Hart Cc: Henrique de Moraes Holschuh Cc: ibm-acpi-devel@lists.sourceforge.net Cc: platform-driver-x86@vger.kernel.org Signed-off-by: Andy Shevchenko --- Documentation/admin-guide/laptops/thinkpad-acpi.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/admin-guide/laptops/thinkpad-acpi.rst b/Documentation/admin-guide/laptops/thinkpad-acpi.rst index 5e477869df18..5fe1ade88c17 100644 --- a/Documentation/admin-guide/laptops/thinkpad-acpi.rst +++ b/Documentation/admin-guide/laptops/thinkpad-acpi.rst @@ -1434,7 +1434,7 @@ on the feature, restricting the viewing angles. DYTC Lapmode sensor ------------------- +------------------- sysfs: dytc_lapmode From 71c548c26de20334cf0d52217de0b02471442e6a Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Tue, 30 Jun 2020 08:29:25 +0300 Subject: [PATCH 122/322] vhost: Fix documentation Fix documentation to match actual function prototypes "end" used instead of "last". Fix that. Signed-off-by: Eli Cohen Link: https://lore.kernel.org/r/20200630052925.GA157062@mtl-vdi-166.wap.labs.mlnx Signed-off-by: Michael S. Tsirkin --- drivers/vhost/iotlb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/vhost/iotlb.c b/drivers/vhost/iotlb.c index 34aec4ba331e..0fd3f87e913c 100644 --- a/drivers/vhost/iotlb.c +++ b/drivers/vhost/iotlb.c @@ -149,7 +149,7 @@ EXPORT_SYMBOL_GPL(vhost_iotlb_free); * vhost_iotlb_itree_first - return the first overlapped range * @iotlb: the IOTLB * @start: start of IOVA range - * @end: end of IOVA range + * @last: last byte in IOVA range */ struct vhost_iotlb_map * vhost_iotlb_itree_first(struct vhost_iotlb *iotlb, u64 start, u64 last) @@ -162,7 +162,7 @@ EXPORT_SYMBOL_GPL(vhost_iotlb_itree_first); * vhost_iotlb_itree_next - return the next overlapped range * @map: the starting map node * @start: start of IOVA range - * @end: end of IOVA range + * @last: last byte IOVA range */ struct vhost_iotlb_map * vhost_iotlb_itree_next(struct vhost_iotlb_map *map, u64 start, u64 last) From a127c5bbb6a8eee851cbdec254424c480b8edd75 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Mon, 7 Sep 2020 18:43:43 +0800 Subject: [PATCH 123/322] vhost-vdpa: fix backend feature ioctls Commit 653055b9acd4 ("vhost-vdpa: support get/set backend features") introduces two malfunction backend features ioctls: 1) the ioctls was blindly added to vring ioctl instead of vdpa device ioctl 2) vhost_set_backend_features() was called when dev mutex has already been held which will lead a deadlock This patch fixes the above issues. Cc: Eli Cohen Reported-by: Zhu Lingshan Fixes: 653055b9acd4 ("vhost-vdpa: support get/set backend features") Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20200907104343.31141-1-jasowang@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vdpa.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 3fab94f88894..796fe979f997 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -353,8 +353,6 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, struct vdpa_callback cb; struct vhost_virtqueue *vq; struct vhost_vring_state s; - u64 __user *featurep = argp; - u64 features; u32 idx; long r; @@ -381,18 +379,6 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, vq->last_avail_idx = vq_state.avail_index; break; - case VHOST_GET_BACKEND_FEATURES: - features = VHOST_VDPA_BACKEND_FEATURES; - if (copy_to_user(featurep, &features, sizeof(features))) - return -EFAULT; - return 0; - case VHOST_SET_BACKEND_FEATURES: - if (copy_from_user(&features, featurep, sizeof(features))) - return -EFAULT; - if (features & ~VHOST_VDPA_BACKEND_FEATURES) - return -EOPNOTSUPP; - vhost_set_backend_features(&v->vdev, features); - return 0; } r = vhost_vring_ioctl(&v->vdev, cmd, argp); @@ -440,8 +426,20 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, struct vhost_vdpa *v = filep->private_data; struct vhost_dev *d = &v->vdev; void __user *argp = (void __user *)arg; + u64 __user *featurep = argp; + u64 features; long r; + if (cmd == VHOST_SET_BACKEND_FEATURES) { + r = copy_from_user(&features, featurep, sizeof(features)); + if (r) + return r; + if (features & ~VHOST_VDPA_BACKEND_FEATURES) + return -EOPNOTSUPP; + vhost_set_backend_features(&v->vdev, features); + return 0; + } + mutex_lock(&d->mutex); switch (cmd) { @@ -476,6 +474,10 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, case VHOST_VDPA_SET_CONFIG_CALL: r = vhost_vdpa_set_config_call(v, argp); break; + case VHOST_GET_BACKEND_FEATURES: + features = VHOST_VDPA_BACKEND_FEATURES; + r = copy_to_user(featurep, &features, sizeof(features)); + break; default: r = vhost_dev_ioctl(&v->vdev, cmd, argp); if (r == -ENOIOCTLCMD) From efe813d0b0e90a19102a36fd1d448a65dbf5d474 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 1 Sep 2020 10:07:52 +0200 Subject: [PATCH 124/322] platform/x86: touchscreen_dmi: Add info for the MPMAN Converter9 2-in-1 Add touchscreen info for the MPMAN Converter9 2-in-1. This device uses the same case as the ITworks TW891, but it uses a different digitizer, so it needs its own firmware. Signed-off-by: Hans de Goede Signed-off-by: Andy Shevchenko --- drivers/platform/x86/touchscreen_dmi.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c index 5c223015ee71..dda60f89c951 100644 --- a/drivers/platform/x86/touchscreen_dmi.c +++ b/drivers/platform/x86/touchscreen_dmi.c @@ -373,6 +373,23 @@ static const struct ts_dmi_data jumper_ezpad_mini3_data = { .properties = jumper_ezpad_mini3_props, }; +static const struct property_entry mpman_converter9_props[] = { + PROPERTY_ENTRY_U32("touchscreen-min-x", 8), + PROPERTY_ENTRY_U32("touchscreen-min-y", 8), + PROPERTY_ENTRY_U32("touchscreen-size-x", 1664), + PROPERTY_ENTRY_U32("touchscreen-size-y", 880), + PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"), + PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"), + PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-mpman-converter9.fw"), + PROPERTY_ENTRY_U32("silead,max-fingers", 10), + { } +}; + +static const struct ts_dmi_data mpman_converter9_data = { + .acpi_name = "MSSL1680:00", + .properties = mpman_converter9_props, +}; + static const struct property_entry mpman_mpwin895cl_props[] = { PROPERTY_ENTRY_U32("touchscreen-min-x", 3), PROPERTY_ENTRY_U32("touchscreen-min-y", 9), @@ -976,6 +993,14 @@ const struct dmi_system_id touchscreen_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "FlexBook edge11 - M-FBE11"), }, }, + { + /* MP Man Converter 9 */ + .driver_data = (void *)&mpman_converter9_data, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "MPMAN"), + DMI_MATCH(DMI_PRODUCT_NAME, "Converter9"), + }, + }, { /* MP Man MPWIN895CL */ .driver_data = (void *)&mpman_mpwin895cl_data, From 8a333dab282467562b55096bab941b761ac26a73 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 10 Sep 2020 17:06:50 +0200 Subject: [PATCH 125/322] platform/x86: asus-nb-wmi: Revert "Do not load on Asus T100TA and T200TA" The WMI INIT method on for some reason turns on the camera LED on these 2-in-1s, without the WMI interface allowing further control over the LED. To fix this commit b5f7311d3a2e ("platform/x86: asus-nb-wmi: Do not load on Asus T100TA and T200TA") added a blacklist with these 2 models on it since the WMI driver did not add any extra functionality to these models. Recently I've been working on making more 2-in-1 models report their tablet-mode (SW_TABLET_MODE) to userspace; and I've found that these 2 Asus models report this through WMI. This commit reverts the adding of the blacklist, so that the Asus WMI driver can be used on these models to report their tablet-mode. I have another patch fixing the LED issue in a different manner. Note this is the second time the we revert the adding of the asus_nb_wmi_blacklist. It was reverted before in commit: aab9e7896ec9 ("platform/x86: asus-nb-wmi: Revert "Do not load on Asus T100TA and T200TA")" But some how (accidentally re-applying of the patch?) it got re-added again in commit 3bd12da7f50b ("platform/x86: asus-nb-wmi: Do not load on Asus T100TA and T200TA"), so now we need to revert it again. Signed-off-by: Hans de Goede Signed-off-by: Andy Shevchenko --- drivers/platform/x86/asus-nb-wmi.c | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index b2e3d1e3b3e9..680c3640e013 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -593,33 +593,9 @@ static struct asus_wmi_driver asus_nb_wmi_driver = { .detect_quirks = asus_nb_wmi_quirks, }; -static const struct dmi_system_id asus_nb_wmi_blacklist[] __initconst = { - { - /* - * asus-nb-wm adds no functionality. The T100TA has a detachable - * USB kbd, so no hotkeys and it has no WMI rfkill; and loading - * asus-nb-wm causes the camera LED to turn and _stay_ on. - */ - .matches = { - DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T100TA"), - }, - }, - { - /* The Asus T200TA has the same issue as the T100TA */ - .matches = { - DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T200TA"), - }, - }, - {} /* Terminating entry */ -}; static int __init asus_nb_wmi_init(void) { - if (dmi_check_system(asus_nb_wmi_blacklist)) - return -ENODEV; - return asus_wmi_register_driver(&asus_nb_wmi_driver); } From 1d2dd379bd99ee4356ae4552fd1b8e43c7ca02cd Mon Sep 17 00:00:00 2001 From: Marius Iacob Date: Mon, 31 Aug 2020 15:58:01 +0300 Subject: [PATCH 126/322] platform/x86: asus-wmi: Add BATC battery name to the list of supported The Intel Atom Cherry Trail platform reports a new battery name (BATC). Tested on ASUS Transformer Mini T103HAF. Signed-off-by: Marius Iacob Signed-off-by: Andy Shevchenko --- drivers/platform/x86/asus-wmi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 8f4acdc06b13..fa39ff030bd7 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -442,6 +442,7 @@ static int asus_wmi_battery_add(struct power_supply *battery) */ if (strcmp(battery->desc->name, "BAT0") != 0 && strcmp(battery->desc->name, "BAT1") != 0 && + strcmp(battery->desc->name, "BATC") != 0 && strcmp(battery->desc->name, "BATT") != 0) return -ENODEV; From d823346876a970522ff9e4d2b323c9b734dcc4de Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 12 Sep 2020 11:35:32 +0200 Subject: [PATCH 127/322] platform/x86: intel-vbtn: Fix SW_TABLET_MODE always reporting 1 on the HP Pavilion 11 x360 Commit cfae58ed681c ("platform/x86: intel-vbtn: Only blacklist SW_TABLET_MODE on the 9 / "Laptop" chasis-type") restored SW_TABLET_MODE reporting on the HP stream x360 11 series on which it was previously broken by commit de9647efeaa9 ("platform/x86: intel-vbtn: Only activate tablet mode switch on 2-in-1's"). It turns out that enabling SW_TABLET_MODE reporting on devices with a chassis-type of 10 ("Notebook") causes SW_TABLET_MODE to always report 1 at boot on the HP Pavilion 11 x360, which causes libinput to disable the kbd and touchpad. The HP Pavilion 11 x360's ACPI VGBS method sets bit 4 instead of bit 6 when NOT in tablet mode at boot. Inspecting all the DSDTs in my DSDT collection shows only one other model, the Medion E1239T ever setting bit 4 and it always sets this together with bit 6. So lets treat bit 4 as a second bit which when set indicates the device not being in tablet-mode, as we already do for bit 6. While at it also prefix all VGBS constant defines with "VGBS_". Fixes: cfae58ed681c ("platform/x86: intel-vbtn: Only blacklist SW_TABLET_MODE on the 9 / "Laptop" chasis-type") Signed-off-by: Hans de Goede Acked-by: Mark Gross Signed-off-by: Andy Shevchenko --- drivers/platform/x86/intel-vbtn.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/platform/x86/intel-vbtn.c b/drivers/platform/x86/intel-vbtn.c index e85d8e58320c..f443619e1e7e 100644 --- a/drivers/platform/x86/intel-vbtn.c +++ b/drivers/platform/x86/intel-vbtn.c @@ -15,9 +15,13 @@ #include #include +/* Returned when NOT in tablet mode on some HP Stream x360 11 models */ +#define VGBS_TABLET_MODE_FLAG_ALT 0x10 /* When NOT in tablet mode, VGBS returns with the flag 0x40 */ -#define TABLET_MODE_FLAG 0x40 -#define DOCK_MODE_FLAG 0x80 +#define VGBS_TABLET_MODE_FLAG 0x40 +#define VGBS_DOCK_MODE_FLAG 0x80 + +#define VGBS_TABLET_MODE_FLAGS (VGBS_TABLET_MODE_FLAG | VGBS_TABLET_MODE_FLAG_ALT) MODULE_LICENSE("GPL"); MODULE_AUTHOR("AceLan Kao"); @@ -72,9 +76,9 @@ static void detect_tablet_mode(struct platform_device *device) if (ACPI_FAILURE(status)) return; - m = !(vgbs & TABLET_MODE_FLAG); + m = !(vgbs & VGBS_TABLET_MODE_FLAGS); input_report_switch(priv->input_dev, SW_TABLET_MODE, m); - m = (vgbs & DOCK_MODE_FLAG) ? 1 : 0; + m = (vgbs & VGBS_DOCK_MODE_FLAG) ? 1 : 0; input_report_switch(priv->input_dev, SW_DOCK, m); } From 5f38b06db8af3ed6c2fc1b427504ca56fae2eacc Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Sun, 13 Sep 2020 12:02:03 -0700 Subject: [PATCH 128/322] platform/x86: thinkpad_acpi: initialize tp_nvram_state variable clang static analysis flags this represenative problem thinkpad_acpi.c:2523:7: warning: Branch condition evaluates to a garbage value if (!oldn->mute || ^~~~~~~~~~~ In hotkey_kthread() mute is conditionally set by hotkey_read_nvram() but unconditionally checked by hotkey_compare_and_issue_event(). So the tp_nvram_state variable s[2] needs to be initialized. Fixes: 01e88f25985d ("ACPI: thinkpad-acpi: add CMOS NVRAM polling for hot keys (v9)") Signed-off-by: Tom Rix Reviewed-by: Hans de Goede Acked-by: mark gross Signed-off-by: Andy Shevchenko --- drivers/platform/x86/thinkpad_acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 4864a5c189d4..7cb3b71c67fe 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -2569,7 +2569,7 @@ static void hotkey_compare_and_issue_event(struct tp_nvram_state *oldn, */ static int hotkey_kthread(void *data) { - struct tp_nvram_state s[2]; + struct tp_nvram_state s[2] = { 0 }; u32 poll_mask, event_mask; unsigned int si, so; unsigned long t; From 8f0c01e666685c4d2e1a233e6f4d7ab16c9f8b2a Mon Sep 17 00:00:00 2001 From: Necip Fazil Yildiran Date: Tue, 15 Sep 2020 12:09:23 +0300 Subject: [PATCH 129/322] platform/x86: fix kconfig dependency warning for LG_LAPTOP When LG_LAPTOP is enabled and NEW_LEDS is disabled, it results in the following Kbuild warning: WARNING: unmet direct dependencies detected for LEDS_CLASS Depends on [n]: NEW_LEDS [=n] Selected by [y]: - LG_LAPTOP [=y] && X86 [=y] && X86_PLATFORM_DEVICES [=y] && ACPI [=y] && ACPI_WMI [=y] && INPUT [=y] The reason is that LG_LAPTOP selects LEDS_CLASS without depending on or selecting NEW_LEDS while LEDS_CLASS is subordinate to NEW_LEDS. Honor the kconfig menu hierarchy to remove kconfig dependency warnings. Fixes: dbf0c5a6b1f8 ("platform/x86: Add LG Gram laptop special features driver") Signed-off-by: Necip Fazil Yildiran Reviewed-by: Hans de Goede Acked-by: mark gross Signed-off-by: Andy Shevchenko --- drivers/platform/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 40219bba6801..81f6079d08e6 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -1112,6 +1112,7 @@ config LG_LAPTOP depends on ACPI_WMI depends on INPUT select INPUT_SPARSEKMAP + select NEW_LEDS select LEDS_CLASS help This driver adds support for hotkeys as well as control of keyboard From afdd1ebb72051e8b6b83c4d7dc542a9be0e1352d Mon Sep 17 00:00:00 2001 From: Necip Fazil Yildiran Date: Thu, 17 Sep 2020 19:16:53 +0300 Subject: [PATCH 130/322] platform/x86: fix kconfig dependency warning for FUJITSU_LAPTOP When FUJITSU_LAPTOP is enabled and NEW_LEDS is disabled, it results in the following Kbuild warning: WARNING: unmet direct dependencies detected for LEDS_CLASS Depends on [n]: NEW_LEDS [=n] Selected by [y]: - FUJITSU_LAPTOP [=y] && X86 [=y] && X86_PLATFORM_DEVICES [=y] && ACPI [=y] && INPUT [=y] && BACKLIGHT_CLASS_DEVICE [=y] && (ACPI_VIDEO [=n] || ACPI_VIDEO [=n]=n) The reason is that FUJITSU_LAPTOP selects LEDS_CLASS without depending on or selecting NEW_LEDS while LEDS_CLASS is subordinate to NEW_LEDS. Honor the kconfig menu hierarchy to remove kconfig dependency warnings. Reported-by: Hans de Goede Fixes: d89bcc83e709 ("platform/x86: fujitsu-laptop: select LEDS_CLASS") Signed-off-by: Necip Fazil Yildiran Signed-off-by: Andy Shevchenko --- drivers/platform/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 81f6079d08e6..0d91d136bc3b 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -469,6 +469,7 @@ config FUJITSU_LAPTOP depends on BACKLIGHT_CLASS_DEVICE depends on ACPI_VIDEO || ACPI_VIDEO = n select INPUT_SPARSEKMAP + select NEW_LEDS select LEDS_CLASS help This is a driver for laptops built by Fujitsu: From fce55cc8b7ade3c583f47eab5885e2f541ede1ee Mon Sep 17 00:00:00 2001 From: Ed Wildgoose Date: Sun, 20 Sep 2020 21:32:06 +0100 Subject: [PATCH 131/322] platform/x86: pcengines-apuv2: Fix typo on define of AMD_FCH_GPIO_REG_GPIO55_DEVSLP0 Schematics show that the GPIO number is 55 (not 59). Trivial typo. Signed-off-by: Ed Wildgoose Signed-off-by: Andy Shevchenko --- drivers/platform/x86/pcengines-apuv2.c | 2 +- include/linux/platform_data/gpio/gpio-amd-fch.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/pcengines-apuv2.c b/drivers/platform/x86/pcengines-apuv2.c index 6aff6cf41414..c37349f97bb8 100644 --- a/drivers/platform/x86/pcengines-apuv2.c +++ b/drivers/platform/x86/pcengines-apuv2.c @@ -32,7 +32,7 @@ #define APU2_GPIO_REG_LED3 AMD_FCH_GPIO_REG_GPIO59_DEVSLP1 #define APU2_GPIO_REG_MODESW AMD_FCH_GPIO_REG_GPIO32_GE1 #define APU2_GPIO_REG_SIMSWAP AMD_FCH_GPIO_REG_GPIO33_GE2 -#define APU2_GPIO_REG_MPCIE2 AMD_FCH_GPIO_REG_GPIO59_DEVSLP0 +#define APU2_GPIO_REG_MPCIE2 AMD_FCH_GPIO_REG_GPIO55_DEVSLP0 #define APU2_GPIO_REG_MPCIE3 AMD_FCH_GPIO_REG_GPIO51 /* Order in which the GPIO lines are defined in the register list */ diff --git a/include/linux/platform_data/gpio/gpio-amd-fch.h b/include/linux/platform_data/gpio/gpio-amd-fch.h index 9e46678edb2a..255d51c9d36d 100644 --- a/include/linux/platform_data/gpio/gpio-amd-fch.h +++ b/include/linux/platform_data/gpio/gpio-amd-fch.h @@ -19,7 +19,7 @@ #define AMD_FCH_GPIO_REG_GPIO49 0x40 #define AMD_FCH_GPIO_REG_GPIO50 0x41 #define AMD_FCH_GPIO_REG_GPIO51 0x42 -#define AMD_FCH_GPIO_REG_GPIO59_DEVSLP0 0x43 +#define AMD_FCH_GPIO_REG_GPIO55_DEVSLP0 0x43 #define AMD_FCH_GPIO_REG_GPIO57 0x44 #define AMD_FCH_GPIO_REG_GPIO58 0x45 #define AMD_FCH_GPIO_REG_GPIO59_DEVSLP1 0x46 From 2b06a1c889ca33d550675db4b0ca91e1b4dd9873 Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Wed, 23 Sep 2020 20:20:50 +0300 Subject: [PATCH 132/322] platform/x86: mlx-platform: Fix extended topology configuration for power supply units Fix topology configuration for power supply units in structure 'mlxplat_mlxcpld_ext_pwr_items_data', due to hardware change. Note: no need to backport the fix, since there is no such hardware yet (equipped with four power) at the filed. Signed-off-by: Vadim Pasternak Reviewed-by: Hans de Goede Signed-off-by: Andy Shevchenko --- drivers/platform/x86/mlx-platform.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/platform/x86/mlx-platform.c b/drivers/platform/x86/mlx-platform.c index 8cf8c1be2666..1506ec0a4777 100644 --- a/drivers/platform/x86/mlx-platform.c +++ b/drivers/platform/x86/mlx-platform.c @@ -171,7 +171,6 @@ #define MLXPLAT_CPLD_NR_NONE -1 #define MLXPLAT_CPLD_PSU_DEFAULT_NR 10 #define MLXPLAT_CPLD_PSU_MSNXXXX_NR 4 -#define MLXPLAT_CPLD_PSU_MSNXXXX_NR2 3 #define MLXPLAT_CPLD_FAN1_DEFAULT_NR 11 #define MLXPLAT_CPLD_FAN2_DEFAULT_NR 12 #define MLXPLAT_CPLD_FAN3_DEFAULT_NR 13 @@ -347,6 +346,15 @@ static struct i2c_board_info mlxplat_mlxcpld_pwr[] = { }, }; +static struct i2c_board_info mlxplat_mlxcpld_ext_pwr[] = { + { + I2C_BOARD_INFO("dps460", 0x5b), + }, + { + I2C_BOARD_INFO("dps460", 0x5a), + }, +}; + static struct i2c_board_info mlxplat_mlxcpld_fan[] = { { I2C_BOARD_INFO("24c32", 0x50), @@ -921,15 +929,15 @@ static struct mlxreg_core_data mlxplat_mlxcpld_ext_pwr_items_data[] = { .label = "pwr3", .reg = MLXPLAT_CPLD_LPC_REG_PWR_OFFSET, .mask = BIT(2), - .hpdev.brdinfo = &mlxplat_mlxcpld_pwr[0], - .hpdev.nr = MLXPLAT_CPLD_PSU_MSNXXXX_NR2, + .hpdev.brdinfo = &mlxplat_mlxcpld_ext_pwr[0], + .hpdev.nr = MLXPLAT_CPLD_PSU_MSNXXXX_NR, }, { .label = "pwr4", .reg = MLXPLAT_CPLD_LPC_REG_PWR_OFFSET, .mask = BIT(3), - .hpdev.brdinfo = &mlxplat_mlxcpld_pwr[1], - .hpdev.nr = MLXPLAT_CPLD_PSU_MSNXXXX_NR2, + .hpdev.brdinfo = &mlxplat_mlxcpld_ext_pwr[1], + .hpdev.nr = MLXPLAT_CPLD_PSU_MSNXXXX_NR, }, }; From 938835aa903ae19ad62805134f79bbcf20fc3bea Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 23 Sep 2020 20:48:03 +0200 Subject: [PATCH 133/322] platform/x86: intel_pmc_core: do not create a static struct device A struct device is a dynamic structure, with reference counting. "Tricking" the kernel to make a dynamic structure static, by working around the driver core release detection logic, is not nice. Because of this, this code has been used as an example for others on "how to do things", which is just about the worst thing possible to have happen. Fix this all up by making the platform device dynamic and providing a real release function. Cc: Rajneesh Bhardwaj Cc: Vishwanath Somayaji Cc: Darren Hart Cc: Andy Shevchenko Cc: Rajat Jain Cc: platform-driver-x86@vger.kernel.org Cc: linux-kernel@vger.kernel.org Reported-by: Maximilian Luz Fixes: b02f6a2ef0a1 ("platform/x86: intel_pmc_core: Attach using APCI HID "INT33A1"") Signed-off-by: Greg Kroah-Hartman Acked-by: Rajat Jain Reviewed-by: Hans de Goede Signed-off-by: Andy Shevchenko --- drivers/platform/x86/intel_pmc_core_pltdrv.c | 26 +++++++++++++------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/platform/x86/intel_pmc_core_pltdrv.c b/drivers/platform/x86/intel_pmc_core_pltdrv.c index 731281855cc8..73797680b895 100644 --- a/drivers/platform/x86/intel_pmc_core_pltdrv.c +++ b/drivers/platform/x86/intel_pmc_core_pltdrv.c @@ -20,15 +20,10 @@ static void intel_pmc_core_release(struct device *dev) { - /* Nothing to do. */ + kfree(dev); } -static struct platform_device pmc_core_device = { - .name = "intel_pmc_core", - .dev = { - .release = intel_pmc_core_release, - }, -}; +static struct platform_device *pmc_core_device; /* * intel_pmc_core_platform_ids is the list of platforms where we want to @@ -52,6 +47,8 @@ MODULE_DEVICE_TABLE(x86cpu, intel_pmc_core_platform_ids); static int __init pmc_core_platform_init(void) { + int retval; + /* Skip creating the platform device if ACPI already has a device */ if (acpi_dev_present("INT33A1", NULL, -1)) return -ENODEV; @@ -59,12 +56,23 @@ static int __init pmc_core_platform_init(void) if (!x86_match_cpu(intel_pmc_core_platform_ids)) return -ENODEV; - return platform_device_register(&pmc_core_device); + pmc_core_device = kzalloc(sizeof(*pmc_core_device), GFP_KERNEL); + if (!pmc_core_device) + return -ENOMEM; + + pmc_core_device->name = "intel_pmc_core"; + pmc_core_device->dev.release = intel_pmc_core_release; + + retval = platform_device_register(pmc_core_device); + if (retval) + kfree(pmc_core_device); + + return retval; } static void __exit pmc_core_platform_exit(void) { - platform_device_unregister(&pmc_core_device); + platform_device_unregister(pmc_core_device); } module_init(pmc_core_platform_init); From 00fb259c618ea1198fc51b53a6167aa0d78672a9 Mon Sep 17 00:00:00 2001 From: Ray Jui Date: Thu, 10 Sep 2020 08:25:38 -0700 Subject: [PATCH 134/322] spi: bcm-qspi: Fix probe regression on iProc platforms iProc chips have QSPI controller that does not have the MSPI_REV offset. Reading from that offset will cause a bus error. Fix it by having MSPI_REV query disabled in the generic compatible string. Fixes: 3a01f04d74ef ("spi: bcm-qspi: Handle lack of MSPI_REV offset") Link: https://lore.kernel.org/linux-arm-kernel/20200909211857.4144718-1-f.fainelli@gmail.com/T/#u Signed-off-by: Ray Jui Acked-by: Florian Fainelli Link: https://lore.kernel.org/r/20200910152539.45584-3-ray.jui@broadcom.com Signed-off-by: Mark Brown --- drivers/spi/spi-bcm-qspi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c index 681d09085175..9cfa15ec8b08 100644 --- a/drivers/spi/spi-bcm-qspi.c +++ b/drivers/spi/spi-bcm-qspi.c @@ -1295,7 +1295,7 @@ static const struct of_device_id bcm_qspi_of_match[] = { }, { .compatible = "brcm,spi-bcm-qspi", - .data = &bcm_qspi_rev_data, + .data = &bcm_qspi_no_rev_data, }, { .compatible = "brcm,spi-bcm7216-qspi", From e43c26e12dd49a41cf5a4cd5c5b59a1eb98ed11e Mon Sep 17 00:00:00 2001 From: Ye Li Date: Wed, 23 Sep 2020 02:03:44 -0700 Subject: [PATCH 135/322] gpio: pca953x: Fix uninitialized pending variable When pca953x_irq_pending returns false, the pending parameter won't be set. But pca953x_irq_handler continues using this uninitialized variable as pending irqs and will cause problem. Fix the issue by initializing pending to 0. Fixes: 064c73afe738 ("gpio: pca953x: Synchronize interrupt handler properly") Signed-off-by: Ye Li Reviewed-by: Andy Shevchenko Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-pca953x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index bd2e96c34f82..29342e5def82 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -814,7 +814,7 @@ static irqreturn_t pca953x_irq_handler(int irq, void *devid) { struct pca953x_chip *chip = devid; struct gpio_chip *gc = &chip->gpio_chip; - DECLARE_BITMAP(pending, MAX_LINE); + DECLARE_BITMAP(pending, MAX_LINE) = {}; int level; bool ret; From ac67b07e268d46eba675a60c37051bb3e59fd201 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Fri, 11 Sep 2020 09:51:04 +0800 Subject: [PATCH 136/322] gpio/aspeed-sgpio: enable access to all 80 input & output sgpios Currently, the aspeed-sgpio driver exposes up to 80 GPIO lines, corresponding to the 80 status bits available in hardware. Each of these lines can be configured as either an input or an output. However, each of these GPIOs is actually an input *and* an output; we actually have 80 inputs plus 80 outputs. This change expands the maximum number of GPIOs to 160; the lower half of this range are the input-only GPIOs, the upper half are the outputs. We fix the GPIO directions to correspond to this mapping. This also fixes a bug when setting GPIOs - we were reading from the input register, making it impossible to set more than one output GPIO. Signed-off-by: Jeremy Kerr Fixes: 7db47faae79b ("gpio: aspeed: Add SGPIO driver") Reviewed-by: Joel Stanley Reviewed-by: Andrew Jeffery Acked-by: Rob Herring Signed-off-by: Bartosz Golaszewski --- .../devicetree/bindings/gpio/sgpio-aspeed.txt | 5 +- drivers/gpio/gpio-aspeed-sgpio.c | 126 ++++++++++++------ 2 files changed, 87 insertions(+), 44 deletions(-) diff --git a/Documentation/devicetree/bindings/gpio/sgpio-aspeed.txt b/Documentation/devicetree/bindings/gpio/sgpio-aspeed.txt index d4d83916c09d..be329ea4794f 100644 --- a/Documentation/devicetree/bindings/gpio/sgpio-aspeed.txt +++ b/Documentation/devicetree/bindings/gpio/sgpio-aspeed.txt @@ -20,8 +20,9 @@ Required properties: - gpio-controller : Marks the device node as a GPIO controller - interrupts : Interrupt specifier, see interrupt-controller/interrupts.txt - interrupt-controller : Mark the GPIO controller as an interrupt-controller -- ngpios : number of GPIO lines, see gpio.txt - (should be multiple of 8, up to 80 pins) +- ngpios : number of *hardware* GPIO lines, see gpio.txt. This will expose + 2 software GPIOs per hardware GPIO: one for hardware input, one for hardware + output. Up to 80 pins, must be a multiple of 8. - clocks : A phandle to the APB clock for SGPM clock division - bus-frequency : SGPM CLK frequency diff --git a/drivers/gpio/gpio-aspeed-sgpio.c b/drivers/gpio/gpio-aspeed-sgpio.c index d16645c1d8d9..5d678dbf1a62 100644 --- a/drivers/gpio/gpio-aspeed-sgpio.c +++ b/drivers/gpio/gpio-aspeed-sgpio.c @@ -17,7 +17,17 @@ #include #include -#define MAX_NR_SGPIO 80 +/* + * MAX_NR_HW_GPIO represents the number of actual hardware-supported GPIOs (ie, + * slots within the clocked serial GPIO data). Since each HW GPIO is both an + * input and an output, we provide MAX_NR_HW_GPIO * 2 lines on our gpiochip + * device. + * + * We use SGPIO_OUTPUT_OFFSET to define the split between the inputs and + * outputs; the inputs start at line 0, the outputs start at OUTPUT_OFFSET. + */ +#define MAX_NR_HW_SGPIO 80 +#define SGPIO_OUTPUT_OFFSET MAX_NR_HW_SGPIO #define ASPEED_SGPIO_CTRL 0x54 @@ -30,8 +40,8 @@ struct aspeed_sgpio { struct clk *pclk; spinlock_t lock; void __iomem *base; - uint32_t dir_in[3]; int irq; + int n_sgpio; }; struct aspeed_sgpio_bank { @@ -111,31 +121,69 @@ static void __iomem *bank_reg(struct aspeed_sgpio *gpio, } } -#define GPIO_BANK(x) ((x) >> 5) -#define GPIO_OFFSET(x) ((x) & 0x1f) +#define GPIO_BANK(x) ((x % SGPIO_OUTPUT_OFFSET) >> 5) +#define GPIO_OFFSET(x) ((x % SGPIO_OUTPUT_OFFSET) & 0x1f) #define GPIO_BIT(x) BIT(GPIO_OFFSET(x)) static const struct aspeed_sgpio_bank *to_bank(unsigned int offset) { - unsigned int bank = GPIO_BANK(offset); + unsigned int bank; + + bank = GPIO_BANK(offset); WARN_ON(bank >= ARRAY_SIZE(aspeed_sgpio_banks)); return &aspeed_sgpio_banks[bank]; } +static int aspeed_sgpio_init_valid_mask(struct gpio_chip *gc, + unsigned long *valid_mask, unsigned int ngpios) +{ + struct aspeed_sgpio *sgpio = gpiochip_get_data(gc); + int n = sgpio->n_sgpio; + int c = SGPIO_OUTPUT_OFFSET - n; + + WARN_ON(ngpios < MAX_NR_HW_SGPIO * 2); + + /* input GPIOs in the lower range */ + bitmap_set(valid_mask, 0, n); + bitmap_clear(valid_mask, n, c); + + /* output GPIOS above SGPIO_OUTPUT_OFFSET */ + bitmap_set(valid_mask, SGPIO_OUTPUT_OFFSET, n); + bitmap_clear(valid_mask, SGPIO_OUTPUT_OFFSET + n, c); + + return 0; +} + +static void aspeed_sgpio_irq_init_valid_mask(struct gpio_chip *gc, + unsigned long *valid_mask, unsigned int ngpios) +{ + struct aspeed_sgpio *sgpio = gpiochip_get_data(gc); + int n = sgpio->n_sgpio; + + WARN_ON(ngpios < MAX_NR_HW_SGPIO * 2); + + /* input GPIOs in the lower range */ + bitmap_set(valid_mask, 0, n); + bitmap_clear(valid_mask, n, ngpios - n); +} + +static bool aspeed_sgpio_is_input(unsigned int offset) +{ + return offset < SGPIO_OUTPUT_OFFSET; +} + static int aspeed_sgpio_get(struct gpio_chip *gc, unsigned int offset) { struct aspeed_sgpio *gpio = gpiochip_get_data(gc); const struct aspeed_sgpio_bank *bank = to_bank(offset); unsigned long flags; enum aspeed_sgpio_reg reg; - bool is_input; int rc = 0; spin_lock_irqsave(&gpio->lock, flags); - is_input = gpio->dir_in[GPIO_BANK(offset)] & GPIO_BIT(offset); - reg = is_input ? reg_val : reg_rdata; + reg = aspeed_sgpio_is_input(offset) ? reg_val : reg_rdata; rc = !!(ioread32(bank_reg(gpio, bank, reg)) & GPIO_BIT(offset)); spin_unlock_irqrestore(&gpio->lock, flags); @@ -143,22 +191,31 @@ static int aspeed_sgpio_get(struct gpio_chip *gc, unsigned int offset) return rc; } -static void sgpio_set_value(struct gpio_chip *gc, unsigned int offset, int val) +static int sgpio_set_value(struct gpio_chip *gc, unsigned int offset, int val) { struct aspeed_sgpio *gpio = gpiochip_get_data(gc); const struct aspeed_sgpio_bank *bank = to_bank(offset); - void __iomem *addr; + void __iomem *addr_r, *addr_w; u32 reg = 0; - addr = bank_reg(gpio, bank, reg_val); - reg = ioread32(addr); + if (aspeed_sgpio_is_input(offset)) + return -EINVAL; + + /* Since this is an output, read the cached value from rdata, then + * update val. */ + addr_r = bank_reg(gpio, bank, reg_rdata); + addr_w = bank_reg(gpio, bank, reg_val); + + reg = ioread32(addr_r); if (val) reg |= GPIO_BIT(offset); else reg &= ~GPIO_BIT(offset); - iowrite32(reg, addr); + iowrite32(reg, addr_w); + + return 0; } static void aspeed_sgpio_set(struct gpio_chip *gc, unsigned int offset, int val) @@ -175,43 +232,28 @@ static void aspeed_sgpio_set(struct gpio_chip *gc, unsigned int offset, int val) static int aspeed_sgpio_dir_in(struct gpio_chip *gc, unsigned int offset) { - struct aspeed_sgpio *gpio = gpiochip_get_data(gc); - unsigned long flags; - - spin_lock_irqsave(&gpio->lock, flags); - gpio->dir_in[GPIO_BANK(offset)] |= GPIO_BIT(offset); - spin_unlock_irqrestore(&gpio->lock, flags); - - return 0; + return aspeed_sgpio_is_input(offset) ? 0 : -EINVAL; } static int aspeed_sgpio_dir_out(struct gpio_chip *gc, unsigned int offset, int val) { struct aspeed_sgpio *gpio = gpiochip_get_data(gc); unsigned long flags; + int rc; + + /* No special action is required for setting the direction; we'll + * error-out in sgpio_set_value if this isn't an output GPIO */ spin_lock_irqsave(&gpio->lock, flags); - - gpio->dir_in[GPIO_BANK(offset)] &= ~GPIO_BIT(offset); - sgpio_set_value(gc, offset, val); - + rc = sgpio_set_value(gc, offset, val); spin_unlock_irqrestore(&gpio->lock, flags); - return 0; + return rc; } static int aspeed_sgpio_get_direction(struct gpio_chip *gc, unsigned int offset) { - int dir_status; - struct aspeed_sgpio *gpio = gpiochip_get_data(gc); - unsigned long flags; - - spin_lock_irqsave(&gpio->lock, flags); - dir_status = gpio->dir_in[GPIO_BANK(offset)] & GPIO_BIT(offset); - spin_unlock_irqrestore(&gpio->lock, flags); - - return dir_status; - + return !!aspeed_sgpio_is_input(offset); } static void irqd_to_aspeed_sgpio_data(struct irq_data *d, @@ -402,6 +444,7 @@ static int aspeed_sgpio_setup_irqs(struct aspeed_sgpio *gpio, irq = &gpio->chip.irq; irq->chip = &aspeed_sgpio_irqchip; + irq->init_valid_mask = aspeed_sgpio_irq_init_valid_mask; irq->handler = handle_bad_irq; irq->default_type = IRQ_TYPE_NONE; irq->parent_handler = aspeed_sgpio_irq_handler; @@ -452,11 +495,12 @@ static int __init aspeed_sgpio_probe(struct platform_device *pdev) if (rc < 0) { dev_err(&pdev->dev, "Could not read ngpios property\n"); return -EINVAL; - } else if (nr_gpios > MAX_NR_SGPIO) { + } else if (nr_gpios > MAX_NR_HW_SGPIO) { dev_err(&pdev->dev, "Number of GPIOs exceeds the maximum of %d: %d\n", - MAX_NR_SGPIO, nr_gpios); + MAX_NR_HW_SGPIO, nr_gpios); return -EINVAL; } + gpio->n_sgpio = nr_gpios; rc = of_property_read_u32(pdev->dev.of_node, "bus-frequency", &sgpio_freq); if (rc < 0) { @@ -497,7 +541,8 @@ static int __init aspeed_sgpio_probe(struct platform_device *pdev) spin_lock_init(&gpio->lock); gpio->chip.parent = &pdev->dev; - gpio->chip.ngpio = nr_gpios; + gpio->chip.ngpio = MAX_NR_HW_SGPIO * 2; + gpio->chip.init_valid_mask = aspeed_sgpio_init_valid_mask; gpio->chip.direction_input = aspeed_sgpio_dir_in; gpio->chip.direction_output = aspeed_sgpio_dir_out; gpio->chip.get_direction = aspeed_sgpio_get_direction; @@ -509,9 +554,6 @@ static int __init aspeed_sgpio_probe(struct platform_device *pdev) gpio->chip.label = dev_name(&pdev->dev); gpio->chip.base = -1; - /* set all SGPIO pins as input (1). */ - memset(gpio->dir_in, 0xff, sizeof(gpio->dir_in)); - aspeed_sgpio_setup_irqs(gpio, pdev); rc = devm_gpiochip_add_data(&pdev->dev, &gpio->chip, gpio); From bf0d394e885015941ed2d5724c0a6ed8d42dd95e Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Fri, 11 Sep 2020 09:51:05 +0800 Subject: [PATCH 137/322] gpio/aspeed-sgpio: don't enable all interrupts by default Currently, the IRQ setup for the SGPIO driver enables all interrupts in dual-edge trigger mode. Since the default handler is handle_bad_irq, any state change on input GPIOs will trigger bad IRQ warnings. This change applies sensible IRQ defaults: single-edge trigger, and all IRQs disabled. Signed-off-by: Jeremy Kerr Fixes: 7db47faae79b ("gpio: aspeed: Add SGPIO driver") Reviewed-by: Joel Stanley Acked-by: Andrew Jeffery Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-aspeed-sgpio.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpio/gpio-aspeed-sgpio.c b/drivers/gpio/gpio-aspeed-sgpio.c index 5d678dbf1a62..a0eb00c024f6 100644 --- a/drivers/gpio/gpio-aspeed-sgpio.c +++ b/drivers/gpio/gpio-aspeed-sgpio.c @@ -452,17 +452,15 @@ static int aspeed_sgpio_setup_irqs(struct aspeed_sgpio *gpio, irq->parents = &gpio->irq; irq->num_parents = 1; - /* set IRQ settings and Enable Interrupt */ + /* Apply default IRQ settings */ for (i = 0; i < ARRAY_SIZE(aspeed_sgpio_banks); i++) { bank = &aspeed_sgpio_banks[i]; /* set falling or level-low irq */ iowrite32(0x00000000, bank_reg(gpio, bank, reg_irq_type0)); /* trigger type is edge */ iowrite32(0x00000000, bank_reg(gpio, bank, reg_irq_type1)); - /* dual edge trigger mode. */ - iowrite32(0xffffffff, bank_reg(gpio, bank, reg_irq_type2)); - /* enable irq */ - iowrite32(0xffffffff, bank_reg(gpio, bank, reg_irq_enable)); + /* single edge trigger */ + iowrite32(0x00000000, bank_reg(gpio, bank, reg_irq_type2)); } return 0; From efb1676306f664625c0c546dd10d18d33c75e3fc Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 23 Sep 2020 07:24:42 +0200 Subject: [PATCH 138/322] mt76: mt7615: reduce maximum VHT MPDU length to 7991 After fixing mac80211 to allow larger A-MSDUs in some cases, there have been reports of performance regressions and packet loss with some clients. It appears that the issue occurs when the hardware is transmitting A-MSDUs bigger than 8k. Limit the local VHT MPDU size capability to 7991, matching the value used for MT7915 as well. Signed-off-by: Felix Fietkau Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20200923052442.24141-1-nbd@nbd.name --- drivers/net/wireless/mediatek/mt76/mt7615/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c index fc1ebabfebac..1f57b43693bc 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c @@ -460,7 +460,7 @@ void mt7615_init_device(struct mt7615_dev *dev) dev->mphy.sband_2g.sband.ht_cap.cap |= IEEE80211_HT_CAP_LDPC_CODING; dev->mphy.sband_5g.sband.ht_cap.cap |= IEEE80211_HT_CAP_LDPC_CODING; dev->mphy.sband_5g.sband.vht_cap.cap |= - IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454 | + IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991 | IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK; mt7615_cap_dbdc_disable(dev); dev->phy.dfs_state = -1; From 3e640b1eec38e4c8eba160f26cba4f592e657f3d Mon Sep 17 00:00:00 2001 From: Tao Ren Date: Wed, 16 Sep 2020 13:42:16 -0700 Subject: [PATCH 139/322] gpio: aspeed: fix ast2600 bank properties GPIO_U is mapped to the least significant byte of input/output mask, and the byte in "output" mask should be 0 because GPIO_U is input only. All the other bits need to be 1 because GPIO_V/W/X support both input and output modes. Similarly, GPIO_Y/Z are mapped to the 2 least significant bytes, and the according bits need to be 1 because GPIO_Y/Z support both input and output modes. Fixes: ab4a85534c3e ("gpio: aspeed: Add in ast2600 details to Aspeed driver") Signed-off-by: Tao Ren Reviewed-by: Andrew Jeffery Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-aspeed.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-aspeed.c b/drivers/gpio/gpio-aspeed.c index 879db23d8454..d07bf2c3f136 100644 --- a/drivers/gpio/gpio-aspeed.c +++ b/drivers/gpio/gpio-aspeed.c @@ -1114,8 +1114,8 @@ static const struct aspeed_gpio_config ast2500_config = static const struct aspeed_bank_props ast2600_bank_props[] = { /* input output */ - {5, 0xffffffff, 0x0000ffff}, /* U/V/W/X */ - {6, 0xffff0000, 0x0fff0000}, /* Y/Z */ + {5, 0xffffffff, 0xffffff00}, /* U/V/W/X */ + {6, 0x0000ffff, 0x0000ffff}, /* Y/Z */ { }, }; From be068f29034fb00530a053d18b8cf140c32b12b3 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 24 Sep 2020 08:41:32 -0700 Subject: [PATCH 140/322] mm: fix misplaced unlock_page in do_wp_page() Commit 09854ba94c6a ("mm: do_wp_page() simplification") reorganized all the code around the page re-use vs copy, but in the process also moved the final unlock_page() around to after the wp_page_reuse() call. That normally doesn't matter - but it means that the unlock_page() is now done after releasing the page table lock. Again, not a big deal, you'd think. But it turns out that it's very wrong indeed, because once we've released the page table lock, we've basically lost our only reference to the page - the page tables - and it could now be free'd at any time. We do hold the mmap_sem, so no actual unmap() can happen, but madvise can come in and a MADV_DONTNEED will zap the page range - and free the page. So now the page may be free'd just as we're unlocking it, which in turn will usually trigger a "Bad page state" error in the freeing path. To make matters more confusing, by the time the debug code prints out the page state, the unlock has typically completed and everything looks fine again. This all doesn't happen in any normal situations, but it does trigger with the dirtyc0w_child LTP test. And it seems to trigger much more easily (but not expclusively) on s390 than elsewhere, probably because s390 doesn't do the "batch pages up for freeing after the TLB flush" that gives the unlock_page() more time to complete and makes the race harder to hit. Fixes: 09854ba94c6a ("mm: do_wp_page() simplification") Link: https://lore.kernel.org/lkml/a46e9bbef2ed4e17778f5615e818526ef848d791.camel@redhat.com/ Link: https://lore.kernel.org/linux-mm/c41149a8-211e-390b-af1d-d5eee690fecb@linux.alibaba.com/ Reported-by: Qian Cai Reported-by: Alex Shi Bisected-and-analyzed-by: Gerald Schaefer Tested-by: Gerald Schaefer Signed-off-by: Linus Torvalds --- mm/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memory.c b/mm/memory.c index e315b1f1ef08..f3eb55975902 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2967,8 +2967,8 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf) * page count reference, and the page is locked, * it's dark out, and we're wearing sunglasses. Hit it. */ - wp_page_reuse(vmf); unlock_page(page); + wp_page_reuse(vmf); return VM_FAULT_WRITE; } else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) == (VM_WRITE|VM_SHARED))) { From 516d980f85415d76ae3d0d2a871eb20243f46c95 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 23 Sep 2020 02:48:56 +0900 Subject: [PATCH 141/322] scripts/kallsyms: skip ppc compiler stub *.long_branch.* / *.plt_branch.* PowerPC allmodconfig often fails to build as follows: LD .tmp_vmlinux.kallsyms1 KSYM .tmp_vmlinux.kallsyms1.o LD .tmp_vmlinux.kallsyms2 KSYM .tmp_vmlinux.kallsyms2.o LD .tmp_vmlinux.kallsyms3 KSYM .tmp_vmlinux.kallsyms3.o LD vmlinux SORTTAB vmlinux SYSMAP System.map Inconsistent kallsyms data Try make KALLSYMS_EXTRA_PASS=1 as a workaround make[2]: *** [../Makefile:1162: vmlinux] Error 1 Setting KALLSYMS_EXTRA_PASS=1 does not help. This is caused by the compiler inserting stubs such as *.long_branch.* and *.plt_branch.* $ powerpc-linux-nm -n .tmp_vmlinux.kallsyms2 [ snip ] c00000000210c010 t 00000075.plt_branch.da9:19 c00000000210c020 t 00000075.plt_branch.1677:5 c00000000210c030 t 00000075.long_branch.memmove c00000000210c034 t 00000075.plt_branch.9e0:5 c00000000210c044 t 00000075.plt_branch.free_initrd_mem ... Actually, the problem mentioned in scripts/link-vmlinux.sh comments; "In theory it's possible this results in even more stubs, but unlikely" is happening here, and ends up with another kallsyms step required. scripts/kallsyms.c already ignores various compiler stubs. Let's do similar to make kallsysms for PowerPC always succeed in 2 steps. Reported-by: Guenter Roeck Signed-off-by: Masahiro Yamada Tested-by: Guenter Roeck --- scripts/kallsyms.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index 0096cd965332..7ecd2ccba531 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c @@ -82,6 +82,7 @@ static char *sym_name(const struct sym_entry *s) static bool is_ignored_symbol(const char *name, char type) { + /* Symbol names that exactly match to the following are ignored.*/ static const char * const ignored_symbols[] = { /* * Symbols which vary between passes. Passes 1 and 2 must have @@ -104,6 +105,7 @@ static bool is_ignored_symbol(const char *name, char type) NULL }; + /* Symbol names that begin with the following are ignored.*/ static const char * const ignored_prefixes[] = { "$", /* local symbols for ARM, MIPS, etc. */ ".LASANPC", /* s390 kasan local symbols */ @@ -113,6 +115,7 @@ static bool is_ignored_symbol(const char *name, char type) NULL }; + /* Symbol names that end with the following are ignored.*/ static const char * const ignored_suffixes[] = { "_from_arm", /* arm */ "_from_thumb", /* arm */ @@ -120,9 +123,15 @@ static bool is_ignored_symbol(const char *name, char type) NULL }; + /* Symbol names that contain the following are ignored.*/ + static const char * const ignored_matches[] = { + ".long_branch.", /* ppc stub */ + ".plt_branch.", /* ppc stub */ + NULL + }; + const char * const *p; - /* Exclude symbols which vary between passes. */ for (p = ignored_symbols; *p; p++) if (!strcmp(name, *p)) return true; @@ -138,6 +147,11 @@ static bool is_ignored_symbol(const char *name, char type) return true; } + for (p = ignored_matches; *p; p++) { + if (strstr(name, *p)) + return true; + } + if (type == 'U' || type == 'u') return true; /* exclude debugging symbols */ From 87f92ac4c12758c4da3bbe4393f1d884b610b8a6 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 24 Sep 2020 10:17:05 -0700 Subject: [PATCH 142/322] libbpf: Fix XDP program load regression for old kernels Fix regression in libbpf, introduced by XDP link change, which causes XDP programs to fail to be loaded into kernel due to specified BPF_XDP expected_attach_type. While kernel doesn't enforce expected_attach_type for BPF_PROG_TYPE_XDP, some old kernels already support XDP program, but they don't yet recognize expected_attach_type field in bpf_attr, so setting it to non-zero value causes program load to fail. Luckily, libbpf already has a mechanism to deal with such cases, so just make expected_attach_type optional for XDP programs. Fixes: dc8698cac7aa ("libbpf: Add support for BPF XDP link") Reported-by: Nikita Shirokov Reported-by: Udip Pant Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200924171705.3803628-1-andriin@fb.com --- tools/lib/bpf/libbpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 7253b833576c..e493d6048143 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -6925,7 +6925,7 @@ static const struct bpf_sec_def section_defs[] = { BPF_XDP_DEVMAP), BPF_EAPROG_SEC("xdp_cpumap/", BPF_PROG_TYPE_XDP, BPF_XDP_CPUMAP), - BPF_EAPROG_SEC("xdp", BPF_PROG_TYPE_XDP, + BPF_APROG_SEC("xdp", BPF_PROG_TYPE_XDP, BPF_XDP), BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT), BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN), From ee6fa053019478918ca15eadaa93e516ddb9da8d Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Mon, 21 Sep 2020 13:38:05 +0300 Subject: [PATCH 143/322] KVM: x86: fix MSR_IA32_TSC read for nested migration MSR reads/writes should always access the L1 state, since the (nested) hypervisor should intercept all the msrs it wants to adjust, and these that it doesn't should be read by the guest as if the host had read it. However IA32_TSC is an exception. Even when not intercepted, guest still reads the value + TSC offset. The write however does not take any TSC offset into account. This is documented in Intel's SDM and seems also to happen on AMD as well. This creates a problem when userspace wants to read the IA32_TSC value and then write it. (e.g for migration) In this case it reads L2 value but write is interpreted as an L1 value. To fix this make the userspace initiated reads of IA32_TSC return L1 value as well. Huge thanks to Dave Gilbert for helping me understand this very confusing semantic of MSR writes. Signed-off-by: Maxim Levitsky Message-Id: <20200921103805.9102-2-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 67362607e396..e3fe1d126cc3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3224,9 +3224,22 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_POWER_CTL: msr_info->data = vcpu->arch.msr_ia32_power_ctl; break; - case MSR_IA32_TSC: - msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) + vcpu->arch.tsc_offset; + case MSR_IA32_TSC: { + /* + * Intel SDM states that MSR_IA32_TSC read adds the TSC offset + * even when not intercepted. AMD manual doesn't explicitly + * state this but appears to behave the same. + * + * On userspace reads and writes, however, we unconditionally + * operate L1's TSC value to ensure backwards-compatible + * behavior for migration. + */ + u64 tsc_offset = msr_info->host_initiated ? vcpu->arch.l1_tsc_offset : + vcpu->arch.tsc_offset; + + msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) + tsc_offset; break; + } case MSR_MTRRcap: case 0x200 ... 0x2ff: return kvm_mtrr_get_msr(vcpu, msr_info->index, &msr_info->data); From 3701cb59d892b88d569427586f01491552f377b1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 24 Sep 2020 19:41:58 -0400 Subject: [PATCH 144/322] ep_create_wakeup_source(): dentry name can change under you... or get freed, for that matter, if it's a long (separately stored) name. Signed-off-by: Al Viro --- fs/eventpoll.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 16313180e4c1..4df61129566d 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1448,7 +1448,7 @@ static int reverse_path_check(void) static int ep_create_wakeup_source(struct epitem *epi) { - const char *name; + struct name_snapshot n; struct wakeup_source *ws; if (!epi->ep->ws) { @@ -1457,8 +1457,9 @@ static int ep_create_wakeup_source(struct epitem *epi) return -ENOMEM; } - name = epi->ffd.file->f_path.dentry->d_name.name; - ws = wakeup_source_register(NULL, name); + take_dentry_name_snapshot(&n, epi->ffd.file->f_path.dentry); + ws = wakeup_source_register(NULL, n.name.name); + release_dentry_name_snapshot(&n); if (!ws) return -ENOMEM; From e49d8c22f1261c43a986a7fdbf677ac309682a07 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 22 Sep 2020 20:56:23 -0700 Subject: [PATCH 145/322] net_sched: defer tcf_idr_insert() in tcf_action_init_1() All TC actions call tcf_idr_insert() for new action at the end of their ->init(), so we can actually move it to a central place in tcf_action_init_1(). And once the action is inserted into the global IDR, other parallel process could free it immediately as its refcnt is still 1, so we can not fail after this, we need to move it after the goto action validation to avoid handling the failure case after insertion. This is found during code review, is not directly triggered by syzbot. And this prepares for the next patch. Cc: Vlad Buslov Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/act_api.h | 2 -- net/sched/act_api.c | 38 ++++++++++++++++++++------------------ net/sched/act_bpf.c | 4 +--- net/sched/act_connmark.c | 1 - net/sched/act_csum.c | 3 --- net/sched/act_ct.c | 2 -- net/sched/act_ctinfo.c | 3 --- net/sched/act_gact.c | 2 -- net/sched/act_gate.c | 3 --- net/sched/act_ife.c | 3 --- net/sched/act_ipt.c | 2 -- net/sched/act_mirred.c | 2 -- net/sched/act_mpls.c | 2 -- net/sched/act_nat.c | 3 --- net/sched/act_pedit.c | 2 -- net/sched/act_police.c | 2 -- net/sched/act_sample.c | 2 -- net/sched/act_simple.c | 2 -- net/sched/act_skbedit.c | 2 -- net/sched/act_skbmod.c | 2 -- net/sched/act_tunnel_key.c | 3 --- net/sched/act_vlan.c | 2 -- 22 files changed, 21 insertions(+), 66 deletions(-) diff --git a/include/net/act_api.h b/include/net/act_api.h index cb382a89ea58..87214927314a 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -166,8 +166,6 @@ int tcf_idr_create_from_flags(struct tc_action_net *tn, u32 index, struct nlattr *est, struct tc_action **a, const struct tc_action_ops *ops, int bind, u32 flags); -void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a); - void tcf_idr_cleanup(struct tc_action_net *tn, u32 index); int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index, struct tc_action **a, int bind); diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 063d8aaf2900..0030f00234ee 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -467,17 +467,6 @@ int tcf_idr_create_from_flags(struct tc_action_net *tn, u32 index, } EXPORT_SYMBOL(tcf_idr_create_from_flags); -void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a) -{ - struct tcf_idrinfo *idrinfo = tn->idrinfo; - - mutex_lock(&idrinfo->lock); - /* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */ - WARN_ON(!IS_ERR(idr_replace(&idrinfo->action_idr, a, a->tcfa_index))); - mutex_unlock(&idrinfo->lock); -} -EXPORT_SYMBOL(tcf_idr_insert); - /* Cleanup idr index that was allocated but not initialized. */ void tcf_idr_cleanup(struct tc_action_net *tn, u32 index) @@ -902,6 +891,16 @@ static const struct nla_policy tcf_action_policy[TCA_ACT_MAX + 1] = { [TCA_ACT_HW_STATS] = NLA_POLICY_BITFIELD32(TCA_ACT_HW_STATS_ANY), }; +static void tcf_idr_insert(struct tc_action *a) +{ + struct tcf_idrinfo *idrinfo = a->idrinfo; + + mutex_lock(&idrinfo->lock); + /* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */ + WARN_ON(!IS_ERR(idr_replace(&idrinfo->action_idr, a, a->tcfa_index))); + mutex_unlock(&idrinfo->lock); +} + struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, char *name, int ovr, int bind, @@ -989,6 +988,16 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, if (err < 0) goto err_mod; + if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN) && + !rcu_access_pointer(a->goto_chain)) { + tcf_action_destroy_1(a, bind); + NL_SET_ERR_MSG(extack, "can't use goto chain with NULL chain"); + return ERR_PTR(-EINVAL); + } + + if (err == ACT_P_CREATED) + tcf_idr_insert(a); + if (!name && tb[TCA_ACT_COOKIE]) tcf_set_action_cookie(&a->act_cookie, cookie); @@ -1002,13 +1011,6 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, if (err != ACT_P_CREATED) module_put(a_o->owner); - if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN) && - !rcu_access_pointer(a->goto_chain)) { - tcf_action_destroy_1(a, bind); - NL_SET_ERR_MSG(extack, "can't use goto chain with NULL chain"); - return ERR_PTR(-EINVAL); - } - return a; err_mod: diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 54d5652cfe6c..a4c7ba35a343 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -365,9 +365,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, if (goto_ch) tcf_chain_put_by_act(goto_ch); - if (res == ACT_P_CREATED) { - tcf_idr_insert(tn, *act); - } else { + if (res != ACT_P_CREATED) { /* make sure the program being replaced is no longer executing */ synchronize_rcu(); tcf_bpf_cfg_cleanup(&old); diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index f901421b0634..e19885d7fe2c 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -139,7 +139,6 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, ci->net = net; ci->zone = parm->zone; - tcf_idr_insert(tn, *a); ret = ACT_P_CREATED; } else if (ret > 0) { ci = to_connmark(*a); diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index f5826e457679..4fa4fcb842ba 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -110,9 +110,6 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, if (params_new) kfree_rcu(params_new, rcu); - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); - return ret; put_chain: if (goto_ch) diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 2c3619165680..a780afdf570d 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -1297,8 +1297,6 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla, tcf_chain_put_by_act(goto_ch); if (params) call_rcu(¶ms->rcu, tcf_ct_params_free); - if (res == ACT_P_CREATED) - tcf_idr_insert(tn, *a); return res; diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c index b5042f3ea079..6084300e51ad 100644 --- a/net/sched/act_ctinfo.c +++ b/net/sched/act_ctinfo.c @@ -269,9 +269,6 @@ static int tcf_ctinfo_init(struct net *net, struct nlattr *nla, if (cp_new) kfree_rcu(cp_new, rcu); - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); - return ret; put_chain: diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 410e3bbfb9ca..73c3926358a0 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -140,8 +140,6 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, if (goto_ch) tcf_chain_put_by_act(goto_ch); - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); return ret; release_idr: tcf_idr_release(*a, bind); diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c index 1fb8d428d2c1..7c0771dd77a3 100644 --- a/net/sched/act_gate.c +++ b/net/sched/act_gate.c @@ -437,9 +437,6 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, if (goto_ch) tcf_chain_put_by_act(goto_ch); - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); - return ret; chain_put: diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 5c568757643b..a2ddea04183a 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -627,9 +627,6 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, if (p) kfree_rcu(p, rcu); - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); - return ret; metadata_parse_err: if (goto_ch) diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 400a2cfe8452..8dc3bec0d325 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -189,8 +189,6 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla, ipt->tcfi_t = t; ipt->tcfi_hook = hook; spin_unlock_bh(&ipt->tcf_lock); - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); return ret; err3: diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index b2705318993b..e24b7e2331cd 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -194,8 +194,6 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, spin_lock(&mirred_list_lock); list_add(&m->tcfm_list, &mirred_list); spin_unlock(&mirred_list_lock); - - tcf_idr_insert(tn, *a); } return ret; diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c index 8118e2640979..e298ec3b3c9e 100644 --- a/net/sched/act_mpls.c +++ b/net/sched/act_mpls.c @@ -273,8 +273,6 @@ static int tcf_mpls_init(struct net *net, struct nlattr *nla, if (p) kfree_rcu(p, rcu); - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); return ret; put_chain: if (goto_ch) diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 855a6fa16a62..1ebd2a86d980 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -93,9 +93,6 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, if (goto_ch) tcf_chain_put_by_act(goto_ch); - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); - return ret; release_idr: tcf_idr_release(*a, bind); diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index c158bfed86d5..b45304446e13 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -238,8 +238,6 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, spin_unlock_bh(&p->tcf_lock); if (goto_ch) tcf_chain_put_by_act(goto_ch); - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); return ret; put_chain: diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 0b431d493768..8d8452b1cdd4 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -201,8 +201,6 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, if (new) kfree_rcu(new, rcu); - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); return ret; failure: diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 5e2df590bb58..3ebf9ede3cf1 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -116,8 +116,6 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, if (goto_ch) tcf_chain_put_by_act(goto_ch); - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); return ret; put_chain: if (goto_ch) diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 9813ca4006dd..a4f3d0f0daa9 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -157,8 +157,6 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, goto release_idr; } - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); return ret; put_chain: if (goto_ch) diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index d0652386c6e2..e5f3fb8b00e3 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -225,8 +225,6 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, if (goto_ch) tcf_chain_put_by_act(goto_ch); - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); return ret; put_chain: if (goto_ch) diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index 39e6d94cfafb..81a1c67335be 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -190,8 +190,6 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, if (goto_ch) tcf_chain_put_by_act(goto_ch); - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); return ret; put_chain: if (goto_ch) diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 37f1e10f35e0..a229751ee8c4 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -537,9 +537,6 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, if (goto_ch) tcf_chain_put_by_act(goto_ch); - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); - return ret; put_chain: diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index a5ff9f68ab02..163b0385fd4c 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -229,8 +229,6 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, if (p) kfree_rcu(p, rcu); - if (ret == ACT_P_CREATED) - tcf_idr_insert(tn, *a); return ret; put_chain: if (goto_ch) From 0fedc63fadf0404a729e73a35349481c8009c02f Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 22 Sep 2020 20:56:24 -0700 Subject: [PATCH 146/322] net_sched: commit action insertions together syzbot is able to trigger a failure case inside the loop in tcf_action_init(), and when this happens we clean up with tcf_action_destroy(). But, as these actions are already inserted into the global IDR, other parallel process could free them before tcf_action_destroy(), then we will trigger a use-after-free. Fix this by deferring the insertions even later, after the loop, and committing all the insertions in a separate loop, so we will never fail in the middle of the insertions any more. One side effect is that the window between alloction and final insertion becomes larger, now it is more likely that the loop in tcf_del_walker() sees the placeholder -EBUSY pointer. So we have to check for error pointer in tcf_del_walker(). Reported-and-tested-by: syzbot+2287853d392e4b42374a@syzkaller.appspotmail.com Fixes: 0190c1d452a9 ("net: sched: atomically check-allocate action") Cc: Vlad Buslov Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_api.c | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 0030f00234ee..104b47f5184f 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -307,6 +307,8 @@ static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, mutex_lock(&idrinfo->lock); idr_for_each_entry_ul(idr, p, tmp, id) { + if (IS_ERR(p)) + continue; ret = tcf_idr_release_unsafe(p); if (ret == ACT_P_DELETED) { module_put(ops->owner); @@ -891,14 +893,24 @@ static const struct nla_policy tcf_action_policy[TCA_ACT_MAX + 1] = { [TCA_ACT_HW_STATS] = NLA_POLICY_BITFIELD32(TCA_ACT_HW_STATS_ANY), }; -static void tcf_idr_insert(struct tc_action *a) +static void tcf_idr_insert_many(struct tc_action *actions[]) { - struct tcf_idrinfo *idrinfo = a->idrinfo; + int i; - mutex_lock(&idrinfo->lock); - /* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */ - WARN_ON(!IS_ERR(idr_replace(&idrinfo->action_idr, a, a->tcfa_index))); - mutex_unlock(&idrinfo->lock); + for (i = 0; i < TCA_ACT_MAX_PRIO; i++) { + struct tc_action *a = actions[i]; + struct tcf_idrinfo *idrinfo; + + if (!a) + continue; + idrinfo = a->idrinfo; + mutex_lock(&idrinfo->lock); + /* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc if + * it is just created, otherwise this is just a nop. + */ + idr_replace(&idrinfo->action_idr, a, a->tcfa_index); + mutex_unlock(&idrinfo->lock); + } } struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, @@ -995,9 +1007,6 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, return ERR_PTR(-EINVAL); } - if (err == ACT_P_CREATED) - tcf_idr_insert(a); - if (!name && tb[TCA_ACT_COOKIE]) tcf_set_action_cookie(&a->act_cookie, cookie); @@ -1053,6 +1062,11 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, actions[i - 1] = act; } + /* We have to commit them all together, because if any error happened in + * between, we could not handle the failure gracefully. + */ + tcf_idr_insert_many(actions); + *attr_size = tcf_action_full_attrs_size(sz); return i - 1; From 02a1b175b0e92d9e0fa5df3957ade8d733ceb6a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Wed, 23 Sep 2020 13:18:15 -0700 Subject: [PATCH 147/322] net/ipv4: always honour route mtu during forwarding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Documentation/networking/ip-sysctl.txt:46 says: ip_forward_use_pmtu - BOOLEAN By default we don't trust protocol path MTUs while forwarding because they could be easily forged and can lead to unwanted fragmentation by the router. You only need to enable this if you have user-space software which tries to discover path mtus by itself and depends on the kernel honoring this information. This is normally not the case. Default: 0 (disabled) Possible values: 0 - disabled 1 - enabled Which makes it pretty clear that setting it to 1 is a potential security/safety/DoS issue, and yet it is entirely reasonable to want forwarded traffic to honour explicitly administrator configured route mtus (instead of defaulting to device mtu). Indeed, I can't think of a single reason why you wouldn't want to. Since you configured a route mtu you probably know better... It is pretty common to have a higher device mtu to allow receiving large (jumbo) frames, while having some routes via that interface (potentially including the default route to the internet) specify a lower mtu. Note that ipv6 forwarding uses device mtu unless the route is locked (in which case it will use the route mtu). This approach is not usable for IPv4 where an 'mtu lock' on a route also has the side effect of disabling TCP path mtu discovery via disabling the IPv4 DF (don't frag) bit on all outgoing frames. I'm not aware of a way to lock a route from an IPv6 RA, so that also potentially seems wrong. Signed-off-by: Maciej Żenczykowski Cc: Eric Dumazet Cc: Willem de Bruijn Cc: Lorenzo Colitti Cc: Sunmeet Gill (Sunny) Cc: Vinay Paradkar Cc: Tyler Wear Cc: David Ahern Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/net/ip.h b/include/net/ip.h index b09c48d862cc..2a52787db64a 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -436,12 +436,18 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst, bool forwarding) { struct net *net = dev_net(dst->dev); + unsigned int mtu; if (net->ipv4.sysctl_ip_fwd_use_pmtu || ip_mtu_locked(dst) || !forwarding) return dst_mtu(dst); + /* 'forwarding = true' case should always honour route mtu */ + mtu = dst_metric_raw(dst, RTAX_MTU); + if (mtu) + return mtu; + return min(READ_ONCE(dst->dev->mtu), IP_MAX_MTU); } From ed46cd1d4cc4b2cf05f31fe25fc68d1a9d3589ba Mon Sep 17 00:00:00 2001 From: Xie He Date: Wed, 23 Sep 2020 11:18:18 -0700 Subject: [PATCH 148/322] drivers/net/wan/x25_asy: Correct the ndo_open and ndo_stop functions 1. Move the lapb_register/lapb_unregister calls into the ndo_open/ndo_stop functions. This makes the LAPB protocol start/stop when the network interface starts/stops. When the network interface is down, the LAPB protocol shouldn't be running and the LAPB module shoudn't be generating control frames. 2. Move netif_start_queue/netif_stop_queue into the ndo_open/ndo_stop functions. This makes the TX queue start/stop when the network interface starts/stops. (netif_stop_queue was originally in the ndo_stop function. But to make the code look better, I created a new function to use as ndo_stop, and made it call the original ndo_stop function. I moved netif_stop_queue from the original ndo_stop function to the new ndo_stop function.) Cc: Martin Schiller Signed-off-by: Xie He Signed-off-by: David S. Miller --- drivers/net/wan/x25_asy.c | 43 +++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c index 7ee980575208..c418767a890a 100644 --- a/drivers/net/wan/x25_asy.c +++ b/drivers/net/wan/x25_asy.c @@ -464,7 +464,6 @@ static int x25_asy_open(struct net_device *dev) { struct x25_asy *sl = netdev_priv(dev); unsigned long len; - int err; if (sl->tty == NULL) return -ENODEV; @@ -490,14 +489,7 @@ static int x25_asy_open(struct net_device *dev) sl->xleft = 0; sl->flags &= (1 << SLF_INUSE); /* Clear ESCAPE & ERROR flags */ - netif_start_queue(dev); - - /* - * Now attach LAPB - */ - err = lapb_register(dev, &x25_asy_callbacks); - if (err == LAPB_OK) - return 0; + return 0; /* Cleanup */ kfree(sl->xbuff); @@ -519,7 +511,6 @@ static int x25_asy_close(struct net_device *dev) if (sl->tty) clear_bit(TTY_DO_WRITE_WAKEUP, &sl->tty->flags); - netif_stop_queue(dev); sl->rcount = 0; sl->xleft = 0; spin_unlock(&sl->lock); @@ -604,7 +595,6 @@ static int x25_asy_open_tty(struct tty_struct *tty) static void x25_asy_close_tty(struct tty_struct *tty) { struct x25_asy *sl = tty->disc_data; - int err; /* First make sure we're connected. */ if (!sl || sl->magic != X25_ASY_MAGIC) @@ -615,11 +605,6 @@ static void x25_asy_close_tty(struct tty_struct *tty) dev_close(sl->dev); rtnl_unlock(); - err = lapb_unregister(sl->dev); - if (err != LAPB_OK) - pr_err("%s: lapb_unregister error: %d\n", - __func__, err); - tty->disc_data = NULL; sl->tty = NULL; x25_asy_free(sl); @@ -722,15 +707,39 @@ static int x25_asy_ioctl(struct tty_struct *tty, struct file *file, static int x25_asy_open_dev(struct net_device *dev) { + int err; struct x25_asy *sl = netdev_priv(dev); if (sl->tty == NULL) return -ENODEV; + + err = lapb_register(dev, &x25_asy_callbacks); + if (err != LAPB_OK) + return -ENOMEM; + + netif_start_queue(dev); + + return 0; +} + +static int x25_asy_close_dev(struct net_device *dev) +{ + int err; + + netif_stop_queue(dev); + + err = lapb_unregister(dev); + if (err != LAPB_OK) + pr_err("%s: lapb_unregister error: %d\n", + __func__, err); + + x25_asy_close(dev); + return 0; } static const struct net_device_ops x25_asy_netdev_ops = { .ndo_open = x25_asy_open_dev, - .ndo_stop = x25_asy_close, + .ndo_stop = x25_asy_close_dev, .ndo_start_xmit = x25_asy_xmit, .ndo_tx_timeout = x25_asy_timeout, .ndo_change_mtu = x25_asy_change_mtu, From f68910a8056f9451ee9fe7e1b962f7d90d326ad3 Mon Sep 17 00:00:00 2001 From: Luo bin Date: Thu, 24 Sep 2020 09:31:51 +0800 Subject: [PATCH 149/322] hinic: fix wrong return value of mac-set cmd It should also be regarded as an error when hw return status=4 for PF's setting mac cmd. Only if PF return status=4 to VF should this cmd be taken special treatment. Fixes: 7dd29ee12865 ("hinic: add sriov feature support") Signed-off-by: Luo bin Signed-off-by: David S. Miller --- drivers/net/ethernet/huawei/hinic/hinic_port.c | 6 +++--- drivers/net/ethernet/huawei/hinic/hinic_sriov.c | 12 ++---------- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_port.c b/drivers/net/ethernet/huawei/hinic/hinic_port.c index 02cd635d6914..eb97f2d6b1ad 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_port.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_port.c @@ -58,9 +58,9 @@ static int change_mac(struct hinic_dev *nic_dev, const u8 *addr, sizeof(port_mac_cmd), &port_mac_cmd, &out_size); if (err || out_size != sizeof(port_mac_cmd) || - (port_mac_cmd.status && - port_mac_cmd.status != HINIC_PF_SET_VF_ALREADY && - port_mac_cmd.status != HINIC_MGMT_STATUS_EXIST)) { + (port_mac_cmd.status && + (port_mac_cmd.status != HINIC_PF_SET_VF_ALREADY || !HINIC_IS_VF(hwif)) && + port_mac_cmd.status != HINIC_MGMT_STATUS_EXIST)) { dev_err(&pdev->dev, "Failed to change MAC, err: %d, status: 0x%x, out size: 0x%x\n", err, port_mac_cmd.status, out_size); return -EFAULT; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_sriov.c b/drivers/net/ethernet/huawei/hinic/hinic_sriov.c index 4d63680f2143..f8a26459ff65 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_sriov.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_sriov.c @@ -38,8 +38,7 @@ static int hinic_set_mac(struct hinic_hwdev *hwdev, const u8 *mac_addr, err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_SET_MAC, &mac_info, sizeof(mac_info), &mac_info, &out_size); if (err || out_size != sizeof(mac_info) || - (mac_info.status && mac_info.status != HINIC_PF_SET_VF_ALREADY && - mac_info.status != HINIC_MGMT_STATUS_EXIST)) { + (mac_info.status && mac_info.status != HINIC_MGMT_STATUS_EXIST)) { dev_err(&hwdev->func_to_io.hwif->pdev->dev, "Failed to set MAC, err: %d, status: 0x%x, out size: 0x%x\n", err, mac_info.status, out_size); return -EIO; @@ -503,8 +502,7 @@ struct hinic_sriov_info *hinic_get_sriov_info_by_pcidev(struct pci_dev *pdev) static int hinic_check_mac_info(u8 status, u16 vlan_id) { - if ((status && status != HINIC_MGMT_STATUS_EXIST && - status != HINIC_PF_SET_VF_ALREADY) || + if ((status && status != HINIC_MGMT_STATUS_EXIST) || (vlan_id & CHECK_IPSU_15BIT && status == HINIC_MGMT_STATUS_EXIST)) return -EINVAL; @@ -546,12 +544,6 @@ static int hinic_update_mac(struct hinic_hwdev *hwdev, u8 *old_mac, return -EINVAL; } - if (mac_info.status == HINIC_PF_SET_VF_ALREADY) { - dev_warn(&hwdev->hwif->pdev->dev, - "PF has already set VF MAC. Ignore update operation\n"); - return HINIC_PF_SET_VF_ALREADY; - } - if (mac_info.status == HINIC_MGMT_STATUS_EXIST) dev_warn(&hwdev->hwif->pdev->dev, "MAC is repeated. Ignore update operation\n"); From dba1e4660a87927bdc03c23e36fd2c81a16a7ab1 Mon Sep 17 00:00:00 2001 From: Xiaoliang Yang Date: Thu, 24 Sep 2020 09:57:46 +0800 Subject: [PATCH 150/322] net: dsa: felix: convert TAS link speed based on phylink speed state->speed holds a value of 10, 100, 1000 or 2500, but QSYS_TAG_CONFIG_LINK_SPEED expects a value of 0, 1, 2, 3. So convert the speed to a proper value. Fixes: de143c0e274b ("net: dsa: felix: Configure Time-Aware Scheduler via taprio offload") Signed-off-by: Xiaoliang Yang Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/ocelot/felix_vsc9959.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index 6855c94256f8..36db631a55e6 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -1284,8 +1284,28 @@ void vsc9959_mdio_bus_free(struct ocelot *ocelot) static void vsc9959_sched_speed_set(struct ocelot *ocelot, int port, u32 speed) { + u8 tas_speed; + + switch (speed) { + case SPEED_10: + tas_speed = OCELOT_SPEED_10; + break; + case SPEED_100: + tas_speed = OCELOT_SPEED_100; + break; + case SPEED_1000: + tas_speed = OCELOT_SPEED_1000; + break; + case SPEED_2500: + tas_speed = OCELOT_SPEED_2500; + break; + default: + tas_speed = OCELOT_SPEED_1000; + break; + } + ocelot_rmw_rix(ocelot, - QSYS_TAG_CONFIG_LINK_SPEED(speed), + QSYS_TAG_CONFIG_LINK_SPEED(tas_speed), QSYS_TAG_CONFIG_LINK_SPEED_M, QSYS_TAG_CONFIG, port); } From 4ab810a4e04ab6c851007033d39c13e6d3f55110 Mon Sep 17 00:00:00 2001 From: Xiaoliang Yang Date: Thu, 24 Sep 2020 10:11:13 +0800 Subject: [PATCH 151/322] net: mscc: ocelot: fix fields offset in SG_CONFIG_REG_3 INIT_IPS and GATE_ENABLE fields have a wrong offset in SG_CONFIG_REG_3. This register is used by stream gate control of PSFP, and it has not been used before, because PSFP is not implemented in ocelot driver. Signed-off-by: Xiaoliang Yang Signed-off-by: David S. Miller --- include/soc/mscc/ocelot_ana.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/soc/mscc/ocelot_ana.h b/include/soc/mscc/ocelot_ana.h index 841c6ec22b64..1669481d9779 100644 --- a/include/soc/mscc/ocelot_ana.h +++ b/include/soc/mscc/ocelot_ana.h @@ -252,10 +252,10 @@ #define ANA_SG_CONFIG_REG_3_LIST_LENGTH_M GENMASK(18, 16) #define ANA_SG_CONFIG_REG_3_LIST_LENGTH_X(x) (((x) & GENMASK(18, 16)) >> 16) #define ANA_SG_CONFIG_REG_3_GATE_ENABLE BIT(20) -#define ANA_SG_CONFIG_REG_3_INIT_IPS(x) (((x) << 24) & GENMASK(27, 24)) -#define ANA_SG_CONFIG_REG_3_INIT_IPS_M GENMASK(27, 24) -#define ANA_SG_CONFIG_REG_3_INIT_IPS_X(x) (((x) & GENMASK(27, 24)) >> 24) -#define ANA_SG_CONFIG_REG_3_INIT_GATE_STATE BIT(28) +#define ANA_SG_CONFIG_REG_3_INIT_IPS(x) (((x) << 21) & GENMASK(24, 21)) +#define ANA_SG_CONFIG_REG_3_INIT_IPS_M GENMASK(24, 21) +#define ANA_SG_CONFIG_REG_3_INIT_IPS_X(x) (((x) & GENMASK(24, 21)) >> 21) +#define ANA_SG_CONFIG_REG_3_INIT_GATE_STATE BIT(25) #define ANA_SG_GCL_GS_CONFIG_RSZ 0x4 From 0eb11dfe224f9bf0f7d66a68500a43a9b9c83b02 Mon Sep 17 00:00:00 2001 From: Wang Qing Date: Thu, 24 Sep 2020 14:50:24 +0800 Subject: [PATCH 152/322] net/ethernet/broadcom: fix spelling typo Modify the comment typo: "compliment" -> "complement". Signed-off-by: Wang Qing Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h index bfc0e45d4a2b..5caa75b41b73 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h @@ -284,12 +284,12 @@ #define CCM_REG_GR_ARB_TYPE 0xd015c /* [RW 2] Load (FIC0) channel group priority. The lowest priority is 0; the highest priority is 3. It is supposed; that the Store channel priority is - the compliment to 4 of the rest priorities - Aggregation channel; Load + the complement to 4 of the rest priorities - Aggregation channel; Load (FIC0) channel and Load (FIC1). */ #define CCM_REG_GR_LD0_PR 0xd0164 /* [RW 2] Load (FIC1) channel group priority. The lowest priority is 0; the highest priority is 3. It is supposed; that the Store channel priority is - the compliment to 4 of the rest priorities - Aggregation channel; Load + the complement to 4 of the rest priorities - Aggregation channel; Load (FIC0) channel and Load (FIC1). */ #define CCM_REG_GR_LD1_PR 0xd0168 /* [RW 2] General flags index. */ @@ -4489,11 +4489,11 @@ #define TCM_REG_GR_ARB_TYPE 0x50114 /* [RW 2] Load (FIC0) channel group priority. The lowest priority is 0; the highest priority is 3. It is supposed that the Store channel is the - compliment of the other 3 groups. */ + complement of the other 3 groups. */ #define TCM_REG_GR_LD0_PR 0x5011c /* [RW 2] Load (FIC1) channel group priority. The lowest priority is 0; the highest priority is 3. It is supposed that the Store channel is the - compliment of the other 3 groups. */ + complement of the other 3 groups. */ #define TCM_REG_GR_LD1_PR 0x50120 /* [RW 4] The number of double REG-pairs; loaded from the STORM context and sent to STORM; for a specific connection type. The double REG-pairs are @@ -5020,11 +5020,11 @@ #define UCM_REG_GR_ARB_TYPE 0xe0144 /* [RW 2] Load (FIC0) channel group priority. The lowest priority is 0; the highest priority is 3. It is supposed that the Store channel group is - compliment to the others. */ + complement to the others. */ #define UCM_REG_GR_LD0_PR 0xe014c /* [RW 2] Load (FIC1) channel group priority. The lowest priority is 0; the highest priority is 3. It is supposed that the Store channel group is - compliment to the others. */ + complement to the others. */ #define UCM_REG_GR_LD1_PR 0xe0150 /* [RW 2] The queue index for invalidate counter flag decision. */ #define UCM_REG_INV_CFLG_Q 0xe00e4 @@ -5523,11 +5523,11 @@ #define XCM_REG_GR_ARB_TYPE 0x2020c /* [RW 2] Load (FIC0) channel group priority. The lowest priority is 0; the highest priority is 3. It is supposed that the Channel group is the - compliment of the other 3 groups. */ + complement of the other 3 groups. */ #define XCM_REG_GR_LD0_PR 0x20214 /* [RW 2] Load (FIC1) channel group priority. The lowest priority is 0; the highest priority is 3. It is supposed that the Channel group is the - compliment of the other 3 groups. */ + complement of the other 3 groups. */ #define XCM_REG_GR_LD1_PR 0x20218 /* [RW 1] Input nig0 Interface enable. If 0 - the valid input is disregarded; acknowledge output is deasserted; all other signals are From 38f7e1c0c43dd25b06513137bb6fd35476f9ec6d Mon Sep 17 00:00:00 2001 From: Rohit Maheshwari Date: Thu, 24 Sep 2020 12:28:45 +0530 Subject: [PATCH 153/322] net/tls: race causes kernel panic BUG: kernel NULL pointer dereference, address: 00000000000000b8 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 80000008b6fef067 P4D 80000008b6fef067 PUD 8b6fe6067 PMD 0 Oops: 0000 [#1] SMP PTI CPU: 12 PID: 23871 Comm: kworker/12:80 Kdump: loaded Tainted: G S 5.9.0-rc3+ #1 Hardware name: Supermicro X10SRA-F/X10SRA-F, BIOS 2.1 03/29/2018 Workqueue: events tx_work_handler [tls] RIP: 0010:tx_work_handler+0x1b/0x70 [tls] Code: dc fe ff ff e8 16 d4 a3 f6 66 0f 1f 44 00 00 0f 1f 44 00 00 55 53 48 8b 6f 58 48 8b bd a0 04 00 00 48 85 ff 74 1c 48 8b 47 28 <48> 8b 90 b8 00 00 00 83 e2 02 75 0c f0 48 0f ba b0 b8 00 00 00 00 RSP: 0018:ffffa44ace61fe88 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ffff91da9e45cc30 RCX: dead000000000122 RDX: 0000000000000001 RSI: ffff91da9e45cc38 RDI: ffff91d95efac200 RBP: ffff91da133fd780 R08: 0000000000000000 R09: 000073746e657665 R10: 8080808080808080 R11: 0000000000000000 R12: ffff91dad7d30700 R13: ffff91dab6561080 R14: 0ffff91dad7d3070 R15: ffff91da9e45cc38 FS: 0000000000000000(0000) GS:ffff91dad7d00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000000000b8 CR3: 0000000906478003 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: process_one_work+0x1a7/0x370 worker_thread+0x30/0x370 ? process_one_work+0x370/0x370 kthread+0x114/0x130 ? kthread_park+0x80/0x80 ret_from_fork+0x22/0x30 tls_sw_release_resources_tx() waits for encrypt_pending, which can have race, so we need similar changes as in commit 0cada33241d9de205522e3858b18e506ca5cce2c here as well. Fixes: a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance") Signed-off-by: Rohit Maheshwari Acked-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/tls/tls_sw.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 9a3d9fedd7aa..95ab5545a931 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2143,10 +2143,15 @@ void tls_sw_release_resources_tx(struct sock *sk) struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); struct tls_rec *rec, *tmp; + int pending; /* Wait for any pending async encryptions to complete */ - smp_store_mb(ctx->async_notify, true); - if (atomic_read(&ctx->encrypt_pending)) + spin_lock_bh(&ctx->encrypt_compl_lock); + ctx->async_notify = true; + pending = atomic_read(&ctx->encrypt_pending); + spin_unlock_bh(&ctx->encrypt_compl_lock); + + if (pending) crypto_wait_req(-EINPROGRESS, &ctx->async_wait); tls_tx_records(sk, -1); From 912aae27c6af6605eae967ab540c5e26bd76d421 Mon Sep 17 00:00:00 2001 From: Helmut Grohne Date: Thu, 24 Sep 2020 10:37:47 +0200 Subject: [PATCH 154/322] net: dsa: microchip: really look for phy-mode in port nodes The previous implementation failed to account for the "ports" node. The actual port nodes are not child nodes of the switch node, but a "ports" node sits in between. Fixes: edecfa98f602 ("net: dsa: microchip: look for phy-mode in port nodes") Signed-off-by: Helmut Grohne Signed-off-by: David S. Miller --- drivers/net/dsa/microchip/ksz_common.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 8e755b50c9c1..c796d42730ba 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -387,8 +387,8 @@ EXPORT_SYMBOL(ksz_switch_alloc); int ksz_switch_register(struct ksz_device *dev, const struct ksz_dev_ops *ops) { + struct device_node *port, *ports; phy_interface_t interface; - struct device_node *port; unsigned int port_num; int ret; @@ -429,13 +429,17 @@ int ksz_switch_register(struct ksz_device *dev, ret = of_get_phy_mode(dev->dev->of_node, &interface); if (ret == 0) dev->compat_interface = interface; - for_each_available_child_of_node(dev->dev->of_node, port) { - if (of_property_read_u32(port, "reg", &port_num)) - continue; - if (port_num >= dev->port_cnt) - return -EINVAL; - of_get_phy_mode(port, &dev->ports[port_num].interface); - } + ports = of_get_child_by_name(dev->dev->of_node, "ports"); + if (ports) + for_each_available_child_of_node(ports, port) { + if (of_property_read_u32(port, "reg", + &port_num)) + continue; + if (port_num >= dev->port_cnt) + return -EINVAL; + of_get_phy_mode(port, + &dev->ports[port_num].interface); + } dev->synclko_125 = of_property_read_bool(dev->dev->of_node, "microchip,synclko-125"); } From 1ec8e74855588cecb2620b28b877c08f45765374 Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Thu, 24 Sep 2020 15:56:45 +0100 Subject: [PATCH 155/322] net/fsl: quieten expected MDIO access failures MDIO reads can happen during PHY probing, and printing an error with dev_err can result in a large number of error messages during device probe. On a platform with a serial console this can result in excessively long boot times in a way that looks like an infinite loop when multiple busses are present. Since 0f183fd151c (net/fsl: enable extended scanning in xgmac_mdio) we perform more scanning so there are potentially more failures. Reduce the logging level to dev_dbg which is consistent with the Freescale enetc driver. Cc: Jeremy Linton Signed-off-by: Jamie Iles Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/xgmac_mdio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/xgmac_mdio.c b/drivers/net/ethernet/freescale/xgmac_mdio.c index 98be51d8b08c..bfa2826c5545 100644 --- a/drivers/net/ethernet/freescale/xgmac_mdio.c +++ b/drivers/net/ethernet/freescale/xgmac_mdio.c @@ -229,7 +229,7 @@ static int xgmac_mdio_read(struct mii_bus *bus, int phy_id, int regnum) /* Return all Fs if nothing was there */ if ((xgmac_read32(®s->mdio_stat, endian) & MDIO_STAT_RD_ER) && !priv->has_a011043) { - dev_err(&bus->dev, + dev_dbg(&bus->dev, "Error while reading PHY%d reg at %d.%hhu\n", phy_id, dev_addr, regnum); return 0xffff; From ad2b9b0f8d0107602bdc1f3b1ab90719842ace11 Mon Sep 17 00:00:00 2001 From: Priyaranjan Jha Date: Thu, 24 Sep 2020 15:23:14 -0700 Subject: [PATCH 156/322] tcp: skip DSACKs with dubious sequence ranges Currently, we use length of DSACKed range to compute number of delivered packets. And if sequence range in DSACK is corrupted, we can get bogus dsacked/acked count, and bogus cwnd. This patch put bounds on DSACKed range to skip update of data delivery and spurious retransmission information, if the DSACK is unlikely caused by sender's action: - DSACKed range shouldn't be greater than maximum advertised rwnd. - Total no. of DSACKed segments shouldn't be greater than total no. of retransmitted segs. Unlike spurious retransmits, network duplicates or corrupted DSACKs shouldn't be counted as delivery. Signed-off-by: Priyaranjan Jha Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/uapi/linux/snmp.h | 1 + net/ipv4/proc.c | 1 + net/ipv4/tcp_input.c | 32 +++++++++++++++++++++++++------- 3 files changed, 27 insertions(+), 7 deletions(-) diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index cee9f8e6fce3..f84e7bcad6de 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -288,6 +288,7 @@ enum LINUX_MIB_TCPTIMEOUTREHASH, /* TCPTimeoutRehash */ LINUX_MIB_TCPDUPLICATEDATAREHASH, /* TCPDuplicateDataRehash */ LINUX_MIB_TCPDSACKRECVSEGS, /* TCPDSACKRecvSegs */ + LINUX_MIB_TCPDSACKIGNOREDDUBIOUS, /* TCPDSACKIgnoredDubious */ __LINUX_MIB_MAX }; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 1074df726ec0..8d5e1695b9aa 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -293,6 +293,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TcpTimeoutRehash", LINUX_MIB_TCPTIMEOUTREHASH), SNMP_MIB_ITEM("TcpDuplicateDataRehash", LINUX_MIB_TCPDUPLICATEDATAREHASH), SNMP_MIB_ITEM("TCPDSACKRecvSegs", LINUX_MIB_TCPDSACKRECVSEGS), + SNMP_MIB_ITEM("TCPDSACKIgnoredDubious", LINUX_MIB_TCPDSACKIGNOREDDUBIOUS), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 184ea556f50e..b1ce2054291d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -885,21 +885,34 @@ struct tcp_sacktag_state { struct rate_sample *rate; }; -/* Take a notice that peer is sending D-SACKs */ +/* Take a notice that peer is sending D-SACKs. Skip update of data delivery + * and spurious retransmission information if this DSACK is unlikely caused by + * sender's action: + * - DSACKed sequence range is larger than maximum receiver's window. + * - Total no. of DSACKed segments exceed the total no. of retransmitted segs. + */ static u32 tcp_dsack_seen(struct tcp_sock *tp, u32 start_seq, u32 end_seq, struct tcp_sacktag_state *state) { u32 seq_len, dup_segs = 1; - if (before(start_seq, end_seq)) { - seq_len = end_seq - start_seq; - if (seq_len > tp->mss_cache) - dup_segs = DIV_ROUND_UP(seq_len, tp->mss_cache); - } + if (!before(start_seq, end_seq)) + return 0; + + seq_len = end_seq - start_seq; + /* Dubious DSACK: DSACKed range greater than maximum advertised rwnd */ + if (seq_len > tp->max_window) + return 0; + if (seq_len > tp->mss_cache) + dup_segs = DIV_ROUND_UP(seq_len, tp->mss_cache); + + tp->dsack_dups += dup_segs; + /* Skip the DSACK if dup segs weren't retransmitted by sender */ + if (tp->dsack_dups > tp->total_retrans) + return 0; tp->rx_opt.sack_ok |= TCP_DSACK_SEEN; tp->rack.dsack_seen = 1; - tp->dsack_dups += dup_segs; state->flag |= FLAG_DSACKING_ACK; /* A spurious retransmission is delivered */ @@ -1153,6 +1166,11 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb, } dup_segs = tcp_dsack_seen(tp, start_seq_0, end_seq_0, state); + if (!dup_segs) { /* Skip dubious DSACK */ + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKIGNOREDDUBIOUS); + return false; + } + NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDSACKRECVSEGS, dup_segs); /* D-SACK for already forgotten data... Do dumb counting. */ From e94ee171349db84c7cfdc5fefbebe414054d0924 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 25 Sep 2020 14:42:56 +1000 Subject: [PATCH 157/322] xfrm: Use correct address family in xfrm_state_find The struct flowi must never be interpreted by itself as its size depends on the address family. Therefore it must always be grouped with its original family value. In this particular instance, the original family value is lost in the function xfrm_state_find. Therefore we get a bogus read when it's coupled with the wrong family which would occur with inter- family xfrm states. This patch fixes it by keeping the original family value. Note that the same bug could potentially occur in LSM through the xfrm_state_pol_flow_match hook. I checked the current code there and it seems to be safe for now as only secid is used which is part of struct flowi_common. But that API should be changed so that so that we don't get new bugs in the future. We could do that by replacing fl with just secid or adding a family field. Reported-by: syzbot+577fbac3145a6eb2e7a5@syzkaller.appspotmail.com Fixes: 48b8d78315bf ("[XFRM]: State selection update to use inner...") Signed-off-by: Herbert Xu Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 5ff392e6f3c1..efc89a92961d 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1019,7 +1019,8 @@ static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x, */ if (x->km.state == XFRM_STATE_VALID) { if ((x->sel.family && - !xfrm_selector_match(&x->sel, fl, x->sel.family)) || + (x->sel.family != family || + !xfrm_selector_match(&x->sel, fl, family))) || !security_xfrm_state_pol_flow_match(x, pol, fl)) return; @@ -1032,7 +1033,9 @@ static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x, *acq_in_progress = 1; } else if (x->km.state == XFRM_STATE_ERROR || x->km.state == XFRM_STATE_EXPIRED) { - if (xfrm_selector_match(&x->sel, fl, x->sel.family) && + if ((!x->sel.family || + (x->sel.family == family && + xfrm_selector_match(&x->sel, fl, family))) && security_xfrm_state_pol_flow_match(x, pol, fl)) *error = -ESRCH; } @@ -1072,7 +1075,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, tmpl->mode == x->props.mode && tmpl->id.proto == x->id.proto && (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) - xfrm_state_look_at(pol, x, fl, encap_family, + xfrm_state_look_at(pol, x, fl, family, &best, &acquire_in_progress, &error); } if (best || acquire_in_progress) @@ -1089,7 +1092,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, tmpl->mode == x->props.mode && tmpl->id.proto == x->id.proto && (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) - xfrm_state_look_at(pol, x, fl, encap_family, + xfrm_state_look_at(pol, x, fl, family, &best, &acquire_in_progress, &error); } From 8d214c481611b29458a57913bd786f0ac06f0605 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 23 Sep 2020 14:53:52 -0700 Subject: [PATCH 158/322] KVM: x86: Reset MMU context if guest toggles CR4.SMAP or CR4.PKE Reset the MMU context during kvm_set_cr4() if SMAP or PKE is toggled. Recent commits to (correctly) not reload PDPTRs when SMAP/PKE are toggled inadvertantly skipped the MMU context reset due to the mask of bits that triggers PDPTR loads also being used to trigger MMU context resets. Fixes: 427890aff855 ("kvm: x86: Toggling CR4.SMAP does not load PDPTEs in PAE mode") Fixes: cb957adb4ea4 ("kvm: x86: Toggling CR4.PKE does not load PDPTEs in PAE mode") Cc: Jim Mattson Cc: Peter Shier Cc: Oliver Upton Signed-off-by: Sean Christopherson Message-Id: <20200923215352.17756-1-sean.j.christopherson@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e3fe1d126cc3..58fa354b1446 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -976,6 +976,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) unsigned long old_cr4 = kvm_read_cr4(vcpu); unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_SMEP; + unsigned long mmu_role_bits = pdptr_bits | X86_CR4_SMAP | X86_CR4_PKE; if (kvm_valid_cr4(vcpu, cr4)) return 1; @@ -1003,7 +1004,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) if (kvm_x86_ops.set_cr4(vcpu, cr4)) return 1; - if (((cr4 ^ old_cr4) & pdptr_bits) || + if (((cr4 ^ old_cr4) & mmu_role_bits) || (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE))) kvm_mmu_reset_context(vcpu); From f3cd4850504ff612d0ea77a0aaf29b66c98fcefe Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 24 Sep 2020 14:55:54 -0600 Subject: [PATCH 159/322] io_uring: ensure open/openat2 name is cleaned on cancelation If we cancel these requests, we'll leak the memory associated with the filename. Add them to the table of ops that need cleaning, if REQ_F_NEED_CLEANUP is set. Cc: stable@vger.kernel.org Fixes: e62753e4e292 ("io_uring: call statx directly") Reviewed-by: Stefano Garzarella Signed-off-by: Jens Axboe --- fs/io_uring.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index e6004b92e553..0ab16df31288 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -5671,6 +5671,11 @@ static void __io_clean_op(struct io_kiocb *req) io_put_file(req, req->splice.file_in, (req->splice.flags & SPLICE_F_FD_IN_FIXED)); break; + case IORING_OP_OPENAT: + case IORING_OP_OPENAT2: + if (req->open.filename) + putname(req->open.filename); + break; } req->flags &= ~REQ_F_NEED_CLEANUP; } From 3aab91774bbd8e571cfaddaf839aafd07718333c Mon Sep 17 00:00:00 2001 From: Jeffle Xu Date: Fri, 25 Sep 2020 14:00:31 +0800 Subject: [PATCH 160/322] block: remove unused BLK_QC_T_EAGAIN flag commit 7b6620d7db56 ("block: remove REQ_NOWAIT_INLINE") removed the REQ_NOWAIT_INLINE related code, but the diff wasn't applied to blk_types.h somehow. Then commit 2771cefeac49 ("block: remove the REQ_NOWAIT_INLINE flag") removed the REQ_NOWAIT_INLINE flag while the BLK_QC_T_EAGAIN flag still remains. Fixes: 7b6620d7db56 ("block: remove REQ_NOWAIT_INLINE") Signed-off-by: Jeffle Xu Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 4ecf4fed171f..b3fc5d3dd8ea 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -497,13 +497,12 @@ static inline int op_stat_group(unsigned int op) typedef unsigned int blk_qc_t; #define BLK_QC_T_NONE -1U -#define BLK_QC_T_EAGAIN -2U #define BLK_QC_T_SHIFT 16 #define BLK_QC_T_INTERNAL (1U << 31) static inline bool blk_qc_t_valid(blk_qc_t cookie) { - return cookie != BLK_QC_T_NONE && cookie != BLK_QC_T_EAGAIN; + return cookie != BLK_QC_T_NONE; } static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie) From 75598a8fc0e0dff2aa5d46c62531b36a595f1d4f Mon Sep 17 00:00:00 2001 From: Sylwester Dziedziuch Date: Wed, 2 Sep 2020 12:54:59 +0000 Subject: [PATCH 161/322] iavf: Fix incorrect adapter get in iavf_resume When calling iavf_resume there was a crash because wrong function was used to get iavf_adapter and net_device pointers. Changed how iavf_resume is getting iavf_adapter and net_device pointers from pci_dev. Fixes: 5eae00c57f5e ("i40evf: main driver core") Signed-off-by: Sylwester Dziedziuch Reviewed-by: Aleksandr Loktionov Tested-by: Aaron Brown Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/iavf/iavf_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index d870343cf689..cf539db79af9 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -3806,8 +3806,8 @@ static int __maybe_unused iavf_suspend(struct device *dev_d) static int __maybe_unused iavf_resume(struct device *dev_d) { struct pci_dev *pdev = to_pci_dev(dev_d); - struct iavf_adapter *adapter = pci_get_drvdata(pdev); - struct net_device *netdev = adapter->netdev; + struct net_device *netdev = pci_get_drvdata(pdev); + struct iavf_adapter *adapter = netdev_priv(netdev); u32 err; pci_set_master(pdev); From 466e4392920083c58c7ee8905795453d62c4add7 Mon Sep 17 00:00:00 2001 From: Anirudh Venkataramanan Date: Wed, 2 Sep 2020 08:53:45 -0700 Subject: [PATCH 162/322] ice: Fix call trace on suspend It appears that the ice_suspend flow is missing a call to pci_save_state and this is triggering the message "State of device not saved by ice_suspend" and a call trace. Fix it. Fixes: 769c500dcc1e ("ice: Add advanced power mgmt for WoL") Signed-off-by: Anirudh Venkataramanan Tested-by: Aaron Brown Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 4634b48949bb..954f11f86f50 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -4522,6 +4522,7 @@ static int __maybe_unused ice_suspend(struct device *dev) } ice_clear_interrupt_scheme(pf); + pci_save_state(pdev); pci_wake_from_d3(pdev, pf->wol_ena); pci_set_power_state(pdev, PCI_D3hot); return 0; From 135f4b9e9340dadb78e9737bb4eb9817b9c89dac Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 2 Sep 2020 08:53:46 -0700 Subject: [PATCH 163/322] ice: fix memory leak if register_netdev_fails The ice_setup_pf_sw function can cause a memory leak if register_netdev fails, due to accidentally failing to free the VSI rings. Fix the memory leak by using ice_vsi_release, ensuring we actually go through the full teardown process. This should be safe even if the netdevice is not registered because we will have set the netdev pointer to NULL, ensuring ice_vsi_release won't call unregister_netdev. An alternative fix would be moving management of the PF VSI netdev into the main VSI setup code. This is complicated and likely requires significant refactor in how we manage VSIs Fixes: 3a858ba392c3 ("ice: Add support for VSI allocation and deallocation") Signed-off-by: Jacob Keller Tested-by: Aaron Brown Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_lib.c | 6 +++--- drivers/net/ethernet/intel/ice/ice_lib.h | 6 ------ drivers/net/ethernet/intel/ice/ice_main.c | 13 +++---------- 3 files changed, 6 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index f2682776f8c8..65cc78279501 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -246,7 +246,7 @@ static int ice_get_free_slot(void *array, int size, int curr) * ice_vsi_delete - delete a VSI from the switch * @vsi: pointer to VSI being removed */ -void ice_vsi_delete(struct ice_vsi *vsi) +static void ice_vsi_delete(struct ice_vsi *vsi) { struct ice_pf *pf = vsi->back; struct ice_vsi_ctx *ctxt; @@ -313,7 +313,7 @@ static void ice_vsi_free_arrays(struct ice_vsi *vsi) * * Returns 0 on success, negative on failure */ -int ice_vsi_clear(struct ice_vsi *vsi) +static int ice_vsi_clear(struct ice_vsi *vsi) { struct ice_pf *pf = NULL; struct device *dev; @@ -563,7 +563,7 @@ static int ice_vsi_get_qs(struct ice_vsi *vsi) * ice_vsi_put_qs - Release queues from VSI to PF * @vsi: the VSI that is going to release queues */ -void ice_vsi_put_qs(struct ice_vsi *vsi) +static void ice_vsi_put_qs(struct ice_vsi *vsi) { struct ice_pf *pf = vsi->back; int i; diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index 981f3a156c24..3da17895a2b1 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -45,10 +45,6 @@ int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc); void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create); -void ice_vsi_delete(struct ice_vsi *vsi); - -int ice_vsi_clear(struct ice_vsi *vsi); - #ifdef CONFIG_DCB int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc); #endif /* CONFIG_DCB */ @@ -79,8 +75,6 @@ bool ice_is_reset_in_progress(unsigned long *state); void ice_write_qrxflxp_cntxt(struct ice_hw *hw, u16 pf_q, u32 rxdid, u32 prio); -void ice_vsi_put_qs(struct ice_vsi *vsi); - void ice_vsi_dis_irq(struct ice_vsi *vsi); void ice_vsi_free_irq(struct ice_vsi *vsi); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 954f11f86f50..54a7f55eb8c1 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3169,10 +3169,8 @@ static int ice_setup_pf_sw(struct ice_pf *pf) return -EBUSY; vsi = ice_pf_vsi_setup(pf, pf->hw.port_info); - if (!vsi) { - status = -ENOMEM; - goto unroll_vsi_setup; - } + if (!vsi) + return -ENOMEM; status = ice_cfg_netdev(vsi); if (status) { @@ -3219,12 +3217,7 @@ unroll_napi_add: } unroll_vsi_setup: - if (vsi) { - ice_vsi_free_q_vectors(vsi); - ice_vsi_delete(vsi); - ice_vsi_put_qs(vsi); - ice_vsi_clear(vsi); - } + ice_vsi_release(vsi); return status; } From f6a07271bb1535d9549380461437cc48d9e19958 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 2 Sep 2020 08:53:47 -0700 Subject: [PATCH 164/322] ice: fix memory leak in ice_vsi_setup During ice_vsi_setup, if ice_cfg_vsi_lan fails, it does not properly release memory associated with the VSI rings. If we had used devres allocations for the rings, this would be ok. However, we use kzalloc and kfree_rcu for these ring structures. Using the correct label to cleanup the rings during ice_vsi_setup highlights an issue in the ice_vsi_clear_rings function: it can leave behind stale ring pointers in the q_vectors structure. When releasing rings, we must also ensure that no q_vector associated with the VSI will point to this ring again. To resolve this, loop over all q_vectors and release their ring mapping. Because we are about to free all rings, no q_vector should remain pointing to any of the rings in this VSI. Fixes: 5513b920a4f7 ("ice: Update Tx scheduler tree for VSI multi-Tx queue support") Signed-off-by: Jacob Keller Tested-by: Aaron Brown Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_lib.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 65cc78279501..ebbb8f54871c 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -1196,6 +1196,18 @@ static void ice_vsi_clear_rings(struct ice_vsi *vsi) { int i; + /* Avoid stale references by clearing map from vector to ring */ + if (vsi->q_vectors) { + ice_for_each_q_vector(vsi, i) { + struct ice_q_vector *q_vector = vsi->q_vectors[i]; + + if (q_vector) { + q_vector->tx.ring = NULL; + q_vector->rx.ring = NULL; + } + } + } + if (vsi->tx_rings) { for (i = 0; i < vsi->alloc_txq; i++) { if (vsi->tx_rings[i]) { @@ -2291,7 +2303,7 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, if (status) { dev_err(dev, "VSI %d failed lan queue config, error %s\n", vsi->vsi_num, ice_stat_str(status)); - goto unroll_vector_base; + goto unroll_clear_rings; } /* Add switch rule to drop all Tx Flow Control Frames, of look up From 313b085851c13ca08320372a05a7047ea25d3dd4 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 20 Aug 2020 11:18:26 -0400 Subject: [PATCH 165/322] btrfs: move btrfs_scratch_superblocks into btrfs_dev_replace_finishing We need to move the closing of the src_device out of all the device replace locking, but we definitely want to zero out the superblock before we commit the last time to make sure the device is properly removed. Handle this by pushing btrfs_scratch_superblocks into btrfs_dev_replace_finishing, and then later on we'll move the src_device closing and freeing stuff where we need it to be. Reviewed-by: Nikolay Borisov Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/dev-replace.c | 3 +++ fs/btrfs/volumes.c | 12 +++--------- fs/btrfs/volumes.h | 3 +++ 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index db93909b25e0..7cf48aeb6f14 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -745,6 +745,9 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, /* replace the sysfs entry */ btrfs_sysfs_remove_devices_dir(fs_info->fs_devices, src_device); btrfs_sysfs_update_devid(tgt_device); + if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &src_device->dev_state)) + btrfs_scratch_superblocks(fs_info, src_device->bdev, + src_device->name->str); btrfs_rm_dev_replace_free_srcdev(src_device); /* write back the superblocks */ diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 117b43367629..01ebdb69fbdb 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1999,9 +1999,9 @@ static u64 btrfs_num_devices(struct btrfs_fs_info *fs_info) return num_devices; } -static void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info, - struct block_device *bdev, - const char *device_path) +void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info, + struct block_device *bdev, + const char *device_path) { struct btrfs_super_block *disk_super; int copy_num; @@ -2224,12 +2224,6 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_device *srcdev) struct btrfs_fs_info *fs_info = srcdev->fs_info; struct btrfs_fs_devices *fs_devices = srcdev->fs_devices; - if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &srcdev->dev_state)) { - /* zero out the old super if it is writable */ - btrfs_scratch_superblocks(fs_info, srcdev->bdev, - srcdev->name->str); - } - btrfs_close_bdev(srcdev); synchronize_rcu(); btrfs_free_device(srcdev); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 5eea93916fbf..302c9234f7d0 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -573,6 +573,9 @@ void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info); void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info); bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info, struct btrfs_device *failing_dev); +void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info, + struct block_device *bdev, + const char *device_path); int btrfs_bg_type_to_factor(u64 flags); const char *btrfs_bg_type_to_raid_name(u64 flags); From d6407613c1e2ef90213dee388aa16b6e1bd08cbc Mon Sep 17 00:00:00 2001 From: "M. Vefa Bicakci" Date: Tue, 22 Sep 2020 14:07:00 +0300 Subject: [PATCH 166/322] Revert "usbip: Implement a match function to fix usbip" This commit reverts commit 7a2f2974f265 ("usbip: Implement a match function to fix usbip"). In summary, commit d5643d2249b2 ("USB: Fix device driver race") inadvertently broke usbip functionality, which I resolved in an incorrect manner by introducing a match function to usbip, usbip_match(), that unconditionally returns true. However, the usbip_match function, as is, causes usbip to take over virtual devices used by syzkaller for USB fuzzing, which is a regression reported by Andrey Konovalov. Furthermore, in conjunction with the fix of another bug, handled by another patch titled "usbcore/driver: Fix specific driver selection" in this patch set, the usbip_match function causes unexpected USB subsystem behaviour when the usbip_host driver is loaded. The unexpected behaviour can be qualified as follows: - If commit 41160802ab8e ("USB: Simplify USB ID table match") is included in the kernel, then all USB devices are bound to the usbip_host driver, which appears to the user as if all USB devices were disconnected. - If the same commit (41160802ab8e) is not in the kernel (as is the case with v5.8.10) then all USB devices are re-probed and re-bound to their original device drivers, which appears to the user as a disconnection and re-connection of USB devices. Please note that this commit will make usbip non-operational again, until yet another patch in this patch set is merged, titled "usbcore/driver: Accommodate usbip". Cc: # 5.8: 41160802ab8e: USB: Simplify USB ID table match Cc: # 5.8 Cc: Bastien Nocera Cc: Valentina Manea Cc: Shuah Khan Cc: Greg Kroah-Hartman Cc: Alan Stern Cc: Reported-by: Andrey Konovalov Tested-by: Andrey Konovalov Acked-by: Shuah Khan Signed-off-by: M. Vefa Bicakci Link: https://lore.kernel.org/r/20200922110703.720960-2-m.v.b@runbox.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/usbip/stub_dev.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/usb/usbip/stub_dev.c b/drivers/usb/usbip/stub_dev.c index 9d7d642022d1..2305d425e6c9 100644 --- a/drivers/usb/usbip/stub_dev.c +++ b/drivers/usb/usbip/stub_dev.c @@ -461,11 +461,6 @@ static void stub_disconnect(struct usb_device *udev) return; } -static bool usbip_match(struct usb_device *udev) -{ - return true; -} - #ifdef CONFIG_PM /* These functions need usb_port_suspend and usb_port_resume, @@ -491,7 +486,6 @@ struct usb_device_driver stub_driver = { .name = "usbip-host", .probe = stub_probe, .disconnect = stub_disconnect, - .match = usbip_match, #ifdef CONFIG_PM .suspend = stub_suspend, .resume = stub_resume, From aea850cd35ae3d266fe6f93fb9edb25e4a555230 Mon Sep 17 00:00:00 2001 From: "M. Vefa Bicakci" Date: Tue, 22 Sep 2020 14:07:01 +0300 Subject: [PATCH 167/322] usbcore/driver: Fix specific driver selection This commit resolves a bug in the selection/discovery of more specific USB device drivers for devices that are currently bound to generic USB device drivers. The bug is in the logic that determines whether a device currently bound to a generic USB device driver should be re-probed by a more specific USB device driver or not. The code in __usb_bus_reprobe_drivers() used to have the following lines: if (usb_device_match_id(udev, new_udriver->id_table) == NULL && (!new_udriver->match || new_udriver->match(udev) != 0)) return 0; ret = device_reprobe(dev); As the reader will notice, the code checks whether the USB device in consideration matches the identifier table (id_table) of a specific USB device_driver (new_udriver), followed by a similar check, but this time with the USB device driver's match function. However, the match function's return value is not checked correctly. When match() returns zero, it means that the specific USB device driver is *not* applicable to the USB device in question, but the code then goes on to reprobe the device with the new USB device driver under consideration. All this to say, the logic is inverted. This bug was found by code inspection and instrumentation while investigating the root cause of the issue reported by Andrey Konovalov, where usbip took over syzkaller's virtual USB devices in an undesired manner. The report is linked below. Fixes: d5643d2249b2 ("USB: Fix device driver race") Cc: # 5.8 Cc: Greg Kroah-Hartman Cc: Alan Stern Cc: Bastien Nocera Cc: Shuah Khan Cc: Valentina Manea Cc: Tested-by: Andrey Konovalov Signed-off-by: M. Vefa Bicakci Link: https://lore.kernel.org/r/20200922110703.720960-3-m.v.b@runbox.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/driver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index 7e73e989645b..715782995428 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c @@ -924,7 +924,7 @@ static int __usb_bus_reprobe_drivers(struct device *dev, void *data) udev = to_usb_device(dev); if (usb_device_match_id(udev, new_udriver->id_table) == NULL && - (!new_udriver->match || new_udriver->match(udev) != 0)) + (!new_udriver->match || new_udriver->match(udev) == 0)) return 0; ret = device_reprobe(dev); From 4df30e7603432704380b12fe40a604ee7f66746d Mon Sep 17 00:00:00 2001 From: "M. Vefa Bicakci" Date: Tue, 22 Sep 2020 14:07:02 +0300 Subject: [PATCH 168/322] usbcore/driver: Fix incorrect downcast This commit resolves a minor bug in the selection/discovery of more specific USB device drivers for devices that are currently bound to generic USB device drivers. The bug is related to the way a candidate USB device driver is compared against the generic USB device driver. The code in is_dev_usb_generic_driver() assumes that the device driver in question is a USB device driver by calling to_usb_device_driver(dev->driver) to downcast; however I have observed that this assumption is not always true, through code instrumentation. This commit avoids the incorrect downcast altogether by comparing the USB device's driver (i.e., dev->driver) to the generic USB device driver directly. This method was suggested by Alan Stern. This bug was found while investigating Andrey Konovalov's report indicating usbip device driver misbehaviour with the recently merged generic USB device driver selection feature. The report is linked below. Fixes: d5643d2249b2 ("USB: Fix device driver race") Cc: # 5.8 Cc: Greg Kroah-Hartman Cc: Alan Stern Cc: Bastien Nocera Cc: Shuah Khan Cc: Valentina Manea Cc: Tested-by: Andrey Konovalov Signed-off-by: M. Vefa Bicakci Link: https://lore.kernel.org/r/20200922110703.720960-4-m.v.b@runbox.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/driver.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index 715782995428..b00b7fb1aad1 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c @@ -905,21 +905,14 @@ static int usb_uevent(struct device *dev, struct kobj_uevent_env *env) return 0; } -static bool is_dev_usb_generic_driver(struct device *dev) -{ - struct usb_device_driver *udd = dev->driver ? - to_usb_device_driver(dev->driver) : NULL; - - return udd == &usb_generic_driver; -} - static int __usb_bus_reprobe_drivers(struct device *dev, void *data) { struct usb_device_driver *new_udriver = data; struct usb_device *udev; int ret; - if (!is_dev_usb_generic_driver(dev)) + /* Don't reprobe if current driver isn't usb_generic_driver */ + if (dev->driver != &usb_generic_driver.drvwrap.driver) return 0; udev = to_usb_device(dev); From 3fce39601a1a34d940cf62858ee01ed9dac5d459 Mon Sep 17 00:00:00 2001 From: "M. Vefa Bicakci" Date: Tue, 22 Sep 2020 14:07:03 +0300 Subject: [PATCH 169/322] usbcore/driver: Accommodate usbip Commit 88b7381a939d ("USB: Select better matching USB drivers when available") inadvertently broke usbip functionality. The commit in question allows USB device drivers to be explicitly matched with USB devices via the use of driver-provided identifier tables and match functions, which is useful for a specialised device driver to be chosen for a device that can also be handled by another, more generic, device driver. Prior, the USB device section of usb_device_match() had an unconditional "return 1" statement, which allowed user-space to bind USB devices to the usbip_host device driver, if desired. However, the aforementioned commit changed the default/fallback return value to zero. This breaks device drivers such as usbip_host, so this commit restores the legacy behaviour, but only if a device driver does not have an id_table and a match() function. In addition, if usb_device_match is called for a device driver and device pair where the device does not match the id_table of the device driver in question, then the device driver will be disqualified for the device. This allows avoiding the default case of "return 1", which prevents undesirable probe() calls to a driver even though its id_table did not match the device. Finally, this commit changes the specialised-driver-to-generic-driver transition code so that when a device driver returns -ENODEV, a more generic device driver is only considered if the current device driver does not have an id_table and a match() function. This ensures that "generic" drivers such as usbip_host will not be considered specialised device drivers and will not cause the device to be locked in to the generic device driver, when a more specialised device driver could be tried. All of these changes restore usbip functionality without regressions, ensure that the specialised/generic device driver selection logic works as expected with the usb and apple-mfi-fastcharge drivers, and do not negatively affect the use of devices provided by dummy_hcd. Fixes: 88b7381a939d ("USB: Select better matching USB drivers when available") Cc: # 5.8 Cc: Bastien Nocera Cc: Valentina Manea Cc: Shuah Khan Cc: Greg Kroah-Hartman Cc: Alan Stern Cc: Tested-by: Andrey Konovalov Acked-by: Shuah Khan Signed-off-by: M. Vefa Bicakci Link: https://lore.kernel.org/r/20200922110703.720960-5-m.v.b@runbox.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/driver.c | 37 +++++++++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index b00b7fb1aad1..b351962279e4 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c @@ -269,8 +269,30 @@ static int usb_probe_device(struct device *dev) if (error) return error; + /* Probe the USB device with the driver in hand, but only + * defer to a generic driver in case the current USB + * device driver has an id_table or a match function; i.e., + * when the device driver was explicitly matched against + * a device. + * + * If the device driver does not have either of these, + * then we assume that it can bind to any device and is + * not truly a more specialized/non-generic driver, so a + * return value of -ENODEV should not force the device + * to be handled by the generic USB driver, as there + * can still be another, more specialized, device driver. + * + * This accommodates the usbip driver. + * + * TODO: What if, in the future, there are multiple + * specialized USB device drivers for a particular device? + * In such cases, there is a need to try all matching + * specialised device drivers prior to setting the + * use_generic_driver bit. + */ error = udriver->probe(udev); - if (error == -ENODEV && udriver != &usb_generic_driver) { + if (error == -ENODEV && udriver != &usb_generic_driver && + (udriver->id_table || udriver->match)) { udev->use_generic_driver = 1; return -EPROBE_DEFER; } @@ -831,14 +853,17 @@ static int usb_device_match(struct device *dev, struct device_driver *drv) udev = to_usb_device(dev); udrv = to_usb_device_driver(drv); - if (udrv->id_table && - usb_device_match_id(udev, udrv->id_table) != NULL) { - return 1; - } + if (udrv->id_table) + return usb_device_match_id(udev, udrv->id_table) != NULL; if (udrv->match) return udrv->match(udev); - return 0; + + /* If the device driver under consideration does not have a + * id_table or a match function, then let the driver's probe + * function decide. + */ + return 1; } else if (is_usb_interface(dev)) { struct usb_interface *intf; From 62c774ed483174b994c79e0c9f596b315f1ddfaf Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 25 Sep 2020 09:01:53 -0600 Subject: [PATCH 170/322] io_uring: don't unconditionally set plug->nowait = true This causes all the bios to be submitted with REQ_NOWAIT, which can be problematic on either btrfs or on file systems that otherwise use a mix of block devices where only some of them support it. For now, just remove the setting of plug->nowait = true. Reported-by: Dan Melnic Reported-by: Brian Foster Fixes: b63534c41e20 ("io_uring: re-issue block requests that failed because of resources") Signed-off-by: Jens Axboe --- fs/io_uring.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 0ab16df31288..ad828fa19af4 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -6353,9 +6353,6 @@ static void io_submit_state_start(struct io_submit_state *state, struct io_ring_ctx *ctx, unsigned int max_ios) { blk_start_plug(&state->plug); -#ifdef CONFIG_BLOCK - state->plug.nowait = true; -#endif state->comp.nr = 0; INIT_LIST_HEAD(&state->comp.list); state->comp.ctx = ctx; From 4bb05f30483fd21ea5413eaf1182768f251cf625 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 24 Sep 2020 13:41:57 -0500 Subject: [PATCH 171/322] KVM: SVM: Add a dedicated INVD intercept routine The INVD instruction intercept performs emulation. Emulation can't be done on an SEV guest because the guest memory is encrypted. Provide a dedicated intercept routine for the INVD intercept. And since the instruction is emulated as a NOP, just skip it instead. Fixes: 1654efcbc431 ("KVM: SVM: Add KVM_SEV_INIT command") Signed-off-by: Tom Lendacky Message-Id: Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 5764b87379cf..d4e18bda19c7 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2183,6 +2183,12 @@ static int iret_interception(struct vcpu_svm *svm) return 1; } +static int invd_interception(struct vcpu_svm *svm) +{ + /* Treat an INVD instruction as a NOP and just skip it. */ + return kvm_skip_emulated_instruction(&svm->vcpu); +} + static int invlpg_interception(struct vcpu_svm *svm) { if (!static_cpu_has(X86_FEATURE_DECODEASSISTS)) @@ -2774,7 +2780,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_RDPMC] = rdpmc_interception, [SVM_EXIT_CPUID] = cpuid_interception, [SVM_EXIT_IRET] = iret_interception, - [SVM_EXIT_INVD] = emulate_on_interception, + [SVM_EXIT_INVD] = invd_interception, [SVM_EXIT_PAUSE] = pause_interception, [SVM_EXIT_HLT] = halt_interception, [SVM_EXIT_INVLPG] = invlpg_interception, From f38c7e3abfba9a9e180b34f642254c43782e7ffe Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 25 Sep 2020 15:23:43 -0600 Subject: [PATCH 172/322] io_uring: ensure async buffered read-retry is setup properly A previous commit for fixing up short reads botched the async retry path, so we ended up going to worker threads more often than we should. Fix this up, so retries work the way they originally were intended to. Fixes: 227c0c9673d8 ("io_uring: internally retry short reads") Reported-by: Hao_Xu Signed-off-by: Jens Axboe --- fs/io_uring.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index ad828fa19af4..11b8e428300d 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -3172,10 +3172,8 @@ static int io_read(struct io_kiocb *req, bool force_nonblock, goto done; /* some cases will consume bytes even on error returns */ iov_iter_revert(iter, iov_count - iov_iter_count(iter)); - ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false); - if (ret) - goto out_free; - return -EAGAIN; + ret = 0; + goto copy_iov; } else if (ret < 0) { /* make sure -ERESTARTSYS -> -EINTR is done */ goto done; From 1dac3b1bc66dc68dbb0c9f43adac71a7d0a0331a Mon Sep 17 00:00:00 2001 From: Ronak Doshi Date: Thu, 24 Sep 2020 23:11:29 -0700 Subject: [PATCH 173/322] vmxnet3: fix cksum offload issues for non-udp tunnels Commit dacce2be3312 ("vmxnet3: add geneve and vxlan tunnel offload support") added support for encapsulation offload. However, the inner offload capability is to be restrictued to UDP tunnels. This patch fixes the issue for non-udp tunnels by adding features check capability and filtering appropriate features for non-udp tunnels. Fixes: dacce2be3312 ("vmxnet3: add geneve and vxlan tunnel offload support") Signed-off-by: Ronak Doshi Signed-off-by: David S. Miller --- drivers/net/vmxnet3/vmxnet3_drv.c | 5 ++--- drivers/net/vmxnet3/vmxnet3_ethtool.c | 28 +++++++++++++++++++++++++++ drivers/net/vmxnet3/vmxnet3_int.h | 4 ++++ 3 files changed, 34 insertions(+), 3 deletions(-) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 2818015324b8..336504b7531d 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1032,7 +1032,6 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, /* Use temporary descriptor to avoid touching bits multiple times */ union Vmxnet3_GenericDesc tempTxDesc; #endif - struct udphdr *udph; count = txd_estimate(skb); @@ -1135,8 +1134,7 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, gdesc->txd.om = VMXNET3_OM_ENCAP; gdesc->txd.msscof = ctx.mss; - udph = udp_hdr(skb); - if (udph->check) + if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM) gdesc->txd.oco = 1; } else { gdesc->txd.hlen = ctx.l4_offset + ctx.l4_hdr_size; @@ -3371,6 +3369,7 @@ vmxnet3_probe_device(struct pci_dev *pdev, .ndo_change_mtu = vmxnet3_change_mtu, .ndo_fix_features = vmxnet3_fix_features, .ndo_set_features = vmxnet3_set_features, + .ndo_features_check = vmxnet3_features_check, .ndo_get_stats64 = vmxnet3_get_stats64, .ndo_tx_timeout = vmxnet3_tx_timeout, .ndo_set_rx_mode = vmxnet3_set_mc, diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c index 1014693a5ceb..7ec8652f2c26 100644 --- a/drivers/net/vmxnet3/vmxnet3_ethtool.c +++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c @@ -267,6 +267,34 @@ netdev_features_t vmxnet3_fix_features(struct net_device *netdev, return features; } +netdev_features_t vmxnet3_features_check(struct sk_buff *skb, + struct net_device *netdev, + netdev_features_t features) +{ + struct vmxnet3_adapter *adapter = netdev_priv(netdev); + + /* Validate if the tunneled packet is being offloaded by the device */ + if (VMXNET3_VERSION_GE_4(adapter) && + skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL) { + u8 l4_proto = 0; + + switch (vlan_get_protocol(skb)) { + case htons(ETH_P_IP): + l4_proto = ip_hdr(skb)->protocol; + break; + case htons(ETH_P_IPV6): + l4_proto = ipv6_hdr(skb)->nexthdr; + break; + default: + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); + } + + if (l4_proto != IPPROTO_UDP) + return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); + } + return features; +} + static void vmxnet3_enable_encap_offloads(struct net_device *netdev) { struct vmxnet3_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h index 5d2b062215a2..d958b92c9429 100644 --- a/drivers/net/vmxnet3/vmxnet3_int.h +++ b/drivers/net/vmxnet3/vmxnet3_int.h @@ -470,6 +470,10 @@ vmxnet3_rq_destroy_all(struct vmxnet3_adapter *adapter); netdev_features_t vmxnet3_fix_features(struct net_device *netdev, netdev_features_t features); +netdev_features_t +vmxnet3_features_check(struct sk_buff *skb, + struct net_device *netdev, netdev_features_t features); + int vmxnet3_set_features(struct net_device *netdev, netdev_features_t features); From ac322f86b56cb99d1c4224c209095aa67647c967 Mon Sep 17 00:00:00 2001 From: Wong Vee Khee Date: Fri, 25 Sep 2020 17:54:06 +0800 Subject: [PATCH 174/322] net: stmmac: Fix clock handling on remove path While unloading the dwmac-intel driver, clk_disable_unprepare() is being called twice in stmmac_dvr_remove() and intel_eth_pci_remove(). This causes kernel panic on the second call. Removing the second call of clk_disable_unprepare() in intel_eth_pci_remove(). Fixes: 09f012e64e4b ("stmmac: intel: Fix clock handling on error and remove paths") Cc: Andy Shevchenko Reviewed-by: Voon Weifeng Signed-off-by: Wong Vee Khee Reviewed-by: Andy Shevchenko Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index 2ac9dfb3462c..9e6d60e75f85 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -653,7 +653,6 @@ static void intel_eth_pci_remove(struct pci_dev *pdev) pci_free_irq_vectors(pdev); - clk_disable_unprepare(priv->plat->stmmac_clk); clk_unregister_fixed_rate(priv->plat->stmmac_clk); pcim_iounmap_regions(pdev, BIT(0)); From 4663ff60257aec4ee1e2e969a7c046f0aff35ab8 Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Fri, 25 Sep 2020 15:44:39 +0300 Subject: [PATCH 175/322] net: ethernet: cavium: octeon_mgmt: use phy_start and phy_stop To start also "phy state machine", with UP state as it should be, the phy_start() has to be used, in another case machine even is not triggered. After this change negotiation is supposed to be triggered by SM workqueue. It's not correct usage, but it appears after the following patch, so add it as a fix. Fixes: 74a992b3598a ("net: phy: add phy_check_link_status") Signed-off-by: Ivan Khoronzhuk Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/octeon/octeon_mgmt.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c index 3e17ce0d2314..6cb2162a75d4 100644 --- a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c +++ b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c @@ -1219,7 +1219,7 @@ static int octeon_mgmt_open(struct net_device *netdev) */ if (netdev->phydev) { netif_carrier_off(netdev); - phy_start_aneg(netdev->phydev); + phy_start(netdev->phydev); } netif_wake_queue(netdev); @@ -1247,8 +1247,10 @@ static int octeon_mgmt_stop(struct net_device *netdev) napi_disable(&p->napi); netif_stop_queue(netdev); - if (netdev->phydev) + if (netdev->phydev) { + phy_stop(netdev->phydev); phy_disconnect(netdev->phydev); + } netif_carrier_off(netdev); From f32f19339596b214c208c0dba716f4b6cc4f6958 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 25 Sep 2020 06:38:07 -0700 Subject: [PATCH 176/322] bonding: set dev->needed_headroom in bond_setup_by_slave() syzbot managed to crash a host by creating a bond with a GRE device. For non Ethernet device, bonding calls bond_setup_by_slave() instead of ether_setup(), and unfortunately dev->needed_headroom was not copied from the new added member. [ 171.243095] skbuff: skb_under_panic: text:ffffffffa184b9ea len:116 put:20 head:ffff883f84012dc0 data:ffff883f84012dbc tail:0x70 end:0xd00 dev:bond0 [ 171.243111] ------------[ cut here ]------------ [ 171.243112] kernel BUG at net/core/skbuff.c:112! [ 171.243117] invalid opcode: 0000 [#1] SMP KASAN PTI [ 171.243469] gsmi: Log Shutdown Reason 0x03 [ 171.243505] Call Trace: [ 171.243506] [ 171.243512] [] skb_push+0x49/0x50 [ 171.243516] [] ipgre_header+0x2a/0xf0 [ 171.243520] [] neigh_connected_output+0xb7/0x100 [ 171.243524] [] ip6_finish_output2+0x383/0x490 [ 171.243528] [] __ip6_finish_output+0xa2/0x110 [ 171.243531] [] ip6_finish_output+0x2c/0xa0 [ 171.243534] [] ip6_output+0x69/0x110 [ 171.243537] [] ? ip6_output+0x110/0x110 [ 171.243541] [] mld_sendpack+0x1b2/0x2d0 [ 171.243544] [] ? mld_send_report+0xf0/0xf0 [ 171.243548] [] mld_ifc_timer_expire+0x2d7/0x3b0 [ 171.243551] [] ? mld_gq_timer_expire+0x50/0x50 [ 171.243556] [] call_timer_fn+0x30/0x130 [ 171.243559] [] expire_timers+0x4c/0x110 [ 171.243563] [] __run_timers+0x213/0x260 [ 171.243566] [] ? ktime_get+0x3d/0xa0 [ 171.243570] [] ? clockevents_program_event+0x7e/0xe0 [ 171.243574] [] ? sched_clock_cpu+0x15/0x190 [ 171.243577] [] run_timer_softirq+0x1d/0x40 [ 171.243581] [] __do_softirq+0x152/0x2f0 [ 171.243585] [] irq_exit+0x9f/0xb0 [ 171.243588] [] smp_apic_timer_interrupt+0xfd/0x1a0 [ 171.243591] [] apic_timer_interrupt+0x86/0x90 Fixes: f5184d267c1a ("net: Allow netdevices to specify needed head/tailroom") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 42ef25ec0af5..14740d3053b8 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1315,6 +1315,7 @@ static void bond_setup_by_slave(struct net_device *bond_dev, bond_dev->type = slave_dev->type; bond_dev->hard_header_len = slave_dev->hard_header_len; + bond_dev->needed_headroom = slave_dev->needed_headroom; bond_dev->addr_len = slave_dev->addr_len; memcpy(bond_dev->broadcast, slave_dev->broadcast, From 89d01748b2354e210b5d4ea47bc25a42a1b42c82 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 25 Sep 2020 06:38:08 -0700 Subject: [PATCH 177/322] team: set dev->needed_headroom in team_setup_by_port() Some devices set needed_headroom. If we ignore it, we might end up crashing in various skb_push() for example in ipgre_header() since some layers assume enough headroom has been reserved. Fixes: 1d76efe1577b ("team: add support for non-ethernet devices") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/team/team.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 8c1e02752ff6..69dfb1a49cc8 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2112,6 +2112,7 @@ static void team_setup_by_port(struct net_device *dev, dev->header_ops = port_dev->header_ops; dev->type = port_dev->type; dev->hard_header_len = port_dev->hard_header_len; + dev->needed_headroom = port_dev->needed_headroom; dev->addr_len = port_dev->addr_len; dev->mtu = port_dev->mtu; memcpy(dev->broadcast, port_dev->broadcast, port_dev->addr_len); From e42d72fea91f8f2e82b65808739ca04b7a8cd7a8 Mon Sep 17 00:00:00 2001 From: Wilken Gottwalt Date: Fri, 25 Sep 2020 15:58:57 +0200 Subject: [PATCH 178/322] net: usb: ax88179_178a: add Toshiba usb 3.0 adapter Adds the driver_info and usb ids of the AX88179 based Toshiba USB 3.0 ethernet adapter. Signed-off-by: Wilken Gottwalt Signed-off-by: David S. Miller --- drivers/net/usb/ax88179_178a.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index a38e868e44d4..125f7bf57590 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1828,6 +1828,19 @@ static const struct driver_info belkin_info = { .tx_fixup = ax88179_tx_fixup, }; +static const struct driver_info toshiba_info = { + .description = "Toshiba USB Ethernet Adapter", + .bind = ax88179_bind, + .unbind = ax88179_unbind, + .status = ax88179_status, + .link_reset = ax88179_link_reset, + .reset = ax88179_reset, + .stop = ax88179_stop, + .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .rx_fixup = ax88179_rx_fixup, + .tx_fixup = ax88179_tx_fixup, +}; + static const struct usb_device_id products[] = { { /* ASIX AX88179 10/100/1000 */ @@ -1861,6 +1874,10 @@ static const struct usb_device_id products[] = { /* Belkin B2B128 USB 3.0 Hub + Gigabit Ethernet Adapter */ USB_DEVICE(0x050d, 0x0128), .driver_info = (unsigned long)&belkin_info, +}, { + /* Toshiba USB 3.0 GBit Ethernet Adapter */ + USB_DEVICE(0x0930, 0x0a13), + .driver_info = (unsigned long)&toshiba_info, }, { }, }; From 72e27c38abb3dc8da7c05f829d44b3f3c46d06da Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Fri, 25 Sep 2020 17:35:30 +0300 Subject: [PATCH 179/322] dpaa2-eth: fix command version for Tx shaping When adding the support for TBF offload, the improper command version was added even though the command format is for the V2 of dpni_set_tx_shaping(). This does not affect the functionality of TBF since the only change between these two versions is the addition of the exceeded parameters which are not used in TBF. Still, fix the bug so that we keep things in sync. Fixes: 39344a89623d ("dpaa2-eth: add API for Tx shaping") Signed-off-by: Ioana Ciornei Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h b/drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h index 593e3812af93..3c06f5fb5759 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h @@ -11,9 +11,11 @@ #define DPNI_VER_MAJOR 7 #define DPNI_VER_MINOR 0 #define DPNI_CMD_BASE_VERSION 1 +#define DPNI_CMD_2ND_VERSION 2 #define DPNI_CMD_ID_OFFSET 4 #define DPNI_CMD(id) (((id) << DPNI_CMD_ID_OFFSET) | DPNI_CMD_BASE_VERSION) +#define DPNI_CMD_V2(id) (((id) << DPNI_CMD_ID_OFFSET) | DPNI_CMD_2ND_VERSION) #define DPNI_CMDID_OPEN DPNI_CMD(0x801) #define DPNI_CMDID_CLOSE DPNI_CMD(0x800) @@ -45,7 +47,7 @@ #define DPNI_CMDID_SET_MAX_FRAME_LENGTH DPNI_CMD(0x216) #define DPNI_CMDID_GET_MAX_FRAME_LENGTH DPNI_CMD(0x217) #define DPNI_CMDID_SET_LINK_CFG DPNI_CMD(0x21A) -#define DPNI_CMDID_SET_TX_SHAPING DPNI_CMD(0x21B) +#define DPNI_CMDID_SET_TX_SHAPING DPNI_CMD_V2(0x21B) #define DPNI_CMDID_SET_MCAST_PROMISC DPNI_CMD(0x220) #define DPNI_CMDID_GET_MCAST_PROMISC DPNI_CMD(0x221) From 5e46e43c2ad92bca709944be806feb9bce044061 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 25 Sep 2020 08:26:16 -0700 Subject: [PATCH 180/322] MAINTAINERS: Add Vladimir as a maintainer for DSA Signed-off-by: Florian Fainelli Acked-by: Vladimir Oltean Acked-by: Andrew Lunn Acked-by: Vivien Didelot Signed-off-by: David S. Miller --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 9350506a1127..6dc9ebf5bf76 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12077,6 +12077,7 @@ NETWORKING [DSA] M: Andrew Lunn M: Vivien Didelot M: Florian Fainelli +M: Vladimir Oltean S: Maintained F: Documentation/devicetree/bindings/net/dsa/ F: drivers/net/dsa/ From 059432495e209279bae12db3d2b0bc8c8fe987bb Mon Sep 17 00:00:00 2001 From: Igor Russkikh Date: Fri, 25 Sep 2020 23:27:35 +0300 Subject: [PATCH 181/322] net: atlantic: fix build when object tree is separate Driver subfolder files refer parent folder includes in an absolute manner. Makefile contains a -I for this, but apparently that does not work if object tree is separated. Adding srctree to fix that. Signed-off-by: Igor Russkikh Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/Makefile b/drivers/net/ethernet/aquantia/atlantic/Makefile index 130a105d03f3..8ebcc68e807f 100644 --- a/drivers/net/ethernet/aquantia/atlantic/Makefile +++ b/drivers/net/ethernet/aquantia/atlantic/Makefile @@ -8,7 +8,7 @@ obj-$(CONFIG_AQTION) += atlantic.o -ccflags-y += -I$(src) +ccflags-y += -I$(srctree)/$(src) atlantic-objs := aq_main.o \ aq_nic.o \ @@ -33,4 +33,4 @@ atlantic-objs := aq_main.o \ atlantic-$(CONFIG_MACSEC) += aq_macsec.o -atlantic-$(CONFIG_PTP_1588_CLOCK) += aq_ptp.o \ No newline at end of file +atlantic-$(CONFIG_PTP_1588_CLOCK) += aq_ptp.o From e30d694c3381b565e043cf74b0bed059db1b4ac9 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 25 Sep 2020 08:21:14 -0700 Subject: [PATCH 182/322] Documentation/llvm: Fix clang target examples clang --target= is how we can specify a particular toolchain triple to be use, fix the two occurences in the documentation. Fixes: fcf1b6a35c16 ("Documentation/llvm: add documentation on building w/ Clang/LLVM") Signed-off-by: Florian Fainelli Reviewed-by: Nick Desaulniers Reviewed-by: Nathan Chancellor Signed-off-by: Masahiro Yamada --- Documentation/kbuild/llvm.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/kbuild/llvm.rst b/Documentation/kbuild/llvm.rst index 334df758dce3..dae90c21aed3 100644 --- a/Documentation/kbuild/llvm.rst +++ b/Documentation/kbuild/llvm.rst @@ -39,10 +39,10 @@ which can help simplify cross compiling. :: ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- make CC=clang ``CROSS_COMPILE`` is not used to prefix the Clang compiler binary, instead -``CROSS_COMPILE`` is used to set a command line flag: ``--target ``. For +``CROSS_COMPILE`` is used to set a command line flag: ``--target=``. For example: :: - clang --target aarch64-linux-gnu foo.c + clang --target=aarch64-linux-gnu foo.c LLVM Utilities -------------- From 678ff6a7afccc43d352c1b8c94b6d8c0ee1464ad Mon Sep 17 00:00:00 2001 From: Shakeel Butt Date: Sat, 26 Sep 2020 07:13:41 -0700 Subject: [PATCH 183/322] mm: slab: fix potential double free in ___cache_free With the commit 10befea91b61 ("mm: memcg/slab: use a single set of kmem_caches for all allocations"), it becomes possible to call kfree() from the slabs_destroy(). The functions cache_flusharray() and do_drain() calls slabs_destroy() on array_cache of the local CPU without updating the size of the array_cache. This enables the kfree() call from the slabs_destroy() to recursively call cache_flusharray() which can potentially call free_block() on the same elements of the array_cache of the local CPU and causing double free and memory corruption. To fix the issue, simply update the local CPU array_cache cache before calling slabs_destroy(). Fixes: 10befea91b61 ("mm: memcg/slab: use a single set of kmem_caches for all allocations") Signed-off-by: Shakeel Butt Reviewed-by: Roman Gushchin Tested-by: Ming Lei Reported-by: kernel test robot Cc: Andrew Morton Cc: Ted Ts'o Signed-off-by: Linus Torvalds --- mm/slab.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/mm/slab.c b/mm/slab.c index 3160dff6fd76..f658e86ec8ce 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1632,6 +1632,10 @@ static void slab_destroy(struct kmem_cache *cachep, struct page *page) kmem_cache_free(cachep->freelist_cache, freelist); } +/* + * Update the size of the caches before calling slabs_destroy as it may + * recursively call kfree. + */ static void slabs_destroy(struct kmem_cache *cachep, struct list_head *list) { struct page *page, *n; @@ -2153,8 +2157,8 @@ static void do_drain(void *arg) spin_lock(&n->list_lock); free_block(cachep, ac->entry, ac->avail, node, &list); spin_unlock(&n->list_lock); - slabs_destroy(cachep, &list); ac->avail = 0; + slabs_destroy(cachep, &list); } static void drain_cpu_caches(struct kmem_cache *cachep) @@ -3402,9 +3406,9 @@ free_done: } #endif spin_unlock(&n->list_lock); - slabs_destroy(cachep, &list); ac->avail -= batchcount; memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail); + slabs_destroy(cachep, &list); } /* From 41663430588c737dd735bad5a0d1ba325dcabd59 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Fri, 25 Sep 2020 21:19:01 -0700 Subject: [PATCH 184/322] mm, THP, swap: fix allocating cluster for swapfile by mistake SWP_FS is used to make swap_{read,write}page() go through the filesystem, and it's only used for swap files over NFS. So, !SWP_FS means non NFS for now, it could be either file backed or device backed. Something similar goes with legacy SWP_FILE. So in order to achieve the goal of the original patch, SWP_BLKDEV should be used instead. FS corruption can be observed with SSD device + XFS + fragmented swapfile due to CONFIG_THP_SWAP=y. I reproduced the issue with the following details: Environment: QEMU + upstream kernel + buildroot + NVMe (2 GB) Kernel config: CONFIG_BLK_DEV_NVME=y CONFIG_THP_SWAP=y Some reproducible steps: mkfs.xfs -f /dev/nvme0n1 mkdir /tmp/mnt mount /dev/nvme0n1 /tmp/mnt bs="32k" sz="1024m" # doesn't matter too much, I also tried 16m xfs_io -f -c "pwrite -R -b $bs 0 $sz" -c "fdatasync" /tmp/mnt/sw xfs_io -f -c "pwrite -R -b $bs 0 $sz" -c "fdatasync" /tmp/mnt/sw xfs_io -f -c "pwrite -R -b $bs 0 $sz" -c "fdatasync" /tmp/mnt/sw xfs_io -f -c "pwrite -F -S 0 -b $bs 0 $sz" -c "fdatasync" /tmp/mnt/sw xfs_io -f -c "pwrite -R -b $bs 0 $sz" -c "fsync" /tmp/mnt/sw mkswap /tmp/mnt/sw swapon /tmp/mnt/sw stress --vm 2 --vm-bytes 600M # doesn't matter too much as well Symptoms: - FS corruption (e.g. checksum failure) - memory corruption at: 0xd2808010 - segfault Fixes: f0eea189e8e9 ("mm, THP, swap: Don't allocate huge cluster for file backed swap device") Fixes: 38d8b4e6bdc8 ("mm, THP, swap: delay splitting THP during swap out") Signed-off-by: Gao Xiang Signed-off-by: Andrew Morton Reviewed-by: "Huang, Ying" Reviewed-by: Yang Shi Acked-by: Rafael Aquini Cc: Matthew Wilcox Cc: Carlos Maiolino Cc: Eric Sandeen Cc: Dave Chinner Cc: Link: https://lkml.kernel.org/r/20200820045323.7809-1-hsiangkao@redhat.com Signed-off-by: Linus Torvalds --- mm/swapfile.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index 12f59e641b5e..debc94155f74 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1078,7 +1078,7 @@ start_over: goto nextsi; } if (size == SWAPFILE_CLUSTER) { - if (!(si->flags & SWP_FS)) + if (si->flags & SWP_BLKDEV) n_ret = swap_alloc_cluster(si, swp_entries); } else n_ret = scan_swap_map_slots(si, SWAP_HAS_CACHE, From 8d3fe09d8d6645dcbbe2413cde58f51ceb6545a6 Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Fri, 25 Sep 2020 21:19:05 -0700 Subject: [PATCH 185/322] mm: memcontrol: fix missing suffix of workingset_restore We forget to add the suffix to the workingset_restore string, so fix it. And also update the documentation of cgroup-v2.rst. Fixes: 170b04b7ae49 ("mm/workingset: prepare the workingset detection infrastructure for anon LRU") Signed-off-by: Muchun Song Signed-off-by: Andrew Morton Reviewed-by: Shakeel Butt Cc: Joonsoo Kim Cc: Johannes Weiner Cc: Vlastimil Babka Cc: Tejun Heo Cc: Zefan Li Cc: Jonathan Corbet Cc: Michal Hocko Cc: Vladimir Davydov Cc: Roman Gushchin Cc: Randy Dunlap Link: https://lkml.kernel.org/r/20200916100030.71698-1-songmuchun@bytedance.com Signed-off-by: Linus Torvalds --- Documentation/admin-guide/cgroup-v2.rst | 25 ++++++++++++++++++------- mm/memcontrol.c | 4 ++-- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index 6be43781ec7f..baa07b30845e 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -1324,15 +1324,26 @@ PAGE_SIZE multiple when read back. pgmajfault Number of major page faults incurred - workingset_refault - Number of refaults of previously evicted pages + workingset_refault_anon + Number of refaults of previously evicted anonymous pages. - workingset_activate - Number of refaulted pages that were immediately activated + workingset_refault_file + Number of refaults of previously evicted file pages. - workingset_restore - Number of restored pages which have been detected as an active - workingset before they got reclaimed. + workingset_activate_anon + Number of refaulted anonymous pages that were immediately + activated. + + workingset_activate_file + Number of refaulted file pages that were immediately activated. + + workingset_restore_anon + Number of restored anonymous pages which have been detected as + an active workingset before they got reclaimed. + + workingset_restore_file + Number of restored file pages which have been detected as an + active workingset before they got reclaimed. workingset_nodereclaim Number of times a shadow node has been reclaimed diff --git a/mm/memcontrol.c b/mm/memcontrol.c index cfa6cbad21d5..6877c765b8d0 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1538,9 +1538,9 @@ static char *memory_stat_format(struct mem_cgroup *memcg) memcg_page_state(memcg, WORKINGSET_ACTIVATE_ANON)); seq_buf_printf(&s, "workingset_activate_file %lu\n", memcg_page_state(memcg, WORKINGSET_ACTIVATE_FILE)); - seq_buf_printf(&s, "workingset_restore %lu\n", + seq_buf_printf(&s, "workingset_restore_anon %lu\n", memcg_page_state(memcg, WORKINGSET_RESTORE_ANON)); - seq_buf_printf(&s, "workingset_restore %lu\n", + seq_buf_printf(&s, "workingset_restore_file %lu\n", memcg_page_state(memcg, WORKINGSET_RESTORE_FILE)); seq_buf_printf(&s, "workingset_nodereclaim %lu\n", memcg_page_state(memcg, WORKINGSET_NODERECLAIM)); From d3f7b1bb204099f2f7306318896223e8599bb6a2 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 25 Sep 2020 21:19:10 -0700 Subject: [PATCH 186/322] mm/gup: fix gup_fast with dynamic page table folding Currently to make sure that every page table entry is read just once gup_fast walks perform READ_ONCE and pass pXd value down to the next gup_pXd_range function by value e.g.: static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) ... pudp = pud_offset(&p4d, addr); This function passes a reference on that local value copy to pXd_offset, and might get the very same pointer in return. This happens when the level is folded (on most arches), and that pointer should not be iterated. On s390 due to the fact that each task might have different 5,4 or 3-level address translation and hence different levels folded the logic is more complex and non-iteratable pointer to a local copy leads to severe problems. Here is an example of what happens with gup_fast on s390, for a task with 3-level paging, crossing a 2 GB pud boundary: // addr = 0x1007ffff000, end = 0x10080001000 static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { unsigned long next; pud_t *pudp; // pud_offset returns &p4d itself (a pointer to a value on stack) pudp = pud_offset(&p4d, addr); do { // on second iteratation reading "random" stack value pud_t pud = READ_ONCE(*pudp); // next = 0x10080000000, due to PUD_SIZE/MASK != PGDIR_SIZE/MASK on s390 next = pud_addr_end(addr, end); ... } while (pudp++, addr = next, addr != end); // pudp++ iterating over stack return 1; } This happens since s390 moved to common gup code with commit d1874a0c2805 ("s390/mm: make the pxd_offset functions more robust") and commit 1a42010cdc26 ("s390/mm: convert to the generic get_user_pages_fast code"). s390 tried to mimic static level folding by changing pXd_offset primitives to always calculate top level page table offset in pgd_offset and just return the value passed when pXd_offset has to act as folded. What is crucial for gup_fast and what has been overlooked is that PxD_SIZE/MASK and thus pXd_addr_end should also change correspondingly. And the latter is not possible with dynamic folding. To fix the issue in addition to pXd values pass original pXdp pointers down to gup_pXd_range functions. And introduce pXd_offset_lockless helpers, which take an additional pXd entry value parameter. This has already been discussed in https://lkml.kernel.org/r/20190418100218.0a4afd51@mschwideX1 Fixes: 1a42010cdc26 ("s390/mm: convert to the generic get_user_pages_fast code") Signed-off-by: Vasily Gorbik Signed-off-by: Andrew Morton Reviewed-by: Gerald Schaefer Reviewed-by: Alexander Gordeev Reviewed-by: Jason Gunthorpe Reviewed-by: Mike Rapoport Reviewed-by: John Hubbard Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Dave Hansen Cc: Russell King Cc: Catalin Marinas Cc: Will Deacon Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Jeff Dike Cc: Richard Weinberger Cc: Dave Hansen Cc: Andy Lutomirski Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Arnd Bergmann Cc: Andrey Ryabinin Cc: Heiko Carstens Cc: Christian Borntraeger Cc: Claudio Imbrenda Cc: [5.2+] Link: https://lkml.kernel.org/r/patch.git-943f1e5dcff2.your-ad-here.call-01599856292-ext-8676@work.hours Signed-off-by: Linus Torvalds --- arch/s390/include/asm/pgtable.h | 42 +++++++++++++++++++++++---------- include/linux/pgtable.h | 10 ++++++++ mm/gup.c | 18 +++++++------- 3 files changed, 49 insertions(+), 21 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 7eb01a5459cd..b55561cc8786 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1260,26 +1260,44 @@ static inline pgd_t *pgd_offset_raw(pgd_t *pgd, unsigned long address) #define pgd_offset(mm, address) pgd_offset_raw(READ_ONCE((mm)->pgd), address) -static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) +static inline p4d_t *p4d_offset_lockless(pgd_t *pgdp, pgd_t pgd, unsigned long address) { - if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R1) - return (p4d_t *) pgd_deref(*pgd) + p4d_index(address); - return (p4d_t *) pgd; + if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R1) + return (p4d_t *) pgd_deref(pgd) + p4d_index(address); + return (p4d_t *) pgdp; +} +#define p4d_offset_lockless p4d_offset_lockless + +static inline p4d_t *p4d_offset(pgd_t *pgdp, unsigned long address) +{ + return p4d_offset_lockless(pgdp, *pgdp, address); } -static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) +static inline pud_t *pud_offset_lockless(p4d_t *p4dp, p4d_t p4d, unsigned long address) { - if ((p4d_val(*p4d) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R2) - return (pud_t *) p4d_deref(*p4d) + pud_index(address); - return (pud_t *) p4d; + if ((p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R2) + return (pud_t *) p4d_deref(p4d) + pud_index(address); + return (pud_t *) p4dp; +} +#define pud_offset_lockless pud_offset_lockless + +static inline pud_t *pud_offset(p4d_t *p4dp, unsigned long address) +{ + return pud_offset_lockless(p4dp, *p4dp, address); } #define pud_offset pud_offset -static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) +static inline pmd_t *pmd_offset_lockless(pud_t *pudp, pud_t pud, unsigned long address) { - if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R3) - return (pmd_t *) pud_deref(*pud) + pmd_index(address); - return (pmd_t *) pud; + if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R3) + return (pmd_t *) pud_deref(pud) + pmd_index(address); + return (pmd_t *) pudp; +} +#define pmd_offset_lockless pmd_offset_lockless + +static inline pmd_t *pmd_offset(pud_t *pudp, unsigned long address) +{ + return pmd_offset_lockless(pudp, *pudp, address); } #define pmd_offset pmd_offset diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index e8cbc2e795d5..90654cb63e9e 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1427,6 +1427,16 @@ typedef unsigned int pgtbl_mod_mask; #define mm_pmd_folded(mm) __is_defined(__PAGETABLE_PMD_FOLDED) #endif +#ifndef p4d_offset_lockless +#define p4d_offset_lockless(pgdp, pgd, address) p4d_offset(&(pgd), address) +#endif +#ifndef pud_offset_lockless +#define pud_offset_lockless(p4dp, p4d, address) pud_offset(&(p4d), address) +#endif +#ifndef pmd_offset_lockless +#define pmd_offset_lockless(pudp, pud, address) pmd_offset(&(pud), address) +#endif + /* * p?d_leaf() - true if this entry is a final mapping to a physical address. * This differs from p?d_huge() by the fact that they are always available (if diff --git a/mm/gup.c b/mm/gup.c index e5739a1974d5..578bf5bd8bf8 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -2485,13 +2485,13 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr, return 1; } -static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, +static int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { unsigned long next; pmd_t *pmdp; - pmdp = pmd_offset(&pud, addr); + pmdp = pmd_offset_lockless(pudp, pud, addr); do { pmd_t pmd = READ_ONCE(*pmdp); @@ -2528,13 +2528,13 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, return 1; } -static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end, +static int gup_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { unsigned long next; pud_t *pudp; - pudp = pud_offset(&p4d, addr); + pudp = pud_offset_lockless(p4dp, p4d, addr); do { pud_t pud = READ_ONCE(*pudp); @@ -2549,20 +2549,20 @@ static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end, if (!gup_huge_pd(__hugepd(pud_val(pud)), addr, PUD_SHIFT, next, flags, pages, nr)) return 0; - } else if (!gup_pmd_range(pud, addr, next, flags, pages, nr)) + } else if (!gup_pmd_range(pudp, pud, addr, next, flags, pages, nr)) return 0; } while (pudp++, addr = next, addr != end); return 1; } -static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end, +static int gup_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { unsigned long next; p4d_t *p4dp; - p4dp = p4d_offset(&pgd, addr); + p4dp = p4d_offset_lockless(pgdp, pgd, addr); do { p4d_t p4d = READ_ONCE(*p4dp); @@ -2574,7 +2574,7 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end, if (!gup_huge_pd(__hugepd(p4d_val(p4d)), addr, P4D_SHIFT, next, flags, pages, nr)) return 0; - } else if (!gup_pud_range(p4d, addr, next, flags, pages, nr)) + } else if (!gup_pud_range(p4dp, p4d, addr, next, flags, pages, nr)) return 0; } while (p4dp++, addr = next, addr != end); @@ -2602,7 +2602,7 @@ static void gup_pgd_range(unsigned long addr, unsigned long end, if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr, PGDIR_SHIFT, next, flags, pages, nr)) return; - } else if (!gup_p4d_range(pgd, addr, next, flags, pages, nr)) + } else if (!gup_p4d_range(pgdp, pgd, addr, next, flags, pages, nr)) return; } while (pgdp++, addr = next, addr != end); } From 6c5c7b9f3352728af490183f71d350bb658ffb75 Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Fri, 25 Sep 2020 21:19:14 -0700 Subject: [PATCH 187/322] mm/migrate: correct thp migration stats PageTransHuge returns true for both thp and hugetlb, so thp stats was counting both thp and hugetlb migrations. Exclude hugetlb migration by setting is_thp variable right. Clean up thp handling code too when we are there. Fixes: 1a5bae25e3cf ("mm/vmstat: add events for THP migration without split") Signed-off-by: Zi Yan Signed-off-by: Andrew Morton Reviewed-by: Daniel Jordan Cc: Anshuman Khandual Link: https://lkml.kernel.org/r/20200917210413.1462975-1-zi.yan@sent.com Signed-off-by: Linus Torvalds --- mm/migrate.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index aecb1433cf3c..04a98bb2f568 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1446,7 +1446,7 @@ retry: * Capture required information that might get lost * during migration. */ - is_thp = PageTransHuge(page); + is_thp = PageTransHuge(page) && !PageHuge(page); nr_subpages = thp_nr_pages(page); cond_resched(); @@ -1472,7 +1472,7 @@ retry: * we encounter them after the rest of the list * is processed. */ - if (PageTransHuge(page) && !PageHuge(page)) { + if (is_thp) { lock_page(page); rc = split_huge_page_to_list(page, from); unlock_page(page); @@ -1481,8 +1481,7 @@ retry: nr_thp_split++; goto retry; } - } - if (is_thp) { + nr_thp_failed++; nr_failed += nr_subpages; goto out; From 1e1b6d63d6340764e00356873e5794225a2a03ea Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Fri, 25 Sep 2020 21:19:18 -0700 Subject: [PATCH 188/322] lib/string.c: implement stpcpy LLVM implemented a recent "libcall optimization" that lowers calls to `sprintf(dest, "%s", str)` where the return value is used to `stpcpy(dest, str) - dest`. This generally avoids the machinery involved in parsing format strings. `stpcpy` is just like `strcpy` except it returns the pointer to the new tail of `dest`. This optimization was introduced into clang-12. Implement this so that we don't observe linkage failures due to missing symbol definitions for `stpcpy`. Similar to last year's fire drill with: commit 5f074f3e192f ("lib/string.c: implement a basic bcmp") The kernel is somewhere between a "freestanding" environment (no full libc) and "hosted" environment (many symbols from libc exist with the same type, function signature, and semantics). As Peter Anvin notes, there's not really a great way to inform the compiler that you're targeting a freestanding environment but would like to opt-in to some libcall optimizations (see pr/47280 below), rather than opt-out. Arvind notes, -fno-builtin-* behaves slightly differently between GCC and Clang, and Clang is missing many __builtin_* definitions, which I consider a bug in Clang and am working on fixing. Masahiro summarizes the subtle distinction between compilers justly: To prevent transformation from foo() into bar(), there are two ways in Clang to do that; -fno-builtin-foo, and -fno-builtin-bar. There is only one in GCC; -fno-buitin-foo. (Any difference in that behavior in Clang is likely a bug from a missing __builtin_* definition.) Masahiro also notes: We want to disable optimization from foo() to bar(), but we may still benefit from the optimization from foo() into something else. If GCC implements the same transform, we would run into a problem because it is not -fno-builtin-bar, but -fno-builtin-foo that disables that optimization. In this regard, -fno-builtin-foo would be more future-proof than -fno-built-bar, but -fno-builtin-foo is still potentially overkill. We may want to prevent calls from foo() being optimized into calls to bar(), but we still may want other optimization on calls to foo(). It seems that compilers today don't quite provide the fine grain control over which libcall optimizations pseudo-freestanding environments would prefer. Finally, Kees notes that this interface is unsafe, so we should not encourage its use. As such, I've removed the declaration from any header, but it still needs to be exported to avoid linkage errors in modules. Reported-by: Sami Tolvanen Suggested-by: Andy Lavr Suggested-by: Arvind Sankar Suggested-by: Joe Perches Suggested-by: Kees Cook Suggested-by: Masahiro Yamada Suggested-by: Rasmus Villemoes Signed-off-by: Nick Desaulniers Signed-off-by: Andrew Morton Tested-by: Nathan Chancellor Cc: Link: https://lkml.kernel.org/r/20200914161643.938408-1-ndesaulniers@google.com Link: https://bugs.llvm.org/show_bug.cgi?id=47162 Link: https://bugs.llvm.org/show_bug.cgi?id=47280 Link: https://github.com/ClangBuiltLinux/linux/issues/1126 Link: https://man7.org/linux/man-pages/man3/stpcpy.3.html Link: https://pubs.opengroup.org/onlinepubs/9699919799/functions/stpcpy.html Link: https://reviews.llvm.org/D85963 Signed-off-by: Linus Torvalds --- lib/string.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/lib/string.c b/lib/string.c index 6012c385fb31..4288e0158d47 100644 --- a/lib/string.c +++ b/lib/string.c @@ -272,6 +272,30 @@ ssize_t strscpy_pad(char *dest, const char *src, size_t count) } EXPORT_SYMBOL(strscpy_pad); +/** + * stpcpy - copy a string from src to dest returning a pointer to the new end + * of dest, including src's %NUL-terminator. May overrun dest. + * @dest: pointer to end of string being copied into. Must be large enough + * to receive copy. + * @src: pointer to the beginning of string being copied from. Must not overlap + * dest. + * + * stpcpy differs from strcpy in a key way: the return value is a pointer + * to the new %NUL-terminating character in @dest. (For strcpy, the return + * value is a pointer to the start of @dest). This interface is considered + * unsafe as it doesn't perform bounds checking of the inputs. As such it's + * not recommended for usage. Instead, its definition is provided in case + * the compiler lowers other libcalls to stpcpy. + */ +char *stpcpy(char *__restrict__ dest, const char *__restrict__ src); +char *stpcpy(char *__restrict__ dest, const char *__restrict__ src) +{ + while ((*dest++ = *src++) != '\0') + /* nothing */; + return --dest; +} +EXPORT_SYMBOL(stpcpy); + #ifndef __HAVE_ARCH_STRCAT /** * strcat - Append one %NUL-terminated string to another From ffa550cd691b8ba6c6117ce986516635afe03c13 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Fri, 25 Sep 2020 21:19:21 -0700 Subject: [PATCH 189/322] lib/memregion.c: include memregion.h This addresses the following sparse warning: lib/memregion.c:8:5: warning: symbol 'memregion_alloc' was not declared. Should it be static? lib/memregion.c:14:6: warning: symbol 'memregion_free' was not declared. Should it be static? Reported-by: Hulk Robot Signed-off-by: Jason Yan Signed-off-by: Andrew Morton Link: https://lkml.kernel.org/r/20200921142852.875312-1-yanaijie@huawei.com Signed-off-by: Linus Torvalds --- lib/memregion.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/memregion.c b/lib/memregion.c index 77c85b5251da..be5cfa5a3b57 100644 --- a/lib/memregion.c +++ b/lib/memregion.c @@ -2,6 +2,7 @@ /* identifiers for device / performance-differentiated memory regions */ #include #include +#include static DEFINE_IDA(memregion_ids); From a1cd6c2ae47ee10ff21e62475685d5b399e2ed4a Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 25 Sep 2020 21:19:24 -0700 Subject: [PATCH 190/322] arch/x86/lib/usercopy_64.c: fix __copy_user_flushcache() cache writeback If we copy less than 8 bytes and if the destination crosses a cache line, __copy_user_flushcache would invalidate only the first cache line. This patch makes it invalidate the second cache line as well. Fixes: 0aed55af88345b ("x86, uaccess: introduce copy_from_iter_flushcache for pmem / cache-bypass operations") Signed-off-by: Mikulas Patocka Signed-off-by: Andrew Morton Reviewed-by: Dan Williams Cc: Jan Kara Cc: Jeff Moyer Cc: Ingo Molnar Cc: Christoph Hellwig Cc: Toshi Kani Cc: "H. Peter Anvin" Cc: Al Viro Cc: Thomas Gleixner Cc: Matthew Wilcox Cc: Ross Zwisler Cc: Ingo Molnar Cc: Link: https://lkml.kernel.org/r/alpine.LRH.2.02.2009161451140.21915@file01.intranet.prod.int.rdu2.redhat.com Signed-off-by: Linus Torvalds --- arch/x86/lib/usercopy_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index b0dfac3d3df7..1847e993ac63 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -120,7 +120,7 @@ long __copy_user_flushcache(void *dst, const void __user *src, unsigned size) */ if (size < 8) { if (!IS_ALIGNED(dest, 4) || size != 4) - clean_cache_range(dst, 1); + clean_cache_range(dst, size); } else { if (!IS_ALIGNED(dest, 8)) { dest = ALIGN(dest, boot_cpu_data.x86_clflush_size); From c1d0da83358a2316d9be7f229f26126dbaa07468 Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Fri, 25 Sep 2020 21:19:28 -0700 Subject: [PATCH 191/322] mm: replace memmap_context by meminit_context Patch series "mm: fix memory to node bad links in sysfs", v3. Sometimes, firmware may expose interleaved memory layout like this: Early memory node ranges node 1: [mem 0x0000000000000000-0x000000011fffffff] node 2: [mem 0x0000000120000000-0x000000014fffffff] node 1: [mem 0x0000000150000000-0x00000001ffffffff] node 0: [mem 0x0000000200000000-0x000000048fffffff] node 2: [mem 0x0000000490000000-0x00000007ffffffff] In that case, we can see memory blocks assigned to multiple nodes in sysfs: $ ls -l /sys/devices/system/memory/memory21 total 0 lrwxrwxrwx 1 root root 0 Aug 24 05:27 node1 -> ../../node/node1 lrwxrwxrwx 1 root root 0 Aug 24 05:27 node2 -> ../../node/node2 -rw-r--r-- 1 root root 65536 Aug 24 05:27 online -r--r--r-- 1 root root 65536 Aug 24 05:27 phys_device -r--r--r-- 1 root root 65536 Aug 24 05:27 phys_index drwxr-xr-x 2 root root 0 Aug 24 05:27 power -r--r--r-- 1 root root 65536 Aug 24 05:27 removable -rw-r--r-- 1 root root 65536 Aug 24 05:27 state lrwxrwxrwx 1 root root 0 Aug 24 05:25 subsystem -> ../../../../bus/memory -rw-r--r-- 1 root root 65536 Aug 24 05:25 uevent -r--r--r-- 1 root root 65536 Aug 24 05:27 valid_zones The same applies in the node's directory with a memory21 link in both the node1 and node2's directory. This is wrong but doesn't prevent the system to run. However when later, one of these memory blocks is hot-unplugged and then hot-plugged, the system is detecting an inconsistency in the sysfs layout and a BUG_ON() is raised: kernel BUG at /Users/laurent/src/linux-ppc/mm/memory_hotplug.c:1084! LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries Modules linked in: rpadlpar_io rpaphp pseries_rng rng_core vmx_crypto gf128mul binfmt_misc ip_tables x_tables xfs libcrc32c crc32c_vpmsum autofs4 CPU: 8 PID: 10256 Comm: drmgr Not tainted 5.9.0-rc1+ #25 Call Trace: add_memory_resource+0x23c/0x340 (unreliable) __add_memory+0x5c/0xf0 dlpar_add_lmb+0x1b4/0x500 dlpar_memory+0x1f8/0xb80 handle_dlpar_errorlog+0xc0/0x190 dlpar_store+0x198/0x4a0 kobj_attr_store+0x30/0x50 sysfs_kf_write+0x64/0x90 kernfs_fop_write+0x1b0/0x290 vfs_write+0xe8/0x290 ksys_write+0xdc/0x130 system_call_exception+0x160/0x270 system_call_common+0xf0/0x27c This has been seen on PowerPC LPAR. The root cause of this issue is that when node's memory is registered, the range used can overlap another node's range, thus the memory block is registered to multiple nodes in sysfs. There are two issues here: (a) The sysfs memory and node's layouts are broken due to these multiple links (b) The link errors in link_mem_sections() should not lead to a system panic. To address (a) register_mem_sect_under_node should not rely on the system state to detect whether the link operation is triggered by a hot plug operation or not. This is addressed by the patches 1 and 2 of this series. Issue (b) will be addressed separately. This patch (of 2): The memmap_context enum is used to detect whether a memory operation is due to a hot-add operation or happening at boot time. Make it general to the hotplug operation and rename it as meminit_context. There is no functional change introduced by this patch Suggested-by: David Hildenbrand Signed-off-by: Laurent Dufour Signed-off-by: Andrew Morton Reviewed-by: David Hildenbrand Reviewed-by: Oscar Salvador Acked-by: Michal Hocko Cc: Greg Kroah-Hartman Cc: "Rafael J . Wysocki" Cc: Nathan Lynch Cc: Scott Cheloha Cc: Tony Luck Cc: Fenghua Yu Cc: Link: https://lkml.kernel.org/r/20200915094143.79181-1-ldufour@linux.ibm.com Link: https://lkml.kernel.org/r/20200915132624.9723-1-ldufour@linux.ibm.com Signed-off-by: Linus Torvalds --- arch/ia64/mm/init.c | 6 +++--- include/linux/mm.h | 2 +- include/linux/mmzone.h | 11 ++++++++--- mm/memory_hotplug.c | 2 +- mm/page_alloc.c | 10 +++++----- 5 files changed, 18 insertions(+), 13 deletions(-) diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 0b3fb4c7af29..8e7b8c6c576e 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -538,7 +538,7 @@ virtual_memmap_init(u64 start, u64 end, void *arg) if (map_start < map_end) memmap_init_zone((unsigned long)(map_end - map_start), args->nid, args->zone, page_to_pfn(map_start), - MEMMAP_EARLY, NULL); + MEMINIT_EARLY, NULL); return 0; } @@ -547,8 +547,8 @@ memmap_init (unsigned long size, int nid, unsigned long zone, unsigned long start_pfn) { if (!vmem_map) { - memmap_init_zone(size, nid, zone, start_pfn, MEMMAP_EARLY, - NULL); + memmap_init_zone(size, nid, zone, start_pfn, + MEMINIT_EARLY, NULL); } else { struct page *start; struct memmap_init_callback_data args; diff --git a/include/linux/mm.h b/include/linux/mm.h index b2f370f0b420..48614513eb66 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2416,7 +2416,7 @@ extern int __meminit __early_pfn_to_nid(unsigned long pfn, extern void set_dma_reserve(unsigned long new_dma_reserve); extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long, - enum memmap_context, struct vmem_altmap *); + enum meminit_context, struct vmem_altmap *); extern void setup_per_zone_wmarks(void); extern int __meminit init_per_zone_wmark_min(void); extern void mem_init(void); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 8379432f4f2f..0f7a4ff4b059 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -824,10 +824,15 @@ bool zone_watermark_ok(struct zone *z, unsigned int order, unsigned int alloc_flags); bool zone_watermark_ok_safe(struct zone *z, unsigned int order, unsigned long mark, int highest_zoneidx); -enum memmap_context { - MEMMAP_EARLY, - MEMMAP_HOTPLUG, +/* + * Memory initialization context, use to differentiate memory added by + * the platform statically or via memory hotplug interface. + */ +enum meminit_context { + MEMINIT_EARLY, + MEMINIT_HOTPLUG, }; + extern void init_currently_empty_zone(struct zone *zone, unsigned long start_pfn, unsigned long size); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index b11a269e2356..345308a8bd15 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -729,7 +729,7 @@ void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, * are reserved so nobody should be touching them so we should be safe */ memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn, - MEMMAP_HOTPLUG, altmap); + MEMINIT_HOTPLUG, altmap); set_zone_contiguous(zone); } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index fab5e97dc9ca..5661fa164f13 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5975,7 +5975,7 @@ overlap_memmap_init(unsigned long zone, unsigned long *pfn) * done. Non-atomic initialization, single-pass. */ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, - unsigned long start_pfn, enum memmap_context context, + unsigned long start_pfn, enum meminit_context context, struct vmem_altmap *altmap) { unsigned long pfn, end_pfn = start_pfn + size; @@ -6007,7 +6007,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, * There can be holes in boot-time mem_map[]s handed to this * function. They do not exist on hotplugged memory. */ - if (context == MEMMAP_EARLY) { + if (context == MEMINIT_EARLY) { if (overlap_memmap_init(zone, &pfn)) continue; if (defer_init(nid, pfn, end_pfn)) @@ -6016,7 +6016,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, page = pfn_to_page(pfn); __init_single_page(page, pfn, zone, nid); - if (context == MEMMAP_HOTPLUG) + if (context == MEMINIT_HOTPLUG) __SetPageReserved(page); /* @@ -6099,7 +6099,7 @@ void __ref memmap_init_zone_device(struct zone *zone, * check here not to call set_pageblock_migratetype() against * pfn out of zone. * - * Please note that MEMMAP_HOTPLUG path doesn't clear memmap + * Please note that MEMINIT_HOTPLUG path doesn't clear memmap * because this is done early in section_activate() */ if (!(pfn & (pageblock_nr_pages - 1))) { @@ -6137,7 +6137,7 @@ void __meminit __weak memmap_init(unsigned long size, int nid, if (end_pfn > start_pfn) { size = end_pfn - start_pfn; memmap_init_zone(size, nid, zone, start_pfn, - MEMMAP_EARLY, NULL); + MEMINIT_EARLY, NULL); } } } From f85086f95fa36194eb0db5cd5c12e56801b98523 Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Fri, 25 Sep 2020 21:19:31 -0700 Subject: [PATCH 192/322] mm: don't rely on system state to detect hot-plug operations In register_mem_sect_under_node() the system_state's value is checked to detect whether the call is made during boot time or during an hot-plug operation. Unfortunately, that check against SYSTEM_BOOTING is wrong because regular memory is registered at SYSTEM_SCHEDULING state. In addition, memory hot-plug operation can be triggered at this system state by the ACPI [1]. So checking against the system state is not enough. The consequence is that on system with interleaved node's ranges like this: Early memory node ranges node 1: [mem 0x0000000000000000-0x000000011fffffff] node 2: [mem 0x0000000120000000-0x000000014fffffff] node 1: [mem 0x0000000150000000-0x00000001ffffffff] node 0: [mem 0x0000000200000000-0x000000048fffffff] node 2: [mem 0x0000000490000000-0x00000007ffffffff] This can be seen on PowerPC LPAR after multiple memory hot-plug and hot-unplug operations are done. At the next reboot the node's memory ranges can be interleaved and since the call to link_mem_sections() is made in topology_init() while the system is in the SYSTEM_SCHEDULING state, the node's id is not checked, and the sections registered to multiple nodes: $ ls -l /sys/devices/system/memory/memory21/node* total 0 lrwxrwxrwx 1 root root 0 Aug 24 05:27 node1 -> ../../node/node1 lrwxrwxrwx 1 root root 0 Aug 24 05:27 node2 -> ../../node/node2 In that case, the system is able to boot but if later one of theses memory blocks is hot-unplugged and then hot-plugged, the sysfs inconsistency is detected and this is triggering a BUG_ON(): kernel BUG at /Users/laurent/src/linux-ppc/mm/memory_hotplug.c:1084! Oops: Exception in kernel mode, sig: 5 [#1] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries Modules linked in: rpadlpar_io rpaphp pseries_rng rng_core vmx_crypto gf128mul binfmt_misc ip_tables x_tables xfs libcrc32c crc32c_vpmsum autofs4 CPU: 8 PID: 10256 Comm: drmgr Not tainted 5.9.0-rc1+ #25 Call Trace: add_memory_resource+0x23c/0x340 (unreliable) __add_memory+0x5c/0xf0 dlpar_add_lmb+0x1b4/0x500 dlpar_memory+0x1f8/0xb80 handle_dlpar_errorlog+0xc0/0x190 dlpar_store+0x198/0x4a0 kobj_attr_store+0x30/0x50 sysfs_kf_write+0x64/0x90 kernfs_fop_write+0x1b0/0x290 vfs_write+0xe8/0x290 ksys_write+0xdc/0x130 system_call_exception+0x160/0x270 system_call_common+0xf0/0x27c This patch addresses the root cause by not relying on the system_state value to detect whether the call is due to a hot-plug operation. An extra parameter is added to link_mem_sections() detailing whether the operation is due to a hot-plug operation. [1] According to Oscar Salvador, using this qemu command line, ACPI memory hotplug operations are raised at SYSTEM_SCHEDULING state: $QEMU -enable-kvm -machine pc -smp 4,sockets=4,cores=1,threads=1 -cpu host -monitor pty \ -m size=$MEM,slots=255,maxmem=4294967296k \ -numa node,nodeid=0,cpus=0-3,mem=512 -numa node,nodeid=1,mem=512 \ -object memory-backend-ram,id=memdimm0,size=134217728 -device pc-dimm,node=0,memdev=memdimm0,id=dimm0,slot=0 \ -object memory-backend-ram,id=memdimm1,size=134217728 -device pc-dimm,node=0,memdev=memdimm1,id=dimm1,slot=1 \ -object memory-backend-ram,id=memdimm2,size=134217728 -device pc-dimm,node=0,memdev=memdimm2,id=dimm2,slot=2 \ -object memory-backend-ram,id=memdimm3,size=134217728 -device pc-dimm,node=0,memdev=memdimm3,id=dimm3,slot=3 \ -object memory-backend-ram,id=memdimm4,size=134217728 -device pc-dimm,node=1,memdev=memdimm4,id=dimm4,slot=4 \ -object memory-backend-ram,id=memdimm5,size=134217728 -device pc-dimm,node=1,memdev=memdimm5,id=dimm5,slot=5 \ -object memory-backend-ram,id=memdimm6,size=134217728 -device pc-dimm,node=1,memdev=memdimm6,id=dimm6,slot=6 \ Fixes: 4fbce633910e ("mm/memory_hotplug.c: make register_mem_sect_under_node() a callback of walk_memory_range()") Signed-off-by: Laurent Dufour Signed-off-by: Andrew Morton Reviewed-by: David Hildenbrand Reviewed-by: Oscar Salvador Acked-by: Michal Hocko Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Cc: Fenghua Yu Cc: Nathan Lynch Cc: Scott Cheloha Cc: Tony Luck Cc: Link: https://lkml.kernel.org/r/20200915094143.79181-3-ldufour@linux.ibm.com Signed-off-by: Linus Torvalds --- drivers/base/node.c | 85 ++++++++++++++++++++++++++++---------------- include/linux/node.h | 11 +++--- mm/memory_hotplug.c | 3 +- 3 files changed, 64 insertions(+), 35 deletions(-) diff --git a/drivers/base/node.c b/drivers/base/node.c index 508b80f6329b..50af16e68d98 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -761,14 +761,36 @@ static int __ref get_nid_for_pfn(unsigned long pfn) return pfn_to_nid(pfn); } +static int do_register_memory_block_under_node(int nid, + struct memory_block *mem_blk) +{ + int ret; + + /* + * If this memory block spans multiple nodes, we only indicate + * the last processed node. + */ + mem_blk->nid = nid; + + ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj, + &mem_blk->dev.kobj, + kobject_name(&mem_blk->dev.kobj)); + if (ret) + return ret; + + return sysfs_create_link_nowarn(&mem_blk->dev.kobj, + &node_devices[nid]->dev.kobj, + kobject_name(&node_devices[nid]->dev.kobj)); +} + /* register memory section under specified node if it spans that node */ -static int register_mem_sect_under_node(struct memory_block *mem_blk, - void *arg) +static int register_mem_block_under_node_early(struct memory_block *mem_blk, + void *arg) { unsigned long memory_block_pfns = memory_block_size_bytes() / PAGE_SIZE; unsigned long start_pfn = section_nr_to_pfn(mem_blk->start_section_nr); unsigned long end_pfn = start_pfn + memory_block_pfns - 1; - int ret, nid = *(int *)arg; + int nid = *(int *)arg; unsigned long pfn; for (pfn = start_pfn; pfn <= end_pfn; pfn++) { @@ -785,38 +807,33 @@ static int register_mem_sect_under_node(struct memory_block *mem_blk, } /* - * We need to check if page belongs to nid only for the boot - * case, during hotplug we know that all pages in the memory - * block belong to the same node. + * We need to check if page belongs to nid only at the boot + * case because node's ranges can be interleaved. */ - if (system_state == SYSTEM_BOOTING) { - page_nid = get_nid_for_pfn(pfn); - if (page_nid < 0) - continue; - if (page_nid != nid) - continue; - } + page_nid = get_nid_for_pfn(pfn); + if (page_nid < 0) + continue; + if (page_nid != nid) + continue; - /* - * If this memory block spans multiple nodes, we only indicate - * the last processed node. - */ - mem_blk->nid = nid; - - ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj, - &mem_blk->dev.kobj, - kobject_name(&mem_blk->dev.kobj)); - if (ret) - return ret; - - return sysfs_create_link_nowarn(&mem_blk->dev.kobj, - &node_devices[nid]->dev.kobj, - kobject_name(&node_devices[nid]->dev.kobj)); + return do_register_memory_block_under_node(nid, mem_blk); } /* mem section does not span the specified node */ return 0; } +/* + * During hotplug we know that all pages in the memory block belong to the same + * node. + */ +static int register_mem_block_under_node_hotplug(struct memory_block *mem_blk, + void *arg) +{ + int nid = *(int *)arg; + + return do_register_memory_block_under_node(nid, mem_blk); +} + /* * Unregister a memory block device under the node it spans. Memory blocks * with multiple nodes cannot be offlined and therefore also never be removed. @@ -832,11 +849,19 @@ void unregister_memory_block_under_nodes(struct memory_block *mem_blk) kobject_name(&node_devices[mem_blk->nid]->dev.kobj)); } -int link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn) +int link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn, + enum meminit_context context) { + walk_memory_blocks_func_t func; + + if (context == MEMINIT_HOTPLUG) + func = register_mem_block_under_node_hotplug; + else + func = register_mem_block_under_node_early; + return walk_memory_blocks(PFN_PHYS(start_pfn), PFN_PHYS(end_pfn - start_pfn), (void *)&nid, - register_mem_sect_under_node); + func); } #ifdef CONFIG_HUGETLBFS diff --git a/include/linux/node.h b/include/linux/node.h index 4866f32a02d8..014ba3ab2efd 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -99,11 +99,13 @@ extern struct node *node_devices[]; typedef void (*node_registration_func_t)(struct node *); #if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_NUMA) -extern int link_mem_sections(int nid, unsigned long start_pfn, - unsigned long end_pfn); +int link_mem_sections(int nid, unsigned long start_pfn, + unsigned long end_pfn, + enum meminit_context context); #else static inline int link_mem_sections(int nid, unsigned long start_pfn, - unsigned long end_pfn) + unsigned long end_pfn, + enum meminit_context context) { return 0; } @@ -128,7 +130,8 @@ static inline int register_one_node(int nid) if (error) return error; /* link memory sections under this node */ - error = link_mem_sections(nid, start_pfn, end_pfn); + error = link_mem_sections(nid, start_pfn, end_pfn, + MEMINIT_EARLY); } return error; diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 345308a8bd15..ce3e73e3a5c1 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1080,7 +1080,8 @@ int __ref add_memory_resource(int nid, struct resource *res) } /* link memory sections under this node.*/ - ret = link_mem_sections(nid, PFN_DOWN(start), PFN_UP(start + size - 1)); + ret = link_mem_sections(nid, PFN_DOWN(start), PFN_UP(start + size - 1), + MEMINIT_HOTPLUG); BUG_ON(ret); /* create new memmap entry */ From ce2684254bd4818ca3995c0d021fb62c4cf10a19 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Mon, 14 Sep 2020 23:32:15 -0700 Subject: [PATCH 193/322] mm: validate pmd after splitting syzbot reported the following KASAN splat: general protection fault, probably for non-canonical address 0xdffffc0000000003: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000018-0x000000000000001f] CPU: 1 PID: 6826 Comm: syz-executor142 Not tainted 5.9.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:__lock_acquire+0x84/0x2ae0 kernel/locking/lockdep.c:4296 Code: ff df 8a 04 30 84 c0 0f 85 e3 16 00 00 83 3d 56 58 35 08 00 0f 84 0e 17 00 00 83 3d 25 c7 f5 07 00 74 2c 4c 89 e8 48 c1 e8 03 <80> 3c 30 00 74 12 4c 89 ef e8 3e d1 5a 00 48 be 00 00 00 00 00 fc RSP: 0018:ffffc90004b9f850 EFLAGS: 00010006 Call Trace: lock_acquire+0x140/0x6f0 kernel/locking/lockdep.c:5006 __raw_spin_lock include/linux/spinlock_api_smp.h:142 [inline] _raw_spin_lock+0x2a/0x40 kernel/locking/spinlock.c:151 spin_lock include/linux/spinlock.h:354 [inline] madvise_cold_or_pageout_pte_range+0x52f/0x25c0 mm/madvise.c:389 walk_pmd_range mm/pagewalk.c:89 [inline] walk_pud_range mm/pagewalk.c:160 [inline] walk_p4d_range mm/pagewalk.c:193 [inline] walk_pgd_range mm/pagewalk.c:229 [inline] __walk_page_range+0xe7b/0x1da0 mm/pagewalk.c:331 walk_page_range+0x2c3/0x5c0 mm/pagewalk.c:427 madvise_pageout_page_range mm/madvise.c:521 [inline] madvise_pageout mm/madvise.c:557 [inline] madvise_vma mm/madvise.c:946 [inline] do_madvise+0x12d0/0x2090 mm/madvise.c:1145 __do_sys_madvise mm/madvise.c:1171 [inline] __se_sys_madvise mm/madvise.c:1169 [inline] __x64_sys_madvise+0x76/0x80 mm/madvise.c:1169 do_syscall_64+0x31/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 The backing vma was shmem. In case of split page of file-backed THP, madvise zaps the pmd instead of remapping of sub-pages. So we need to check pmd validity after split. Reported-by: syzbot+ecf80462cb7d5d552bc7@syzkaller.appspotmail.com Fixes: 1a4e58cce84e ("mm: introduce MADV_PAGEOUT") Signed-off-by: Minchan Kim Acked-by: Kirill A. Shutemov Signed-off-by: Linus Torvalds --- mm/madvise.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/madvise.c b/mm/madvise.c index d4aa5f776543..0e0d61003fc6 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -381,9 +381,9 @@ huge_unlock: return 0; } +regular_page: if (pmd_trans_unstable(pmd)) return 0; -regular_page: #endif tlb_change_page_size(tlb, PAGE_SIZE); orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); From a2bd970aa62f2f7f80fd0d212b1d4ccea5df4aed Mon Sep 17 00:00:00 2001 From: Nicolas VINCENT Date: Wed, 23 Sep 2020 16:08:40 +0200 Subject: [PATCH 194/322] i2c: cpm: Fix i2c_ram structure the i2c_ram structure is missing the sdmatmp field mentionned in datasheet for MPC8272 at paragraph 36.5. With this field missing, the hardware would write past the allocated memory done through cpm_muram_alloc for the i2c_ram structure and land in memory allocated for the buffers descriptors corrupting the cbd_bufaddr field. Since this field is only set during setup(), the first i2c transaction would work and the following would send data read from an arbitrary memory location. Fixes: 61045dbe9d8d ("i2c: Add support for I2C bus on Freescale CPM1/CPM2 controllers") Signed-off-by: Nicolas VINCENT Acked-by: Jochen Friedrich Acked-by: Christophe Leroy Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-cpm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/i2c/busses/i2c-cpm.c b/drivers/i2c/busses/i2c-cpm.c index 1213e1932ccb..24d584a1c9a7 100644 --- a/drivers/i2c/busses/i2c-cpm.c +++ b/drivers/i2c/busses/i2c-cpm.c @@ -65,6 +65,9 @@ struct i2c_ram { char res1[4]; /* Reserved */ ushort rpbase; /* Relocation pointer */ char res2[2]; /* Reserved */ + /* The following elements are only for CPM2 */ + char res3[4]; /* Reserved */ + uint sdmatmp; /* Internal */ }; #define I2COM_START 0x80 From 8947efc077168c53b84d039881a7c967086a248a Mon Sep 17 00:00:00 2001 From: Tali Perry Date: Sun, 20 Sep 2020 23:48:09 +0300 Subject: [PATCH 195/322] i2c: npcm7xx: Clear LAST bit after a failed transaction. Due to a HW issue, in some scenarios the LAST bit might remain set. This will cause an unexpected NACK after reading 16 bytes on the next read. Example: if user tries to read from a missing device, get a NACK, then if the next command is a long read ( > 16 bytes), the master will stop reading after 16 bytes. To solve this, if a command fails, check if LAST bit is still set. If it does, reset the module. Fixes: 56a1485b102e (i2c: npcm7xx: Add Nuvoton NPCM I2C controller driver) Signed-off-by: Tali Perry Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-npcm7xx.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/i2c/busses/i2c-npcm7xx.c b/drivers/i2c/busses/i2c-npcm7xx.c index dfcf04e1967f..2ad166355ec9 100644 --- a/drivers/i2c/busses/i2c-npcm7xx.c +++ b/drivers/i2c/busses/i2c-npcm7xx.c @@ -2163,6 +2163,15 @@ static int npcm_i2c_master_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, if (bus->cmd_err == -EAGAIN) ret = i2c_recover_bus(adap); + /* + * After any type of error, check if LAST bit is still set, + * due to a HW issue. + * It cannot be cleared without resetting the module. + */ + if (bus->cmd_err && + (NPCM_I2CRXF_CTL_LAST_PEC & ioread8(bus->reg + NPCM_I2CRXF_CTL))) + npcm_i2c_reset(bus); + #if IS_ENABLED(CONFIG_I2C_SLAVE) /* reenable slave if it was enabled */ if (bus->slave) From 008cfe4418b3dbda2ff820cdd7b1a5ce458ae444 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 25 Sep 2020 18:25:57 -0400 Subject: [PATCH 196/322] mm: Introduce mm_struct.has_pinned (Commit message majorly collected from Jason Gunthorpe) Reduce the chance of false positive from page_maybe_dma_pinned() by keeping track if the mm_struct has ever been used with pin_user_pages(). This allows cases that might drive up the page ref_count to avoid any penalty from handling dma_pinned pages. Future work is planned, to provide a more sophisticated solution, likely to turn it into a real counter. For now, make it atomic_t but use it as a boolean for simplicity. Suggested-by: Jason Gunthorpe Signed-off-by: Peter Xu Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 10 ++++++++++ kernel/fork.c | 1 + mm/gup.c | 6 ++++++ 3 files changed, 17 insertions(+) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 496c3ff97cce..ed028af3cb19 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -436,6 +436,16 @@ struct mm_struct { */ atomic_t mm_count; + /** + * @has_pinned: Whether this mm has pinned any pages. This can + * be either replaced in the future by @pinned_vm when it + * becomes stable, or grow into a counter on its own. We're + * aggresive on this bit now - even if the pinned pages were + * unpinned later on, we'll still keep this bit set for the + * lifecycle of this mm just for simplicity. + */ + atomic_t has_pinned; + #ifdef CONFIG_MMU atomic_long_t pgtables_bytes; /* PTE page table pages */ #endif diff --git a/kernel/fork.c b/kernel/fork.c index 49677d668de4..e65d8192d080 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1011,6 +1011,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, mm_pgtables_bytes_init(mm); mm->map_count = 0; mm->locked_vm = 0; + atomic_set(&mm->has_pinned, 0); atomic64_set(&mm->pinned_vm, 0); memset(&mm->rss_stat, 0, sizeof(mm->rss_stat)); spin_lock_init(&mm->page_table_lock); diff --git a/mm/gup.c b/mm/gup.c index 578bf5bd8bf8..dfe781d2ad4c 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1255,6 +1255,9 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm, BUG_ON(*locked != 1); } + if (flags & FOLL_PIN) + atomic_set(¤t->mm->has_pinned, 1); + /* * FOLL_PIN and FOLL_GET are mutually exclusive. Traditional behavior * is to set FOLL_GET if the caller wants pages[] filled in (but has @@ -2660,6 +2663,9 @@ static int internal_get_user_pages_fast(unsigned long start, int nr_pages, FOLL_FAST_ONLY))) return -EINVAL; + if (gup_flags & FOLL_PIN) + atomic_set(¤t->mm->has_pinned, 1); + if (!(gup_flags & FOLL_FAST_ONLY)) might_lock_read(¤t->mm->mmap_lock); From 7a4830c380f3a8b3425f6383deff58e65b2557b5 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 25 Sep 2020 18:25:58 -0400 Subject: [PATCH 197/322] mm/fork: Pass new vma pointer into copy_page_range() This prepares for the future work to trigger early cow on pinned pages during fork(). No functional change intended. Signed-off-by: Peter Xu Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- kernel/fork.c | 2 +- mm/memory.c | 14 +++++++++----- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 48614513eb66..16b799a0522c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1646,7 +1646,7 @@ struct mmu_notifier_range; void free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling); int copy_page_range(struct mm_struct *dst, struct mm_struct *src, - struct vm_area_struct *vma); + struct vm_area_struct *vma, struct vm_area_struct *new); int follow_pte_pmd(struct mm_struct *mm, unsigned long address, struct mmu_notifier_range *range, pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp); diff --git a/kernel/fork.c b/kernel/fork.c index e65d8192d080..da8d360fb032 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -589,7 +589,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, mm->map_count++; if (!(tmp->vm_flags & VM_WIPEONFORK)) - retval = copy_page_range(mm, oldmm, mpnt); + retval = copy_page_range(mm, oldmm, mpnt, tmp); if (tmp->vm_ops && tmp->vm_ops->open) tmp->vm_ops->open(tmp); diff --git a/mm/memory.c b/mm/memory.c index f3eb55975902..d56178721452 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -819,6 +819,7 @@ copy_present_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma, + struct vm_area_struct *new, unsigned long addr, unsigned long end) { pte_t *orig_src_pte, *orig_dst_pte; @@ -889,6 +890,7 @@ again: static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, pud_t *dst_pud, pud_t *src_pud, struct vm_area_struct *vma, + struct vm_area_struct *new, unsigned long addr, unsigned long end) { pmd_t *src_pmd, *dst_pmd; @@ -915,7 +917,7 @@ static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src if (pmd_none_or_clear_bad(src_pmd)) continue; if (copy_pte_range(dst_mm, src_mm, dst_pmd, src_pmd, - vma, addr, next)) + vma, new, addr, next)) return -ENOMEM; } while (dst_pmd++, src_pmd++, addr = next, addr != end); return 0; @@ -923,6 +925,7 @@ static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, p4d_t *dst_p4d, p4d_t *src_p4d, struct vm_area_struct *vma, + struct vm_area_struct *new, unsigned long addr, unsigned long end) { pud_t *src_pud, *dst_pud; @@ -949,7 +952,7 @@ static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src if (pud_none_or_clear_bad(src_pud)) continue; if (copy_pmd_range(dst_mm, src_mm, dst_pud, src_pud, - vma, addr, next)) + vma, new, addr, next)) return -ENOMEM; } while (dst_pud++, src_pud++, addr = next, addr != end); return 0; @@ -957,6 +960,7 @@ static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src static inline int copy_p4d_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma, + struct vm_area_struct *new, unsigned long addr, unsigned long end) { p4d_t *src_p4d, *dst_p4d; @@ -971,14 +975,14 @@ static inline int copy_p4d_range(struct mm_struct *dst_mm, struct mm_struct *src if (p4d_none_or_clear_bad(src_p4d)) continue; if (copy_pud_range(dst_mm, src_mm, dst_p4d, src_p4d, - vma, addr, next)) + vma, new, addr, next)) return -ENOMEM; } while (dst_p4d++, src_p4d++, addr = next, addr != end); return 0; } int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, - struct vm_area_struct *vma) + struct vm_area_struct *vma, struct vm_area_struct *new) { pgd_t *src_pgd, *dst_pgd; unsigned long next; @@ -1033,7 +1037,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, if (pgd_none_or_clear_bad(src_pgd)) continue; if (unlikely(copy_p4d_range(dst_mm, src_mm, dst_pgd, src_pgd, - vma, addr, next))) { + vma, new, addr, next))) { ret = -ENOMEM; break; } From 70e806e4e645019102d0e09d4933654fb5fb58ce Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 25 Sep 2020 18:25:59 -0400 Subject: [PATCH 198/322] mm: Do early cow for pinned pages during fork() for ptes This allows copy_pte_range() to do early cow if the pages were pinned on the source mm. Currently we don't have an accurate way to know whether a page is pinned or not. The only thing we have is page_maybe_dma_pinned(). However that's good enough for now. Especially, with the newly added mm->has_pinned flag to make sure we won't affect processes that never pinned any pages. It would be easier if we can do GFP_KERNEL allocation within copy_one_pte(). Unluckily, we can't because we're with the page table locks held for both the parent and child processes. So the page allocation needs to be done outside copy_one_pte(). Some trick is there in copy_present_pte(), majorly the wrprotect trick to block concurrent fast-gup. Comments in the function should explain better in place. Oleg Nesterov reported a (probably harmless) bug during review that we didn't reset entry.val properly in copy_pte_range() so that potentially there's chance to call add_swap_count_continuation() multiple times on the same swp entry. However that should be harmless since even if it happens, the same function (add_swap_count_continuation()) will return directly noticing that there're enough space for the swp counter. So instead of a standalone stable patch, it is touched up in this patch directly. Link: https://lore.kernel.org/lkml/20200914143829.GA1424636@nvidia.com/ Suggested-by: Linus Torvalds Signed-off-by: Peter Xu Signed-off-by: Linus Torvalds --- mm/memory.c | 207 +++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 190 insertions(+), 17 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index d56178721452..fcfc4ca36eba 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -773,15 +773,142 @@ copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, return 0; } -static inline void +/* + * Copy a present and normal page if necessary. + * + * NOTE! The usual case is that this doesn't need to do + * anything, and can just return a positive value. That + * will let the caller know that it can just increase + * the page refcount and re-use the pte the traditional + * way. + * + * But _if_ we need to copy it because it needs to be + * pinned in the parent (and the child should get its own + * copy rather than just a reference to the same page), + * we'll do that here and return zero to let the caller + * know we're done. + * + * And if we need a pre-allocated page but don't yet have + * one, return a negative error to let the preallocation + * code know so that it can do so outside the page table + * lock. + */ +static inline int +copy_present_page(struct mm_struct *dst_mm, struct mm_struct *src_mm, + pte_t *dst_pte, pte_t *src_pte, + struct vm_area_struct *vma, struct vm_area_struct *new, + unsigned long addr, int *rss, struct page **prealloc, + pte_t pte, struct page *page) +{ + struct page *new_page; + + if (!is_cow_mapping(vma->vm_flags)) + return 1; + + /* + * The trick starts. + * + * What we want to do is to check whether this page may + * have been pinned by the parent process. If so, + * instead of wrprotect the pte on both sides, we copy + * the page immediately so that we'll always guarantee + * the pinned page won't be randomly replaced in the + * future. + * + * To achieve this, we do the following: + * + * 1. Write-protect the pte if it's writable. This is + * to protect concurrent write fast-gup with + * FOLL_PIN, so that we'll fail the fast-gup with + * the write bit removed. + * + * 2. Check page_maybe_dma_pinned() to see whether this + * page may have been pinned. + * + * The order of these steps is important to serialize + * against the fast-gup code (gup_pte_range()) on the + * pte check and try_grab_compound_head(), so that + * we'll make sure either we'll capture that fast-gup + * so we'll copy the pinned page here, or we'll fail + * that fast-gup. + * + * NOTE! Even if we don't end up copying the page, + * we won't undo this wrprotect(), because the normal + * reference copy will need it anyway. + */ + if (pte_write(pte)) + ptep_set_wrprotect(src_mm, addr, src_pte); + + /* + * These are the "normally we can just copy by reference" + * checks. + */ + if (likely(!atomic_read(&src_mm->has_pinned))) + return 1; + if (likely(!page_maybe_dma_pinned(page))) + return 1; + + /* + * Uhhuh. It looks like the page might be a pinned page, + * and we actually need to copy it. Now we can set the + * source pte back to being writable. + */ + if (pte_write(pte)) + set_pte_at(src_mm, addr, src_pte, pte); + + new_page = *prealloc; + if (!new_page) + return -EAGAIN; + + /* + * We have a prealloc page, all good! Take it + * over and copy the page & arm it. + */ + *prealloc = NULL; + copy_user_highpage(new_page, page, addr, vma); + __SetPageUptodate(new_page); + page_add_new_anon_rmap(new_page, new, addr, false); + lru_cache_add_inactive_or_unevictable(new_page, new); + rss[mm_counter(new_page)]++; + + /* All done, just insert the new page copy in the child */ + pte = mk_pte(new_page, new->vm_page_prot); + pte = maybe_mkwrite(pte_mkdirty(pte), new); + set_pte_at(dst_mm, addr, dst_pte, pte); + return 0; +} + +/* + * Copy one pte. Returns 0 if succeeded, or -EAGAIN if one preallocated page + * is required to copy this pte. + */ +static inline int copy_present_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma, - unsigned long addr, int *rss) + struct vm_area_struct *new, + unsigned long addr, int *rss, struct page **prealloc) { unsigned long vm_flags = vma->vm_flags; pte_t pte = *src_pte; struct page *page; + page = vm_normal_page(vma, addr, pte); + if (page) { + int retval; + + retval = copy_present_page(dst_mm, src_mm, + dst_pte, src_pte, + vma, new, + addr, rss, prealloc, + pte, page); + if (retval <= 0) + return retval; + + get_page(page); + page_dup_rmap(page, false); + rss[mm_counter(page)]++; + } + /* * If it's a COW mapping, write protect it both * in the parent and the child @@ -807,14 +934,27 @@ copy_present_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, if (!(vm_flags & VM_UFFD_WP)) pte = pte_clear_uffd_wp(pte); - page = vm_normal_page(vma, addr, pte); - if (page) { - get_page(page); - page_dup_rmap(page, false); - rss[mm_counter(page)]++; - } - set_pte_at(dst_mm, addr, dst_pte, pte); + return 0; +} + +static inline struct page * +page_copy_prealloc(struct mm_struct *src_mm, struct vm_area_struct *vma, + unsigned long addr) +{ + struct page *new_page; + + new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, addr); + if (!new_page) + return NULL; + + if (mem_cgroup_charge(new_page, src_mm, GFP_KERNEL)) { + put_page(new_page); + return NULL; + } + cgroup_throttle_swaprate(new_page, GFP_KERNEL); + + return new_page; } static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, @@ -825,16 +965,20 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, pte_t *orig_src_pte, *orig_dst_pte; pte_t *src_pte, *dst_pte; spinlock_t *src_ptl, *dst_ptl; - int progress = 0; + int progress, ret = 0; int rss[NR_MM_COUNTERS]; swp_entry_t entry = (swp_entry_t){0}; + struct page *prealloc = NULL; again: + progress = 0; init_rss_vec(rss); dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl); - if (!dst_pte) - return -ENOMEM; + if (!dst_pte) { + ret = -ENOMEM; + goto out; + } src_pte = pte_offset_map(src_pmd, addr); src_ptl = pte_lockptr(src_mm, src_pmd); spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); @@ -866,8 +1010,25 @@ again: progress += 8; continue; } - copy_present_pte(dst_mm, src_mm, dst_pte, src_pte, - vma, addr, rss); + /* copy_present_pte() will clear `*prealloc' if consumed */ + ret = copy_present_pte(dst_mm, src_mm, dst_pte, src_pte, + vma, new, addr, rss, &prealloc); + /* + * If we need a pre-allocated page for this pte, drop the + * locks, allocate, and try again. + */ + if (unlikely(ret == -EAGAIN)) + break; + if (unlikely(prealloc)) { + /* + * pre-alloc page cannot be reused by next time so as + * to strictly follow mempolicy (e.g., alloc_page_vma() + * will allocate page according to address). This + * could only happen if one pinned pte changed. + */ + put_page(prealloc); + prealloc = NULL; + } progress += 8; } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end); @@ -879,13 +1040,25 @@ again: cond_resched(); if (entry.val) { - if (add_swap_count_continuation(entry, GFP_KERNEL) < 0) + if (add_swap_count_continuation(entry, GFP_KERNEL) < 0) { + ret = -ENOMEM; + goto out; + } + entry.val = 0; + } else if (ret) { + WARN_ON_ONCE(ret != -EAGAIN); + prealloc = page_copy_prealloc(src_mm, vma, addr); + if (!prealloc) return -ENOMEM; - progress = 0; + /* We've captured and resolved the error. Reset, try again. */ + ret = 0; } if (addr != end) goto again; - return 0; +out: + if (unlikely(prealloc)) + put_page(prealloc); + return ret; } static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, From d042035eaf5f9009ad927dc4d3ce848381ccdeed Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 25 Sep 2020 18:26:00 -0400 Subject: [PATCH 199/322] mm/thp: Split huge pmds/puds if they're pinned when fork() Pinned pages shouldn't be write-protected when fork() happens, because follow up copy-on-write on these pages could cause the pinned pages to be replaced by random newly allocated pages. For huge PMDs, we split the huge pmd if pinning is detected. So that future handling will be done by the PTE level (with our latest changes, each of the small pages will be copied). We can achieve this by let copy_huge_pmd() return -EAGAIN for pinned pages, so that we'll fallthrough in copy_pmd_range() and finally land the next copy_pte_range() call. Huge PUDs will be even more special - so far it does not support anonymous pages. But it can actually be done the same as the huge PMDs even if the split huge PUDs means to erase the PUD entries. It'll guarantee the follow up fault ins will remap the same pages in either parent/child later. This might not be the most efficient way, but it should be easy and clean enough. It should be fine, since we're tackling with a very rare case just to make sure userspaces that pinned some thps will still work even without MADV_DONTFORK and after they fork()ed. Signed-off-by: Peter Xu Signed-off-by: Linus Torvalds --- mm/huge_memory.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index faadc449cca5..da397779a6d4 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1074,6 +1074,24 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, src_page = pmd_page(pmd); VM_BUG_ON_PAGE(!PageHead(src_page), src_page); + + /* + * If this page is a potentially pinned page, split and retry the fault + * with smaller page size. Normally this should not happen because the + * userspace should use MADV_DONTFORK upon pinned regions. This is a + * best effort that the pinned pages won't be replaced by another + * random page during the coming copy-on-write. + */ + if (unlikely(is_cow_mapping(vma->vm_flags) && + atomic_read(&src_mm->has_pinned) && + page_maybe_dma_pinned(src_page))) { + pte_free(dst_mm, pgtable); + spin_unlock(src_ptl); + spin_unlock(dst_ptl); + __split_huge_pmd(vma, src_pmd, addr, false, NULL); + return -EAGAIN; + } + get_page(src_page); page_dup_rmap(src_page, true); add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR); @@ -1177,6 +1195,16 @@ int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm, /* No huge zero pud yet */ } + /* Please refer to comments in copy_huge_pmd() */ + if (unlikely(is_cow_mapping(vma->vm_flags) && + atomic_read(&src_mm->has_pinned) && + page_maybe_dma_pinned(pud_page(pud)))) { + spin_unlock(src_ptl); + spin_unlock(dst_ptl); + __split_huge_pud(vma, src_pud, addr); + return -EAGAIN; + } + pudp_set_wrprotect(src_mm, addr, src_pud); pud = pud_mkold(pud_wrprotect(pud)); set_pud_at(dst_mm, addr, dst_pud, pud); From 7dbbcf496f2a4b6d82cfc7810a0746e160b79762 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 26 Sep 2020 21:33:43 -0700 Subject: [PATCH 200/322] mdio: fix mdio-thunder.c dependency & build error Fix build error by selecting MDIO_DEVRES for MDIO_THUNDER. Fixes this build error: ld: drivers/net/phy/mdio-thunder.o: in function `thunder_mdiobus_pci_probe': drivers/net/phy/mdio-thunder.c:78: undefined reference to `devm_mdiobus_alloc_size' Fixes: 379d7ac7ca31 ("phy: mdio-thunder: Add driver for Cavium Thunder SoC MDIO buses.") Reported-by: kernel test robot Signed-off-by: Randy Dunlap Cc: Bartosz Golaszewski Cc: Andrew Lunn Cc: Heiner Kallweit Cc: netdev@vger.kernel.org Cc: David Daney Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 726e4b240e7e..1c5a10b672fc 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -222,6 +222,7 @@ config MDIO_THUNDER depends on 64BIT depends on PCI select MDIO_CAVIUM + select MDIO_DEVRES help This driver supports the MDIO interfaces found on Cavium ThunderX SoCs when the MDIO bus device appears as a PCI From 72865028582a678be1e05240e55d452e5c258eca Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 27 Sep 2020 09:42:11 +0300 Subject: [PATCH 201/322] mlxsw: spectrum_acl: Fix mlxsw_sp_acl_tcam_group_add()'s error path If mlxsw_sp_acl_tcam_group_id_get() fails, the mutex initialized earlier is not destroyed. Fix this by initializing the mutex after calling the function. This is symmetric to mlxsw_sp_acl_tcam_group_del(). Fixes: 5ec2ee28d27b ("mlxsw: spectrum_acl: Introduce a mutex to guard region list updates") Signed-off-by: Ido Schimmel Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 5c020403342f..7cccc41dd69c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -292,13 +292,14 @@ mlxsw_sp_acl_tcam_group_add(struct mlxsw_sp_acl_tcam *tcam, int err; group->tcam = tcam; - mutex_init(&group->lock); INIT_LIST_HEAD(&group->region_list); err = mlxsw_sp_acl_tcam_group_id_get(tcam, &group->id); if (err) return err; + mutex_init(&group->lock); + return 0; } From 307eea32b202864c98d28174c14ddbf5b9367722 Mon Sep 17 00:00:00 2001 From: Marian-Cristian Rotariu Date: Sun, 27 Sep 2020 13:34:39 +0100 Subject: [PATCH 202/322] dt-bindings: net: renesas,ravb: Add support for r8a774e1 SoC Document RZ/G2H (R8A774E1) SoC bindings. Signed-off-by: Marian-Cristian Rotariu Signed-off-by: Lad Prabhakar Reviewed-by: Sergei Shtylyov Reviewed-by: Geert Uytterhoeven Acked-by: Rob Herring Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/renesas,ravb.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/net/renesas,ravb.txt b/Documentation/devicetree/bindings/net/renesas,ravb.txt index 032b76f14f4f..9119f1caf391 100644 --- a/Documentation/devicetree/bindings/net/renesas,ravb.txt +++ b/Documentation/devicetree/bindings/net/renesas,ravb.txt @@ -21,6 +21,7 @@ Required properties: - "renesas,etheravb-r8a774a1" for the R8A774A1 SoC. - "renesas,etheravb-r8a774b1" for the R8A774B1 SoC. - "renesas,etheravb-r8a774c0" for the R8A774C0 SoC. + - "renesas,etheravb-r8a774e1" for the R8A774E1 SoC. - "renesas,etheravb-r8a7795" for the R8A7795 SoC. - "renesas,etheravb-r8a7796" for the R8A77960 SoC. - "renesas,etheravb-r8a77961" for the R8A77961 SoC. From 709a16be0593c08190982cfbdca6df95e6d5823b Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 27 Sep 2020 19:44:29 +0200 Subject: [PATCH 203/322] r8169: fix RTL8168f/RTL8411 EPHY config Mistakenly bit 2 was set instead of bit 3 as in the vendor driver. Fixes: a7a92cf81589 ("r8169: sync PCIe PHY init with vendor driver 8.047.01") Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index fc9e6626db55..5e867da1aad3 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -2904,7 +2904,7 @@ static void rtl_hw_start_8168f_1(struct rtl8169_private *tp) { 0x08, 0x0001, 0x0002 }, { 0x09, 0x0000, 0x0080 }, { 0x19, 0x0000, 0x0224 }, - { 0x00, 0x0000, 0x0004 }, + { 0x00, 0x0000, 0x0008 }, { 0x0c, 0x3df0, 0x0200 }, }; @@ -2921,7 +2921,7 @@ static void rtl_hw_start_8411(struct rtl8169_private *tp) { 0x06, 0x00c0, 0x0020 }, { 0x0f, 0xffff, 0x5200 }, { 0x19, 0x0000, 0x0224 }, - { 0x00, 0x0000, 0x0004 }, + { 0x00, 0x0000, 0x0008 }, { 0x0c, 0x3df0, 0x0200 }, }; From a1b8638ba1320e6684aa98233c15255eb803fac7 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 27 Sep 2020 14:38:10 -0700 Subject: [PATCH 204/322] Linux 5.9-rc7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 2b66d3398878..992d24467ca0 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 9 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Kleptomaniac Octopus # *DOCUMENTATION* From 62c59a8786e6bb75569cee91dab66e9da3ff4b68 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Fri, 25 Sep 2020 16:49:51 +0800 Subject: [PATCH 205/322] memstick: Skip allocating card when removing host After commit 6827ca573c03 ("memstick: rtsx_usb_ms: Support runtime power management"), removing module rtsx_usb_ms will be stuck. The deadlock is caused by powering on and powering off at the same time, the former one is when memstick_check() is flushed, and the later is called by memstick_remove_host(). Soe let's skip allocating card to prevent this issue. Fixes: 6827ca573c03 ("memstick: rtsx_usb_ms: Support runtime power management") Signed-off-by: Kai-Heng Feng Link: https://lore.kernel.org/r/20200925084952.13220-1-kai.heng.feng@canonical.com Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/memstick/core/memstick.c | 4 ++++ include/linux/memstick.h | 1 + 2 files changed, 5 insertions(+) diff --git a/drivers/memstick/core/memstick.c b/drivers/memstick/core/memstick.c index 693ee73eb291..ef03d6fafc5c 100644 --- a/drivers/memstick/core/memstick.c +++ b/drivers/memstick/core/memstick.c @@ -441,6 +441,9 @@ static void memstick_check(struct work_struct *work) } else if (host->card->stop) host->card->stop(host->card); + if (host->removing) + goto out_power_off; + card = memstick_alloc_card(host); if (!card) { @@ -545,6 +548,7 @@ EXPORT_SYMBOL(memstick_add_host); */ void memstick_remove_host(struct memstick_host *host) { + host->removing = 1; flush_workqueue(workqueue); mutex_lock(&host->lock); if (host->card) diff --git a/include/linux/memstick.h b/include/linux/memstick.h index da4c65f9435f..ebf73d4ee969 100644 --- a/include/linux/memstick.h +++ b/include/linux/memstick.h @@ -281,6 +281,7 @@ struct memstick_host { struct memstick_dev *card; unsigned int retries; + bool removing; /* Notify the host that some requests are pending. */ void (*request)(struct memstick_host *host); From afd7f30886b0b445a4240a99020458a9772f2b89 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 27 Sep 2020 12:48:21 +0200 Subject: [PATCH 206/322] mmc: sdhci: Workaround broken command queuing on Intel GLK based IRBIS models Commit bedf9fc01ff1 ("mmc: sdhci: Workaround broken command queuing on Intel GLK"), disabled command-queuing on Intel GLK based LENOVO models because of it being broken due to what is believed to be a bug in the BIOS. It seems that the BIOS of some IRBIS models, including the IRBIS NB111 model has the same issue, so disable command queuing there too. Fixes: bedf9fc01ff1 ("mmc: sdhci: Workaround broken command queuing on Intel GLK") BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=209397 Reported-and-tested-by: RussianNeuroMancer Signed-off-by: Hans de Goede Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/20200927104821.5676-1-hdegoede@redhat.com Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-pci-core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c index af413805bbf1..914f5184295f 100644 --- a/drivers/mmc/host/sdhci-pci-core.c +++ b/drivers/mmc/host/sdhci-pci-core.c @@ -794,7 +794,8 @@ static int byt_emmc_probe_slot(struct sdhci_pci_slot *slot) static bool glk_broken_cqhci(struct sdhci_pci_slot *slot) { return slot->chip->pdev->device == PCI_DEVICE_ID_INTEL_GLK_EMMC && - dmi_match(DMI_BIOS_VENDOR, "LENOVO"); + (dmi_match(DMI_BIOS_VENDOR, "LENOVO") || + dmi_match(DMI_SYS_VENDOR, "IRBIS")); } static int glk_emmc_probe_slot(struct sdhci_pci_slot *slot) From d25e8fdebdad84219b498873300b7f11dd915b88 Mon Sep 17 00:00:00 2001 From: Ed Wildgoose Date: Mon, 28 Sep 2020 10:44:52 +0100 Subject: [PATCH 207/322] gpio: amd-fch: correct logic of GPIO_LINE_DIRECTION The original commit appears to have the logic reversed in amd_fch_gpio_get_direction. Also confirmed by observing the value of "direction" in the sys tree. Signed-off-by: Ed Wildgoose Fixes: e09d168f13f0 ("gpio: AMD G-Series PCH gpio driver") Cc: stable@vger.kernel.org Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-amd-fch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-amd-fch.c b/drivers/gpio/gpio-amd-fch.c index 4e44ba4d7423..2a21354ed6a0 100644 --- a/drivers/gpio/gpio-amd-fch.c +++ b/drivers/gpio/gpio-amd-fch.c @@ -92,7 +92,7 @@ static int amd_fch_gpio_get_direction(struct gpio_chip *gc, unsigned int gpio) ret = (readl_relaxed(ptr) & AMD_FCH_GPIO_FLAG_DIRECTION); spin_unlock_irqrestore(&priv->lock, flags); - return ret ? GPIO_LINE_DIRECTION_IN : GPIO_LINE_DIRECTION_OUT; + return ret ? GPIO_LINE_DIRECTION_OUT : GPIO_LINE_DIRECTION_IN; } static void amd_fch_gpio_set(struct gpio_chip *gc, From 6a7548305a04ae0cd9e9f062c050f265d4bf2d2e Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Thu, 24 Sep 2020 10:26:42 +0200 Subject: [PATCH 208/322] ARM: dts: bcm2835: Change firmware compatible from simple-bus to simple-mfd The current binding for the RPi firmware uses the simple-bus compatible as a fallback to benefit from its automatic probing of child nodes. However, simple-bus also comes with some constraints, like having the ranges, our case. Let's switch to simple-mfd that provides the same probing logic without those constraints. Signed-off-by: Maxime Ripard Link: https://lore.kernel.org/r/20200924082642.18144-1-maxime@cerno.tech Signed-off-by: Rob Herring --- .../bindings/arm/bcm/raspberrypi,bcm2835-firmware.yaml | 4 ++-- arch/arm/boot/dts/bcm2835-rpi.dtsi | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/arm/bcm/raspberrypi,bcm2835-firmware.yaml b/Documentation/devicetree/bindings/arm/bcm/raspberrypi,bcm2835-firmware.yaml index 17e4f20c8d39..6834f5e8df5f 100644 --- a/Documentation/devicetree/bindings/arm/bcm/raspberrypi,bcm2835-firmware.yaml +++ b/Documentation/devicetree/bindings/arm/bcm/raspberrypi,bcm2835-firmware.yaml @@ -23,7 +23,7 @@ properties: compatible: items: - const: raspberrypi,bcm2835-firmware - - const: simple-bus + - const: simple-mfd mboxes: $ref: '/schemas/types.yaml#/definitions/phandle' @@ -57,7 +57,7 @@ required: examples: - | firmware { - compatible = "raspberrypi,bcm2835-firmware", "simple-bus"; + compatible = "raspberrypi,bcm2835-firmware", "simple-mfd"; mboxes = <&mailbox>; firmware_clocks: clocks { diff --git a/arch/arm/boot/dts/bcm2835-rpi.dtsi b/arch/arm/boot/dts/bcm2835-rpi.dtsi index f7ae5a4530b8..d94357b21f7e 100644 --- a/arch/arm/boot/dts/bcm2835-rpi.dtsi +++ b/arch/arm/boot/dts/bcm2835-rpi.dtsi @@ -13,7 +13,7 @@ soc { firmware: firmware { - compatible = "raspberrypi,bcm2835-firmware", "simple-bus"; + compatible = "raspberrypi,bcm2835-firmware", "simple-mfd"; #address-cells = <1>; #size-cells = <1>; From 8706e04ed7d6c95004d42b22a4db97d5b2eb73b2 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 28 Sep 2020 08:38:54 -0600 Subject: [PATCH 209/322] io_uring: always delete double poll wait entry on match syzbot reports a crash with tty polling, which is using the double poll handling: general protection fault, probably for non-canonical address 0xdffffc0000000009: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000048-0x000000000000004f] CPU: 0 PID: 6874 Comm: syz-executor749 Not tainted 5.9.0-rc6-next-20200924-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:io_poll_get_single fs/io_uring.c:4778 [inline] RIP: 0010:io_poll_double_wake+0x51/0x510 fs/io_uring.c:4845 Code: fc ff df 48 c1 ea 03 80 3c 02 00 0f 85 9e 03 00 00 48 b8 00 00 00 00 00 fc ff df 49 8b 5d 08 48 8d 7b 48 48 89 fa 48 c1 ea 03 <0f> b6 04 02 84 c0 74 06 0f 8e 63 03 00 00 0f b6 6b 48 bf 06 00 00 RSP: 0018:ffffc90001c1fb70 EFLAGS: 00010006 RAX: dffffc0000000000 RBX: 0000000000000000 RCX: 0000000000000004 RDX: 0000000000000009 RSI: ffffffff81d9b3ad RDI: 0000000000000048 RBP: dffffc0000000000 R08: ffff8880a3cac798 R09: ffffc90001c1fc60 R10: fffff52000383f73 R11: 0000000000000000 R12: 0000000000000004 R13: ffff8880a3cac798 R14: ffff8880a3cac7a0 R15: 0000000000000004 FS: 0000000001f98880(0000) GS:ffff8880ae400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f18886916c0 CR3: 0000000094c5a000 CR4: 00000000001506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __wake_up_common+0x147/0x650 kernel/sched/wait.c:93 __wake_up_common_lock+0xd0/0x130 kernel/sched/wait.c:123 tty_ldisc_hangup+0x1cf/0x680 drivers/tty/tty_ldisc.c:735 __tty_hangup.part.0+0x403/0x870 drivers/tty/tty_io.c:625 __tty_hangup drivers/tty/tty_io.c:575 [inline] tty_vhangup+0x1d/0x30 drivers/tty/tty_io.c:698 pty_close+0x3f5/0x550 drivers/tty/pty.c:79 tty_release+0x455/0xf60 drivers/tty/tty_io.c:1679 __fput+0x285/0x920 fs/file_table.c:281 task_work_run+0xdd/0x190 kernel/task_work.c:141 tracehook_notify_resume include/linux/tracehook.h:188 [inline] exit_to_user_mode_loop kernel/entry/common.c:165 [inline] exit_to_user_mode_prepare+0x1e2/0x1f0 kernel/entry/common.c:192 syscall_exit_to_user_mode+0x7a/0x2c0 kernel/entry/common.c:267 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x401210 which is due to a failure in removing the double poll wait entry if we hit a wakeup match. This can cause multiple invocations of the wakeup, which isn't safe. Cc: stable@vger.kernel.org # v5.8 Reported-by: syzbot+81b3883093f772addf6d@syzkaller.appspotmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 11b8e428300d..5dcc77b42f0d 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -4743,6 +4743,8 @@ static int io_poll_double_wake(struct wait_queue_entry *wait, unsigned mode, if (mask && !(mask & poll->events)) return 0; + list_del_init(&wait->entry); + if (poll && poll->head) { bool done; From fad8e0de4426a776c9bcb060555e7c09e2d08db6 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 28 Sep 2020 08:57:48 -0600 Subject: [PATCH 210/322] io_uring: fix potential ABBA deadlock in ->show_fdinfo() syzbot reports a potential lock deadlock between the normal IO path and ->show_fdinfo(): ====================================================== WARNING: possible circular locking dependency detected 5.9.0-rc6-syzkaller #0 Not tainted ------------------------------------------------------ syz-executor.2/19710 is trying to acquire lock: ffff888098ddc450 (sb_writers#4){.+.+}-{0:0}, at: io_write+0x6b5/0xb30 fs/io_uring.c:3296 but task is already holding lock: ffff8880a11b8428 (&ctx->uring_lock){+.+.}-{3:3}, at: __do_sys_io_uring_enter+0xe9a/0x1bd0 fs/io_uring.c:8348 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (&ctx->uring_lock){+.+.}-{3:3}: __mutex_lock_common kernel/locking/mutex.c:956 [inline] __mutex_lock+0x134/0x10e0 kernel/locking/mutex.c:1103 __io_uring_show_fdinfo fs/io_uring.c:8417 [inline] io_uring_show_fdinfo+0x194/0xc70 fs/io_uring.c:8460 seq_show+0x4a8/0x700 fs/proc/fd.c:65 seq_read+0x432/0x1070 fs/seq_file.c:208 do_loop_readv_writev fs/read_write.c:734 [inline] do_loop_readv_writev fs/read_write.c:721 [inline] do_iter_read+0x48e/0x6e0 fs/read_write.c:955 vfs_readv+0xe5/0x150 fs/read_write.c:1073 kernel_readv fs/splice.c:355 [inline] default_file_splice_read.constprop.0+0x4e6/0x9e0 fs/splice.c:412 do_splice_to+0x137/0x170 fs/splice.c:871 splice_direct_to_actor+0x307/0x980 fs/splice.c:950 do_splice_direct+0x1b3/0x280 fs/splice.c:1059 do_sendfile+0x55f/0xd40 fs/read_write.c:1540 __do_sys_sendfile64 fs/read_write.c:1601 [inline] __se_sys_sendfile64 fs/read_write.c:1587 [inline] __x64_sys_sendfile64+0x1cc/0x210 fs/read_write.c:1587 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 -> #1 (&p->lock){+.+.}-{3:3}: __mutex_lock_common kernel/locking/mutex.c:956 [inline] __mutex_lock+0x134/0x10e0 kernel/locking/mutex.c:1103 seq_read+0x61/0x1070 fs/seq_file.c:155 pde_read fs/proc/inode.c:306 [inline] proc_reg_read+0x221/0x300 fs/proc/inode.c:318 do_loop_readv_writev fs/read_write.c:734 [inline] do_loop_readv_writev fs/read_write.c:721 [inline] do_iter_read+0x48e/0x6e0 fs/read_write.c:955 vfs_readv+0xe5/0x150 fs/read_write.c:1073 kernel_readv fs/splice.c:355 [inline] default_file_splice_read.constprop.0+0x4e6/0x9e0 fs/splice.c:412 do_splice_to+0x137/0x170 fs/splice.c:871 splice_direct_to_actor+0x307/0x980 fs/splice.c:950 do_splice_direct+0x1b3/0x280 fs/splice.c:1059 do_sendfile+0x55f/0xd40 fs/read_write.c:1540 __do_sys_sendfile64 fs/read_write.c:1601 [inline] __se_sys_sendfile64 fs/read_write.c:1587 [inline] __x64_sys_sendfile64+0x1cc/0x210 fs/read_write.c:1587 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 -> #0 (sb_writers#4){.+.+}-{0:0}: check_prev_add kernel/locking/lockdep.c:2496 [inline] check_prevs_add kernel/locking/lockdep.c:2601 [inline] validate_chain kernel/locking/lockdep.c:3218 [inline] __lock_acquire+0x2a96/0x5780 kernel/locking/lockdep.c:4441 lock_acquire+0x1f3/0xaf0 kernel/locking/lockdep.c:5029 percpu_down_read include/linux/percpu-rwsem.h:51 [inline] __sb_start_write+0x228/0x450 fs/super.c:1672 io_write+0x6b5/0xb30 fs/io_uring.c:3296 io_issue_sqe+0x18f/0x5c50 fs/io_uring.c:5719 __io_queue_sqe+0x280/0x1160 fs/io_uring.c:6175 io_queue_sqe+0x692/0xfa0 fs/io_uring.c:6254 io_submit_sqe fs/io_uring.c:6324 [inline] io_submit_sqes+0x1761/0x2400 fs/io_uring.c:6521 __do_sys_io_uring_enter+0xeac/0x1bd0 fs/io_uring.c:8349 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 other info that might help us debug this: Chain exists of: sb_writers#4 --> &p->lock --> &ctx->uring_lock Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&ctx->uring_lock); lock(&p->lock); lock(&ctx->uring_lock); lock(sb_writers#4); *** DEADLOCK *** 1 lock held by syz-executor.2/19710: #0: ffff8880a11b8428 (&ctx->uring_lock){+.+.}-{3:3}, at: __do_sys_io_uring_enter+0xe9a/0x1bd0 fs/io_uring.c:8348 stack backtrace: CPU: 0 PID: 19710 Comm: syz-executor.2 Not tainted 5.9.0-rc6-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x198/0x1fd lib/dump_stack.c:118 check_noncircular+0x324/0x3e0 kernel/locking/lockdep.c:1827 check_prev_add kernel/locking/lockdep.c:2496 [inline] check_prevs_add kernel/locking/lockdep.c:2601 [inline] validate_chain kernel/locking/lockdep.c:3218 [inline] __lock_acquire+0x2a96/0x5780 kernel/locking/lockdep.c:4441 lock_acquire+0x1f3/0xaf0 kernel/locking/lockdep.c:5029 percpu_down_read include/linux/percpu-rwsem.h:51 [inline] __sb_start_write+0x228/0x450 fs/super.c:1672 io_write+0x6b5/0xb30 fs/io_uring.c:3296 io_issue_sqe+0x18f/0x5c50 fs/io_uring.c:5719 __io_queue_sqe+0x280/0x1160 fs/io_uring.c:6175 io_queue_sqe+0x692/0xfa0 fs/io_uring.c:6254 io_submit_sqe fs/io_uring.c:6324 [inline] io_submit_sqes+0x1761/0x2400 fs/io_uring.c:6521 __do_sys_io_uring_enter+0xeac/0x1bd0 fs/io_uring.c:8349 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x45e179 Code: 3d b2 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 0b b2 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f1194e74c78 EFLAGS: 00000246 ORIG_RAX: 00000000000001aa RAX: ffffffffffffffda RBX: 00000000000082c0 RCX: 000000000045e179 RDX: 0000000000000000 RSI: 0000000000000001 RDI: 0000000000000004 RBP: 000000000118cf98 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 000000000118cf4c R13: 00007ffd1aa5756f R14: 00007f1194e759c0 R15: 000000000118cf4c Fix this by just not diving into details if we fail to trylock the io_uring mutex. We know the ctx isn't going away during this operation, but we cannot safely iterate buffers/files/personalities if we don't hold the io_uring mutex. Reported-by: syzbot+2f8fa4e860edc3066aba@syzkaller.appspotmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 5dcc77b42f0d..556e4a2ead07 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -8414,11 +8414,19 @@ static int io_uring_show_cred(int id, void *p, void *data) static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m) { + bool has_lock; int i; - mutex_lock(&ctx->uring_lock); + /* + * Avoid ABBA deadlock between the seq lock and the io_uring mutex, + * since fdinfo case grabs it in the opposite direction of normal use + * cases. If we fail to get the lock, we just don't iterate any + * structures that could be going away outside the io_uring mutex. + */ + has_lock = mutex_trylock(&ctx->uring_lock); + seq_printf(m, "UserFiles:\t%u\n", ctx->nr_user_files); - for (i = 0; i < ctx->nr_user_files; i++) { + for (i = 0; has_lock && i < ctx->nr_user_files; i++) { struct fixed_file_table *table; struct file *f; @@ -8430,13 +8438,13 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m) seq_printf(m, "%5u: \n", i); } seq_printf(m, "UserBufs:\t%u\n", ctx->nr_user_bufs); - for (i = 0; i < ctx->nr_user_bufs; i++) { + for (i = 0; has_lock && i < ctx->nr_user_bufs; i++) { struct io_mapped_ubuf *buf = &ctx->user_bufs[i]; seq_printf(m, "%5u: 0x%llx/%u\n", i, buf->ubuf, (unsigned int) buf->len); } - if (!idr_is_empty(&ctx->personality_idr)) { + if (has_lock && !idr_is_empty(&ctx->personality_idr)) { seq_printf(m, "Personalities:\n"); idr_for_each(&ctx->personality_idr, io_uring_show_cred, m); } @@ -8451,7 +8459,8 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m) req->task->task_works != NULL); } spin_unlock_irq(&ctx->completion_lock); - mutex_unlock(&ctx->uring_lock); + if (has_lock) + mutex_unlock(&ctx->uring_lock); } static void io_uring_show_fdinfo(struct seq_file *m, struct file *f) From a4d63c3732f1a0c91abcf5b7f32b4ef7dcd82025 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 28 Sep 2020 12:35:07 +0200 Subject: [PATCH 211/322] mm: do not rely on mm == current->mm in __get_user_pages_locked It seems likely this block was pasted from internal_get_user_pages_fast, which is not passed an mm struct and therefore uses current's. But __get_user_pages_locked is passed an explicit mm, and current->mm is not always valid. This was hit when being called from i915, which uses: pin_user_pages_remote-> __get_user_pages_remote-> __gup_longterm_locked-> __get_user_pages_locked Before, this would lead to an OOPS: BUG: kernel NULL pointer dereference, address: 0000000000000064 #PF: supervisor write access in kernel mode #PF: error_code(0x0002) - not-present page CPU: 10 PID: 1431 Comm: kworker/u33:1 Tainted: P S U O 5.9.0-rc7+ #140 Hardware name: LENOVO 20QTCTO1WW/20QTCTO1WW, BIOS N2OET47W (1.34 ) 08/06/2020 Workqueue: i915-userptr-acquire __i915_gem_userptr_get_pages_worker [i915] RIP: 0010:__get_user_pages_remote+0xd7/0x310 Call Trace: __i915_gem_userptr_get_pages_worker+0xc8/0x260 [i915] process_one_work+0x1ca/0x390 worker_thread+0x48/0x3c0 kthread+0x114/0x130 ret_from_fork+0x1f/0x30 CR2: 0000000000000064 This commit fixes the problem by using the mm pointer passed to the function rather than the bogus one in current. Fixes: 008cfe4418b3 ("mm: Introduce mm_struct.has_pinned") Tested-by: Chris Wilson Reported-by: Harald Arnesen Reviewed-by: Jason Gunthorpe Reviewed-by: Peter Xu Signed-off-by: Jason A. Donenfeld Signed-off-by: Linus Torvalds --- mm/gup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/gup.c b/mm/gup.c index dfe781d2ad4c..e869c634cc9a 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1256,7 +1256,7 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm, } if (flags & FOLL_PIN) - atomic_set(¤t->mm->has_pinned, 1); + atomic_set(&mm->has_pinned, 1); /* * FOLL_PIN and FOLL_GET are mutually exclusive. Traditional behavior From f2f3729fb65c5c2e6db234e6316b71a7bdc4b30b Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 28 Sep 2020 18:30:02 +0300 Subject: [PATCH 212/322] net: bridge: fdb: don't flush ext_learn entries When a user-space software manages fdb entries externally it should set the ext_learn flag which marks the fdb entry as externally managed and avoids expiring it (they're treated as static fdbs). Unfortunately on events where fdb entries are flushed (STP down, netlink fdb flush etc) these fdbs are also deleted automatically by the bridge. That in turn causes trouble for the managing user-space software (e.g. in MLAG setups we lose remote fdb entries on port flaps). These entries are completely externally managed so we should avoid automatically deleting them, the only exception are offloaded entries (i.e. BR_FDB_ADDED_BY_EXT_LEARN + BR_FDB_OFFLOADED). They are flushed as before. Fixes: eb100e0e24a2 ("net: bridge: allow to add externally learned entries from user-space") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_fdb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 9db504baa094..32ac8343b0ba 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -413,6 +413,8 @@ void br_fdb_delete_by_port(struct net_bridge *br, if (!do_all) if (test_bit(BR_FDB_STATIC, &f->flags) || + (test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &f->flags) && + !test_bit(BR_FDB_OFFLOADED, &f->flags)) || (vid && f->key.vlan_id != vid)) continue; From 1aad8049909a6d3379175ef2824a68ac35c0b564 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Mon, 28 Sep 2020 11:31:03 -0700 Subject: [PATCH 213/322] net_sched: remove a redundant goto chain check All TC actions call tcf_action_check_ctrlact() to validate goto chain, so this check in tcf_action_init_1() is actually redundant. Remove it to save troubles of leaking memory. Fixes: e49d8c22f126 ("net_sched: defer tcf_idr_insert() in tcf_action_init_1()") Reported-by: Vlad Buslov Suggested-by: Davide Caratti Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Reviewed-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/act_api.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 104b47f5184f..5612b336e18e 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -722,13 +722,6 @@ int tcf_action_destroy(struct tc_action *actions[], int bind) return ret; } -static int tcf_action_destroy_1(struct tc_action *a, int bind) -{ - struct tc_action *actions[] = { a, NULL }; - - return tcf_action_destroy(actions, bind); -} - static int tcf_action_put(struct tc_action *p) { return __tcf_action_put(p, false); @@ -1000,13 +993,6 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, if (err < 0) goto err_mod; - if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN) && - !rcu_access_pointer(a->goto_chain)) { - tcf_action_destroy_1(a, bind); - NL_SET_ERR_MSG(extack, "can't use goto chain with NULL chain"); - return ERR_PTR(-EINVAL); - } - if (!name && tb[TCA_ACT_COOKIE]) tcf_set_action_cookie(&a->act_cookie, cookie); From fe8300fd8d655ebc6b6297565665959d6d7bbe02 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Fri, 25 Sep 2020 18:13:02 +0000 Subject: [PATCH 214/322] net: core: add __netdev_upper_dev_unlink() The netdev_upper_dev_unlink() has to work differently according to flags. This idea is the same with __netdev_upper_dev_link(). In the following patches, new flags will be added. Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- net/core/dev.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 266073e300b5..b7b3d6e15cda 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7762,16 +7762,8 @@ int netdev_master_upper_dev_link(struct net_device *dev, } EXPORT_SYMBOL(netdev_master_upper_dev_link); -/** - * netdev_upper_dev_unlink - Removes a link to upper device - * @dev: device - * @upper_dev: new upper device - * - * Removes a link to device which is upper to this one. The caller must hold - * the RTNL lock. - */ -void netdev_upper_dev_unlink(struct net_device *dev, - struct net_device *upper_dev) +static void __netdev_upper_dev_unlink(struct net_device *dev, + struct net_device *upper_dev) { struct netdev_notifier_changeupper_info changeupper_info = { .info = { @@ -7800,6 +7792,20 @@ void netdev_upper_dev_unlink(struct net_device *dev, __netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level, NULL); } + +/** + * netdev_upper_dev_unlink - Removes a link to upper device + * @dev: device + * @upper_dev: new upper device + * + * Removes a link to device which is upper to this one. The caller must hold + * the RTNL lock. + */ +void netdev_upper_dev_unlink(struct net_device *dev, + struct net_device *upper_dev) +{ + __netdev_upper_dev_unlink(dev, upper_dev); +} EXPORT_SYMBOL(netdev_upper_dev_unlink); static void __netdev_adjacent_dev_set(struct net_device *upper_dev, From eff7423365a6938d2d34dbce989febed2ae1f957 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Fri, 25 Sep 2020 18:13:12 +0000 Subject: [PATCH 215/322] net: core: introduce struct netdev_nested_priv for nested interface infrastructure Functions related to nested interface infrastructure such as netdev_walk_all_{ upper | lower }_dev() pass both private functions and "data" pointer to handle their own things. At this point, the data pointer type is void *. In order to make it easier to expand common variables and functions, this new netdev_nested_priv structure is added. In the following patch, a new member variable will be added into this struct to fix the lockdep issue. Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- drivers/infiniband/core/cache.c | 10 +++- drivers/infiniband/core/cma.c | 9 ++- drivers/infiniband/core/roce_gid_mgmt.c | 9 ++- drivers/infiniband/ulp/ipoib/ipoib_main.c | 9 ++- drivers/net/bonding/bond_alb.c | 9 ++- drivers/net/bonding/bond_main.c | 10 +++- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 37 ++++++++---- .../net/ethernet/mellanox/mlxsw/spectrum.c | 24 ++++---- .../ethernet/mellanox/mlxsw/spectrum_router.c | 11 +++- .../mellanox/mlxsw/spectrum_switchdev.c | 10 +++- drivers/net/ethernet/rocker/rocker_main.c | 9 ++- drivers/net/wireless/quantenna/qtnfmac/core.c | 10 +++- include/linux/netdevice.h | 16 +++-- net/bridge/br_arp_nd_proxy.c | 26 +++++--- net/bridge/br_vlan.c | 20 ++++--- net/core/dev.c | 59 ++++++++++++------- 16 files changed, 183 insertions(+), 95 deletions(-) diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index ffad73bb40ff..5a76611e684a 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -1320,9 +1320,10 @@ struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr) } EXPORT_SYMBOL(rdma_read_gid_attr_ndev_rcu); -static int get_lower_dev_vlan(struct net_device *lower_dev, void *data) +static int get_lower_dev_vlan(struct net_device *lower_dev, + struct netdev_nested_priv *priv) { - u16 *vlan_id = data; + u16 *vlan_id = (u16 *)priv->data; if (is_vlan_dev(lower_dev)) *vlan_id = vlan_dev_vlan_id(lower_dev); @@ -1348,6 +1349,9 @@ static int get_lower_dev_vlan(struct net_device *lower_dev, void *data) int rdma_read_gid_l2_fields(const struct ib_gid_attr *attr, u16 *vlan_id, u8 *smac) { + struct netdev_nested_priv priv = { + .data = (void *)vlan_id, + }; struct net_device *ndev; rcu_read_lock(); @@ -1368,7 +1372,7 @@ int rdma_read_gid_l2_fields(const struct ib_gid_attr *attr, * the lower vlan device for this gid entry. */ netdev_walk_all_lower_dev_rcu(attr->ndev, - get_lower_dev_vlan, vlan_id); + get_lower_dev_vlan, &priv); } } rcu_read_unlock(); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 7f0e91e92968..5888311b2119 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2865,9 +2865,10 @@ struct iboe_prio_tc_map { bool found; }; -static int get_lower_vlan_dev_tc(struct net_device *dev, void *data) +static int get_lower_vlan_dev_tc(struct net_device *dev, + struct netdev_nested_priv *priv) { - struct iboe_prio_tc_map *map = data; + struct iboe_prio_tc_map *map = (struct iboe_prio_tc_map *)priv->data; if (is_vlan_dev(dev)) map->output_tc = get_vlan_ndev_tc(dev, map->input_prio); @@ -2886,16 +2887,18 @@ static int iboe_tos_to_sl(struct net_device *ndev, int tos) { struct iboe_prio_tc_map prio_tc_map = {}; int prio = rt_tos2priority(tos); + struct netdev_nested_priv priv; /* If VLAN device, get it directly from the VLAN netdev */ if (is_vlan_dev(ndev)) return get_vlan_ndev_tc(ndev, prio); prio_tc_map.input_prio = prio; + priv.data = (void *)&prio_tc_map; rcu_read_lock(); netdev_walk_all_lower_dev_rcu(ndev, get_lower_vlan_dev_tc, - &prio_tc_map); + &priv); rcu_read_unlock(); /* If map is found from lower device, use it; Otherwise * continue with the current netdevice to get priority to tc map. diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c index 2860def84f4d..6b8364bb032d 100644 --- a/drivers/infiniband/core/roce_gid_mgmt.c +++ b/drivers/infiniband/core/roce_gid_mgmt.c @@ -531,10 +531,11 @@ struct upper_list { struct net_device *upper; }; -static int netdev_upper_walk(struct net_device *upper, void *data) +static int netdev_upper_walk(struct net_device *upper, + struct netdev_nested_priv *priv) { struct upper_list *entry = kmalloc(sizeof(*entry), GFP_ATOMIC); - struct list_head *upper_list = data; + struct list_head *upper_list = (struct list_head *)priv->data; if (!entry) return 0; @@ -553,12 +554,14 @@ static void handle_netdev_upper(struct ib_device *ib_dev, u8 port, struct net_device *ndev)) { struct net_device *ndev = cookie; + struct netdev_nested_priv priv; struct upper_list *upper_iter; struct upper_list *upper_temp; LIST_HEAD(upper_list); + priv.data = &upper_list; rcu_read_lock(); - netdev_walk_all_upper_dev_rcu(ndev, netdev_upper_walk, &upper_list); + netdev_walk_all_upper_dev_rcu(ndev, netdev_upper_walk, &priv); rcu_read_unlock(); handle_netdev(ib_dev, port, ndev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index ab75b7f745d4..f772fe8c5b66 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -342,9 +342,10 @@ struct ipoib_walk_data { struct net_device *result; }; -static int ipoib_upper_walk(struct net_device *upper, void *_data) +static int ipoib_upper_walk(struct net_device *upper, + struct netdev_nested_priv *priv) { - struct ipoib_walk_data *data = _data; + struct ipoib_walk_data *data = (struct ipoib_walk_data *)priv->data; int ret = 0; if (ipoib_is_dev_match_addr_rcu(data->addr, upper)) { @@ -368,10 +369,12 @@ static int ipoib_upper_walk(struct net_device *upper, void *_data) static struct net_device *ipoib_get_net_dev_match_addr( const struct sockaddr *addr, struct net_device *dev) { + struct netdev_nested_priv priv; struct ipoib_walk_data data = { .addr = addr, }; + priv.data = (void *)&data; rcu_read_lock(); if (ipoib_is_dev_match_addr_rcu(addr, dev)) { dev_hold(dev); @@ -379,7 +382,7 @@ static struct net_device *ipoib_get_net_dev_match_addr( goto out; } - netdev_walk_all_upper_dev_rcu(dev, ipoib_upper_walk, &data); + netdev_walk_all_upper_dev_rcu(dev, ipoib_upper_walk, &priv); out: rcu_read_unlock(); return data.result; diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 4e1b7deb724b..c3091e00dd5f 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -942,9 +942,10 @@ struct alb_walk_data { bool strict_match; }; -static int alb_upper_dev_walk(struct net_device *upper, void *_data) +static int alb_upper_dev_walk(struct net_device *upper, + struct netdev_nested_priv *priv) { - struct alb_walk_data *data = _data; + struct alb_walk_data *data = (struct alb_walk_data *)priv->data; bool strict_match = data->strict_match; struct bonding *bond = data->bond; struct slave *slave = data->slave; @@ -983,6 +984,7 @@ static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[], bool strict_match) { struct bonding *bond = bond_get_bond_by_slave(slave); + struct netdev_nested_priv priv; struct alb_walk_data data = { .strict_match = strict_match, .mac_addr = mac_addr, @@ -990,6 +992,7 @@ static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[], .bond = bond, }; + priv.data = (void *)&data; /* send untagged */ alb_send_lp_vid(slave, mac_addr, 0, 0); @@ -997,7 +1000,7 @@ static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[], * for that device. */ rcu_read_lock(); - netdev_walk_all_upper_dev_rcu(bond->dev, alb_upper_dev_walk, &data); + netdev_walk_all_upper_dev_rcu(bond->dev, alb_upper_dev_walk, &priv); rcu_read_unlock(); } diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 14740d3053b8..84ecbc6fa0ff 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2511,22 +2511,26 @@ re_arm: } } -static int bond_upper_dev_walk(struct net_device *upper, void *data) +static int bond_upper_dev_walk(struct net_device *upper, + struct netdev_nested_priv *priv) { - __be32 ip = *((__be32 *)data); + __be32 ip = *(__be32 *)priv->data; return ip == bond_confirm_addr(upper, 0, ip); } static bool bond_has_this_ip(struct bonding *bond, __be32 ip) { + struct netdev_nested_priv priv = { + .data = (void *)&ip, + }; bool ret = false; if (ip == bond_confirm_addr(bond->dev, 0, ip)) return true; rcu_read_lock(); - if (netdev_walk_all_upper_dev_rcu(bond->dev, bond_upper_dev_walk, &ip)) + if (netdev_walk_all_upper_dev_rcu(bond->dev, bond_upper_dev_walk, &priv)) ret = true; rcu_read_unlock(); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 2f8a4cfc5fa1..86ca8b9ea1b8 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -5396,9 +5396,10 @@ static int ixgbe_fwd_ring_up(struct ixgbe_adapter *adapter, return err; } -static int ixgbe_macvlan_up(struct net_device *vdev, void *data) +static int ixgbe_macvlan_up(struct net_device *vdev, + struct netdev_nested_priv *priv) { - struct ixgbe_adapter *adapter = data; + struct ixgbe_adapter *adapter = (struct ixgbe_adapter *)priv->data; struct ixgbe_fwd_adapter *accel; if (!netif_is_macvlan(vdev)) @@ -5415,8 +5416,12 @@ static int ixgbe_macvlan_up(struct net_device *vdev, void *data) static void ixgbe_configure_dfwd(struct ixgbe_adapter *adapter) { + struct netdev_nested_priv priv = { + .data = (void *)adapter, + }; + netdev_walk_all_upper_dev_rcu(adapter->netdev, - ixgbe_macvlan_up, adapter); + ixgbe_macvlan_up, &priv); } static void ixgbe_configure(struct ixgbe_adapter *adapter) @@ -9023,9 +9028,10 @@ static void ixgbe_set_prio_tc_map(struct ixgbe_adapter *adapter) } #endif /* CONFIG_IXGBE_DCB */ -static int ixgbe_reassign_macvlan_pool(struct net_device *vdev, void *data) +static int ixgbe_reassign_macvlan_pool(struct net_device *vdev, + struct netdev_nested_priv *priv) { - struct ixgbe_adapter *adapter = data; + struct ixgbe_adapter *adapter = (struct ixgbe_adapter *)priv->data; struct ixgbe_fwd_adapter *accel; int pool; @@ -9062,13 +9068,16 @@ static int ixgbe_reassign_macvlan_pool(struct net_device *vdev, void *data) static void ixgbe_defrag_macvlan_pools(struct net_device *dev) { struct ixgbe_adapter *adapter = netdev_priv(dev); + struct netdev_nested_priv priv = { + .data = (void *)adapter, + }; /* flush any stale bits out of the fwd bitmask */ bitmap_clear(adapter->fwd_bitmask, 1, 63); /* walk through upper devices reassigning pools */ netdev_walk_all_upper_dev_rcu(dev, ixgbe_reassign_macvlan_pool, - adapter); + &priv); } /** @@ -9242,14 +9251,18 @@ struct upper_walk_data { u8 queue; }; -static int get_macvlan_queue(struct net_device *upper, void *_data) +static int get_macvlan_queue(struct net_device *upper, + struct netdev_nested_priv *priv) { if (netif_is_macvlan(upper)) { struct ixgbe_fwd_adapter *vadapter = macvlan_accel_priv(upper); - struct upper_walk_data *data = _data; - struct ixgbe_adapter *adapter = data->adapter; - int ifindex = data->ifindex; + struct ixgbe_adapter *adapter; + struct upper_walk_data *data; + int ifindex; + data = (struct upper_walk_data *)priv->data; + ifindex = data->ifindex; + adapter = data->adapter; if (vadapter && upper->ifindex == ifindex) { data->queue = adapter->rx_ring[vadapter->rx_base_queue]->reg_idx; data->action = data->queue; @@ -9265,6 +9278,7 @@ static int handle_redirect_action(struct ixgbe_adapter *adapter, int ifindex, { struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ]; unsigned int num_vfs = adapter->num_vfs, vf; + struct netdev_nested_priv priv; struct upper_walk_data data; struct net_device *upper; @@ -9284,8 +9298,9 @@ static int handle_redirect_action(struct ixgbe_adapter *adapter, int ifindex, data.ifindex = ifindex; data.action = 0; data.queue = 0; + priv.data = (void *)&data; if (netdev_walk_all_upper_dev_rcu(adapter->netdev, - get_macvlan_queue, &data)) { + get_macvlan_queue, &priv)) { *action = data.action; *queue = data.queue; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 4186e29119c2..f3c0e241e1b4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -3690,13 +3690,13 @@ bool mlxsw_sp_port_dev_check(const struct net_device *dev) return dev->netdev_ops == &mlxsw_sp_port_netdev_ops; } -static int mlxsw_sp_lower_dev_walk(struct net_device *lower_dev, void *data) +static int mlxsw_sp_lower_dev_walk(struct net_device *lower_dev, + struct netdev_nested_priv *priv) { - struct mlxsw_sp_port **p_mlxsw_sp_port = data; int ret = 0; if (mlxsw_sp_port_dev_check(lower_dev)) { - *p_mlxsw_sp_port = netdev_priv(lower_dev); + priv->data = (void *)netdev_priv(lower_dev); ret = 1; } @@ -3705,15 +3705,16 @@ static int mlxsw_sp_lower_dev_walk(struct net_device *lower_dev, void *data) struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find(struct net_device *dev) { - struct mlxsw_sp_port *mlxsw_sp_port; + struct netdev_nested_priv priv = { + .data = NULL, + }; if (mlxsw_sp_port_dev_check(dev)) return netdev_priv(dev); - mlxsw_sp_port = NULL; - netdev_walk_all_lower_dev(dev, mlxsw_sp_lower_dev_walk, &mlxsw_sp_port); + netdev_walk_all_lower_dev(dev, mlxsw_sp_lower_dev_walk, &priv); - return mlxsw_sp_port; + return (struct mlxsw_sp_port *)priv.data; } struct mlxsw_sp *mlxsw_sp_lower_get(struct net_device *dev) @@ -3726,16 +3727,17 @@ struct mlxsw_sp *mlxsw_sp_lower_get(struct net_device *dev) struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find_rcu(struct net_device *dev) { - struct mlxsw_sp_port *mlxsw_sp_port; + struct netdev_nested_priv priv = { + .data = NULL, + }; if (mlxsw_sp_port_dev_check(dev)) return netdev_priv(dev); - mlxsw_sp_port = NULL; netdev_walk_all_lower_dev_rcu(dev, mlxsw_sp_lower_dev_walk, - &mlxsw_sp_port); + &priv); - return mlxsw_sp_port; + return (struct mlxsw_sp_port *)priv.data; } struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 24f1fd1f8d56..460cb523312f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -7351,9 +7351,10 @@ int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event, return err; } -static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev, void *data) +static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev, + struct netdev_nested_priv *priv) { - struct mlxsw_sp_rif *rif = data; + struct mlxsw_sp_rif *rif = (struct mlxsw_sp_rif *)priv->data; if (!netif_is_macvlan(dev)) return 0; @@ -7364,12 +7365,16 @@ static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev, void *data) static int mlxsw_sp_rif_macvlan_flush(struct mlxsw_sp_rif *rif) { + struct netdev_nested_priv priv = { + .data = (void *)rif, + }; + if (!netif_is_macvlan_port(rif->dev)) return 0; netdev_warn(rif->dev, "Router interface is deleted. Upper macvlans will not work\n"); return netdev_walk_all_upper_dev_rcu(rif->dev, - __mlxsw_sp_rif_macvlan_flush, rif); + __mlxsw_sp_rif_macvlan_flush, &priv); } static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 72912afa6f72..6501ce94ace5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -136,9 +136,9 @@ bool mlxsw_sp_bridge_device_is_offloaded(const struct mlxsw_sp *mlxsw_sp, } static int mlxsw_sp_bridge_device_upper_rif_destroy(struct net_device *dev, - void *data) + struct netdev_nested_priv *priv) { - struct mlxsw_sp *mlxsw_sp = data; + struct mlxsw_sp *mlxsw_sp = priv->data; mlxsw_sp_rif_destroy_by_dev(mlxsw_sp, dev); return 0; @@ -147,10 +147,14 @@ static int mlxsw_sp_bridge_device_upper_rif_destroy(struct net_device *dev, static void mlxsw_sp_bridge_device_rifs_destroy(struct mlxsw_sp *mlxsw_sp, struct net_device *dev) { + struct netdev_nested_priv priv = { + .data = (void *)mlxsw_sp, + }; + mlxsw_sp_rif_destroy_by_dev(mlxsw_sp, dev); netdev_walk_all_upper_dev_rcu(dev, mlxsw_sp_bridge_device_upper_rif_destroy, - mlxsw_sp); + &priv); } static int mlxsw_sp_bridge_device_vxlan_init(struct mlxsw_sp_bridge *bridge, diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index 42458a46ffaf..9cc31f7e0df1 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -3099,9 +3099,10 @@ struct rocker_walk_data { struct rocker_port *port; }; -static int rocker_lower_dev_walk(struct net_device *lower_dev, void *_data) +static int rocker_lower_dev_walk(struct net_device *lower_dev, + struct netdev_nested_priv *priv) { - struct rocker_walk_data *data = _data; + struct rocker_walk_data *data = (struct rocker_walk_data *)priv->data; int ret = 0; if (rocker_port_dev_check_under(lower_dev, data->rocker)) { @@ -3115,6 +3116,7 @@ static int rocker_lower_dev_walk(struct net_device *lower_dev, void *_data) struct rocker_port *rocker_port_dev_lower_find(struct net_device *dev, struct rocker *rocker) { + struct netdev_nested_priv priv; struct rocker_walk_data data; if (rocker_port_dev_check_under(dev, rocker)) @@ -3122,7 +3124,8 @@ struct rocker_port *rocker_port_dev_lower_find(struct net_device *dev, data.rocker = rocker; data.port = NULL; - netdev_walk_all_lower_dev(dev, rocker_lower_dev_walk, &data); + priv.data = (void *)&data; + netdev_walk_all_lower_dev(dev, rocker_lower_dev_walk, &priv); return data.port; } diff --git a/drivers/net/wireless/quantenna/qtnfmac/core.c b/drivers/net/wireless/quantenna/qtnfmac/core.c index 6aafff9d4231..e013ebe3079c 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/core.c +++ b/drivers/net/wireless/quantenna/qtnfmac/core.c @@ -671,9 +671,10 @@ bool qtnf_netdev_is_qtn(const struct net_device *ndev) return ndev->netdev_ops == &qtnf_netdev_ops; } -static int qtnf_check_br_ports(struct net_device *dev, void *data) +static int qtnf_check_br_ports(struct net_device *dev, + struct netdev_nested_priv *priv) { - struct net_device *ndev = data; + struct net_device *ndev = (struct net_device *)priv->data; if (dev != ndev && netdev_port_same_parent_id(dev, ndev)) return -ENOTSUPP; @@ -686,6 +687,9 @@ static int qtnf_core_netdevice_event(struct notifier_block *nb, { struct net_device *ndev = netdev_notifier_info_to_dev(ptr); const struct netdev_notifier_changeupper_info *info; + struct netdev_nested_priv priv = { + .data = (void *)ndev, + }; struct net_device *brdev; struct qtnf_vif *vif; struct qtnf_bus *bus; @@ -725,7 +729,7 @@ static int qtnf_core_netdevice_event(struct notifier_block *nb, } else { ret = netdev_walk_all_lower_dev(brdev, qtnf_check_br_ports, - ndev); + &priv); } break; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7bd4fcdd0738..313803d6c781 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4455,6 +4455,10 @@ extern int dev_rx_weight; extern int dev_tx_weight; extern int gro_normal_batch; +struct netdev_nested_priv { + void *data; +}; + bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev); struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev, struct list_head **iter); @@ -4470,8 +4474,8 @@ struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev, int netdev_walk_all_upper_dev_rcu(struct net_device *dev, int (*fn)(struct net_device *upper_dev, - void *data), - void *data); + struct netdev_nested_priv *priv), + struct netdev_nested_priv *priv); bool netdev_has_upper_dev_all_rcu(struct net_device *dev, struct net_device *upper_dev); @@ -4508,12 +4512,12 @@ struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev, struct list_head **iter); int netdev_walk_all_lower_dev(struct net_device *dev, int (*fn)(struct net_device *lower_dev, - void *data), - void *data); + struct netdev_nested_priv *priv), + struct netdev_nested_priv *priv); int netdev_walk_all_lower_dev_rcu(struct net_device *dev, int (*fn)(struct net_device *lower_dev, - void *data), - void *data); + struct netdev_nested_priv *priv), + struct netdev_nested_priv *priv); void *netdev_adjacent_get_private(struct list_head *adj_list); void *netdev_lower_get_first_private_rcu(struct net_device *dev); diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c index b18cdf03edb3..dfec65eca8a6 100644 --- a/net/bridge/br_arp_nd_proxy.c +++ b/net/bridge/br_arp_nd_proxy.c @@ -88,9 +88,10 @@ static void br_arp_send(struct net_bridge *br, struct net_bridge_port *p, } } -static int br_chk_addr_ip(struct net_device *dev, void *data) +static int br_chk_addr_ip(struct net_device *dev, + struct netdev_nested_priv *priv) { - __be32 ip = *(__be32 *)data; + __be32 ip = *(__be32 *)priv->data; struct in_device *in_dev; __be32 addr = 0; @@ -107,11 +108,15 @@ static int br_chk_addr_ip(struct net_device *dev, void *data) static bool br_is_local_ip(struct net_device *dev, __be32 ip) { - if (br_chk_addr_ip(dev, &ip)) + struct netdev_nested_priv priv = { + .data = (void *)&ip, + }; + + if (br_chk_addr_ip(dev, &priv)) return true; /* check if ip is configured on upper dev */ - if (netdev_walk_all_upper_dev_rcu(dev, br_chk_addr_ip, &ip)) + if (netdev_walk_all_upper_dev_rcu(dev, br_chk_addr_ip, &priv)) return true; return false; @@ -361,9 +366,10 @@ static void br_nd_send(struct net_bridge *br, struct net_bridge_port *p, } } -static int br_chk_addr_ip6(struct net_device *dev, void *data) +static int br_chk_addr_ip6(struct net_device *dev, + struct netdev_nested_priv *priv) { - struct in6_addr *addr = (struct in6_addr *)data; + struct in6_addr *addr = (struct in6_addr *)priv->data; if (ipv6_chk_addr(dev_net(dev), addr, dev, 0)) return 1; @@ -374,11 +380,15 @@ static int br_chk_addr_ip6(struct net_device *dev, void *data) static bool br_is_local_ip6(struct net_device *dev, struct in6_addr *addr) { - if (br_chk_addr_ip6(dev, addr)) + struct netdev_nested_priv priv = { + .data = (void *)addr, + }; + + if (br_chk_addr_ip6(dev, &priv)) return true; /* check if ip is configured on upper dev */ - if (netdev_walk_all_upper_dev_rcu(dev, br_chk_addr_ip6, addr)) + if (netdev_walk_all_upper_dev_rcu(dev, br_chk_addr_ip6, &priv)) return true; return false; diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 61c94cefa843..ee8780080be5 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -1360,7 +1360,7 @@ static int br_vlan_is_bind_vlan_dev(const struct net_device *dev) } static int br_vlan_is_bind_vlan_dev_fn(struct net_device *dev, - __always_unused void *data) + __always_unused struct netdev_nested_priv *priv) { return br_vlan_is_bind_vlan_dev(dev); } @@ -1383,9 +1383,9 @@ struct br_vlan_bind_walk_data { }; static int br_vlan_match_bind_vlan_dev_fn(struct net_device *dev, - void *data_in) + struct netdev_nested_priv *priv) { - struct br_vlan_bind_walk_data *data = data_in; + struct br_vlan_bind_walk_data *data = priv->data; int found = 0; if (br_vlan_is_bind_vlan_dev(dev) && @@ -1403,10 +1403,13 @@ br_vlan_get_upper_bind_vlan_dev(struct net_device *dev, u16 vid) struct br_vlan_bind_walk_data data = { .vid = vid, }; + struct netdev_nested_priv priv = { + .data = (void *)&data, + }; rcu_read_lock(); netdev_walk_all_upper_dev_rcu(dev, br_vlan_match_bind_vlan_dev_fn, - &data); + &priv); rcu_read_unlock(); return data.result; @@ -1487,9 +1490,9 @@ struct br_vlan_link_state_walk_data { }; static int br_vlan_link_state_change_fn(struct net_device *vlan_dev, - void *data_in) + struct netdev_nested_priv *priv) { - struct br_vlan_link_state_walk_data *data = data_in; + struct br_vlan_link_state_walk_data *data = priv->data; if (br_vlan_is_bind_vlan_dev(vlan_dev)) br_vlan_set_vlan_dev_state(data->br, vlan_dev); @@ -1503,10 +1506,13 @@ static void br_vlan_link_state_change(struct net_device *dev, struct br_vlan_link_state_walk_data data = { .br = br }; + struct netdev_nested_priv priv = { + .data = (void *)&data, + }; rcu_read_lock(); netdev_walk_all_upper_dev_rcu(dev, br_vlan_link_state_change_fn, - &data); + &priv); rcu_read_unlock(); } diff --git a/net/core/dev.c b/net/core/dev.c index b7b3d6e15cda..a4a1fa806c5c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6812,9 +6812,10 @@ static struct netdev_adjacent *__netdev_find_adj(struct net_device *adj_dev, return NULL; } -static int ____netdev_has_upper_dev(struct net_device *upper_dev, void *data) +static int ____netdev_has_upper_dev(struct net_device *upper_dev, + struct netdev_nested_priv *priv) { - struct net_device *dev = data; + struct net_device *dev = (struct net_device *)priv->data; return upper_dev == dev; } @@ -6831,10 +6832,14 @@ static int ____netdev_has_upper_dev(struct net_device *upper_dev, void *data) bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev) { + struct netdev_nested_priv priv = { + .data = (void *)upper_dev, + }; + ASSERT_RTNL(); return netdev_walk_all_upper_dev_rcu(dev, ____netdev_has_upper_dev, - upper_dev); + &priv); } EXPORT_SYMBOL(netdev_has_upper_dev); @@ -6851,8 +6856,12 @@ EXPORT_SYMBOL(netdev_has_upper_dev); bool netdev_has_upper_dev_all_rcu(struct net_device *dev, struct net_device *upper_dev) { + struct netdev_nested_priv priv = { + .data = (void *)upper_dev, + }; + return !!netdev_walk_all_upper_dev_rcu(dev, ____netdev_has_upper_dev, - upper_dev); + &priv); } EXPORT_SYMBOL(netdev_has_upper_dev_all_rcu); @@ -6997,8 +7006,8 @@ static struct net_device *netdev_next_upper_dev_rcu(struct net_device *dev, static int __netdev_walk_all_upper_dev(struct net_device *dev, int (*fn)(struct net_device *dev, - void *data), - void *data) + struct netdev_nested_priv *priv), + struct netdev_nested_priv *priv) { struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; @@ -7010,7 +7019,7 @@ static int __netdev_walk_all_upper_dev(struct net_device *dev, while (1) { if (now != dev) { - ret = fn(now, data); + ret = fn(now, priv); if (ret) return ret; } @@ -7046,8 +7055,8 @@ static int __netdev_walk_all_upper_dev(struct net_device *dev, int netdev_walk_all_upper_dev_rcu(struct net_device *dev, int (*fn)(struct net_device *dev, - void *data), - void *data) + struct netdev_nested_priv *priv), + struct netdev_nested_priv *priv) { struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; @@ -7058,7 +7067,7 @@ int netdev_walk_all_upper_dev_rcu(struct net_device *dev, while (1) { if (now != dev) { - ret = fn(now, data); + ret = fn(now, priv); if (ret) return ret; } @@ -7094,10 +7103,14 @@ EXPORT_SYMBOL_GPL(netdev_walk_all_upper_dev_rcu); static bool __netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev) { + struct netdev_nested_priv priv = { + .data = (void *)upper_dev, + }; + ASSERT_RTNL(); return __netdev_walk_all_upper_dev(dev, ____netdev_has_upper_dev, - upper_dev); + &priv); } /** @@ -7215,8 +7228,8 @@ static struct net_device *__netdev_next_lower_dev(struct net_device *dev, int netdev_walk_all_lower_dev(struct net_device *dev, int (*fn)(struct net_device *dev, - void *data), - void *data) + struct netdev_nested_priv *priv), + struct netdev_nested_priv *priv) { struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; @@ -7227,7 +7240,7 @@ int netdev_walk_all_lower_dev(struct net_device *dev, while (1) { if (now != dev) { - ret = fn(now, data); + ret = fn(now, priv); if (ret) return ret; } @@ -7262,8 +7275,8 @@ EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev); static int __netdev_walk_all_lower_dev(struct net_device *dev, int (*fn)(struct net_device *dev, - void *data), - void *data) + struct netdev_nested_priv *priv), + struct netdev_nested_priv *priv) { struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; @@ -7275,7 +7288,7 @@ static int __netdev_walk_all_lower_dev(struct net_device *dev, while (1) { if (now != dev) { - ret = fn(now, data); + ret = fn(now, priv); if (ret) return ret; } @@ -7364,13 +7377,15 @@ static u8 __netdev_lower_depth(struct net_device *dev) return max_depth; } -static int __netdev_update_upper_level(struct net_device *dev, void *data) +static int __netdev_update_upper_level(struct net_device *dev, + struct netdev_nested_priv *__unused) { dev->upper_level = __netdev_upper_depth(dev) + 1; return 0; } -static int __netdev_update_lower_level(struct net_device *dev, void *data) +static int __netdev_update_lower_level(struct net_device *dev, + struct netdev_nested_priv *__unused) { dev->lower_level = __netdev_lower_depth(dev) + 1; return 0; @@ -7378,8 +7393,8 @@ static int __netdev_update_lower_level(struct net_device *dev, void *data) int netdev_walk_all_lower_dev_rcu(struct net_device *dev, int (*fn)(struct net_device *dev, - void *data), - void *data) + struct netdev_nested_priv *priv), + struct netdev_nested_priv *priv) { struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1]; struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1]; @@ -7390,7 +7405,7 @@ int netdev_walk_all_lower_dev_rcu(struct net_device *dev, while (1) { if (now != dev) { - ret = fn(now, data); + ret = fn(now, priv); if (ret) return ret; } From 1fc70edb7d7b5ce1ae32b0cf90183f4879ad421a Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Fri, 25 Sep 2020 18:13:29 +0000 Subject: [PATCH 216/322] net: core: add nested_level variable in net_device This patch is to add a new variable 'nested_level' into the net_device structure. This variable will be used as a parameter of spin_lock_nested() of dev->addr_list_lock. netif_addr_lock() can be called recursively so spin_lock_nested() is used instead of spin_lock() and dev->lower_level is used as a parameter of spin_lock_nested(). But, dev->lower_level value can be updated while it is being used. So, lockdep would warn a possible deadlock scenario. When a stacked interface is deleted, netif_{uc | mc}_sync() is called recursively. So, spin_lock_nested() is called recursively too. At this moment, the dev->lower_level variable is used as a parameter of it. dev->lower_level value is updated when interfaces are being unlinked/linked immediately. Thus, After unlinking, dev->lower_level shouldn't be a parameter of spin_lock_nested(). A (macvlan) | B (vlan) | C (bridge) | D (macvlan) | E (vlan) | F (bridge) A->lower_level : 6 B->lower_level : 5 C->lower_level : 4 D->lower_level : 3 E->lower_level : 2 F->lower_level : 1 When an interface 'A' is removed, it releases resources. At this moment, netif_addr_lock() would be called. Then, netdev_upper_dev_unlink() is called recursively. Then dev->lower_level is updated. There is no problem. But, when the bridge module is removed, 'C' and 'F' interfaces are removed at once. If 'F' is removed first, a lower_level value is like below. A->lower_level : 5 B->lower_level : 4 C->lower_level : 3 D->lower_level : 2 E->lower_level : 1 F->lower_level : 1 Then, 'C' is removed. at this moment, netif_addr_lock() is called recursively. The ordering is like this. C(3)->D(2)->E(1)->F(1) At this moment, the lower_level value of 'E' and 'F' are the same. So, lockdep warns a possible deadlock scenario. In order to avoid this problem, a new variable 'nested_level' is added. This value is the same as dev->lower_level - 1. But this value is updated in rtnl_unlock(). So, this variable can be used as a parameter of spin_lock_nested() safely in the rtnl context. Test commands: ip link add br0 type bridge vlan_filtering 1 ip link add vlan1 link br0 type vlan id 10 ip link add macvlan2 link vlan1 type macvlan ip link add br3 type bridge vlan_filtering 1 ip link set macvlan2 master br3 ip link add vlan4 link br3 type vlan id 10 ip link add macvlan5 link vlan4 type macvlan ip link add br6 type bridge vlan_filtering 1 ip link set macvlan5 master br6 ip link add vlan7 link br6 type vlan id 10 ip link add macvlan8 link vlan7 type macvlan ip link set br0 up ip link set vlan1 up ip link set macvlan2 up ip link set br3 up ip link set vlan4 up ip link set macvlan5 up ip link set br6 up ip link set vlan7 up ip link set macvlan8 up modprobe -rv bridge Splat looks like: [ 36.057436][ T744] WARNING: possible recursive locking detected [ 36.058848][ T744] 5.9.0-rc6+ #728 Not tainted [ 36.059959][ T744] -------------------------------------------- [ 36.061391][ T744] ip/744 is trying to acquire lock: [ 36.062590][ T744] ffff8c4767509280 (&vlan_netdev_addr_lock_key){+...}-{2:2}, at: dev_set_rx_mode+0x19/0x30 [ 36.064922][ T744] [ 36.064922][ T744] but task is already holding lock: [ 36.066626][ T744] ffff8c4767769280 (&vlan_netdev_addr_lock_key){+...}-{2:2}, at: dev_uc_add+0x1e/0x60 [ 36.068851][ T744] [ 36.068851][ T744] other info that might help us debug this: [ 36.070731][ T744] Possible unsafe locking scenario: [ 36.070731][ T744] [ 36.072497][ T744] CPU0 [ 36.073238][ T744] ---- [ 36.074007][ T744] lock(&vlan_netdev_addr_lock_key); [ 36.075290][ T744] lock(&vlan_netdev_addr_lock_key); [ 36.076590][ T744] [ 36.076590][ T744] *** DEADLOCK *** [ 36.076590][ T744] [ 36.078515][ T744] May be due to missing lock nesting notation [ 36.078515][ T744] [ 36.080491][ T744] 3 locks held by ip/744: [ 36.081471][ T744] #0: ffffffff98571df0 (rtnl_mutex){+.+.}-{3:3}, at: rtnetlink_rcv_msg+0x236/0x490 [ 36.083614][ T744] #1: ffff8c4767769280 (&vlan_netdev_addr_lock_key){+...}-{2:2}, at: dev_uc_add+0x1e/0x60 [ 36.085942][ T744] #2: ffff8c476c8da280 (&bridge_netdev_addr_lock_key/4){+...}-{2:2}, at: dev_uc_sync+0x39/0x80 [ 36.088400][ T744] [ 36.088400][ T744] stack backtrace: [ 36.089772][ T744] CPU: 6 PID: 744 Comm: ip Not tainted 5.9.0-rc6+ #728 [ 36.091364][ T744] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 [ 36.093630][ T744] Call Trace: [ 36.094416][ T744] dump_stack+0x77/0x9b [ 36.095385][ T744] __lock_acquire+0xbc3/0x1f40 [ 36.096522][ T744] lock_acquire+0xb4/0x3b0 [ 36.097540][ T744] ? dev_set_rx_mode+0x19/0x30 [ 36.098657][ T744] ? rtmsg_ifinfo+0x1f/0x30 [ 36.099711][ T744] ? __dev_notify_flags+0xa5/0xf0 [ 36.100874][ T744] ? rtnl_is_locked+0x11/0x20 [ 36.101967][ T744] ? __dev_set_promiscuity+0x7b/0x1a0 [ 36.103230][ T744] _raw_spin_lock_bh+0x38/0x70 [ 36.104348][ T744] ? dev_set_rx_mode+0x19/0x30 [ 36.105461][ T744] dev_set_rx_mode+0x19/0x30 [ 36.106532][ T744] dev_set_promiscuity+0x36/0x50 [ 36.107692][ T744] __dev_set_promiscuity+0x123/0x1a0 [ 36.108929][ T744] dev_set_promiscuity+0x1e/0x50 [ 36.110093][ T744] br_port_set_promisc+0x1f/0x40 [bridge] [ 36.111415][ T744] br_manage_promisc+0x8b/0xe0 [bridge] [ 36.112728][ T744] __dev_set_promiscuity+0x123/0x1a0 [ 36.113967][ T744] ? __hw_addr_sync_one+0x23/0x50 [ 36.115135][ T744] __dev_set_rx_mode+0x68/0x90 [ 36.116249][ T744] dev_uc_sync+0x70/0x80 [ 36.117244][ T744] dev_uc_add+0x50/0x60 [ 36.118223][ T744] macvlan_open+0x18e/0x1f0 [macvlan] [ 36.119470][ T744] __dev_open+0xd6/0x170 [ 36.120470][ T744] __dev_change_flags+0x181/0x1d0 [ 36.121644][ T744] dev_change_flags+0x23/0x60 [ 36.122741][ T744] do_setlink+0x30a/0x11e0 [ 36.123778][ T744] ? __lock_acquire+0x92c/0x1f40 [ 36.124929][ T744] ? __nla_validate_parse.part.6+0x45/0x8e0 [ 36.126309][ T744] ? __lock_acquire+0x92c/0x1f40 [ 36.127457][ T744] __rtnl_newlink+0x546/0x8e0 [ 36.128560][ T744] ? lock_acquire+0xb4/0x3b0 [ 36.129623][ T744] ? deactivate_slab.isra.85+0x6a1/0x850 [ 36.130946][ T744] ? __lock_acquire+0x92c/0x1f40 [ 36.132102][ T744] ? lock_acquire+0xb4/0x3b0 [ 36.133176][ T744] ? is_bpf_text_address+0x5/0xe0 [ 36.134364][ T744] ? rtnl_newlink+0x2e/0x70 [ 36.135445][ T744] ? rcu_read_lock_sched_held+0x32/0x60 [ 36.136771][ T744] ? kmem_cache_alloc_trace+0x2d8/0x380 [ 36.138070][ T744] ? rtnl_newlink+0x2e/0x70 [ 36.139164][ T744] rtnl_newlink+0x47/0x70 [ ... ] Fixes: 845e0ebb4408 ("net: change addr_list_lock back to static key") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- include/linux/netdevice.h | 52 ++++++++++++++++++++---- net/core/dev.c | 85 +++++++++++++++++++++++++++++++++------ net/core/dev_addr_lists.c | 12 +++--- 3 files changed, 122 insertions(+), 27 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 313803d6c781..9fdb3ebef306 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1955,6 +1955,7 @@ struct net_device { unsigned short type; unsigned short hard_header_len; unsigned char min_header_len; + unsigned char name_assign_type; unsigned short needed_headroom; unsigned short needed_tailroom; @@ -1965,21 +1966,28 @@ struct net_device { unsigned char addr_len; unsigned char upper_level; unsigned char lower_level; + unsigned short neigh_priv_len; unsigned short dev_id; unsigned short dev_port; spinlock_t addr_list_lock; - unsigned char name_assign_type; - bool uc_promisc; + struct netdev_hw_addr_list uc; struct netdev_hw_addr_list mc; struct netdev_hw_addr_list dev_addrs; #ifdef CONFIG_SYSFS struct kset *queues_kset; +#endif +#ifdef CONFIG_LOCKDEP + struct list_head unlink_list; #endif unsigned int promiscuity; unsigned int allmulti; + bool uc_promisc; +#ifdef CONFIG_LOCKDEP + unsigned char nested_level; +#endif /* Protocol-specific pointers */ @@ -4260,17 +4268,23 @@ static inline void netif_tx_disable(struct net_device *dev) static inline void netif_addr_lock(struct net_device *dev) { - spin_lock(&dev->addr_list_lock); -} + unsigned char nest_level = 0; -static inline void netif_addr_lock_nested(struct net_device *dev) -{ - spin_lock_nested(&dev->addr_list_lock, dev->lower_level); +#ifdef CONFIG_LOCKDEP + nest_level = dev->nested_level; +#endif + spin_lock_nested(&dev->addr_list_lock, nest_level); } static inline void netif_addr_lock_bh(struct net_device *dev) { - spin_lock_bh(&dev->addr_list_lock); + unsigned char nest_level = 0; + +#ifdef CONFIG_LOCKDEP + nest_level = dev->nested_level; +#endif + local_bh_disable(); + spin_lock_nested(&dev->addr_list_lock, nest_level); } static inline void netif_addr_unlock(struct net_device *dev) @@ -4455,7 +4469,19 @@ extern int dev_rx_weight; extern int dev_tx_weight; extern int gro_normal_batch; +enum { + NESTED_SYNC_IMM_BIT, + NESTED_SYNC_TODO_BIT, +}; + +#define __NESTED_SYNC_BIT(bit) ((u32)1 << (bit)) +#define __NESTED_SYNC(name) __NESTED_SYNC_BIT(NESTED_SYNC_ ## name ## _BIT) + +#define NESTED_SYNC_IMM __NESTED_SYNC(IMM) +#define NESTED_SYNC_TODO __NESTED_SYNC(TODO) + struct netdev_nested_priv { + unsigned char flags; void *data; }; @@ -4465,6 +4491,16 @@ struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev, struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev, struct list_head **iter); +#ifdef CONFIG_LOCKDEP +static LIST_HEAD(net_unlink_list); + +static inline void net_unlink_todo(struct net_device *dev) +{ + if (list_empty(&dev->unlink_list)) + list_add_tail(&dev->unlink_list, &net_unlink_list); +} +#endif + /* iterate through upper list, must be called under RCU read lock */ #define netdev_for_each_upper_dev_rcu(dev, updev, iter) \ for (iter = &(dev)->adj_list.upper, \ diff --git a/net/core/dev.c b/net/core/dev.c index a4a1fa806c5c..4906b44af850 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7104,6 +7104,7 @@ static bool __netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev) { struct netdev_nested_priv priv = { + .flags = 0, .data = (void *)upper_dev, }; @@ -7385,9 +7386,19 @@ static int __netdev_update_upper_level(struct net_device *dev, } static int __netdev_update_lower_level(struct net_device *dev, - struct netdev_nested_priv *__unused) + struct netdev_nested_priv *priv) { dev->lower_level = __netdev_lower_depth(dev) + 1; + +#ifdef CONFIG_LOCKDEP + if (!priv) + return 0; + + if (priv->flags & NESTED_SYNC_IMM) + dev->nested_level = dev->lower_level - 1; + if (priv->flags & NESTED_SYNC_TODO) + net_unlink_todo(dev); +#endif return 0; } @@ -7665,6 +7676,7 @@ static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, static int __netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, bool master, void *upper_priv, void *upper_info, + struct netdev_nested_priv *priv, struct netlink_ext_ack *extack) { struct netdev_notifier_changeupper_info changeupper_info = { @@ -7721,9 +7733,9 @@ static int __netdev_upper_dev_link(struct net_device *dev, __netdev_update_upper_level(dev, NULL); __netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL); - __netdev_update_lower_level(upper_dev, NULL); + __netdev_update_lower_level(upper_dev, priv); __netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level, - NULL); + priv); return 0; @@ -7748,8 +7760,13 @@ int netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, struct netlink_ext_ack *extack) { + struct netdev_nested_priv priv = { + .flags = NESTED_SYNC_IMM | NESTED_SYNC_TODO, + .data = NULL, + }; + return __netdev_upper_dev_link(dev, upper_dev, false, - NULL, NULL, extack); + NULL, NULL, &priv, extack); } EXPORT_SYMBOL(netdev_upper_dev_link); @@ -7772,13 +7789,19 @@ int netdev_master_upper_dev_link(struct net_device *dev, void *upper_priv, void *upper_info, struct netlink_ext_ack *extack) { + struct netdev_nested_priv priv = { + .flags = NESTED_SYNC_IMM | NESTED_SYNC_TODO, + .data = NULL, + }; + return __netdev_upper_dev_link(dev, upper_dev, true, - upper_priv, upper_info, extack); + upper_priv, upper_info, &priv, extack); } EXPORT_SYMBOL(netdev_master_upper_dev_link); static void __netdev_upper_dev_unlink(struct net_device *dev, - struct net_device *upper_dev) + struct net_device *upper_dev, + struct netdev_nested_priv *priv) { struct netdev_notifier_changeupper_info changeupper_info = { .info = { @@ -7803,9 +7826,9 @@ static void __netdev_upper_dev_unlink(struct net_device *dev, __netdev_update_upper_level(dev, NULL); __netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL); - __netdev_update_lower_level(upper_dev, NULL); + __netdev_update_lower_level(upper_dev, priv); __netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level, - NULL); + priv); } /** @@ -7819,7 +7842,12 @@ static void __netdev_upper_dev_unlink(struct net_device *dev, void netdev_upper_dev_unlink(struct net_device *dev, struct net_device *upper_dev) { - __netdev_upper_dev_unlink(dev, upper_dev); + struct netdev_nested_priv priv = { + .flags = NESTED_SYNC_TODO, + .data = NULL, + }; + + __netdev_upper_dev_unlink(dev, upper_dev, &priv); } EXPORT_SYMBOL(netdev_upper_dev_unlink); @@ -7855,6 +7883,10 @@ int netdev_adjacent_change_prepare(struct net_device *old_dev, struct net_device *dev, struct netlink_ext_ack *extack) { + struct netdev_nested_priv priv = { + .flags = 0, + .data = NULL, + }; int err; if (!new_dev) @@ -7862,8 +7894,8 @@ int netdev_adjacent_change_prepare(struct net_device *old_dev, if (old_dev && new_dev != old_dev) netdev_adjacent_dev_disable(dev, old_dev); - - err = netdev_upper_dev_link(new_dev, dev, extack); + err = __netdev_upper_dev_link(new_dev, dev, false, NULL, NULL, &priv, + extack); if (err) { if (old_dev && new_dev != old_dev) netdev_adjacent_dev_enable(dev, old_dev); @@ -7878,6 +7910,11 @@ void netdev_adjacent_change_commit(struct net_device *old_dev, struct net_device *new_dev, struct net_device *dev) { + struct netdev_nested_priv priv = { + .flags = NESTED_SYNC_IMM | NESTED_SYNC_TODO, + .data = NULL, + }; + if (!new_dev || !old_dev) return; @@ -7885,7 +7922,7 @@ void netdev_adjacent_change_commit(struct net_device *old_dev, return; netdev_adjacent_dev_enable(dev, old_dev); - netdev_upper_dev_unlink(old_dev, dev); + __netdev_upper_dev_unlink(old_dev, dev, &priv); } EXPORT_SYMBOL(netdev_adjacent_change_commit); @@ -7893,13 +7930,18 @@ void netdev_adjacent_change_abort(struct net_device *old_dev, struct net_device *new_dev, struct net_device *dev) { + struct netdev_nested_priv priv = { + .flags = 0, + .data = NULL, + }; + if (!new_dev) return; if (old_dev && new_dev != old_dev) netdev_adjacent_dev_enable(dev, old_dev); - netdev_upper_dev_unlink(new_dev, dev); + __netdev_upper_dev_unlink(new_dev, dev, &priv); } EXPORT_SYMBOL(netdev_adjacent_change_abort); @@ -10083,6 +10125,19 @@ static void netdev_wait_allrefs(struct net_device *dev) void netdev_run_todo(void) { struct list_head list; +#ifdef CONFIG_LOCKDEP + struct list_head unlink_list; + + list_replace_init(&net_unlink_list, &unlink_list); + + while (!list_empty(&unlink_list)) { + struct net_device *dev = list_first_entry(&unlink_list, + struct net_device, + unlink_list); + list_del(&dev->unlink_list); + dev->nested_level = dev->lower_level - 1; + } +#endif /* Snapshot list, allow later requests */ list_replace_init(&net_todo_list, &list); @@ -10295,6 +10350,10 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev->gso_max_segs = GSO_MAX_SEGS; dev->upper_level = 1; dev->lower_level = 1; +#ifdef CONFIG_LOCKDEP + dev->nested_level = 0; + INIT_LIST_HEAD(&dev->unlink_list); +#endif INIT_LIST_HEAD(&dev->napi_list); INIT_LIST_HEAD(&dev->unreg_list); diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index 54cd568e7c2f..fa1c37ec40c9 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -637,7 +637,7 @@ int dev_uc_sync(struct net_device *to, struct net_device *from) if (to->addr_len != from->addr_len) return -EINVAL; - netif_addr_lock_nested(to); + netif_addr_lock(to); err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len); if (!err) __dev_set_rx_mode(to); @@ -667,7 +667,7 @@ int dev_uc_sync_multiple(struct net_device *to, struct net_device *from) if (to->addr_len != from->addr_len) return -EINVAL; - netif_addr_lock_nested(to); + netif_addr_lock(to); err = __hw_addr_sync_multiple(&to->uc, &from->uc, to->addr_len); if (!err) __dev_set_rx_mode(to); @@ -700,7 +700,7 @@ void dev_uc_unsync(struct net_device *to, struct net_device *from) * larger. */ netif_addr_lock_bh(from); - netif_addr_lock_nested(to); + netif_addr_lock(to); __hw_addr_unsync(&to->uc, &from->uc, to->addr_len); __dev_set_rx_mode(to); netif_addr_unlock(to); @@ -867,7 +867,7 @@ int dev_mc_sync(struct net_device *to, struct net_device *from) if (to->addr_len != from->addr_len) return -EINVAL; - netif_addr_lock_nested(to); + netif_addr_lock(to); err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len); if (!err) __dev_set_rx_mode(to); @@ -897,7 +897,7 @@ int dev_mc_sync_multiple(struct net_device *to, struct net_device *from) if (to->addr_len != from->addr_len) return -EINVAL; - netif_addr_lock_nested(to); + netif_addr_lock(to); err = __hw_addr_sync_multiple(&to->mc, &from->mc, to->addr_len); if (!err) __dev_set_rx_mode(to); @@ -922,7 +922,7 @@ void dev_mc_unsync(struct net_device *to, struct net_device *from) /* See the above comments inside dev_uc_unsync(). */ netif_addr_lock_bh(from); - netif_addr_lock_nested(to); + netif_addr_lock(to); __hw_addr_unsync(&to->mc, &from->mc, to->addr_len); __dev_set_rx_mode(to); netif_addr_unlock(to); From a7809ff90ce6c48598d3c4ab54eb599bec1e9c42 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Sat, 26 Sep 2020 22:26:25 +0530 Subject: [PATCH 217/322] net: qrtr: ns: Protect radix_tree_deref_slot() using rcu read locks The rcu read locks are needed to avoid potential race condition while dereferencing radix tree from multiple threads. The issue was identified by syzbot. Below is the crash report: ============================= WARNING: suspicious RCU usage 5.7.0-syzkaller #0 Not tainted ----------------------------- include/linux/radix-tree.h:176 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 2 locks held by kworker/u4:1/21: #0: ffff88821b097938 ((wq_completion)qrtr_ns_handler){+.+.}-{0:0}, at: spin_unlock_irq include/linux/spinlock.h:403 [inline] #0: ffff88821b097938 ((wq_completion)qrtr_ns_handler){+.+.}-{0:0}, at: process_one_work+0x6df/0xfd0 kernel/workqueue.c:2241 #1: ffffc90000dd7d80 ((work_completion)(&qrtr_ns.work)){+.+.}-{0:0}, at: process_one_work+0x71e/0xfd0 kernel/workqueue.c:2243 stack backtrace: CPU: 0 PID: 21 Comm: kworker/u4:1 Not tainted 5.7.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: qrtr_ns_handler qrtr_ns_worker Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1e9/0x30e lib/dump_stack.c:118 radix_tree_deref_slot include/linux/radix-tree.h:176 [inline] ctrl_cmd_new_lookup net/qrtr/ns.c:558 [inline] qrtr_ns_worker+0x2aff/0x4500 net/qrtr/ns.c:674 process_one_work+0x76e/0xfd0 kernel/workqueue.c:2268 worker_thread+0xa7f/0x1450 kernel/workqueue.c:2414 kthread+0x353/0x380 kernel/kthread.c:268 Fixes: 0c2204a4ad71 ("net: qrtr: Migrate nameservice to kernel from userspace") Reported-and-tested-by: syzbot+0f84f6eed90503da72fc@syzkaller.appspotmail.com Signed-off-by: Manivannan Sadhasivam Signed-off-by: David S. Miller --- net/qrtr/ns.c | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c index d8252fdab851..934999b56d60 100644 --- a/net/qrtr/ns.c +++ b/net/qrtr/ns.c @@ -193,12 +193,13 @@ static int announce_servers(struct sockaddr_qrtr *sq) struct qrtr_server *srv; struct qrtr_node *node; void __rcu **slot; - int ret; + int ret = 0; node = node_get(qrtr_ns.local_node); if (!node) return 0; + rcu_read_lock(); /* Announce the list of servers registered in this node */ radix_tree_for_each_slot(slot, &node->servers, &iter, 0) { srv = radix_tree_deref_slot(slot); @@ -206,11 +207,14 @@ static int announce_servers(struct sockaddr_qrtr *sq) ret = service_announce_new(sq, srv); if (ret < 0) { pr_err("failed to announce new service\n"); - return ret; + goto err_out; } } - return 0; +err_out: + rcu_read_unlock(); + + return ret; } static struct qrtr_server *server_add(unsigned int service, @@ -335,7 +339,7 @@ static int ctrl_cmd_bye(struct sockaddr_qrtr *from) struct qrtr_node *node; void __rcu **slot; struct kvec iv; - int ret; + int ret = 0; iv.iov_base = &pkt; iv.iov_len = sizeof(pkt); @@ -344,11 +348,13 @@ static int ctrl_cmd_bye(struct sockaddr_qrtr *from) if (!node) return 0; + rcu_read_lock(); /* Advertise removal of this client to all servers of remote node */ radix_tree_for_each_slot(slot, &node->servers, &iter, 0) { srv = radix_tree_deref_slot(slot); server_del(node, srv->port); } + rcu_read_unlock(); /* Advertise the removal of this client to all local servers */ local_node = node_get(qrtr_ns.local_node); @@ -359,6 +365,7 @@ static int ctrl_cmd_bye(struct sockaddr_qrtr *from) pkt.cmd = cpu_to_le32(QRTR_TYPE_BYE); pkt.client.node = cpu_to_le32(from->sq_node); + rcu_read_lock(); radix_tree_for_each_slot(slot, &local_node->servers, &iter, 0) { srv = radix_tree_deref_slot(slot); @@ -372,11 +379,14 @@ static int ctrl_cmd_bye(struct sockaddr_qrtr *from) ret = kernel_sendmsg(qrtr_ns.sock, &msg, &iv, 1, sizeof(pkt)); if (ret < 0) { pr_err("failed to send bye cmd\n"); - return ret; + goto err_out; } } - return 0; +err_out: + rcu_read_unlock(); + + return ret; } static int ctrl_cmd_del_client(struct sockaddr_qrtr *from, @@ -394,7 +404,7 @@ static int ctrl_cmd_del_client(struct sockaddr_qrtr *from, struct list_head *li; void __rcu **slot; struct kvec iv; - int ret; + int ret = 0; iv.iov_base = &pkt; iv.iov_len = sizeof(pkt); @@ -434,6 +444,7 @@ static int ctrl_cmd_del_client(struct sockaddr_qrtr *from, pkt.client.node = cpu_to_le32(node_id); pkt.client.port = cpu_to_le32(port); + rcu_read_lock(); radix_tree_for_each_slot(slot, &local_node->servers, &iter, 0) { srv = radix_tree_deref_slot(slot); @@ -447,11 +458,14 @@ static int ctrl_cmd_del_client(struct sockaddr_qrtr *from, ret = kernel_sendmsg(qrtr_ns.sock, &msg, &iv, 1, sizeof(pkt)); if (ret < 0) { pr_err("failed to send del client cmd\n"); - return ret; + goto err_out; } } - return 0; +err_out: + rcu_read_unlock(); + + return ret; } static int ctrl_cmd_new_server(struct sockaddr_qrtr *from, @@ -554,6 +568,7 @@ static int ctrl_cmd_new_lookup(struct sockaddr_qrtr *from, filter.service = service; filter.instance = instance; + rcu_read_lock(); radix_tree_for_each_slot(node_slot, &nodes, &node_iter, 0) { node = radix_tree_deref_slot(node_slot); @@ -568,6 +583,7 @@ static int ctrl_cmd_new_lookup(struct sockaddr_qrtr *from, lookup_notify(from, srv, true); } } + rcu_read_unlock(); /* Empty notification, to indicate end of listing */ lookup_notify(from, NULL, true); From 996d585b079ad494a30cac10e08585bcd5345125 Mon Sep 17 00:00:00 2001 From: Vincent Huang Date: Mon, 28 Sep 2020 16:19:05 -0700 Subject: [PATCH 218/322] Input: trackpoint - enable Synaptics trackpoints Add Synaptics IDs in trackpoint_start_protocol() to mark them as valid. Signed-off-by: Vincent Huang Fixes: 6c77545af100 ("Input: trackpoint - add new trackpoint variant IDs") Reviewed-by: Harry Cutts Tested-by: Harry Cutts Link: https://lore.kernel.org/r/20200924053013.1056953-1-vincent.huang@tw.synaptics.com Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/trackpoint.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/input/mouse/trackpoint.c b/drivers/input/mouse/trackpoint.c index 854d5e758724..ef2fa0905208 100644 --- a/drivers/input/mouse/trackpoint.c +++ b/drivers/input/mouse/trackpoint.c @@ -282,6 +282,8 @@ static int trackpoint_start_protocol(struct psmouse *psmouse, case TP_VARIANT_ALPS: case TP_VARIANT_ELAN: case TP_VARIANT_NXP: + case TP_VARIANT_JYT_SYNAPTICS: + case TP_VARIANT_SYNAPTICS: if (variant_id) *variant_id = param[0]; if (firmware_id) From 5fc27b098dafb8e30794a9db0705074c7d766179 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Mon, 28 Sep 2020 16:21:17 -0700 Subject: [PATCH 219/322] Input: i8042 - add nopnp quirk for Acer Aspire 5 A515 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Touchpad on this laptop is not detected properly during boot, as PNP enumerates (wrongly) AUX port as disabled on this machine. Fix that by adding this board (with admittedly quite funny DMI identifiers) to nopnp quirk list. Reported-by: Andrés Barrantes Silman Signed-off-by: Jiri Kosina Link: https://lore.kernel.org/r/nycvar.YFH.7.76.2009252337340.3336@cbobk.fhfr.pm Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/serio/i8042-x86ia64io.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index 37fb9aa88f9c..a4c9b9652560 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -721,6 +721,13 @@ static const struct dmi_system_id __initconst i8042_dmi_nopnp_table[] = { DMI_MATCH(DMI_BOARD_VENDOR, "MICRO-STAR INTERNATIONAL CO., LTD"), }, }, + { + /* Acer Aspire 5 A515 */ + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "Grumpy_PK"), + DMI_MATCH(DMI_BOARD_VENDOR, "PK"), + }, + }, { } }; From 9666ea66a74adfe295cb3a8760c76e1ef70f9caf Mon Sep 17 00:00:00 2001 From: Wilken Gottwalt Date: Mon, 28 Sep 2020 11:01:04 +0200 Subject: [PATCH 220/322] net: usb: ax88179_178a: fix missing stop entry in driver_info Adds the missing .stop entry in the Belkin driver_info structure. Fixes: e20bd60bf62a ("net: usb: asix88179_178a: Add support for the Belkin B2B128") Signed-off-by: Wilken Gottwalt Signed-off-by: David S. Miller --- drivers/net/usb/ax88179_178a.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index 125f7bf57590..8f1798b95a02 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1823,6 +1823,7 @@ static const struct driver_info belkin_info = { .status = ax88179_status, .link_reset = ax88179_link_reset, .reset = ax88179_reset, + .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, .tx_fixup = ax88179_tx_fixup, From c92a79829c7c169139874aa1d4bf6da32d10c38a Mon Sep 17 00:00:00 2001 From: Wilken Gottwalt Date: Mon, 28 Sep 2020 11:17:40 +0200 Subject: [PATCH 221/322] net: usb: ax88179_178a: add MCT usb 3.0 adapter Adds the driver_info and usb ids of the AX88179 based MCT U3-A9003 USB 3.0 ethernet adapter. Signed-off-by: Wilken Gottwalt Signed-off-by: David S. Miller --- drivers/net/usb/ax88179_178a.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index 8f1798b95a02..5541f3faedbc 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1842,6 +1842,19 @@ static const struct driver_info toshiba_info = { .tx_fixup = ax88179_tx_fixup, }; +static const struct driver_info mct_info = { + .description = "MCT USB 3.0 Gigabit Ethernet Adapter", + .bind = ax88179_bind, + .unbind = ax88179_unbind, + .status = ax88179_status, + .link_reset = ax88179_link_reset, + .reset = ax88179_reset, + .stop = ax88179_stop, + .flags = FLAG_ETHER | FLAG_FRAMING_AX, + .rx_fixup = ax88179_rx_fixup, + .tx_fixup = ax88179_tx_fixup, +}; + static const struct usb_device_id products[] = { { /* ASIX AX88179 10/100/1000 */ @@ -1879,6 +1892,10 @@ static const struct usb_device_id products[] = { /* Toshiba USB 3.0 GBit Ethernet Adapter */ USB_DEVICE(0x0930, 0x0a13), .driver_info = (unsigned long)&toshiba_info, +}, { + /* Magic Control Technology U3-A9003 USB 3.0 Gigabit Ethernet Adapter */ + USB_DEVICE(0x0711, 0x0179), + .driver_info = (unsigned long)&mct_info, }, { }, }; From 3ddf9b431b931544dc2f94e8f6a055ff501436fa Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 28 Sep 2020 17:53:29 -0700 Subject: [PATCH 222/322] genetlink: add missing kdoc for validation flags Validation flags are missing kdoc, add it. Fixes: ef6243acb478 ("genetlink: optionally validate strictly/dumps") Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/net/genetlink.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 6e5f1e1aa822..8899d7429ccb 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -138,6 +138,7 @@ genl_dumpit_info(struct netlink_callback *cb) * @cmd: command identifier * @internal_flags: flags used by the family * @flags: flags + * @validate: validation flags from enum genl_validate_flags * @doit: standard command callback * @start: start callback for dumps * @dumpit: callback for dumpers From 78b70155dc73280be2c7084a3be591161cdc6d0c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 28 Sep 2020 17:58:41 -0700 Subject: [PATCH 223/322] ethtool: mark netlink family as __ro_after_init Like all genl families ethtool_genl_family needs to not be a straight up constant, because it's modified/initialized by genl_register_family(). After init, however, it's only passed to genlmsg_put() & co. therefore we can mark it as __ro_after_init. Since genl_family structure contains function pointers mark this as a fix. Fixes: 2b4a8990b7df ("ethtool: introduce ethtool netlink interface") Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/ethtool/netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 5c2072765be7..0c3f54baec4e 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -866,7 +866,7 @@ static const struct genl_multicast_group ethtool_nl_mcgrps[] = { [ETHNL_MCGRP_MONITOR] = { .name = ETHTOOL_MCGRP_MONITOR_NAME }, }; -static struct genl_family ethtool_genl_family = { +static struct genl_family ethtool_genl_family __ro_after_init = { .name = ETHTOOL_GENL_NAME, .version = ETHTOOL_GENL_VERSION, .netnsok = true, From c8d317aa1887b40b188ec3aaa6e9e524333caed1 Mon Sep 17 00:00:00 2001 From: Hao Xu Date: Tue, 29 Sep 2020 20:00:45 +0800 Subject: [PATCH 224/322] io_uring: fix async buffered reads when readahead is disabled The async buffered reads feature is not working when readahead is turned off. There are two things to concern: - when doing retry in io_read, not only the IOCB_WAITQ flag but also the IOCB_NOWAIT flag is still set, which makes it goes to would_block phase in generic_file_buffered_read() and then return -EAGAIN. After that, the io-wq thread work is queued, and later doing the async reads in the old way. - even if we remove IOCB_NOWAIT when doing retry, the feature is still not running properly, since in generic_file_buffered_read() it goes to lock_page_killable() after calling mapping->a_ops->readpage() to do IO, and thus causing process to sleep. Fixes: 1a0a7853b901 ("mm: support async buffered reads in generic_file_buffered_read()") Fixes: 3b2a4439e0ae ("io_uring: get rid of kiocb_wait_page_queue_init()") Signed-off-by: Hao Xu Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 ++ mm/filemap.c | 6 +++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 556e4a2ead07..b89cc87830b1 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -3049,6 +3049,7 @@ static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode, if (!wake_page_match(wpq, key)) return 0; + req->rw.kiocb.ki_flags &= ~IOCB_WAITQ; list_del_init(&wait->entry); init_task_work(&req->task_work, io_req_task_submit); @@ -3106,6 +3107,7 @@ static bool io_rw_should_retry(struct io_kiocb *req) wait->wait.flags = 0; INIT_LIST_HEAD(&wait->wait.entry); kiocb->ki_flags |= IOCB_WAITQ; + kiocb->ki_flags &= ~IOCB_NOWAIT; kiocb->ki_waitq = wait; io_get_req_task(req); diff --git a/mm/filemap.c b/mm/filemap.c index 1aaea26556cc..ea383478fc22 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2267,7 +2267,11 @@ readpage: } if (!PageUptodate(page)) { - error = lock_page_killable(page); + if (iocb->ki_flags & IOCB_WAITQ) + error = lock_page_async(page, iocb->ki_waitq); + else + error = lock_page_killable(page); + if (unlikely(error)) goto readpage_error; if (!PageUptodate(page)) { From 632bfb6323799c087fcb4108dfe59518609667a7 Mon Sep 17 00:00:00 2001 From: yangerkun Date: Sat, 5 Sep 2020 19:25:56 +0800 Subject: [PATCH 225/322] blk-mq: call commit_rqs while list empty but error happen Blk-mq should call commit_rqs once 'bd.last != true' and no more request will come(so virtscsi can kick the virtqueue, e.g.). We already do that in 'blk_mq_dispatch_rq_list/blk_mq_try_issue_list_directly' while list not empty and 'queued > 0'. However, we can seen the same scene once the last request in list call queue_rq and return error like BLK_STS_IOERR which will not requeue the request, and lead that list empty but need call commit_rqs too(Or the request for virtscsi will stay timeout until other request kick virtqueue). We found this problem by do fsstress test with offline/online virtscsi device repeat quickly. Fixes: d666ba98f849 ("blk-mq: add mq_ops->commit_rqs()") Reported-by: zhangyi (F) Signed-off-by: yangerkun Reviewed-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index b3d2785eefe9..cdced4aca2e8 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1412,6 +1412,11 @@ out: hctx->dispatched[queued_to_index(queued)]++; + /* If we didn't flush the entire list, we could have told the driver + * there was more coming, but that turned out to be a lie. + */ + if ((!list_empty(list) || errors) && q->mq_ops->commit_rqs && queued) + q->mq_ops->commit_rqs(hctx); /* * Any items that need requeuing? Stuff them into hctx->dispatch, * that is where we will continue on next queue run. @@ -1425,14 +1430,6 @@ out: blk_mq_release_budgets(q, nr_budgets); - /* - * If we didn't flush the entire list, we could have told - * the driver there was more coming, but that turned out to - * be a lie. - */ - if (q->mq_ops->commit_rqs && queued) - q->mq_ops->commit_rqs(hctx); - spin_lock(&hctx->lock); list_splice_tail_init(list, &hctx->dispatch); spin_unlock(&hctx->lock); @@ -2079,6 +2076,7 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, struct list_head *list) { int queued = 0; + int errors = 0; while (!list_empty(list)) { blk_status_t ret; @@ -2095,6 +2093,7 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, break; } blk_mq_end_request(rq, ret); + errors++; } else queued++; } @@ -2104,7 +2103,8 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, * the driver there was more coming, but that turned out to * be a lie. */ - if (!list_empty(list) && hctx->queue->mq_ops->commit_rqs && queued) + if ((!list_empty(list) || errors) && + hctx->queue->mq_ops->commit_rqs && queued) hctx->queue->mq_ops->commit_rqs(hctx); } From 9cf51446e68607136e42a4e531a30c888c472463 Mon Sep 17 00:00:00 2001 From: He Zhe Date: Mon, 28 Sep 2020 17:00:23 +0800 Subject: [PATCH 226/322] bpf, powerpc: Fix misuse of fallthrough in bpf_jit_comp() The user defined label following "fallthrough" is not considered by GCC and causes build failure. kernel-source/include/linux/compiler_attributes.h:208:41: error: attribute 'fallthrough' not preceding a case label or default label [-Werror] 208 define fallthrough _attribute((fallthrough_)) ^~~~~~~~~~~~~ Fixes: df561f6688fe ("treewide: Use fallthrough pseudo-keyword") Signed-off-by: He Zhe Signed-off-by: Daniel Borkmann Cc: Gustavo A. R. Silva Link: https://lore.kernel.org/bpf/20200928090023.38117-1-zhe.he@windriver.com --- arch/powerpc/net/bpf_jit_comp.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 78d61f97371e..e809cb5a1631 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -475,7 +475,6 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, case BPF_JMP | BPF_JSET | BPF_K: case BPF_JMP | BPF_JSET | BPF_X: true_cond = COND_NE; - fallthrough; cond_branch: /* same targets, can avoid doing the test :) */ if (filter[i].jt == filter[i].jf) { From 851e6f61cd076954f9d521e0d79b173ad3a2453b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 29 Sep 2020 12:27:23 -0400 Subject: [PATCH 227/322] tracing: Fix trace_find_next_entry() accounting of temp buffer size The temp buffer size variable for trace_find_next_entry() was incorrectly being updated when the size did not change. The temp buffer size should only be updated when it is reallocated. This is mostly an issue when used with ftrace_dump(). That's because ftrace_dump() can not allocate a new buffer, and instead uses a temporary buffer with a fix size. But the variable that keeps track of that size is incorrectly updated with each call, and it could fall into the path that would try to reallocate the buffer and produce a warning. ------------[ cut here ]------------ WARNING: CPU: 1 PID: 1601 at kernel/trace/trace.c:3548 trace_find_next_entry+0xd0/0xe0 Modules linked in [..] CPU: 1 PID: 1601 Comm: bash Not tainted 5.9.0-rc5-test+ #521 Hardware name: Hewlett-Packard HP Compaq Pro 6300 SFF/339A, BIOS K01 v03.03 07/14/2016 RIP: 0010:trace_find_next_entry+0xd0/0xe0 Code: 40 21 00 00 4c 89 e1 31 d2 4c 89 ee 48 89 df e8 c6 9e ff ff 89 ab 54 21 00 00 5b 5d 41 5c 41 5d c3 48 63 d5 eb bf 31 c0 eb f0 <0f> 0b 48 63 d5 eb b4 66 0f 1f 84 00 00 00 00 00 53 48 8d 8f 60 21 RSP: 0018:ffff95a4f2e8bd70 EFLAGS: 00010046 RAX: ffffffff96679fc0 RBX: ffffffff97910de0 RCX: ffffffff96679fc0 RDX: ffff95a4f2e8bd98 RSI: ffff95a4ee321098 RDI: ffffffff97913000 RBP: 0000000000000018 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000046 R12: ffff95a4f2e8bd98 R13: 0000000000000000 R14: ffff95a4ee321098 R15: 00000000009aa301 FS: 00007f8565484740(0000) GS:ffff95a55aa40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055876bd43d90 CR3: 00000000b76e6003 CR4: 00000000001706e0 Call Trace: trace_print_lat_context+0x58/0x2d0 ? cpumask_next+0x16/0x20 print_trace_line+0x1a4/0x4f0 ftrace_dump.cold+0xad/0x12c __handle_sysrq.cold+0x51/0x126 write_sysrq_trigger+0x3f/0x4a proc_reg_write+0x53/0x80 vfs_write+0xca/0x210 ksys_write+0x70/0xf0 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f8565579487 Code: 64 89 02 48 c7 c0 ff ff ff ff eb bb 0f 1f 80 00 00 00 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74 24 RSP: 002b:00007ffd40707948 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f8565579487 RDX: 0000000000000002 RSI: 000055876bd74de0 RDI: 0000000000000001 RBP: 000055876bd74de0 R08: 000000000000000a R09: 0000000000000001 R10: 000055876bdec280 R11: 0000000000000246 R12: 0000000000000002 R13: 00007f856564a500 R14: 0000000000000002 R15: 00007f856564a700 irq event stamp: 109958 ---[ end trace 7aab5b7e51484b00 ]--- Not only fix the updating of the temp buffer, but also do not free the temp buffer before a new buffer is allocated (there's no reason to not continue to use the current temp buffer if an allocation fails). Cc: stable@vger.kernel.org Fixes: 8e99cf91b99bb ("tracing: Do not allocate buffer in trace_find_next_entry() in atomic") Reported-by: Anna-Maria Behnsen Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2a7c26345e83..d3e5de717df2 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3546,13 +3546,15 @@ struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, if (iter->ent && iter->ent != iter->temp) { if ((!iter->temp || iter->temp_size < iter->ent_size) && !WARN_ON_ONCE(iter->temp == static_temp_buf)) { - kfree(iter->temp); - iter->temp = kmalloc(iter->ent_size, GFP_KERNEL); - if (!iter->temp) + void *temp; + temp = kmalloc(iter->ent_size, GFP_KERNEL); + if (!temp) return NULL; + kfree(iter->temp); + iter->temp = temp; + iter->temp_size = iter->ent_size; } memcpy(iter->temp, iter->ent, iter->ent_size); - iter->temp_size = iter->ent_size; iter->ent = iter->temp; } entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts); From b40341fad6cc2daa195f8090fd3348f18fff640a Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 29 Sep 2020 12:40:31 -0400 Subject: [PATCH 228/322] ftrace: Move RCU is watching check after recursion check The first thing that the ftrace function callback helper functions should do is to check for recursion. Peter Zijlstra found that when "rcu_is_watching()" had its notrace removed, it caused perf function tracing to crash. This is because the call of rcu_is_watching() is tested before function recursion is checked and and if it is traced, it will cause an infinite recursion loop. rcu_is_watching() should still stay notrace, but to prevent this should never had crashed in the first place. The recursion prevention must be the first thing done in callback functions. Link: https://lore.kernel.org/r/20200929112541.GM2628@hirez.programming.kicks-ass.net Cc: stable@vger.kernel.org Cc: Paul McKenney Fixes: c68c0fa293417 ("ftrace: Have ftrace_ops_get_func() handle RCU and PER_CPU flags too") Acked-by: Peter Zijlstra (Intel) Reported-by: Peter Zijlstra (Intel) Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 603255f5f085..541453927c82 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -6993,16 +6993,14 @@ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip, { int bit; - if ((op->flags & FTRACE_OPS_FL_RCU) && !rcu_is_watching()) - return; - bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX); if (bit < 0) return; preempt_disable_notrace(); - op->func(ip, parent_ip, op, regs); + if (!(op->flags & FTRACE_OPS_FL_RCU) || rcu_is_watching()) + op->func(ip, parent_ip, op, regs); preempt_enable_notrace(); trace_clear_recursion(bit); From 1a03b8a35a957f9f38ecb8a97443b7380bbf6a8b Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Tue, 29 Sep 2020 09:58:06 +0800 Subject: [PATCH 229/322] virtio-net: don't disable guest csum when disable LRO Open vSwitch and Linux bridge will disable LRO of the interface when this interface added to them. Now when disable the LRO, the virtio-net csum is disable too. That drops the forwarding performance. Fixes: a02e8964eaf9 ("virtio-net: ethtool configurable LRO") Cc: Michael S. Tsirkin Cc: Jason Wang Cc: Willem de Bruijn Signed-off-by: Tonghao Zhang Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 263b005981bd..668685c09e65 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -63,6 +63,11 @@ static const unsigned long guest_offloads[] = { VIRTIO_NET_F_GUEST_CSUM }; +#define GUEST_OFFLOAD_LRO_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ + (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \ + (1ULL << VIRTIO_NET_F_GUEST_ECN) | \ + (1ULL << VIRTIO_NET_F_GUEST_UFO)) + struct virtnet_stat_desc { char desc[ETH_GSTRING_LEN]; size_t offset; @@ -2531,7 +2536,8 @@ static int virtnet_set_features(struct net_device *dev, if (features & NETIF_F_LRO) offloads = vi->guest_offloads_capable; else - offloads = 0; + offloads = vi->guest_offloads_capable & + ~GUEST_OFFLOAD_LRO_MASK; err = virtnet_set_guest_offloads(vi, offloads); if (err) From bbc4d71d63549bcd003a430de18a72a742d8c91e Mon Sep 17 00:00:00 2001 From: Willy Liu Date: Tue, 29 Sep 2020 10:10:49 +0800 Subject: [PATCH 230/322] net: phy: realtek: fix rtl8211e rx/tx delay config There are two chip pins named TXDLY and RXDLY which actually adds the 2ns delays to TXC and RXC for TXD/RXD latching. These two pins can config via 4.7k-ohm resistor to 3.3V hw setting, but also config via software setting (extension page 0xa4 register 0x1c bit13 12 and 11). The configuration register definitions from table 13 official PHY datasheet: PHYAD[2:0] = PHY Address AN[1:0] = Auto-Negotiation Mode = Interface Mode Select RX Delay = RX Delay TX Delay = TX Delay SELRGV = RGMII/GMII Selection This table describes how to config these hw pins via external pull-high or pull- low resistor. It is a misunderstanding that mapping it as register bits below: 8:6 = PHY Address 5:4 = Auto-Negotiation 3 = Interface Mode Select 2 = RX Delay 1 = TX Delay 0 = SELRGV So I removed these descriptions above and add related settings as below: 14 = reserved 13 = force Tx RX Delay controlled by bit12 bit11 12 = Tx Delay 11 = Rx Delay 10:0 = Test && debug settings reserved by realtek Test && debug settings are not recommend to modify by default. Fixes: f81dadbcf7fd ("net: phy: realtek: Add rtl8211e rx/tx delays config") Signed-off-by: Willy Liu Signed-off-by: David S. Miller --- drivers/net/phy/realtek.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index 95dbe5e8e1d8..0f0960971800 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -1,6 +1,5 @@ // SPDX-License-Identifier: GPL-2.0+ -/* - * drivers/net/phy/realtek.c +/* drivers/net/phy/realtek.c * * Driver for Realtek PHYs * @@ -32,9 +31,9 @@ #define RTL8211F_TX_DELAY BIT(8) #define RTL8211F_RX_DELAY BIT(3) -#define RTL8211E_TX_DELAY BIT(1) -#define RTL8211E_RX_DELAY BIT(2) -#define RTL8211E_MODE_MII_GMII BIT(3) +#define RTL8211E_CTRL_DELAY BIT(13) +#define RTL8211E_TX_DELAY BIT(12) +#define RTL8211E_RX_DELAY BIT(11) #define RTL8201F_ISR 0x1e #define RTL8201F_IER 0x13 @@ -246,16 +245,16 @@ static int rtl8211e_config_init(struct phy_device *phydev) /* enable TX/RX delay for rgmii-* modes, and disable them for rgmii. */ switch (phydev->interface) { case PHY_INTERFACE_MODE_RGMII: - val = 0; + val = RTL8211E_CTRL_DELAY | 0; break; case PHY_INTERFACE_MODE_RGMII_ID: - val = RTL8211E_TX_DELAY | RTL8211E_RX_DELAY; + val = RTL8211E_CTRL_DELAY | RTL8211E_TX_DELAY | RTL8211E_RX_DELAY; break; case PHY_INTERFACE_MODE_RGMII_RXID: - val = RTL8211E_RX_DELAY; + val = RTL8211E_CTRL_DELAY | RTL8211E_RX_DELAY; break; case PHY_INTERFACE_MODE_RGMII_TXID: - val = RTL8211E_TX_DELAY; + val = RTL8211E_CTRL_DELAY | RTL8211E_TX_DELAY; break; default: /* the rest of the modes imply leaving delays as is. */ return 0; @@ -263,11 +262,12 @@ static int rtl8211e_config_init(struct phy_device *phydev) /* According to a sample driver there is a 0x1c config register on the * 0xa4 extension page (0x7) layout. It can be used to disable/enable - * the RX/TX delays otherwise controlled by RXDLY/TXDLY pins. It can - * also be used to customize the whole configuration register: - * 8:6 = PHY Address, 5:4 = Auto-Negotiation, 3 = Interface Mode Select, - * 2 = RX Delay, 1 = TX Delay, 0 = SELRGV (see original PHY datasheet - * for details). + * the RX/TX delays otherwise controlled by RXDLY/TXDLY pins. + * The configuration register definition: + * 14 = reserved + * 13 = Force Tx RX Delay controlled by bit12 bit11, + * 12 = RX Delay, 11 = TX Delay + * 10:0 = Test && debug settings reserved by realtek */ oldpage = phy_select_page(phydev, 0x7); if (oldpage < 0) @@ -277,7 +277,8 @@ static int rtl8211e_config_init(struct phy_device *phydev) if (ret) goto err_restore_page; - ret = __phy_modify(phydev, 0x1c, RTL8211E_TX_DELAY | RTL8211E_RX_DELAY, + ret = __phy_modify(phydev, 0x1c, RTL8211E_CTRL_DELAY + | RTL8211E_TX_DELAY | RTL8211E_RX_DELAY, val); err_restore_page: From 460e985ea07ec23d46af257e84e57b5409576577 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 29 Sep 2020 14:20:24 +0300 Subject: [PATCH 231/322] net: dsa: felix: fix incorrect action offsets for VCAP IS2 The port mask width was larger than the actual number of ports, and therefore, all fields following this one were also shifted by the number of excess bits. But the driver doesn't use the REW_OP, SMAC_REPLACE_ENA or ACL_ID bits from the action vector, so the bug was inconsequential. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/ocelot/felix_vsc9959.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index 36db631a55e6..d9c90849274e 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -685,12 +685,12 @@ static struct vcap_field vsc9959_vcap_is2_actions[] = { [VCAP_IS2_ACT_POLICE_ENA] = { 9, 1}, [VCAP_IS2_ACT_POLICE_IDX] = { 10, 9}, [VCAP_IS2_ACT_POLICE_VCAP_ONLY] = { 19, 1}, - [VCAP_IS2_ACT_PORT_MASK] = { 20, 11}, - [VCAP_IS2_ACT_REW_OP] = { 31, 9}, - [VCAP_IS2_ACT_SMAC_REPLACE_ENA] = { 40, 1}, - [VCAP_IS2_ACT_RSV] = { 41, 2}, - [VCAP_IS2_ACT_ACL_ID] = { 43, 6}, - [VCAP_IS2_ACT_HIT_CNT] = { 49, 32}, + [VCAP_IS2_ACT_PORT_MASK] = { 20, 6}, + [VCAP_IS2_ACT_REW_OP] = { 26, 9}, + [VCAP_IS2_ACT_SMAC_REPLACE_ENA] = { 35, 1}, + [VCAP_IS2_ACT_RSV] = { 36, 2}, + [VCAP_IS2_ACT_ACL_ID] = { 38, 6}, + [VCAP_IS2_ACT_HIT_CNT] = { 44, 32}, }; static const struct vcap_props vsc9959_vcap_props[] = { From eaa0355c668d2793e6ced32926fa22d518752845 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 29 Sep 2020 14:20:25 +0300 Subject: [PATCH 232/322] net: dsa: seville: fix VCAP IS2 action width Since the actions are packed together in the action RAM, an incorrect action width means that no action except the first one would behave correctly. The tc-flower offload has probably not been tested on this hardware since its introduction. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/ocelot/seville_vsc9953.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/ocelot/seville_vsc9953.c b/drivers/net/dsa/ocelot/seville_vsc9953.c index 29df0797ecf5..110070a60041 100644 --- a/drivers/net/dsa/ocelot/seville_vsc9953.c +++ b/drivers/net/dsa/ocelot/seville_vsc9953.c @@ -706,7 +706,7 @@ static const struct vcap_props vsc9953_vcap_props[] = { .action_type_width = 1, .action_table = { [IS2_ACTION_TYPE_NORMAL] = { - .width = 44, + .width = 50, /* HIT_CNT not included */ .count = 2 }, [IS2_ACTION_TYPE_SMAC_SIP] = { From 64ff609b551914d8c2c51d828920f2ffc5237de7 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 28 Sep 2020 10:22:56 -0500 Subject: [PATCH 233/322] dt-bindings: Fix 'reg' size issues in zynqmp examples The default sizes in examples for 'reg' are 1 cell each. Fix the incorrect sizes in zynqmp examples: Documentation/devicetree/bindings/dma/xilinx/xlnx,zynqmp-dpdma.example.dt.yaml: example-0: dma-controller@fd4c0000:reg:0: [0, 4249616384, 0, 4096] is too long From schema: /usr/local/lib/python3.8/dist-packages/dtschema/schemas/reg.yaml Documentation/devicetree/bindings/display/xlnx/xlnx,zynqmp-dpsub.example.dt.yaml: example-0: display@fd4a0000:reg:0: [0, 4249485312, 0, 4096] is too long From schema: /usr/local/lib/python3.8/dist-packages/dtschema/schemas/reg.yaml Documentation/devicetree/bindings/display/xlnx/xlnx,zynqmp-dpsub.example.dt.yaml: example-0: display@fd4a0000:reg:1: [0, 4249526272, 0, 4096] is too long From schema: /usr/local/lib/python3.8/dist-packages/dtschema/schemas/reg.yaml Documentation/devicetree/bindings/display/xlnx/xlnx,zynqmp-dpsub.example.dt.yaml: example-0: display@fd4a0000:reg:2: [0, 4249530368, 0, 4096] is too long From schema: /usr/local/lib/python3.8/dist-packages/dtschema/schemas/reg.yaml Documentation/devicetree/bindings/display/xlnx/xlnx,zynqmp-dpsub.example.dt.yaml: example-0: display@fd4a0000:reg:3: [0, 4249534464, 0, 4096] is too long From schema: /usr/local/lib/python3.8/dist-packages/dtschema/schemas/reg.yaml Cc: Hyun Kwon Cc: Laurent Pinchart Cc: Vinod Koul Cc: dri-devel@lists.freedesktop.org Cc: dmaengine@vger.kernel.org Acked-by: Michal Simek Signed-off-by: Rob Herring --- .../bindings/display/xlnx/xlnx,zynqmp-dpsub.yaml | 8 ++++---- .../devicetree/bindings/dma/xilinx/xlnx,zynqmp-dpdma.yaml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Documentation/devicetree/bindings/display/xlnx/xlnx,zynqmp-dpsub.yaml b/Documentation/devicetree/bindings/display/xlnx/xlnx,zynqmp-dpsub.yaml index 52a939cade3b..7b9d468c3e52 100644 --- a/Documentation/devicetree/bindings/display/xlnx/xlnx,zynqmp-dpsub.yaml +++ b/Documentation/devicetree/bindings/display/xlnx/xlnx,zynqmp-dpsub.yaml @@ -145,10 +145,10 @@ examples: display@fd4a0000 { compatible = "xlnx,zynqmp-dpsub-1.7"; - reg = <0x0 0xfd4a0000 0x0 0x1000>, - <0x0 0xfd4aa000 0x0 0x1000>, - <0x0 0xfd4ab000 0x0 0x1000>, - <0x0 0xfd4ac000 0x0 0x1000>; + reg = <0xfd4a0000 0x1000>, + <0xfd4aa000 0x1000>, + <0xfd4ab000 0x1000>, + <0xfd4ac000 0x1000>; reg-names = "dp", "blend", "av_buf", "aud"; interrupts = <0 119 4>; interrupt-parent = <&gic>; diff --git a/Documentation/devicetree/bindings/dma/xilinx/xlnx,zynqmp-dpdma.yaml b/Documentation/devicetree/bindings/dma/xilinx/xlnx,zynqmp-dpdma.yaml index 5de510f8c88c..2a595b18ff6c 100644 --- a/Documentation/devicetree/bindings/dma/xilinx/xlnx,zynqmp-dpdma.yaml +++ b/Documentation/devicetree/bindings/dma/xilinx/xlnx,zynqmp-dpdma.yaml @@ -57,7 +57,7 @@ examples: dma: dma-controller@fd4c0000 { compatible = "xlnx,zynqmp-dpdma"; - reg = <0x0 0xfd4c0000 0x0 0x1000>; + reg = <0xfd4c0000 0x1000>; interrupts = ; interrupt-parent = <&gic>; clocks = <&dpdma_clk>; From efe84d408bf41975db8506d3a1cc02e794e2309c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sat, 19 Sep 2020 16:39:22 +0200 Subject: [PATCH 234/322] scripts/dtc: only append to HOST_EXTRACFLAGS instead of overwriting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When building with $ HOST_EXTRACFLAGS=-g make the expectation is that host tools are built with debug informations. This however doesn't happen if the Makefile assigns a new value to the HOST_EXTRACFLAGS instead of appending to it. So use += instead of := for the first assignment. Fixes: e3fd9b5384f3 ("scripts/dtc: consolidate include path options in Makefile") Signed-off-by: Uwe Kleine-König Signed-off-by: Rob Herring --- scripts/dtc/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/dtc/Makefile b/scripts/dtc/Makefile index a698ece43fff..4852bf44e913 100644 --- a/scripts/dtc/Makefile +++ b/scripts/dtc/Makefile @@ -9,7 +9,7 @@ dtc-objs := dtc.o flattree.o fstree.o data.o livetree.o treesource.o \ dtc-objs += dtc-lexer.lex.o dtc-parser.tab.o # Source files need to get at the userspace version of libfdt_env.h to compile -HOST_EXTRACFLAGS := -I $(srctree)/$(src)/libfdt +HOST_EXTRACFLAGS += -I $(srctree)/$(src)/libfdt ifeq ($(shell pkg-config --exists yaml-0.1 2>/dev/null && echo yes),) ifneq ($(CHECK_DT_BINDING)$(CHECK_DTBS),) From 97cf32996c46d9935cc133d910a75fb687dd6144 Mon Sep 17 00:00:00 2001 From: Sudheesh Mavila Date: Tue, 15 Sep 2020 12:48:20 +0530 Subject: [PATCH 235/322] drm/amd/pm: Removed fixed clock in auto mode DPM SMU10_UMD_PSTATE_PEAK_FCLK value should not be used to set the DPM. Suggested-by: Evan Quan Reviewed-by: Evan Quan Signed-off-by: Sudheesh Mavila Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c index 9ee8cf8267c8..43f7adff6cb7 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c @@ -563,6 +563,8 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, struct smu10_hwmgr *data = hwmgr->backend; uint32_t min_sclk = hwmgr->display_config->min_core_set_clock; uint32_t min_mclk = hwmgr->display_config->min_mem_set_clock/100; + uint32_t index_fclk = data->clock_vol_info.vdd_dep_on_fclk->count - 1; + uint32_t index_socclk = data->clock_vol_info.vdd_dep_on_socclk->count - 1; if (hwmgr->smu_version < 0x1E3700) { pr_info("smu firmware version too old, can not set dpm level\n"); @@ -676,13 +678,13 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinFclkByFreq, hwmgr->display_config->num_display > 3 ? - SMU10_UMD_PSTATE_PEAK_FCLK : + data->clock_vol_info.vdd_dep_on_fclk->entries[0].clk : min_mclk, NULL); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinSocclkByFreq, - SMU10_UMD_PSTATE_MIN_SOCCLK, + data->clock_vol_info.vdd_dep_on_socclk->entries[0].clk, NULL); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinVcn, @@ -695,11 +697,11 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, NULL); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetSoftMaxFclkByFreq, - SMU10_UMD_PSTATE_PEAK_FCLK, + data->clock_vol_info.vdd_dep_on_fclk->entries[index_fclk].clk, NULL); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetSoftMaxSocclkByFreq, - SMU10_UMD_PSTATE_PEAK_SOCCLK, + data->clock_vol_info.vdd_dep_on_socclk->entries[index_socclk].clk, NULL); smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetSoftMaxVcn, From 0c7014154d6397d6a35bf3759839207f1c702a42 Mon Sep 17 00:00:00 2001 From: Jiansong Chen Date: Wed, 23 Sep 2020 11:58:23 +0800 Subject: [PATCH 236/322] drm/amdgpu: remove gpu_info fw support for sienna_cichlid etc. Remove gpu_info fw support for sienna_cichlid etc., since the information can be retrieved from discovery binary. Signed-off-by: Jiansong Chen Reviewed-by: Likun Gao Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index eb7cfe87042e..d0b8d0d341af 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -80,8 +80,6 @@ MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin"); -MODULE_FIRMWARE("amdgpu/sienna_cichlid_gpu_info.bin"); -MODULE_FIRMWARE("amdgpu/navy_flounder_gpu_info.bin"); #define AMDGPU_RESUME_MS 2000 @@ -1600,6 +1598,8 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) case CHIP_CARRIZO: case CHIP_STONEY: case CHIP_VEGA20: + case CHIP_SIENNA_CICHLID: + case CHIP_NAVY_FLOUNDER: default: return 0; case CHIP_VEGA10: @@ -1631,12 +1631,6 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) case CHIP_NAVI12: chip_name = "navi12"; break; - case CHIP_SIENNA_CICHLID: - chip_name = "sienna_cichlid"; - break; - case CHIP_NAVY_FLOUNDER: - chip_name = "navy_flounder"; - break; } snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name); From 898c7302f4de1d91065e80fc46552b3ec70894ff Mon Sep 17 00:00:00 2001 From: Flora Cui Date: Wed, 23 Sep 2020 14:42:59 +0800 Subject: [PATCH 237/322] drm/amd/display: fix return value check for hdcp_work max_caps might be 0, thus hdcp_work might be ZERO_SIZE_PTR Signed-off-by: Flora Cui Reviewed-by: Feifei Xu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c index 694c5bc93665..c2cd184f0bbd 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c @@ -604,7 +604,7 @@ struct hdcp_workqueue *hdcp_create_workqueue(struct amdgpu_device *adev, struct int i = 0; hdcp_work = kcalloc(max_caps, sizeof(*hdcp_work), GFP_KERNEL); - if (hdcp_work == NULL) + if (ZERO_OR_NULL_PTR(hdcp_work)) return NULL; hdcp_work->srm = kcalloc(PSP_HDCP_SRM_FIRST_GEN_MAX_SIZE, sizeof(*hdcp_work->srm), GFP_KERNEL); From c73d05eaba1c2d60558364f2d980090ebec6b082 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 22 Sep 2020 11:34:16 -0400 Subject: [PATCH 238/322] drm/amdgpu/display: fix CFLAGS setup for DCN30 Properly handle clang and older versions of gcc. Fixes: e77165bf7b02a3 ("drm/amd/display: Add DCN3 blocks to Makefile") Acked-by: Nirmoy Das Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn30/Makefile | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile index 025637a83c3b..bd2a068f9863 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile @@ -31,9 +31,21 @@ DCN30 = dcn30_init.o dcn30_hubbub.o dcn30_hubp.o dcn30_dpp.o dcn30_optc.o \ dcn30_dio_link_encoder.o dcn30_resource.o -CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o := -mhard-float -msse -mpreferred-stack-boundary=4 - +ifdef CONFIG_X86 CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o := -mhard-float -msse +CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o := -mhard-float -msse +endif + +ifdef CONFIG_PPC64 +CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o := -mhard-float -maltivec +CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o := -mhard-float -maltivec +endif + +ifdef CONFIG_ARM64 +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o := -mgeneral-regs-only +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o := -mgeneral-regs-only +endif + ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 @@ -45,8 +57,10 @@ ifdef IS_OLD_GCC # GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 # (8B stack alignment). CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o += -mpreferred-stack-boundary=4 +CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o += -mpreferred-stack-boundary=4 else CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o += -msse2 +CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o += -msse2 endif AMD_DAL_DCN30 = $(addprefix $(AMDDALPATH)/dc/dcn30/,$(DCN30)) From a39d0d7bdf8c21ac7645c02e9676b5cb2b804c31 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 28 Sep 2020 11:10:37 +0200 Subject: [PATCH 239/322] drm/amdgpu: restore proper ref count in amdgpu_display_crtc_set_config A recent attempt to fix a ref count leak in amdgpu_display_crtc_set_config() turned out to be doing too much and "fixed" an intended decrease as if it were a leak. Undo that part to restore the proper balance. This is the very nature of this function to increase or decrease the power reference count depending on the situation. Consequences of this bug is that the power reference would eventually get down to 0 while the display was still in use, resulting in that display switching off unexpectedly. Signed-off-by: Jean Delvare Fixes: e008fa6fb415 ("drm/amdgpu: fix ref count leak in amdgpu_display_crtc_set_config") Cc: stable@vger.kernel.org Cc: Navid Emamdoost Cc: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index d76172965199..44c1f6e00635 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -297,7 +297,7 @@ int amdgpu_display_crtc_set_config(struct drm_mode_set *set, take the current one */ if (active && !adev->have_disp_power_ref) { adev->have_disp_power_ref = true; - goto out; + return ret; } /* if we have no active crtcs, then drop the power ref we got before */ From 3c26d0314c10ceb77511e2cc81894001d488c1d0 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 28 Sep 2020 14:16:25 -0400 Subject: [PATCH 240/322] drm/amdgpu/swsmu/smu12: fix force clock handling for mclk The state array is in the reverse order compared to other asics (high to low rather than low to high). Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1313 Reviewed-by: Prike Liang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/renoir_ppt.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c index dbb676c482fd..15263cf210d5 100644 --- a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c @@ -232,14 +232,16 @@ static int renoir_get_profiling_clk_mask(struct smu_context *smu, *sclk_mask = 0; } else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK) { if (mclk_mask) - *mclk_mask = 0; + /* mclk levels are in reverse order */ + *mclk_mask = NUM_MEMCLK_DPM_LEVELS - 1; } else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK) { if(sclk_mask) /* The sclk as gfxclk and has three level about max/min/current */ *sclk_mask = 3 - 1; if(mclk_mask) - *mclk_mask = NUM_MEMCLK_DPM_LEVELS - 1; + /* mclk levels are in reverse order */ + *mclk_mask = 0; if(soc_mask) *soc_mask = NUM_SOCCLK_DPM_LEVELS - 1; @@ -333,7 +335,7 @@ static int renoir_get_dpm_ultimate_freq(struct smu_context *smu, case SMU_UCLK: case SMU_FCLK: case SMU_MCLK: - ret = renoir_get_dpm_clk_limited(smu, clk_type, 0, min); + ret = renoir_get_dpm_clk_limited(smu, clk_type, NUM_MEMCLK_DPM_LEVELS - 1, min); if (ret) goto failed; break; From 548c7ba7dc9e9dacb43bd2af00b586da48015d04 Mon Sep 17 00:00:00 2001 From: Dirk Gouders Date: Sun, 27 Sep 2020 11:39:45 +0200 Subject: [PATCH 241/322] drm/amd/display: remove duplicate call to rn_vbios_smu_get_smu_version() Commit 78fe9f63947a2b ("drm/amd/display: Remove DISPCLK Limit Floor for Certain SMU Versions") added a call to rn_vbios_smu_get_smu_version() to set clk_mgr->smu_ver. That field is initialized prior to the if-statement, already. Fixes: 78fe9f63947a2b (drm/amd/display: Remove DISPCLK Limit Floor for Certain SMU Versions) Signed-off-by: Dirk Gouders Cc: Alex Deucher Cc: Sung Lee Cc: Yongqiang Sun Cc: Rodrigo Siqueira Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c index 543afa34d87a..21a3073c8929 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c @@ -783,7 +783,6 @@ void rn_clk_mgr_construct( } else { struct clk_log_info log_info = {0}; - clk_mgr->smu_ver = rn_vbios_smu_get_smu_version(clk_mgr); clk_mgr->periodic_retraining_disabled = rn_vbios_smu_is_periodic_retraining_disabled(clk_mgr); /* SMU Version 55.51.0 and up no longer have an issue From 6667df916fceca7d585b34d8bb7a4aea2402f11f Mon Sep 17 00:00:00 2001 From: Tony Nguyen Date: Tue, 29 Sep 2020 14:06:18 -0700 Subject: [PATCH 242/322] MAINTAINERS: Update MAINTAINERS for Intel ethernet drivers Add Jesse Brandeburg and myself; remove Jeff Kirsher. CC: Jeff Kirsher CC: Jesse Brandeburg Signed-off-by: Tony Nguyen Signed-off-by: David S. Miller --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 6dc9ebf5bf76..eb45c9fdeb21 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8752,7 +8752,8 @@ F: include/drm/i915* F: include/uapi/drm/i915_drm.h INTEL ETHERNET DRIVERS -M: Jeff Kirsher +M: Jesse Brandeburg +M: Tony Nguyen L: intel-wired-lan@lists.osuosl.org (moderated for non-subscribers) S: Supported W: http://www.intel.com/support/feedback.htm From d120c9a81e32c43cba8017dec873b6a414898716 Mon Sep 17 00:00:00 2001 From: Kevin Brace Date: Tue, 29 Sep 2020 13:09:40 -0700 Subject: [PATCH 243/322] via-rhine: Fix for the hardware having a reset failure after resume In rhine_resume() and rhine_suspend(), the code calls netif_running() to see if the network interface is down or not. If it is down (i.e., netif_running() returning false), they will skip any housekeeping work within the function relating to the hardware. This becomes a problem when the hardware resumes from a standby since it is counting on rhine_resume() to map its MMIO and power up rest of the hardware. Not getting its MMIO remapped and rest of the hardware powered up lead to a soft reset failure and hardware disappearance. The solution is to map its MMIO and power up rest of the hardware inside rhine_open() before soft reset is to be performed. This solution was verified on ASUS P5V800-VM mainboard's integrated Rhine-II Ethernet MAC inside VIA Technologies VT8251 South Bridge. Signed-off-by: Kevin Brace Signed-off-by: David S. Miller --- drivers/net/ethernet/via/via-rhine.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c index 803247d51fe9..a20492da3407 100644 --- a/drivers/net/ethernet/via/via-rhine.c +++ b/drivers/net/ethernet/via/via-rhine.c @@ -1706,6 +1706,8 @@ static int rhine_open(struct net_device *dev) goto out_free_ring; alloc_tbufs(dev); + enable_mmio(rp->pioaddr, rp->quirks); + rhine_power_init(dev); rhine_chip_reset(dev); rhine_task_enable(rp); init_registers(dev); From aa15190cf2cf25ec7e6c6d7373ae3ca563d48601 Mon Sep 17 00:00:00 2001 From: Kevin Brace Date: Tue, 29 Sep 2020 13:09:41 -0700 Subject: [PATCH 244/322] via-rhine: VTunknown1 device is really VT8251 South Bridge The VIA Technologies VT8251 South Bridge's integrated Rhine-II Ethernet MAC comes has a PCI revision value of 0x7c. This was verified on ASUS P5V800-VM mainboard. Signed-off-by: Kevin Brace Signed-off-by: David S. Miller --- drivers/net/ethernet/via/via-rhine.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c index a20492da3407..d3a2be2e75d0 100644 --- a/drivers/net/ethernet/via/via-rhine.c +++ b/drivers/net/ethernet/via/via-rhine.c @@ -243,7 +243,7 @@ enum rhine_revs { VT8233 = 0x60, /* Integrated MAC */ VT8235 = 0x74, /* Integrated MAC */ VT8237 = 0x78, /* Integrated MAC */ - VTunknown1 = 0x7C, + VT8251 = 0x7C, /* Integrated MAC */ VT6105 = 0x80, VT6105_B0 = 0x83, VT6105L = 0x8A, From 9f5159e89d5d1535e3da0222805c355a86d3e2dc Mon Sep 17 00:00:00 2001 From: Kevin Brace Date: Tue, 29 Sep 2020 13:09:42 -0700 Subject: [PATCH 245/322] via-rhine: Eliminate version information Signed-off-by: Kevin Brace Signed-off-by: David S. Miller --- drivers/net/ethernet/via/via-rhine.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c index d3a2be2e75d0..8e8cfe110d95 100644 --- a/drivers/net/ethernet/via/via-rhine.c +++ b/drivers/net/ethernet/via/via-rhine.c @@ -32,8 +32,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #define DRV_NAME "via-rhine" -#define DRV_VERSION "1.5.1" -#define DRV_RELDATE "2010-10-09" #include @@ -117,10 +115,6 @@ static const int multicast_filter_limit = 32; #include #include -/* These identify the driver base version and may not be removed. */ -static const char version[] = - "v1.10-LK" DRV_VERSION " " DRV_RELDATE " Written by Donald Becker"; - MODULE_AUTHOR("Donald Becker "); MODULE_DESCRIPTION("VIA Rhine PCI Fast Ethernet driver"); MODULE_LICENSE("GPL"); @@ -1051,11 +1045,6 @@ static int rhine_init_one_pci(struct pci_dev *pdev, u32 quirks = 0; #endif -/* when built into the kernel, we only print version if device is found */ -#ifndef MODULE - pr_info_once("%s\n", version); -#endif - rc = pci_enable_device(pdev); if (rc) goto err_out; @@ -2296,7 +2285,6 @@ static void netdev_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *i struct device *hwdev = dev->dev.parent; strlcpy(info->driver, DRV_NAME, sizeof(info->driver)); - strlcpy(info->version, DRV_VERSION, sizeof(info->version)); strlcpy(info->bus_info, dev_name(hwdev), sizeof(info->bus_info)); } @@ -2618,9 +2606,6 @@ static int __init rhine_init(void) int ret_pci, ret_platform; /* when a module, this is printed whether or not devices are found in probe */ -#ifdef MODULE - pr_info("%s\n", version); -#endif if (dmi_check_system(rhine_dmi_table)) { /* these BIOSes fail at PXE boot if chip is in D3 */ avoid_D3 = true; From 2b6b78e0828948fdc8374dc6fe6b2911252333c9 Mon Sep 17 00:00:00 2001 From: Kevin Brace Date: Tue, 29 Sep 2020 13:09:43 -0700 Subject: [PATCH 246/322] via-rhine: New device driver maintainer Signed-off-by: Kevin Brace Signed-off-by: David S. Miller --- MAINTAINERS | 3 ++- drivers/net/ethernet/via/via-rhine.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index eb45c9fdeb21..dd1840e00a38 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18283,7 +18283,8 @@ F: drivers/gpu/vga/vga_switcheroo.c F: include/linux/vga_switcheroo.h VIA RHINE NETWORK DRIVER -S: Orphan +S: Maintained +M: Kevin Brace F: drivers/net/ethernet/via/via-rhine.c VIA SD/MMC CARD CONTROLLER DRIVER diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c index 8e8cfe110d95..55b0ddab1776 100644 --- a/drivers/net/ethernet/via/via-rhine.c +++ b/drivers/net/ethernet/via/via-rhine.c @@ -2,7 +2,7 @@ /* Written 1998-2001 by Donald Becker. - Current Maintainer: Roger Luethi + Current Maintainer: Kevin Brace This software may be used and distributed according to the terms of the GNU General Public License (GPL), incorporated herein by reference. From 90fb702791bf99b959006972e8ee7bb4609f441b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 29 Sep 2020 17:18:34 -0700 Subject: [PATCH 247/322] autofs: use __kernel_write() for the autofs pipe writing autofs got broken in some configurations by commit 13c164b1a186 ("autofs: switch to kernel_write") because there is now an extra LSM permission check done by security_file_permission() in rw_verify_area(). autofs is one if the few places that really does want the much more limited __kernel_write(), because the write is an internal kernel one that shouldn't do any user permission checks (it also doesn't need the file_start_write/file_end_write logic, since it's just a pipe). There are a couple of other cases like that - accounting, core dumping, and splice - but autofs stands out because it can be built as a module. As a result, we need to export this internal __kernel_write() function again. We really don't want any other module to use this, but we don't have a "EXPORT_SYMBOL_FOR_AUTOFS_ONLY()". But we can mark it GPL-only to at least approximate that "internal use only" for licensing. While in this area, make autofs pass in NULL for the file position pointer, since it's always a pipe, and we now use a NULL file pointer for streaming file descriptors (see file_ppos() and commit 438ab720c675: "vfs: pass ppos=NULL to .read()/.write() of FMODE_STREAM files") This effectively reverts commits 9db977522449 ("fs: unexport __kernel_write") and 13c164b1a186 ("autofs: switch to kernel_write"). Fixes: 13c164b1a186 ("autofs: switch to kernel_write") Reported-by: Ondrej Mosnacek Acked-by: Christoph Hellwig Acked-by: Acked-by: Ian Kent Signed-off-by: Linus Torvalds --- fs/autofs/waitq.c | 2 +- fs/read_write.c | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/autofs/waitq.c b/fs/autofs/waitq.c index 74c886f7c51c..5ced859dac53 100644 --- a/fs/autofs/waitq.c +++ b/fs/autofs/waitq.c @@ -53,7 +53,7 @@ static int autofs_write(struct autofs_sb_info *sbi, mutex_lock(&sbi->pipe_mutex); while (bytes) { - wr = kernel_write(file, data, bytes, &file->f_pos); + wr = __kernel_write(file, data, bytes, NULL); if (wr <= 0) break; data += wr; diff --git a/fs/read_write.c b/fs/read_write.c index 5db58b8c78d0..d3428189f36b 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -538,6 +538,14 @@ ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t inc_syscw(current); return ret; } +/* + * This "EXPORT_SYMBOL_GPL()" is more of a "EXPORT_SYMBOL_DONTUSE()", + * but autofs is one of the few internal kernel users that actually + * wants this _and_ can be built as a module. So we need to export + * this symbol for autofs, even though it really isn't appropriate + * for any other kernel modules. + */ +EXPORT_SYMBOL_GPL(__kernel_write); ssize_t kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos) From 917944da3bfc7cb5ac3af26725af3371d3a12db0 Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Tue, 29 Sep 2020 15:08:19 -0700 Subject: [PATCH 248/322] mptcp: Consistently use READ_ONCE/WRITE_ONCE with msk->ack_seq The msk->ack_seq value is sometimes read without the msk lock held, so make proper use of READ_ONCE and WRITE_ONCE. Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/options.c | 4 ++-- net/mptcp/protocol.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 7fa822b55c34..120ef39fe589 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -518,11 +518,11 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, if (subflow->use_64bit_ack) { ack_size = TCPOLEN_MPTCP_DSS_ACK64; - opts->ext_copy.data_ack = msk->ack_seq; + opts->ext_copy.data_ack = READ_ONCE(msk->ack_seq); opts->ext_copy.ack64 = 1; } else { ack_size = TCPOLEN_MPTCP_DSS_ACK32; - opts->ext_copy.data_ack32 = (uint32_t)(msk->ack_seq); + opts->ext_copy.data_ack32 = (uint32_t)READ_ONCE(msk->ack_seq); opts->ext_copy.ack64 = 0; } opts->ext_copy.use_ack = 1; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 365ba96c84b0..5d747c6a610e 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -123,7 +123,7 @@ static void __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk, skb_ext_reset(skb); skb_orphan(skb); - msk->ack_seq += copy_len; + WRITE_ONCE(msk->ack_seq, msk->ack_seq + copy_len); tail = skb_peek_tail(&sk->sk_receive_queue); if (offset == 0 && tail) { @@ -261,7 +261,7 @@ static void mptcp_check_data_fin(struct sock *sk) if (mptcp_pending_data_fin(sk, &rcv_data_fin_seq)) { struct mptcp_subflow_context *subflow; - msk->ack_seq++; + WRITE_ONCE(msk->ack_seq, msk->ack_seq + 1); WRITE_ONCE(msk->rcv_data_fin, 0); sk->sk_shutdown |= RCV_SHUTDOWN; @@ -1720,7 +1720,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk, msk->remote_key = mp_opt->sndr_key; mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq); ack_seq++; - msk->ack_seq = ack_seq; + WRITE_ONCE(msk->ack_seq, ack_seq); } sock_reset_flag(nsk, SOCK_RCU_FREE); @@ -2072,7 +2072,7 @@ bool mptcp_finish_join(struct sock *sk) parent_sock = READ_ONCE(parent->sk_socket); if (parent_sock && !sk->sk_socket) mptcp_sock_graft(sk, parent_sock); - subflow->map_seq = msk->ack_seq; + subflow->map_seq = READ_ONCE(msk->ack_seq); return true; } From 1a49b2c2a501467587504e75bd049d43af38b692 Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Tue, 29 Sep 2020 15:08:20 -0700 Subject: [PATCH 249/322] mptcp: Handle incoming 32-bit DATA_FIN values The peer may send a DATA_FIN mapping with either a 32-bit or 64-bit sequence number. When a 32-bit sequence number is received for the DATA_FIN, it must be expanded to 64 bits before comparing it to the last acked sequence number. This expansion was missing. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/93 Fixes: 3721b9b64676 ("mptcp: Track received DATA_FIN sequence number and add related helpers") Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- net/mptcp/options.c | 7 ++++--- net/mptcp/protocol.h | 2 +- net/mptcp/subflow.c | 16 +++++++++++++--- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 120ef39fe589..afa486912f5a 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -782,7 +782,7 @@ static void update_una(struct mptcp_sock *msk, } } -bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq) +bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit) { /* Skip if DATA_FIN was already received. * If updating simultaneously with the recvmsg loop, values @@ -792,7 +792,8 @@ bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq) if (READ_ONCE(msk->rcv_data_fin) || !READ_ONCE(msk->first)) return false; - WRITE_ONCE(msk->rcv_data_fin_seq, data_fin_seq); + WRITE_ONCE(msk->rcv_data_fin_seq, + expand_ack(READ_ONCE(msk->ack_seq), data_fin_seq, use_64bit)); WRITE_ONCE(msk->rcv_data_fin, 1); return true; @@ -875,7 +876,7 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb, */ if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) { if (mp_opt.data_fin && mp_opt.data_len == 1 && - mptcp_update_rcv_data_fin(msk, mp_opt.data_seq) && + mptcp_update_rcv_data_fin(msk, mp_opt.data_seq, mp_opt.dsn64) && schedule_work(&msk->work)) sock_hold(subflow->conn); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 60b27d44c184..20f04ac85409 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -387,7 +387,7 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk); bool mptcp_finish_join(struct sock *sk); void mptcp_data_acked(struct sock *sk); void mptcp_subflow_eof(struct sock *sk); -bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq); +bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit); void __init mptcp_token_init(void); static inline void mptcp_token_init_request(struct request_sock *req) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 8cbeb68f3775..5f2fa935022d 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -731,7 +731,8 @@ static enum mapping_status get_mapping_status(struct sock *ssk, if (mpext->data_fin == 1) { if (data_len == 1) { - bool updated = mptcp_update_rcv_data_fin(msk, mpext->data_seq); + bool updated = mptcp_update_rcv_data_fin(msk, mpext->data_seq, + mpext->dsn64); pr_debug("DATA_FIN with no payload seq=%llu", mpext->data_seq); if (subflow->map_valid) { /* A DATA_FIN might arrive in a DSS @@ -748,8 +749,17 @@ static enum mapping_status get_mapping_status(struct sock *ssk, return MAPPING_DATA_FIN; } } else { - mptcp_update_rcv_data_fin(msk, mpext->data_seq + data_len); - pr_debug("DATA_FIN with mapping seq=%llu", mpext->data_seq + data_len); + u64 data_fin_seq = mpext->data_seq + data_len; + + /* If mpext->data_seq is a 32-bit value, data_fin_seq + * must also be limited to 32 bits. + */ + if (!mpext->dsn64) + data_fin_seq &= GENMASK_ULL(31, 0); + + mptcp_update_rcv_data_fin(msk, data_fin_seq, mpext->dsn64); + pr_debug("DATA_FIN with mapping seq=%llu dsn64=%d", + data_fin_seq, mpext->dsn64); } /* Adjust for DATA_FIN using 1 byte of sequence space */ From bcf3a2953d36bbfb9bd44ccb3db0897d935cc485 Mon Sep 17 00:00:00 2001 From: Mark Mielke Date: Mon, 28 Sep 2020 00:33:29 -0400 Subject: [PATCH 250/322] scsi: iscsi: iscsi_tcp: Avoid holding spinlock while calling getpeername() The kernel may fail to boot or devices may fail to come up when initializing iscsi_tcp devices starting with Linux 5.8. Commit a79af8a64d39 ("[SCSI] iscsi_tcp: use iscsi_conn_get_addr_param libiscsi function") introduced getpeername() within the session spinlock. Commit 1b66d253610c ("bpf: Add get{peer, sock}name attach types for sock_addr") introduced BPF_CGROUP_RUN_SA_PROG_LOCK() within getpeername(), which acquires a mutex and when used from iscsi_tcp devices can now lead to "BUG: scheduling while atomic:" and subsequent damage. Ensure that the spinlock is released before calling getpeername() or getsockname(). sock_hold() and sock_put() are used to ensure that the socket reference is preserved until after the getpeername() or getsockname() complete. Link: https://bugzilla.redhat.com/show_bug.cgi?id=1877345 Link: https://lkml.org/lkml/2020/7/28/1085 Link: https://lkml.org/lkml/2020/8/31/459 Link: https://lore.kernel.org/r/20200928043329.606781-1-mark.mielke@gmail.com Fixes: a79af8a64d39 ("[SCSI] iscsi_tcp: use iscsi_conn_get_addr_param libiscsi function") Fixes: 1b66d253610c ("bpf: Add get{peer, sock}name attach types for sock_addr") Cc: stable@vger.kernel.org Reported-by: Marc Dionne Tested-by: Marc Dionne Reviewed-by: Mike Christie Signed-off-by: Mark Mielke Signed-off-by: Martin K. Petersen --- drivers/scsi/iscsi_tcp.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index b5dd1caae5e9..d10efb66cf19 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -736,6 +736,7 @@ static int iscsi_sw_tcp_conn_get_param(struct iscsi_cls_conn *cls_conn, struct iscsi_tcp_conn *tcp_conn = conn->dd_data; struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data; struct sockaddr_in6 addr; + struct socket *sock; int rc; switch(param) { @@ -747,13 +748,17 @@ static int iscsi_sw_tcp_conn_get_param(struct iscsi_cls_conn *cls_conn, spin_unlock_bh(&conn->session->frwd_lock); return -ENOTCONN; } + sock = tcp_sw_conn->sock; + sock_hold(sock->sk); + spin_unlock_bh(&conn->session->frwd_lock); + if (param == ISCSI_PARAM_LOCAL_PORT) - rc = kernel_getsockname(tcp_sw_conn->sock, + rc = kernel_getsockname(sock, (struct sockaddr *)&addr); else - rc = kernel_getpeername(tcp_sw_conn->sock, + rc = kernel_getpeername(sock, (struct sockaddr *)&addr); - spin_unlock_bh(&conn->session->frwd_lock); + sock_put(sock->sk); if (rc < 0) return rc; @@ -775,6 +780,7 @@ static int iscsi_sw_tcp_host_get_param(struct Scsi_Host *shost, struct iscsi_tcp_conn *tcp_conn; struct iscsi_sw_tcp_conn *tcp_sw_conn; struct sockaddr_in6 addr; + struct socket *sock; int rc; switch (param) { @@ -789,16 +795,18 @@ static int iscsi_sw_tcp_host_get_param(struct Scsi_Host *shost, return -ENOTCONN; } tcp_conn = conn->dd_data; - tcp_sw_conn = tcp_conn->dd_data; - if (!tcp_sw_conn->sock) { + sock = tcp_sw_conn->sock; + if (!sock) { spin_unlock_bh(&session->frwd_lock); return -ENOTCONN; } - - rc = kernel_getsockname(tcp_sw_conn->sock, - (struct sockaddr *)&addr); + sock_hold(sock->sk); spin_unlock_bh(&session->frwd_lock); + + rc = kernel_getsockname(sock, + (struct sockaddr *)&addr); + sock_put(sock->sk); if (rc < 0) return rc; From f54c4442893b8dfbd3aff8e903c54dfff1aef990 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Fri, 25 Sep 2020 11:54:10 -0400 Subject: [PATCH 251/322] drm/vmwgfx: Fix error handling in get_node ttm_mem_type_manager_func.get_node was changed to return -ENOSPC instead of setting the node pointer to NULL. Unfortunately vmwgfx still had two places where it was explicitly converting -ENOSPC to 0 causing regressions. This fixes those spots by allowing -ENOSPC to be returned. That seems to fix recent regressions with vmwgfx. Signed-off-by: Zack Rusin Reviewed-by: Roland Scheidegger Reviewed-by: Martin Krastev Sigend-off-by: Roland Scheidegger --- drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c | 2 +- drivers/gpu/drm/vmwgfx/vmwgfx_thp.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c index 4a76fc7114ad..f8bdd4ea294a 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c @@ -55,7 +55,7 @@ static int vmw_gmrid_man_get_node(struct ttm_mem_type_manager *man, id = ida_alloc_max(&gman->gmr_ida, gman->max_gmr_ids - 1, GFP_KERNEL); if (id < 0) - return (id != -ENOMEM ? 0 : id); + return id; spin_lock(&gman->lock); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_thp.c b/drivers/gpu/drm/vmwgfx/vmwgfx_thp.c index b7c816ba7166..c8b9335bccd8 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_thp.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_thp.c @@ -95,7 +95,7 @@ found_unlock: mem->start = node->start; } - return 0; + return ret; } From c14decfca23cfbeb423ad8be3961a58a31a5473c Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 29 Sep 2020 23:48:47 -0700 Subject: [PATCH 252/322] clocksource: clint: Export clint_time_val for modules clint_time_val will soon be used by the RISC-V implementation of random_get_entropy(), which is a static inline function that may be used by modules (at least CRYPTO_JITTERENTROPY=m). Reported-by: kernel test robot Signed-off-by: Palmer Dabbelt --- drivers/clocksource/timer-clint.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clocksource/timer-clint.c b/drivers/clocksource/timer-clint.c index d17367dee02c..6cfe2ab73eb0 100644 --- a/drivers/clocksource/timer-clint.c +++ b/drivers/clocksource/timer-clint.c @@ -38,6 +38,7 @@ static unsigned int clint_timer_irq; #ifdef CONFIG_RISCV_M_MODE u64 __iomem *clint_time_val; +EXPORT_SYMBOL(clint_time_val); #endif static void clint_send_ipi(const struct cpumask *target) From b195152536237acf65962b703394614d9c4d307b Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Wed, 30 Sep 2020 12:07:56 +0800 Subject: [PATCH 253/322] drm/amd/pm: setup APU dpm clock table in SMU HW initialization As the dpm clock table is needed during DC HW initialization. And that (DC HW initialization) comes before smu_late_init() where current APU dpm clock table setup is performed. So, NULL pointer dereference will be triggered. By moving APU dpm clock table setup to smu_hw_init(), this can be avoided. Fixes: 02cf91c113ea ("drm/amd/powerplay: postpone operations not required for hw setup to late_init") Acked-by: Nirmoy Das Acked-by: Alex Deucher Signed-off-by: Evan Quan Reported-by: Dirk Gouders Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/amdgpu_smu.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index 63f945f9f331..8dc5abb6931e 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -479,17 +479,6 @@ static int smu_late_init(void *handle) return ret; } - /* - * Set initialized values (get from vbios) to dpm tables context such as - * gfxclk, memclk, dcefclk, and etc. And enable the DPM feature for each - * type of clks. - */ - ret = smu_set_default_dpm_table(smu); - if (ret) { - dev_err(adev->dev, "Failed to setup default dpm clock tables!\n"); - return ret; - } - ret = smu_populate_umd_state_clk(smu); if (ret) { dev_err(adev->dev, "Failed to populate UMD state clocks!\n"); @@ -984,6 +973,17 @@ static int smu_smc_hw_setup(struct smu_context *smu) return ret; } + /* + * Set initialized values (get from vbios) to dpm tables context such as + * gfxclk, memclk, dcefclk, and etc. And enable the DPM feature for each + * type of clks. + */ + ret = smu_set_default_dpm_table(smu); + if (ret) { + dev_err(adev->dev, "Failed to setup default dpm clock tables!\n"); + return ret; + } + ret = smu_notify_display_change(smu); if (ret) return ret; From 95433a1305a000aa91f558b062ce614a3bb8ceb5 Mon Sep 17 00:00:00 2001 From: Jiansong Chen Date: Wed, 30 Sep 2020 15:30:24 +0800 Subject: [PATCH 254/322] drm/amdgpu: disable gfxoff temporarily for navy_flounder gfxoff is temporarily disabled for navy_flounder, since at present the feature caused some tdr when performing display operations. Signed-off-by: Jiansong Chen Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 037a187aa42f..f73ce9721233 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3595,6 +3595,9 @@ static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev) if (!gfx_v10_0_navi10_gfxoff_should_enable(adev)) adev->pm.pp_feature &= ~PP_GFXOFF_MASK; break; + case CHIP_NAVY_FLOUNDER: + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; + break; default: break; } From 0ec86e8e82b57cfb2f83a9de14732002816d3282 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 26 Aug 2020 11:30:34 -0700 Subject: [PATCH 255/322] ice: increase maximum wait time for flash write commands The ice driver needs to wait for a firmware response to each command to write a block of data to the scratch area used to update the device firmware. The driver currently waits for up to 1 second for this to be returned. It turns out that firmware might take longer than 1 second to return a completion in some cases. If this happens, the flash update will fail to complete. Fix this by increasing the maximum time that the driver will wait for both writing a block of data, and for activating the new NVM bank. The timeout for an erase command is already several minutes, as the firmware had to erase the entire bank which was already expected to take a minute or more in the worst case. In the case where firmware really won't respond, we will now take longer to fail. However, this ensures that if the firmware is simply slow to respond, the flash update can still complete. This new maximum timeout should not adversely increase the update time, as the implementation for wait_event_interruptible_timeout, and should wake very soon after we get a completion event. It is better for a flash update be slow but still succeed than to fail because we gave up too quickly. Fixes: d69ea414c9b4 ("ice: implement device flash update via devlink") Signed-off-by: Jacob Keller Tested-by: Brijesh Behera Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_fw_update.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_fw_update.c b/drivers/net/ethernet/intel/ice/ice_fw_update.c index deaefe00c9c0..8968fdd4816b 100644 --- a/drivers/net/ethernet/intel/ice/ice_fw_update.c +++ b/drivers/net/ethernet/intel/ice/ice_fw_update.c @@ -289,7 +289,13 @@ ice_write_one_nvm_block(struct ice_pf *pf, u16 module, u32 offset, return -EIO; } - err = ice_aq_wait_for_event(pf, ice_aqc_opc_nvm_write, HZ, &event); + /* In most cases, firmware reports a write completion within a few + * milliseconds. However, it has been observed that a completion might + * take more than a second to complete in some cases. The timeout here + * is conservative and is intended to prevent failure to update when + * firmware is slow to respond. + */ + err = ice_aq_wait_for_event(pf, ice_aqc_opc_nvm_write, 15 * HZ, &event); if (err) { dev_err(dev, "Timed out waiting for firmware write completion for module 0x%02x, err %d\n", module, err); @@ -513,7 +519,7 @@ static int ice_switch_flash_banks(struct ice_pf *pf, u8 activate_flags, return -EIO; } - err = ice_aq_wait_for_event(pf, ice_aqc_opc_nvm_write_activate, HZ, + err = ice_aq_wait_for_event(pf, ice_aqc_opc_nvm_write_activate, 30 * HZ, &event); if (err) { dev_err(dev, "Timed out waiting for firmware to switch active flash banks, err %d\n", From be49b1ad299a638e6181aefc7bdbfd621b46a7ef Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 2 Sep 2020 08:53:42 -0700 Subject: [PATCH 256/322] ice: preserve NVM capabilities in safe mode If the driver initializes in safe mode, it will call ice_set_safe_mode_caps. This results in clearing the capabilities structures, in order to set them up for operating in safe mode, ensuring many features are disabled. This has a side effect of also clearing the capability bits that relate to NVM update. The result is that the device driver will not indicate support for unified update, even if the firmware is capable. Fix this by adding the relevant capability fields to the list of values we preserve. To simplify the code, use a common_cap structure instead of a handful of local variables. To reduce some duplication of the capability name, introduce a couple of macros used to restore the capabilities values from the cached copy. Fixes: de9b277ee032 ("ice: Add support for unified NVM update flow capability") Signed-off-by: Jacob Keller Tested-by: Brijesh Behera Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_common.c | 49 ++++++++++++--------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 34abfcea9858..7db5fd977367 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -2288,26 +2288,28 @@ void ice_set_safe_mode_caps(struct ice_hw *hw) { struct ice_hw_func_caps *func_caps = &hw->func_caps; struct ice_hw_dev_caps *dev_caps = &hw->dev_caps; - u32 valid_func, rxq_first_id, txq_first_id; - u32 msix_vector_first_id, max_mtu; + struct ice_hw_common_caps cached_caps; u32 num_funcs; /* cache some func_caps values that should be restored after memset */ - valid_func = func_caps->common_cap.valid_functions; - txq_first_id = func_caps->common_cap.txq_first_id; - rxq_first_id = func_caps->common_cap.rxq_first_id; - msix_vector_first_id = func_caps->common_cap.msix_vector_first_id; - max_mtu = func_caps->common_cap.max_mtu; + cached_caps = func_caps->common_cap; /* unset func capabilities */ memset(func_caps, 0, sizeof(*func_caps)); +#define ICE_RESTORE_FUNC_CAP(name) \ + func_caps->common_cap.name = cached_caps.name + /* restore cached values */ - func_caps->common_cap.valid_functions = valid_func; - func_caps->common_cap.txq_first_id = txq_first_id; - func_caps->common_cap.rxq_first_id = rxq_first_id; - func_caps->common_cap.msix_vector_first_id = msix_vector_first_id; - func_caps->common_cap.max_mtu = max_mtu; + ICE_RESTORE_FUNC_CAP(valid_functions); + ICE_RESTORE_FUNC_CAP(txq_first_id); + ICE_RESTORE_FUNC_CAP(rxq_first_id); + ICE_RESTORE_FUNC_CAP(msix_vector_first_id); + ICE_RESTORE_FUNC_CAP(max_mtu); + ICE_RESTORE_FUNC_CAP(nvm_unified_update); + ICE_RESTORE_FUNC_CAP(nvm_update_pending_nvm); + ICE_RESTORE_FUNC_CAP(nvm_update_pending_orom); + ICE_RESTORE_FUNC_CAP(nvm_update_pending_netlist); /* one Tx and one Rx queue in safe mode */ func_caps->common_cap.num_rxq = 1; @@ -2318,22 +2320,25 @@ void ice_set_safe_mode_caps(struct ice_hw *hw) func_caps->guar_num_vsi = 1; /* cache some dev_caps values that should be restored after memset */ - valid_func = dev_caps->common_cap.valid_functions; - txq_first_id = dev_caps->common_cap.txq_first_id; - rxq_first_id = dev_caps->common_cap.rxq_first_id; - msix_vector_first_id = dev_caps->common_cap.msix_vector_first_id; - max_mtu = dev_caps->common_cap.max_mtu; + cached_caps = dev_caps->common_cap; num_funcs = dev_caps->num_funcs; /* unset dev capabilities */ memset(dev_caps, 0, sizeof(*dev_caps)); +#define ICE_RESTORE_DEV_CAP(name) \ + dev_caps->common_cap.name = cached_caps.name + /* restore cached values */ - dev_caps->common_cap.valid_functions = valid_func; - dev_caps->common_cap.txq_first_id = txq_first_id; - dev_caps->common_cap.rxq_first_id = rxq_first_id; - dev_caps->common_cap.msix_vector_first_id = msix_vector_first_id; - dev_caps->common_cap.max_mtu = max_mtu; + ICE_RESTORE_DEV_CAP(valid_functions); + ICE_RESTORE_DEV_CAP(txq_first_id); + ICE_RESTORE_DEV_CAP(rxq_first_id); + ICE_RESTORE_DEV_CAP(msix_vector_first_id); + ICE_RESTORE_DEV_CAP(max_mtu); + ICE_RESTORE_DEV_CAP(nvm_unified_update); + ICE_RESTORE_DEV_CAP(nvm_update_pending_nvm); + ICE_RESTORE_DEV_CAP(nvm_update_pending_orom); + ICE_RESTORE_DEV_CAP(nvm_update_pending_netlist); dev_caps->num_funcs = num_funcs; /* one Tx and one Rx queue per function in safe mode */ From fc7d17551f776fda243493ce59894c1eec2a339f Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Mon, 28 Sep 2020 11:33:42 +0800 Subject: [PATCH 257/322] cpufreq: intel_pstate: Fix missing return statement Fix missing return statement when writing "off" to intel_pstate status sysfs I/F. Fixes: 55671ea3257a ("cpufreq: intel_pstate: Free memory only when turning off") Signed-off-by: Zhang Rui Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index a827b000ef51..9a515c460a00 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -2781,6 +2781,7 @@ static int intel_pstate_update_status(const char *buf, size_t size) cpufreq_unregister_driver(intel_pstate_driver); intel_pstate_driver_cleanup(); + return 0; } if (size == 6 && !strncmp(buf, "active", size)) { From eac53b3e304a8354dc2a2f38e60e712d0e110ef6 Mon Sep 17 00:00:00 2001 From: Yoann Congal Date: Tue, 29 Sep 2020 22:41:58 +0200 Subject: [PATCH 258/322] Documentation: PM: Fix a reStructuredText syntax error Fix a reStructuredText syntax error in the cpuidle PM admin-guide documentation: the ``...'' quotation marks are parsed as partial ''...'' reStructuredText markup and break the output formatting. This change them to "...". Signed-off-by: Yoann Congal Signed-off-by: Rafael J. Wysocki --- Documentation/admin-guide/pm/cpuidle.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/admin-guide/pm/cpuidle.rst b/Documentation/admin-guide/pm/cpuidle.rst index a96a423e3779..6ebe163f9dfe 100644 --- a/Documentation/admin-guide/pm/cpuidle.rst +++ b/Documentation/admin-guide/pm/cpuidle.rst @@ -690,7 +690,7 @@ which of the two parameters is added to the kernel command line. In the instruction of the CPUs (which, as a rule, suspends the execution of the program and causes the hardware to attempt to enter the shallowest available idle state) for this purpose, and if ``idle=poll`` is used, idle CPUs will execute a -more or less ``lightweight'' sequence of instructions in a tight loop. [Note +more or less "lightweight" sequence of instructions in a tight loop. [Note that using ``idle=poll`` is somewhat drastic in many cases, as preventing idle CPUs from saving almost any energy at all may not be the only effect of it. For example, on Intel hardware it effectively prevents CPUs from using From 1a67b9263e06688d7f683714cace8dcc203f8c55 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 30 Sep 2020 13:20:23 +0200 Subject: [PATCH 259/322] ARM: imx6q: Fixup RCU usage for cpuidle The commit eb1f00237aca ("lockdep,trace: Expose tracepoints"), started to expose us for tracepoints. For imx6q cpuidle, this leads to an RCU splat according to below. [6.870684] [] (_raw_spin_lock) from [] (imx6q_enter_wait+0x18/0x9c) [6.878846] [] (imx6q_enter_wait) from [] (cpuidle_enter_state+0x168/0x5e4) To fix the problem, let's assign the corresponding idlestate->flags the CPUIDLE_FLAG_RCU_IDLE bit, which enables us to call rcu_idle_enter|exit() at the proper point. Reported-by: Dong Aisheng Suggested-by: Peter Zijlstra Signed-off-by: Ulf Hansson Acked-by: Peter Zijlstra (Intel) Signed-off-by: Rafael J. Wysocki --- arch/arm/mach-imx/cpuidle-imx6q.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-imx/cpuidle-imx6q.c b/arch/arm/mach-imx/cpuidle-imx6q.c index 24dd5bbe60e4..094337dc1bc7 100644 --- a/arch/arm/mach-imx/cpuidle-imx6q.c +++ b/arch/arm/mach-imx/cpuidle-imx6q.c @@ -24,7 +24,9 @@ static int imx6q_enter_wait(struct cpuidle_device *dev, imx6_set_lpm(WAIT_UNCLOCKED); raw_spin_unlock(&cpuidle_lock); + rcu_idle_enter(); cpu_do_idle(); + rcu_idle_exit(); raw_spin_lock(&cpuidle_lock); if (num_idle_cpus-- == num_online_cpus()) @@ -44,7 +46,7 @@ static struct cpuidle_driver imx6q_cpuidle_driver = { { .exit_latency = 50, .target_residency = 75, - .flags = CPUIDLE_FLAG_TIMER_STOP, + .flags = CPUIDLE_FLAG_TIMER_STOP | CPUIDLE_FLAG_RCU_IDLE, .enter = imx6q_enter_wait, .name = "WAIT", .desc = "Clock off", From a466c85edc6fbe845facc8f57c408c544f42899e Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 20 Aug 2020 11:18:27 -0400 Subject: [PATCH 260/322] btrfs: move btrfs_rm_dev_replace_free_srcdev outside of all locks When closing and freeing the source device we could end up doing our final blkdev_put() on the bdev, which will grab the bd_mutex. As such we want to be holding as few locks as possible, so move this call outside of the dev_replace->lock_finishing_cancel_unmount lock. Since we're modifying the fs_devices we need to make sure we're holding the uuid_mutex here, so take that as well. There's a report from syzbot probably hitting one of the cases where the bd_mutex and device_list_mutex are taken in the wrong order, however it's not with device replace, like this patch fixes. As there's no reproducer available so far, we can't verify the fix. https://lore.kernel.org/lkml/000000000000fc04d105afcf86d7@google.com/ dashboard link: https://syzkaller.appspot.com/bug?extid=84a0634dc5d21d488419 WARNING: possible circular locking dependency detected 5.9.0-rc5-syzkaller #0 Not tainted ------------------------------------------------------ syz-executor.0/6878 is trying to acquire lock: ffff88804c17d780 (&bdev->bd_mutex){+.+.}-{3:3}, at: blkdev_put+0x30/0x520 fs/block_dev.c:1804 but task is already holding lock: ffff8880908cfce0 (&fs_devs->device_list_mutex){+.+.}-{3:3}, at: close_fs_devices.part.0+0x2e/0x800 fs/btrfs/volumes.c:1159 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #4 (&fs_devs->device_list_mutex){+.+.}-{3:3}: __mutex_lock_common kernel/locking/mutex.c:956 [inline] __mutex_lock+0x134/0x10e0 kernel/locking/mutex.c:1103 btrfs_finish_chunk_alloc+0x281/0xf90 fs/btrfs/volumes.c:5255 btrfs_create_pending_block_groups+0x2f3/0x700 fs/btrfs/block-group.c:2109 __btrfs_end_transaction+0xf5/0x690 fs/btrfs/transaction.c:916 find_free_extent_update_loop fs/btrfs/extent-tree.c:3807 [inline] find_free_extent+0x23b7/0x2e60 fs/btrfs/extent-tree.c:4127 btrfs_reserve_extent+0x166/0x460 fs/btrfs/extent-tree.c:4206 cow_file_range+0x3de/0x9b0 fs/btrfs/inode.c:1063 btrfs_run_delalloc_range+0x2cf/0x1410 fs/btrfs/inode.c:1838 writepage_delalloc+0x150/0x460 fs/btrfs/extent_io.c:3439 __extent_writepage+0x441/0xd00 fs/btrfs/extent_io.c:3653 extent_write_cache_pages.constprop.0+0x69d/0x1040 fs/btrfs/extent_io.c:4249 extent_writepages+0xcd/0x2b0 fs/btrfs/extent_io.c:4370 do_writepages+0xec/0x290 mm/page-writeback.c:2352 __writeback_single_inode+0x125/0x1400 fs/fs-writeback.c:1461 writeback_sb_inodes+0x53d/0xf40 fs/fs-writeback.c:1721 wb_writeback+0x2ad/0xd40 fs/fs-writeback.c:1894 wb_do_writeback fs/fs-writeback.c:2039 [inline] wb_workfn+0x2dc/0x13e0 fs/fs-writeback.c:2080 process_one_work+0x94c/0x1670 kernel/workqueue.c:2269 worker_thread+0x64c/0x1120 kernel/workqueue.c:2415 kthread+0x3b5/0x4a0 kernel/kthread.c:292 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:294 -> #3 (sb_internal#2){.+.+}-{0:0}: percpu_down_read include/linux/percpu-rwsem.h:51 [inline] __sb_start_write+0x234/0x470 fs/super.c:1672 sb_start_intwrite include/linux/fs.h:1690 [inline] start_transaction+0xbe7/0x1170 fs/btrfs/transaction.c:624 find_free_extent_update_loop fs/btrfs/extent-tree.c:3789 [inline] find_free_extent+0x25e1/0x2e60 fs/btrfs/extent-tree.c:4127 btrfs_reserve_extent+0x166/0x460 fs/btrfs/extent-tree.c:4206 cow_file_range+0x3de/0x9b0 fs/btrfs/inode.c:1063 btrfs_run_delalloc_range+0x2cf/0x1410 fs/btrfs/inode.c:1838 writepage_delalloc+0x150/0x460 fs/btrfs/extent_io.c:3439 __extent_writepage+0x441/0xd00 fs/btrfs/extent_io.c:3653 extent_write_cache_pages.constprop.0+0x69d/0x1040 fs/btrfs/extent_io.c:4249 extent_writepages+0xcd/0x2b0 fs/btrfs/extent_io.c:4370 do_writepages+0xec/0x290 mm/page-writeback.c:2352 __writeback_single_inode+0x125/0x1400 fs/fs-writeback.c:1461 writeback_sb_inodes+0x53d/0xf40 fs/fs-writeback.c:1721 wb_writeback+0x2ad/0xd40 fs/fs-writeback.c:1894 wb_do_writeback fs/fs-writeback.c:2039 [inline] wb_workfn+0x2dc/0x13e0 fs/fs-writeback.c:2080 process_one_work+0x94c/0x1670 kernel/workqueue.c:2269 worker_thread+0x64c/0x1120 kernel/workqueue.c:2415 kthread+0x3b5/0x4a0 kernel/kthread.c:292 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:294 -> #2 ((work_completion)(&(&wb->dwork)->work)){+.+.}-{0:0}: __flush_work+0x60e/0xac0 kernel/workqueue.c:3041 wb_shutdown+0x180/0x220 mm/backing-dev.c:355 bdi_unregister+0x174/0x590 mm/backing-dev.c:872 del_gendisk+0x820/0xa10 block/genhd.c:933 loop_remove drivers/block/loop.c:2192 [inline] loop_control_ioctl drivers/block/loop.c:2291 [inline] loop_control_ioctl+0x3b1/0x480 drivers/block/loop.c:2257 vfs_ioctl fs/ioctl.c:48 [inline] __do_sys_ioctl fs/ioctl.c:753 [inline] __se_sys_ioctl fs/ioctl.c:739 [inline] __x64_sys_ioctl+0x193/0x200 fs/ioctl.c:739 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 -> #1 (loop_ctl_mutex){+.+.}-{3:3}: __mutex_lock_common kernel/locking/mutex.c:956 [inline] __mutex_lock+0x134/0x10e0 kernel/locking/mutex.c:1103 lo_open+0x19/0xd0 drivers/block/loop.c:1893 __blkdev_get+0x759/0x1aa0 fs/block_dev.c:1507 blkdev_get fs/block_dev.c:1639 [inline] blkdev_open+0x227/0x300 fs/block_dev.c:1753 do_dentry_open+0x4b9/0x11b0 fs/open.c:817 do_open fs/namei.c:3251 [inline] path_openat+0x1b9a/0x2730 fs/namei.c:3368 do_filp_open+0x17e/0x3c0 fs/namei.c:3395 do_sys_openat2+0x16d/0x420 fs/open.c:1168 do_sys_open fs/open.c:1184 [inline] __do_sys_open fs/open.c:1192 [inline] __se_sys_open fs/open.c:1188 [inline] __x64_sys_open+0x119/0x1c0 fs/open.c:1188 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 -> #0 (&bdev->bd_mutex){+.+.}-{3:3}: check_prev_add kernel/locking/lockdep.c:2496 [inline] check_prevs_add kernel/locking/lockdep.c:2601 [inline] validate_chain kernel/locking/lockdep.c:3218 [inline] __lock_acquire+0x2a96/0x5780 kernel/locking/lockdep.c:4426 lock_acquire+0x1f3/0xae0 kernel/locking/lockdep.c:5006 __mutex_lock_common kernel/locking/mutex.c:956 [inline] __mutex_lock+0x134/0x10e0 kernel/locking/mutex.c:1103 blkdev_put+0x30/0x520 fs/block_dev.c:1804 btrfs_close_bdev fs/btrfs/volumes.c:1117 [inline] btrfs_close_bdev fs/btrfs/volumes.c:1107 [inline] btrfs_close_one_device fs/btrfs/volumes.c:1133 [inline] close_fs_devices.part.0+0x1a4/0x800 fs/btrfs/volumes.c:1161 close_fs_devices fs/btrfs/volumes.c:1193 [inline] btrfs_close_devices+0x95/0x1f0 fs/btrfs/volumes.c:1179 close_ctree+0x688/0x6cb fs/btrfs/disk-io.c:4149 generic_shutdown_super+0x144/0x370 fs/super.c:464 kill_anon_super+0x36/0x60 fs/super.c:1108 btrfs_kill_super+0x38/0x50 fs/btrfs/super.c:2265 deactivate_locked_super+0x94/0x160 fs/super.c:335 deactivate_super+0xad/0xd0 fs/super.c:366 cleanup_mnt+0x3a3/0x530 fs/namespace.c:1118 task_work_run+0xdd/0x190 kernel/task_work.c:141 tracehook_notify_resume include/linux/tracehook.h:188 [inline] exit_to_user_mode_loop kernel/entry/common.c:163 [inline] exit_to_user_mode_prepare+0x1e1/0x200 kernel/entry/common.c:190 syscall_exit_to_user_mode+0x7e/0x2e0 kernel/entry/common.c:265 entry_SYSCALL_64_after_hwframe+0x44/0xa9 other info that might help us debug this: Chain exists of: &bdev->bd_mutex --> sb_internal#2 --> &fs_devs->device_list_mutex Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&fs_devs->device_list_mutex); lock(sb_internal#2); lock(&fs_devs->device_list_mutex); lock(&bdev->bd_mutex); *** DEADLOCK *** 3 locks held by syz-executor.0/6878: #0: ffff88809070c0e0 (&type->s_umount_key#70){++++}-{3:3}, at: deactivate_super+0xa5/0xd0 fs/super.c:365 #1: ffffffff8a5b37a8 (uuid_mutex){+.+.}-{3:3}, at: btrfs_close_devices+0x23/0x1f0 fs/btrfs/volumes.c:1178 #2: ffff8880908cfce0 (&fs_devs->device_list_mutex){+.+.}-{3:3}, at: close_fs_devices.part.0+0x2e/0x800 fs/btrfs/volumes.c:1159 stack backtrace: CPU: 0 PID: 6878 Comm: syz-executor.0 Not tainted 5.9.0-rc5-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x198/0x1fd lib/dump_stack.c:118 check_noncircular+0x324/0x3e0 kernel/locking/lockdep.c:1827 check_prev_add kernel/locking/lockdep.c:2496 [inline] check_prevs_add kernel/locking/lockdep.c:2601 [inline] validate_chain kernel/locking/lockdep.c:3218 [inline] __lock_acquire+0x2a96/0x5780 kernel/locking/lockdep.c:4426 lock_acquire+0x1f3/0xae0 kernel/locking/lockdep.c:5006 __mutex_lock_common kernel/locking/mutex.c:956 [inline] __mutex_lock+0x134/0x10e0 kernel/locking/mutex.c:1103 blkdev_put+0x30/0x520 fs/block_dev.c:1804 btrfs_close_bdev fs/btrfs/volumes.c:1117 [inline] btrfs_close_bdev fs/btrfs/volumes.c:1107 [inline] btrfs_close_one_device fs/btrfs/volumes.c:1133 [inline] close_fs_devices.part.0+0x1a4/0x800 fs/btrfs/volumes.c:1161 close_fs_devices fs/btrfs/volumes.c:1193 [inline] btrfs_close_devices+0x95/0x1f0 fs/btrfs/volumes.c:1179 close_ctree+0x688/0x6cb fs/btrfs/disk-io.c:4149 generic_shutdown_super+0x144/0x370 fs/super.c:464 kill_anon_super+0x36/0x60 fs/super.c:1108 btrfs_kill_super+0x38/0x50 fs/btrfs/super.c:2265 deactivate_locked_super+0x94/0x160 fs/super.c:335 deactivate_super+0xad/0xd0 fs/super.c:366 cleanup_mnt+0x3a3/0x530 fs/namespace.c:1118 task_work_run+0xdd/0x190 kernel/task_work.c:141 tracehook_notify_resume include/linux/tracehook.h:188 [inline] exit_to_user_mode_loop kernel/entry/common.c:163 [inline] exit_to_user_mode_prepare+0x1e1/0x200 kernel/entry/common.c:190 syscall_exit_to_user_mode+0x7e/0x2e0 kernel/entry/common.c:265 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x460027 RSP: 002b:00007fff59216328 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6 RAX: 0000000000000000 RBX: 0000000000076035 RCX: 0000000000460027 RDX: 0000000000403188 RSI: 0000000000000002 RDI: 00007fff592163d0 RBP: 0000000000000333 R08: 0000000000000000 R09: 000000000000000b R10: 0000000000000005 R11: 0000000000000246 R12: 00007fff59217460 R13: 0000000002df2a60 R14: 0000000000000000 R15: 00007fff59217460 Signed-off-by: Josef Bacik [ add syzbot reference ] Signed-off-by: David Sterba --- fs/btrfs/dev-replace.c | 3 ++- fs/btrfs/volumes.c | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 7cf48aeb6f14..62aece1bb8ec 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -748,7 +748,6 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &src_device->dev_state)) btrfs_scratch_superblocks(fs_info, src_device->bdev, src_device->name->str); - btrfs_rm_dev_replace_free_srcdev(src_device); /* write back the superblocks */ trans = btrfs_start_transaction(root, 0); @@ -757,6 +756,8 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); + btrfs_rm_dev_replace_free_srcdev(src_device); + return 0; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 01ebdb69fbdb..1997a7d67f22 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2224,6 +2224,8 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_device *srcdev) struct btrfs_fs_info *fs_info = srcdev->fs_info; struct btrfs_fs_devices *fs_devices = srcdev->fs_devices; + mutex_lock(&uuid_mutex); + btrfs_close_bdev(srcdev); synchronize_rcu(); btrfs_free_device(srcdev); @@ -2252,6 +2254,7 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_device *srcdev) close_fs_devices(fs_devices); free_fs_devices(fs_devices); } + mutex_unlock(&uuid_mutex); } void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev) From 4c8f353272dd1262013873990c0fafd0e3c8f274 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 23 Sep 2020 15:30:16 +0100 Subject: [PATCH 261/322] btrfs: fix filesystem corruption after a device replace We use a device's allocation state tree to track ranges in a device used for allocated chunks, and we set ranges in this tree when allocating a new chunk. However after a device replace operation, we were not setting the allocated ranges in the new device's allocation state tree, so that tree is empty after a device replace. This means that a fitrim operation after a device replace will trim the device ranges that have allocated chunks and extents, as we trim every range for which there is not a range marked in the device's allocation state tree. It is also important during chunk allocation, since the device's allocation state is used to determine if a range is already allocated when allocating a new chunk. This is trivial to reproduce and the following script triggers the bug: $ cat reproducer.sh #!/bin/bash DEV1="/dev/sdg" DEV2="/dev/sdh" DEV3="/dev/sdi" wipefs -a $DEV1 $DEV2 $DEV3 &> /dev/null # Create a raid1 test fs on 2 devices. mkfs.btrfs -f -m raid1 -d raid1 $DEV1 $DEV2 > /dev/null mount $DEV1 /mnt/btrfs xfs_io -f -c "pwrite -S 0xab 0 10M" /mnt/btrfs/foo echo "Starting to replace $DEV1 with $DEV3" btrfs replace start -B $DEV1 $DEV3 /mnt/btrfs echo echo "Running fstrim" fstrim /mnt/btrfs echo echo "Unmounting filesystem" umount /mnt/btrfs echo "Mounting filesystem in degraded mode using $DEV3 only" wipefs -a $DEV1 $DEV2 &> /dev/null mount -o degraded $DEV3 /mnt/btrfs if [ $? -ne 0 ]; then dmesg | tail echo echo "Failed to mount in degraded mode" exit 1 fi echo echo "File foo data (expected all bytes = 0xab):" od -A d -t x1 /mnt/btrfs/foo umount /mnt/btrfs When running the reproducer: $ ./replace-test.sh wrote 10485760/10485760 bytes at offset 0 10 MiB, 2560 ops; 0.0901 sec (110.877 MiB/sec and 28384.5216 ops/sec) Starting to replace /dev/sdg with /dev/sdi Running fstrim Unmounting filesystem Mounting filesystem in degraded mode using /dev/sdi only mount: /mnt/btrfs: wrong fs type, bad option, bad superblock on /dev/sdi, missing codepage or helper program, or other error. [19581.748641] BTRFS info (device sdg): dev_replace from /dev/sdg (devid 1) to /dev/sdi started [19581.803842] BTRFS info (device sdg): dev_replace from /dev/sdg (devid 1) to /dev/sdi finished [19582.208293] BTRFS info (device sdi): allowing degraded mounts [19582.208298] BTRFS info (device sdi): disk space caching is enabled [19582.208301] BTRFS info (device sdi): has skinny extents [19582.212853] BTRFS warning (device sdi): devid 2 uuid 1f731f47-e1bb-4f00-bfbb-9e5a0cb4ba9f is missing [19582.213904] btree_readpage_end_io_hook: 25839 callbacks suppressed [19582.213907] BTRFS error (device sdi): bad tree block start, want 30490624 have 0 [19582.214780] BTRFS warning (device sdi): failed to read root (objectid=7): -5 [19582.231576] BTRFS error (device sdi): open_ctree failed Failed to mount in degraded mode So fix by setting all allocated ranges in the replace target device when the replace operation is finishing, when we are holding the chunk mutex and we can not race with new chunk allocations. A test case for fstests follows soon. Fixes: 1c11b63eff2a67 ("btrfs: replace pending/pinned chunks lists with io tree") CC: stable@vger.kernel.org # 5.2+ Reviewed-by: Nikolay Borisov Reviewed-by: Johannes Thumshirn Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/dev-replace.c | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 62aece1bb8ec..e4a1c6afe35d 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -599,6 +599,37 @@ static void btrfs_rm_dev_replace_unblocked(struct btrfs_fs_info *fs_info) wake_up(&fs_info->dev_replace.replace_wait); } +/* + * When finishing the device replace, before swapping the source device with the + * target device we must update the chunk allocation state in the target device, + * as it is empty because replace works by directly copying the chunks and not + * through the normal chunk allocation path. + */ +static int btrfs_set_target_alloc_state(struct btrfs_device *srcdev, + struct btrfs_device *tgtdev) +{ + struct extent_state *cached_state = NULL; + u64 start = 0; + u64 found_start; + u64 found_end; + int ret = 0; + + lockdep_assert_held(&srcdev->fs_info->chunk_mutex); + + while (!find_first_extent_bit(&srcdev->alloc_state, start, + &found_start, &found_end, + CHUNK_ALLOCATED, &cached_state)) { + ret = set_extent_bits(&tgtdev->alloc_state, found_start, + found_end, CHUNK_ALLOCATED); + if (ret) + break; + start = found_end + 1; + } + + free_extent_state(cached_state); + return ret; +} + static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, int scrub_ret) { @@ -673,8 +704,14 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, dev_replace->time_stopped = ktime_get_real_seconds(); dev_replace->item_needs_writeback = 1; - /* replace old device with new one in mapping tree */ + /* + * Update allocation state in the new device and replace the old device + * with the new one in the mapping tree. + */ if (!scrub_ret) { + scrub_ret = btrfs_set_target_alloc_state(src_device, tgt_device); + if (scrub_ret) + goto error; btrfs_dev_replace_update_device_in_mapping_tree(fs_info, src_device, tgt_device); @@ -685,6 +722,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, btrfs_dev_name(src_device), src_device->devid, rcu_str_deref(tgt_device->name), scrub_ret); +error: up_write(&dev_replace->rwsem); mutex_unlock(&fs_info->chunk_mutex); mutex_unlock(&fs_info->fs_devices->device_list_mutex); From aa9887608e77b835d51f05a54940380391cd4e21 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Sun, 27 Sep 2020 11:09:16 +0530 Subject: [PATCH 262/322] RISC-V: Check clint_time_val before use The NoMMU kernel is broken for QEMU virt machine from Linux-5.9-rc6 because clint_time_val is used even before CLINT driver is probed at following places: 1. rand_initialize() calls get_cycles() which in-turn uses clint_time_val 2. boot_init_stack_canary() calls get_cycles() which in-turn uses clint_time_val The issue#1 (above) is fixed by providing custom random_get_entropy() for RISC-V NoMMU kernel. For issue#2 (above), we remove dependency of boot_init_stack_canary() on get_cycles() and this is aligned with the boot_init_stack_canary() implementations of ARM, ARM64 and MIPS kernel. Fixes: d5be89a8d118 ("RISC-V: Resurrect the MMIO timer implementation for M-mode systems") Signed-off-by: Anup Patel Tested-by: Damien Le Moal Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/stackprotector.h | 4 ---- arch/riscv/include/asm/timex.h | 13 +++++++++++++ 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/arch/riscv/include/asm/stackprotector.h b/arch/riscv/include/asm/stackprotector.h index d95f7b2a7f37..5962f8891f06 100644 --- a/arch/riscv/include/asm/stackprotector.h +++ b/arch/riscv/include/asm/stackprotector.h @@ -5,7 +5,6 @@ #include #include -#include extern unsigned long __stack_chk_guard; @@ -18,12 +17,9 @@ extern unsigned long __stack_chk_guard; static __always_inline void boot_init_stack_canary(void) { unsigned long canary; - unsigned long tsc; /* Try to get a semi random initial value. */ get_random_bytes(&canary, sizeof(canary)); - tsc = get_cycles(); - canary += tsc + (tsc << BITS_PER_LONG/2); canary ^= LINUX_VERSION_CODE; canary &= CANARY_MASK; diff --git a/arch/riscv/include/asm/timex.h b/arch/riscv/include/asm/timex.h index 7f659dda0032..ab104905d4db 100644 --- a/arch/riscv/include/asm/timex.h +++ b/arch/riscv/include/asm/timex.h @@ -33,6 +33,19 @@ static inline u32 get_cycles_hi(void) #define get_cycles_hi get_cycles_hi #endif /* CONFIG_64BIT */ +/* + * Much like MIPS, we may not have a viable counter to use at an early point + * in the boot process. Unfortunately we don't have a fallback, so instead + * we just return 0. + */ +static inline unsigned long random_get_entropy(void) +{ + if (unlikely(clint_time_val == NULL)) + return 0; + return get_cycles(); +} +#define random_get_entropy() random_get_entropy() + #else /* CONFIG_RISCV_M_MODE */ static inline cycles_t get_cycles(void) From a509a66a9d0d4f4e304d58fad38c078d0336c445 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 29 Sep 2020 15:25:22 +0200 Subject: [PATCH 263/322] arm64: permit ACPI core to map kernel memory used for table overrides Jonathan reports that the strict policy for memory mapped by the ACPI core breaks the use case of passing ACPI table overrides via initramfs. This is due to the fact that the memory type used for loading the initramfs in memory is not recognized as a memory type that is typically used by firmware to pass firmware tables. Since the purpose of the strict policy is to ensure that no AML or other ACPI code can manipulate any memory that is used by the kernel to keep its internal state or the state of user tasks, we can relax the permission check, and allow mappings of memory that is reserved and marked as NOMAP via memblock, and therefore not covered by the linear mapping to begin with. Fixes: 1583052d111f ("arm64/acpi: disallow AML memory opregions to access kernel memory") Fixes: 325f5585ec36 ("arm64/acpi: disallow writeable AML opregion mapping for EFI code regions") Reported-by: Jonathan Cameron Signed-off-by: Ard Biesheuvel Tested-by: Jonathan Cameron Cc: Sudeep Holla Cc: Lorenzo Pieralisi Link: https://lore.kernel.org/r/20200929132522.18067-1-ardb@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/kernel/acpi.c | 22 ++++++++++++++++++++-- include/linux/acpi.h | 2 +- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index a85174d05473..cada0b816c8a 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -298,8 +298,21 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) case EFI_BOOT_SERVICES_DATA: case EFI_CONVENTIONAL_MEMORY: case EFI_PERSISTENT_MEMORY: - pr_warn(FW_BUG "requested region covers kernel memory @ %pa\n", &phys); - return NULL; + if (memblock_is_map_memory(phys) || + !memblock_is_region_memory(phys, size)) { + pr_warn(FW_BUG "requested region covers kernel memory @ %pa\n", &phys); + return NULL; + } + /* + * Mapping kernel memory is permitted if the region in + * question is covered by a single memblock with the + * NOMAP attribute set: this enables the use of ACPI + * table overrides passed via initramfs, which are + * reserved in memory using arch_reserve_mem_area() + * below. As this particular use case only requires + * read access, fall through to the R/O mapping case. + */ + fallthrough; case EFI_RUNTIME_SERVICES_CODE: /* @@ -388,3 +401,8 @@ int apei_claim_sea(struct pt_regs *regs) return err; } + +void arch_reserve_mem_area(acpi_physical_address addr, size_t size) +{ + memblock_mark_nomap(addr, size); +} diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 1e4cdc6c7ae2..64ae25c59d55 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -958,7 +958,7 @@ void acpi_os_set_prepare_extended_sleep(int (*func)(u8 sleep_state, acpi_status acpi_os_prepare_extended_sleep(u8 sleep_state, u32 val_a, u32 val_b); -#ifdef CONFIG_X86 +#ifndef CONFIG_IA64 void arch_reserve_mem_area(acpi_physical_address addr, size_t size); #else static inline void arch_reserve_mem_area(acpi_physical_address addr, From 76a6b0b90d532ed9bb9f6069aa12859c185e5b99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Fri, 25 Sep 2020 11:21:15 +0200 Subject: [PATCH 264/322] =?UTF-8?q?MAINTAINERS:=20Add=20Pali=20Roh=C3=A1r?= =?UTF-8?q?=20as=20aardvark=20PCI=20maintainer?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Link: https://lore.kernel.org/r/20200925092115.16546-1-pali@kernel.org Signed-off-by: Pali Rohár Signed-off-by: Bjorn Helgaas Acked-by: Thomas Petazzzoni --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index deaafb617361..5959a24a4acb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13160,6 +13160,7 @@ F: drivers/firmware/pcdp.* PCI DRIVER FOR AARDVARK (Marvell Armada 3700) M: Thomas Petazzoni +M: Pali Rohár L: linux-pci@vger.kernel.org L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained From e154b5b70368a84a19505a0be9b0096c66562b56 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Wed, 30 Sep 2020 21:38:27 +0530 Subject: [PATCH 265/322] octeontx2-af: Fix enable/disable of default NPC entries Packet replication feature present in Octeontx2 is a hardware linked list of PF and its VF interfaces so that broadcast packets are sent to all interfaces present in the list. It is driver job to add and delete a PF/VF interface to/from the list when the interface is brought up and down. This patch fixes the npc_enadis_default_entries function to handle broadcast replication properly if packet replication feature is present. Fixes: 40df309e4166 ("octeontx2-af: Support to enable/disable default MCAM entries") Signed-off-by: Subbaraya Sundeep Signed-off-by: Geetha sowjanya Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- .../net/ethernet/marvell/octeontx2/af/rvu.h | 3 ++- .../ethernet/marvell/octeontx2/af/rvu_nix.c | 5 ++-- .../ethernet/marvell/octeontx2/af/rvu_npc.c | 26 ++++++++++++++----- 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index dcf25a092008..b89dde2c8b08 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -463,6 +463,7 @@ void rvu_nix_freemem(struct rvu *rvu); int rvu_get_nixlf_count(struct rvu *rvu); void rvu_nix_lf_teardown(struct rvu *rvu, u16 pcifunc, int blkaddr, int npalf); int nix_get_nixlf(struct rvu *rvu, u16 pcifunc, int *nixlf, int *nix_blkaddr); +int nix_update_bcast_mce_list(struct rvu *rvu, u16 pcifunc, bool add); /* NPC APIs */ int rvu_npc_init(struct rvu *rvu); @@ -477,7 +478,7 @@ void rvu_npc_disable_promisc_entry(struct rvu *rvu, u16 pcifunc, int nixlf); void rvu_npc_enable_promisc_entry(struct rvu *rvu, u16 pcifunc, int nixlf); void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc, int nixlf, u64 chan); -void rvu_npc_disable_bcast_entry(struct rvu *rvu, u16 pcifunc); +void rvu_npc_enable_bcast_entry(struct rvu *rvu, u16 pcifunc, bool enable); int rvu_npc_update_rxvlan(struct rvu *rvu, u16 pcifunc, int nixlf); void rvu_npc_disable_mcam_entries(struct rvu *rvu, u16 pcifunc, int nixlf); void rvu_npc_disable_default_entries(struct rvu *rvu, u16 pcifunc, int nixlf); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 01a793105599..0fc70824fd6b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -17,7 +17,6 @@ #include "npc.h" #include "cgx.h" -static int nix_update_bcast_mce_list(struct rvu *rvu, u16 pcifunc, bool add); static int rvu_nix_get_bpid(struct rvu *rvu, struct nix_bp_cfg_req *req, int type, int chan_id); @@ -2020,7 +2019,7 @@ static int nix_update_mce_list(struct nix_mce_list *mce_list, return 0; } -static int nix_update_bcast_mce_list(struct rvu *rvu, u16 pcifunc, bool add) +int nix_update_bcast_mce_list(struct rvu *rvu, u16 pcifunc, bool add) { int err = 0, idx, next_idx, last_idx; struct nix_mce_list *mce_list; @@ -2065,7 +2064,7 @@ static int nix_update_bcast_mce_list(struct rvu *rvu, u16 pcifunc, bool add) /* Disable MCAM entry in NPC */ if (!mce_list->count) { - rvu_npc_disable_bcast_entry(rvu, pcifunc); + rvu_npc_enable_bcast_entry(rvu, pcifunc, false); goto end; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 0a214084406a..fbaf9bcd83f2 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -530,7 +530,7 @@ void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc, NIX_INTF_RX, &entry, true); } -void rvu_npc_disable_bcast_entry(struct rvu *rvu, u16 pcifunc) +void rvu_npc_enable_bcast_entry(struct rvu *rvu, u16 pcifunc, bool enable) { struct npc_mcam *mcam = &rvu->hw->mcam; int blkaddr, index; @@ -543,7 +543,7 @@ void rvu_npc_disable_bcast_entry(struct rvu *rvu, u16 pcifunc) pcifunc = pcifunc & ~RVU_PFVF_FUNC_MASK; index = npc_get_nixlf_mcam_index(mcam, pcifunc, 0, NIXLF_BCAST_ENTRY); - npc_enable_mcam_entry(rvu, mcam, blkaddr, index, false); + npc_enable_mcam_entry(rvu, mcam, blkaddr, index, enable); } void rvu_npc_update_flowkey_alg_idx(struct rvu *rvu, u16 pcifunc, int nixlf, @@ -622,23 +622,35 @@ static void npc_enadis_default_entries(struct rvu *rvu, u16 pcifunc, nixlf, NIXLF_UCAST_ENTRY); npc_enable_mcam_entry(rvu, mcam, blkaddr, index, enable); - /* For PF, ena/dis promisc and bcast MCAM match entries */ - if (pcifunc & RVU_PFVF_FUNC_MASK) + /* For PF, ena/dis promisc and bcast MCAM match entries. + * For VFs add/delete from bcast list when RX multicast + * feature is present. + */ + if (pcifunc & RVU_PFVF_FUNC_MASK && !rvu->hw->cap.nix_rx_multicast) return; /* For bcast, enable/disable only if it's action is not * packet replication, incase if action is replication - * then this PF's nixlf is removed from bcast replication + * then this PF/VF's nixlf is removed from bcast replication * list. */ - index = npc_get_nixlf_mcam_index(mcam, pcifunc, + index = npc_get_nixlf_mcam_index(mcam, pcifunc & ~RVU_PFVF_FUNC_MASK, nixlf, NIXLF_BCAST_ENTRY); bank = npc_get_bank(mcam, index); *(u64 *)&action = rvu_read64(rvu, blkaddr, NPC_AF_MCAMEX_BANKX_ACTION(index & (mcam->banksize - 1), bank)); - if (action.op != NIX_RX_ACTIONOP_MCAST) + + /* VFs will not have BCAST entry */ + if (action.op != NIX_RX_ACTIONOP_MCAST && + !(pcifunc & RVU_PFVF_FUNC_MASK)) { npc_enable_mcam_entry(rvu, mcam, blkaddr, index, enable); + } else { + nix_update_bcast_mce_list(rvu, pcifunc, enable); + /* Enable PF's BCAST entry for packet replication */ + rvu_npc_enable_bcast_entry(rvu, pcifunc, enable); + } + if (enable) rvu_npc_enable_promisc_entry(rvu, pcifunc, nixlf); else From 89eae5e87b4fa799726a3e8911c90d418cb5d2b1 Mon Sep 17 00:00:00 2001 From: Geetha sowjanya Date: Wed, 30 Sep 2020 21:38:52 +0530 Subject: [PATCH 266/322] octeontx2-pf: Fix TCP/UDP checksum offload for IPv6 frames For TCP/UDP checksum offload feature in Octeontx2 expects L3TYPE to be set irrespective of IP header checksum is being offloaded or not. Currently for IPv6 frames L3TYPE is not being set resulting in packet drop with checksum error. This patch fixes this issue. Fixes: 3ca6c4c88 ("octeontx2-pf: Add packet transmission support") Signed-off-by: Geetha sowjanya Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c index 3a5b34a2a7a6..e46834e043be 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c @@ -524,6 +524,7 @@ static void otx2_sqe_add_hdr(struct otx2_nic *pfvf, struct otx2_snd_queue *sq, sqe_hdr->ol3type = NIX_SENDL3TYPE_IP4_CKSUM; } else if (skb->protocol == htons(ETH_P_IPV6)) { proto = ipv6_hdr(skb)->nexthdr; + sqe_hdr->ol3type = NIX_SENDL3TYPE_IP6; } if (proto == IPPROTO_TCP) From 1ea0166da0509e987caa42c30a6a71f2c6ca1875 Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Wed, 30 Sep 2020 21:39:14 +0530 Subject: [PATCH 267/322] octeontx2-pf: Fix the device state on error Currently in otx2_open on failure of nix_lf_start transmit queues are not stopped which are already started in link_event. Since the tx queues are not stopped network stack still try's to send the packets leading to driver crash while access the device resources. Fixes: 50fe6c02e ("octeontx2-pf: Register and handle link notifications") Signed-off-by: Hariprasad Kelam Signed-off-by: Geetha sowjanya Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 75a8c407e815..5d620a39ea80 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1560,10 +1560,13 @@ int otx2_open(struct net_device *netdev) err = otx2_rxtx_enable(pf, true); if (err) - goto err_free_cints; + goto err_tx_stop_queues; return 0; +err_tx_stop_queues: + netif_tx_stop_all_queues(netdev); + netif_carrier_off(netdev); err_free_cints: otx2_free_cints(pf, qidx); vec = pci_irq_vector(pf->pdev, From 66a5209b53418111757716d71e52727b782eabd4 Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Wed, 30 Sep 2020 21:39:35 +0530 Subject: [PATCH 268/322] octeontx2-pf: Fix synchnorization issue in mbox Mbox implementation in octeontx2 driver has three states alloc, send and reset in mbox response. VF allocate and sends message to PF for processing, PF ACKs them back and reset the mbox memory. In some case we see synchronization issue where after msgs_acked is incremented and before mbox_reset API is called, if current execution is scheduled out and a different thread is scheduled in which checks for msgs_acked. Since the new thread sees msgs_acked == msgs_sent it will try to allocate a new message and to send a new mbox message to PF.Now if mbox_reset is scheduled in, PF will see '0' in msgs_send. This patch fixes the issue by calling mbox_reset before incrementing msgs_acked flag for last processing message and checks for valid message size. Fixes: d424b6c02 ("octeontx2-pf: Enable SRIOV and added VF mbox handling") Signed-off-by: Hariprasad Kelam Signed-off-by: Geetha sowjanya Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/af/mbox.c | 12 ++++++++++-- drivers/net/ethernet/marvell/octeontx2/af/mbox.h | 1 + drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 11 ++++++----- drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c | 4 ++-- 4 files changed, 19 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.c b/drivers/net/ethernet/marvell/octeontx2/af/mbox.c index 387e33fa417a..2718fe201c14 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.c @@ -17,7 +17,7 @@ static const u16 msgs_offset = ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN); -void otx2_mbox_reset(struct otx2_mbox *mbox, int devid) +void __otx2_mbox_reset(struct otx2_mbox *mbox, int devid) { void *hw_mbase = mbox->hwbase + (devid * MBOX_SIZE); struct otx2_mbox_dev *mdev = &mbox->dev[devid]; @@ -26,13 +26,21 @@ void otx2_mbox_reset(struct otx2_mbox *mbox, int devid) tx_hdr = hw_mbase + mbox->tx_start; rx_hdr = hw_mbase + mbox->rx_start; - spin_lock(&mdev->mbox_lock); mdev->msg_size = 0; mdev->rsp_size = 0; tx_hdr->num_msgs = 0; tx_hdr->msg_size = 0; rx_hdr->num_msgs = 0; rx_hdr->msg_size = 0; +} +EXPORT_SYMBOL(__otx2_mbox_reset); + +void otx2_mbox_reset(struct otx2_mbox *mbox, int devid) +{ + struct otx2_mbox_dev *mdev = &mbox->dev[devid]; + + spin_lock(&mdev->mbox_lock); + __otx2_mbox_reset(mbox, devid); spin_unlock(&mdev->mbox_lock); } EXPORT_SYMBOL(otx2_mbox_reset); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index 6dfd0f90cd70..ab433789d2c3 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -93,6 +93,7 @@ struct mbox_msghdr { }; void otx2_mbox_reset(struct otx2_mbox *mbox, int devid); +void __otx2_mbox_reset(struct otx2_mbox *mbox, int devid); void otx2_mbox_destroy(struct otx2_mbox *mbox); int otx2_mbox_init(struct otx2_mbox *mbox, void __force *hwbase, struct pci_dev *pdev, void __force *reg_base, diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 5d620a39ea80..2fb45670aca4 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -370,8 +370,8 @@ static int otx2_forward_vf_mbox_msgs(struct otx2_nic *pf, dst_mbox = &pf->mbox; dst_size = dst_mbox->mbox.tx_size - ALIGN(sizeof(*mbox_hdr), MBOX_MSG_ALIGN); - /* Check if msgs fit into destination area */ - if (mbox_hdr->msg_size > dst_size) + /* Check if msgs fit into destination area and has valid size */ + if (mbox_hdr->msg_size > dst_size || !mbox_hdr->msg_size) return -EINVAL; dst_mdev = &dst_mbox->mbox.dev[0]; @@ -526,10 +526,10 @@ static void otx2_pfvf_mbox_up_handler(struct work_struct *work) end: offset = mbox->rx_start + msg->next_msgoff; + if (mdev->msgs_acked == (vf_mbox->up_num_msgs - 1)) + __otx2_mbox_reset(mbox, 0); mdev->msgs_acked++; } - - otx2_mbox_reset(mbox, vf_idx); } static irqreturn_t otx2_pfvf_mbox_intr_handler(int irq, void *pf_irq) @@ -803,10 +803,11 @@ static void otx2_pfaf_mbox_handler(struct work_struct *work) msg = (struct mbox_msghdr *)(mdev->mbase + offset); otx2_process_pfaf_mbox_msg(pf, msg); offset = mbox->rx_start + msg->next_msgoff; + if (mdev->msgs_acked == (af_mbox->num_msgs - 1)) + __otx2_mbox_reset(mbox, 0); mdev->msgs_acked++; } - otx2_mbox_reset(mbox, 0); } static void otx2_handle_link_event(struct otx2_nic *pf) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index 92a3db69a6cd..2f90f1721441 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -99,10 +99,10 @@ static void otx2vf_vfaf_mbox_handler(struct work_struct *work) msg = (struct mbox_msghdr *)(mdev->mbase + offset); otx2vf_process_vfaf_mbox_msg(af_mbox->pfvf, msg); offset = mbox->rx_start + msg->next_msgoff; + if (mdev->msgs_acked == (af_mbox->num_msgs - 1)) + __otx2_mbox_reset(mbox, 0); mdev->msgs_acked++; } - - otx2_mbox_reset(mbox, 0); } static int otx2vf_process_mbox_msg_up(struct otx2_nic *vf, From 5d8ff95a52c36740bf4e61202d08549e7a9caf20 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Mon, 14 Sep 2020 12:18:46 +0300 Subject: [PATCH 269/322] pinctrl: qcom: sm8250: correct sdc2_clk Correct sdc2_clk pin definition (register offset is wrong, verified by the msm-4.19 driver). Fixes: 4e3ec9e407ad ("pinctrl: qcom: Add sm8250 pinctrl driver.") Signed-off-by: Dmitry Baryshkov Reviewed-by: Bjorn Andersson Acked-by: Manivannan Sadhasivam Link: https://lore.kernel.org/r/20200914091846.55204-1-dmitry.baryshkov@linaro.org Signed-off-by: Linus Walleij --- drivers/pinctrl/qcom/pinctrl-sm8250.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pinctrl/qcom/pinctrl-sm8250.c b/drivers/pinctrl/qcom/pinctrl-sm8250.c index a660f1274b66..826df0d637ea 100644 --- a/drivers/pinctrl/qcom/pinctrl-sm8250.c +++ b/drivers/pinctrl/qcom/pinctrl-sm8250.c @@ -1308,7 +1308,7 @@ static const struct msm_pingroup sm8250_groups[] = { [178] = PINGROUP(178, WEST, _, _, _, _, _, _, _, _, _), [179] = PINGROUP(179, WEST, _, _, _, _, _, _, _, _, _), [180] = UFS_RESET(ufs_reset, 0xb8000), - [181] = SDC_PINGROUP(sdc2_clk, 0x7000, 14, 6), + [181] = SDC_PINGROUP(sdc2_clk, 0xb7000, 14, 6), [182] = SDC_PINGROUP(sdc2_cmd, 0xb7000, 11, 3), [183] = SDC_PINGROUP(sdc2_data, 0xb7000, 9, 0), }; From 39c4dbe4cc363accd676162c24b264f44c581490 Mon Sep 17 00:00:00 2001 From: Hanks Chen Date: Thu, 20 Aug 2020 19:22:25 +0800 Subject: [PATCH 270/322] pinctrl: mediatek: check mtk_is_virt_gpio input parameter check mtk_is_virt_gpio input parameter, virtual gpio need to support eint mode. add error handler for the ko case to fix this boot fail: pc : mtk_is_virt_gpio+0x20/0x38 [pinctrl_mtk_common_v2] lr : mtk_gpio_get_direction+0x44/0xb0 [pinctrl_paris] Fixes: edd546465002 ("pinctrl: mediatek: avoid virtual gpio trying to set reg") Signed-off-by: Hanks Chen Acked-by: Sean Wang Singed-off-by: Jie Yang Link: https://lore.kernel.org/r/1597922546-29633-1-git-send-email-hanks.chen@mediatek.com Signed-off-by: Linus Walleij --- drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c index 2f3dfb56c3fa..35bbe5935708 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c @@ -259,6 +259,10 @@ bool mtk_is_virt_gpio(struct mtk_pinctrl *hw, unsigned int gpio_n) desc = (const struct mtk_pin_desc *)&hw->soc->pins[gpio_n]; + /* if the GPIO is not supported for eint mode */ + if (desc->eint.eint_m == NO_EINT_SUPPORT) + return virt_gpio; + if (desc->funcs && !desc->funcs[desc->eint.eint_m].name) virt_gpio = true; From e09e200e07222467ef82367bff7cc6f44ad00397 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 30 Sep 2020 17:20:12 +0300 Subject: [PATCH 271/322] gpio: pca953x: Use bitmap API over implicit GCC extension In IRQ handler we have to clear bitmap before use. Currently the GCC extension has been used for that. For sake of the consistency switch to bitmap API. As expected bloat-o-meter shows no difference in the object size. Signed-off-by: Andy Shevchenko Reviewed-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20200930142013.59247-1-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij --- drivers/gpio/gpio-pca953x.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 29342e5def82..7f64e6948574 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -814,10 +814,12 @@ static irqreturn_t pca953x_irq_handler(int irq, void *devid) { struct pca953x_chip *chip = devid; struct gpio_chip *gc = &chip->gpio_chip; - DECLARE_BITMAP(pending, MAX_LINE) = {}; + DECLARE_BITMAP(pending, MAX_LINE); int level; bool ret; + bitmap_zero(pending, MAX_LINE); + mutex_lock(&chip->i2c_lock); ret = pca953x_irq_pending(chip, pending); mutex_unlock(&chip->i2c_lock); From 8c1f1c34777bddb633d4a068a9c812d29974c6bd Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 30 Sep 2020 17:20:13 +0300 Subject: [PATCH 272/322] gpio: pca953x: Correctly initialize registers 6 and 7 for PCA957x When driver has been converted to the bitmap API the non-bitmap functions started behaving differently on 32-bit BE architectures since the bytes in two consequent unsigned longs are in different order in comparison to byte array. Hence if the chip had had more than 32 lines the memset() call over it would have not set up upper lines correctly. Although it's currently a theoretical case (no supported chips of this type has 32+ lines), it's better to provide a clean code to avoid people thinking this is okay and potentially producing not fully working things. Fixes: 35d13d94893f ("gpio: pca953x: convert to use bitmap API") Signed-off-by: Andy Shevchenko Reviewed-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20200930142013.59247-2-andriy.shevchenko@linux.intel.com Signed-off-by: Linus Walleij --- drivers/gpio/gpio-pca953x.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 7f64e6948574..fb61f2fc6ed7 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -942,6 +942,7 @@ out: static int device_pca957x_init(struct pca953x_chip *chip, u32 invert) { DECLARE_BITMAP(val, MAX_LINE); + unsigned int i; int ret; ret = device_pca95xx_init(chip, invert); @@ -949,7 +950,9 @@ static int device_pca957x_init(struct pca953x_chip *chip, u32 invert) goto out; /* To enable register 6, 7 to control pull up and pull down */ - memset(val, 0x02, NBANK(chip)); + for (i = 0; i < NBANK(chip); i++) + bitmap_set_value8(val, 0x02, i * BANK_SZ); + ret = pca953x_write_regs(chip, PCA957X_BKEN, val); if (ret) goto out; From 452d62227958e149e5c7b0084a559c0957c3ff2a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 13 Jul 2020 15:15:14 +0100 Subject: [PATCH 273/322] KVM: arm64: Restore missing ISB on nVHE __tlb_switch_to_guest Commit a0e50aa3f4a8 ("KVM: arm64: Factor out stage 2 page table data from struct kvm") dropped the ISB after __load_guest_stage2(), only leaving the one that is required when the speculative AT workaround is in effect. As Andrew points it: "This alternative is 'backwards' to avoid a double ISB as there is one in __load_guest_stage2 when the workaround is active." Restore the missing ISB, conditionned on the AT workaround not being active. Fixes: a0e50aa3f4a8 ("KVM: arm64: Factor out stage 2 page table data from struct kvm") Reported-by: Andrew Scull Reported-by: Thomas Tai Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/nvhe/tlb.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c index 69eae608d670..b15d65a42042 100644 --- a/arch/arm64/kvm/hyp/nvhe/tlb.c +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c @@ -31,7 +31,14 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, isb(); } + /* + * __load_guest_stage2() includes an ISB only when the AT + * workaround is applied. Take care of the opposite condition, + * ensuring that we always have an ISB, but not two ISBs back + * to back. + */ __load_guest_stage2(mmu); + asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT)); } static void __tlb_switch_to_host(struct tlb_inv_context *cxt) From 0bbe4ced53e36786eafc2ecbf9a1761f55b4a82e Mon Sep 17 00:00:00 2001 From: Adrian Huang Date: Sat, 26 Sep 2020 18:26:02 +0800 Subject: [PATCH 274/322] iommu/amd: Fix the overwritten field in IVMD header Commit 387caf0b759a ("iommu/amd: Treat per-device exclusion ranges as r/w unity-mapped regions") accidentally overwrites the 'flags' field in IVMD (struct ivmd_header) when the I/O virtualization memory definition is associated with the exclusion range entry. This leads to the corrupted IVMD table (incorrect checksum). The kdump kernel reports the invalid checksum: ACPI BIOS Warning (bug): Incorrect checksum in table [IVRS] - 0x5C, should be 0x60 (20200717/tbprint-177) AMD-Vi: [Firmware Bug]: IVRS invalid checksum Fix the above-mentioned issue by modifying the 'struct unity_map_entry' member instead of the IVMD header. Cleanup: The *exclusion_range* functions are not used anymore, so get rid of them. Fixes: 387caf0b759a ("iommu/amd: Treat per-device exclusion ranges as r/w unity-mapped regions") Reported-and-tested-by: Baoquan He Signed-off-by: Adrian Huang Cc: Jerry Snitselaar Link: https://lore.kernel.org/r/20200926102602.19177-1-adrianhuang0701@gmail.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/init.c | 56 +++++++--------------------------------- 1 file changed, 10 insertions(+), 46 deletions(-) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 445a08d23fed..1ba6b4cc56e8 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -1103,25 +1103,6 @@ static int __init add_early_maps(void) return 0; } -/* - * Reads the device exclusion range from ACPI and initializes the IOMMU with - * it - */ -static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m) -{ - if (!(m->flags & IVMD_FLAG_EXCL_RANGE)) - return; - - /* - * Treat per-device exclusion ranges as r/w unity-mapped regions - * since some buggy BIOSes might lead to the overwritten exclusion - * range (exclusion_start and exclusion_length members). This - * happens when there are multiple exclusion ranges (IVMD entries) - * defined in ACPI table. - */ - m->flags = (IVMD_FLAG_IW | IVMD_FLAG_IR | IVMD_FLAG_UNITY_MAP); -} - /* * Takes a pointer to an AMD IOMMU entry in the ACPI table and * initializes the hardware and our data structures with it. @@ -2073,30 +2054,6 @@ static void __init free_unity_maps(void) } } -/* called when we find an exclusion range definition in ACPI */ -static int __init init_exclusion_range(struct ivmd_header *m) -{ - int i; - - switch (m->type) { - case ACPI_IVMD_TYPE: - set_device_exclusion_range(m->devid, m); - break; - case ACPI_IVMD_TYPE_ALL: - for (i = 0; i <= amd_iommu_last_bdf; ++i) - set_device_exclusion_range(i, m); - break; - case ACPI_IVMD_TYPE_RANGE: - for (i = m->devid; i <= m->aux; ++i) - set_device_exclusion_range(i, m); - break; - default: - break; - } - - return 0; -} - /* called for unity map ACPI definition */ static int __init init_unity_map_range(struct ivmd_header *m) { @@ -2107,9 +2064,6 @@ static int __init init_unity_map_range(struct ivmd_header *m) if (e == NULL) return -ENOMEM; - if (m->flags & IVMD_FLAG_EXCL_RANGE) - init_exclusion_range(m); - switch (m->type) { default: kfree(e); @@ -2133,6 +2087,16 @@ static int __init init_unity_map_range(struct ivmd_header *m) e->address_end = e->address_start + PAGE_ALIGN(m->range_length); e->prot = m->flags >> 1; + /* + * Treat per-device exclusion ranges as r/w unity-mapped regions + * since some buggy BIOSes might lead to the overwritten exclusion + * range (exclusion_start and exclusion_length members). This + * happens when there are multiple exclusion ranges (IVMD entries) + * defined in ACPI table. + */ + if (m->flags & IVMD_FLAG_EXCL_RANGE) + e->prot = (IVMD_FLAG_IW | IVMD_FLAG_IR) >> 1; + DUMP_printk("%s devid_start: %02x:%02x.%x devid_end: %02x:%02x.%x" " range_start: %016llx range_end: %016llx flags: %x\n", s, PCI_BUS_NUM(e->devid_start), PCI_SLOT(e->devid_start), From 0891fb39ba67bd7ae023ea0d367297ffff010781 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Wed, 30 Sep 2020 11:16:14 +0200 Subject: [PATCH 275/322] xen/events: don't use chip_data for legacy IRQs Since commit c330fb1ddc0a ("XEN uses irqdesc::irq_data_common::handler_data to store a per interrupt XEN data pointer which contains XEN specific information.") Xen is using the chip_data pointer for storing IRQ specific data. When running as a HVM domain this can result in problems for legacy IRQs, as those might use chip_data for their own purposes. Use a local array for this purpose in case of legacy IRQs, avoiding the double use. Cc: stable@vger.kernel.org Fixes: c330fb1ddc0a ("XEN uses irqdesc::irq_data_common::handler_data to store a per interrupt XEN data pointer which contains XEN specific information.") Signed-off-by: Juergen Gross Tested-by: Stefan Bader Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20200930091614.13660-1-jgross@suse.com Signed-off-by: Juergen Gross --- drivers/xen/events/events_base.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 90b8f56fbadb..6f02c18fa65c 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -92,6 +92,8 @@ static bool (*pirq_needs_eoi)(unsigned irq); /* Xen will never allocate port zero for any purpose. */ #define VALID_EVTCHN(chn) ((chn) != 0) +static struct irq_info *legacy_info_ptrs[NR_IRQS_LEGACY]; + static struct irq_chip xen_dynamic_chip; static struct irq_chip xen_percpu_chip; static struct irq_chip xen_pirq_chip; @@ -156,7 +158,18 @@ int get_evtchn_to_irq(evtchn_port_t evtchn) /* Get info for IRQ */ struct irq_info *info_for_irq(unsigned irq) { - return irq_get_chip_data(irq); + if (irq < nr_legacy_irqs()) + return legacy_info_ptrs[irq]; + else + return irq_get_chip_data(irq); +} + +static void set_info_for_irq(unsigned int irq, struct irq_info *info) +{ + if (irq < nr_legacy_irqs()) + legacy_info_ptrs[irq] = info; + else + irq_set_chip_data(irq, info); } /* Constructors for packed IRQ information. */ @@ -377,7 +390,7 @@ static void xen_irq_init(unsigned irq) info->type = IRQT_UNBOUND; info->refcnt = -1; - irq_set_chip_data(irq, info); + set_info_for_irq(irq, info); list_add_tail(&info->list, &xen_irq_list_head); } @@ -426,14 +439,14 @@ static int __must_check xen_allocate_irq_gsi(unsigned gsi) static void xen_free_irq(unsigned irq) { - struct irq_info *info = irq_get_chip_data(irq); + struct irq_info *info = info_for_irq(irq); if (WARN_ON(!info)) return; list_del(&info->list); - irq_set_chip_data(irq, NULL); + set_info_for_irq(irq, NULL); WARN_ON(info->refcnt > 0); @@ -603,7 +616,7 @@ EXPORT_SYMBOL_GPL(xen_irq_from_gsi); static void __unbind_from_irq(unsigned int irq) { evtchn_port_t evtchn = evtchn_from_irq(irq); - struct irq_info *info = irq_get_chip_data(irq); + struct irq_info *info = info_for_irq(irq); if (info->refcnt > 0) { info->refcnt--; @@ -1108,7 +1121,7 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi, void unbind_from_irqhandler(unsigned int irq, void *dev_id) { - struct irq_info *info = irq_get_chip_data(irq); + struct irq_info *info = info_for_irq(irq); if (WARN_ON(!info)) return; @@ -1142,7 +1155,7 @@ int evtchn_make_refcounted(evtchn_port_t evtchn) if (irq == -1) return -ENOENT; - info = irq_get_chip_data(irq); + info = info_for_irq(irq); if (!info) return -ENOENT; @@ -1170,7 +1183,7 @@ int evtchn_get(evtchn_port_t evtchn) if (irq == -1) goto done; - info = irq_get_chip_data(irq); + info = info_for_irq(irq); if (!info) goto done; From 1a3f2fd7fc4e8f24510830e265de2ffb8e3300d2 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Sun, 27 Sep 2020 14:24:28 +0800 Subject: [PATCH 276/322] iommu/vt-d: Fix lockdep splat in iommu_flush_dev_iotlb() Lock(&iommu->lock) without disabling irq causes lockdep warnings. [ 12.703950] ======================================================== [ 12.703962] WARNING: possible irq lock inversion dependency detected [ 12.703975] 5.9.0-rc6+ #659 Not tainted [ 12.703983] -------------------------------------------------------- [ 12.703995] systemd-udevd/284 just changed the state of lock: [ 12.704007] ffffffffbd6ff4d8 (device_domain_lock){..-.}-{2:2}, at: iommu_flush_dev_iotlb.part.57+0x2e/0x90 [ 12.704031] but this lock took another, SOFTIRQ-unsafe lock in the past: [ 12.704043] (&iommu->lock){+.+.}-{2:2} [ 12.704045] and interrupts could create inverse lock ordering between them. [ 12.704073] other info that might help us debug this: [ 12.704085] Possible interrupt unsafe locking scenario: [ 12.704097] CPU0 CPU1 [ 12.704106] ---- ---- [ 12.704115] lock(&iommu->lock); [ 12.704123] local_irq_disable(); [ 12.704134] lock(device_domain_lock); [ 12.704146] lock(&iommu->lock); [ 12.704158] [ 12.704164] lock(device_domain_lock); [ 12.704174] *** DEADLOCK *** Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20200927062428.13713-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 87b17bac04c2..2239c211178b 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -2664,7 +2664,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, } /* Setup the PASID entry for requests without PASID: */ - spin_lock(&iommu->lock); + spin_lock_irqsave(&iommu->lock, flags); if (hw_pass_through && domain_type_is_si(domain)) ret = intel_pasid_setup_pass_through(iommu, domain, dev, PASID_RID2PASID); @@ -2674,7 +2674,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, else ret = intel_pasid_setup_second_level(iommu, domain, dev, PASID_RID2PASID); - spin_unlock(&iommu->lock); + spin_unlock_irqrestore(&iommu->lock, flags); if (ret) { dev_err(dev, "Setup RID2PASID failed\n"); dmar_remove_one_dev_info(dev); From bb13a800620c2b9046187facdf7dfd45699f5a74 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 1 Oct 2020 08:44:19 +0200 Subject: [PATCH 277/322] r8169: fix handling ether_clk Petr reported that system freezes on r8169 driver load on a system using ether_clk. The original change was done under the assumption that the clock isn't needed for basic operations like chip register access. But obviously that was wrong. Therefore effectively revert the original change, and in addition leave the clock active when suspending and WoL is enabled. Chip may not be able to process incoming packets otherwise. Fixes: 9f0b54cd1672 ("r8169: move switching optional clock on/off to pll power functions") Reported-by: Petr Tesarik Tested-by: Petr Tesarik Signed-off-by: Heiner Kallweit Reviewed-by: Hans de Goede Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169_main.c | 44 +++++++++++++---------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 5e867da1aad3..4ae1a6fbf0ba 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -2239,14 +2239,10 @@ static void rtl_pll_power_down(struct rtl8169_private *tp) default: break; } - - clk_disable_unprepare(tp->clk); } static void rtl_pll_power_up(struct rtl8169_private *tp) { - clk_prepare_enable(tp->clk); - switch (tp->mac_version) { case RTL_GIGA_MAC_VER_25 ... RTL_GIGA_MAC_VER_33: case RTL_GIGA_MAC_VER_37: @@ -4826,21 +4822,8 @@ static void rtl8169_net_suspend(struct rtl8169_private *tp) #ifdef CONFIG_PM -static int __maybe_unused rtl8169_suspend(struct device *device) +static int rtl8169_net_resume(struct rtl8169_private *tp) { - struct rtl8169_private *tp = dev_get_drvdata(device); - - rtnl_lock(); - rtl8169_net_suspend(tp); - rtnl_unlock(); - - return 0; -} - -static int rtl8169_resume(struct device *device) -{ - struct rtl8169_private *tp = dev_get_drvdata(device); - rtl_rar_set(tp, tp->dev->dev_addr); if (tp->TxDescArray) @@ -4851,6 +4834,29 @@ static int rtl8169_resume(struct device *device) return 0; } +static int __maybe_unused rtl8169_suspend(struct device *device) +{ + struct rtl8169_private *tp = dev_get_drvdata(device); + + rtnl_lock(); + rtl8169_net_suspend(tp); + if (!device_may_wakeup(tp_to_dev(tp))) + clk_disable_unprepare(tp->clk); + rtnl_unlock(); + + return 0; +} + +static int __maybe_unused rtl8169_resume(struct device *device) +{ + struct rtl8169_private *tp = dev_get_drvdata(device); + + if (!device_may_wakeup(tp_to_dev(tp))) + clk_prepare_enable(tp->clk); + + return rtl8169_net_resume(tp); +} + static int rtl8169_runtime_suspend(struct device *device) { struct rtl8169_private *tp = dev_get_drvdata(device); @@ -4874,7 +4880,7 @@ static int rtl8169_runtime_resume(struct device *device) __rtl8169_set_wol(tp, tp->saved_wolopts); - return rtl8169_resume(device); + return rtl8169_net_resume(tp); } static int rtl8169_runtime_idle(struct device *device) From ef9da46ddef071e1bbb943afbbe9b38771855554 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 1 Oct 2020 09:23:02 +0200 Subject: [PATCH 278/322] r8169: fix data corruption issue on RTL8402 Petr reported that after resume from suspend RTL8402 partially truncates incoming packets, and re-initializing register RxConfig before the actual chip re-initialization sequence is needed to avoid the issue. Reported-by: Petr Tesarik Proposed-by: Petr Tesarik Tested-by: Petr Tesarik Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 4ae1a6fbf0ba..256bc4878ef1 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -4854,6 +4854,10 @@ static int __maybe_unused rtl8169_resume(struct device *device) if (!device_may_wakeup(tp_to_dev(tp))) clk_prepare_enable(tp->clk); + /* Reportedly at least Asus X453MA truncates packets otherwise */ + if (tp->mac_version == RTL_GIGA_MAC_VER_37) + rtl_init_rxcfg(tp); + return rtl8169_net_resume(tp); } From 472e5b056f000a778abb41f1e443de58eb259783 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 1 Oct 2020 19:14:36 -0700 Subject: [PATCH 279/322] pipe: remove pipe_wait() and fix wakeup race with splice The pipe splice code still used the old model of waiting for pipe IO by using a non-specific "pipe_wait()" that waited for any pipe event to happen, which depended on all pipe IO being entirely serialized by the pipe lock. So by checking the state you were waiting for, and then adding yourself to the wait queue before dropping the lock, you were guaranteed to see all the wakeups. Strictly speaking, the actual wakeups were not done under the lock, but the pipe_wait() model still worked, because since the waiter held the lock when checking whether it should sleep, it would always see the current state, and the wakeup was always done after updating the state. However, commit 0ddad21d3e99 ("pipe: use exclusive waits when reading or writing") split the single wait-queue into two, and in the process also made the "wait for event" code wait for _two_ wait queues, and that then showed a race with the wakers that were not serialized by the pipe lock. It's only splice that used that "pipe_wait()" model, so the problem wasn't obvious, but Josef Bacik reports: "I hit a hang with fstest btrfs/187, which does a btrfs send into /dev/null. This works by creating a pipe, the write side is given to the kernel to write into, and the read side is handed to a thread that splices into a file, in this case /dev/null. The box that was hung had the write side stuck here [pipe_write] and the read side stuck here [splice_from_pipe_next -> pipe_wait]. [ more details about pipe_wait() scenario ] The problem is we're doing the prepare_to_wait, which sets our state each time, however we can be woken up either with reads or writes. In the case above we race with the WRITER waking us up, and re-set our state to INTERRUPTIBLE, and thus never break out of schedule" Josef had a patch that avoided the issue in pipe_wait() by just making it set the state only once, but the deeper problem is that pipe_wait() depends on a level of synchonization by the pipe mutex that it really shouldn't. And the whole "wait for any pipe state change" model really isn't very good to begin with. So rather than trying to work around things in pipe_wait(), remove that legacy model of "wait for arbitrary pipe event" entirely, and actually create functions that wait for the pipe actually being readable or writable, and can do so without depending on the pipe lock serializing everything. Fixes: 0ddad21d3e99 ("pipe: use exclusive waits when reading or writing") Link: https://lore.kernel.org/linux-fsdevel/bfa88b5ad6f069b2b679316b9e495a970130416c.1601567868.git.josef@toxicpanda.com/ Reported-by: Josef Bacik Reviewed-and-tested-by: Josef Bacik Signed-off-by: Linus Torvalds --- fs/pipe.c | 62 ++++++++++++++++++++++++++------------- fs/splice.c | 8 ++--- include/linux/pipe_fs_i.h | 5 ++-- 3 files changed, 48 insertions(+), 27 deletions(-) diff --git a/fs/pipe.c b/fs/pipe.c index 60dbee457143..117db82b10af 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -106,25 +106,6 @@ void pipe_double_lock(struct pipe_inode_info *pipe1, } } -/* Drop the inode semaphore and wait for a pipe event, atomically */ -void pipe_wait(struct pipe_inode_info *pipe) -{ - DEFINE_WAIT(rdwait); - DEFINE_WAIT(wrwait); - - /* - * Pipes are system-local resources, so sleeping on them - * is considered a noninteractive wait: - */ - prepare_to_wait(&pipe->rd_wait, &rdwait, TASK_INTERRUPTIBLE); - prepare_to_wait(&pipe->wr_wait, &wrwait, TASK_INTERRUPTIBLE); - pipe_unlock(pipe); - schedule(); - finish_wait(&pipe->rd_wait, &rdwait); - finish_wait(&pipe->wr_wait, &wrwait); - pipe_lock(pipe); -} - static void anon_pipe_buf_release(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { @@ -1035,12 +1016,52 @@ SYSCALL_DEFINE1(pipe, int __user *, fildes) return do_pipe2(fildes, 0); } +/* + * This is the stupid "wait for pipe to be readable or writable" + * model. + * + * See pipe_read/write() for the proper kind of exclusive wait, + * but that requires that we wake up any other readers/writers + * if we then do not end up reading everything (ie the whole + * "wake_next_reader/writer" logic in pipe_read/write()). + */ +void pipe_wait_readable(struct pipe_inode_info *pipe) +{ + pipe_unlock(pipe); + wait_event_interruptible(pipe->rd_wait, pipe_readable(pipe)); + pipe_lock(pipe); +} + +void pipe_wait_writable(struct pipe_inode_info *pipe) +{ + pipe_unlock(pipe); + wait_event_interruptible(pipe->wr_wait, pipe_writable(pipe)); + pipe_lock(pipe); +} + +/* + * This depends on both the wait (here) and the wakeup (wake_up_partner) + * holding the pipe lock, so "*cnt" is stable and we know a wakeup cannot + * race with the count check and waitqueue prep. + * + * Normally in order to avoid races, you'd do the prepare_to_wait() first, + * then check the condition you're waiting for, and only then sleep. But + * because of the pipe lock, we can check the condition before being on + * the wait queue. + * + * We use the 'rd_wait' waitqueue for pipe partner waiting. + */ static int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt) { + DEFINE_WAIT(rdwait); int cur = *cnt; while (cur == *cnt) { - pipe_wait(pipe); + prepare_to_wait(&pipe->rd_wait, &rdwait, TASK_INTERRUPTIBLE); + pipe_unlock(pipe); + schedule(); + finish_wait(&pipe->rd_wait, &rdwait); + pipe_lock(pipe); if (signal_pending(current)) break; } @@ -1050,7 +1071,6 @@ static int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt) static void wake_up_partner(struct pipe_inode_info *pipe) { wake_up_interruptible_all(&pipe->rd_wait); - wake_up_interruptible_all(&pipe->wr_wait); } static int fifo_open(struct inode *inode, struct file *filp) diff --git a/fs/splice.c b/fs/splice.c index d7c8a7c4db07..c3d00dfc7344 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -563,7 +563,7 @@ static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_des sd->need_wakeup = false; } - pipe_wait(pipe); + pipe_wait_readable(pipe); } return 1; @@ -1077,7 +1077,7 @@ static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags) return -EAGAIN; if (signal_pending(current)) return -ERESTARTSYS; - pipe_wait(pipe); + pipe_wait_writable(pipe); } } @@ -1454,7 +1454,7 @@ static int ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags) ret = -EAGAIN; break; } - pipe_wait(pipe); + pipe_wait_readable(pipe); } pipe_unlock(pipe); @@ -1493,7 +1493,7 @@ static int opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) ret = -ERESTARTSYS; break; } - pipe_wait(pipe); + pipe_wait_writable(pipe); } pipe_unlock(pipe); diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 50afd0d0084c..5d2705f1d01c 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -240,8 +240,9 @@ extern unsigned int pipe_max_size; extern unsigned long pipe_user_pages_hard; extern unsigned long pipe_user_pages_soft; -/* Drop the inode semaphore and wait for a pipe event, atomically */ -void pipe_wait(struct pipe_inode_info *pipe); +/* Wait for a pipe to be readable/writable while dropping the pipe lock */ +void pipe_wait_readable(struct pipe_inode_info *); +void pipe_wait_writable(struct pipe_inode_info *); struct pipe_inode_info *alloc_pipe_info(void); void free_pipe_info(struct pipe_inode_info *); From 21d64817c72470ed69483f59279ce557c0658826 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 2 Oct 2020 17:29:59 +0300 Subject: [PATCH 280/322] platform/x86: intel-vbtn: Revert "Fix SW_TABLET_MODE always reporting 1 on the HP Pavilion 11 x360" After discussion, see the Link tag, it appears that this is not good enough. So, revert it now and apply a better fix. This reverts commit d823346876a970522ff9e4d2b323c9b734dcc4de. Link: https://lore.kernel.org/platform-driver-x86/s5hft71klxl.wl-tiwai@suse.de/ Signed-off-by: Andy Shevchenko --- drivers/platform/x86/intel-vbtn.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/platform/x86/intel-vbtn.c b/drivers/platform/x86/intel-vbtn.c index f443619e1e7e..e85d8e58320c 100644 --- a/drivers/platform/x86/intel-vbtn.c +++ b/drivers/platform/x86/intel-vbtn.c @@ -15,13 +15,9 @@ #include #include -/* Returned when NOT in tablet mode on some HP Stream x360 11 models */ -#define VGBS_TABLET_MODE_FLAG_ALT 0x10 /* When NOT in tablet mode, VGBS returns with the flag 0x40 */ -#define VGBS_TABLET_MODE_FLAG 0x40 -#define VGBS_DOCK_MODE_FLAG 0x80 - -#define VGBS_TABLET_MODE_FLAGS (VGBS_TABLET_MODE_FLAG | VGBS_TABLET_MODE_FLAG_ALT) +#define TABLET_MODE_FLAG 0x40 +#define DOCK_MODE_FLAG 0x80 MODULE_LICENSE("GPL"); MODULE_AUTHOR("AceLan Kao"); @@ -76,9 +72,9 @@ static void detect_tablet_mode(struct platform_device *device) if (ACPI_FAILURE(status)) return; - m = !(vgbs & VGBS_TABLET_MODE_FLAGS); + m = !(vgbs & TABLET_MODE_FLAG); input_report_switch(priv->input_dev, SW_TABLET_MODE, m); - m = (vgbs & VGBS_DOCK_MODE_FLAG) ? 1 : 0; + m = (vgbs & DOCK_MODE_FLAG) ? 1 : 0; input_report_switch(priv->input_dev, SW_DOCK, m); } From 8169bd3e6e193497cab781acddcff8fde5d0c416 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 30 Sep 2020 15:19:05 +0200 Subject: [PATCH 281/322] platform/x86: intel-vbtn: Switch to an allow-list for SW_TABLET_MODE reporting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 2 recent commits: cfae58ed681c ("platform/x86: intel-vbtn: Only blacklist SW_TABLET_MODE on the 9 / "Laptop" chasis-type") 1fac39fd0316 ("platform/x86: intel-vbtn: Also handle tablet-mode switch on "Detachable" and "Portable" chassis-types") Enabled reporting of SW_TABLET_MODE on more devices since the vbtn ACPI interface is used by the firmware on some of those devices to report this. Testing has shown that unconditionally enabling SW_TABLET_MODE reporting on all devices with a chassis type of 8 ("Portable") or 10 ("Notebook") which support the VGBS method is a very bad idea. Many of these devices are normal laptops (non 2-in-1) models with a VGBS which always returns 0, which we translate to SW_TABLET_MODE=1. This in turn causes userspace (libinput) to suppress events from the builtin keyboard and touchpad, making the laptop essentially unusable. Since the problem of wrongly reporting SW_TABLET_MODE=1 in combination with libinput, leads to a non-usable system. Where as OTOH many people will not even notice when SW_TABLET_MODE is not being reported, this commit changes intel_vbtn_has_switches() to use a DMI based allow-list. The new DMI based allow-list matches on the 31 ("Convertible") and 32 ("Detachable") chassis-types, as these clearly are 2-in-1s and so far if they support the intel-vbtn ACPI interface they all have properly working SW_TABLET_MODE reporting. Besides these 2 generic matches, it also contains model specific matches for 2-in-1 models which use a different chassis-type and which are known to have properly working SW_TABLET_MODE reporting. This has been tested on the following 2-in-1 devices: Dell Venue 11 Pro 7130 vPro HP Pavilion X2 10-p002nd HP Stream x360 Convertible PC 11 Medion E1239T Fixes: cfae58ed681c ("platform/x86: intel-vbtn: Only blacklist SW_TABLET_MODE on the 9 / "Laptop" chasis-type") BugLink: https://forum.manjaro.org/t/keyboard-and-touchpad-only-work-on-kernel-5-6/22668 BugLink: https://bugzilla.opensuse.org/show_bug.cgi?id=1175599 Cc: Barnabás Pőcze Cc: Takashi Iwai Signed-off-by: Hans de Goede Signed-off-by: Andy Shevchenko --- drivers/platform/x86/intel-vbtn.c | 52 +++++++++++++++++++++++++------ 1 file changed, 43 insertions(+), 9 deletions(-) diff --git a/drivers/platform/x86/intel-vbtn.c b/drivers/platform/x86/intel-vbtn.c index e85d8e58320c..f5901b0b07cd 100644 --- a/drivers/platform/x86/intel-vbtn.c +++ b/drivers/platform/x86/intel-vbtn.c @@ -167,20 +167,54 @@ static bool intel_vbtn_has_buttons(acpi_handle handle) return ACPI_SUCCESS(status); } +/* + * There are several laptops (non 2-in-1) models out there which support VGBS, + * but simply always return 0, which we translate to SW_TABLET_MODE=1. This in + * turn causes userspace (libinput) to suppress events from the builtin + * keyboard and touchpad, making the laptop essentially unusable. + * + * Since the problem of wrongly reporting SW_TABLET_MODE=1 in combination + * with libinput, leads to a non-usable system. Where as OTOH many people will + * not even notice when SW_TABLET_MODE is not being reported, a DMI based allow + * list is used here. This list mainly matches on the chassis-type of 2-in-1s. + * + * There are also some 2-in-1s which use the intel-vbtn ACPI interface to report + * SW_TABLET_MODE with a chassis-type of 8 ("Portable") or 10 ("Notebook"), + * these are matched on a per model basis, since many normal laptops with a + * possible broken VGBS ACPI-method also use these chassis-types. + */ +static const struct dmi_system_id dmi_switches_allow_list[] = { + { + .matches = { + DMI_EXACT_MATCH(DMI_CHASSIS_TYPE, "31" /* Convertible */), + }, + }, + { + .matches = { + DMI_EXACT_MATCH(DMI_CHASSIS_TYPE, "32" /* Detachable */), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Venue 11 Pro 7130"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Stream x360 Convertible PC 11"), + }, + }, + {} /* Array terminator */ +}; + static bool intel_vbtn_has_switches(acpi_handle handle) { - const char *chassis_type = dmi_get_system_info(DMI_CHASSIS_TYPE); unsigned long long vgbs; acpi_status status; - /* - * Some normal laptops have a VGBS method despite being non-convertible - * and their VGBS method always returns 0, causing detect_tablet_mode() - * to report SW_TABLET_MODE=1 to userspace, which causes issues. - * These laptops have a DMI chassis_type of 9 ("Laptop"), do not report - * switches on any devices with a DMI chassis_type of 9. - */ - if (chassis_type && strcmp(chassis_type, "9") == 0) + if (!dmi_check_system(dmi_switches_allow_list)) return false; status = acpi_evaluate_integer(handle, "VGBS", NULL, &vgbs); From 9fb77799557d1537a059161bf645db7be9e358ae Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 25 Sep 2020 08:58:12 +0200 Subject: [PATCH 282/322] MAINTAINERS: Add Mark Gross and Hans de Goede as x86 platform drivers maintainers Darren Hart and Andy Shevchenko lately have not had enough time to maintain the x86 platform drivers, dropping their status to: "Odd Fixes". Mark Gross and Hans de Goede will take over maintainership of the x86 platform drivers. Replace Darren and Andy's entries with theirs and change the status to "Maintained". Signed-off-by: Hans de Goede Acked-by: Mark Gross Signed-off-by: Andy Shevchenko --- MAINTAINERS | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index deaafb617361..b1c97a16b6ce 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18857,10 +18857,10 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/mm F: arch/x86/mm/ X86 PLATFORM DRIVERS -M: Darren Hart -M: Andy Shevchenko +M: Hans de Goede +M: Mark Gross L: platform-driver-x86@vger.kernel.org -S: Odd Fixes +S: Maintained T: git git://git.infradead.org/linux-platform-drivers-x86.git F: drivers/platform/olpc/ F: drivers/platform/x86/ From be458311cdbb5d94820ffc4e40c5906085c0a507 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Thu, 1 Oct 2020 13:07:49 -0700 Subject: [PATCH 283/322] mm: memcg/slab: fix slab statistics in !SMP configuration Since commit ea426c2a7de8 ("mm: memcg: prepare for byte-sized vmstat items") the write side of slab counters accepts a value in bytes and converts it to pages. It happens in __mod_node_page_state(). However a non-SMP version of __mod_node_page_state() doesn't perform this conversion. It leads to incorrect (unrealistically high) slab counters values. Fix this by adding a similar conversion to the non-SMP version of __mod_node_page_state(). Signed-off-by: Roman Gushchin Reported-and-tested-by: Bastian Bittorf Fixes: ea426c2a7de8 ("mm: memcg: prepare for byte-sized vmstat items") Acked-by: Vlastimil Babka Signed-off-by: Linus Torvalds --- include/linux/vmstat.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 91220ace31da..7557c1070fd7 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -312,6 +312,11 @@ static inline void __mod_zone_page_state(struct zone *zone, static inline void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item, int delta) { + if (vmstat_item_in_bytes(item)) { + VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1)); + delta >>= PAGE_SHIFT; + } + node_page_state_add(delta, pgdat, item); } From 09a6b0bc3be793ca8cba580b7992d73e9f68f15d Mon Sep 17 00:00:00 2001 From: Thibaut Sautereau Date: Fri, 2 Oct 2020 17:16:11 +0200 Subject: [PATCH 284/322] random32: Restore __latent_entropy attribute on net_rand_state Commit f227e3ec3b5c ("random32: update the net random state on interrupt and activity") broke compilation and was temporarily fixed by Linus in 83bdc7275e62 ("random32: remove net_rand_state from the latent entropy gcc plugin") by entirely moving net_rand_state out of the things handled by the latent_entropy GCC plugin. From what I understand when reading the plugin code, using the __latent_entropy attribute on a declaration was the wrong part and simply keeping the __latent_entropy attribute on the variable definition was the correct fix. Fixes: 83bdc7275e62 ("random32: remove net_rand_state from the latent entropy gcc plugin") Acked-by: Willy Tarreau Cc: Emese Revfy Signed-off-by: Thibaut Sautereau Signed-off-by: Linus Torvalds --- lib/random32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/random32.c b/lib/random32.c index 932345323af0..dfb9981ab798 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -49,7 +49,7 @@ static inline void prandom_state_selftest(void) } #endif -DEFINE_PER_CPU(struct rnd_state, net_rand_state); +DEFINE_PER_CPU(struct rnd_state, net_rand_state) __latent_entropy; /** * prandom_u32_state - seeded pseudo-random number generator. From 432161ea26d6d5e5c3f7306d9407d26ed1e1953e Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 13 Aug 2020 16:55:20 +0300 Subject: [PATCH 285/322] net/mlx5: Fix a race when moving command interface to polling mode As part of driver unload, it destroys the commands EQ (via FW command). As the commands EQ is destroyed, FW will not generate EQEs for any command that driver sends afterwards. Driver should poll for later commands status. Driver commands mode metadata is updated before the commands EQ is actually destroyed. This can lead for double completion handle by the driver (polling and interrupt), if a command is executed and completed by FW after the mode was changed, but before the EQ was destroyed. Fix that by using the mlx5_cmd_allowed_opcode mechanism to guarantee that only DESTROY_EQ command can be executed during this time period. Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: Eran Ben Elisha Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 31ef9f8420c8..1318d774b18f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -656,8 +656,10 @@ static void destroy_async_eqs(struct mlx5_core_dev *dev) cleanup_async_eq(dev, &table->pages_eq, "pages"); cleanup_async_eq(dev, &table->async_eq, "async"); + mlx5_cmd_allowed_opcode(dev, MLX5_CMD_OP_DESTROY_EQ); mlx5_cmd_use_polling(dev); cleanup_async_eq(dev, &table->cmd_eq, "cmd"); + mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL); mlx5_eq_notifier_unregister(dev, &table->cq_err_nb); } From 50b2412b7e7862c5af0cbf4b10d93bc5c712d021 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Tue, 4 Aug 2020 10:40:21 +0300 Subject: [PATCH 286/322] net/mlx5: Avoid possible free of command entry while timeout comp handler Upon command completion timeout, driver simulates a forced command completion. In a rare case where real interrupt for that command arrives simultaneously, it might release the command entry while the forced handler might still access it. Fix that by adding an entry refcount, to track current amount of allowed handlers. Command entry to be released only when this refcount is decremented to zero. Command refcount is always initialized to one. For callback commands, command completion handler is the symmetric flow to decrement it. For non-callback commands, it is wait_func(). Before ringing the doorbell, increment the refcount for the real completion handler. Once the real completion handler is called, it will decrement it. For callback commands, once the delayed work is scheduled, increment the refcount. Upon callback command completion handler, we will try to cancel the timeout callback. In case of success, we need to decrement the callback refcount as it will never run. In addition, gather the entry index free and the entry free into a one flow for all command types release. Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: Eran Ben Elisha Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 109 ++++++++++++------ include/linux/mlx5/driver.h | 2 + 2 files changed, 73 insertions(+), 38 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 1d91a0d0ab1d..c0055f5479ce 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -69,12 +69,10 @@ enum { MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR = 0x10, }; -static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd, - struct mlx5_cmd_msg *in, - struct mlx5_cmd_msg *out, - void *uout, int uout_size, - mlx5_cmd_cbk_t cbk, - void *context, int page_queue) +static struct mlx5_cmd_work_ent * +cmd_alloc_ent(struct mlx5_cmd *cmd, struct mlx5_cmd_msg *in, + struct mlx5_cmd_msg *out, void *uout, int uout_size, + mlx5_cmd_cbk_t cbk, void *context, int page_queue) { gfp_t alloc_flags = cbk ? GFP_ATOMIC : GFP_KERNEL; struct mlx5_cmd_work_ent *ent; @@ -83,6 +81,7 @@ static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd, if (!ent) return ERR_PTR(-ENOMEM); + ent->idx = -EINVAL; ent->in = in; ent->out = out; ent->uout = uout; @@ -91,10 +90,16 @@ static struct mlx5_cmd_work_ent *alloc_cmd(struct mlx5_cmd *cmd, ent->context = context; ent->cmd = cmd; ent->page_queue = page_queue; + refcount_set(&ent->refcnt, 1); return ent; } +static void cmd_free_ent(struct mlx5_cmd_work_ent *ent) +{ + kfree(ent); +} + static u8 alloc_token(struct mlx5_cmd *cmd) { u8 token; @@ -109,7 +114,7 @@ static u8 alloc_token(struct mlx5_cmd *cmd) return token; } -static int alloc_ent(struct mlx5_cmd *cmd) +static int cmd_alloc_index(struct mlx5_cmd *cmd) { unsigned long flags; int ret; @@ -123,7 +128,7 @@ static int alloc_ent(struct mlx5_cmd *cmd) return ret < cmd->max_reg_cmds ? ret : -ENOMEM; } -static void free_ent(struct mlx5_cmd *cmd, int idx) +static void cmd_free_index(struct mlx5_cmd *cmd, int idx) { unsigned long flags; @@ -132,6 +137,22 @@ static void free_ent(struct mlx5_cmd *cmd, int idx) spin_unlock_irqrestore(&cmd->alloc_lock, flags); } +static void cmd_ent_get(struct mlx5_cmd_work_ent *ent) +{ + refcount_inc(&ent->refcnt); +} + +static void cmd_ent_put(struct mlx5_cmd_work_ent *ent) +{ + if (!refcount_dec_and_test(&ent->refcnt)) + return; + + if (ent->idx >= 0) + cmd_free_index(ent->cmd, ent->idx); + + cmd_free_ent(ent); +} + static struct mlx5_cmd_layout *get_inst(struct mlx5_cmd *cmd, int idx) { return cmd->cmd_buf + (idx << cmd->log_stride); @@ -219,11 +240,6 @@ static void poll_timeout(struct mlx5_cmd_work_ent *ent) ent->ret = -ETIMEDOUT; } -static void free_cmd(struct mlx5_cmd_work_ent *ent) -{ - kfree(ent); -} - static int verify_signature(struct mlx5_cmd_work_ent *ent) { struct mlx5_cmd_mailbox *next = ent->out->next; @@ -842,6 +858,7 @@ static void cb_timeout_handler(struct work_struct *work) mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in)); mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true); + cmd_ent_put(ent); /* for the cmd_ent_get() took on schedule delayed work */ } static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg); @@ -873,14 +890,14 @@ static void cmd_work_handler(struct work_struct *work) sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem; down(sem); if (!ent->page_queue) { - alloc_ret = alloc_ent(cmd); + alloc_ret = cmd_alloc_index(cmd); if (alloc_ret < 0) { mlx5_core_err_rl(dev, "failed to allocate command entry\n"); if (ent->callback) { ent->callback(-EAGAIN, ent->context); mlx5_free_cmd_msg(dev, ent->out); free_msg(dev, ent->in); - free_cmd(ent); + cmd_ent_put(ent); } else { ent->ret = -EAGAIN; complete(&ent->done); @@ -916,8 +933,8 @@ static void cmd_work_handler(struct work_struct *work) ent->ts1 = ktime_get_ns(); cmd_mode = cmd->mode; - if (ent->callback) - schedule_delayed_work(&ent->cb_timeout_work, cb_timeout); + if (ent->callback && schedule_delayed_work(&ent->cb_timeout_work, cb_timeout)) + cmd_ent_get(ent); set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state); /* Skip sending command to fw if internal error */ @@ -933,13 +950,10 @@ static void cmd_work_handler(struct work_struct *work) MLX5_SET(mbox_out, ent->out, syndrome, drv_synd); mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true); - /* no doorbell, no need to keep the entry */ - free_ent(cmd, ent->idx); - if (ent->callback) - free_cmd(ent); return; } + cmd_ent_get(ent); /* for the _real_ FW event on completion */ /* ring doorbell after the descriptor is valid */ mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx); wmb(); @@ -1039,11 +1053,16 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, if (callback && page_queue) return -EINVAL; - ent = alloc_cmd(cmd, in, out, uout, uout_size, callback, context, - page_queue); + ent = cmd_alloc_ent(cmd, in, out, uout, uout_size, + callback, context, page_queue); if (IS_ERR(ent)) return PTR_ERR(ent); + /* put for this ent is when consumed, depending on the use case + * 1) (!callback) blocking flow: by caller after wait_func completes + * 2) (callback) flow: by mlx5_cmd_comp_handler() when ent is handled + */ + ent->token = token; ent->polling = force_polling; @@ -1062,12 +1081,10 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, } if (callback) - goto out; + goto out; /* mlx5_cmd_comp_handler() will put(ent) */ err = wait_func(dev, ent); - if (err == -ETIMEDOUT) - goto out; - if (err == -ECANCELED) + if (err == -ETIMEDOUT || err == -ECANCELED) goto out_free; ds = ent->ts2 - ent->ts1; @@ -1085,7 +1102,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, *status = ent->status; out_free: - free_cmd(ent); + cmd_ent_put(ent); out: return err; } @@ -1516,14 +1533,19 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force if (!forced) { mlx5_core_err(dev, "Command completion arrived after timeout (entry idx = %d).\n", ent->idx); - free_ent(cmd, ent->idx); - free_cmd(ent); + cmd_ent_put(ent); } continue; } - if (ent->callback) - cancel_delayed_work(&ent->cb_timeout_work); + if (ent->callback && cancel_delayed_work(&ent->cb_timeout_work)) + cmd_ent_put(ent); /* timeout work was canceled */ + + if (!forced || /* Real FW completion */ + pci_channel_offline(dev->pdev) || /* FW is inaccessible */ + dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) + cmd_ent_put(ent); + if (ent->page_queue) sem = &cmd->pages_sem; else @@ -1545,10 +1567,6 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force ent->ret, deliv_status_to_str(ent->status), ent->status); } - /* only real completion will free the entry slot */ - if (!forced) - free_ent(cmd, ent->idx); - if (ent->callback) { ds = ent->ts2 - ent->ts1; if (ent->op < MLX5_CMD_OP_MAX) { @@ -1576,10 +1594,13 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force free_msg(dev, ent->in); err = err ? err : ent->status; - if (!forced) - free_cmd(ent); + /* final consumer is done, release ent */ + cmd_ent_put(ent); callback(err, context); } else { + /* release wait_func() so mlx5_cmd_invoke() + * can make the final ent_put() + */ complete(&ent->done); } up(sem); @@ -1589,8 +1610,11 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev) { + struct mlx5_cmd *cmd = &dev->cmd; + unsigned long bitmask; unsigned long flags; u64 vector; + int i; /* wait for pending handlers to complete */ mlx5_eq_synchronize_cmd_irq(dev); @@ -1599,11 +1623,20 @@ void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev) if (!vector) goto no_trig; + bitmask = vector; + /* we must increment the allocated entries refcount before triggering the completions + * to guarantee pending commands will not get freed in the meanwhile. + * For that reason, it also has to be done inside the alloc_lock. + */ + for_each_set_bit(i, &bitmask, (1 << cmd->log_sz)) + cmd_ent_get(cmd->ent_arr[i]); vector |= MLX5_TRIGGERED_CMD_COMP; spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags); mlx5_core_dbg(dev, "vector 0x%llx\n", vector); mlx5_cmd_comp_handler(dev, vector, true); + for_each_set_bit(i, &bitmask, (1 << cmd->log_sz)) + cmd_ent_put(cmd->ent_arr[i]); return; no_trig: diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index c145de0473bc..897156822f0d 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -767,6 +767,8 @@ struct mlx5_cmd_work_ent { u64 ts2; u16 op; bool polling; + /* Track the max comp handlers */ + refcount_t refcnt; }; struct mlx5_pas { From 1d5558b1f0de81f54ddee05f3793acc5260d107f Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Tue, 21 Jul 2020 10:25:52 +0300 Subject: [PATCH 287/322] net/mlx5: poll cmd EQ in case of command timeout Once driver detects a command interface command timeout, it warns the user and returns timeout error to the caller. In such case, the entry of the command is not evacuated (because only real event interrupt is allowed to clear command interface entry). If the HW event interrupt of this entry will never arrive, this entry will be left unused forever. Command interface entries are limited and eventually we can end up without the ability to post a new command. In addition, if driver will not consume the EQE of the lost interrupt and rearm the EQ, no new interrupts will arrive for other commands. Add a resiliency mechanism for manually polling the command EQ in case of a command timeout. In case resiliency mechanism will find non-handled EQE, it will consume it, and the command interface will be fully functional again. Once the resiliency flow finished, wait another 5 seconds for the command interface to complete for this command entry. Define mlx5_cmd_eq_recover() to manage the cmd EQ polling resiliency flow. Add an async EQ spinlock to avoid races between resiliency flows and real interrupts that might run simultaneously. Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 53 ++++++++++++++++--- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 40 +++++++++++++- .../net/ethernet/mellanox/mlx5/core/lib/eq.h | 2 + 3 files changed, 86 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index c0055f5479ce..37dae95e61d5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -853,11 +853,21 @@ static void cb_timeout_handler(struct work_struct *work) struct mlx5_core_dev *dev = container_of(ent->cmd, struct mlx5_core_dev, cmd); + mlx5_cmd_eq_recover(dev); + + /* Maybe got handled by eq recover ? */ + if (!test_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state)) { + mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) Async, recovered after timeout\n", ent->idx, + mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in)); + goto out; /* phew, already handled */ + } + ent->ret = -ETIMEDOUT; - mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n", - mlx5_command_str(msg_to_opcode(ent->in)), - msg_to_opcode(ent->in)); + mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) Async, timeout. Will cause a leak of a command resource\n", + ent->idx, mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in)); mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true); + +out: cmd_ent_put(ent); /* for the cmd_ent_get() took on schedule delayed work */ } @@ -997,6 +1007,35 @@ static const char *deliv_status_to_str(u8 status) } } +enum { + MLX5_CMD_TIMEOUT_RECOVER_MSEC = 5 * 1000, +}; + +static void wait_func_handle_exec_timeout(struct mlx5_core_dev *dev, + struct mlx5_cmd_work_ent *ent) +{ + unsigned long timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_RECOVER_MSEC); + + mlx5_cmd_eq_recover(dev); + + /* Re-wait on the ent->done after executing the recovery flow. If the + * recovery flow (or any other recovery flow running simultaneously) + * has recovered an EQE, it should cause the entry to be completed by + * the command interface. + */ + if (wait_for_completion_timeout(&ent->done, timeout)) { + mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) recovered after timeout\n", ent->idx, + mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in)); + return; + } + + mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) No done completion\n", ent->idx, + mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in)); + + ent->ret = -ETIMEDOUT; + mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true); +} + static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent) { unsigned long timeout = msecs_to_jiffies(MLX5_CMD_TIMEOUT_MSEC); @@ -1008,12 +1047,10 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent) ent->ret = -ECANCELED; goto out_err; } - if (cmd->mode == CMD_MODE_POLLING || ent->polling) { + if (cmd->mode == CMD_MODE_POLLING || ent->polling) wait_for_completion(&ent->done); - } else if (!wait_for_completion_timeout(&ent->done, timeout)) { - ent->ret = -ETIMEDOUT; - mlx5_cmd_comp_handler(dev, 1UL << ent->idx, true); - } + else if (!wait_for_completion_timeout(&ent->done, timeout)) + wait_func_handle_exec_timeout(dev, ent); out_err: err = ent->ret; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 1318d774b18f..22a19d391e17 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -189,6 +189,29 @@ u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq) return count_eqe; } +static void mlx5_eq_async_int_lock(struct mlx5_eq_async *eq, unsigned long *flags) + __acquires(&eq->lock) +{ + if (in_irq()) + spin_lock(&eq->lock); + else + spin_lock_irqsave(&eq->lock, *flags); +} + +static void mlx5_eq_async_int_unlock(struct mlx5_eq_async *eq, unsigned long *flags) + __releases(&eq->lock) +{ + if (in_irq()) + spin_unlock(&eq->lock); + else + spin_unlock_irqrestore(&eq->lock, *flags); +} + +enum async_eq_nb_action { + ASYNC_EQ_IRQ_HANDLER = 0, + ASYNC_EQ_RECOVER = 1, +}; + static int mlx5_eq_async_int(struct notifier_block *nb, unsigned long action, void *data) { @@ -198,11 +221,14 @@ static int mlx5_eq_async_int(struct notifier_block *nb, struct mlx5_eq_table *eqt; struct mlx5_core_dev *dev; struct mlx5_eqe *eqe; + unsigned long flags; int num_eqes = 0; dev = eq->dev; eqt = dev->priv.eq_table; + mlx5_eq_async_int_lock(eq_async, &flags); + eqe = next_eqe_sw(eq); if (!eqe) goto out; @@ -223,8 +249,19 @@ static int mlx5_eq_async_int(struct notifier_block *nb, out: eq_update_ci(eq, 1); + mlx5_eq_async_int_unlock(eq_async, &flags); - return 0; + return unlikely(action == ASYNC_EQ_RECOVER) ? num_eqes : 0; +} + +void mlx5_cmd_eq_recover(struct mlx5_core_dev *dev) +{ + struct mlx5_eq_async *eq = &dev->priv.eq_table->cmd_eq; + int eqes; + + eqes = mlx5_eq_async_int(&eq->irq_nb, ASYNC_EQ_RECOVER, NULL); + if (eqes) + mlx5_core_warn(dev, "Recovered %d EQEs on cmd_eq\n", eqes); } static void init_eq_buf(struct mlx5_eq *eq) @@ -569,6 +606,7 @@ setup_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq_async *eq, int err; eq->irq_nb.notifier_call = mlx5_eq_async_int; + spin_lock_init(&eq->lock); err = create_async_eq(dev, &eq->core, param); if (err) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h index 4aaca7400fb2..5c681e31983b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h @@ -37,6 +37,7 @@ struct mlx5_eq { struct mlx5_eq_async { struct mlx5_eq core; struct notifier_block irq_nb; + spinlock_t lock; /* To avoid irq EQ handle races with resiliency flows */ }; struct mlx5_eq_comp { @@ -81,6 +82,7 @@ void mlx5_cq_tasklet_cb(unsigned long data); struct cpumask *mlx5_eq_comp_cpumask(struct mlx5_core_dev *dev, int ix); u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq_comp *eq); +void mlx5_cmd_eq_recover(struct mlx5_core_dev *dev); void mlx5_eq_synchronize_async_irq(struct mlx5_core_dev *dev); void mlx5_eq_synchronize_cmd_irq(struct mlx5_core_dev *dev); From 410bd754cd73c4a2ac3856d9a03d7b08f9c906bf Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Mon, 31 Aug 2020 15:04:35 +0300 Subject: [PATCH 288/322] net/mlx5: Add retry mechanism to the command entry index allocation It is possible that new command entry index allocation will temporarily fail. The new command holds the semaphore, so it means that a free entry should be ready soon. Add one second retry mechanism before returning an error. Patch "net/mlx5: Avoid possible free of command entry while timeout comp handler" increase the possibility to bump into this temporarily failure as it delays the entry index release for non-callback commands. Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: Eran Ben Elisha Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 21 ++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 37dae95e61d5..2b597ac365f8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -883,6 +883,25 @@ static bool opcode_allowed(struct mlx5_cmd *cmd, u16 opcode) return cmd->allowed_opcode == opcode; } +static int cmd_alloc_index_retry(struct mlx5_cmd *cmd) +{ + unsigned long alloc_end = jiffies + msecs_to_jiffies(1000); + int idx; + +retry: + idx = cmd_alloc_index(cmd); + if (idx < 0 && time_before(jiffies, alloc_end)) { + /* Index allocation can fail on heavy load of commands. This is a temporary + * situation as the current command already holds the semaphore, meaning that + * another command completion is being handled and it is expected to release + * the entry index soon. + */ + cpu_relax(); + goto retry; + } + return idx; +} + static void cmd_work_handler(struct work_struct *work) { struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work); @@ -900,7 +919,7 @@ static void cmd_work_handler(struct work_struct *work) sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem; down(sem); if (!ent->page_queue) { - alloc_ret = cmd_alloc_index(cmd); + alloc_ret = cmd_alloc_index_retry(cmd); if (alloc_ret < 0) { mlx5_core_err_rl(dev, "failed to allocate command entry\n"); if (ent->callback) { From b898ce7bccf13087719c021d829dab607c175246 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Fri, 11 Sep 2020 11:48:55 -0700 Subject: [PATCH 289/322] net/mlx5: cmdif, Avoid skipping reclaim pages if FW is not accessible In case of pci is offline reclaim_pages_cmd() will still try to call the FW to release FW pages, cmd_exec() in this case will return a silent success without actually calling the FW. This is wrong and will cause page leaks, what we should do is to detect pci offline or command interface un-available before tying to access the FW and manually release the FW pages in the driver. In this patch we share the code to check for FW command interface availability and we call it in sensitive places e.g. reclaim_pages_cmd(). Alternative fix: 1. Remove MLX5_CMD_OP_MANAGE_PAGES form mlx5_internal_err_ret_value, command success simulation list. 2. Always Release FW pages even if cmd_exec fails in reclaim_pages_cmd(). Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 17 +++++++++-------- .../net/ethernet/mellanox/mlx5/core/pagealloc.c | 2 +- include/linux/mlx5/driver.h | 1 + 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 2b597ac365f8..2d1f4b3be9bf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -902,6 +902,13 @@ retry: return idx; } +bool mlx5_cmd_is_down(struct mlx5_core_dev *dev) +{ + return pci_channel_offline(dev->pdev) || + dev->cmd.state != MLX5_CMDIF_STATE_UP || + dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR; +} + static void cmd_work_handler(struct work_struct *work) { struct mlx5_cmd_work_ent *ent = container_of(work, struct mlx5_cmd_work_ent, work); @@ -967,10 +974,7 @@ static void cmd_work_handler(struct work_struct *work) set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state); /* Skip sending command to fw if internal error */ - if (pci_channel_offline(dev->pdev) || - dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR || - cmd->state != MLX5_CMDIF_STATE_UP || - !opcode_allowed(&dev->cmd, ent->op)) { + if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, ent->op)) { u8 status = 0; u32 drv_synd; @@ -1800,10 +1804,7 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, u8 token; opcode = MLX5_GET(mbox_in, in, opcode); - if (pci_channel_offline(dev->pdev) || - dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR || - dev->cmd.state != MLX5_CMDIF_STATE_UP || - !opcode_allowed(&dev->cmd, opcode)) { + if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, opcode)) { err = mlx5_internal_err_ret_value(dev, opcode, &drv_synd, &status); MLX5_SET(mbox_out, out, status, status); MLX5_SET(mbox_out, out, syndrome, drv_synd); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index f9b798af6335..c0e18f2ade99 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -432,7 +432,7 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev, u32 npages; u32 i = 0; - if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) + if (!mlx5_cmd_is_down(dev)) return mlx5_cmd_exec(dev, in, in_size, out, out_size); /* No hard feelings, we want our pages back! */ diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 897156822f0d..372100c755e7 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -935,6 +935,7 @@ int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size); void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome); +bool mlx5_cmd_is_down(struct mlx5_core_dev *dev); int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type); int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn); From 732ebfab7fe96b7ac9a3df3208f14752a4bb6db3 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Mon, 31 Aug 2020 21:37:31 +0300 Subject: [PATCH 290/322] net/mlx5: Fix request_irqs error flow Fix error flow handling in request_irqs which try to free irq that we failed to request. It fixes the below trace. WARNING: CPU: 1 PID: 7587 at kernel/irq/manage.c:1684 free_irq+0x4d/0x60 CPU: 1 PID: 7587 Comm: bash Tainted: G W OE 4.15.15-1.el7MELLANOXsmp-x86_64 #1 Hardware name: Advantech SKY-6200/SKY-6200, BIOS F2.00 08/06/2020 RIP: 0010:free_irq+0x4d/0x60 RSP: 0018:ffffc9000ef47af0 EFLAGS: 00010282 RAX: ffff88001476ae00 RBX: 0000000000000655 RCX: 0000000000000000 RDX: ffff88001476ae00 RSI: ffffc9000ef47ab8 RDI: ffff8800398bb478 RBP: ffff88001476a838 R08: ffff88001476ae00 R09: 000000000000156d R10: 0000000000000000 R11: 0000000000000004 R12: ffff88001476a838 R13: 0000000000000006 R14: ffff88001476a888 R15: 00000000ffffffe4 FS: 00007efeadd32740(0000) GS:ffff88047fc40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fc9cc010008 CR3: 00000001a2380004 CR4: 00000000007606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: mlx5_irq_table_create+0x38d/0x400 [mlx5_core] ? atomic_notifier_chain_register+0x50/0x60 mlx5_load_one+0x7ee/0x1130 [mlx5_core] init_one+0x4c9/0x650 [mlx5_core] pci_device_probe+0xb8/0x120 driver_probe_device+0x2a1/0x470 ? driver_allows_async_probing+0x30/0x30 bus_for_each_drv+0x54/0x80 __device_attach+0xa3/0x100 pci_bus_add_device+0x4a/0x90 pci_iov_add_virtfn+0x2dc/0x2f0 pci_enable_sriov+0x32e/0x420 mlx5_core_sriov_configure+0x61/0x1b0 [mlx5_core] ? kstrtoll+0x22/0x70 num_vf_store+0x4b/0x70 [mlx5_core] kernfs_fop_write+0x102/0x180 __vfs_write+0x26/0x140 ? rcu_all_qs+0x5/0x80 ? _cond_resched+0x15/0x30 ? __sb_start_write+0x41/0x80 vfs_write+0xad/0x1a0 SyS_write+0x42/0x90 do_syscall_64+0x60/0x110 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 Fixes: 24163189da48 ("net/mlx5: Separate IRQ request/free from EQ life cycle") Signed-off-by: Maor Gottlieb Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 373981a659c7..6fd974920394 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -115,7 +115,7 @@ static int request_irqs(struct mlx5_core_dev *dev, int nvec) return 0; err_request_irq: - for (; i >= 0; i--) { + while (i--) { struct mlx5_irq *irq = mlx5_irq_get(dev, i); int irqn = pci_irq_vector(dev->pdev, i); From 08a762cecc60e19cd64e379df6580f1997408e04 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Mon, 3 Aug 2020 16:22:42 +0300 Subject: [PATCH 291/322] net/mlx5e: Fix error path for RQ alloc Increase granularity of the error path to avoid unneeded free/release. Fix the cleanup to be symmetric to the order of creation. Fixes: 0ddf543226ac ("xdp/mlx5: setup xdp_rxq_info") Fixes: 422d4c401edd ("net/mlx5e: RX, Split WQ objects for different RQ types") Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_main.c | 32 ++++++++++--------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index b3cda7b6e5e1..1fbd7a0f3ca6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -396,7 +396,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, rq_xdp_ix += params->num_channels * MLX5E_RQ_GROUP_XSK; err = xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq_xdp_ix); if (err < 0) - goto err_rq_wq_destroy; + goto err_rq_xdp_prog; rq->buff.map_dir = params->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params, xsk); @@ -407,7 +407,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->mpwqe.wq, &rq->wq_ctrl); if (err) - goto err_rq_wq_destroy; + goto err_rq_xdp; rq->mpwqe.wq.db = &rq->mpwqe.wq.db[MLX5_RCV_DBR]; @@ -429,13 +429,13 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, err = mlx5e_rq_alloc_mpwqe_info(rq, c); if (err) - goto err_free; + goto err_rq_mkey; break; default: /* MLX5_WQ_TYPE_CYCLIC */ err = mlx5_wq_cyc_create(mdev, &rqp->wq, rqc_wq, &rq->wqe.wq, &rq->wq_ctrl); if (err) - goto err_rq_wq_destroy; + goto err_rq_xdp; rq->wqe.wq.db = &rq->wqe.wq.db[MLX5_RCV_DBR]; @@ -450,19 +450,19 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, GFP_KERNEL, cpu_to_node(c->cpu)); if (!rq->wqe.frags) { err = -ENOMEM; - goto err_free; + goto err_rq_wq_destroy; } err = mlx5e_init_di_list(rq, wq_sz, c->cpu); if (err) - goto err_free; + goto err_rq_frags; rq->mkey_be = c->mkey_be; } err = mlx5e_rq_set_handlers(rq, params, xsk); if (err) - goto err_free; + goto err_free_by_rq_type; if (xsk) { err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, @@ -486,13 +486,13 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, if (IS_ERR(rq->page_pool)) { err = PTR_ERR(rq->page_pool); rq->page_pool = NULL; - goto err_free; + goto err_free_by_rq_type; } err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq, MEM_TYPE_PAGE_POOL, rq->page_pool); } if (err) - goto err_free; + goto err_free_by_rq_type; for (i = 0; i < wq_sz; i++) { if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { @@ -542,23 +542,25 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, return 0; -err_free: +err_free_by_rq_type: switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: kvfree(rq->mpwqe.info); +err_rq_mkey: mlx5_core_destroy_mkey(mdev, &rq->umr_mkey); break; default: /* MLX5_WQ_TYPE_CYCLIC */ - kvfree(rq->wqe.frags); mlx5e_free_di_list(rq); +err_rq_frags: + kvfree(rq->wqe.frags); } - err_rq_wq_destroy: + mlx5_wq_destroy(&rq->wq_ctrl); +err_rq_xdp: + xdp_rxq_info_unreg(&rq->xdp_rxq); +err_rq_xdp_prog: if (params->xdp_prog) bpf_prog_put(params->xdp_prog); - xdp_rxq_info_unreg(&rq->xdp_rxq); - page_pool_destroy(rq->page_pool); - mlx5_wq_destroy(&rq->wq_ctrl); return err; } From c3c9402373fe20e2d08c04f437ce4dcd252cffb2 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Mon, 20 Jul 2020 16:53:18 +0300 Subject: [PATCH 292/322] net/mlx5e: Add resiliency in Striding RQ mode for packets larger than MTU Prior to this fix, in Striding RQ mode the driver was vulnerable when receiving packets in the range (stride size - headroom, stride size]. Where stride size is calculated by mtu+headroom+tailroom aligned to the closest power of 2. Usually, this filtering is performed by the HW, except for a few cases: - Between 2 VFs over the same PF with different MTUs - On bluefield, when the host physical function sets a larger MTU than the ARM has configured on its representor and uplink representor. When the HW filtering is not present, packets that are larger than MTU might be harmful for the RQ's integrity, in the following impacts: 1) Overflow from one WQE to the next, causing a memory corruption that in most cases is unharmful: as the write happens to the headroom of next packet, which will be overwritten by build_skb(). In very rare cases, high stress/load, this is harmful. When the next WQE is not yet reposted and points to existing SKB head. 2) Each oversize packet overflows to the headroom of the next WQE. On the last WQE of the WQ, where addresses wrap-around, the address of the remainder headroom does not belong to the next WQE, but it is out of the memory region range. This results in a HW CQE error that moves the RQ into an error state. Solution: Add a page buffer at the end of each WQE to absorb the leak. Actually the maximal overflow size is headroom but since all memory units must be of the same size, we use page size to comply with UMR WQEs. The increase in memory consumption is of a single page per RQ. Initialize the mkey with all MTTs pointing to a default page. When the channels are activated, UMR WQEs will redirect the RX WQEs to the actual memory from the RQ's pool, while the overflow MTTs remain mapped to the default page. Fixes: 73281b78a37a ("net/mlx5e: Derive Striding RQ size from MTU") Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 8 ++- .../net/ethernet/mellanox/mlx5/core/en_main.c | 55 +++++++++++++++++-- 2 files changed, 58 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 90d5caabd6af..356f5852955f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -91,7 +91,12 @@ struct page_pool; #define MLX5_MPWRQ_PAGES_PER_WQE BIT(MLX5_MPWRQ_WQE_PAGE_ORDER) #define MLX5_MTT_OCTW(npages) (ALIGN(npages, 8) / 2) -#define MLX5E_REQUIRED_WQE_MTTS (ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8)) +/* Add another page to MLX5E_REQUIRED_WQE_MTTS as a buffer between + * WQEs, This page will absorb write overflow by the hardware, when + * receiving packets larger than MTU. These oversize packets are + * dropped by the driver at a later stage. + */ +#define MLX5E_REQUIRED_WQE_MTTS (ALIGN(MLX5_MPWRQ_PAGES_PER_WQE + 1, 8)) #define MLX5E_LOG_ALIGNED_MPWQE_PPW (ilog2(MLX5E_REQUIRED_WQE_MTTS)) #define MLX5E_REQUIRED_MTTS(wqes) (wqes * MLX5E_REQUIRED_WQE_MTTS) #define MLX5E_MAX_RQ_NUM_MTTS \ @@ -617,6 +622,7 @@ struct mlx5e_rq { u32 rqn; struct mlx5_core_dev *mdev; struct mlx5_core_mkey umr_mkey; + struct mlx5e_dma_info wqe_overflow; /* XDP read-mostly */ struct xdp_rxq_info xdp_rxq; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 1fbd7a0f3ca6..b1a16fb9667e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -246,12 +246,17 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev, u64 npages, u8 page_shift, - struct mlx5_core_mkey *umr_mkey) + struct mlx5_core_mkey *umr_mkey, + dma_addr_t filler_addr) { - int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); + struct mlx5_mtt *mtt; + int inlen; void *mkc; u32 *in; int err; + int i; + + inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + sizeof(*mtt) * npages; in = kvzalloc(inlen, GFP_KERNEL); if (!in) @@ -271,6 +276,18 @@ static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev, MLX5_SET(mkc, mkc, translations_octword_size, MLX5_MTT_OCTW(npages)); MLX5_SET(mkc, mkc, log_page_size, page_shift); + MLX5_SET(create_mkey_in, in, translations_octword_actual_size, + MLX5_MTT_OCTW(npages)); + + /* Initialize the mkey with all MTTs pointing to a default + * page (filler_addr). When the channels are activated, UMR + * WQEs will redirect the RX WQEs to the actual memory from + * the RQ's pool, while the gaps (wqe_overflow) remain mapped + * to the default page. + */ + mtt = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); + for (i = 0 ; i < npages ; i++) + mtt[i].ptag = cpu_to_be64(filler_addr); err = mlx5_core_create_mkey(mdev, umr_mkey, in, inlen); @@ -282,7 +299,8 @@ static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq { u64 num_mtts = MLX5E_REQUIRED_MTTS(mlx5_wq_ll_get_size(&rq->mpwqe.wq)); - return mlx5e_create_umr_mkey(mdev, num_mtts, PAGE_SHIFT, &rq->umr_mkey); + return mlx5e_create_umr_mkey(mdev, num_mtts, PAGE_SHIFT, &rq->umr_mkey, + rq->wqe_overflow.addr); } static inline u64 mlx5e_get_mpwqe_offset(struct mlx5e_rq *rq, u16 wqe_ix) @@ -350,6 +368,28 @@ static void mlx5e_rq_err_cqe_work(struct work_struct *recover_work) mlx5e_reporter_rq_cqe_err(rq); } +static int mlx5e_alloc_mpwqe_rq_drop_page(struct mlx5e_rq *rq) +{ + rq->wqe_overflow.page = alloc_page(GFP_KERNEL); + if (!rq->wqe_overflow.page) + return -ENOMEM; + + rq->wqe_overflow.addr = dma_map_page(rq->pdev, rq->wqe_overflow.page, 0, + PAGE_SIZE, rq->buff.map_dir); + if (dma_mapping_error(rq->pdev, rq->wqe_overflow.addr)) { + __free_page(rq->wqe_overflow.page); + return -ENOMEM; + } + return 0; +} + +static void mlx5e_free_mpwqe_rq_drop_page(struct mlx5e_rq *rq) +{ + dma_unmap_page(rq->pdev, rq->wqe_overflow.addr, PAGE_SIZE, + rq->buff.map_dir); + __free_page(rq->wqe_overflow.page); +} + static int mlx5e_alloc_rq(struct mlx5e_channel *c, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk, @@ -409,6 +449,10 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, if (err) goto err_rq_xdp; + err = mlx5e_alloc_mpwqe_rq_drop_page(rq); + if (err) + goto err_rq_wq_destroy; + rq->mpwqe.wq.db = &rq->mpwqe.wq.db[MLX5_RCV_DBR]; wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq); @@ -424,7 +468,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, err = mlx5e_create_rq_umr_mkey(mdev, rq); if (err) - goto err_rq_wq_destroy; + goto err_rq_drop_page; rq->mkey_be = cpu_to_be32(rq->umr_mkey.key); err = mlx5e_rq_alloc_mpwqe_info(rq, c); @@ -548,6 +592,8 @@ err_free_by_rq_type: kvfree(rq->mpwqe.info); err_rq_mkey: mlx5_core_destroy_mkey(mdev, &rq->umr_mkey); +err_rq_drop_page: + mlx5e_free_mpwqe_rq_drop_page(rq); break; default: /* MLX5_WQ_TYPE_CYCLIC */ mlx5e_free_di_list(rq); @@ -582,6 +628,7 @@ static void mlx5e_free_rq(struct mlx5e_rq *rq) case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: kvfree(rq->mpwqe.info); mlx5_core_destroy_mkey(rq->mdev, &rq->umr_mkey); + mlx5e_free_mpwqe_rq_drop_page(rq); break; default: /* MLX5_WQ_TYPE_CYCLIC */ kvfree(rq->wqe.frags); From 2b0219898b8684075ff66027551fb02ea12f652b Mon Sep 17 00:00:00 2001 From: Maor Dickman Date: Tue, 1 Sep 2020 12:33:18 +0300 Subject: [PATCH 293/322] net/mlx5e: CT, Fix coverity issue The cited commit introduced the following coverity issue at function mlx5_tc_ct_rule_to_tuple_nat: - Memory - corruptions (OVERRUN) Overrunning array "tuple->ip.src_v6.in6_u.u6_addr32" of 4 4-byte elements at element index 7 (byte offset 31) using index "ip6_offset" (which evaluates to 7). In case of IPv6 destination address rewrite, ip6_offset values are between 4 to 7, which will cause memory overrun of array "tuple->ip.src_v6.in6_u.u6_addr32" to array "tuple->ip.dst_v6.in6_u.u6_addr32". Fixed by writing the value directly to array "tuple->ip.dst_v6.in6_u.u6_addr32" in case ip6_offset values are between 4 to 7. Fixes: bc562be9674b ("net/mlx5e: CT: Save ct entries tuples in hashtables") Signed-off-by: Maor Dickman Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index bc5f72ec3623..a8be40cbe325 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -246,8 +246,10 @@ mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple, case FLOW_ACT_MANGLE_HDR_TYPE_IP6: ip6_offset = (offset - offsetof(struct ipv6hdr, saddr)); ip6_offset /= 4; - if (ip6_offset < 8) + if (ip6_offset < 4) tuple->ip.src_v6.s6_addr32[ip6_offset] = cpu_to_be32(val); + else if (ip6_offset < 8) + tuple->ip.dst_v6.s6_addr32[ip6_offset - 4] = cpu_to_be32(val); else return -EOPNOTSUPP; break; From 3d093bc2369003b4ce6c3522d9b383e47c40045d Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Sun, 9 Aug 2020 12:34:21 +0300 Subject: [PATCH 294/322] net/mlx5e: Fix driver's declaration to support GRE offload Declare GRE offload support with respect to the inner protocol. Add a list of supported inner protocols on which the driver can offload checksum and GSO. For other protocols, inform the stack to do the needed operations. There is no noticeable impact on GRE performance. Fixes: 2729984149e6 ("net/mlx5e: Support TSO and TX checksum offloads for GRE tunnels") Signed-off-by: Aya Levin Reviewed-by: Moshe Shemesh Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_main.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index b1a16fb9667e..42ec28e29834 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4226,6 +4226,21 @@ int mlx5e_get_vf_stats(struct net_device *dev, } #endif +static bool mlx5e_gre_tunnel_inner_proto_offload_supported(struct mlx5_core_dev *mdev, + struct sk_buff *skb) +{ + switch (skb->inner_protocol) { + case htons(ETH_P_IP): + case htons(ETH_P_IPV6): + case htons(ETH_P_TEB): + return true; + case htons(ETH_P_MPLS_UC): + case htons(ETH_P_MPLS_MC): + return MLX5_CAP_ETH(mdev, tunnel_stateless_mpls_over_gre); + } + return false; +} + static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv, struct sk_buff *skb, netdev_features_t features) @@ -4248,7 +4263,9 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv, switch (proto) { case IPPROTO_GRE: - return features; + if (mlx5e_gre_tunnel_inner_proto_offload_supported(priv->mdev, skb)) + return features; + break; case IPPROTO_IPIP: case IPPROTO_IPV6: if (mlx5e_tunnel_proto_supported(priv->mdev, IPPROTO_IPIP)) From 2608a2f831c47dfdf18885a7289be5af97182b05 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Wed, 12 Aug 2020 10:44:36 +0300 Subject: [PATCH 295/322] net/mlx5e: Fix return status when setting unsupported FEC mode Verify the configured FEC mode is supported by at least a single link mode before applying the command. Otherwise fail the command and return "Operation not supported". Prior to this patch, the command was successful, yet it falsely set all link modes to FEC auto mode - like configuring FEC mode to auto. Auto mode is the default configuration if a link mode doesn't support the configured FEC mode. Fixes: b5ede32d3329 ("net/mlx5e: Add support for FEC modes based on 50G per lane links") Signed-off-by: Aya Levin Reviewed-by: Eran Ben Elisha Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/port.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c index 96608dbb9314..308fd279669e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c @@ -569,6 +569,9 @@ int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u16 fec_policy) if (fec_policy >= (1 << MLX5E_FEC_LLRS_272_257_1) && !fec_50g_per_lane) return -EOPNOTSUPP; + if (fec_policy && !mlx5e_fec_in_caps(dev, fec_policy)) + return -EOPNOTSUPP; + MLX5_SET(pplm_reg, in, local_port, 1); err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0); if (err) From 8c7353b6f716436ad0bfda2b5c5524ab2dde5894 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Sun, 13 Sep 2020 17:57:23 +0300 Subject: [PATCH 296/322] net/mlx5e: Fix VLAN cleanup flow Prior to this patch unloading an interface in promiscuous mode with RX VLAN filtering feature turned off - resulted in a warning. This is due to a wrong condition in the VLAN rules cleanup flow, which left the any-vid rules in the VLAN steering table. These rules prevented destroying the flow group and the flow table. The any-vid rules are removed in 2 flows, but none of them remove it in case both promiscuous is set and VLAN filtering is off. Fix the issue by changing the condition of the VLAN table cleanup flow to clean also in case of promiscuous mode. mlx5_core 0000:00:08.0: mlx5_destroy_flow_group:2123:(pid 28729): Flow group 20 wasn't destroyed, refcount > 1 mlx5_core 0000:00:08.0: mlx5_destroy_flow_group:2123:(pid 28729): Flow group 19 wasn't destroyed, refcount > 1 mlx5_core 0000:00:08.0: mlx5_destroy_flow_table:2112:(pid 28729): Flow table 262149 wasn't destroyed, refcount > 1 ... ... ------------[ cut here ]------------ FW pages counter is 11560 after reclaiming all pages WARNING: CPU: 1 PID: 28729 at drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c:660 mlx5_reclaim_startup_pages+0x178/0x230 [mlx5_core] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014 Call Trace: mlx5_function_teardown+0x2f/0x90 [mlx5_core] mlx5_unload_one+0x71/0x110 [mlx5_core] remove_one+0x44/0x80 [mlx5_core] pci_device_remove+0x3e/0xc0 device_release_driver_internal+0xfb/0x1c0 device_release_driver+0x12/0x20 pci_stop_bus_device+0x68/0x90 pci_stop_and_remove_bus_device+0x12/0x20 hv_eject_device_work+0x6f/0x170 [pci_hyperv] ? __schedule+0x349/0x790 process_one_work+0x206/0x400 worker_thread+0x34/0x3f0 ? process_one_work+0x400/0x400 kthread+0x126/0x140 ? kthread_park+0x90/0x90 ret_from_fork+0x22/0x30 ---[ end trace 6283bde8d26170dc ]--- Fixes: 9df30601c843 ("net/mlx5e: Restore vlan filter after seamless reset") Signed-off-by: Aya Levin Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_fs.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 64d002d92250..55a4c3adaa05 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -415,8 +415,12 @@ static void mlx5e_del_vlan_rules(struct mlx5e_priv *priv) for_each_set_bit(i, priv->fs.vlan.active_svlans, VLAN_N_VID) mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i); - if (priv->fs.vlan.cvlan_filter_disabled && - !(priv->netdev->flags & IFF_PROMISC)) + WARN_ON_ONCE(!(test_bit(MLX5E_STATE_DESTROYING, &priv->state))); + + /* must be called after DESTROY bit is set and + * set_rx_mode is called and flushed + */ + if (priv->fs.vlan.cvlan_filter_disabled) mlx5e_del_any_vid_rules(priv); } From d4a16052bccdd695982f89d815ca075825115821 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Sun, 13 Sep 2020 18:05:40 +0300 Subject: [PATCH 297/322] net/mlx5e: Fix VLAN create flow When interface is attached while in promiscuous mode and with VLAN filtering turned off, both configurations are not respected and VLAN filtering is performed. There are 2 flows which add the any-vid rules during interface attach: VLAN creation table and set rx mode. Each is relaying on the other to add any-vid rules, eventually non of them does. Fix this by adding any-vid rules on VLAN creation regardless of promiscuous mode. Fixes: 9df30601c843 ("net/mlx5e: Restore vlan filter after seamless reset") Signed-off-by: Aya Levin Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_fs.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 55a4c3adaa05..1f48f99c0997 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -217,6 +217,9 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, break; } + if (WARN_ONCE(*rule_p, "VLAN rule already exists type %d", rule_type)) + return 0; + *rule_p = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); if (IS_ERR(*rule_p)) { @@ -397,8 +400,7 @@ static void mlx5e_add_vlan_rules(struct mlx5e_priv *priv) for_each_set_bit(i, priv->fs.vlan.active_svlans, VLAN_N_VID) mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_STAG_VID, i); - if (priv->fs.vlan.cvlan_filter_disabled && - !(priv->netdev->flags & IFF_PROMISC)) + if (priv->fs.vlan.cvlan_filter_disabled) mlx5e_add_any_vid_rules(priv); } From 1253935ad801485270194d5651acab04abc97b36 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Sun, 20 Sep 2020 19:59:08 +0300 Subject: [PATCH 298/322] net/mlx5e: Fix race condition on nhe->n pointer in neigh update Current neigh update event handler implementation takes reference to neighbour structure, assigns it to nhe->n, tries to schedule workqueue task and releases the reference if task was already enqueued. This results potentially overwriting existing nhe->n pointer with another neighbour instance, which causes double release of the instance (once in neigh update handler that failed to enqueue to workqueue and another one in neigh update workqueue task that processes updated nhe->n pointer instead of original one): [ 3376.512806] ------------[ cut here ]------------ [ 3376.513534] refcount_t: underflow; use-after-free. [ 3376.521213] Modules linked in: act_skbedit act_mirred act_tunnel_key vxlan ip6_udp_tunnel udp_tunnel nfnetlink act_gact cls_flower sch_ingress openvswitch nsh nf_conncount nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 mlx5_ib mlx5_core mlxfw pci_hyperv_intf ptp pps_core nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache ib_isert iscsi_target_mod ib_srpt target_core_mod ib_srp rpcrdma rdma_ucm ib_umad ib_ipoib ib_iser rdma_cm ib_cm iw_cm rfkill ib_uverbs ib_core sunrpc kvm_intel kvm iTCO_wdt iTCO_vendor_support virtio_net irqbypass net_failover crc32_pclmul lpc_ich i2c_i801 failover pcspkr i2c_smbus mfd_core ghash_clmulni_intel sch_fq_codel drm i2c _core ip_tables crc32c_intel serio_raw [last unloaded: mlxfw] [ 3376.529468] CPU: 8 PID: 22756 Comm: kworker/u20:5 Not tainted 5.9.0-rc5+ #6 [ 3376.530399] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.org 04/01/2014 [ 3376.531975] Workqueue: mlx5e mlx5e_rep_neigh_update [mlx5_core] [ 3376.532820] RIP: 0010:refcount_warn_saturate+0xd8/0xe0 [ 3376.533589] Code: ff 48 c7 c7 e0 b8 27 82 c6 05 0b b6 09 01 01 e8 94 93 c1 ff 0f 0b c3 48 c7 c7 88 b8 27 82 c6 05 f7 b5 09 01 01 e8 7e 93 c1 ff <0f> 0b c3 0f 1f 44 00 00 8b 07 3d 00 00 00 c0 74 12 83 f8 01 74 13 [ 3376.536017] RSP: 0018:ffffc90002a97e30 EFLAGS: 00010286 [ 3376.536793] RAX: 0000000000000000 RBX: ffff8882de30d648 RCX: 0000000000000000 [ 3376.537718] RDX: ffff8882f5c28f20 RSI: ffff8882f5c18e40 RDI: ffff8882f5c18e40 [ 3376.538654] RBP: ffff8882cdf56c00 R08: 000000000000c580 R09: 0000000000001a4d [ 3376.539582] R10: 0000000000000731 R11: ffffc90002a97ccd R12: 0000000000000000 [ 3376.540519] R13: ffff8882de30d600 R14: ffff8882de30d640 R15: ffff88821e000900 [ 3376.541444] FS: 0000000000000000(0000) GS:ffff8882f5c00000(0000) knlGS:0000000000000000 [ 3376.542732] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 3376.543545] CR2: 0000556e5504b248 CR3: 00000002c6f10005 CR4: 0000000000770ee0 [ 3376.544483] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 3376.545419] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 3376.546344] PKRU: 55555554 [ 3376.546911] Call Trace: [ 3376.547479] mlx5e_rep_neigh_update.cold+0x33/0xe2 [mlx5_core] [ 3376.548299] process_one_work+0x1d8/0x390 [ 3376.548977] worker_thread+0x4d/0x3e0 [ 3376.549631] ? rescuer_thread+0x3e0/0x3e0 [ 3376.550295] kthread+0x118/0x130 [ 3376.550914] ? kthread_create_worker_on_cpu+0x70/0x70 [ 3376.551675] ret_from_fork+0x1f/0x30 [ 3376.552312] ---[ end trace d84e8f46d2a77eec ]--- Fix the bug by moving work_struct to dedicated dynamically-allocated structure. This enabled every event handler to work on its own private neighbour pointer and removes the need for handling the case when task is already enqueued. Fixes: 232c001398ae ("net/mlx5e: Add support to neighbour update flow") Signed-off-by: Vlad Buslov Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- .../mellanox/mlx5/core/en/rep/neigh.c | 81 ++++++++++++------- .../net/ethernet/mellanox/mlx5/core/en_rep.h | 6 -- 2 files changed, 50 insertions(+), 37 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c index 906292035088..58e27038c947 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c @@ -110,11 +110,25 @@ static void mlx5e_rep_neigh_stats_work(struct work_struct *work) rtnl_unlock(); } +struct neigh_update_work { + struct work_struct work; + struct neighbour *n; + struct mlx5e_neigh_hash_entry *nhe; +}; + +static void mlx5e_release_neigh_update_work(struct neigh_update_work *update_work) +{ + neigh_release(update_work->n); + mlx5e_rep_neigh_entry_release(update_work->nhe); + kfree(update_work); +} + static void mlx5e_rep_neigh_update(struct work_struct *work) { - struct mlx5e_neigh_hash_entry *nhe = - container_of(work, struct mlx5e_neigh_hash_entry, neigh_update_work); - struct neighbour *n = nhe->n; + struct neigh_update_work *update_work = container_of(work, struct neigh_update_work, + work); + struct mlx5e_neigh_hash_entry *nhe = update_work->nhe; + struct neighbour *n = update_work->n; struct mlx5e_encap_entry *e; unsigned char ha[ETH_ALEN]; struct mlx5e_priv *priv; @@ -146,30 +160,42 @@ static void mlx5e_rep_neigh_update(struct work_struct *work) mlx5e_rep_update_flows(priv, e, neigh_connected, ha); mlx5e_encap_put(priv, e); } - mlx5e_rep_neigh_entry_release(nhe); rtnl_unlock(); - neigh_release(n); + mlx5e_release_neigh_update_work(update_work); } -static void mlx5e_rep_queue_neigh_update_work(struct mlx5e_priv *priv, - struct mlx5e_neigh_hash_entry *nhe, - struct neighbour *n) +static struct neigh_update_work *mlx5e_alloc_neigh_update_work(struct mlx5e_priv *priv, + struct neighbour *n) { - /* Take a reference to ensure the neighbour and mlx5 encap - * entry won't be destructed until we drop the reference in - * delayed work. - */ - neigh_hold(n); + struct neigh_update_work *update_work; + struct mlx5e_neigh_hash_entry *nhe; + struct mlx5e_neigh m_neigh = {}; - /* This assignment is valid as long as the the neigh reference - * is taken - */ - nhe->n = n; + update_work = kzalloc(sizeof(*update_work), GFP_ATOMIC); + if (WARN_ON(!update_work)) + return NULL; - if (!queue_work(priv->wq, &nhe->neigh_update_work)) { - mlx5e_rep_neigh_entry_release(nhe); - neigh_release(n); + m_neigh.dev = n->dev; + m_neigh.family = n->ops->family; + memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len); + + /* Obtain reference to nhe as last step in order not to release it in + * atomic context. + */ + rcu_read_lock(); + nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh); + rcu_read_unlock(); + if (!nhe) { + kfree(update_work); + return NULL; } + + INIT_WORK(&update_work->work, mlx5e_rep_neigh_update); + neigh_hold(n); + update_work->n = n; + update_work->nhe = nhe; + + return update_work; } static int mlx5e_rep_netevent_event(struct notifier_block *nb, @@ -181,7 +207,7 @@ static int mlx5e_rep_netevent_event(struct notifier_block *nb, struct net_device *netdev = rpriv->netdev; struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5e_neigh_hash_entry *nhe = NULL; - struct mlx5e_neigh m_neigh = {}; + struct neigh_update_work *update_work; struct neigh_parms *p; struct neighbour *n; bool found = false; @@ -196,17 +222,11 @@ static int mlx5e_rep_netevent_event(struct notifier_block *nb, #endif return NOTIFY_DONE; - m_neigh.dev = n->dev; - m_neigh.family = n->ops->family; - memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len); - - rcu_read_lock(); - nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh); - rcu_read_unlock(); - if (!nhe) + update_work = mlx5e_alloc_neigh_update_work(priv, n); + if (!update_work) return NOTIFY_DONE; - mlx5e_rep_queue_neigh_update_work(priv, nhe, n); + queue_work(priv->wq, &update_work->work); break; case NETEVENT_DELAY_PROBE_TIME_UPDATE: @@ -352,7 +372,6 @@ int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv, (*nhe)->priv = priv; memcpy(&(*nhe)->m_neigh, &e->m_neigh, sizeof(e->m_neigh)); - INIT_WORK(&(*nhe)->neigh_update_work, mlx5e_rep_neigh_update); spin_lock_init(&(*nhe)->encap_list_lock); INIT_LIST_HEAD(&(*nhe)->encap_list); refcount_set(&(*nhe)->refcnt, 1); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index 622c27ae4ac7..0d1562e20118 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -135,12 +135,6 @@ struct mlx5e_neigh_hash_entry { /* encap list sharing the same neigh */ struct list_head encap_list; - /* valid only when the neigh reference is taken during - * neigh_update_work workqueue callback. - */ - struct neighbour *n; - struct work_struct neigh_update_work; - /* neigh hash entry can be deleted only when the refcount is zero. * refcount is needed to avoid neigh hash entry removal by TC, while * it's used by the neigh notification call. From a95bc734e60449e7b073ff7ff70c35083b290ae9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 2 Oct 2020 09:46:04 +0200 Subject: [PATCH 299/322] netlink: fix policy dump leak If userspace doesn't complete the policy dump, we leak the allocated state. Fix this. Fixes: d07dcf9aadd6 ("netlink: add infrastructure to expose policies to userspace") Signed-off-by: Johannes Berg Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/net/netlink.h | 3 ++- net/netlink/genetlink.c | 9 ++++++++- net/netlink/policy.c | 24 ++++++++++-------------- 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/include/net/netlink.h b/include/net/netlink.h index 8e0eb2c9c528..271620f6bc7f 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -1934,7 +1934,8 @@ void nla_get_range_signed(const struct nla_policy *pt, int netlink_policy_dump_start(const struct nla_policy *policy, unsigned int maxtype, unsigned long *state); -bool netlink_policy_dump_loop(unsigned long *state); +bool netlink_policy_dump_loop(unsigned long state); int netlink_policy_dump_write(struct sk_buff *skb, unsigned long state); +void netlink_policy_dump_free(unsigned long state); #endif diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 1eb65a7a27fd..c4b4d3376227 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -1079,7 +1079,7 @@ static int ctrl_dumppolicy(struct sk_buff *skb, struct netlink_callback *cb) if (err) return err; - while (netlink_policy_dump_loop(&cb->args[1])) { + while (netlink_policy_dump_loop(cb->args[1])) { void *hdr; struct nlattr *nest; @@ -1113,6 +1113,12 @@ nla_put_failure: return skb->len; } +static int ctrl_dumppolicy_done(struct netlink_callback *cb) +{ + netlink_policy_dump_free(cb->args[1]); + return 0; +} + static const struct genl_ops genl_ctrl_ops[] = { { .cmd = CTRL_CMD_GETFAMILY, @@ -1123,6 +1129,7 @@ static const struct genl_ops genl_ctrl_ops[] = { { .cmd = CTRL_CMD_GETPOLICY, .dumpit = ctrl_dumppolicy, + .done = ctrl_dumppolicy_done, }, }; diff --git a/net/netlink/policy.c b/net/netlink/policy.c index 641ffbdd977a..0176b59ce530 100644 --- a/net/netlink/policy.c +++ b/net/netlink/policy.c @@ -84,7 +84,6 @@ int netlink_policy_dump_start(const struct nla_policy *policy, unsigned int policy_idx; int err; - /* also returns 0 if "*_state" is our ERR_PTR() end marker */ if (*_state) return 0; @@ -140,21 +139,11 @@ static bool netlink_policy_dump_finished(struct nl_policy_dump *state) !state->policies[state->policy_idx].policy; } -bool netlink_policy_dump_loop(unsigned long *_state) +bool netlink_policy_dump_loop(unsigned long _state) { - struct nl_policy_dump *state = (void *)*_state; + struct nl_policy_dump *state = (void *)_state; - if (IS_ERR(state)) - return false; - - if (netlink_policy_dump_finished(state)) { - kfree(state); - /* store end marker instead of freed state */ - *_state = (unsigned long)ERR_PTR(-ENOENT); - return false; - } - - return true; + return !netlink_policy_dump_finished(state); } int netlink_policy_dump_write(struct sk_buff *skb, unsigned long _state) @@ -309,3 +298,10 @@ nla_put_failure: nla_nest_cancel(skb, policy); return -ENOBUFS; } + +void netlink_policy_dump_free(unsigned long _state) +{ + struct nl_policy_dump *state = (void *)_state; + + kfree(state); +} From a93bdcb94a0b3ca72046151412c2389dca681d2a Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 2 Oct 2020 07:49:45 +0200 Subject: [PATCH 300/322] net: core: document two new elements of struct net_device As warned by "make htmldocs", there are two new struct elements that aren't documented: ../include/linux/netdevice.h:2159: warning: Function parameter or member 'unlink_list' not described in 'net_device' ../include/linux/netdevice.h:2159: warning: Function parameter or member 'nested_level' not described in 'net_device' Fixes: 1fc70edb7d7b ("net: core: add nested_level variable in net_device") Signed-off-by: Mauro Carvalho Chehab Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9fdb3ebef306..18dec08439f9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1851,6 +1851,11 @@ enum netdev_priv_flags { * @udp_tunnel_nic: UDP tunnel offload state * @xdp_state: stores info on attached XDP BPF programs * + * @nested_level: Used as as a parameter of spin_lock_nested() of + * dev->addr_list_lock. + * @unlink_list: As netif_addr_lock() can be called recursively, + * keep a list of interfaces to be deleted. + * * FIXME: cleanup struct net_device such that network protocol info * moves out. */ From f30e25a9d1b25ac8d40071c4dc2679ad0fcdc55a Mon Sep 17 00:00:00 2001 From: Petko Manolov Date: Fri, 2 Oct 2020 10:56:04 +0300 Subject: [PATCH 301/322] net: usb: pegasus: Proper error handing when setting pegasus' MAC address v2: If reading the MAC address from eeprom fail don't throw an error, use randomly generated MAC instead. Either way the adapter will soldier on and the return type of set_ethernet_addr() can be reverted to void. v1: Fix a bug in set_ethernet_addr() which does not take into account possible errors (or partial reads) returned by its helpers. This can potentially lead to writing random data into device's MAC address registers. Signed-off-by: Petko Manolov Signed-off-by: David S. Miller --- drivers/net/usb/pegasus.c | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index e92cb51a2c77..060a8a03e6c4 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -360,28 +360,47 @@ fail: } #endif /* PEGASUS_WRITE_EEPROM */ -static inline void get_node_id(pegasus_t *pegasus, __u8 *id) +static inline int get_node_id(pegasus_t *pegasus, u8 *id) { - int i; - __u16 w16; + int i, ret; + u16 w16; for (i = 0; i < 3; i++) { - read_eprom_word(pegasus, i, &w16); + ret = read_eprom_word(pegasus, i, &w16); + if (ret < 0) + return ret; ((__le16 *) id)[i] = cpu_to_le16(w16); } + + return 0; } static void set_ethernet_addr(pegasus_t *pegasus) { - __u8 node_id[6]; + int ret; + u8 node_id[6]; if (pegasus->features & PEGASUS_II) { - get_registers(pegasus, 0x10, sizeof(node_id), node_id); + ret = get_registers(pegasus, 0x10, sizeof(node_id), node_id); + if (ret < 0) + goto err; } else { - get_node_id(pegasus, node_id); - set_registers(pegasus, EthID, sizeof(node_id), node_id); + ret = get_node_id(pegasus, node_id); + if (ret < 0) + goto err; + ret = set_registers(pegasus, EthID, sizeof(node_id), node_id); + if (ret < 0) + goto err; } + memcpy(pegasus->net->dev_addr, node_id, sizeof(node_id)); + + return; +err: + eth_hw_addr_random(pegasus->net); + dev_info(&pegasus->intf->dev, "software assigned MAC address.\n"); + + return; } static inline int reset_mac(pegasus_t *pegasus) From c381b07941adc2274ce552daf86c94701c5e265a Mon Sep 17 00:00:00 2001 From: Coly Li Date: Fri, 2 Oct 2020 16:27:28 +0800 Subject: [PATCH 302/322] net: introduce helper sendpage_ok() in include/linux/net.h The original problem was from nvme-over-tcp code, who mistakenly uses kernel_sendpage() to send pages allocated by __get_free_pages() without __GFP_COMP flag. Such pages don't have refcount (page_count is 0) on tail pages, sending them by kernel_sendpage() may trigger a kernel panic from a corrupted kernel heap, because these pages are incorrectly freed in network stack as page_count 0 pages. This patch introduces a helper sendpage_ok(), it returns true if the checking page, - is not slab page: PageSlab(page) is false. - has page refcount: page_count(page) is not zero All drivers who want to send page to remote end by kernel_sendpage() may use this helper to check whether the page is OK. If the helper does not return true, the driver should try other non sendpage method (e.g. sock_no_sendpage()) to handle the page. Signed-off-by: Coly Li Cc: Chaitanya Kulkarni Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Jan Kara Cc: Jens Axboe Cc: Mikhail Skorzhinskii Cc: Philipp Reisner Cc: Sagi Grimberg Cc: Vlastimil Babka Cc: stable@vger.kernel.org Signed-off-by: David S. Miller --- include/linux/net.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/include/linux/net.h b/include/linux/net.h index d48ff1180879..ae713c851342 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -286,6 +287,21 @@ do { \ #define net_get_random_once_wait(buf, nbytes) \ get_random_once_wait((buf), (nbytes)) +/* + * E.g. XFS meta- & log-data is in slab pages, or bcache meta + * data pages, or other high order pages allocated by + * __get_free_pages() without __GFP_COMP, which have a page_count + * of 0 and/or have PageSlab() set. We cannot use send_page for + * those, as that does get_page(); put_page(); and would cause + * either a VM_BUG directly, or __page_cache_release a page that + * would actually still be referenced by someone, leading to some + * obscure delayed Oops somewhere else. + */ +static inline bool sendpage_ok(struct page *page) +{ + return !PageSlab(page) && page_count(page) >= 1; +} + int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t len); int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg, From 7b62d31d3f399079e7de7cc43e85d6481170970a Mon Sep 17 00:00:00 2001 From: Coly Li Date: Fri, 2 Oct 2020 16:27:29 +0800 Subject: [PATCH 303/322] net: add WARN_ONCE in kernel_sendpage() for improper zero-copy send If a page sent into kernel_sendpage() is a slab page or it doesn't have ref_count, this page is improper to send by the zero copy sendpage() method. Otherwise such page might be unexpected released in network code path and causes impredictable panic due to kernel memory management data structure corruption. This path adds a WARN_ON() on the sending page before sends it into the concrete zero-copy sendpage() method, if the page is improper for the zero-copy sendpage() method, a warning message can be observed before the consequential unpredictable kernel panic. This patch does not change existing kernel_sendpage() behavior for the improper page zero-copy send, it just provides hint warning message for following potential panic due the kernel memory heap corruption. Signed-off-by: Coly Li Cc: Cong Wang Cc: Christoph Hellwig Cc: David S. Miller Cc: Sridhar Samudrala Signed-off-by: David S. Miller --- net/socket.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/socket.c b/net/socket.c index 0c0144604f81..58cac2da5f66 100644 --- a/net/socket.c +++ b/net/socket.c @@ -3638,9 +3638,11 @@ EXPORT_SYMBOL(kernel_getpeername); int kernel_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) { - if (sock->ops->sendpage) + if (sock->ops->sendpage) { + /* Warn in case the improper page to zero-copy send */ + WARN_ONCE(!sendpage_ok(page), "improper page for zero-copy send"); return sock->ops->sendpage(sock, page, offset, size, flags); - + } return sock_no_sendpage(sock, page, offset, size, flags); } EXPORT_SYMBOL(kernel_sendpage); From 7d4194abfc4de13a2663c7fee6891de8360f7a52 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Fri, 2 Oct 2020 16:27:30 +0800 Subject: [PATCH 304/322] nvme-tcp: check page by sendpage_ok() before calling kernel_sendpage() Currently nvme_tcp_try_send_data() doesn't use kernel_sendpage() to send slab pages. But for pages allocated by __get_free_pages() without __GFP_COMP, which also have refcount as 0, they are still sent by kernel_sendpage() to remote end, this is problematic. The new introduced helper sendpage_ok() checks both PageSlab tag and page_count counter, and returns true if the checking page is OK to be sent by kernel_sendpage(). This patch fixes the page checking issue of nvme_tcp_try_send_data() with sendpage_ok(). If sendpage_ok() returns true, send this page by kernel_sendpage(), otherwise use sock_no_sendpage to handle this page. Signed-off-by: Coly Li Cc: Chaitanya Kulkarni Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Jan Kara Cc: Jens Axboe Cc: Mikhail Skorzhinskii Cc: Philipp Reisner Cc: Sagi Grimberg Cc: Vlastimil Babka Cc: stable@vger.kernel.org Signed-off-by: David S. Miller --- drivers/nvme/host/tcp.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 8f4f29f18b8c..d6a3e1487354 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -913,12 +913,11 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) else flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST; - /* can't zcopy slab pages */ - if (unlikely(PageSlab(page))) { - ret = sock_no_sendpage(queue->sock, page, offset, len, + if (sendpage_ok(page)) { + ret = kernel_sendpage(queue->sock, page, offset, len, flags); } else { - ret = kernel_sendpage(queue->sock, page, offset, len, + ret = sock_no_sendpage(queue->sock, page, offset, len, flags); } if (ret <= 0) From cf83a17edeeb36195596d2dae060a7c381db35f1 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Fri, 2 Oct 2020 16:27:31 +0800 Subject: [PATCH 305/322] tcp: use sendpage_ok() to detect misused .sendpage commit a10674bf2406 ("tcp: detecting the misuse of .sendpage for Slab objects") adds the checks for Slab pages, but the pages don't have page_count are still missing from the check. Network layer's sendpage method is not designed to send page_count 0 pages neither, therefore both PageSlab() and page_count() should be both checked for the sending page. This is exactly what sendpage_ok() does. This patch uses sendpage_ok() in do_tcp_sendpages() to detect misused .sendpage, to make the code more robust. Fixes: a10674bf2406 ("tcp: detecting the misuse of .sendpage for Slab objects") Suggested-by: Eric Dumazet Signed-off-by: Coly Li Cc: Vasily Averin Cc: David S. Miller Cc: stable@vger.kernel.org Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 31f3b858db81..2135ee7c806d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -970,7 +970,8 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); if (IS_ENABLED(CONFIG_DEBUG_VM) && - WARN_ONCE(PageSlab(page), "page must not be a Slab one")) + WARN_ONCE(!sendpage_ok(page), + "page must not be a Slab one and have page_count > 0")) return -EINVAL; /* Wait for a connection to finish. One exception is TCP Fast Open From fb25ebe1b212c37781b23a7f7cd21c0bb2f6eb83 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Fri, 2 Oct 2020 16:27:32 +0800 Subject: [PATCH 306/322] drbd: code cleanup by using sendpage_ok() to check page for kernel_sendpage() In _drbd_send_page() a page is checked by following code before sending it by kernel_sendpage(), (page_count(page) < 1) || PageSlab(page) If the check is true, this page won't be send by kernel_sendpage() and handled by sock_no_sendpage(). This kind of check is exactly what macro sendpage_ok() does, which is introduced into include/linux/net.h to solve a similar send page issue in nvme-tcp code. This patch uses macro sendpage_ok() to replace the open coded checks to page type and refcount in _drbd_send_page(), as a code cleanup. Signed-off-by: Coly Li Cc: Philipp Reisner Cc: Sagi Grimberg Signed-off-by: David S. Miller --- drivers/block/drbd/drbd_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 04b6bde9419d..573dbf6f0c31 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1553,7 +1553,7 @@ static int _drbd_send_page(struct drbd_peer_device *peer_device, struct page *pa * put_page(); and would cause either a VM_BUG directly, or * __page_cache_release a page that would actually still be referenced * by someone, leading to some obscure delayed Oops somewhere else. */ - if (drbd_disable_sendpage || (page_count(page) < 1) || PageSlab(page)) + if (drbd_disable_sendpage || !sendpage_ok(page)) return _drbd_no_send_page(peer_device, page, offset, size, msg_flags); msg_flags |= MSG_NOSIGNAL; From 6aa25c737705466020e74ad44679d3758b8961f6 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Fri, 2 Oct 2020 16:27:33 +0800 Subject: [PATCH 307/322] scsi: libiscsi: use sendpage_ok() in iscsi_tcp_segment_map() In iscsci driver, iscsi_tcp_segment_map() uses the following code to check whether the page should or not be handled by sendpage: if (!recv && page_count(sg_page(sg)) >= 1 && !PageSlab(sg_page(sg))) The "page_count(sg_page(sg)) >= 1 && !PageSlab(sg_page(sg)" part is to make sure the page can be sent to network layer's zero copy path. This part is exactly what sendpage_ok() does. This patch uses use sendpage_ok() in iscsi_tcp_segment_map() to replace the original open coded checks. Signed-off-by: Coly Li Reviewed-by: Lee Duncan Acked-by: Martin K. Petersen Cc: Vasily Averin Cc: Cong Wang Cc: Mike Christie Cc: Chris Leech Cc: Christoph Hellwig Cc: Hannes Reinecke Signed-off-by: David S. Miller --- drivers/scsi/libiscsi_tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c index 37e5d4e48c2f..83f14b2c8804 100644 --- a/drivers/scsi/libiscsi_tcp.c +++ b/drivers/scsi/libiscsi_tcp.c @@ -128,7 +128,7 @@ static void iscsi_tcp_segment_map(struct iscsi_segment *segment, int recv) * coalescing neighboring slab objects into a single frag which * triggers one of hardened usercopy checks. */ - if (!recv && page_count(sg_page(sg)) >= 1 && !PageSlab(sg_page(sg))) + if (!recv && sendpage_ok(sg_page(sg))) return; if (recv) { From 40efc4dc73956e1fab177783e55f30117517c542 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Fri, 2 Oct 2020 16:27:34 +0800 Subject: [PATCH 308/322] libceph: use sendpage_ok() in ceph_tcp_sendpage() In libceph, ceph_tcp_sendpage() does the following checks before handle the page by network layer's zero copy sendpage method, if (page_count(page) >= 1 && !PageSlab(page)) This check is exactly what sendpage_ok() does. This patch replace the open coded checks by sendpage_ok() as a code cleanup. Signed-off-by: Coly Li Acked-by: Jeff Layton Cc: Ilya Dryomov Signed-off-by: David S. Miller --- net/ceph/messenger.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index bdfd66ba3843..d4d7a0e52491 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -575,7 +575,7 @@ static int ceph_tcp_sendpage(struct socket *sock, struct page *page, * coalescing neighboring slab objects into a single frag which * triggers one of hardened usercopy checks. */ - if (page_count(page) >= 1 && !PageSlab(page)) + if (sendpage_ok(page)) sendpage = sock->ops->sendpage; else sendpage = sock_no_sendpage; From 9d8c05ad5627b3a650087c14c67dd1d22a6368ab Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 2 Oct 2020 12:39:44 +0200 Subject: [PATCH 309/322] tcp: fix syn cookied MPTCP request socket leak If a syn-cookies request socket don't pass MPTCP-level validation done in syn_recv_sock(), we need to release it immediately, or it will be leaked. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/89 Fixes: 9466a1ccebbe ("mptcp: enable JOIN requests even if cookies are in use") Reported-and-tested-by: Geliang Tang Reviewed-by: Matthieu Baerts Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/ipv4/syncookies.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index f0794f0232ba..e03756631541 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -214,7 +214,7 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, sock_rps_save_rxhash(child, skb); if (rsk_drop_req(req)) { - refcount_set(&req->rsk_refcnt, 2); + reqsk_put(req); return child; } From b502e6ecdc3b6d381bd72c5f879bc1e00d6fe7db Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 29 Sep 2020 08:31:32 -0400 Subject: [PATCH 310/322] KVM: VMX: update PFEC_MASK/PFEC_MATCH together with PF intercept The PFEC_MASK and PFEC_MATCH fields in the VMCS reverse the meaning of the #PF intercept bit in the exception bitmap when they do not match. This means that, if PFEC_MASK and/or PFEC_MATCH are set, the hypervisor can get a vmexit for #PF exceptions even when the corresponding bit is clear in the exception bitmap. This is unexpected and is promptly detected by a WARN_ON_ONCE. To fix it, reset PFEC_MASK and PFEC_MATCH when the #PF intercept is disabled (as is common with enable_ept && !allow_smaller_maxphyaddr). Reported-by: Qian Cai > Reported-by: Naresh Kamboju Tested-by: Naresh Kamboju Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index f0384e93548a..f4e9c310032a 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -794,6 +794,18 @@ void update_exception_bitmap(struct kvm_vcpu *vcpu) */ if (is_guest_mode(vcpu)) eb |= get_vmcs12(vcpu)->exception_bitmap; + else { + /* + * If EPT is enabled, #PF is only trapped if MAXPHYADDR is mismatched + * between guest and host. In that case we only care about present + * faults. For vmcs02, however, PFEC_MASK and PFEC_MATCH are set in + * prepare_vmcs02_rare. + */ + bool selective_pf_trap = enable_ept && (eb & (1u << PF_VECTOR)); + int mask = selective_pf_trap ? PFERR_PRESENT_MASK : 0; + vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, mask); + vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, mask); + } vmcs_write32(EXCEPTION_BITMAP, eb); } @@ -4355,16 +4367,6 @@ static void init_vmcs(struct vcpu_vmx *vmx) vmx->pt_desc.guest.output_mask = 0x7F; vmcs_write64(GUEST_IA32_RTIT_CTL, 0); } - - /* - * If EPT is enabled, #PF is only trapped if MAXPHYADDR is mismatched - * between guest and host. In that case we only care about present - * faults. - */ - if (enable_ept) { - vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, PFERR_PRESENT_MASK); - vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, PFERR_PRESENT_MASK); - } } static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) From 484cfaca95925f1a38ded6d0561de06a70409a32 Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Fri, 2 Oct 2020 22:21:41 -0700 Subject: [PATCH 311/322] mm, slub: restore initial kmem_cache flags The routine that applies debug flags to the kmem_cache slabs inadvertantly prevents non-debug flags from being applied to those same objects. That is, if slub_debug=, is specified, non-debugged slabs will end up having flags of zero, and the slabs may be unusable. Fix this by including the input flags for non-matching slabs with the contents of slub_debug, so that the caches are created as expected alongside any debugging options that may be requested. With this, we can remove the check for a NULL slub_debug_string, since it's covered by the loop itself. Fixes: e17f1dfba37b ("mm, slub: extend slub_debug syntax for multiple blocks") Signed-off-by: Eric Farman Signed-off-by: Andrew Morton Acked-by: Vlastimil Babka Cc: Kees Cook Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Link: https://lkml.kernel.org/r/20200930161931.28575-1-farman@linux.ibm.com Signed-off-by: Linus Torvalds --- mm/slub.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index d4177aecedf6..6d3574013b2f 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1413,10 +1413,6 @@ slab_flags_t kmem_cache_flags(unsigned int object_size, char *next_block; slab_flags_t block_flags; - /* If slub_debug = 0, it folds into the if conditional. */ - if (!slub_debug_string) - return flags | slub_debug; - len = strlen(name); next_block = slub_debug_string; /* Go through all blocks of debug options, see if any matches our slab's name */ @@ -1450,7 +1446,7 @@ slab_flags_t kmem_cache_flags(unsigned int object_size, } } - return slub_debug; + return flags | slub_debug; } #else /* !CONFIG_SLUB_DEBUG */ static inline void setup_object_debug(struct kmem_cache *s, From 1d91df85f399adbe4f318f3e74ac5a5d84c0ca7c Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Fri, 2 Oct 2020 22:21:45 -0700 Subject: [PATCH 312/322] mm/page_alloc: handle a missing case for memalloc_nocma_{save/restore} APIs memalloc_nocma_{save/restore} APIs can be used to skip page allocation on CMA area, but, there is a missing case and the page on CMA area could be allocated even if APIs are used. This patch handles this case to fix the potential issue. For now, these APIs are used to prevent long-term pinning on the CMA page. When the long-term pinning is requested on the CMA page, it is migrated to the non-CMA page before pinning. This non-CMA page is allocated by using memalloc_nocma_{save/restore} APIs. If APIs doesn't work as intended, the CMA page is allocated and it is pinned for a long time. This long-term pin for the CMA page causes cma_alloc() failure and it could result in wrong behaviour on the device driver who uses the cma_alloc(). Missing case is an allocation from the pcplist. MIGRATE_MOVABLE pcplist could have the pages on CMA area so we need to skip it if ALLOC_CMA isn't specified. Fixes: 8510e69c8efe (mm/page_alloc: fix memalloc_nocma_{save/restore} APIs) Signed-off-by: Joonsoo Kim Signed-off-by: Andrew Morton Acked-by: Vlastimil Babka Acked-by: Michal Hocko Cc: "Aneesh Kumar K . V" Cc: Mel Gorman Link: https://lkml.kernel.org/r/1601429472-12599-1-git-send-email-iamjoonsoo.kim@lge.com Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 5661fa164f13..6866533de8e6 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3367,9 +3367,16 @@ struct page *rmqueue(struct zone *preferred_zone, struct page *page; if (likely(order == 0)) { - page = rmqueue_pcplist(preferred_zone, zone, gfp_flags, + /* + * MIGRATE_MOVABLE pcplist could have the pages on CMA area and + * we need to skip it when CMA area isn't allowed. + */ + if (!IS_ENABLED(CONFIG_CMA) || alloc_flags & ALLOC_CMA || + migratetype != MIGRATE_MOVABLE) { + page = rmqueue_pcplist(preferred_zone, zone, gfp_flags, migratetype, alloc_flags); - goto out; + goto out; + } } /* @@ -3381,7 +3388,13 @@ struct page *rmqueue(struct zone *preferred_zone, do { page = NULL; - if (alloc_flags & ALLOC_HARDER) { + /* + * order-0 request can reach here when the pcplist is skipped + * due to non-CMA allocation context. HIGHATOMIC area is + * reserved for high-order atomic allocation, so order-0 + * request should skip it. + */ + if (order > 0 && alloc_flags & ALLOC_HARDER) { page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC); if (page) trace_mm_page_alloc_zone_locked(page, order, migratetype); From d43ca1386bf21b783d618e3a5f61c3b2e8759df2 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 2 Oct 2020 22:21:48 -0700 Subject: [PATCH 313/322] scripts/spelling.txt: fix malformed entry One of the entries has three fields "mistake||correction||correction" rather than the expected two fields "mistake||correction". Fix it. Signed-off-by: Eric Biggers Signed-off-by: Andrew Morton Link: https://lkml.kernel.org/r/20200930234359.255295-1-ebiggers@kernel.org Signed-off-by: Linus Torvalds --- scripts/spelling.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/spelling.txt b/scripts/spelling.txt index f253681e7e2a..feb2efaaa5e6 100644 --- a/scripts/spelling.txt +++ b/scripts/spelling.txt @@ -589,7 +589,7 @@ explictly||explicitly expresion||expression exprimental||experimental extened||extended -exteneded||extended||extended +exteneded||extended extensability||extensibility extention||extension extenstion||extension From 388e201d41fa1ed8f2dce0f0567f56f8e919ffb0 Mon Sep 17 00:00:00 2001 From: "Vineetha G. Jaya Kumaran" Date: Thu, 1 Oct 2020 23:56:09 +0800 Subject: [PATCH 314/322] net: stmmac: Modify configuration method of EEE timers Ethtool manual stated that the tx-timer is the "the amount of time the device should stay in idle mode prior to asserting its Tx LPI". The previous implementation for "ethtool --set-eee tx-timer" sets the LPI TW timer duration which is not correct. Hence, this patch fixes the "ethtool --set-eee tx-timer" to configure the EEE LPI timer. The LPI TW Timer will be using the defined default value instead of "ethtool --set-eee tx-timer" which follows the EEE LS timer implementation. Changelog V2 *Not removing/modifying the eee_timer. *EEE LPI timer can be configured through ethtool and also the eee_timer module param. *EEE TW Timer will be configured with default value only, not able to be configured through ethtool or module param. This follows the implementation of the EEE LS Timer. Fixes: d765955d2ae0 ("stmmac: add the Energy Efficient Ethernet support") Signed-off-by: Vineetha G. Jaya Kumaran Signed-off-by: Voon Weifeng Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac.h | 2 ++ .../ethernet/stmicro/stmmac/stmmac_ethtool.c | 12 +++++++++- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 23 ++++++++++++------- 3 files changed, 28 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 9c02fc754bf1..545696971f65 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -203,6 +203,8 @@ struct stmmac_priv { int eee_enabled; int eee_active; int tx_lpi_timer; + int tx_lpi_enabled; + int eee_tw_timer; unsigned int mode; unsigned int chain_mode; int extend_desc; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index 430a4b32ec1e..814879f91f76 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -665,6 +665,7 @@ static int stmmac_ethtool_op_get_eee(struct net_device *dev, edata->eee_enabled = priv->eee_enabled; edata->eee_active = priv->eee_active; edata->tx_lpi_timer = priv->tx_lpi_timer; + edata->tx_lpi_enabled = priv->tx_lpi_enabled; return phylink_ethtool_get_eee(priv->phylink, edata); } @@ -678,6 +679,10 @@ static int stmmac_ethtool_op_set_eee(struct net_device *dev, if (!priv->dma_cap.eee) return -EOPNOTSUPP; + if (priv->tx_lpi_enabled != edata->tx_lpi_enabled) + netdev_warn(priv->dev, + "Setting EEE tx-lpi is not supported\n"); + if (!edata->eee_enabled) stmmac_disable_eee_mode(priv); @@ -685,7 +690,12 @@ static int stmmac_ethtool_op_set_eee(struct net_device *dev, if (ret) return ret; - priv->tx_lpi_timer = edata->tx_lpi_timer; + if (edata->eee_enabled && + priv->tx_lpi_timer != edata->tx_lpi_timer) { + priv->tx_lpi_timer = edata->tx_lpi_timer; + stmmac_eee_init(priv); + } + return 0; } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 89b2b3472852..b56b13d64ab4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -94,7 +94,7 @@ static const u32 default_msg_level = (NETIF_MSG_DRV | NETIF_MSG_PROBE | static int eee_timer = STMMAC_DEFAULT_LPI_TIMER; module_param(eee_timer, int, 0644); MODULE_PARM_DESC(eee_timer, "LPI tx expiration time in msec"); -#define STMMAC_LPI_T(x) (jiffies + msecs_to_jiffies(x)) +#define STMMAC_LPI_T(x) (jiffies + usecs_to_jiffies(x)) /* By default the driver will use the ring mode to manage tx and rx descriptors, * but allow user to force to use the chain instead of the ring @@ -370,7 +370,7 @@ static void stmmac_eee_ctrl_timer(struct timer_list *t) struct stmmac_priv *priv = from_timer(priv, t, eee_ctrl_timer); stmmac_enable_eee_mode(priv); - mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(eee_timer)); + mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(priv->tx_lpi_timer)); } /** @@ -383,7 +383,7 @@ static void stmmac_eee_ctrl_timer(struct timer_list *t) */ bool stmmac_eee_init(struct stmmac_priv *priv) { - int tx_lpi_timer = priv->tx_lpi_timer; + int eee_tw_timer = priv->eee_tw_timer; /* Using PCS we cannot dial with the phy registers at this stage * so we do not support extra feature like EEE. @@ -403,7 +403,7 @@ bool stmmac_eee_init(struct stmmac_priv *priv) if (priv->eee_enabled) { netdev_dbg(priv->dev, "disable EEE\n"); del_timer_sync(&priv->eee_ctrl_timer); - stmmac_set_eee_timer(priv, priv->hw, 0, tx_lpi_timer); + stmmac_set_eee_timer(priv, priv->hw, 0, eee_tw_timer); } mutex_unlock(&priv->lock); return false; @@ -411,11 +411,12 @@ bool stmmac_eee_init(struct stmmac_priv *priv) if (priv->eee_active && !priv->eee_enabled) { timer_setup(&priv->eee_ctrl_timer, stmmac_eee_ctrl_timer, 0); - mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(eee_timer)); stmmac_set_eee_timer(priv, priv->hw, STMMAC_DEFAULT_LIT_LS, - tx_lpi_timer); + eee_tw_timer); } + mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(priv->tx_lpi_timer)); + mutex_unlock(&priv->lock); netdev_dbg(priv->dev, "Energy-Efficient Ethernet initialized\n"); return true; @@ -930,6 +931,7 @@ static void stmmac_mac_link_down(struct phylink_config *config, stmmac_mac_set(priv, priv->ioaddr, false); priv->eee_active = false; + priv->tx_lpi_enabled = false; stmmac_eee_init(priv); stmmac_set_eee_pls(priv, priv->hw, false); } @@ -1027,6 +1029,7 @@ static void stmmac_mac_link_up(struct phylink_config *config, if (phy && priv->dma_cap.eee) { priv->eee_active = phy_init_eee(phy, 1) >= 0; priv->eee_enabled = stmmac_eee_init(priv); + priv->tx_lpi_enabled = priv->eee_enabled; stmmac_set_eee_pls(priv, priv->hw, true); } } @@ -2061,7 +2064,7 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue) if ((priv->eee_enabled) && (!priv->tx_path_in_lpi_mode)) { stmmac_enable_eee_mode(priv); - mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(eee_timer)); + mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(priv->tx_lpi_timer)); } /* We still have pending packets, let's call for a new scheduling */ @@ -2694,7 +2697,11 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp) netdev_warn(priv->dev, "PTP init failed\n"); } - priv->tx_lpi_timer = STMMAC_DEFAULT_TWT_LS; + priv->eee_tw_timer = STMMAC_DEFAULT_TWT_LS; + + /* Convert the timer from msec to usec */ + if (!priv->tx_lpi_timer) + priv->tx_lpi_timer = eee_timer * 1000; if (priv->use_riwt) { if (!priv->rx_riwt) From 1f7e877c20517735bceff1535e1b7fa846b2f215 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 1 Oct 2020 10:54:49 -0700 Subject: [PATCH 315/322] net: hinic: fix DEVLINK build errors Fix many (lots deleted here) build errors in hinic by selecting NET_DEVLINK. ld: drivers/net/ethernet/huawei/hinic/hinic_hw_dev.o: in function `mgmt_watchdog_timeout_event_handler': hinic_hw_dev.c:(.text+0x30a): undefined reference to `devlink_health_report' ld: drivers/net/ethernet/huawei/hinic/hinic_devlink.o: in function `hinic_fw_reporter_dump': hinic_devlink.c:(.text+0x1c): undefined reference to `devlink_fmsg_u32_pair_put' ld: drivers/net/ethernet/huawei/hinic/hinic_devlink.o: in function `hinic_fw_reporter_dump': hinic_devlink.c:(.text+0x126): undefined reference to `devlink_fmsg_binary_pair_put' ld: drivers/net/ethernet/huawei/hinic/hinic_devlink.o: in function `hinic_hw_reporter_dump': hinic_devlink.c:(.text+0x1ba): undefined reference to `devlink_fmsg_string_pair_put' ld: hinic_devlink.c:(.text+0x227): undefined reference to `devlink_fmsg_u8_pair_put' ld: drivers/net/ethernet/huawei/hinic/hinic_devlink.o: in function `hinic_devlink_alloc': hinic_devlink.c:(.text+0xaee): undefined reference to `devlink_alloc' ld: drivers/net/ethernet/huawei/hinic/hinic_devlink.o: in function `hinic_devlink_free': hinic_devlink.c:(.text+0xb04): undefined reference to `devlink_free' ld: drivers/net/ethernet/huawei/hinic/hinic_devlink.o: in function `hinic_devlink_register': hinic_devlink.c:(.text+0xb26): undefined reference to `devlink_register' ld: drivers/net/ethernet/huawei/hinic/hinic_devlink.o: in function `hinic_devlink_unregister': hinic_devlink.c:(.text+0xb46): undefined reference to `devlink_unregister' ld: drivers/net/ethernet/huawei/hinic/hinic_devlink.o: in function `hinic_health_reporters_create': hinic_devlink.c:(.text+0xb75): undefined reference to `devlink_health_reporter_create' ld: hinic_devlink.c:(.text+0xb95): undefined reference to `devlink_health_reporter_create' ld: hinic_devlink.c:(.text+0xbac): undefined reference to `devlink_health_reporter_destroy' ld: drivers/net/ethernet/huawei/hinic/hinic_devlink.o: in function `hinic_health_reporters_destroy': Fixes: 51ba902a16e6 ("net-next/hinic: Initialize hw interface") Signed-off-by: Randy Dunlap Cc: Bin Luo Cc: "David S. Miller" Cc: Jakub Kicinski Cc: Aviad Krawczyk Cc: Zhao Chen Signed-off-by: David S. Miller --- drivers/net/ethernet/huawei/hinic/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/huawei/hinic/Kconfig b/drivers/net/ethernet/huawei/hinic/Kconfig index 936e2dd3bb13..b47bd5440c5f 100644 --- a/drivers/net/ethernet/huawei/hinic/Kconfig +++ b/drivers/net/ethernet/huawei/hinic/Kconfig @@ -6,6 +6,7 @@ config HINIC tristate "Huawei Intelligent PCIE Network Interface Card" depends on (PCI_MSI && (X86 || ARM64)) + select NET_DEVLINK help This driver supports HiNIC PCIE Ethernet cards. To compile this driver as part of the kernel, choose Y here. From 790ca79d3e7f76d77291540e1728f400c6ad931f Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 2 Oct 2020 21:47:43 +0200 Subject: [PATCH 316/322] net: typhoon: Fix a typo Typoon --> Typhoon s/Typoon/Typhoon/ Signed-off-by: Christophe JAILLET Signed-off-by: David S. Miller --- drivers/net/ethernet/3com/typhoon.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/3com/typhoon.h b/drivers/net/ethernet/3com/typhoon.h index 2f634c64d5d1..38e6dcab4e94 100644 --- a/drivers/net/ethernet/3com/typhoon.h +++ b/drivers/net/ethernet/3com/typhoon.h @@ -33,7 +33,7 @@ struct basic_ring { u32 lastWrite; }; -/* The Typoon transmit ring -- same as a basic ring, plus: +/* The Typhoon transmit ring -- same as a basic ring, plus: * lastRead: where we're at in regard to cleaning up the ring * writeRegister: register to use for writing (different for Hi & Lo rings) */ From 9a9e77495958c7382b2438bc19746dd3aaaabb8e Mon Sep 17 00:00:00 2001 From: Anant Thazhemadam Date: Mon, 5 Oct 2020 02:25:36 +0530 Subject: [PATCH 317/322] net: team: fix memory leak in __team_options_register The variable "i" isn't initialized back correctly after the first loop under the label inst_rollback gets executed. The value of "i" is assigned to be option_count - 1, and the ensuing loop (under alloc_rollback) begins by initializing i--. Thus, the value of i when the loop begins execution will now become i = option_count - 2. Thus, when kfree(dst_opts[i]) is called in the second loop in this order, (i.e., inst_rollback followed by alloc_rollback), dst_optsp[option_count - 2] is the first element freed, and dst_opts[option_count - 1] does not get freed, and thus, a memory leak is caused. This memory leak can be fixed, by assigning i = option_count (instead of option_count - 1). Fixes: 80f7c6683fe0 ("team: add support for per-port options") Reported-by: syzbot+69b804437cfec30deac3@syzkaller.appspotmail.com Tested-by: syzbot+69b804437cfec30deac3@syzkaller.appspotmail.com Signed-off-by: Anant Thazhemadam Signed-off-by: David S. Miller --- drivers/net/team/team.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 69dfb1a49cc8..bcc4a4c011f1 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -287,7 +287,7 @@ inst_rollback: for (i--; i >= 0; i--) __team_option_inst_del_option(team, dst_opts[i]); - i = option_count - 1; + i = option_count; alloc_rollback: for (i--; i >= 0; i--) kfree(dst_opts[i]); From 580e4273d7a883ececfefa692c1f96bdbacb99b5 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 2 Oct 2020 12:13:34 -0700 Subject: [PATCH 318/322] net_sched: check error pointer in tcf_dump_walker() Although we take RTNL on dump path, it is possible to skip RTNL on insertion path. So the following race condition is possible: rtnl_lock() // no rtnl lock mutex_lock(&idrinfo->lock); // insert ERR_PTR(-EBUSY) mutex_unlock(&idrinfo->lock); tc_dump_action() rtnl_unlock() So we have to skip those temporary -EBUSY entries on dump path too. Reported-and-tested-by: syzbot+b47bc4f247856fb4d9e1@syzkaller.appspotmail.com Fixes: 0fedc63fadf0 ("net_sched: commit action insertions together") Cc: Vlad Buslov Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_api.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 5612b336e18e..798430e1a79f 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -235,6 +235,8 @@ static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, index++; if (index < s_i) continue; + if (IS_ERR(p)) + continue; if (jiffy_since && time_after(jiffy_since, From f4544e5361da5050ff5c0330ceea095cb5dbdd72 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Sat, 3 Oct 2020 11:51:21 -0700 Subject: [PATCH 319/322] net: mvneta: fix double free of txq->buf clang static analysis reports this problem: drivers/net/ethernet/marvell/mvneta.c:3465:2: warning: Attempt to free released memory kfree(txq->buf); ^~~~~~~~~~~~~~~ When mvneta_txq_sw_init() fails to alloc txq->tso_hdrs, it frees without poisoning txq->buf. The error is caught in the mvneta_setup_txqs() caller which handles the error by cleaning up all of the txqs with a call to mvneta_txq_sw_deinit which also frees txq->buf. Since mvneta_txq_sw_deinit is a general cleaner, all of the partial cleaning in mvneta_txq_sw_deinit()'s error handling is not needed. Fixes: 2adb719d74f6 ("net: mvneta: Implement software TSO") Signed-off-by: Tom Rix Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index c4345e3d616f..5bf0409f5d42 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -3400,24 +3400,15 @@ static int mvneta_txq_sw_init(struct mvneta_port *pp, txq->last_desc = txq->size - 1; txq->buf = kmalloc_array(txq->size, sizeof(*txq->buf), GFP_KERNEL); - if (!txq->buf) { - dma_free_coherent(pp->dev->dev.parent, - txq->size * MVNETA_DESC_ALIGNED_SIZE, - txq->descs, txq->descs_phys); + if (!txq->buf) return -ENOMEM; - } /* Allocate DMA buffers for TSO MAC/IP/TCP headers */ txq->tso_hdrs = dma_alloc_coherent(pp->dev->dev.parent, txq->size * TSO_HEADER_SIZE, &txq->tso_hdrs_phys, GFP_KERNEL); - if (!txq->tso_hdrs) { - kfree(txq->buf); - dma_free_coherent(pp->dev->dev.parent, - txq->size * MVNETA_DESC_ALIGNED_SIZE, - txq->descs, txq->descs_phys); + if (!txq->tso_hdrs) return -ENOMEM; - } /* Setup XPS mapping */ if (txq_number > 1) From 4296adc3e32f5d544a95061160fe7e127be1b9ff Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 2 Oct 2020 21:53:08 +0200 Subject: [PATCH 320/322] net/core: check length before updating Ethertype in skb_mpls_{push,pop} Openvswitch allows to drop a packet's Ethernet header, therefore skb_mpls_push() and skb_mpls_pop() might be called with ethernet=true and mac_len=0. In that case the pointer passed to skb_mod_eth_type() doesn't point to an Ethernet header and the new Ethertype is written at unexpected locations. Fix this by verifying that mac_len is big enough to contain an Ethernet header. Fixes: fa4e0f8855fc ("net/sched: fix corrupted L2 header with MPLS 'push' and 'pop' actions") Signed-off-by: Guillaume Nault Acked-by: Davide Caratti Signed-off-by: David S. Miller --- net/core/skbuff.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 6faf73d6a0f7..2b48cb0cc684 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -5622,7 +5622,7 @@ int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto, lse->label_stack_entry = mpls_lse; skb_postpush_rcsum(skb, lse, MPLS_HLEN); - if (ethernet) + if (ethernet && mac_len >= ETH_HLEN) skb_mod_eth_type(skb, eth_hdr(skb), mpls_proto); skb->protocol = mpls_proto; @@ -5662,7 +5662,7 @@ int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto, int mac_len, skb_reset_mac_header(skb); skb_set_network_header(skb, mac_len); - if (ethernet) { + if (ethernet && mac_len >= ETH_HLEN) { struct ethhdr *hdr; /* use mpls_hdr() to get ethertype to account for VLANs. */ From 549738f15da0e5a00275977623be199fbbf7df50 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 4 Oct 2020 16:04:34 -0700 Subject: [PATCH 321/322] Linux 5.9-rc8 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 992d24467ca0..f84d7e4ca0be 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 9 SUBLEVEL = 0 -EXTRAVERSION = -rc7 +EXTRAVERSION = -rc8 NAME = Kleptomaniac Octopus # *DOCUMENTATION* From 720ef73d1a239e33c3ad8fac356b9b1348e68aaf Mon Sep 17 00:00:00 2001 From: Aaron Ma Date: Sat, 3 Oct 2020 01:09:16 +0800 Subject: [PATCH 322/322] platform/x86: thinkpad_acpi: re-initialize ACPI buffer size when reuse Evaluating ACPI _BCL could fail, then ACPI buffer size will be set to 0. When reuse this ACPI buffer, AE_BUFFER_OVERFLOW will be triggered. Re-initialize buffer size will make ACPI evaluate successfully. Fixes: 46445b6b896fd ("thinkpad-acpi: fix handle locate for video and query of _BCL") Signed-off-by: Aaron Ma Signed-off-by: Andy Shevchenko --- drivers/platform/x86/thinkpad_acpi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 7cb3b71c67fe..e5a62fa9a879 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -6829,8 +6829,10 @@ static int __init tpacpi_query_bcl_levels(acpi_handle handle) list_for_each_entry(child, &device->children, node) { acpi_status status = acpi_evaluate_object(child->handle, "_BCL", NULL, &buffer); - if (ACPI_FAILURE(status)) + if (ACPI_FAILURE(status)) { + buffer.length = ACPI_ALLOCATE_BUFFER; continue; + } obj = (union acpi_object *)buffer.pointer; if (!obj || (obj->type != ACPI_TYPE_PACKAGE)) {