From a500f3bd787f8224341e44b238f318c407b10897 Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Tue, 28 Jan 2020 12:57:39 +0000 Subject: [PATCH 001/372] iio: adc: at91-sama5d2_adc: fix differential channels in triggered mode The differential channels require writing the channel offset register (COR). Otherwise they do not work in differential mode. The configuration of COR is missing in triggered mode. Fixes: 5e1a1da0f8c9 ("iio: adc: at91-sama5d2_adc: add hw trigger and buffer support") Signed-off-by: Eugen Hristev Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/at91-sama5d2_adc.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/iio/adc/at91-sama5d2_adc.c b/drivers/iio/adc/at91-sama5d2_adc.c index a5c7771227d5..9d96f7d08b95 100644 --- a/drivers/iio/adc/at91-sama5d2_adc.c +++ b/drivers/iio/adc/at91-sama5d2_adc.c @@ -723,6 +723,7 @@ static int at91_adc_configure_trigger(struct iio_trigger *trig, bool state) for_each_set_bit(bit, indio->active_scan_mask, indio->num_channels) { struct iio_chan_spec const *chan = at91_adc_chan_get(indio, bit); + u32 cor; if (!chan) continue; @@ -731,6 +732,20 @@ static int at91_adc_configure_trigger(struct iio_trigger *trig, bool state) chan->type == IIO_PRESSURE) continue; + if (state) { + cor = at91_adc_readl(st, AT91_SAMA5D2_COR); + + if (chan->differential) + cor |= (BIT(chan->channel) | + BIT(chan->channel2)) << + AT91_SAMA5D2_COR_DIFF_OFFSET; + else + cor &= ~(BIT(chan->channel) << + AT91_SAMA5D2_COR_DIFF_OFFSET); + + at91_adc_writel(st, AT91_SAMA5D2_COR, cor); + } + if (state) { at91_adc_writel(st, AT91_SAMA5D2_CHER, BIT(chan->channel)); From e19ac9d9a978f8238a85a28ed624094a497d5ae6 Mon Sep 17 00:00:00 2001 From: Olivier Moysan Date: Tue, 21 Jan 2020 12:02:56 +0100 Subject: [PATCH 002/372] iio: adc: stm32-dfsdm: fix sleep in atomic context This commit fixes the error message: "BUG: sleeping function called from invalid context at kernel/irq/chip.c" Suppress the trigger irq handler. Make the buffer transfers directly in DMA callback, instead. Push buffers without timestamps, as timestamps are not supported in DFSDM driver. Fixes: 11646e81d775 ("iio: adc: stm32-dfsdm: add support for buffer modes") Signed-off-by: Olivier Moysan Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/adc/stm32-dfsdm-adc.c | 43 +++++++------------------------ 1 file changed, 10 insertions(+), 33 deletions(-) diff --git a/drivers/iio/adc/stm32-dfsdm-adc.c b/drivers/iio/adc/stm32-dfsdm-adc.c index 2aad2cda6943..76a60d93fe23 100644 --- a/drivers/iio/adc/stm32-dfsdm-adc.c +++ b/drivers/iio/adc/stm32-dfsdm-adc.c @@ -842,31 +842,6 @@ static inline void stm32_dfsdm_process_data(struct stm32_dfsdm_adc *adc, } } -static irqreturn_t stm32_dfsdm_adc_trigger_handler(int irq, void *p) -{ - struct iio_poll_func *pf = p; - struct iio_dev *indio_dev = pf->indio_dev; - struct stm32_dfsdm_adc *adc = iio_priv(indio_dev); - int available = stm32_dfsdm_adc_dma_residue(adc); - - while (available >= indio_dev->scan_bytes) { - s32 *buffer = (s32 *)&adc->rx_buf[adc->bufi]; - - stm32_dfsdm_process_data(adc, buffer); - - iio_push_to_buffers_with_timestamp(indio_dev, buffer, - pf->timestamp); - available -= indio_dev->scan_bytes; - adc->bufi += indio_dev->scan_bytes; - if (adc->bufi >= adc->buf_sz) - adc->bufi = 0; - } - - iio_trigger_notify_done(indio_dev->trig); - - return IRQ_HANDLED; -} - static void stm32_dfsdm_dma_buffer_done(void *data) { struct iio_dev *indio_dev = data; @@ -874,11 +849,6 @@ static void stm32_dfsdm_dma_buffer_done(void *data) int available = stm32_dfsdm_adc_dma_residue(adc); size_t old_pos; - if (indio_dev->currentmode & INDIO_BUFFER_TRIGGERED) { - iio_trigger_poll_chained(indio_dev->trig); - return; - } - /* * FIXME: In Kernel interface does not support cyclic DMA buffer,and * offers only an interface to push data samples per samples. @@ -906,7 +876,15 @@ static void stm32_dfsdm_dma_buffer_done(void *data) adc->bufi = 0; old_pos = 0; } - /* regular iio buffer without trigger */ + /* + * In DMA mode the trigger services of IIO are not used + * (e.g. no call to iio_trigger_poll). + * Calling irq handler associated to the hardware trigger is not + * relevant as the conversions have already been done. Data + * transfers are performed directly in DMA callback instead. + * This implementation avoids to call trigger irq handler that + * may sleep, in an atomic context (DMA irq handler context). + */ if (adc->dev_data->type == DFSDM_IIO) iio_push_to_buffers(indio_dev, buffer); } @@ -1536,8 +1514,7 @@ static int stm32_dfsdm_adc_init(struct iio_dev *indio_dev) } ret = iio_triggered_buffer_setup(indio_dev, - &iio_pollfunc_store_time, - &stm32_dfsdm_adc_trigger_handler, + &iio_pollfunc_store_time, NULL, &stm32_dfsdm_buffer_setup_ops); if (ret) { stm32_dfsdm_dma_release(indio_dev); From d560bb42814ca7ff6c9944310644d45eb53ca1ae Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 30 Jan 2020 20:24:03 +0100 Subject: [PATCH 003/372] ARC: Cleanup old Kconfig IO scheduler options CONFIG_IOSCHED_DEADLINE and CONFIG_IOSCHED_CFQ are gone since commit f382fb0bcef4 ("block: remove legacy IO schedulers"). The IOSCHED_DEADLINE was replaced by MQ_IOSCHED_DEADLINE and it will be now enabled by default (along with MQ_IOSCHED_KYBER). Signed-off-by: Krzysztof Kozlowski Signed-off-by: Vineet Gupta --- arch/arc/configs/nps_defconfig | 2 -- arch/arc/configs/nsimosci_defconfig | 2 -- arch/arc/configs/nsimosci_hs_defconfig | 2 -- arch/arc/configs/nsimosci_hs_smp_defconfig | 2 -- 4 files changed, 8 deletions(-) diff --git a/arch/arc/configs/nps_defconfig b/arch/arc/configs/nps_defconfig index 07f26ed39f02..f7a978dfdf1d 100644 --- a/arch/arc/configs/nps_defconfig +++ b/arch/arc/configs/nps_defconfig @@ -21,8 +21,6 @@ CONFIG_MODULES=y CONFIG_MODULE_FORCE_LOAD=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set -# CONFIG_IOSCHED_DEADLINE is not set -# CONFIG_IOSCHED_CFQ is not set CONFIG_ARC_PLAT_EZNPS=y CONFIG_SMP=y CONFIG_NR_CPUS=4096 diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig index 5dd470b6609e..bf39a0091679 100644 --- a/arch/arc/configs/nsimosci_defconfig +++ b/arch/arc/configs/nsimosci_defconfig @@ -20,8 +20,6 @@ CONFIG_ISA_ARCOMPACT=y CONFIG_KPROBES=y CONFIG_MODULES=y # CONFIG_BLK_DEV_BSG is not set -# CONFIG_IOSCHED_DEADLINE is not set -# CONFIG_IOSCHED_CFQ is not set CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci" # CONFIG_COMPACTION is not set CONFIG_NET=y diff --git a/arch/arc/configs/nsimosci_hs_defconfig b/arch/arc/configs/nsimosci_hs_defconfig index 3532e86f7bff..7121bd71c543 100644 --- a/arch/arc/configs/nsimosci_hs_defconfig +++ b/arch/arc/configs/nsimosci_hs_defconfig @@ -19,8 +19,6 @@ CONFIG_PERF_EVENTS=y CONFIG_KPROBES=y CONFIG_MODULES=y # CONFIG_BLK_DEV_BSG is not set -# CONFIG_IOSCHED_DEADLINE is not set -# CONFIG_IOSCHED_CFQ is not set CONFIG_ISA_ARCV2=y CONFIG_ARC_BUILTIN_DTB_NAME="nsimosci_hs" # CONFIG_COMPACTION is not set diff --git a/arch/arc/configs/nsimosci_hs_smp_defconfig b/arch/arc/configs/nsimosci_hs_smp_defconfig index d90448bee064..f9863b294a70 100644 --- a/arch/arc/configs/nsimosci_hs_smp_defconfig +++ b/arch/arc/configs/nsimosci_hs_smp_defconfig @@ -14,8 +14,6 @@ CONFIG_PERF_EVENTS=y CONFIG_KPROBES=y CONFIG_MODULES=y # CONFIG_BLK_DEV_BSG is not set -# CONFIG_IOSCHED_DEADLINE is not set -# CONFIG_IOSCHED_CFQ is not set CONFIG_ISA_ARCV2=y CONFIG_SMP=y # CONFIG_ARC_TIMERS_64BIT is not set From a5760db25c6e68bf8cd34e1642196826c08715ed Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 31 Jan 2020 17:49:33 -0800 Subject: [PATCH 004/372] ARC: fix some Kconfig typos Fix language typos in arch/arc/Kconfig. Cc: linux-snps-arc@lists.infradead.org Signed-off-by: Randy Dunlap Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index ff2a393b635c..7124ab82dfa3 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -154,7 +154,7 @@ config ARC_CPU_HS help Support for ARC HS38x Cores based on ARCv2 ISA The notable features are: - - SMP configurations of upto 4 core with coherency + - SMP configurations of up to 4 cores with coherency - Optional L2 Cache and IO-Coherency - Revised Interrupt Architecture (multiple priorites, reg banks, auto stack switch, auto regfile save/restore) @@ -192,7 +192,7 @@ config ARC_SMP_HALT_ON_RESET help In SMP configuration cores can be configured as Halt-on-reset or they could all start at same time. For Halt-on-reset, non - masters are parked until Master kicks them so they can start of + masters are parked until Master kicks them so they can start off at designated entry point. For other case, all jump to common entry point and spin wait for Master's signal. From 0acdf63d2296f9542437a4842ad94554d240eab6 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 7 Feb 2020 17:05:10 -0800 Subject: [PATCH 005/372] ARC: fpu: fix randconfig build error reported by 0-day test service Reported-by: kbuild test robot Link: http://lists.infradead.org/pipermail/linux-snps-arc/2020-February/006845.html Signed-off-by: Vineet Gupta --- arch/arc/include/asm/fpu.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arc/include/asm/fpu.h b/arch/arc/include/asm/fpu.h index 64347250fdf5..006bcf88a7a5 100644 --- a/arch/arc/include/asm/fpu.h +++ b/arch/arc/include/asm/fpu.h @@ -43,6 +43,8 @@ extern void fpu_init_task(struct pt_regs *regs); #endif /* !CONFIG_ISA_ARCOMPACT */ +struct task_struct; + extern void fpu_save_restore(struct task_struct *p, struct task_struct *n); #else /* !CONFIG_ARC_FPU_SAVE_RESTORE */ From 3b00b042eeaaea43b8702b2dd4b9cb0916a2e052 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 12 Feb 2020 11:00:47 +0100 Subject: [PATCH 006/372] ARC: Replace by The ARC platform code is not a clock provider, and just needs to call of_clk_init(). Hence it can include instead of . Signed-off-by: Geert Uytterhoeven Reviewed-by: Stephen Boyd Signed-off-by: Vineet Gupta --- arch/arc/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index e1c647490f00..aa41af6ef4ac 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -8,11 +8,11 @@ #include #include #include -#include #include #include #include #include +#include #include #include #include From 4a4472fdc098fb78f52a0848788faf46674a8423 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 12 Feb 2020 10:43:17 +0100 Subject: [PATCH 007/372] of: clk: Make of_clk_get_parent_{count,name}() parameter const of_clk_get_parent_count() and of_clk_get_parent_name() never modify the device nodes passed, so they can be const. Signed-off-by: Geert Uytterhoeven Link: https://lkml.kernel.org/r/20200212094317.1150-1-geert+renesas@glider.be Signed-off-by: Stephen Boyd --- drivers/clk/clk.c | 4 ++-- include/linux/of_clk.h | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index f0f2b599fd7e..95adf6c6db3d 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -4713,7 +4713,7 @@ EXPORT_SYMBOL(of_clk_get_by_name); * * Returns: The number of clocks that are possible parents of this node */ -unsigned int of_clk_get_parent_count(struct device_node *np) +unsigned int of_clk_get_parent_count(const struct device_node *np) { int count; @@ -4725,7 +4725,7 @@ unsigned int of_clk_get_parent_count(struct device_node *np) } EXPORT_SYMBOL_GPL(of_clk_get_parent_count); -const char *of_clk_get_parent_name(struct device_node *np, int index) +const char *of_clk_get_parent_name(const struct device_node *np, int index) { struct of_phandle_args clkspec; struct property *prop; diff --git a/include/linux/of_clk.h b/include/linux/of_clk.h index c86fcad23fc2..31b73a0da9db 100644 --- a/include/linux/of_clk.h +++ b/include/linux/of_clk.h @@ -11,17 +11,17 @@ struct of_device_id; #if defined(CONFIG_COMMON_CLK) && defined(CONFIG_OF) -unsigned int of_clk_get_parent_count(struct device_node *np); -const char *of_clk_get_parent_name(struct device_node *np, int index); +unsigned int of_clk_get_parent_count(const struct device_node *np); +const char *of_clk_get_parent_name(const struct device_node *np, int index); void of_clk_init(const struct of_device_id *matches); #else /* !CONFIG_COMMON_CLK || !CONFIG_OF */ -static inline unsigned int of_clk_get_parent_count(struct device_node *np) +static inline unsigned int of_clk_get_parent_count(const struct device_node *np) { return 0; } -static inline const char *of_clk_get_parent_name(struct device_node *np, +static inline const char *of_clk_get_parent_name(const struct device_node *np, int index) { return NULL; From 8de427d52da30d3bad06d416e486a6b1bd0f3850 Mon Sep 17 00:00:00 2001 From: Taniya Das Date: Tue, 11 Feb 2020 17:43:55 +0530 Subject: [PATCH 008/372] clk: qcom: videocc: Update the clock flag for video_cc_vcodec0_core_clk The clock disable signal for video_cc_vcodec0_core_clk is tied to vcodec0_gdsc which is supported in the HW control mode. Thus turning off the clock would be taken care automatically when the GDSC turns OFF by hardware and clock driver does not require to poll on the CLK_OFF bit. Signed-off-by: Taniya Das Link: https://lkml.kernel.org/r/1581423235-21341-1-git-send-email-tdas@codeaurora.org Fixes: 253dc75a0bb8 ("clk: qcom: Add video clock controller driver for SC7180") Signed-off-by: Stephen Boyd --- drivers/clk/qcom/videocc-sc7180.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/qcom/videocc-sc7180.c b/drivers/clk/qcom/videocc-sc7180.c index c363c3cc544e..276e5ecd4840 100644 --- a/drivers/clk/qcom/videocc-sc7180.c +++ b/drivers/clk/qcom/videocc-sc7180.c @@ -97,7 +97,7 @@ static struct clk_branch video_cc_vcodec0_axi_clk = { static struct clk_branch video_cc_vcodec0_core_clk = { .halt_reg = 0x890, - .halt_check = BRANCH_HALT, + .halt_check = BRANCH_HALT_VOTED, .clkr = { .enable_reg = 0x890, .enable_mask = BIT(0), From efbd9129dfe87fdeaa3c44e8246b019a775c484b Mon Sep 17 00:00:00 2001 From: Taniya Das Date: Tue, 11 Feb 2020 17:43:56 +0530 Subject: [PATCH 009/372] clk: qcom: dispcc: Remove support of disp_cc_mdss_rscc_ahb_clk The disp_cc_mdss_rscc_ahb_clk is default enabled from hardware and thus does not require to be marked CRITICAL. This which would allow the RCG to be turned OFF when the display turns OFF and not blocking XO. Signed-off-by: Taniya Das Link: https://lkml.kernel.org/r/1581423236-21341-2-git-send-email-tdas@codeaurora.org Fixes: dd3d06622138 ("clk: qcom: Add display clock controller driver for SC7180") Signed-off-by: Stephen Boyd --- drivers/clk/qcom/dispcc-sc7180.c | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/drivers/clk/qcom/dispcc-sc7180.c b/drivers/clk/qcom/dispcc-sc7180.c index dd7af41e47eb..0a5d395bce93 100644 --- a/drivers/clk/qcom/dispcc-sc7180.c +++ b/drivers/clk/qcom/dispcc-sc7180.c @@ -592,24 +592,6 @@ static struct clk_branch disp_cc_mdss_rot_clk = { }, }; -static struct clk_branch disp_cc_mdss_rscc_ahb_clk = { - .halt_reg = 0x400c, - .halt_check = BRANCH_HALT, - .clkr = { - .enable_reg = 0x400c, - .enable_mask = BIT(0), - .hw.init = &(struct clk_init_data){ - .name = "disp_cc_mdss_rscc_ahb_clk", - .parent_data = &(const struct clk_parent_data){ - .hw = &disp_cc_mdss_ahb_clk_src.clkr.hw, - }, - .num_parents = 1, - .flags = CLK_IS_CRITICAL | CLK_SET_RATE_PARENT, - .ops = &clk_branch2_ops, - }, - }, -}; - static struct clk_branch disp_cc_mdss_rscc_vsync_clk = { .halt_reg = 0x4008, .halt_check = BRANCH_HALT, @@ -687,7 +669,6 @@ static struct clk_regmap *disp_cc_sc7180_clocks[] = { [DISP_CC_MDSS_PCLK0_CLK_SRC] = &disp_cc_mdss_pclk0_clk_src.clkr, [DISP_CC_MDSS_ROT_CLK] = &disp_cc_mdss_rot_clk.clkr, [DISP_CC_MDSS_ROT_CLK_SRC] = &disp_cc_mdss_rot_clk_src.clkr, - [DISP_CC_MDSS_RSCC_AHB_CLK] = &disp_cc_mdss_rscc_ahb_clk.clkr, [DISP_CC_MDSS_RSCC_VSYNC_CLK] = &disp_cc_mdss_rscc_vsync_clk.clkr, [DISP_CC_MDSS_VSYNC_CLK] = &disp_cc_mdss_vsync_clk.clkr, [DISP_CC_MDSS_VSYNC_CLK_SRC] = &disp_cc_mdss_vsync_clk_src.clkr, From 3e8cb8b2eaeb22f540f1cbc00cbb594047b7ba89 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 13 Feb 2020 09:16:07 +0100 Subject: [PATCH 010/372] fuse: fix stack use after return Normal, synchronous requests will have their args allocated on the stack. After the FR_FINISHED bit is set by receiving the reply from the userspace fuse server, the originating task may return and reuse the stack frame, resulting in an Oops if the args structure is dereferenced. Fix by setting a flag in the request itself upon initializing, indicating whether it has an asynchronous ->end() callback. Reported-by: Kyle Sanderson Reported-by: Michael Stapelberg Fixes: 2b319d1f6f92 ("fuse: don't dereference req->args on finished request") Cc: # v5.4 Tested-by: Michael Stapelberg Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 6 +++--- fs/fuse/fuse_i.h | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 8e02d76fe104..97eec7522bf2 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -276,12 +276,10 @@ static void flush_bg_queue(struct fuse_conn *fc) void fuse_request_end(struct fuse_conn *fc, struct fuse_req *req) { struct fuse_iqueue *fiq = &fc->iq; - bool async; if (test_and_set_bit(FR_FINISHED, &req->flags)) goto put_request; - async = req->args->end; /* * test_and_set_bit() implies smp_mb() between bit * changing and below intr_entry check. Pairs with @@ -324,7 +322,7 @@ void fuse_request_end(struct fuse_conn *fc, struct fuse_req *req) wake_up(&req->waitq); } - if (async) + if (test_bit(FR_ASYNC, &req->flags)) req->args->end(fc, req->args, req->out.h.error); put_request: fuse_put_request(fc, req); @@ -471,6 +469,8 @@ static void fuse_args_to_req(struct fuse_req *req, struct fuse_args *args) req->in.h.opcode = args->opcode; req->in.h.nodeid = args->nodeid; req->args = args; + if (args->end) + __set_bit(FR_ASYNC, &req->flags); } ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args) diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index aa75e2305b75..ca344bf71404 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -301,6 +301,7 @@ struct fuse_io_priv { * FR_SENT: request is in userspace, waiting for an answer * FR_FINISHED: request is finished * FR_PRIVATE: request is on private list + * FR_ASYNC: request is asynchronous */ enum fuse_req_flag { FR_ISREPLY, @@ -314,6 +315,7 @@ enum fuse_req_flag { FR_SENT, FR_FINISHED, FR_PRIVATE, + FR_ASYNC, }; /** From 29e8c8253d7d5265f58122c0a7902e26df6c6f61 Mon Sep 17 00:00:00 2001 From: Fabrice Gasnier Date: Fri, 14 Feb 2020 17:46:35 +0100 Subject: [PATCH 011/372] iio: trigger: stm32-timer: disable master mode when stopping Master mode should be disabled when stopping. This mainly impacts possible other use-case after timer has been stopped. Currently, master mode remains set (from start routine). Fixes: 6fb34812c2a2 ("iio: stm32 trigger: Add support for TRGO2 triggers") Signed-off-by: Fabrice Gasnier Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/trigger/stm32-timer-trigger.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/iio/trigger/stm32-timer-trigger.c b/drivers/iio/trigger/stm32-timer-trigger.c index 2e0d32aa8436..2f82e8c32186 100644 --- a/drivers/iio/trigger/stm32-timer-trigger.c +++ b/drivers/iio/trigger/stm32-timer-trigger.c @@ -161,7 +161,8 @@ static int stm32_timer_start(struct stm32_timer_trigger *priv, return 0; } -static void stm32_timer_stop(struct stm32_timer_trigger *priv) +static void stm32_timer_stop(struct stm32_timer_trigger *priv, + struct iio_trigger *trig) { u32 ccer, cr1; @@ -179,6 +180,12 @@ static void stm32_timer_stop(struct stm32_timer_trigger *priv) regmap_write(priv->regmap, TIM_PSC, 0); regmap_write(priv->regmap, TIM_ARR, 0); + /* Force disable master mode */ + if (stm32_timer_is_trgo2_name(trig->name)) + regmap_update_bits(priv->regmap, TIM_CR2, TIM_CR2_MMS2, 0); + else + regmap_update_bits(priv->regmap, TIM_CR2, TIM_CR2_MMS, 0); + /* Make sure that registers are updated */ regmap_update_bits(priv->regmap, TIM_EGR, TIM_EGR_UG, TIM_EGR_UG); } @@ -197,7 +204,7 @@ static ssize_t stm32_tt_store_frequency(struct device *dev, return ret; if (freq == 0) { - stm32_timer_stop(priv); + stm32_timer_stop(priv, trig); } else { ret = stm32_timer_start(priv, trig, freq); if (ret) From 5eb40257047fb11085d582b7b9ccd0bffe900726 Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Mon, 17 Feb 2020 11:01:35 +0800 Subject: [PATCH 012/372] clk: imx8mn: Fix incorrect clock defines IMX8MN_CLK_I2C4 and IMX8MN_CLK_UART1's index definitions are incorrect, fix them. Fixes: 1e80936a42e1 ("dt-bindings: imx: Add clock binding doc for i.MX8MN") Signed-off-by: Anson Huang Signed-off-by: Shawn Guo --- include/dt-bindings/clock/imx8mn-clock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/dt-bindings/clock/imx8mn-clock.h b/include/dt-bindings/clock/imx8mn-clock.h index 0f2b8423ce1d..65ac6eb6c733 100644 --- a/include/dt-bindings/clock/imx8mn-clock.h +++ b/include/dt-bindings/clock/imx8mn-clock.h @@ -122,8 +122,8 @@ #define IMX8MN_CLK_I2C1 105 #define IMX8MN_CLK_I2C2 106 #define IMX8MN_CLK_I2C3 107 -#define IMX8MN_CLK_I2C4 118 -#define IMX8MN_CLK_UART1 119 +#define IMX8MN_CLK_I2C4 108 +#define IMX8MN_CLK_UART1 109 #define IMX8MN_CLK_UART2 110 #define IMX8MN_CLK_UART3 111 #define IMX8MN_CLK_UART4 112 From b500c086e4110829a308c23e83a7cdc65b26228a Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Fri, 14 Feb 2020 12:03:24 +0100 Subject: [PATCH 013/372] iio: magnetometer: ak8974: Fix negative raw values in sysfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At the moment, reading from in_magn_*_raw in sysfs tends to return large values around 65000, even though the output of ak8974 is actually limited to ±32768. This happens because the value is never converted to the signed 16-bit integer variant. Add an explicit cast to s16 to fix this. Fixes: 7c94a8b2ee8c ("iio: magn: add a driver for AK8974") Signed-off-by: Stephan Gerhold Reviewed-by: Linus Waleij Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/magnetometer/ak8974.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/magnetometer/ak8974.c b/drivers/iio/magnetometer/ak8974.c index fc7e910f8e8b..d32996702110 100644 --- a/drivers/iio/magnetometer/ak8974.c +++ b/drivers/iio/magnetometer/ak8974.c @@ -564,7 +564,7 @@ static int ak8974_read_raw(struct iio_dev *indio_dev, * We read all axes and discard all but one, for optimized * reading, use the triggered buffer. */ - *val = le16_to_cpu(hw_values[chan->address]); + *val = (s16)le16_to_cpu(hw_values[chan->address]); ret = IIO_VAL_INT; } From 45939ce292b4b11159719faaf60aba7d58d5fe33 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 28 Jan 2020 20:22:13 +0100 Subject: [PATCH 014/372] ARM: 8957/1: VDSO: Match ARMv8 timer in cntvct_functional() It is possible for a system with an ARMv8 timer to run a 32-bit kernel. When this happens we will unconditionally have the vDSO code remove the __vdso_gettimeofday and __vdso_clock_gettime symbols because cntvct_functional() returns false since it does not match that compatibility string. Fixes: ecf99a439105 ("ARM: 8331/1: VDSO initialization, mapping, and synchronization") Signed-off-by: Florian Fainelli Signed-off-by: Russell King --- arch/arm/kernel/vdso.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c index c89ac1b9d28b..e0330a25e1c6 100644 --- a/arch/arm/kernel/vdso.c +++ b/arch/arm/kernel/vdso.c @@ -94,6 +94,8 @@ static bool __init cntvct_functional(void) * this. */ np = of_find_compatible_node(NULL, NULL, "arm,armv7-timer"); + if (!np) + np = of_find_compatible_node(NULL, NULL, "arm,armv8-timer"); if (!np) goto out_put; From f87b1c49bc675da30d8e1e8f4b60b800312c7b90 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 10 Feb 2020 02:04:17 +0100 Subject: [PATCH 015/372] ARM: 8958/1: rename missed uaccess .fixup section When the uaccess .fixup section was renamed to .text.fixup, one case was missed. Under ld.bfd, the orphaned section was moved close to .text (since they share the "ax" bits), so things would work normally on uaccess faults. Under ld.lld, the orphaned section was placed outside the .text section, making it unreachable. Link: https://github.com/ClangBuiltLinux/linux/issues/282 Link: https://bugs.chromium.org/p/chromium/issues/detail?id=1020633#c44 Link: https://lore.kernel.org/r/nycvar.YSQ.7.76.1912032147340.17114@knanqh.ubzr Link: https://lore.kernel.org/lkml/202002071754.F5F073F1D@keescook/ Fixes: c4a84ae39b4a5 ("ARM: 8322/1: keep .text and .fixup regions closer together") Cc: stable@vger.kernel.org Signed-off-by: Kees Cook Reviewed-by: Ard Biesheuvel Reviewed-by: Nick Desaulniers Signed-off-by: Russell King --- arch/arm/lib/copy_from_user.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/lib/copy_from_user.S b/arch/arm/lib/copy_from_user.S index 95b2e1ce559c..f8016e3db65d 100644 --- a/arch/arm/lib/copy_from_user.S +++ b/arch/arm/lib/copy_from_user.S @@ -118,7 +118,7 @@ ENTRY(arm_copy_from_user) ENDPROC(arm_copy_from_user) - .pushsection .fixup,"ax" + .pushsection .text.fixup,"ax" .align 0 copy_abort_preamble ldmfd sp!, {r1, r2, r3} From 89604523a76eb3e13014b2bdab7f8870becee284 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 18 Feb 2020 09:15:34 +0100 Subject: [PATCH 016/372] ARM: 8961/2: Fix Kbuild issue caused by per-task stack protector GCC plugin When using plugins, GCC requires that the -fplugin= options precedes any of its plugin arguments appearing on the command line as well. This is usually not a concern, but as it turns out, this requirement is causing some issues with ARM's per-task stack protector plugin and Kbuild's implementation of $(cc-option). When the per-task stack protector plugin is enabled, and we tweak the implementation of cc-option not to pipe the stderr output of GCC to /dev/null, the following output is generated when GCC is executed in the context of cc-option: cc1: error: plugin arm_ssp_per_task_plugin should be specified before \ -fplugin-arg-arm_ssp_per_task_plugin-tso=1 in the command line cc1: error: plugin arm_ssp_per_task_plugin should be specified before \ -fplugin-arg-arm_ssp_per_task_plugin-offset=24 in the command line These errors will cause any option passed to cc-option to be treated as unsupported, which is obviously incorrect. The cause of this issue is the fact that the -fplugin= argument is added to GCC_PLUGINS_CFLAGS, whereas the arguments above are added to KBUILD_CFLAGS, and the contents of the former get filtered out of the latter before being passed to the GCC running the cc-option test, and so the -fplugin= option does not appear at all on the GCC command line. Adding the arguments to GCC_PLUGINS_CFLAGS instead of KBUILD_CFLAGS would be the correct approach here, if it weren't for the fact that we are using $(eval) to defer the moment that they are added until after asm-offsets.h is generated, which is after the point where the contents of GCC_PLUGINS_CFLAGS are added to KBUILD_CFLAGS. So instead, we have to add our plugin arguments to both. For similar reasons, we cannot append DISABLE_ARM_SSP_PER_TASK_PLUGIN to KBUILD_CFLAGS, as it will be passed to GCC when executing in the context of cc-option, whereas the other plugin arguments will have been filtered out, resulting in a similar error and false negative result as above. So add it to ccflags-y instead. Fixes: 189af4657186da08 ("ARM: smp: add support for per-task stack canaries") Reported-by: Merlijn Wajer Tested-by: Tony Lindgren Acked-by: Kees Cook Reviewed-by: Masahiro Yamada Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King --- arch/arm/Makefile | 4 +++- arch/arm/boot/compressed/Makefile | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/arm/Makefile b/arch/arm/Makefile index db857d07114f..1fc32b611f8a 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -307,13 +307,15 @@ endif ifeq ($(CONFIG_STACKPROTECTOR_PER_TASK),y) prepare: stack_protector_prepare stack_protector_prepare: prepare0 - $(eval KBUILD_CFLAGS += \ + $(eval SSP_PLUGIN_CFLAGS := \ -fplugin-arg-arm_ssp_per_task_plugin-tso=$(shell \ awk '{if ($$2 == "THREAD_SZ_ORDER") print $$3;}'\ include/generated/asm-offsets.h) \ -fplugin-arg-arm_ssp_per_task_plugin-offset=$(shell \ awk '{if ($$2 == "TI_STACK_CANARY") print $$3;}'\ include/generated/asm-offsets.h)) + $(eval KBUILD_CFLAGS += $(SSP_PLUGIN_CFLAGS)) + $(eval GCC_PLUGINS_CFLAGS += $(SSP_PLUGIN_CFLAGS)) endif all: $(notdir $(KBUILD_IMAGE)) diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index da599c3a1193..9c11e7490292 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -101,7 +101,6 @@ clean-files += piggy_data lib1funcs.S ashldi3.S bswapsdi2.S \ $(libfdt) $(libfdt_hdrs) hyp-stub.S KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING -KBUILD_CFLAGS += $(DISABLE_ARM_SSP_PER_TASK_PLUGIN) ifeq ($(CONFIG_FUNCTION_TRACER),y) ORIG_CFLAGS := $(KBUILD_CFLAGS) @@ -117,7 +116,8 @@ CFLAGS_fdt_ro.o := $(nossp-flags-y) CFLAGS_fdt_rw.o := $(nossp-flags-y) CFLAGS_fdt_wip.o := $(nossp-flags-y) -ccflags-y := -fpic $(call cc-option,-mno-single-pic-base,) -fno-builtin -I$(obj) +ccflags-y := -fpic $(call cc-option,-mno-single-pic-base,) -fno-builtin \ + -I$(obj) $(DISABLE_ARM_SSP_PER_TASK_PLUGIN) asflags-y := -DZIMAGE # Supply kernel BSS size to the decompressor via a linker symbol. From 1cef21842ff3b6043c459b6462183e70295b5b19 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Fri, 21 Feb 2020 15:21:38 -0500 Subject: [PATCH 017/372] NFS: Ensure the fs_context has the correct fs_type before mounting This is necessary because unless userspace explicitly requests fstype "nfs4" (either via "mount -t nfs4" or by calling the "mount.nfs4" helper directly), the fstype will default to "nfs". This was fine on older kernels because the super_block->s_type was set via mount_info->nfs_mod->nfs_fs, which was set when parsing the mount options and subsequently passed in the "type" argument of sget(). After commit f2aedb713c28 ("NFS: Add fs_context support."), sget_fc(), which has no "type" argument, is called instead. In sget_fc(), the super_block->s_type is set via fs_context->fs_type, which was set when the filesystem context was initially created. Reported-by: Patrick Steinhardt Fixes: f2aedb713c28 ("NFS: Add fs_context support.") Signed-off-by: Scott Mayhew Signed-off-by: Anna Schumaker --- fs/nfs/fs_context.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c index e1b938457ab9..b616263b0eb6 100644 --- a/fs/nfs/fs_context.c +++ b/fs/nfs/fs_context.c @@ -1240,6 +1240,13 @@ static int nfs_fs_context_validate(struct fs_context *fc) } ctx->nfs_mod = nfs_mod; } + + /* Ensure the filesystem context has the correct fs_type */ + if (fc->fs_type != ctx->nfs_mod->nfs_fs) { + module_put(fc->fs_type->owner); + __module_get(ctx->nfs_mod->nfs_fs->owner); + fc->fs_type = ctx->nfs_mod->nfs_fs; + } return 0; out_no_device_name: From 1821b26a1fed8fca57a96ef87bac7a6a48e78815 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Thu, 20 Feb 2020 08:06:20 -0500 Subject: [PATCH 018/372] NFS: Don't hard-code the fs_type when submounting Hard-coding the fstype causes "nfs4" mounts to appear as "nfs", which breaks scripts that do "umount -at nfs4". Reported-by: Patrick Steinhardt Fixes: f2aedb713c28 ("NFS: Add fs_context support.") Signed-off-by: Scott Mayhew Signed-off-by: Anna Schumaker --- fs/nfs/namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index ad6077404947..f3ece8ed3203 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -153,7 +153,7 @@ struct vfsmount *nfs_d_automount(struct path *path) /* Open a new filesystem context, transferring parameters from the * parent superblock, including the network namespace. */ - fc = fs_context_for_submount(&nfs_fs_type, path->dentry); + fc = fs_context_for_submount(path->mnt->mnt_sb->s_type, path->dentry); if (IS_ERR(fc)) return ERR_CAST(fc); From 75a9b9176157f3095d3099adf512b5a233addbc7 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Tue, 25 Feb 2020 11:05:22 -0500 Subject: [PATCH 019/372] NFS: Fix leak of ctx->nfs_server.hostname If userspace passes an nfs_mount_data struct in the data argument of mount(2), then nfs23_parse_monolithic() or nfs4_parse_monolithic() will allocate memory for ctx->nfs_server.hostname. This needs to be freed in nfs_parse_source(), which also allocates memory for ctx->nfs_server.hostname, otherwise a leak will occur. Reported-by: syzbot+193c375dcddb4f345091@syzkaller.appspotmail.com Fixes: f2aedb713c28 ("NFS: Add fs_context support.") Signed-off-by: Scott Mayhew Signed-off-by: Anna Schumaker --- fs/nfs/fs_context.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c index b616263b0eb6..e113fcb4bb4c 100644 --- a/fs/nfs/fs_context.c +++ b/fs/nfs/fs_context.c @@ -832,6 +832,8 @@ static int nfs_parse_source(struct fs_context *fc, if (len > maxnamlen) goto out_hostname; + kfree(ctx->nfs_server.hostname); + /* N.B. caller will free nfs_server.hostname in all cases */ ctx->nfs_server.hostname = kmemdup_nul(dev_name, len, GFP_KERNEL); if (!ctx->nfs_server.hostname) From 55dee1bc0d72877b99805e42e0205087e98b9edd Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Mon, 24 Feb 2020 16:29:32 -0500 Subject: [PATCH 020/372] nfs: add minor version to nfs_server_key for fscache An NFS client that mounts multiple exports from the same NFS server with higher NFSv4 versions disabled (i.e. 4.2) and without forcing a specific NFS version results in fscache index cookie collisions and the following messages: [ 570.004348] FS-Cache: Duplicate cookie detected Each nfs_client structure should have its own fscache index cookie, so add the minorversion to nfs_server_key. Link: https://bugzilla.kernel.org/show_bug.cgi?id=200145 Signed-off-by: Scott Mayhew Signed-off-by: Dave Wysochanski Signed-off-by: Anna Schumaker --- fs/nfs/client.c | 1 + fs/nfs/fscache.c | 2 ++ fs/nfs/nfs4client.c | 1 - 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 989c30c98511..f1ff3076e4a4 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -153,6 +153,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL) goto error_0; + clp->cl_minorversion = cl_init->minorversion; clp->cl_nfs_mod = cl_init->nfs_mod; if (!try_module_get(clp->cl_nfs_mod->owner)) goto error_dealloc; diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index 52270bfac120..1abf126c2df4 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -31,6 +31,7 @@ static DEFINE_SPINLOCK(nfs_fscache_keys_lock); struct nfs_server_key { struct { uint16_t nfsversion; /* NFS protocol version */ + uint32_t minorversion; /* NFSv4 minor version */ uint16_t family; /* address family */ __be16 port; /* IP port */ } hdr; @@ -55,6 +56,7 @@ void nfs_fscache_get_client_cookie(struct nfs_client *clp) memset(&key, 0, sizeof(key)); key.hdr.nfsversion = clp->rpc_ops->version; + key.hdr.minorversion = clp->cl_minorversion; key.hdr.family = clp->cl_addr.ss_family; switch (clp->cl_addr.ss_family) { diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 0cd767e5c977..0bd77cc1f639 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -216,7 +216,6 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init) INIT_LIST_HEAD(&clp->cl_ds_clients); rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client"); clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED; - clp->cl_minorversion = cl_init->minorversion; clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion]; clp->cl_mig_gen = 1; #if IS_ENABLED(CONFIG_NFS_V4_1) From d364847eed890211444ad74496bb549f838c6018 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Feb 2020 14:55:15 +0100 Subject: [PATCH 021/372] x86/mce/therm_throt: Undo thermal polling properly on CPU offline Chris Wilson reported splats from running the thermal throttling workqueue callback on offlined CPUs. The problem is that that callback should not even run on offlined CPUs but it happens nevertheless because the offlining callback thermal_throttle_offline() does not symmetrically undo the setup work done in its onlining counterpart. IOW, 1. The thermal interrupt vector should be masked out before ... 2. ... cancelling any pending work synchronously so that no new work is enqueued anymore. Do those things and fix the issue properly. [ bp: Write commit message. ] Fixes: f6656208f04e ("x86/mce/therm_throt: Optimize notifications of thermal throttle") Reported-by: Chris Wilson Tested-by: Pandruvada, Srinivas Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/158120068234.18291.7938335950259651295@skylake-alporthouse-com --- arch/x86/kernel/cpu/mce/therm_throt.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/mce/therm_throt.c b/arch/x86/kernel/cpu/mce/therm_throt.c index 58b4ee3cda77..f36dc0742085 100644 --- a/arch/x86/kernel/cpu/mce/therm_throt.c +++ b/arch/x86/kernel/cpu/mce/therm_throt.c @@ -486,9 +486,14 @@ static int thermal_throttle_offline(unsigned int cpu) { struct thermal_state *state = &per_cpu(thermal_state, cpu); struct device *dev = get_cpu_device(cpu); + u32 l; - cancel_delayed_work(&state->package_throttle.therm_work); - cancel_delayed_work(&state->core_throttle.therm_work); + /* Mask the thermal vector before draining evtl. pending work */ + l = apic_read(APIC_LVTTHMR); + apic_write(APIC_LVTTHMR, l | APIC_LVT_MASKED); + + cancel_delayed_work_sync(&state->package_throttle.therm_work); + cancel_delayed_work_sync(&state->core_throttle.therm_work); state->package_throttle.rate_control_active = false; state->core_throttle.rate_control_active = false; From 59b5809655bdafb0767d3fd00a3e41711aab07e6 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 25 Feb 2020 17:17:37 -0800 Subject: [PATCH 022/372] x86/mce: Fix logic and comments around MSR_PPIN_CTL There are two implemented bits in the PPIN_CTL MSR: Bit 0: LockOut (R/WO) Set 1 to prevent further writes to MSR_PPIN_CTL. Bit 1: Enable_PPIN (R/W) If 1, enables MSR_PPIN to be accessible using RDMSR. If 0, an attempt to read MSR_PPIN will cause #GP. So there are four defined values: 0: PPIN is disabled, PPIN_CTL may be updated 1: PPIN is disabled. PPIN_CTL is locked against updates 2: PPIN is enabled. PPIN_CTL may be updated 3: PPIN is enabled. PPIN_CTL is locked against updates Code would only enable the X86_FEATURE_INTEL_PPIN feature for case "2". When it should have done so for both case "2" and case "3". Fix the final test to just check for the enable bit. Also fix some of the other comments in this function. Fixes: 3f5a7896a509 ("x86/mce: Include the PPIN in MCE records when available") Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Cc: Link: https://lkml.kernel.org/r/20200226011737.9958-1-tony.luck@intel.com --- arch/x86/kernel/cpu/mce/intel.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c index 5627b1091b85..f996ffb887bc 100644 --- a/arch/x86/kernel/cpu/mce/intel.c +++ b/arch/x86/kernel/cpu/mce/intel.c @@ -493,17 +493,18 @@ static void intel_ppin_init(struct cpuinfo_x86 *c) return; if ((val & 3UL) == 1UL) { - /* PPIN available but disabled: */ + /* PPIN locked in disabled mode */ return; } - /* If PPIN is disabled, but not locked, try to enable: */ - if (!(val & 3UL)) { + /* If PPIN is disabled, try to enable */ + if (!(val & 2UL)) { wrmsrl_safe(MSR_PPIN_CTL, val | 2UL); rdmsrl_safe(MSR_PPIN_CTL, &val); } - if ((val & 3UL) == 2UL) + /* Is the enable bit set? */ + if (val & 2UL) set_cpu_cap(c, X86_FEATURE_INTEL_PPIN); } } From cff20b3151ccab690715cb6cf0f5da5cccb32adf Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 2 Mar 2020 11:13:19 -0300 Subject: [PATCH 023/372] perf tests bp_account: Make global variable static To fix the build with newer gccs, that without this patch exit with: LD /tmp/build/perf/tests/perf-in.o ld: /tmp/build/perf/tests/bp_account.o:/git/perf/tools/perf/tests/bp_account.c:22: multiple definition of `the_var'; /tmp/build/perf/tests/bp_signal.o:/git/perf/tools/perf/tests/bp_signal.c:38: first defined here make[4]: *** [/git/perf/tools/build/Makefile.build:145: /tmp/build/perf/tests/perf-in.o] Error 1 First noticed in fedora:rawhide/32 with: [perfbuilder@a5ff49d6e6e4 ~]$ gcc --version gcc (GCC) 10.0.1 20200216 (Red Hat 10.0.1-0.8) Reported-by: Jiri Olsa Cc: Adrian Hunter Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/bp_account.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/tests/bp_account.c b/tools/perf/tests/bp_account.c index d0b935356274..489b50604cf2 100644 --- a/tools/perf/tests/bp_account.c +++ b/tools/perf/tests/bp_account.c @@ -19,7 +19,7 @@ #include "../perf-sys.h" #include "cloexec.h" -volatile long the_var; +static volatile long the_var; static noinline int test_function(void) { From ebcb9464a2ae3a547e97de476575c82ece0e93e2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 2 Mar 2020 11:23:03 -0300 Subject: [PATCH 024/372] perf env: Do not return pointers to local variables It is possible to return a pointer to a local variable when looking up the architecture name for the running system and no normalization is done on that value, i.e. we may end up returning the uts.machine local variable. While this doesn't happen on most arches, as normalization takes place, lets fix this by making that a static variable and optimize it a bit by not always running uname(), only the first time. Noticed in fedora rawhide running with: [perfbuilder@a5ff49d6e6e4 ~]$ gcc --version gcc (GCC) 10.0.1 20200216 (Red Hat 10.0.1-0.8) Reported-by: Jiri Olsa Cc: Adrian Hunter Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/env.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 6242a9215df7..4154f944f474 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -343,11 +343,11 @@ static const char *normalize_arch(char *arch) const char *perf_env__arch(struct perf_env *env) { - struct utsname uts; char *arch_name; if (!env || !env->arch) { /* Assume local operation */ - if (uname(&uts) < 0) + static struct utsname uts = { .machine[0] = '\0', }; + if (uts.machine[0] == '\0' && uname(&uts) < 0) return NULL; arch_name = uts.machine; } else From 7125f204501ed55493593209c6c71ac7c38f6b6c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 2 Mar 2020 11:55:47 -0300 Subject: [PATCH 025/372] perf parse-events: Use asprintf() instead of strncpy() to read tracepoint files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make the code more compact by using asprintf() instead of malloc()+strncpy() which also uses less memory and avoids these warnings with gcc 10: CC /tmp/build/perf/util/cloexec.o In file included from /usr/include/string.h:495, from util/parse-events.h:12, from util/parse-events.c:18: In function ‘strncpy’, inlined from ‘tracepoint_id_to_path’ at util/parse-events.c:271:5: /usr/include/bits/string_fortified.h:106:10: error: ‘__builtin_strncpy’ offset [275, 511] from the object at ‘sys_dirent’ is out of the bounds of referenced subobject ‘d_name’ with type ‘char[256]’ at offset 19 [-Werror=array-bounds] 106 | return __builtin___strncpy_chk (__dest, __src, __len, __bos (__dest)); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In file included from /usr/include/dirent.h:61, from util/parse-events.c:5: util/parse-events.c: In function ‘tracepoint_id_to_path’: /usr/include/bits/dirent.h:33:10: note: subobject ‘d_name’ declared here 33 | char d_name[256]; /* We must not include limits.h! */ | ^~~~~~ In file included from /usr/include/string.h:495, from util/parse-events.h:12, from util/parse-events.c:18: In function ‘strncpy’, inlined from ‘tracepoint_id_to_path’ at util/parse-events.c:273:5: /usr/include/bits/string_fortified.h:106:10: error: ‘__builtin_strncpy’ offset [275, 511] from the object at ‘evt_dirent’ is out of the bounds of referenced subobject ‘d_name’ with type ‘char[256]’ at offset 19 [-Werror=array-bounds] 106 | return __builtin___strncpy_chk (__dest, __src, __len, __bos (__dest)); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In file included from /usr/include/dirent.h:61, from util/parse-events.c:5: util/parse-events.c: In function ‘tracepoint_id_to_path’: /usr/include/bits/dirent.h:33:10: note: subobject ‘d_name’ declared here 33 | char d_name[256]; /* We must not include limits.h! */ | ^~~~~~ CC /tmp/build/perf/util/call-path.o Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/20200302145535.GA28183@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index c01ba6f8fdad..a14995835d85 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -257,21 +257,15 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config) path = zalloc(sizeof(*path)); if (!path) return NULL; - path->system = malloc(MAX_EVENT_LENGTH); - if (!path->system) { + if (asprintf(&path->system, "%.*s", MAX_EVENT_LENGTH, sys_dirent->d_name) < 0) { free(path); return NULL; } - path->name = malloc(MAX_EVENT_LENGTH); - if (!path->name) { + if (asprintf(&path->name, "%.*s", MAX_EVENT_LENGTH, evt_dirent->d_name) < 0) { zfree(&path->system); free(path); return NULL; } - strncpy(path->system, sys_dirent->d_name, - MAX_EVENT_LENGTH); - strncpy(path->name, evt_dirent->d_name, - MAX_EVENT_LENGTH); return path; } } From 2de7fb60a4740135e03cf55c1982e393ccb87b6b Mon Sep 17 00:00:00 2001 From: Mike Gilbert Date: Wed, 26 Feb 2020 14:33:59 -0500 Subject: [PATCH 026/372] cpupower: avoid multiple definition with gcc -fno-common Building cpupower with -fno-common in CFLAGS results in errors due to multiple definitions of the 'cpu_count' and 'start_time' variables. ./utils/idle_monitor/snb_idle.o:./utils/idle_monitor/cpupower-monitor.h:28: multiple definition of `cpu_count'; ./utils/idle_monitor/nhm_idle.o:./utils/idle_monitor/cpupower-monitor.h:28: first defined here ... ./utils/idle_monitor/cpuidle_sysfs.o:./utils/idle_monitor/cpuidle_sysfs.c:22: multiple definition of `start_time'; ./utils/idle_monitor/amd_fam14h_idle.o:./utils/idle_monitor/amd_fam14h_idle.c:85: first defined here The -fno-common option will be enabled by default in GCC 10. Bug: https://bugs.gentoo.org/707462 Signed-off-by: Mike Gilbert Signed-off-by: Shuah Khan --- tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c | 2 +- tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c | 2 +- tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c | 2 ++ tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h | 2 +- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c b/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c index 33dc34db4f3c..20f46348271b 100644 --- a/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c +++ b/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c @@ -82,7 +82,7 @@ static struct pci_access *pci_acc; static struct pci_dev *amd_fam14h_pci_dev; static int nbp1_entered; -struct timespec start_time; +static struct timespec start_time; static unsigned long long timediff; #ifdef DEBUG diff --git a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c index 3c4cee160b0e..a65f7d011513 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c +++ b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c @@ -19,7 +19,7 @@ struct cpuidle_monitor cpuidle_sysfs_monitor; static unsigned long long **previous_count; static unsigned long long **current_count; -struct timespec start_time; +static struct timespec start_time; static unsigned long long timediff; static int cpuidle_get_count_percent(unsigned int id, double *percent, diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c index 6d44fec55ad5..7c77045fef52 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c +++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c @@ -27,6 +27,8 @@ struct cpuidle_monitor *all_monitors[] = { 0 }; +int cpu_count; + static struct cpuidle_monitor *monitors[MONITORS_MAX]; static unsigned int avail_monitors; diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h index 5b5eb1da0cce..c559d3115330 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h +++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h @@ -25,7 +25,7 @@ #endif #define CSTATE_DESC_LEN 60 -int cpu_count; +extern int cpu_count; /* Hard to define the right names ...: */ enum power_range_e { From 02d715b4a8182f4887d82df82a7b83aced647760 Mon Sep 17 00:00:00 2001 From: Amol Grover Date: Sun, 23 Feb 2020 22:25:39 +0530 Subject: [PATCH 027/372] iommu/vt-d: Fix RCU list debugging warnings dmar_drhd_units is traversed using list_for_each_entry_rcu() outside of an RCU read side critical section but under the protection of dmar_global_lock. Hence add corresponding lockdep expression to silence the following false-positive warnings: [ 1.603975] ============================= [ 1.603976] WARNING: suspicious RCU usage [ 1.603977] 5.5.4-stable #17 Not tainted [ 1.603978] ----------------------------- [ 1.603980] drivers/iommu/intel-iommu.c:4769 RCU-list traversed in non-reader section!! [ 1.603869] ============================= [ 1.603870] WARNING: suspicious RCU usage [ 1.603872] 5.5.4-stable #17 Not tainted [ 1.603874] ----------------------------- [ 1.603875] drivers/iommu/dmar.c:293 RCU-list traversed in non-reader section!! Tested-by: Madhuparna Bhowmik Signed-off-by: Amol Grover Cc: stable@vger.kernel.org Acked-by: Lu Baolu Signed-off-by: Joerg Roedel --- include/linux/dmar.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/include/linux/dmar.h b/include/linux/dmar.h index f64ca27dc210..712be8bc6a7c 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -69,8 +69,9 @@ struct dmar_pci_notify_info { extern struct rw_semaphore dmar_global_lock; extern struct list_head dmar_drhd_units; -#define for_each_drhd_unit(drhd) \ - list_for_each_entry_rcu(drhd, &dmar_drhd_units, list) +#define for_each_drhd_unit(drhd) \ + list_for_each_entry_rcu(drhd, &dmar_drhd_units, list, \ + dmar_rcu_check()) #define for_each_active_drhd_unit(drhd) \ list_for_each_entry_rcu(drhd, &dmar_drhd_units, list) \ @@ -81,7 +82,8 @@ extern struct list_head dmar_drhd_units; if (i=drhd->iommu, drhd->ignored) {} else #define for_each_iommu(i, drhd) \ - list_for_each_entry_rcu(drhd, &dmar_drhd_units, list) \ + list_for_each_entry_rcu(drhd, &dmar_drhd_units, list, \ + dmar_rcu_check()) \ if (i=drhd->iommu, 0) {} else static inline bool dmar_rcu_check(void) From 9a11997e757bcf716c1b199ea7bd1abbadc4b357 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 2 Mar 2020 15:15:36 +0800 Subject: [PATCH 028/372] KVM: X86: Fix dereference null cpufreq policy Naresh Kamboju reported: Linux version 5.6.0-rc4 (oe-user@oe-host) (gcc version (GCC)) #1 SMP Sun Mar 1 22:59:08 UTC 2020 kvm: no hardware support BUG: kernel NULL pointer dereference, address: 000000000000028c #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] SMP NOPTI CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.6.0-rc4 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), 04/01/2014 RIP: 0010:kobject_put+0x12/0x1c0 Call Trace: cpufreq_cpu_put+0x15/0x20 kvm_arch_init+0x1f6/0x2b0 kvm_init+0x31/0x290 ? svm_check_processor_compat+0xd/0xd ? svm_check_processor_compat+0xd/0xd svm_init+0x21/0x23 do_one_initcall+0x61/0x2f0 ? rdinit_setup+0x30/0x30 ? rcu_read_lock_sched_held+0x4f/0x80 kernel_init_freeable+0x219/0x279 ? rest_init+0x250/0x250 kernel_init+0xe/0x110 ret_from_fork+0x27/0x50 Modules linked in: CR2: 000000000000028c ---[ end trace 239abf40c55c409b ]--- RIP: 0010:kobject_put+0x12/0x1c0 cpufreq policy which is get by cpufreq_cpu_get() can be NULL if it is failure, this patch takes care of it. Fixes: aaec7c03de (KVM: x86: avoid useless copy of cpufreq policy) Reported-by: Naresh Kamboju Cc: Naresh Kamboju Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5de200663f51..3156e25b0774 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7195,10 +7195,12 @@ static void kvm_timer_init(void) cpu = get_cpu(); policy = cpufreq_cpu_get(cpu); - if (policy && policy->cpuinfo.max_freq) - max_tsc_khz = policy->cpuinfo.max_freq; + if (policy) { + if (policy->cpuinfo.max_freq) + max_tsc_khz = policy->cpuinfo.max_freq; + cpufreq_cpu_put(policy); + } put_cpu(); - cpufreq_cpu_put(policy); #endif cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER); From aaca21007ba14005feca74a079888c49a9b6f6ac Mon Sep 17 00:00:00 2001 From: Haiwei Li Date: Mon, 2 Mar 2020 20:19:28 +0800 Subject: [PATCH 029/372] KVM: SVM: Fix the svm vmexit code for WRMSR In svm, exit_code for MSR writes is not EXIT_REASON_MSR_WRITE which belongs to vmx. According to amd manual, SVM_EXIT_MSR(7ch) is the exit_code of VMEXIT_MSR due to RDMSR or WRMSR access to protected MSR. Additionally, the processor indicates in the VMCB's EXITINFO1 whether a RDMSR(EXITINFO1=0) or WRMSR(EXITINFO1=1) was intercepted. Signed-off-by: Haiwei Li Fixes: 1e9e2622a149 ("KVM: VMX: FIXED+PHYSICAL mode single target IPI fastpath", 2019-11-21) Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 24c0b2ba8fb9..91000501756e 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -6312,7 +6312,8 @@ static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu, enum exit_fastpath_completion *exit_fastpath) { if (!is_guest_mode(vcpu) && - to_svm(vcpu)->vmcb->control.exit_code == EXIT_REASON_MSR_WRITE) + to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR && + to_svm(vcpu)->vmcb->control.exit_info_1) *exit_fastpath = handle_fastpath_set_msr_irqoff(vcpu); } From 77a1bce84bba01f3f143d77127b72e872b573795 Mon Sep 17 00:00:00 2001 From: Yonghyun Hwang Date: Wed, 26 Feb 2020 12:30:06 -0800 Subject: [PATCH 030/372] iommu/vt-d: Fix a bug in intel_iommu_iova_to_phys() for huge page intel_iommu_iova_to_phys() has a bug when it translates an IOVA for a huge page onto its corresponding physical address. This commit fixes the bug by accomodating the level of page entry for the IOVA and adds IOVA's lower address to the physical address. Cc: Acked-by: Lu Baolu Reviewed-by: Moritz Fischer Signed-off-by: Yonghyun Hwang Fixes: 3871794642579 ("VT-d: Changes to support KVM") Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 6fa6de2b6ad5..33593fea0250 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5700,8 +5700,10 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, u64 phys = 0; pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level); - if (pte) - phys = dma_pte_addr(pte); + if (pte && dma_pte_present(pte)) + phys = dma_pte_addr(pte) + + (iova & (BIT_MASK(level_to_offset_bits(level) + + VTD_PAGE_SHIFT) - 1)); return phys; } From 08090744f2dbba6b10d38fb17443c81f66798ca0 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 28 Feb 2020 14:18:55 +0000 Subject: [PATCH 031/372] iommu/io-pgtable-arm: Fix IOVA validation for 32-bit Since we ony support the TTB1 quirk for AArch64 contexts, and consequently only for 64-bit builds, the sign-extension aspect of the "are all bits above IAS consistent?" check should implicitly only apply to 64-bit IOVAs. Change the type of the cast to ensure that 32-bit longs don't inadvertently get sign-extended, and thus considered invalid, if they happen to be above 2GB in the TTB0 region. Reported-by: Stephan Gerhold Signed-off-by: Robin Murphy Acked-by: Acked-by: Will Deacon Fixes: db6903010aa5 ("iommu/io-pgtable-arm: Prepare for TTBR1 usage") Signed-off-by: Joerg Roedel --- drivers/iommu/io-pgtable-arm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 983b08477e64..04fbd4bf0ff9 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -468,7 +468,7 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova, arm_lpae_iopte *ptep = data->pgd; int ret, lvl = data->start_level; arm_lpae_iopte prot; - long iaext = (long)iova >> cfg->ias; + long iaext = (s64)iova >> cfg->ias; /* If no access, then nothing to do */ if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE))) @@ -645,7 +645,7 @@ static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova, struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); struct io_pgtable_cfg *cfg = &data->iop.cfg; arm_lpae_iopte *ptep = data->pgd; - long iaext = (long)iova >> cfg->ias; + long iaext = (s64)iova >> cfg->ias; if (WARN_ON(!size || (size & cfg->pgsize_bitmap) != size)) return 0; From 342993f96ab24d5864ab1216f46c0b199c2baf8e Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 3 Mar 2020 15:33:15 +0100 Subject: [PATCH 032/372] KVM: x86: clear stale x86_emulate_ctxt->intercept value After commit 07721feee46b ("KVM: nVMX: Don't emulate instructions in guest mode") Hyper-V guests on KVM stopped booting with: kvm_nested_vmexit: rip fffff802987d6169 reason EPT_VIOLATION info1 181 info2 0 int_info 0 int_info_err 0 kvm_page_fault: address febd0000 error_code 181 kvm_emulate_insn: 0:fffff802987d6169: f3 a5 kvm_emulate_insn: 0:fffff802987d6169: f3 a5 FAIL kvm_inj_exception: #UD (0x0) "f3 a5" is a "rep movsw" instruction, which should not be intercepted at all. Commit c44b4c6ab80e ("KVM: emulate: clean up initializations in init_decode_cache") reduced the number of fields cleared by init_decode_cache() claiming that they are being cleared elsewhere, 'intercept', however, is left uncleared if the instruction does not have any of the "slow path" flags (NotImpl, Stack, Op3264, Sse, Mmx, CheckPerm, NearBranch, No16 and of course Intercept itself). Fixes: c44b4c6ab80e ("KVM: emulate: clean up initializations in init_decode_cache") Fixes: 07721feee46b ("KVM: nVMX: Don't emulate instructions in guest mode") Cc: stable@vger.kernel.org Suggested-by: Paolo Bonzini Signed-off-by: Vitaly Kuznetsov Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index dd19fb3539e0..bc00642e5d3b 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -5173,6 +5173,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) ctxt->fetch.ptr = ctxt->fetch.data; ctxt->fetch.end = ctxt->fetch.data + insn_len; ctxt->opcode_len = 1; + ctxt->intercept = x86_intercept_none; if (insn_len > 0) memcpy(ctxt->fetch.data, insn, insn_len); else { From d718fdc3e752ba51ddb2b5554d3db98a09355cc2 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 3 Mar 2020 15:33:16 +0100 Subject: [PATCH 033/372] KVM: x86: remove stale comment from struct x86_emulate_ctxt Commit c44b4c6ab80e ("KVM: emulate: clean up initializations in init_decode_cache") did some field shuffling and instead of [opcode_len, _regs) started clearing [has_seg_override, modrm). The comment about clearing fields altogether is not true anymore. Fixes: c44b4c6ab80e ("KVM: emulate: clean up initializations in init_decode_cache") Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_emulate.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 2a8f2bd2e5cf..c06e8353efd3 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -360,7 +360,6 @@ struct x86_emulate_ctxt { u64 d; unsigned long _eip; struct operand memop; - /* Fields above regs are cleared together. */ unsigned long _regs[NR_VCPU_REGS]; struct operand *memopp; struct fetch_cache fetch; From ecc421e05bab97cf3ff4fe456ade47ef84dba8c2 Mon Sep 17 00:00:00 2001 From: Cyril Hrubis Date: Tue, 3 Mar 2020 16:06:38 +0100 Subject: [PATCH 034/372] sys/sysinfo: Respect boottime inside time namespace The sysinfo() syscall includes uptime in seconds but has no correction for time namespaces which makes it inconsistent with the /proc/uptime inside of a time namespace. Add the missing time namespace adjustment call. Signed-off-by: Cyril Hrubis Signed-off-by: Thomas Gleixner Reviewed-by: Dmitry Safonov Link: https://lkml.kernel.org/r/20200303150638.7329-1-chrubis@suse.cz --- kernel/sys.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/sys.c b/kernel/sys.c index f9bc5c303e3f..d325f3ab624a 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -2546,6 +2547,7 @@ static int do_sysinfo(struct sysinfo *info) memset(info, 0, sizeof(struct sysinfo)); ktime_get_boottime_ts64(&tp); + timens_add_boottime(&tp); info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0); get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT); From e4d9b04b973b2dbce7b42af95ea70d07da1c936d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 2 Mar 2020 12:09:38 -0300 Subject: [PATCH 035/372] perf bench: Share some global variables to fix build with gcc 10 Noticed with gcc 10 (fedora rawhide) that those variables were not being declared as static, so end up with: ld: /tmp/build/perf/bench/epoll-wait.o:/git/perf/tools/perf/bench/epoll-wait.c:93: multiple definition of `end'; /tmp/build/perf/bench/futex-hash.o:/git/perf/tools/perf/bench/futex-hash.c:40: first defined here ld: /tmp/build/perf/bench/epoll-wait.o:/git/perf/tools/perf/bench/epoll-wait.c:93: multiple definition of `start'; /tmp/build/perf/bench/futex-hash.o:/git/perf/tools/perf/bench/futex-hash.c:40: first defined here ld: /tmp/build/perf/bench/epoll-wait.o:/git/perf/tools/perf/bench/epoll-wait.c:93: multiple definition of `runtime'; /tmp/build/perf/bench/futex-hash.o:/git/perf/tools/perf/bench/futex-hash.c:40: first defined here ld: /tmp/build/perf/bench/epoll-ctl.o:/git/perf/tools/perf/bench/epoll-ctl.c:38: multiple definition of `end'; /tmp/build/perf/bench/futex-hash.o:/git/perf/tools/perf/bench/futex-hash.c:40: first defined here ld: /tmp/build/perf/bench/epoll-ctl.o:/git/perf/tools/perf/bench/epoll-ctl.c:38: multiple definition of `start'; /tmp/build/perf/bench/futex-hash.o:/git/perf/tools/perf/bench/futex-hash.c:40: first defined here ld: /tmp/build/perf/bench/epoll-ctl.o:/git/perf/tools/perf/bench/epoll-ctl.c:38: multiple definition of `runtime'; /tmp/build/perf/bench/futex-hash.o:/git/perf/tools/perf/bench/futex-hash.c:40: first defined here make[4]: *** [/git/perf/tools/build/Makefile.build:145: /tmp/build/perf/bench/perf-in.o] Error 1 Prefix those with bench__ and add them to bench/bench.h, so that we can share those on the tools needing to access those variables from signal handlers. Acked-by: Thomas Gleixner Cc: Adrian Hunter Cc: Davidlohr Bueso Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/20200303155811.GD13702@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/bench.h | 4 ++++ tools/perf/bench/epoll-ctl.c | 7 +++---- tools/perf/bench/epoll-wait.c | 11 +++++------ tools/perf/bench/futex-hash.c | 12 ++++++------ tools/perf/bench/futex-lock-pi.c | 11 +++++------ 5 files changed, 23 insertions(+), 22 deletions(-) diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index fddb3ced9db6..4aa6de1aa67d 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -2,6 +2,10 @@ #ifndef BENCH_H #define BENCH_H +#include + +extern struct timeval bench__start, bench__end, bench__runtime; + /* * The madvise transparent hugepage constants were added in glibc * 2.13. For compatibility with older versions of glibc, define these diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c index bb617e568841..a7526c05df38 100644 --- a/tools/perf/bench/epoll-ctl.c +++ b/tools/perf/bench/epoll-ctl.c @@ -35,7 +35,6 @@ static unsigned int nthreads = 0; static unsigned int nsecs = 8; -struct timeval start, end, runtime; static bool done, __verbose, randomize; /* @@ -94,8 +93,8 @@ static void toggle_done(int sig __maybe_unused, { /* inform all threads that we're done for the day */ done = true; - gettimeofday(&end, NULL); - timersub(&end, &start, &runtime); + gettimeofday(&bench__end, NULL); + timersub(&bench__end, &bench__start, &bench__runtime); } static void nest_epollfd(void) @@ -361,7 +360,7 @@ int bench_epoll_ctl(int argc, const char **argv) threads_starting = nthreads; - gettimeofday(&start, NULL); + gettimeofday(&bench__start, NULL); do_threads(worker, cpu); diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c index 7af694437f4e..d1c5cb526b9f 100644 --- a/tools/perf/bench/epoll-wait.c +++ b/tools/perf/bench/epoll-wait.c @@ -90,7 +90,6 @@ static unsigned int nthreads = 0; static unsigned int nsecs = 8; -struct timeval start, end, runtime; static bool wdone, done, __verbose, randomize, nonblocking; /* @@ -276,8 +275,8 @@ static void toggle_done(int sig __maybe_unused, { /* inform all threads that we're done for the day */ done = true; - gettimeofday(&end, NULL); - timersub(&end, &start, &runtime); + gettimeofday(&bench__end, NULL); + timersub(&bench__end, &bench__start, &bench__runtime); } static void print_summary(void) @@ -287,7 +286,7 @@ static void print_summary(void) printf("\nAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n", avg, rel_stddev_stats(stddev, avg), - (int) runtime.tv_sec); + (int)bench__runtime.tv_sec); } static int do_threads(struct worker *worker, struct perf_cpu_map *cpu) @@ -479,7 +478,7 @@ int bench_epoll_wait(int argc, const char **argv) threads_starting = nthreads; - gettimeofday(&start, NULL); + gettimeofday(&bench__start, NULL); do_threads(worker, cpu); @@ -519,7 +518,7 @@ int bench_epoll_wait(int argc, const char **argv) qsort(worker, nthreads, sizeof(struct worker), cmpworker); for (i = 0; i < nthreads; i++) { - unsigned long t = worker[i].ops/runtime.tv_sec; + unsigned long t = worker[i].ops / bench__runtime.tv_sec; update_stats(&throughput_stats, t); diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index 8ba0c3330a9a..21776862e940 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -37,7 +37,7 @@ static unsigned int nfutexes = 1024; static bool fshared = false, done = false, silent = false; static int futex_flag = 0; -struct timeval start, end, runtime; +struct timeval bench__start, bench__end, bench__runtime; static pthread_mutex_t thread_lock; static unsigned int threads_starting; static struct stats throughput_stats; @@ -103,8 +103,8 @@ static void toggle_done(int sig __maybe_unused, { /* inform all threads that we're done for the day */ done = true; - gettimeofday(&end, NULL); - timersub(&end, &start, &runtime); + gettimeofday(&bench__end, NULL); + timersub(&bench__end, &bench__start, &bench__runtime); } static void print_summary(void) @@ -114,7 +114,7 @@ static void print_summary(void) printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n", !silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg), - (int) runtime.tv_sec); + (int)bench__runtime.tv_sec); } int bench_futex_hash(int argc, const char **argv) @@ -161,7 +161,7 @@ int bench_futex_hash(int argc, const char **argv) threads_starting = nthreads; pthread_attr_init(&thread_attr); - gettimeofday(&start, NULL); + gettimeofday(&bench__start, NULL); for (i = 0; i < nthreads; i++) { worker[i].tid = i; worker[i].futex = calloc(nfutexes, sizeof(*worker[i].futex)); @@ -204,7 +204,7 @@ int bench_futex_hash(int argc, const char **argv) pthread_mutex_destroy(&thread_lock); for (i = 0; i < nthreads; i++) { - unsigned long t = worker[i].ops/runtime.tv_sec; + unsigned long t = worker[i].ops / bench__runtime.tv_sec; update_stats(&throughput_stats, t); if (!silent) { if (nfutexes == 1) diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index d0cae8125423..30d97121dc4f 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -37,7 +37,6 @@ static bool silent = false, multi = false; static bool done = false, fshared = false; static unsigned int nthreads = 0; static int futex_flag = 0; -struct timeval start, end, runtime; static pthread_mutex_t thread_lock; static unsigned int threads_starting; static struct stats throughput_stats; @@ -64,7 +63,7 @@ static void print_summary(void) printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n", !silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg), - (int) runtime.tv_sec); + (int)bench__runtime.tv_sec); } static void toggle_done(int sig __maybe_unused, @@ -73,8 +72,8 @@ static void toggle_done(int sig __maybe_unused, { /* inform all threads that we're done for the day */ done = true; - gettimeofday(&end, NULL); - timersub(&end, &start, &runtime); + gettimeofday(&bench__end, NULL); + timersub(&bench__end, &bench__start, &bench__runtime); } static void *workerfn(void *arg) @@ -185,7 +184,7 @@ int bench_futex_lock_pi(int argc, const char **argv) threads_starting = nthreads; pthread_attr_init(&thread_attr); - gettimeofday(&start, NULL); + gettimeofday(&bench__start, NULL); create_threads(worker, thread_attr, cpu); pthread_attr_destroy(&thread_attr); @@ -211,7 +210,7 @@ int bench_futex_lock_pi(int argc, const char **argv) pthread_mutex_destroy(&thread_lock); for (i = 0; i < nthreads; i++) { - unsigned long t = worker[i].ops/runtime.tv_sec; + unsigned long t = worker[i].ops / bench__runtime.tv_sec; update_stats(&throughput_stats, t); if (!silent) From b5c0951860ba98cfe1936b5c0739450875d51451 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 2 Mar 2020 16:03:34 -0300 Subject: [PATCH 036/372] perf symbols: Don't try to find a vmlinux file when looking for kernel modules The dso->kernel value is now set to everything that is in machine->kmaps, but that was being used to decide if vmlinux lookup is needed, which ended up making that lookup be made for kernel modules, that now have dso->kernel set, leading to these kinds of warnings when running on a machine with compressed kernel modules, like fedora:31: [root@five ~]# perf record -F 10000 -a sleep 2 [ perf record: Woken up 1 times to write data ] lzma: fopen failed on vmlinux: 'No such file or directory' lzma: fopen failed on /boot/vmlinux: 'No such file or directory' lzma: fopen failed on /boot/vmlinux-5.5.5-200.fc31.x86_64: 'No such file or directory' lzma: fopen failed on /usr/lib/debug/boot/vmlinux-5.5.5-200.fc31.x86_64: 'No such file or directory' lzma: fopen failed on /lib/modules/5.5.5-200.fc31.x86_64/build/vmlinux: 'No such file or directory' lzma: fopen failed on vmlinux: 'No such file or directory' lzma: fopen failed on /boot/vmlinux: 'No such file or directory' lzma: fopen failed on /boot/vmlinux-5.5.5-200.fc31.x86_64: 'No such file or directory' lzma: fopen failed on /usr/lib/debug/boot/vmlinux-5.5.5-200.fc31.x86_64: 'No such file or directory' lzma: fopen failed on /lib/modules/5.5.5-200.fc31.x86_64/build/vmlinux: 'No such file or directory' lzma: fopen failed on vmlinux: 'No such file or directory' lzma: fopen failed on /boot/vmlinux: 'No such file or directory' lzma: fopen failed on /boot/vmlinux-5.5.5-200.fc31.x86_64: 'No such file or directory' lzma: fopen failed on /usr/lib/debug/boot/vmlinux-5.5.5-200.fc31.x86_64: 'No such file or directory' lzma: fopen failed on /lib/modules/5.5.5-200.fc31.x86_64/build/vmlinux: 'No such file or directory' lzma: fopen failed on vmlinux: 'No such file or directory' lzma: fopen failed on /boot/vmlinux: 'No such file or directory' lzma: fopen failed on /boot/vmlinux-5.5.5-200.fc31.x86_64: 'No such file or directory' lzma: fopen failed on /usr/lib/debug/boot/vmlinux-5.5.5-200.fc31.x86_64: 'No such file or directory' lzma: fopen failed on /lib/modules/5.5.5-200.fc31.x86_64/build/vmlinux: 'No such file or directory' lzma: fopen failed on vmlinux: 'No such file or directory' lzma: fopen failed on /boot/vmlinux: 'No such file or directory' lzma: fopen failed on /boot/vmlinux-5.5.5-200.fc31.x86_64: 'No such file or directory' lzma: fopen failed on /usr/lib/debug/boot/vmlinux-5.5.5-200.fc31.x86_64: 'No such file or directory' lzma: fopen failed on /lib/modules/5.5.5-200.fc31.x86_64/build/vmlinux: 'No such file or directory' [ perf record: Captured and wrote 1.024 MB perf.data (1366 samples) ] [root@five ~]# This happens when collecting the buildid, when we find samples for kernel modules, fix it by checking if the looked up DSO is a kernel module by other means. Fixes: 02213cec64bb ("perf maps: Mark module DSOs with kernel type") Tested-by: Jiri Olsa Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Kim Phillips Cc: Michael Petlan Cc: Namhyung Kim Cc: Ravi Bangoria Link: http://lore.kernel.org/lkml/20200302191007.GD10335@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 1077013d8ce2..26bc6a0096ce 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1622,7 +1622,12 @@ int dso__load(struct dso *dso, struct map *map) goto out; } - if (dso->kernel) { + kmod = dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE || + dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP || + dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE || + dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE_COMP; + + if (dso->kernel && !kmod) { if (dso->kernel == DSO_TYPE_KERNEL) ret = dso__load_kernel_sym(dso, map); else if (dso->kernel == DSO_TYPE_GUEST_KERNEL) @@ -1650,12 +1655,6 @@ int dso__load(struct dso *dso, struct map *map) if (!name) goto out; - kmod = dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE || - dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP || - dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE || - dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE_COMP; - - /* * Read the build id if possible. This is required for * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work From e9d0e7511fda92a6511904996dd0aa57b6d7687a Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 3 Mar 2020 13:17:16 +0300 Subject: [PATCH 037/372] thunderbolt: Fix error code in tb_port_is_width_supported() This function is type bool, and it's supposed to return true on success. Unfortunately, this path takes negative error codes and casts them to bool (true) so it's treated as success instead of failure. Fixes: 91c0c12080d0 ("thunderbolt: Add support for lane bonding") Signed-off-by: Dan Carpenter Signed-off-by: Mika Westerberg --- drivers/thunderbolt/switch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index 7d6ecc342508..a2ce99051c51 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -954,7 +954,7 @@ static bool tb_port_is_width_supported(struct tb_port *port, int width) ret = tb_port_read(port, &phy, TB_CFG_PORT, port->cap_phy + LANE_ADP_CS_0, 1); if (ret) - return ret; + return false; widths = (phy & LANE_ADP_CS_0_SUPPORTED_WIDTH_MASK) >> LANE_ADP_CS_0_SUPPORTED_WIDTH_SHIFT; From 65ac74f1de3334852fb7d9b1b430fa5a06524276 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 4 Mar 2020 11:11:17 +0000 Subject: [PATCH 038/372] iommu/dma: Fix MSI reservation allocation The way cookie_init_hw_msi_region() allocates the iommu_dma_msi_page structures doesn't match the way iommu_put_dma_cookie() frees them. The former performs a single allocation of all the required structures, while the latter tries to free them one at a time. It doesn't quite work for the main use case (the GICv3 ITS where the range is 64kB) when the base granule size is 4kB. This leads to a nice slab corruption on teardown, which is easily observable by simply creating a VF on a SRIOV-capable device, and tearing it down immediately (no need to even make use of it). Fortunately, this only affects systems where the ITS isn't translated by the SMMU, which are both rare and non-standard. Fix it by allocating iommu_dma_msi_page structures one at a time. Fixes: 7c1b058c8b5a3 ("iommu/dma: Handle IOMMU API reserved regions") Signed-off-by: Marc Zyngier Reviewed-by: Eric Auger Cc: Robin Murphy Cc: Joerg Roedel Cc: Will Deacon Cc: stable@vger.kernel.org Reviewed-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/dma-iommu.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index a2e96a5fd9a7..ba128d1cdaee 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -177,15 +177,15 @@ static int cookie_init_hw_msi_region(struct iommu_dma_cookie *cookie, start -= iova_offset(iovad, start); num_pages = iova_align(iovad, end - start) >> iova_shift(iovad); - msi_page = kcalloc(num_pages, sizeof(*msi_page), GFP_KERNEL); - if (!msi_page) - return -ENOMEM; - for (i = 0; i < num_pages; i++) { - msi_page[i].phys = start; - msi_page[i].iova = start; - INIT_LIST_HEAD(&msi_page[i].list); - list_add(&msi_page[i].list, &cookie->msi_page_list); + msi_page = kmalloc(sizeof(*msi_page), GFP_KERNEL); + if (!msi_page) + return -ENOMEM; + + msi_page->phys = start; + msi_page->iova = start; + INIT_LIST_HEAD(&msi_page->list); + list_add(&msi_page->list, &cookie->msi_page_list); start += iovad->granule; } From a754acc3e4bcf0b70f4356fc450fec72875762da Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 4 Mar 2020 18:42:21 +0800 Subject: [PATCH 039/372] KVM: fix Kconfig menu text for -Werror This was evidently copy and pasted from the i915 driver, but the text wasn't updated. Fixes: 4f337faf1c55 ("KVM: allow disabling -Werror") Signed-off-by: Jason A. Donenfeld Signed-off-by: Paolo Bonzini --- arch/x86/kvm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 1bb4927030af..9fea0757db92 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -68,7 +68,7 @@ config KVM_WERROR depends on (X86_64 && !KASAN) || !COMPILE_TEST depends on EXPERT help - Add -Werror to the build flags for (and only for) i915.ko. + Add -Werror to the build flags for KVM. If in doubt, say "N". From e8dca30f7118461d47e1c3510d0e31b277439151 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Thu, 5 Mar 2020 00:25:09 +0100 Subject: [PATCH 040/372] drm/bridge: dw-hdmi: fix AVI frame colorimetry CTA-861-F explicitly states that for RGB colorspace colorimetry should be set to "none". Fix that. Acked-by: Laurent Pinchart Fixes: def23aa7e982 ("drm: bridge: dw-hdmi: Switch to V4L bus format and encodings") Signed-off-by: Jernej Skrabec Link: https://patchwork.freedesktop.org/patch/msgid/20200304232512.51616-2-jernej.skrabec@siol.net --- drivers/gpu/drm/bridge/synopsys/dw-hdmi.c | 46 +++++++++++++---------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c index 67fca439bbfb..24965e53d351 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c @@ -1624,28 +1624,34 @@ static void hdmi_config_AVI(struct dw_hdmi *hdmi, struct drm_display_mode *mode) frame.colorspace = HDMI_COLORSPACE_RGB; /* Set up colorimetry */ - switch (hdmi->hdmi_data.enc_out_encoding) { - case V4L2_YCBCR_ENC_601: - if (hdmi->hdmi_data.enc_in_encoding == V4L2_YCBCR_ENC_XV601) - frame.colorimetry = HDMI_COLORIMETRY_EXTENDED; - else + if (!hdmi_bus_fmt_is_rgb(hdmi->hdmi_data.enc_out_bus_format)) { + switch (hdmi->hdmi_data.enc_out_encoding) { + case V4L2_YCBCR_ENC_601: + if (hdmi->hdmi_data.enc_in_encoding == V4L2_YCBCR_ENC_XV601) + frame.colorimetry = HDMI_COLORIMETRY_EXTENDED; + else + frame.colorimetry = HDMI_COLORIMETRY_ITU_601; + frame.extended_colorimetry = + HDMI_EXTENDED_COLORIMETRY_XV_YCC_601; + break; + case V4L2_YCBCR_ENC_709: + if (hdmi->hdmi_data.enc_in_encoding == V4L2_YCBCR_ENC_XV709) + frame.colorimetry = HDMI_COLORIMETRY_EXTENDED; + else + frame.colorimetry = HDMI_COLORIMETRY_ITU_709; + frame.extended_colorimetry = + HDMI_EXTENDED_COLORIMETRY_XV_YCC_709; + break; + default: /* Carries no data */ frame.colorimetry = HDMI_COLORIMETRY_ITU_601; + frame.extended_colorimetry = + HDMI_EXTENDED_COLORIMETRY_XV_YCC_601; + break; + } + } else { + frame.colorimetry = HDMI_COLORIMETRY_NONE; frame.extended_colorimetry = - HDMI_EXTENDED_COLORIMETRY_XV_YCC_601; - break; - case V4L2_YCBCR_ENC_709: - if (hdmi->hdmi_data.enc_in_encoding == V4L2_YCBCR_ENC_XV709) - frame.colorimetry = HDMI_COLORIMETRY_EXTENDED; - else - frame.colorimetry = HDMI_COLORIMETRY_ITU_709; - frame.extended_colorimetry = - HDMI_EXTENDED_COLORIMETRY_XV_YCC_709; - break; - default: /* Carries no data */ - frame.colorimetry = HDMI_COLORIMETRY_ITU_601; - frame.extended_colorimetry = - HDMI_EXTENDED_COLORIMETRY_XV_YCC_601; - break; + HDMI_EXTENDED_COLORIMETRY_XV_YCC_601; } frame.scan_mode = HDMI_SCAN_MODE_NONE; From 52e7c083b417417bb4352ebf3a6409988b6af238 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Wed, 26 Feb 2020 17:13:53 +0000 Subject: [PATCH 041/372] mailmap: Update email address My Netronome address is no longer active. I am no maintainer, but get_maintainer.pl sometimes returns my name for a small number of files (BPF-related). Add an entry to .mailmap for good measure. Signed-off-by: Quentin Monnet Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200226171353.18982-1-quentin@isovalent.com Signed-off-by: Alexei Starovoitov --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index ffb8f28290c7..a0dfce8de1ba 100644 --- a/.mailmap +++ b/.mailmap @@ -225,6 +225,7 @@ Pratyush Anand Praveen BP Punit Agrawal Qais Yousef +Quentin Monnet Quentin Perret Rafael J. Wysocki Rajesh Shah From 1bc7896e9ef44fd77858b3ef0b8a6840be3a4494 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 4 Mar 2020 11:11:04 -0800 Subject: [PATCH 042/372] bpf: Fix deadlock with rq_lock in bpf_send_signal() When experimenting with bpf_send_signal() helper in our production environment (5.2 based), we experienced a deadlock in NMI mode: #5 [ffffc9002219f770] queued_spin_lock_slowpath at ffffffff8110be24 #6 [ffffc9002219f770] _raw_spin_lock_irqsave at ffffffff81a43012 #7 [ffffc9002219f780] try_to_wake_up at ffffffff810e7ecd #8 [ffffc9002219f7e0] signal_wake_up_state at ffffffff810c7b55 #9 [ffffc9002219f7f0] __send_signal at ffffffff810c8602 #10 [ffffc9002219f830] do_send_sig_info at ffffffff810ca31a #11 [ffffc9002219f868] bpf_send_signal at ffffffff8119d227 #12 [ffffc9002219f988] bpf_overflow_handler at ffffffff811d4140 #13 [ffffc9002219f9e0] __perf_event_overflow at ffffffff811d68cf #14 [ffffc9002219fa10] perf_swevent_overflow at ffffffff811d6a09 #15 [ffffc9002219fa38] ___perf_sw_event at ffffffff811e0f47 #16 [ffffc9002219fc30] __schedule at ffffffff81a3e04d #17 [ffffc9002219fc90] schedule at ffffffff81a3e219 #18 [ffffc9002219fca0] futex_wait_queue_me at ffffffff8113d1b9 #19 [ffffc9002219fcd8] futex_wait at ffffffff8113e529 #20 [ffffc9002219fdf0] do_futex at ffffffff8113ffbc #21 [ffffc9002219fec0] __x64_sys_futex at ffffffff81140d1c #22 [ffffc9002219ff38] do_syscall_64 at ffffffff81002602 #23 [ffffc9002219ff50] entry_SYSCALL_64_after_hwframe at ffffffff81c00068 The above call stack is actually very similar to an issue reported by Commit eac9153f2b58 ("bpf/stackmap: Fix deadlock with rq_lock in bpf_get_stack()") by Song Liu. The only difference is bpf_send_signal() helper instead of bpf_get_stack() helper. The above deadlock is triggered with a perf_sw_event. Similar to Commit eac9153f2b58, the below almost identical reproducer used tracepoint point sched/sched_switch so the issue can be easily caught. /* stress_test.c */ #include #include #include #include #include #include #include #define THREAD_COUNT 1000 char *filename; void *worker(void *p) { void *ptr; int fd; char *pptr; fd = open(filename, O_RDONLY); if (fd < 0) return NULL; while (1) { struct timespec ts = {0, 1000 + rand() % 2000}; ptr = mmap(NULL, 4096 * 64, PROT_READ, MAP_PRIVATE, fd, 0); usleep(1); if (ptr == MAP_FAILED) { printf("failed to mmap\n"); break; } munmap(ptr, 4096 * 64); usleep(1); pptr = malloc(1); usleep(1); pptr[0] = 1; usleep(1); free(pptr); usleep(1); nanosleep(&ts, NULL); } close(fd); return NULL; } int main(int argc, char *argv[]) { void *ptr; int i; pthread_t threads[THREAD_COUNT]; if (argc < 2) return 0; filename = argv[1]; for (i = 0; i < THREAD_COUNT; i++) { if (pthread_create(threads + i, NULL, worker, NULL)) { fprintf(stderr, "Error creating thread\n"); return 0; } } for (i = 0; i < THREAD_COUNT; i++) pthread_join(threads[i], NULL); return 0; } and the following command: 1. run `stress_test /bin/ls` in one windown 2. hack bcc trace.py with the following change: --- a/tools/trace.py +++ b/tools/trace.py @@ -513,6 +513,7 @@ BPF_PERF_OUTPUT(%s); __data.tgid = __tgid; __data.pid = __pid; bpf_get_current_comm(&__data.comm, sizeof(__data.comm)); + bpf_send_signal(10); %s %s %s.perf_submit(%s, &__data, sizeof(__data)); 3. in a different window run ./trace.py -p $(pidof stress_test) t:sched:sched_switch The deadlock can be reproduced in our production system. Similar to Song's fix, the fix is to delay sending signal if irqs is disabled to avoid deadlocks involving with rq_lock. With this change, my above stress-test in our production system won't cause deadlock any more. I also implemented a scale-down version of reproducer in the selftest (a subsequent commit). With latest bpf-next, it complains for the following potential deadlock. [ 32.832450] -> #1 (&p->pi_lock){-.-.}: [ 32.833100] _raw_spin_lock_irqsave+0x44/0x80 [ 32.833696] task_rq_lock+0x2c/0xa0 [ 32.834182] task_sched_runtime+0x59/0xd0 [ 32.834721] thread_group_cputime+0x250/0x270 [ 32.835304] thread_group_cputime_adjusted+0x2e/0x70 [ 32.835959] do_task_stat+0x8a7/0xb80 [ 32.836461] proc_single_show+0x51/0xb0 ... [ 32.839512] -> #0 (&(&sighand->siglock)->rlock){....}: [ 32.840275] __lock_acquire+0x1358/0x1a20 [ 32.840826] lock_acquire+0xc7/0x1d0 [ 32.841309] _raw_spin_lock_irqsave+0x44/0x80 [ 32.841916] __lock_task_sighand+0x79/0x160 [ 32.842465] do_send_sig_info+0x35/0x90 [ 32.842977] bpf_send_signal+0xa/0x10 [ 32.843464] bpf_prog_bc13ed9e4d3163e3_send_signal_tp_sched+0x465/0x1000 [ 32.844301] trace_call_bpf+0x115/0x270 [ 32.844809] perf_trace_run_bpf_submit+0x4a/0xc0 [ 32.845411] perf_trace_sched_switch+0x10f/0x180 [ 32.846014] __schedule+0x45d/0x880 [ 32.846483] schedule+0x5f/0xd0 ... [ 32.853148] Chain exists of: [ 32.853148] &(&sighand->siglock)->rlock --> &p->pi_lock --> &rq->lock [ 32.853148] [ 32.854451] Possible unsafe locking scenario: [ 32.854451] [ 32.855173] CPU0 CPU1 [ 32.855745] ---- ---- [ 32.856278] lock(&rq->lock); [ 32.856671] lock(&p->pi_lock); [ 32.857332] lock(&rq->lock); [ 32.857999] lock(&(&sighand->siglock)->rlock); Deadlock happens on CPU0 when it tries to acquire &sighand->siglock but it has been held by CPU1 and CPU1 tries to grab &rq->lock and cannot get it. This is not exactly the callstack in our production environment, but sympotom is similar and both locks are using spin_lock_irqsave() to acquire the lock, and both involves rq_lock. The fix to delay sending signal when irq is disabled also fixed this issue. Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Cc: Song Liu Link: https://lore.kernel.org/bpf/20200304191104.2796501-1-yhs@fb.com --- kernel/trace/bpf_trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 19e793aa441a..68250d433bd7 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -732,7 +732,7 @@ static int bpf_send_signal_common(u32 sig, enum pid_type type) if (unlikely(!nmi_uaccess_okay())) return -EPERM; - if (in_nmi()) { + if (irqs_disabled()) { /* Do an early check on signal validity. Otherwise, * the error is lost in deferred irq_work. */ From c4ef2f3256e3bc008f98121cad39ee5467db07a6 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 4 Mar 2020 11:11:05 -0800 Subject: [PATCH 043/372] selftests/bpf: Add send_signal_sched_switch test Added one test, send_signal_sched_switch, to test bpf_send_signal() helper triggered by sched/sched_switch tracepoint. This test can be used to verify kernel deadlocks fixed by the previous commit. The test itself is heavily borrowed from Commit eac9153f2b58 ("bpf/stackmap: Fix deadlock with rq_lock in bpf_get_stack()"). Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Cc: Song Liu Link: https://lore.kernel.org/bpf/20200304191105.2796601-1-yhs@fb.com --- .../bpf/prog_tests/send_signal_sched_switch.c | 60 +++++++++++++++++++ .../bpf/progs/test_send_signal_kern.c | 6 ++ 2 files changed, 66 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c b/tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c new file mode 100644 index 000000000000..189a34a7addb --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include +#include "test_send_signal_kern.skel.h" + +static void sigusr1_handler(int signum) +{ +} + +#define THREAD_COUNT 100 + +static void *worker(void *p) +{ + int i; + + for ( i = 0; i < 1000; i++) + usleep(1); + + return NULL; +} + +void test_send_signal_sched_switch(void) +{ + struct test_send_signal_kern *skel; + pthread_t threads[THREAD_COUNT]; + u32 duration = 0; + int i, err; + + signal(SIGUSR1, sigusr1_handler); + + skel = test_send_signal_kern__open_and_load(); + if (CHECK(!skel, "skel_open_and_load", "skeleton open_and_load failed\n")) + return; + + skel->bss->pid = getpid(); + skel->bss->sig = SIGUSR1; + + err = test_send_signal_kern__attach(skel); + if (CHECK(err, "skel_attach", "skeleton attach failed\n")) + goto destroy_skel; + + for (i = 0; i < THREAD_COUNT; i++) { + err = pthread_create(threads + i, NULL, worker, NULL); + if (CHECK(err, "pthread_create", "Error creating thread, %s\n", + strerror(errno))) + goto destroy_skel; + } + + for (i = 0; i < THREAD_COUNT; i++) + pthread_join(threads[i], NULL); + +destroy_skel: + test_send_signal_kern__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/test_send_signal_kern.c b/tools/testing/selftests/bpf/progs/test_send_signal_kern.c index 1acc91e87bfc..b4233d3efac2 100644 --- a/tools/testing/selftests/bpf/progs/test_send_signal_kern.c +++ b/tools/testing/selftests/bpf/progs/test_send_signal_kern.c @@ -31,6 +31,12 @@ int send_signal_tp(void *ctx) return bpf_send_signal_test(ctx); } +SEC("tracepoint/sched/sched_switch") +int send_signal_tp_sched(void *ctx) +{ + return bpf_send_signal_test(ctx); +} + SEC("perf_event") int send_signal_perf(void *ctx) { From 8e5290e710f4ffe8e9f8813e2ed0397a94d7b2f1 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 4 Mar 2020 17:34:47 -0800 Subject: [PATCH 044/372] bpf: Return better error value in delete_elem for struct_ops map The current always succeed behavior in bpf_struct_ops_map_delete_elem() is not ideal for userspace tool. It can be improved to return proper error value. If it is in TOBEFREE, it means unregistration has been already done before but it is in progress and waiting for the subsystem to clear the refcnt to zero, so -EINPROGRESS. If it is INIT, it means the struct_ops has not been registered yet, so -ENOENT. Fixes: 85d33df357b6 ("bpf: Introduce BPF_MAP_TYPE_STRUCT_OPS") Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200305013447.535326-1-kafai@fb.com --- kernel/bpf/bpf_struct_ops.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 042f95534f86..68a89a9f7ccd 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -482,13 +482,21 @@ static int bpf_struct_ops_map_delete_elem(struct bpf_map *map, void *key) prev_state = cmpxchg(&st_map->kvalue.state, BPF_STRUCT_OPS_STATE_INUSE, BPF_STRUCT_OPS_STATE_TOBEFREE); - if (prev_state == BPF_STRUCT_OPS_STATE_INUSE) { + switch (prev_state) { + case BPF_STRUCT_OPS_STATE_INUSE: st_map->st_ops->unreg(&st_map->kvalue.data); if (refcount_dec_and_test(&st_map->kvalue.refcnt)) bpf_map_put(map); + return 0; + case BPF_STRUCT_OPS_STATE_TOBEFREE: + return -EINPROGRESS; + case BPF_STRUCT_OPS_STATE_INIT: + return -ENOENT; + default: + WARN_ON_ONCE(1); + /* Should never happen. Treat it as not found. */ + return -ENOENT; } - - return 0; } static void bpf_struct_ops_map_seq_show_elem(struct bpf_map *map, void *key, From 849b4d94582a966ecb533448415462846da1f0fa Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 4 Mar 2020 17:34:54 -0800 Subject: [PATCH 045/372] bpf: Do not allow map_freeze in struct_ops map struct_ops map cannot support map_freeze. Otherwise, a struct_ops cannot be unregistered from the subsystem. Fixes: 85d33df357b6 ("bpf: Introduce BPF_MAP_TYPE_STRUCT_OPS") Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200305013454.535397-1-kafai@fb.com --- kernel/bpf/syscall.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index a91ad518c050..0c7fb0d4836d 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1510,6 +1510,11 @@ static int map_freeze(const union bpf_attr *attr) if (IS_ERR(map)) return PTR_ERR(map); + if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { + fdput(f); + return -ENOTSUPP; + } + mutex_lock(&map->freeze_mutex); if (map->writecnt) { From 692b0399a22530b2de8490bea75a7d20d59391d0 Mon Sep 17 00:00:00 2001 From: Hamdan Igbaria Date: Mon, 24 Feb 2020 14:41:29 +0200 Subject: [PATCH 046/372] net/mlx5: DR, Fix postsend actions write length Fix the send info write length to be (actions x action) size in bytes. Fixes: 297cccebdc5a ("net/mlx5: DR, Expose an internal API to issue RDMA operations") Signed-off-by: Hamdan Igbaria Reviewed-by: Alex Vesker Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c | 1 - drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c index 6dec2a550a10..2d93228ff633 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c @@ -933,7 +933,6 @@ static int dr_actions_l2_rewrite(struct mlx5dr_domain *dmn, action->rewrite.data = (void *)ops; action->rewrite.num_of_actions = i; - action->rewrite.chunk->byte_size = i * sizeof(*ops); ret = mlx5dr_send_postsend_action(dmn, action); if (ret) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c index c7f10d4f8f8d..095ec7b1399d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c @@ -558,7 +558,8 @@ int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn, int ret; send_info.write.addr = (uintptr_t)action->rewrite.data; - send_info.write.length = action->rewrite.chunk->byte_size; + send_info.write.length = action->rewrite.num_of_actions * + DR_MODIFY_ACTION_SIZE; send_info.write.lkey = 0; send_info.remote_addr = action->rewrite.chunk->mr_addr; send_info.rkey = action->rewrite.chunk->rkey; From 56917766def72f5afdf4235adb91b6897ff26d9d Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 20 Feb 2020 13:40:24 +0200 Subject: [PATCH 047/372] net/mlx5e: kTLS, Fix TCP seq off-by-1 issue in TX resync flow We have an off-by-1 issue in the TCP seq comparison. The last sequence number that belongs to the TCP packet's payload is not "start_seq + len", but one byte before it. Fix it so the 'ends_before' is evaluated properly. This fixes a bug that results in error completions in the kTLS HW offload flows. Fixes: ffbd9ca94e2e ("net/mlx5e: kTLS, Fix corner-case checks in TX resync flow") Signed-off-by: Tariq Toukan Reviewed-by: Boris Pismenny Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index f260dd96873b..52a56622034a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -218,7 +218,7 @@ tx_sync_info_get(struct mlx5e_ktls_offload_context_tx *priv_tx, * this packet was already acknowledged and its record info * was released. */ - ends_before = before(tcp_seq + datalen, tls_record_start_seq(record)); + ends_before = before(tcp_seq + datalen - 1, tls_record_start_seq(record)); if (unlikely(tls_record_is_start_marker(record))) { ret = ends_before ? MLX5E_KTLS_SYNC_SKIP_NO_DATA : MLX5E_KTLS_SYNC_FAIL; From f28ca65efa87b3fb8da3d69ca7cb1ebc0448de66 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 24 Feb 2020 13:56:53 +0200 Subject: [PATCH 048/372] net/mlx5e: kTLS, Fix wrong value in record tracker enum Fix to match the HW spec: TRACKING state is 1, SEARCHING is 2. No real issue for now, as these values are not currently used. Fixes: d2ead1f360e8 ("net/mlx5e: Add kTLS TX HW offload support") Signed-off-by: Tariq Toukan Reviewed-by: Boris Pismenny Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h index a3efa29a4629..63116be6b1d6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h @@ -38,8 +38,8 @@ enum { enum { MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_START = 0, - MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_SEARCHING = 1, - MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_TRACKING = 2, + MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_TRACKING = 1, + MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_SEARCHING = 2, }; struct mlx5e_ktls_offload_context_tx { From 404402abd5f90aa90a134eb9604b1750c1941529 Mon Sep 17 00:00:00 2001 From: Sebastian Hense Date: Thu, 20 Feb 2020 08:11:36 +0100 Subject: [PATCH 049/372] net/mlx5e: Fix endianness handling in pedit mask The mask value is provided as 64 bit and has to be casted in either 32 or 16 bit. On big endian systems the wrong half was casted which resulted in an all zero mask. Fixes: 2b64beba0251 ("net/mlx5e: Support header re-write of partial fields in TC pedit offload") Signed-off-by: Sebastian Hense Reviewed-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 74091f72c9a8..ec5fc52bf572 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2476,10 +2476,11 @@ static int offload_pedit_fields(struct pedit_headers_action *hdrs, continue; if (f->field_bsize == 32) { - mask_be32 = *(__be32 *)&mask; + mask_be32 = (__be32)mask; mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32)); } else if (f->field_bsize == 16) { - mask_be16 = *(__be16 *)&mask; + mask_be32 = (__be32)mask; + mask_be16 = *(__be16 *)&mask_be32; mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16)); } From 0b136454741b2f6cb18d55e355d396db9248b2ab Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 19 Feb 2020 09:03:28 +0200 Subject: [PATCH 050/372] net/mlx5: Clear LAG notifier pointer after unregister After returning from unregister_netdevice_notifier_dev_net(), set the notifier_call field to NULL so successive call to mlx5_lag_add() will function as expected. Fixes: 7907f23adc18 ("net/mlx5: Implement RoCE LAG feature") Signed-off-by: Eli Cohen Reviewed-by: Vlad Buslov Reviewed-by: Raed Salem Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lag.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index 8e19f6ab8393..93052b07c76c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -615,8 +615,10 @@ void mlx5_lag_remove(struct mlx5_core_dev *dev) break; if (i == MLX5_MAX_PORTS) { - if (ldev->nb.notifier_call) + if (ldev->nb.notifier_call) { unregister_netdevice_notifier_net(&init_net, &ldev->nb); + ldev->nb.notifier_call = NULL; + } mlx5_lag_mp_cleanup(ldev); cancel_delayed_work_sync(&ldev->bond_work); mlx5_lag_dev_free(ldev); From 9803aac7b5508718989e4cde11b854fc01037b01 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 7 Jan 2020 22:53:19 +0100 Subject: [PATCH 051/372] drm/komeda: mark PM functions as __maybe_unused Without this, we get a couple of warnings when CONFIG_PM is disabled: drivers/gpu/drm/arm/display/komeda/komeda_drv.c:156:12: error: 'komeda_rt_pm_resume' defined but not used [-Werror=unused-function] static int komeda_rt_pm_resume(struct device *dev) ^~~~~~~~~~~~~~~~~~~ drivers/gpu/drm/arm/display/komeda/komeda_drv.c:149:12: error: 'komeda_rt_pm_suspend' defined but not used [-Werror=unused-function] static int komeda_rt_pm_suspend(struct device *dev) ^~~~~~~~~~~~~~~~~~~~ Fixes: efb465088518 ("drm/komeda: Add runtime_pm support") Signed-off-by: Arnd Bergmann Reviewed-by: James Qian Wang (Arm Technology China) Signed-off-by: james qian wang (Arm Technology China) Link: https://patchwork.freedesktop.org/patch/msgid/20200107215327.1579195-1-arnd@arndb.de --- drivers/gpu/drm/arm/display/komeda/komeda_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_drv.c b/drivers/gpu/drm/arm/display/komeda/komeda_drv.c index ea5cd1e17304..e7933930a657 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_drv.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_drv.c @@ -146,14 +146,14 @@ static const struct of_device_id komeda_of_match[] = { MODULE_DEVICE_TABLE(of, komeda_of_match); -static int komeda_rt_pm_suspend(struct device *dev) +static int __maybe_unused komeda_rt_pm_suspend(struct device *dev) { struct komeda_drv *mdrv = dev_get_drvdata(dev); return komeda_dev_suspend(mdrv->mdev); } -static int komeda_rt_pm_resume(struct device *dev) +static int __maybe_unused komeda_rt_pm_resume(struct device *dev) { struct komeda_drv *mdrv = dev_get_drvdata(dev); From 8019ad13ef7f64be44d4f892af9c840179009254 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 4 Mar 2020 11:28:31 +0100 Subject: [PATCH 052/372] futex: Fix inode life-time issue As reported by Jann, ihold() does not in fact guarantee inode persistence. And instead of making it so, replace the usage of inode pointers with a per boot, machine wide, unique inode identifier. This sequence number is global, but shared (file backed) futexes are rare enough that this should not become a performance issue. Reported-by: Jann Horn Suggested-by: Linus Torvalds Signed-off-by: Peter Zijlstra (Intel) --- fs/inode.c | 1 + include/linux/fs.h | 1 + include/linux/futex.h | 17 +++++---- kernel/futex.c | 89 ++++++++++++++++++++++++++----------------- 4 files changed, 65 insertions(+), 43 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index 7d57068b6b7a..93d9252a00ab 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -138,6 +138,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) inode->i_sb = sb; inode->i_blkbits = sb->s_blocksize_bits; inode->i_flags = 0; + atomic64_set(&inode->i_sequence, 0); atomic_set(&inode->i_count, 1); inode->i_op = &empty_iops; inode->i_fop = &no_open_fops; diff --git a/include/linux/fs.h b/include/linux/fs.h index 3cd4fe6b845e..abedbffe2c9e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -698,6 +698,7 @@ struct inode { struct rcu_head i_rcu; }; atomic64_t i_version; + atomic64_t i_sequence; /* see futex */ atomic_t i_count; atomic_t i_dio_count; atomic_t i_writecount; diff --git a/include/linux/futex.h b/include/linux/futex.h index 5cc3fed27d4c..b70df27d7e85 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -31,23 +31,26 @@ struct task_struct; union futex_key { struct { + u64 i_seq; unsigned long pgoff; - struct inode *inode; - int offset; + unsigned int offset; } shared; struct { + union { + struct mm_struct *mm; + u64 __tmp; + }; unsigned long address; - struct mm_struct *mm; - int offset; + unsigned int offset; } private; struct { + u64 ptr; unsigned long word; - void *ptr; - int offset; + unsigned int offset; } both; }; -#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } } +#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = 0ULL } } #ifdef CONFIG_FUTEX enum { diff --git a/kernel/futex.c b/kernel/futex.c index 0cf84c8664f2..e14f7cd45dbd 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -429,7 +429,7 @@ static void get_futex_key_refs(union futex_key *key) switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { case FUT_OFF_INODE: - ihold(key->shared.inode); /* implies smp_mb(); (B) */ + smp_mb(); /* explicit smp_mb(); (B) */ break; case FUT_OFF_MMSHARED: futex_get_mm(key); /* implies smp_mb(); (B) */ @@ -463,7 +463,6 @@ static void drop_futex_key_refs(union futex_key *key) switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { case FUT_OFF_INODE: - iput(key->shared.inode); break; case FUT_OFF_MMSHARED: mmdrop(key->private.mm); @@ -505,6 +504,46 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout, return timeout; } +/* + * Generate a machine wide unique identifier for this inode. + * + * This relies on u64 not wrapping in the life-time of the machine; which with + * 1ns resolution means almost 585 years. + * + * This further relies on the fact that a well formed program will not unmap + * the file while it has a (shared) futex waiting on it. This mapping will have + * a file reference which pins the mount and inode. + * + * If for some reason an inode gets evicted and read back in again, it will get + * a new sequence number and will _NOT_ match, even though it is the exact same + * file. + * + * It is important that match_futex() will never have a false-positive, esp. + * for PI futexes that can mess up the state. The above argues that false-negatives + * are only possible for malformed programs. + */ +static u64 get_inode_sequence_number(struct inode *inode) +{ + static atomic64_t i_seq; + u64 old; + + /* Does the inode already have a sequence number? */ + old = atomic64_read(&inode->i_sequence); + if (likely(old)) + return old; + + for (;;) { + u64 new = atomic64_add_return(1, &i_seq); + if (WARN_ON_ONCE(!new)) + continue; + + old = atomic64_cmpxchg_relaxed(&inode->i_sequence, 0, new); + if (old) + return old; + return new; + } +} + /** * get_futex_key() - Get parameters which are the keys for a futex * @uaddr: virtual address of the futex @@ -517,9 +556,15 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout, * * The key words are stored in @key on success. * - * For shared mappings, it's (page->index, file_inode(vma->vm_file), - * offset_within_page). For private mappings, it's (uaddr, current->mm). - * We can usually work out the index without swapping in the page. + * For shared mappings (when @fshared), the key is: + * ( inode->i_sequence, page->index, offset_within_page ) + * [ also see get_inode_sequence_number() ] + * + * For private mappings (or when !@fshared), the key is: + * ( current->mm, address, 0 ) + * + * This allows (cross process, where applicable) identification of the futex + * without keeping the page pinned for the duration of the FUTEX_WAIT. * * lock_page() might sleep, the caller should not hold a spinlock. */ @@ -659,8 +704,6 @@ again: key->private.mm = mm; key->private.address = address; - get_futex_key_refs(key); /* implies smp_mb(); (B) */ - } else { struct inode *inode; @@ -692,40 +735,14 @@ again: goto again; } - /* - * Take a reference unless it is about to be freed. Previously - * this reference was taken by ihold under the page lock - * pinning the inode in place so i_lock was unnecessary. The - * only way for this check to fail is if the inode was - * truncated in parallel which is almost certainly an - * application bug. In such a case, just retry. - * - * We are not calling into get_futex_key_refs() in file-backed - * cases, therefore a successful atomic_inc return below will - * guarantee that get_futex_key() will still imply smp_mb(); (B). - */ - if (!atomic_inc_not_zero(&inode->i_count)) { - rcu_read_unlock(); - put_page(page); - - goto again; - } - - /* Should be impossible but lets be paranoid for now */ - if (WARN_ON_ONCE(inode->i_mapping != mapping)) { - err = -EFAULT; - rcu_read_unlock(); - iput(inode); - - goto out; - } - key->both.offset |= FUT_OFF_INODE; /* inode-based key */ - key->shared.inode = inode; + key->shared.i_seq = get_inode_sequence_number(inode); key->shared.pgoff = basepage_index(tail); rcu_read_unlock(); } + get_futex_key_refs(key); /* implies smp_mb(); (B) */ + out: put_page(page); return err; From cfd3bc752a3f5529506d279deb42e3bc8055695b Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Sun, 23 Feb 2020 11:34:49 -0800 Subject: [PATCH 053/372] perf diff: Fix undefined string comparision spotted by clang's -Wstring-compare clang warns: util/block-info.c:298:18: error: result of comparison against a string literal is unspecified (use an explicit string comparison function instead) [-Werror,-Wstring-compare] if ((start_line != SRCLINE_UNKNOWN) && (end_line != SRCLINE_UNKNOWN)) { ^ ~~~~~~~~~~~~~~~ util/block-info.c:298:51: error: result of comparison against a string literal is unspecified (use an explicit string comparison function instead) [-Werror,-Wstring-compare] if ((start_line != SRCLINE_UNKNOWN) && (end_line != SRCLINE_UNKNOWN)) { ^ ~~~~~~~~~~~~~~~ util/block-info.c:298:18: error: result of comparison against a string literal is unspecified (use an explicit string comparison function instead) [-Werror,-Wstring-compare] if ((start_line != SRCLINE_UNKNOWN) && (end_line != SRCLINE_UNKNOWN)) { ^ ~~~~~~~~~~~~~~~ util/block-info.c:298:51: error: result of comparison against a string literal is unspecified (use an explicit string comparison function instead) [-Werror,-Wstring-compare] if ((start_line != SRCLINE_UNKNOWN) && (end_line != SRCLINE_UNKNOWN)) { ^ ~~~~~~~~~~~~~~~ util/map.c:434:15: error: result of comparison against a string literal is unspecified (use an explicit string comparison function instead) [-Werror,-Wstring-compare] if (srcline != SRCLINE_UNKNOWN) ^ ~~~~~~~~~~~~~~~ Reviewer Notes: Looks good to me. Some more context: https://clang.llvm.org/docs/DiagnosticsReference.html#wstring-compare The spec says: J.1 Unspecified behavior The following are unspecified: .. Whether two string literals result in distinct arrays (6.4.5). Signed-off-by: Nick Desaulniers Reviewed-by: Ian Rogers Cc: Alexander Shishkin Cc: Changbin Du Cc: Jin Yao Cc: Jiri Olsa Cc: John Keeping Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Song Liu Cc: clang-built-linux@googlegroups.com Link: https://github.com/ClangBuiltLinux/linux/issues/900 Link: http://lore.kernel.org/lkml/20200223193456.25291-1-nick.desaulniers@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-diff.c | 3 ++- tools/perf/util/block-info.c | 3 ++- tools/perf/util/map.c | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index f8b6ae557d8b..c03c36fde7e2 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -1312,7 +1312,8 @@ static int cycles_printf(struct hist_entry *he, struct hist_entry *pair, end_line = map__srcline(he->ms.map, bi->sym->start + bi->end, he->ms.sym); - if ((start_line != SRCLINE_UNKNOWN) && (end_line != SRCLINE_UNKNOWN)) { + if ((strncmp(start_line, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0) && + (strncmp(end_line, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0)) { scnprintf(buf, sizeof(buf), "[%s -> %s] %4ld", start_line, end_line, block_he->diff.cycles); } else { diff --git a/tools/perf/util/block-info.c b/tools/perf/util/block-info.c index c4b030bf6ec2..fbbb6d640dad 100644 --- a/tools/perf/util/block-info.c +++ b/tools/perf/util/block-info.c @@ -295,7 +295,8 @@ static int block_range_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, end_line = map__srcline(he->ms.map, bi->sym->start + bi->end, he->ms.sym); - if ((start_line != SRCLINE_UNKNOWN) && (end_line != SRCLINE_UNKNOWN)) { + if ((strncmp(start_line, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0) && + (strncmp(end_line, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0)) { scnprintf(buf, sizeof(buf), "[%s -> %s]", start_line, end_line); } else { diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index a08ca276098e..95428511300d 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -431,7 +431,7 @@ int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix, if (map && map->dso) { char *srcline = map__srcline(map, addr, NULL); - if (srcline != SRCLINE_UNKNOWN) + if (strncmp(srcline, SRCLINE_UNKNOWN, strlen(SRCLINE_UNKNOWN)) != 0) ret = fprintf(fp, "%s%s", prefix, srcline); free_srcline(srcline); } From 29b4f5f188571c112713c35cc87eefb46efee612 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Thu, 5 Mar 2020 10:37:12 +0200 Subject: [PATCH 054/372] perf top: Fix stdio interface input handling with glibc 2.28+ Since glibc 2.28 when running 'perf top --stdio', input handling no longer works, but hitting any key always just prints the "Mapped keys" help text. To fix it, call clearerr() in the display_thread() loop to clear any EOF sticky errors, as instructed in the glibc NEWS file (https://sourceware.org/git/?p=glibc.git;a=blob;f=NEWS): * All stdio functions now treat end-of-file as a sticky condition. If you read from a file until EOF, and then the file is enlarged by another process, you must call clearerr or another function with the same effect (e.g. fseek, rewind) before you can read the additional data. This corrects a longstanding C99 conformance bug. It is most likely to affect programs that use stdio to read interactive input from a terminal. (Bug #1190.) Signed-off-by: Tommi Rantala Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20200305083714.9381-2-tommi.t.rantala@nokia.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index f6dd1a63f159..d2539b793f9d 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -684,7 +684,9 @@ repeat: delay_msecs = top->delay_secs * MSEC_PER_SEC; set_term_quiet_input(&save); /* trash return*/ - getc(stdin); + clearerr(stdin); + if (poll(&stdin_poll, 1, 0) > 0) + getc(stdin); while (!done) { perf_top__print_sym_table(top); From f649bd9dd5d5004543bbc3c50b829577b49f5d75 Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Thu, 5 Mar 2020 10:37:13 +0200 Subject: [PATCH 055/372] perf bench futex-wake: Restore thread count default to online CPU count Since commit 3b2323c2c1c4 ("perf bench futex: Use cpumaps") the default number of threads the benchmark uses got changed from number of online CPUs to zero: $ perf bench futex wake # Running 'futex/wake' benchmark: Run summary [PID 15930]: blocking on 0 threads (at [private] futex 0x558b8ee4bfac), waking up 1 at a time. [Run 1]: Wokeup 0 of 0 threads in 0.0000 ms [...] [Run 10]: Wokeup 0 of 0 threads in 0.0000 ms Wokeup 0 of 0 threads in 0.0004 ms (+-40.82%) Restore the old behavior by grabbing the number of online CPUs via cpu->nr: $ perf bench futex wake # Running 'futex/wake' benchmark: Run summary [PID 18356]: blocking on 8 threads (at [private] futex 0xb3e62c), waking up 1 at a time. [Run 1]: Wokeup 8 of 8 threads in 0.0260 ms [...] [Run 10]: Wokeup 8 of 8 threads in 0.0270 ms Wokeup 8 of 8 threads in 0.0419 ms (+-24.35%) Fixes: 3b2323c2c1c4 ("perf bench futex: Use cpumaps") Signed-off-by: Tommi Rantala Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Darren Hart Cc: Davidlohr Bueso Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lore.kernel.org/lkml/20200305083714.9381-3-tommi.t.rantala@nokia.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/futex-wake.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c index df810096abfe..58906e9499bb 100644 --- a/tools/perf/bench/futex-wake.c +++ b/tools/perf/bench/futex-wake.c @@ -43,7 +43,7 @@ static bool done = false, silent = false, fshared = false; static pthread_mutex_t thread_lock; static pthread_cond_t thread_parent, thread_worker; static struct stats waketime_stats, wakeup_stats; -static unsigned int ncpus, threads_starting, nthreads = 0; +static unsigned int threads_starting, nthreads = 0; static int futex_flag = 0; static const struct option options[] = { @@ -141,7 +141,7 @@ int bench_futex_wake(int argc, const char **argv) sigaction(SIGINT, &act, NULL); if (!nthreads) - nthreads = ncpus; + nthreads = cpu->nr; worker = calloc(nthreads, sizeof(*worker)); if (!worker) From 7b919a53102d81cd2e310b4941ac51c465d249ca Mon Sep 17 00:00:00 2001 From: Tommi Rantala Date: Thu, 5 Mar 2020 10:37:14 +0200 Subject: [PATCH 056/372] perf bench: Clear struct sigaction before sigaction() syscall Avoid garbage in sigaction structs used in sigaction() syscalls. Valgrind is complaining about it. Signed-off-by: Tommi Rantala Cc: Alexander Shishkin Cc: Changbin Du Cc: Darren Hart Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lore.kernel.org/lkml/20200305083714.9381-4-tommi.t.rantala@nokia.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/epoll-ctl.c | 1 + tools/perf/bench/epoll-wait.c | 1 + tools/perf/bench/futex-hash.c | 1 + tools/perf/bench/futex-lock-pi.c | 1 + tools/perf/bench/futex-requeue.c | 1 + tools/perf/bench/futex-wake-parallel.c | 1 + tools/perf/bench/futex-wake.c | 1 + 7 files changed, 7 insertions(+) diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c index a7526c05df38..cadc18d42aa4 100644 --- a/tools/perf/bench/epoll-ctl.c +++ b/tools/perf/bench/epoll-ctl.c @@ -312,6 +312,7 @@ int bench_epoll_ctl(int argc, const char **argv) exit(EXIT_FAILURE); } + memset(&act, 0, sizeof(act)); sigfillset(&act.sa_mask); act.sa_sigaction = toggle_done; sigaction(SIGINT, &act, NULL); diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c index d1c5cb526b9f..f938c585d512 100644 --- a/tools/perf/bench/epoll-wait.c +++ b/tools/perf/bench/epoll-wait.c @@ -426,6 +426,7 @@ int bench_epoll_wait(int argc, const char **argv) exit(EXIT_FAILURE); } + memset(&act, 0, sizeof(act)); sigfillset(&act.sa_mask); act.sa_sigaction = toggle_done; sigaction(SIGINT, &act, NULL); diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index 21776862e940..65eebe06c04d 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -137,6 +137,7 @@ int bench_futex_hash(int argc, const char **argv) if (!cpu) goto errmem; + memset(&act, 0, sizeof(act)); sigfillset(&act.sa_mask); act.sa_sigaction = toggle_done; sigaction(SIGINT, &act, NULL); diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index 30d97121dc4f..89fd8f325f38 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -160,6 +160,7 @@ int bench_futex_lock_pi(int argc, const char **argv) if (!cpu) err(EXIT_FAILURE, "calloc"); + memset(&act, 0, sizeof(act)); sigfillset(&act.sa_mask); act.sa_sigaction = toggle_done; sigaction(SIGINT, &act, NULL); diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c index a00a6891447a..7a15c2e61022 100644 --- a/tools/perf/bench/futex-requeue.c +++ b/tools/perf/bench/futex-requeue.c @@ -128,6 +128,7 @@ int bench_futex_requeue(int argc, const char **argv) if (!cpu) err(EXIT_FAILURE, "cpu_map__new"); + memset(&act, 0, sizeof(act)); sigfillset(&act.sa_mask); act.sa_sigaction = toggle_done; sigaction(SIGINT, &act, NULL); diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c index a053cf2b7039..cd2b81a845ac 100644 --- a/tools/perf/bench/futex-wake-parallel.c +++ b/tools/perf/bench/futex-wake-parallel.c @@ -234,6 +234,7 @@ int bench_futex_wake_parallel(int argc, const char **argv) exit(EXIT_FAILURE); } + memset(&act, 0, sizeof(act)); sigfillset(&act.sa_mask); act.sa_sigaction = toggle_done; sigaction(SIGINT, &act, NULL); diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c index 58906e9499bb..2dfcef3e371e 100644 --- a/tools/perf/bench/futex-wake.c +++ b/tools/perf/bench/futex-wake.c @@ -136,6 +136,7 @@ int bench_futex_wake(int argc, const char **argv) if (!cpu) err(EXIT_FAILURE, "calloc"); + memset(&act, 0, sizeof(act)); sigfillset(&act.sa_mask); act.sa_sigaction = toggle_done; sigaction(SIGINT, &act, NULL); From 3f5777fbaf04c58d940526a22a2e0c813c837936 Mon Sep 17 00:00:00 2001 From: John Garry Date: Thu, 5 Mar 2020 19:08:01 +0800 Subject: [PATCH 057/372] perf jevents: Fix leak of mapfile memory The memory for global pointer is never freed during normal program execution, so let's do that in the main function exit as a good programming practice. A stray blank line is also removed. Reported-by: Jiri Olsa Signed-off-by: John Garry Cc: Alexander Shishkin Cc: Andi Kleen Cc: James Clark Cc: Joakim Zhang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Will Deacon Cc: linuxarm@huawei.com Link: http://lore.kernel.org/lkml/1583406486-154841-2-git-send-email-john.garry@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/jevents.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 079c77b6a2fd..27b4da80f751 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -1082,10 +1082,9 @@ static int process_one_file(const char *fpath, const struct stat *sb, */ int main(int argc, char *argv[]) { - int rc; + int rc, ret = 0; int maxfds; char ldirname[PATH_MAX]; - const char *arch; const char *output_file; const char *start_dirname; @@ -1156,7 +1155,8 @@ int main(int argc, char *argv[]) /* Make build fail */ fclose(eventsfp); free_arch_std_events(); - return 1; + ret = 1; + goto out_free_mapfile; } else if (rc) { goto empty_map; } @@ -1174,14 +1174,17 @@ int main(int argc, char *argv[]) /* Make build fail */ fclose(eventsfp); free_arch_std_events(); - return 1; + ret = 1; } - return 0; + + goto out_free_mapfile; empty_map: fclose(eventsfp); create_empty_mapping(output_file); free_arch_std_events(); - return 0; +out_free_mapfile: + free(mapfile); + return ret; } From 441b62acd9c809e87bab45ad1d82b1b3b77cb4f0 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 5 Mar 2020 23:11:08 -0800 Subject: [PATCH 058/372] tools: Fix off-by 1 relative directory includes This is currently working due to extra include paths in the build. Committer testing: $ cd tools/include/uapi/asm/ Before this patch: $ ls -la ../../arch/x86/include/uapi/asm/errno.h ls: cannot access '../../arch/x86/include/uapi/asm/errno.h': No such file or directory $ After this patch; $ ls -la ../../../arch/x86/include/uapi/asm/errno.h -rw-rw-r--. 1 acme acme 31 Feb 20 12:42 ../../../arch/x86/include/uapi/asm/errno.h $ Check that that is still under tools/, i.e. hasn't escaped into the main kernel sources: $ cd ../../../arch/x86/include/uapi/asm/ $ pwd /home/acme/git/perf/tools/arch/x86/include/uapi/asm $ Signed-off-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexios Zavras Cc: Andi Kleen Cc: Greg Kroah-Hartman Cc: Igor Lubashev Cc: Kan Liang Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Nick Desaulniers Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Wei Li Link: http://lore.kernel.org/lkml/20200306071110.130202-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/asm/errno.h | 14 +++++------ tools/perf/arch/arm64/util/arm-spe.c | 20 ++++++++-------- tools/perf/arch/arm64/util/perf_regs.c | 2 +- tools/perf/arch/powerpc/util/perf_regs.c | 4 ++-- tools/perf/arch/x86/util/auxtrace.c | 14 +++++------ tools/perf/arch/x86/util/event.c | 12 +++++----- tools/perf/arch/x86/util/header.c | 4 ++-- tools/perf/arch/x86/util/intel-bts.c | 24 +++++++++---------- tools/perf/arch/x86/util/intel-pt.c | 30 ++++++++++++------------ tools/perf/arch/x86/util/machine.c | 6 ++--- tools/perf/arch/x86/util/perf_regs.c | 8 +++---- tools/perf/arch/x86/util/pmu.c | 6 ++--- 12 files changed, 72 insertions(+), 72 deletions(-) diff --git a/tools/include/uapi/asm/errno.h b/tools/include/uapi/asm/errno.h index ce3c5945a1c4..637189ec1ab9 100644 --- a/tools/include/uapi/asm/errno.h +++ b/tools/include/uapi/asm/errno.h @@ -1,18 +1,18 @@ /* SPDX-License-Identifier: GPL-2.0 */ #if defined(__i386__) || defined(__x86_64__) -#include "../../arch/x86/include/uapi/asm/errno.h" +#include "../../../arch/x86/include/uapi/asm/errno.h" #elif defined(__powerpc__) -#include "../../arch/powerpc/include/uapi/asm/errno.h" +#include "../../../arch/powerpc/include/uapi/asm/errno.h" #elif defined(__sparc__) -#include "../../arch/sparc/include/uapi/asm/errno.h" +#include "../../../arch/sparc/include/uapi/asm/errno.h" #elif defined(__alpha__) -#include "../../arch/alpha/include/uapi/asm/errno.h" +#include "../../../arch/alpha/include/uapi/asm/errno.h" #elif defined(__mips__) -#include "../../arch/mips/include/uapi/asm/errno.h" +#include "../../../arch/mips/include/uapi/asm/errno.h" #elif defined(__ia64__) -#include "../../arch/ia64/include/uapi/asm/errno.h" +#include "../../../arch/ia64/include/uapi/asm/errno.h" #elif defined(__xtensa__) -#include "../../arch/xtensa/include/uapi/asm/errno.h" +#include "../../../arch/xtensa/include/uapi/asm/errno.h" #else #include #endif diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c index 8d6821d9c3f6..27653be24447 100644 --- a/tools/perf/arch/arm64/util/arm-spe.c +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -11,17 +11,17 @@ #include #include -#include "../../util/cpumap.h" -#include "../../util/event.h" -#include "../../util/evsel.h" -#include "../../util/evlist.h" -#include "../../util/session.h" +#include "../../../util/cpumap.h" +#include "../../../util/event.h" +#include "../../../util/evsel.h" +#include "../../../util/evlist.h" +#include "../../../util/session.h" #include // page_size -#include "../../util/pmu.h" -#include "../../util/debug.h" -#include "../../util/auxtrace.h" -#include "../../util/record.h" -#include "../../util/arm-spe.h" +#include "../../../util/pmu.h" +#include "../../../util/debug.h" +#include "../../../util/auxtrace.h" +#include "../../../util/record.h" +#include "../../../util/arm-spe.h" #define KiB(x) ((x) * 1024) #define MiB(x) ((x) * 1024 * 1024) diff --git a/tools/perf/arch/arm64/util/perf_regs.c b/tools/perf/arch/arm64/util/perf_regs.c index 2864e2e3776d..2833e101a7c6 100644 --- a/tools/perf/arch/arm64/util/perf_regs.c +++ b/tools/perf/arch/arm64/util/perf_regs.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -#include "../../util/perf_regs.h" +#include "../../../util/perf_regs.h" const struct sample_reg sample_reg_masks[] = { SMPL_REG_END diff --git a/tools/perf/arch/powerpc/util/perf_regs.c b/tools/perf/arch/powerpc/util/perf_regs.c index e9c436eeffc9..0a5242900248 100644 --- a/tools/perf/arch/powerpc/util/perf_regs.c +++ b/tools/perf/arch/powerpc/util/perf_regs.c @@ -4,8 +4,8 @@ #include #include -#include "../../util/perf_regs.h" -#include "../../util/debug.h" +#include "../../../util/perf_regs.h" +#include "../../../util/debug.h" #include diff --git a/tools/perf/arch/x86/util/auxtrace.c b/tools/perf/arch/x86/util/auxtrace.c index 7abc9fd4cbec..3da506e13f49 100644 --- a/tools/perf/arch/x86/util/auxtrace.c +++ b/tools/perf/arch/x86/util/auxtrace.c @@ -7,13 +7,13 @@ #include #include -#include "../../util/header.h" -#include "../../util/debug.h" -#include "../../util/pmu.h" -#include "../../util/auxtrace.h" -#include "../../util/intel-pt.h" -#include "../../util/intel-bts.h" -#include "../../util/evlist.h" +#include "../../../util/header.h" +#include "../../../util/debug.h" +#include "../../../util/pmu.h" +#include "../../../util/auxtrace.h" +#include "../../../util/intel-pt.h" +#include "../../../util/intel-bts.h" +#include "../../../util/evlist.h" static struct auxtrace_record *auxtrace_record__init_intel(struct evlist *evlist, diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c index ac45015cc6ba..047dc00eafa6 100644 --- a/tools/perf/arch/x86/util/event.c +++ b/tools/perf/arch/x86/util/event.c @@ -3,12 +3,12 @@ #include #include -#include "../../util/event.h" -#include "../../util/synthetic-events.h" -#include "../../util/machine.h" -#include "../../util/tool.h" -#include "../../util/map.h" -#include "../../util/debug.h" +#include "../../../util/event.h" +#include "../../../util/synthetic-events.h" +#include "../../../util/machine.h" +#include "../../../util/tool.h" +#include "../../../util/map.h" +#include "../../../util/debug.h" #if defined(__x86_64__) diff --git a/tools/perf/arch/x86/util/header.c b/tools/perf/arch/x86/util/header.c index aa6deb463bf3..578c8c568ffd 100644 --- a/tools/perf/arch/x86/util/header.c +++ b/tools/perf/arch/x86/util/header.c @@ -7,8 +7,8 @@ #include #include -#include "../../util/debug.h" -#include "../../util/header.h" +#include "../../../util/debug.h" +#include "../../../util/header.h" static inline void cpuid(unsigned int op, unsigned int *a, unsigned int *b, unsigned int *c, diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c index 26cee1052179..09f93800bffd 100644 --- a/tools/perf/arch/x86/util/intel-bts.c +++ b/tools/perf/arch/x86/util/intel-bts.c @@ -11,18 +11,18 @@ #include #include -#include "../../util/cpumap.h" -#include "../../util/event.h" -#include "../../util/evsel.h" -#include "../../util/evlist.h" -#include "../../util/mmap.h" -#include "../../util/session.h" -#include "../../util/pmu.h" -#include "../../util/debug.h" -#include "../../util/record.h" -#include "../../util/tsc.h" -#include "../../util/auxtrace.h" -#include "../../util/intel-bts.h" +#include "../../../util/cpumap.h" +#include "../../../util/event.h" +#include "../../../util/evsel.h" +#include "../../../util/evlist.h" +#include "../../../util/mmap.h" +#include "../../../util/session.h" +#include "../../../util/pmu.h" +#include "../../../util/debug.h" +#include "../../../util/record.h" +#include "../../../util/tsc.h" +#include "../../../util/auxtrace.h" +#include "../../../util/intel-bts.h" #include // page_size #define KiB(x) ((x) * 1024) diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index 7eea4fd7ce58..1643aed8c4c8 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -13,23 +13,23 @@ #include #include -#include "../../util/session.h" -#include "../../util/event.h" -#include "../../util/evlist.h" -#include "../../util/evsel.h" -#include "../../util/evsel_config.h" -#include "../../util/cpumap.h" -#include "../../util/mmap.h" +#include "../../../util/session.h" +#include "../../../util/event.h" +#include "../../../util/evlist.h" +#include "../../../util/evsel.h" +#include "../../../util/evsel_config.h" +#include "../../../util/cpumap.h" +#include "../../../util/mmap.h" #include -#include "../../util/parse-events.h" -#include "../../util/pmu.h" -#include "../../util/debug.h" -#include "../../util/auxtrace.h" -#include "../../util/record.h" -#include "../../util/target.h" -#include "../../util/tsc.h" +#include "../../../util/parse-events.h" +#include "../../../util/pmu.h" +#include "../../../util/debug.h" +#include "../../../util/auxtrace.h" +#include "../../../util/record.h" +#include "../../../util/target.h" +#include "../../../util/tsc.h" #include // page_size -#include "../../util/intel-pt.h" +#include "../../../util/intel-pt.h" #define KiB(x) ((x) * 1024) #define MiB(x) ((x) * 1024 * 1024) diff --git a/tools/perf/arch/x86/util/machine.c b/tools/perf/arch/x86/util/machine.c index e17e080e76f4..31679c35d493 100644 --- a/tools/perf/arch/x86/util/machine.c +++ b/tools/perf/arch/x86/util/machine.c @@ -5,9 +5,9 @@ #include #include // page_size -#include "../../util/machine.h" -#include "../../util/map.h" -#include "../../util/symbol.h" +#include "../../../util/machine.h" +#include "../../../util/map.h" +#include "../../../util/symbol.h" #include #include diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c index c218b83e063b..fca81b39b09f 100644 --- a/tools/perf/arch/x86/util/perf_regs.c +++ b/tools/perf/arch/x86/util/perf_regs.c @@ -5,10 +5,10 @@ #include #include -#include "../../perf-sys.h" -#include "../../util/perf_regs.h" -#include "../../util/debug.h" -#include "../../util/event.h" +#include "../../../perf-sys.h" +#include "../../../util/perf_regs.h" +#include "../../../util/debug.h" +#include "../../../util/event.h" const struct sample_reg sample_reg_masks[] = { SMPL_REG(AX, PERF_REG_X86_AX), diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c index e33ef5bc31c5..d48d608517fd 100644 --- a/tools/perf/arch/x86/util/pmu.c +++ b/tools/perf/arch/x86/util/pmu.c @@ -4,9 +4,9 @@ #include #include -#include "../../util/intel-pt.h" -#include "../../util/intel-bts.h" -#include "../../util/pmu.h" +#include "../../../util/intel-pt.h" +#include "../../../util/intel-bts.h" +#include "../../../util/pmu.h" struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused) { From 80f1f85036355e5581ec0b99913410345ad3491b Mon Sep 17 00:00:00 2001 From: Luke Nelson Date: Thu, 5 Mar 2020 15:44:12 -0800 Subject: [PATCH 059/372] bpf, x32: Fix bug with JMP32 JSET BPF_X checking upper bits The current x32 BPF JIT is incorrect for JMP32 JSET BPF_X when the upper 32 bits of operand registers are non-zero in certain situations. The problem is in the following code: case BPF_JMP | BPF_JSET | BPF_X: case BPF_JMP32 | BPF_JSET | BPF_X: ... /* and dreg_lo,sreg_lo */ EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo)); /* and dreg_hi,sreg_hi */ EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi)); /* or dreg_lo,dreg_hi */ EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi)); This code checks the upper bits of the operand registers regardless if the BPF instruction is BPF_JMP32 or BPF_JMP64. Registers dreg_hi and dreg_lo are not loaded from the stack for BPF_JMP32, however, they can still be polluted with values from previous instructions. The following BPF program demonstrates the bug. The jset64 instruction loads the temporary registers and performs the jump, since ((u64)r7 & (u64)r8) is non-zero. The jset32 should _not_ be taken, as the lower 32 bits are all zero, however, the current JIT will take the branch due the pollution of temporary registers from the earlier jset64. mov64 r0, 0 ld64 r7, 0x8000000000000000 ld64 r8, 0x8000000000000000 jset64 r7, r8, 1 exit jset32 r7, r8, 1 mov64 r0, 2 exit The expected return value of this program is 2; under the buggy x32 JIT it returns 0. The fix is to skip using the upper 32 bits for jset32 and compare the upper 32 bits for jset64 only. All tests in test_bpf.ko and selftests/bpf/test_verifier continue to pass with this change. We found this bug using our automated verification tool, Serval. Fixes: 69f827eb6e14 ("x32: bpf: implement jitting of JMP32") Co-developed-by: Xi Wang Signed-off-by: Xi Wang Signed-off-by: Luke Nelson Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200305234416.31597-1-luke.r.nels@gmail.com --- arch/x86/net/bpf_jit_comp32.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c index 393d251798c0..4d2a7a764602 100644 --- a/arch/x86/net/bpf_jit_comp32.c +++ b/arch/x86/net/bpf_jit_comp32.c @@ -2039,10 +2039,12 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, } /* and dreg_lo,sreg_lo */ EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo)); - /* and dreg_hi,sreg_hi */ - EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi)); - /* or dreg_lo,dreg_hi */ - EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi)); + if (is_jmp64) { + /* and dreg_hi,sreg_hi */ + EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi)); + /* or dreg_lo,dreg_hi */ + EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi)); + } goto emit_cond_jmp; } case BPF_JMP | BPF_JSET | BPF_K: From 93e5fbb18cec70b3b5c614f67b65388829113bdd Mon Sep 17 00:00:00 2001 From: Luke Nelson Date: Thu, 5 Mar 2020 15:44:13 -0800 Subject: [PATCH 060/372] selftests: bpf: Add test for JMP32 JSET BPF_X with upper bits set The existing tests attempt to check that JMP32 JSET ignores the upper bits in the operand registers. However, the tests missed one such bug in the x32 JIT that is only uncovered when a previous instruction pollutes the upper 32 bits of the registers. This patch adds a new test case that catches the bug by first executing a 64-bit JSET to pollute the upper 32-bits of the temporary registers, followed by a 32-bit JSET which should ignore the upper 32 bits. Co-developed-by: Xi Wang Signed-off-by: Xi Wang Signed-off-by: Luke Nelson Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200305234416.31597-2-luke.r.nels@gmail.com --- tools/testing/selftests/bpf/verifier/jmp32.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tools/testing/selftests/bpf/verifier/jmp32.c b/tools/testing/selftests/bpf/verifier/jmp32.c index bf0322eb5346..bd5cae4a7f73 100644 --- a/tools/testing/selftests/bpf/verifier/jmp32.c +++ b/tools/testing/selftests/bpf/verifier/jmp32.c @@ -61,6 +61,21 @@ }, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "jset32: ignores upper bits", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_LD_IMM64(BPF_REG_7, 0x8000000000000000), + BPF_LD_IMM64(BPF_REG_8, 0x8000000000000000), + BPF_JMP_REG(BPF_JSET, BPF_REG_7, BPF_REG_8, 1), + BPF_EXIT_INSN(), + BPF_JMP32_REG(BPF_JSET, BPF_REG_7, BPF_REG_8, 1), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 2, +}, { "jset32: min/max deduction", .insns = { From 089e5016d7eb063712063670e6da7c1a4de1a5c1 Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Fri, 6 Mar 2020 15:16:21 +0200 Subject: [PATCH 061/372] iwlwifi: mvm: take the required lock when clearing time event data When receiving a session protection end notification, the time event data is cleared without holding the required lock. Fix it. Signed-off-by: Avraham Stern Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20200306151128.a49846a634e4.Id1ada7c5a964f5e25f4d0eacc2c4b050015b46a2@changeid --- drivers/net/wireless/intel/iwlwifi/mvm/time-event.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c index c0b420fe5e48..1babc4bb5194 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c @@ -785,7 +785,9 @@ void iwl_mvm_rx_session_protect_notif(struct iwl_mvm *mvm, if (!le32_to_cpu(notif->status)) { iwl_mvm_te_check_disconnect(mvm, vif, "Session protection failure"); + spin_lock_bh(&mvm->time_event_lock); iwl_mvm_te_clear_data(mvm, te_data); + spin_unlock_bh(&mvm->time_event_lock); } if (le32_to_cpu(notif->start)) { @@ -801,7 +803,9 @@ void iwl_mvm_rx_session_protect_notif(struct iwl_mvm *mvm, */ iwl_mvm_te_check_disconnect(mvm, vif, "No beacon heard and the session protection is over already..."); + spin_lock_bh(&mvm->time_event_lock); iwl_mvm_te_clear_data(mvm, te_data); + spin_unlock_bh(&mvm->time_event_lock); } goto out_unlock; From cb377dfda1755b3bc01436755d866c8e5336a762 Mon Sep 17 00:00:00 2001 From: Mordechay Goodstein Date: Fri, 6 Mar 2020 15:16:22 +0200 Subject: [PATCH 062/372] iwlwifi: consider HE capability when setting LDPC The AP may set the LDPC capability only in HE (IEEE80211_HE_PHY_CAP1), but we were checking it only in the HT capabilities. If we don't use this capability when required, the DSP gets the wrong configuration in HE and doesn't work properly. Signed-off-by: Mordechay Goodstein Fixes: befebbb30af0 ("iwlwifi: rs: consider LDPC capability in case of HE") Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20200306151128.492d167c1a25.I1ad1353dbbf6c99ae57814be750f41a1c9f7f4ac@changeid --- drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c index e2cf9e015ef8..80ef238a8488 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c @@ -147,7 +147,11 @@ static u16 rs_fw_get_config_flags(struct iwl_mvm *mvm, (vht_ena && (vht_cap->cap & IEEE80211_VHT_CAP_RXLDPC)))) flags |= IWL_TLC_MNG_CFG_FLAGS_LDPC_MSK; - /* consider our LDPC support in case of HE */ + /* consider LDPC support in case of HE */ + if (he_cap->has_he && (he_cap->he_cap_elem.phy_cap_info[1] & + IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD)) + flags |= IWL_TLC_MNG_CFG_FLAGS_LDPC_MSK; + if (sband->iftype_data && sband->iftype_data->he_cap.has_he && !(sband->iftype_data->he_cap.he_cap_elem.phy_cap_info[1] & IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD)) From 71bc0334a6371c7de0e0594439de7d237e4d2d03 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Fri, 6 Mar 2020 15:16:23 +0200 Subject: [PATCH 063/372] iwlwifi: check allocated pointer when allocating conf_tlvs We were erroneously checking the length of the tlv instead of checking the pointer returned by kmemdup() when allocating dbg_conf_tlv[]. This was probably a typo. Fix it by checking the returned pointer instead of the length. Reported-by: Markus Elfring Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20200306151128.06e00e6e980f.I9a890ce83493b79892a5f690d12016525317fa7e@changeid --- drivers/net/wireless/intel/iwlwifi/iwl-drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c index 2d1cb4647c3b..0481796f75bc 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c @@ -1467,7 +1467,7 @@ static void iwl_req_fw_callback(const struct firmware *ucode_raw, void *context) kmemdup(pieces->dbg_conf_tlv[i], pieces->dbg_conf_tlv_len[i], GFP_KERNEL); - if (!pieces->dbg_conf_tlv_len[i]) + if (!pieces->dbg_conf_tlv[i]) goto out_free_fw; } } From a5688e600e78f9fc68102bf0fe5c797fc2826abe Mon Sep 17 00:00:00 2001 From: Mordechay Goodstein Date: Fri, 6 Mar 2020 15:16:24 +0200 Subject: [PATCH 064/372] iwlwifi: yoyo: don't add TLV offset when reading FIFOs The TLV offset is only used to read registers, while the offset used for the FIFO addresses are hard coded in the driver and not given by the TLV. If we try to apply the TLV offset when reading the FIFOs, we'll read from invalid addresses, causing the driver to hang. Signed-off-by: Mordechay Goodstein Fixes: 8d7dea25ada7 ("iwlwifi: dbg_ini: implement Rx fifos dump") Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20200306151129.fbab869c26fa.I4ddac20d02f9bce41855a816aa6855c89bc3874e@changeid --- drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index 91df1ee25dd0..e60eb5cc847f 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -8,7 +8,7 @@ * Copyright(c) 2008 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2015 - 2017 Intel Deutschland GmbH - * Copyright(c) 2018 - 2019 Intel Corporation + * Copyright(c) 2018 - 2020 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -31,7 +31,7 @@ * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2015 - 2017 Intel Deutschland GmbH - * Copyright(c) 2018 - 2019 Intel Corporation + * Copyright(c) 2018 - 2020 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -1409,11 +1409,7 @@ static int iwl_dump_ini_rxf_iter(struct iwl_fw_runtime *fwrt, goto out; } - /* - * region register have absolute value so apply rxf offset after - * reading the registers - */ - offs += rxf_data.offset; + offs = rxf_data.offset; /* Lock fence */ iwl_write_prph_no_grab(fwrt->trans, RXF_SET_FENCE_MODE + offs, 0x1); From 699b760bd29edba736590fffef7654cb079c753e Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Fri, 6 Mar 2020 15:16:25 +0200 Subject: [PATCH 065/372] iwlwifi: dbg: don't abort if sending DBGC_SUSPEND_RESUME fails If the firmware is in a bad state or not initialized fully, sending the DBGC_SUSPEND_RESUME command fails but we can still collect logs. Instead of aborting the entire dump process, simply ignore the error. By removing the last callpoint that was checking the return value, we can also convert the function to return void. Signed-off-by: Luca Coelho Fixes: 576058330f2d ("iwlwifi: dbg: support debug recording suspend resume command") Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20200306151129.dcec37b2efd4.I8dcd190431d110a6a0e88095ce93591ccfb3d78d@changeid --- drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 15 +++++---------- drivers/net/wireless/intel/iwlwifi/fw/dbg.h | 6 +++--- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index e60eb5cc847f..8796ab8f2a5f 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -2490,10 +2490,7 @@ static void iwl_fw_dbg_collect_sync(struct iwl_fw_runtime *fwrt, u8 wk_idx) goto out; } - if (iwl_fw_dbg_stop_restart_recording(fwrt, ¶ms, true)) { - IWL_ERR(fwrt, "Failed to stop DBGC recording, aborting dump\n"); - goto out; - } + iwl_fw_dbg_stop_restart_recording(fwrt, ¶ms, true); IWL_DEBUG_FW_INFO(fwrt, "WRT: Data collection start\n"); if (iwl_trans_dbg_ini_valid(fwrt->trans)) @@ -2658,14 +2655,14 @@ static int iwl_fw_dbg_restart_recording(struct iwl_trans *trans, return 0; } -int iwl_fw_dbg_stop_restart_recording(struct iwl_fw_runtime *fwrt, - struct iwl_fw_dbg_params *params, - bool stop) +void iwl_fw_dbg_stop_restart_recording(struct iwl_fw_runtime *fwrt, + struct iwl_fw_dbg_params *params, + bool stop) { int ret = 0; if (test_bit(STATUS_FW_ERROR, &fwrt->trans->status)) - return 0; + return; if (fw_has_capa(&fwrt->fw->ucode_capa, IWL_UCODE_TLV_CAPA_DBG_SUSPEND_RESUME_CMD_SUPP)) @@ -2682,7 +2679,5 @@ int iwl_fw_dbg_stop_restart_recording(struct iwl_fw_runtime *fwrt, iwl_fw_set_dbg_rec_on(fwrt); } #endif - - return ret; } IWL_EXPORT_SYMBOL(iwl_fw_dbg_stop_restart_recording); diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.h b/drivers/net/wireless/intel/iwlwifi/fw/dbg.h index 179f2905d56b..9d3513213f5f 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.h @@ -239,9 +239,9 @@ _iwl_fw_dbg_trigger_simple_stop(struct iwl_fw_runtime *fwrt, _iwl_fw_dbg_trigger_simple_stop((fwrt), (wdev), \ iwl_fw_dbg_get_trigger((fwrt)->fw,\ (trig))) -int iwl_fw_dbg_stop_restart_recording(struct iwl_fw_runtime *fwrt, - struct iwl_fw_dbg_params *params, - bool stop); +void iwl_fw_dbg_stop_restart_recording(struct iwl_fw_runtime *fwrt, + struct iwl_fw_dbg_params *params, + bool stop); #ifdef CONFIG_IWLWIFI_DEBUGFS static inline void iwl_fw_set_dbg_rec_on(struct iwl_fw_runtime *fwrt) From ce19801ba75a902ab515dda03b57738c708d0781 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Fri, 6 Mar 2020 15:16:26 +0200 Subject: [PATCH 066/372] iwlwifi: mvm: Fix rate scale NSS configuration The TLC configuration did not take into consideration the station's SMPS configuration, and thus configured rates for 2 NSS even if static SMPS was reported by the station. Fix this. Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20200306151129.b4f940d13eca.Ieebfa889d08205a3a961ae0138fb5832e8a0f9c1@changeid --- .../net/wireless/intel/iwlwifi/mvm/rs-fw.c | 29 ++++++++++++++----- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c index 80ef238a8488..ca99a9c4f70e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c @@ -6,7 +6,7 @@ * GPL LICENSE SUMMARY * * Copyright(c) 2017 Intel Deutschland GmbH - * Copyright(c) 2018 - 2019 Intel Corporation + * Copyright(c) 2018 - 2020 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -27,7 +27,7 @@ * BSD LICENSE * * Copyright(c) 2017 Intel Deutschland GmbH - * Copyright(c) 2018 - 2019 Intel Corporation + * Copyright(c) 2018 - 2020 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -195,11 +195,13 @@ rs_fw_vht_set_enabled_rates(const struct ieee80211_sta *sta, { u16 supp; int i, highest_mcs; + u8 nss = sta->rx_nss; - for (i = 0; i < sta->rx_nss; i++) { - if (i == IWL_TLC_NSS_MAX) - break; + /* the station support only a single receive chain */ + if (sta->smps_mode == IEEE80211_SMPS_STATIC) + nss = 1; + for (i = 0; i < nss && i < IWL_TLC_NSS_MAX; i++) { highest_mcs = rs_fw_vht_highest_rx_mcs_index(vht_cap, i + 1); if (!highest_mcs) continue; @@ -245,8 +247,13 @@ rs_fw_he_set_enabled_rates(const struct ieee80211_sta *sta, u16 tx_mcs_160 = le16_to_cpu(sband->iftype_data->he_cap.he_mcs_nss_supp.tx_mcs_160); int i; + u8 nss = sta->rx_nss; - for (i = 0; i < sta->rx_nss && i < IWL_TLC_NSS_MAX; i++) { + /* the station support only a single receive chain */ + if (sta->smps_mode == IEEE80211_SMPS_STATIC) + nss = 1; + + for (i = 0; i < nss && i < IWL_TLC_NSS_MAX; i++) { u16 _mcs_160 = (mcs_160 >> (2 * i)) & 0x3; u16 _mcs_80 = (mcs_80 >> (2 * i)) & 0x3; u16 _tx_mcs_160 = (tx_mcs_160 >> (2 * i)) & 0x3; @@ -307,8 +314,14 @@ static void rs_fw_set_supp_rates(struct ieee80211_sta *sta, cmd->mode = IWL_TLC_MNG_MODE_HT; cmd->ht_rates[IWL_TLC_NSS_1][IWL_TLC_HT_BW_NONE_160] = cpu_to_le16(ht_cap->mcs.rx_mask[0]); - cmd->ht_rates[IWL_TLC_NSS_2][IWL_TLC_HT_BW_NONE_160] = - cpu_to_le16(ht_cap->mcs.rx_mask[1]); + + /* the station support only a single receive chain */ + if (sta->smps_mode == IEEE80211_SMPS_STATIC) + cmd->ht_rates[IWL_TLC_NSS_2][IWL_TLC_HT_BW_NONE_160] = + 0; + else + cmd->ht_rates[IWL_TLC_NSS_2][IWL_TLC_HT_BW_NONE_160] = + cpu_to_le16(ht_cap->mcs.rx_mask[1]); } } From 9352ed0165ff4313ab340c979446c3d64c531f7a Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Fri, 6 Mar 2020 15:16:27 +0200 Subject: [PATCH 067/372] iwlwifi: cfg: use antenna diversity with all AX101 devices We were erroneously only setting the tx_with_siso_diversity flag in the Qu B-step configurations for AX101 devices, though we should do it on all configurations. Add the flag to the other two configurations, namely Qu C-step and QuZ. Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/iwlwifi.20200306151129.1cd986ef467c.Idc0b111475ae3d38b68ae062613c080b574e33e1@changeid --- drivers/net/wireless/intel/iwlwifi/cfg/22000.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c index a22a830019c0..355af47c5f73 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c @@ -283,6 +283,7 @@ const struct iwl_cfg iwl_ax101_cfg_qu_c0_hr_b0 = { * HT size; mac80211 would otherwise pick the HE max (256) by default. */ .max_tx_agg_size = IEEE80211_MAX_AMPDU_BUF_HT, + .tx_with_siso_diversity = true, .num_rbds = IWL_NUM_RBDS_22000_HE, }; @@ -309,6 +310,7 @@ const struct iwl_cfg iwl_ax101_cfg_quz_hr = { * HT size; mac80211 would otherwise pick the HE max (256) by default. */ .max_tx_agg_size = IEEE80211_MAX_AMPDU_BUF_HT, + .tx_with_siso_diversity = true, .num_rbds = IWL_NUM_RBDS_22000_HE, }; From cf7da891b624983ffadd6b97ef63a0def3e985ad Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 3 Mar 2020 16:50:33 +0100 Subject: [PATCH 068/372] docs: dmaengine: provider.rst: get rid of some warnings Get rid of those warnings by adding extra blank lines: Documentation/driver-api/dmaengine/provider.rst:270: WARNING: Unexpected indentation. Documentation/driver-api/dmaengine/provider.rst:273: WARNING: Block quote ends without a blank line; unexpected unindent. Documentation/driver-api/dmaengine/provider.rst:288: WARNING: Unexpected indentation. Documentation/driver-api/dmaengine/provider.rst:290: WARNING: Block quote ends without a blank line; unexpected unindent. While here, use parenthesis after get_metadata_ptr/set_metadata_len, as, if some day someone adds a kerneldoc markup for those, it should automatically generate a cross-reference to them. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/62cf2a87b379a92c9c0e5a40c2ae8a138b01fe0a.1583250595.git.mchehab+huawei@kernel.org Signed-off-by: Vinod Koul --- Documentation/driver-api/dmaengine/provider.rst | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/Documentation/driver-api/dmaengine/provider.rst b/Documentation/driver-api/dmaengine/provider.rst index 790a15089f1f..56e5833e8a07 100644 --- a/Documentation/driver-api/dmaengine/provider.rst +++ b/Documentation/driver-api/dmaengine/provider.rst @@ -266,11 +266,15 @@ to use. attached (via the dmaengine_desc_attach_metadata() helper to the descriptor. From the DMA driver the following is expected for this mode: + - DMA_MEM_TO_DEV / DEV_MEM_TO_MEM + The data from the provided metadata buffer should be prepared for the DMA controller to be sent alongside of the payload data. Either by copying to a hardware descriptor, or highly coupled packet. + - DMA_DEV_TO_MEM + On transfer completion the DMA driver must copy the metadata to the client provided metadata buffer before notifying the client about the completion. After the transfer completion, DMA drivers must not touch the metadata @@ -284,10 +288,14 @@ to use. and dmaengine_desc_set_metadata_len() is provided as helper functions. From the DMA driver the following is expected for this mode: - - get_metadata_ptr + + - get_metadata_ptr() + Should return a pointer for the metadata buffer, the maximum size of the metadata buffer and the currently used / valid (if any) bytes in the buffer. - - set_metadata_len + + - set_metadata_len() + It is called by the clients after it have placed the metadata to the buffer to let the DMA driver know the number of valid bytes provided. From be40920fbf1003c38ccdc02b571e01a75d890c82 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 7 Mar 2020 03:32:58 +0900 Subject: [PATCH 069/372] tools: Let O= makes handle a relative path with -C option When I tried to compile tools/perf from the top directory with the -C option, the O= option didn't work correctly if I passed a relative path: $ make O=BUILD -C tools/perf/ make: Entering directory '/home/mhiramat/ksrc/linux/tools/perf' BUILD: Doing 'make -j8' parallel build ../scripts/Makefile.include:4: *** O=/home/mhiramat/ksrc/linux/tools/perf/BUILD does not exist. Stop. make: *** [Makefile:70: all] Error 2 make: Leaving directory '/home/mhiramat/ksrc/linux/tools/perf' The O= directory existence check failed because the check script ran in the build target directory instead of the directory where I ran the make command. To fix that, once change directory to $(PWD) and check O= directory, since the PWD is set to where the make command runs. Fixes: c883122acc0d ("perf tools: Let O= makes handle relative paths") Reported-by: Randy Dunlap Signed-off-by: Masami Hiramatsu Cc: Andrew Morton Cc: Borislav Petkov Cc: Geert Uytterhoeven Cc: Jiri Olsa Cc: Masahiro Yamada Cc: Michal Marek Cc: Peter Zijlstra Cc: Sasha Levin Cc: Steven Rostedt (VMware) Cc: stable@vger.kernel.org Link: http://lore.kernel.org/lkml/158351957799.3363.15269768530697526765.stgit@devnote2 Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile | 2 +- tools/scripts/Makefile.include | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 7902a5681fc8..b8fc7d972be9 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -35,7 +35,7 @@ endif # Only pass canonical directory names as the output directory: # ifneq ($(O),) - FULL_O := $(shell readlink -f $(O) || echo $(O)) + FULL_O := $(shell cd $(PWD); readlink -f $(O) || echo $(O)) endif # diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index ded7a950dc40..6d2f3a1b2249 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 ifneq ($(O),) ifeq ($(origin O), command line) - dummy := $(if $(shell test -d $(O) || echo $(O)),$(error O=$(O) does not exist),) - ABSOLUTE_O := $(shell cd $(O) ; pwd) + dummy := $(if $(shell cd $(PWD); test -d $(O) || echo $(O)),$(error O=$(O) does not exist),) + ABSOLUTE_O := $(shell cd $(PWD); cd $(O) ; pwd) OUTPUT := $(ABSOLUTE_O)/$(if $(subdir),$(subdir)/) COMMAND_O := O=$(ABSOLUTE_O) ifeq ($(objtree),) From 286d3250c9d6437340203fb64938bea344729a0e Mon Sep 17 00:00:00 2001 From: Vladis Dronov Date: Sun, 8 Mar 2020 09:08:54 +0100 Subject: [PATCH 070/372] efi: Fix a race and a buffer overflow while reading efivars via sysfs There is a race and a buffer overflow corrupting a kernel memory while reading an EFI variable with a size more than 1024 bytes via the older sysfs method. This happens because accessing struct efi_variable in efivar_{attr,size,data}_read() and friends is not protected from a concurrent access leading to a kernel memory corruption and, at best, to a crash. The race scenario is the following: CPU0: CPU1: efivar_attr_read() var->DataSize = 1024; efivar_entry_get(... &var->DataSize) down_interruptible(&efivars_lock) efivar_attr_read() // same EFI var var->DataSize = 1024; efivar_entry_get(... &var->DataSize) down_interruptible(&efivars_lock) virt_efi_get_variable() // returns EFI_BUFFER_TOO_SMALL but // var->DataSize is set to a real // var size more than 1024 bytes up(&efivars_lock) virt_efi_get_variable() // called with var->DataSize set // to a real var size, returns // successfully and overwrites // a 1024-bytes kernel buffer up(&efivars_lock) This can be reproduced by concurrent reading of an EFI variable which size is more than 1024 bytes: ts# for cpu in $(seq 0 $(nproc --ignore=1)); do ( taskset -c $cpu \ cat /sys/firmware/efi/vars/KEKDefault*/size & ) ; done Fix this by using a local variable for a var's data buffer size so it does not get overwritten. Fixes: e14ab23dde12b80d ("efivars: efivar_entry API") Reported-by: Bob Sanders and the LTP testsuite Signed-off-by: Vladis Dronov Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Cc: Link: https://lore.kernel.org/r/20200305084041.24053-2-vdronov@redhat.com Link: https://lore.kernel.org/r/20200308080859.21568-24-ardb@kernel.org --- drivers/firmware/efi/efivars.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c index 7576450c8254..69f13bc4b931 100644 --- a/drivers/firmware/efi/efivars.c +++ b/drivers/firmware/efi/efivars.c @@ -83,13 +83,16 @@ static ssize_t efivar_attr_read(struct efivar_entry *entry, char *buf) { struct efi_variable *var = &entry->var; + unsigned long size = sizeof(var->Data); char *str = buf; + int ret; if (!entry || !buf) return -EINVAL; - var->DataSize = 1024; - if (efivar_entry_get(entry, &var->Attributes, &var->DataSize, var->Data)) + ret = efivar_entry_get(entry, &var->Attributes, &size, var->Data); + var->DataSize = size; + if (ret) return -EIO; if (var->Attributes & EFI_VARIABLE_NON_VOLATILE) @@ -116,13 +119,16 @@ static ssize_t efivar_size_read(struct efivar_entry *entry, char *buf) { struct efi_variable *var = &entry->var; + unsigned long size = sizeof(var->Data); char *str = buf; + int ret; if (!entry || !buf) return -EINVAL; - var->DataSize = 1024; - if (efivar_entry_get(entry, &var->Attributes, &var->DataSize, var->Data)) + ret = efivar_entry_get(entry, &var->Attributes, &size, var->Data); + var->DataSize = size; + if (ret) return -EIO; str += sprintf(str, "0x%lx\n", var->DataSize); @@ -133,12 +139,15 @@ static ssize_t efivar_data_read(struct efivar_entry *entry, char *buf) { struct efi_variable *var = &entry->var; + unsigned long size = sizeof(var->Data); + int ret; if (!entry || !buf) return -EINVAL; - var->DataSize = 1024; - if (efivar_entry_get(entry, &var->Attributes, &var->DataSize, var->Data)) + ret = efivar_entry_get(entry, &var->Attributes, &size, var->Data); + var->DataSize = size; + if (ret) return -EIO; memcpy(buf, var->Data, var->DataSize); @@ -250,14 +259,16 @@ efivar_show_raw(struct efivar_entry *entry, char *buf) { struct efi_variable *var = &entry->var; struct compat_efi_variable *compat; + unsigned long datasize = sizeof(var->Data); size_t size; + int ret; if (!entry || !buf) return 0; - var->DataSize = 1024; - if (efivar_entry_get(entry, &entry->var.Attributes, - &entry->var.DataSize, entry->var.Data)) + ret = efivar_entry_get(entry, &var->Attributes, &datasize, var->Data); + var->DataSize = datasize; + if (ret) return -EIO; if (in_compat_syscall()) { From d6c066fda90d578aacdf19771a027ed484a79825 Mon Sep 17 00:00:00 2001 From: Vladis Dronov Date: Sun, 8 Mar 2020 09:08:55 +0100 Subject: [PATCH 071/372] efi: Add a sanity check to efivar_store_raw() Add a sanity check to efivar_store_raw() the same way efivar_{attr,size,data}_read() and efivar_show_raw() have it. Signed-off-by: Vladis Dronov Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Cc: Link: https://lore.kernel.org/r/20200305084041.24053-3-vdronov@redhat.com Link: https://lore.kernel.org/r/20200308080859.21568-25-ardb@kernel.org --- drivers/firmware/efi/efivars.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c index 69f13bc4b931..aff3dfb4d7ba 100644 --- a/drivers/firmware/efi/efivars.c +++ b/drivers/firmware/efi/efivars.c @@ -208,6 +208,9 @@ efivar_store_raw(struct efivar_entry *entry, const char *buf, size_t count) u8 *data; int err; + if (!entry || !buf) + return -EINVAL; + if (in_compat_syscall()) { struct compat_efi_variable *compat; From cb2116ff97859d34fda6cb561ac654415f4c6230 Mon Sep 17 00:00:00 2001 From: Alexandru Tachici Date: Wed, 19 Feb 2020 16:31:12 +0200 Subject: [PATCH 072/372] iio: accel: adxl372: Set iio_chan BE Data stored in the iio-buffer is BE and this should be specified in the iio_chan_spec struct. Fixes: f4f55ce38e5f8 ("iio:adxl372: Add FIFO and interrupts support") Signed-off-by: Alexandru Tachici Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/accel/adxl372.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/accel/adxl372.c b/drivers/iio/accel/adxl372.c index 67b8817995c0..60daf04ce188 100644 --- a/drivers/iio/accel/adxl372.c +++ b/drivers/iio/accel/adxl372.c @@ -237,6 +237,7 @@ static const struct adxl372_axis_lookup adxl372_axis_lookup_table[] = { .realbits = 12, \ .storagebits = 16, \ .shift = 4, \ + .endianness = IIO_BE, \ }, \ } From b42aa97ed5f1169cfd37175ef388ea62ff2dcf43 Mon Sep 17 00:00:00 2001 From: Tomas Novotny Date: Tue, 18 Feb 2020 16:44:50 +0100 Subject: [PATCH 073/372] iio: light: vcnl4000: update sampling periods for vcnl4200 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Vishay has published a new version of "Designing the VCNL4200 Into an Application" application note in October 2019. The new version specifies that there is +-20% of part to part tolerance. This explains the drift seen during experiments. The proximity pulse width is also changed from 32us to 30us. According to the support, the tolerance also applies to ambient light. So update the sampling periods. As the reading is blocking, current users may notice slightly longer response time. Fixes: be38866fbb97 ("iio: vcnl4000: add support for VCNL4200") Reviewed-by: Guido Günther Signed-off-by: Tomas Novotny Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/light/vcnl4000.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/iio/light/vcnl4000.c b/drivers/iio/light/vcnl4000.c index b0e241aaefb4..98428bf430bd 100644 --- a/drivers/iio/light/vcnl4000.c +++ b/drivers/iio/light/vcnl4000.c @@ -167,10 +167,10 @@ static int vcnl4200_init(struct vcnl4000_data *data) data->vcnl4200_ps.reg = VCNL4200_PS_DATA; switch (id) { case VCNL4200_PROD_ID: - /* Integration time is 50ms, but the experiments */ - /* show 54ms in total. */ - data->vcnl4200_al.sampling_rate = ktime_set(0, 54000 * 1000); - data->vcnl4200_ps.sampling_rate = ktime_set(0, 4200 * 1000); + /* Default wait time is 50ms, add 20% tolerance. */ + data->vcnl4200_al.sampling_rate = ktime_set(0, 60000 * 1000); + /* Default wait time is 4.8ms, add 20% tolerance. */ + data->vcnl4200_ps.sampling_rate = ktime_set(0, 5760 * 1000); data->al_scale = 24000; break; case VCNL4040_PROD_ID: From 2ca5a8792d617b4035aacd0a8be527f667fbf912 Mon Sep 17 00:00:00 2001 From: Tomas Novotny Date: Tue, 18 Feb 2020 16:44:51 +0100 Subject: [PATCH 074/372] iio: light: vcnl4000: update sampling periods for vcnl4040 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Vishay has published a new version of "Designing the VCNL4200 Into an Application" application note in October 2019. The new version specifies that there is +-20% of part to part tolerance. Although the application note is related to vcnl4200, according to support the vcnl4040's "ASIC is quite similar to that one for the VCNL4200". So update the sampling periods (and comment), including the correct sampling period for proximity. Both sampling periods are lower. Users relying on the blocking behaviour of reading will get proximity measurements much earlier. Fixes: 5a441aade5b3 ("iio: light: vcnl4000 add support for the VCNL4040 proximity and light sensor") Reviewed-by: Guido Günther Tested-by: Guido Günther Signed-off-by: Tomas Novotny Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/light/vcnl4000.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/iio/light/vcnl4000.c b/drivers/iio/light/vcnl4000.c index 98428bf430bd..e5b00a6611ac 100644 --- a/drivers/iio/light/vcnl4000.c +++ b/drivers/iio/light/vcnl4000.c @@ -174,9 +174,10 @@ static int vcnl4200_init(struct vcnl4000_data *data) data->al_scale = 24000; break; case VCNL4040_PROD_ID: - /* Integration time is 80ms, add 10ms. */ - data->vcnl4200_al.sampling_rate = ktime_set(0, 100000 * 1000); - data->vcnl4200_ps.sampling_rate = ktime_set(0, 100000 * 1000); + /* Default wait time is 80ms, add 20% tolerance. */ + data->vcnl4200_al.sampling_rate = ktime_set(0, 96000 * 1000); + /* Default wait time is 5ms, add 20% tolerance. */ + data->vcnl4200_ps.sampling_rate = ktime_set(0, 6000 * 1000); data->al_scale = 120000; break; } From e43d110cdc206b6df4dd438cd10c81d1da910aad Mon Sep 17 00:00:00 2001 From: Wen-chien Jesse Sung Date: Mon, 24 Feb 2020 17:54:26 +0800 Subject: [PATCH 075/372] iio: st_sensors: remap SMO8840 to LIS2DH12 According to ST, the HID is for LIS2DH12. Fixes: 3d56e19815b3 ("iio: accel: st_accel: Add support for the SMO8840 ACPI id") Signed-off-by: Wen-chien Jesse Sung Tested-by: Hans de Goede Reviewed-by: Hans de Goede Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/accel/st_accel_i2c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/accel/st_accel_i2c.c b/drivers/iio/accel/st_accel_i2c.c index 633955d764cc..849cf74153c4 100644 --- a/drivers/iio/accel/st_accel_i2c.c +++ b/drivers/iio/accel/st_accel_i2c.c @@ -110,7 +110,7 @@ MODULE_DEVICE_TABLE(of, st_accel_of_match); #ifdef CONFIG_ACPI static const struct acpi_device_id st_accel_acpi_match[] = { - {"SMO8840", (kernel_ulong_t)LNG2DM_ACCEL_DEV_NAME}, + {"SMO8840", (kernel_ulong_t)LIS2DH12_ACCEL_DEV_NAME}, {"SMO8A90", (kernel_ulong_t)LNG2DM_ACCEL_DEV_NAME}, { }, }; From 016a8845f6da65b2203f102f192046fbb624e250 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20=C5=A0tetiar?= Date: Thu, 27 Feb 2020 17:27:34 +0100 Subject: [PATCH 076/372] iio: chemical: sps30: fix missing triggered buffer dependency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SPS30 uses triggered buffer, but the dependency is not specified in the Kconfig file. Fix this by selecting IIO_BUFFER and IIO_TRIGGERED_BUFFER config symbols. Cc: stable@vger.kernel.org Fixes: 232e0f6ddeae ("iio: chemical: add support for Sensirion SPS30 sensor") Signed-off-by: Petr Štetiar Signed-off-by: Jonathan Cameron --- drivers/iio/chemical/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iio/chemical/Kconfig b/drivers/iio/chemical/Kconfig index 0b91de4df8f4..a7e65a59bf42 100644 --- a/drivers/iio/chemical/Kconfig +++ b/drivers/iio/chemical/Kconfig @@ -91,6 +91,8 @@ config SPS30 tristate "SPS30 particulate matter sensor" depends on I2C select CRC8 + select IIO_BUFFER + select IIO_TRIGGERED_BUFFER help Say Y here to build support for the Sensirion SPS30 particulate matter sensor. From 10856d88f7653b42196ca5b2775bbc1f15122a58 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 4 Mar 2020 19:34:23 +0800 Subject: [PATCH 077/372] iio: ping: set pa_laser_ping_cfg in of_ping_match pa_laser_ping_cfg should be set in of_ping_match instead of pa_ping_cfg. Fixes: 7bb501f49ddb ("iio: ping: add parallax ping sensors") Signed-off-by: YueHaibing Acked-by: Andreas Klinger Signed-off-by: Jonathan Cameron --- drivers/iio/proximity/ping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iio/proximity/ping.c b/drivers/iio/proximity/ping.c index 34aff108dff5..12b893c5b0ee 100644 --- a/drivers/iio/proximity/ping.c +++ b/drivers/iio/proximity/ping.c @@ -269,7 +269,7 @@ static const struct iio_chan_spec ping_chan_spec[] = { static const struct of_device_id of_ping_match[] = { { .compatible = "parallax,ping", .data = &pa_ping_cfg}, - { .compatible = "parallax,laserping", .data = &pa_ping_cfg}, + { .compatible = "parallax,laserping", .data = &pa_laser_ping_cfg}, {}, }; From 805b13adde3964c78cba125a15527e88c19f87b3 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 8 Mar 2020 20:07:28 -0600 Subject: [PATCH 078/372] io_uring: ensure RCU callback ordering with rcu_barrier() After more careful studying, Paul informs me that we cannot rely on ordering of RCU callbacks in the way that the the tagged commit did. The current construct looks like this: void C(struct rcu_head *rhp) { do_something(rhp); call_rcu(&p->rh, B); } call_rcu(&p->rh, A); call_rcu(&p->rh, C); and we're relying on ordering between A and B, which isn't guaranteed. Make this explicit instead, and have a work item issue the rcu_barrier() to ensure that A has run before we manually execute B. While thorough testing never showed this issue, it's dependent on the per-cpu load in terms of RCU callbacks. The updated method simplifies the code as well, and eliminates the need to maintain an rcu_head in the fileset data. Fixes: c1e2148f8ecb ("io_uring: free fixed_file_data after RCU grace period") Reported-by: Paul E. McKenney Signed-off-by: Jens Axboe --- fs/io_uring.c | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index c06082bb039a..1b2517291b78 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -191,7 +191,6 @@ struct fixed_file_data { struct llist_head put_llist; struct work_struct ref_work; struct completion done; - struct rcu_head rcu; }; struct io_ring_ctx { @@ -5331,26 +5330,23 @@ static void io_file_ref_kill(struct percpu_ref *ref) complete(&data->done); } -static void __io_file_ref_exit_and_free(struct rcu_head *rcu) +static void io_file_ref_exit_and_free(struct work_struct *work) { - struct fixed_file_data *data = container_of(rcu, struct fixed_file_data, - rcu); + struct fixed_file_data *data; + + data = container_of(work, struct fixed_file_data, ref_work); + + /* + * Ensure any percpu-ref atomic switch callback has run, it could have + * been in progress when the files were being unregistered. Once + * that's done, we can safely exit and free the ref and containing + * data structure. + */ + rcu_barrier(); percpu_ref_exit(&data->refs); kfree(data); } -static void io_file_ref_exit_and_free(struct rcu_head *rcu) -{ - /* - * We need to order our exit+free call against the potentially - * existing call_rcu() for switching to atomic. One way to do that - * is to have this rcu callback queue the final put and free, as we - * could otherwise have a pre-existing atomic switch complete _after_ - * the free callback we queued. - */ - call_rcu(rcu, __io_file_ref_exit_and_free); -} - static int io_sqe_files_unregister(struct io_ring_ctx *ctx) { struct fixed_file_data *data = ctx->file_data; @@ -5369,7 +5365,8 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx) for (i = 0; i < nr_tables; i++) kfree(data->table[i].files); kfree(data->table); - call_rcu(&data->rcu, io_file_ref_exit_and_free); + INIT_WORK(&data->ref_work, io_file_ref_exit_and_free); + queue_work(system_wq, &data->ref_work); ctx->file_data = NULL; ctx->nr_user_files = 0; return 0; From f2ecf903ef06eb1bbbfa969db9889643d487e73a Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 9 Mar 2020 09:21:48 +0100 Subject: [PATCH 079/372] ALSA: pcm: oss: Avoid plugin buffer overflow Each OSS PCM plugins allocate its internal buffer per pre-calculation of the max buffer size through the chain of plugins (calling src_frames and dst_frames callbacks). This works for most plugins, but the rate plugin might behave incorrectly. The calculation in the rate plugin involves with the fractional position, i.e. it may vary depending on the input position. Since the buffer size pre-calculation is always done with the offset zero, it may return a shorter size than it might be; this may result in the out-of-bound access as spotted by fuzzer. This patch addresses those possible buffer overflow accesses by simply setting the upper limit per the given buffer size for each plugin before src_frames() and after dst_frames() calls. Reported-by: syzbot+e1fe9f44fb8ecf4fb5dd@syzkaller.appspotmail.com Cc: Link: https://lore.kernel.org/r/000000000000b25ea005a02bcf21@google.com Link: https://lore.kernel.org/r/20200309082148.19855-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/oss/pcm_plugin.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/core/oss/pcm_plugin.c b/sound/core/oss/pcm_plugin.c index 240e4702c098..c9401832967c 100644 --- a/sound/core/oss/pcm_plugin.c +++ b/sound/core/oss/pcm_plugin.c @@ -209,6 +209,8 @@ snd_pcm_sframes_t snd_pcm_plug_client_size(struct snd_pcm_substream *plug, snd_p if (stream == SNDRV_PCM_STREAM_PLAYBACK) { plugin = snd_pcm_plug_last(plug); while (plugin && drv_frames > 0) { + if (drv_frames > plugin->buf_frames) + drv_frames = plugin->buf_frames; plugin_prev = plugin->prev; if (plugin->src_frames) drv_frames = plugin->src_frames(plugin, drv_frames); @@ -220,6 +222,8 @@ snd_pcm_sframes_t snd_pcm_plug_client_size(struct snd_pcm_substream *plug, snd_p plugin_next = plugin->next; if (plugin->dst_frames) drv_frames = plugin->dst_frames(plugin, drv_frames); + if (drv_frames > plugin->buf_frames) + drv_frames = plugin->buf_frames; plugin = plugin_next; } } else @@ -248,11 +252,15 @@ snd_pcm_sframes_t snd_pcm_plug_slave_size(struct snd_pcm_substream *plug, snd_pc if (frames < 0) return frames; } + if (frames > plugin->buf_frames) + frames = plugin->buf_frames; plugin = plugin_next; } } else if (stream == SNDRV_PCM_STREAM_CAPTURE) { plugin = snd_pcm_plug_last(plug); while (plugin) { + if (frames > plugin->buf_frames) + frames = plugin->buf_frames; plugin_prev = plugin->prev; if (plugin->src_frames) { frames = plugin->src_frames(plugin, frames); From d683469b3c93d7e2afd39e6e1970f24700eb7a68 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 9 Mar 2020 10:59:22 +0100 Subject: [PATCH 080/372] ALSA: line6: Fix endless MIDI read loop The MIDI input event parser of the LINE6 driver may enter into an endless loop when the unexpected data sequence is given, as it tries to continue the secondary bytes without termination. Also, when the input data is too short, the parser returns a negative error, while the caller doesn't handle it properly. This would lead to the unexpected behavior as well. This patch addresses those issues by checking the return value correctly and handling the one-byte event in the parser properly. The bug was reported by syzkaller. Reported-by: syzbot+cce32521ee0a824c21f7@syzkaller.appspotmail.com Cc: Link: https://lore.kernel.org/r/000000000000033087059f8f8fa3@google.com Link: https://lore.kernel.org/r/20200309095922.30269-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/line6/driver.c | 2 +- sound/usb/line6/midibuf.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/usb/line6/driver.c b/sound/usb/line6/driver.c index b5a3f754a4f1..4f096685ed65 100644 --- a/sound/usb/line6/driver.c +++ b/sound/usb/line6/driver.c @@ -305,7 +305,7 @@ static void line6_data_received(struct urb *urb) line6_midibuf_read(mb, line6->buffer_message, LINE6_MIDI_MESSAGE_MAXLEN); - if (done == 0) + if (done <= 0) break; line6->message_length = done; diff --git a/sound/usb/line6/midibuf.c b/sound/usb/line6/midibuf.c index 8d6eefa0d936..6a70463f82c4 100644 --- a/sound/usb/line6/midibuf.c +++ b/sound/usb/line6/midibuf.c @@ -159,7 +159,7 @@ int line6_midibuf_read(struct midi_buffer *this, unsigned char *data, int midi_length_prev = midibuf_message_length(this->command_prev); - if (midi_length_prev > 0) { + if (midi_length_prev > 1) { midi_length = midi_length_prev - 1; repeat = 1; } else From db2c549407d4a76563c579e4768f7d6d32afefba Mon Sep 17 00:00:00 2001 From: disconnect3d Date: Mon, 9 Mar 2020 11:48:53 +0100 Subject: [PATCH 081/372] perf map: Fix off by one in strncpy() size argument This patch fixes an off-by-one error in strncpy size argument in tools/perf/util/map.c. The issue is that in: strncmp(filename, "/system/lib/", 11) the passed string literal: "/system/lib/" has 12 bytes (without the NULL byte) and the passed size argument is 11. As a result, the logic won't match the ending "/" byte and will pass filepaths that are stored in other directories e.g. "/system/libmalicious/bin" or just "/system/libmalicious". This functionality seems to be present only on Android. I assume the /system/ directory is only writable by the root user, so I don't think this bug has much (or any) security impact. Fixes: eca818369996 ("perf tools: Add automatic remapping of Android libraries") Signed-off-by: disconnect3d Cc: Alexander Shishkin Cc: Changbin Du Cc: Jiri Olsa Cc: John Keeping Cc: Mark Rutland Cc: Michael Lentine Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Song Liu Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20200309104855.3775-1-dominik.b.czarnota@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/map.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 95428511300d..b342f744b1fc 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -89,7 +89,7 @@ static inline bool replace_android_lib(const char *filename, char *newfilename) return true; } - if (!strncmp(filename, "/system/lib/", 11)) { + if (!strncmp(filename, "/system/lib/", 12)) { char *ndk, *app; const char *arch; size_t ndk_length; From a7ffd416d80497f03d1f62c0b330cff76a86d5ad Mon Sep 17 00:00:00 2001 From: Ilie Halip Date: Mon, 9 Mar 2020 10:56:17 +0200 Subject: [PATCH 082/372] perf python: Fix clang detection when using CC=clang-version Currently, the setup.py script detects the clang compiler only when invoked with CC=clang. But when using a specific version (e.g. CC=clang-11), this doesn't work correctly and wrong compiler flags are set, leading to build errors. To properly detect clang, invoke the compiler with -v and check the output. The first line should start with "clang version ...". Committer testing: $ make CC=clang-9 O=/tmp/build/perf -C tools/perf install-bin $ readelf -wi /tmp/build/perf/python/perf.cpython-37m-x86_64-linux-gnu.so | grep DW_AT_producer | head -1 DW_AT_producer : (indirect string, offset: 0x0): clang version 9.0.1 (Fedora 9.0.1-2.fc31) /usr/bin/clang-9 -Wno-unused-result -Wsign-compare -D DYNAMIC_ANNOTATIONS_ENABLED=1 -D NDEBUG -O2 -g -pipe -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fexceptions -fstack-protector-strong -grecord-command-line -m64 -mtune=generic -fasynchronous-unwind-tables -fcf-protection=full -D _GNU_SOURCE -fPIC -fwrapv -Wbad-function-cast -Wdeclaration-after-statement -Wformat-security -Wformat-y2k -Winit-self -Wmissing-declarations -Wmissing-prototypes -Wnested-externs -Wno-system-headers -Wold-style-definition -Wpacked -Wredundant-decls -Wstrict-prototypes -Wswitch-default -Wswitch-enum -Wundef -Wwrite-strings -Wformat -Wshadow -D HAVE_ARCH_X86_64_SUPPORT -I /tmp/build/perf/arch/x86/include/generated -D HAVE_SYSCALL_TABLE_SUPPORT -D HAVE_PERF_REGS_SUPPORT -D HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET -Werror -O3 -fno-omit-frame-pointer -ggdb3 -funwind-tables -Wall -Wextra -std=gnu99 -fstack-protector-all -D _FORTIFY_SOURCE=2 -D _LARGEFILE64_SOURCE -D _FILE_OFFSET_BITS=64 -D _GNU_SOURCE -I /home/acme/git/perf/tools/lib/perf/include -I /home/acme/git/perf/tools/perf/util/include -I /home/acme/git/perf/tools/perf/arch/x86/include -I /home/acme/git/perf/tools/include/ -I /home/acme/git/perf/tools/arch/x86/include/uapi -I /home/acme/git/perf/tools/include/uapi -I /home/acme/git/perf/tools/arch/x86/include/ -I /home/acme/git/perf/tools/arch/x86/ -I /tmp/build/perf//util -I /tmp/build/perf/ -I /home/acme/git/perf/tools/perf/util -I /home/acme/git/perf/tools/perf -I /home/acme/git/perf/tools/lib/ -D HAVE_PTHREAD_ATTR_SETAFFINITY_NP -D HAVE_PTHREAD_BARRIER -D HAVE_EVENTFD -D HAVE_GET_CURRENT_DIR_NAME -D HAVE_GETTID -D HAVE_DWARF_GETLOCATIONS_SUPPORT -D HAVE_GLIBC_SUPPORT -D HAVE_AIO_SUPPORT -D HAVE_SCHED_GETCPU_SUPPORT -D HAVE_SETNS_SUPPORT -D HAVE_LIBELF_SUPPORT -D HAVE_LIBELF_MMAP_SUPPORT -D HAVE_ELF_GETPHDRNUM_SUPPORT -D HAVE_GELF_GETNOTE_SUPPORT -D HAVE_ELF_GETSHDRSTRNDX_SUPPORT -D HAVE_DWARF_SUPPORT -D HAVE_LIBBPF_SUPPORT -D HAVE_BPF_PROLOGUE -D HAVE_SDT_EVENT -D HAVE_JITDUMP -D HAVE_DWARF_UNWIND_SUPPORT -D NO_LIBUNWIND_DEBUG_FRAME -D HAVE_LIBUNWIND_SUPPORT -D HAVE_LIBCRYPTO_SUPPORT -D HAVE_SLANG_SUPPORT -D HAVE_GTK2_SUPPORT -D NO_LIBPERL -D HAVE_TIMERFD_SUPPORT -D HAVE_LIBPYTHON_SUPPORT -D HAVE_CPLUS_DEMANGLE_SUPPORT -D HAVE_LIBBFD_SUPPORT -D HAVE_ZLIB_SUPPORT -D HAVE_LZMA_SUPPORT -D HAVE_ZSTD_SUPPORT -D HAVE_LIBCAP_SUPPORT -D HAVE_BACKTRACE_SUPPORT -D HAVE_LIBNUMA_SUPPORT -D HAVE_KVM_STAT_SUPPORT -D DISASM_FOUR_ARGS_SIGNATURE -D HAVE_LIBBABELTRACE_SUPPORT -D HAVE_AUXTRACE_SUPPORT -D HAVE_JVMTI_CMLR -I /tmp/build/perf/ -fPIC -I util/include -I /usr/include/python3.7m -c /home/acme/git/perf/tools/perf/util/python.c -o /tmp/build/perf/python_ext_build/tmp/home/acme/git/perf/tools/perf/util/python.o -Wbad-function-cast -Wdeclaration-after-statement -Wformat-security -Wformat-y2k -Winit-self -Wmissing-declarations -Wmissing-prototypes -Wnested-externs -Wno-system-headers -Wold-style-definition -Wpacked -Wredundant-decls -Wstrict-prototypes -Wswitch-default -Wswitch-enum -Wundef -Wwrite-strings -Wformat -Wshadow -D HAVE_ARCH_X86_64_SUPPORT -I /tmp/build/perf/arch/x86/include/generated -D HAVE_SYSCALL_TABLE_SUPPORT -D HAVE_PERF_REGS_SUPPORT -D HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET -Werror -O3 -fno-omit-frame-pointer -ggdb3 -funwind-tables -Wall -Wextra -std=gnu99 -fstack-protector-all -D _FORTIFY_SOURCE=2 -D _LARGEFILE64_SOURCE -D _FILE_OFFSET_BITS=64 -D _GNU_SOURCE -I /home/acme/git/perf/tools/lib/perf/include -I /home/acme/git/perf/tools/perf/util/include -I /home/acme/git/perf/tools/perf/arch/x86/include -I /home/acme/git/perf/tools/include/ -I /home/acme/git/perf/tools/arch/x86/include/uapi -I /home/acme/git/perf/tools/include/uapi -I /home/acme/git/perf/tools/arch/x86/include/ -I /home/acme/git/perf/tools/arch/x86/ -I /tmp/build/perf//util -I /tmp/build/perf/ -I /home/acme/git/perf/tools/perf/util -I /home/acme/git/perf/tools/perf -I /home/acme/git/perf/tools/lib/ -D HAVE_PTHREAD_ATTR_SETAFFINITY_NP -D HAVE_PTHREAD_BARRIER -D HAVE_EVENTFD -D HAVE_GET_CURRENT_DIR_NAME -D HAVE_GETTID -D HAVE_DWARF_GETLOCATIONS_SUPPORT -D HAVE_GLIBC_SUPPORT -D HAVE_AIO_SUPPORT -D HAVE_SCHED_GETCPU_SUPPORT -D HAVE_SETNS_SUPPORT -D HAVE_LIBELF_SUPPORT -D HAVE_LIBELF_MMAP_SUPPORT -D HAVE_ELF_GETPHDRNUM_SUPPORT -D HAVE_GELF_GETNOTE_SUPPORT -D HAVE_ELF_GETSHDRSTRNDX_SUPPORT -D HAVE_DWARF_SUPPORT -D HAVE_LIBBPF_SUPPORT -D HAVE_BPF_PROLOGUE -D HAVE_SDT_EVENT -D HAVE_JITDUMP -D HAVE_DWARF_UNWIND_SUPPORT -D NO_LIBUNWIND_DEBUG_FRAME -D HAVE_LIBUNWIND_SUPPORT -D HAVE_LIBCRYPTO_SUPPORT -D HAVE_SLANG_SUPPORT -D HAVE_GTK2_SUPPORT -D NO_LIBPERL -D HAVE_TIMERFD_SUPPORT -D HAVE_LIBPYTHON_SUPPORT -D HAVE_CPLUS_DEMANGLE_SUPPORT -D HAVE_LIBBFD_SUPPORT -D HAVE_ZLIB_SUPPORT -D HAVE_LZMA_SUPPORT -D HAVE_ZSTD_SUPPORT -D HAVE_LIBCAP_SUPPORT -D HAVE_BACKTRACE_SUPPORT -D HAVE_LIBNUMA_SUPPORT -D HAVE_KVM_STAT_SUPPORT -D DISASM_FOUR_ARGS_SIGNATURE -D HAVE_LIBBABELTRACE_SUPPORT -D HAVE_AUXTRACE_SUPPORT -D HAVE_JVMTI_CMLR -I /tmp/build/perf/ -fno-strict-aliasing -Wno-write-strings -Wno-unused-parameter -Wno-redundant-decls $ And here is how tools/perf/util/setup.py checks if the used clang has options that the distro specific python extension building compiler defaults: if cc_is_clang: from distutils.sysconfig import get_config_vars vars = get_config_vars() for var in ('CFLAGS', 'OPT'): vars[var] = sub("-specs=[^ ]+", "", vars[var]) if not clang_has_option("-mcet"): vars[var] = sub("-mcet", "", vars[var]) if not clang_has_option("-fcf-protection"): vars[var] = sub("-fcf-protection", "", vars[var]) if not clang_has_option("-fstack-clash-protection"): vars[var] = sub("-fstack-clash-protection", "", vars[var]) if not clang_has_option("-fstack-protector-strong"): vars[var] = sub("-fstack-protector-strong", "", vars[var]) So "-fcf-protection=full" is used, clang-9 has this option and thus it was kept, the perf python extension was built with it and the build completed successfully. Link: https://github.com/ClangBuiltLinux/linux/issues/903 Signed-off-by: Ilie Halip Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Igor Lubashev Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: clang-built-linux@googlegroups.com Link: http://lore.kernel.org/lkml/20200309085618.14307-1-ilie.halip@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/setup.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index aa344a163eaf..8a065a6f9713 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -2,11 +2,13 @@ from os import getenv from subprocess import Popen, PIPE from re import sub -def clang_has_option(option): - return [o for o in Popen(['clang', option], stderr=PIPE).stderr.readlines() if b"unknown argument" in o] == [ ] - cc = getenv("CC") -if cc == "clang": +cc_is_clang = b"clang version" in Popen([cc, "-v"], stderr=PIPE).stderr.readline() + +def clang_has_option(option): + return [o for o in Popen([cc, option], stderr=PIPE).stderr.readlines() if b"unknown argument" in o] == [ ] + +if cc_is_clang: from distutils.sysconfig import get_config_vars vars = get_config_vars() for var in ('CFLAGS', 'OPT'): @@ -40,7 +42,7 @@ class install_lib(_install_lib): cflags = getenv('CFLAGS', '').split() # switch off several checks (need to be at the end of cflags list) cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter', '-Wno-redundant-decls' ] -if cc != "clang": +if not cc_is_clang: cflags += ['-Wno-cast-function-type' ] src_perf = getenv('srctree') + '/tools/perf' From 05e54e2386733dfdb62b6784b3d6e1b0bd9bb559 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 6 Mar 2020 23:31:21 -0800 Subject: [PATCH 083/372] perf parse-events: Fix reading of invalid memory in event parsing ADD_CONFIG_TERM accesses term->weak, however, in get_config_chgs this value is accessed outside of the list_for_each_entry and references invalid memory. Add an argument for ADD_CONFIG_TERM for weak and set it to false in the get_config_chgs case. This bug was cause by clang's address sanitizer and libfuzzer. It can be reproduced with a command line of: perf stat -a -e i/bs,tsc,L2/o Signed-off-by: Ian Rogers Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: clang-built-linux@googlegroups.com Link: http://lore.kernel.org/lkml/20200307073121.203816-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 46 +++++++++++++++++----------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index a14995835d85..a7dc0b096974 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1213,7 +1213,7 @@ static int config_attr(struct perf_event_attr *attr, static int get_config_terms(struct list_head *head_config, struct list_head *head_terms __maybe_unused) { -#define ADD_CONFIG_TERM(__type) \ +#define ADD_CONFIG_TERM(__type, __weak) \ struct perf_evsel_config_term *__t; \ \ __t = zalloc(sizeof(*__t)); \ @@ -1222,18 +1222,18 @@ static int get_config_terms(struct list_head *head_config, \ INIT_LIST_HEAD(&__t->list); \ __t->type = PERF_EVSEL__CONFIG_TERM_ ## __type; \ - __t->weak = term->weak; \ + __t->weak = __weak; \ list_add_tail(&__t->list, head_terms) -#define ADD_CONFIG_TERM_VAL(__type, __name, __val) \ +#define ADD_CONFIG_TERM_VAL(__type, __name, __val, __weak) \ do { \ - ADD_CONFIG_TERM(__type); \ + ADD_CONFIG_TERM(__type, __weak); \ __t->val.__name = __val; \ } while (0) -#define ADD_CONFIG_TERM_STR(__type, __val) \ +#define ADD_CONFIG_TERM_STR(__type, __val, __weak) \ do { \ - ADD_CONFIG_TERM(__type); \ + ADD_CONFIG_TERM(__type, __weak); \ __t->val.str = strdup(__val); \ if (!__t->val.str) { \ zfree(&__t); \ @@ -1247,62 +1247,62 @@ do { \ list_for_each_entry(term, head_config, list) { switch (term->type_term) { case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD: - ADD_CONFIG_TERM_VAL(PERIOD, period, term->val.num); + ADD_CONFIG_TERM_VAL(PERIOD, period, term->val.num, term->weak); break; case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ: - ADD_CONFIG_TERM_VAL(FREQ, freq, term->val.num); + ADD_CONFIG_TERM_VAL(FREQ, freq, term->val.num, term->weak); break; case PARSE_EVENTS__TERM_TYPE_TIME: - ADD_CONFIG_TERM_VAL(TIME, time, term->val.num); + ADD_CONFIG_TERM_VAL(TIME, time, term->val.num, term->weak); break; case PARSE_EVENTS__TERM_TYPE_CALLGRAPH: - ADD_CONFIG_TERM_STR(CALLGRAPH, term->val.str); + ADD_CONFIG_TERM_STR(CALLGRAPH, term->val.str, term->weak); break; case PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE: - ADD_CONFIG_TERM_STR(BRANCH, term->val.str); + ADD_CONFIG_TERM_STR(BRANCH, term->val.str, term->weak); break; case PARSE_EVENTS__TERM_TYPE_STACKSIZE: ADD_CONFIG_TERM_VAL(STACK_USER, stack_user, - term->val.num); + term->val.num, term->weak); break; case PARSE_EVENTS__TERM_TYPE_INHERIT: ADD_CONFIG_TERM_VAL(INHERIT, inherit, - term->val.num ? 1 : 0); + term->val.num ? 1 : 0, term->weak); break; case PARSE_EVENTS__TERM_TYPE_NOINHERIT: ADD_CONFIG_TERM_VAL(INHERIT, inherit, - term->val.num ? 0 : 1); + term->val.num ? 0 : 1, term->weak); break; case PARSE_EVENTS__TERM_TYPE_MAX_STACK: ADD_CONFIG_TERM_VAL(MAX_STACK, max_stack, - term->val.num); + term->val.num, term->weak); break; case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS: ADD_CONFIG_TERM_VAL(MAX_EVENTS, max_events, - term->val.num); + term->val.num, term->weak); break; case PARSE_EVENTS__TERM_TYPE_OVERWRITE: ADD_CONFIG_TERM_VAL(OVERWRITE, overwrite, - term->val.num ? 1 : 0); + term->val.num ? 1 : 0, term->weak); break; case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE: ADD_CONFIG_TERM_VAL(OVERWRITE, overwrite, - term->val.num ? 0 : 1); + term->val.num ? 0 : 1, term->weak); break; case PARSE_EVENTS__TERM_TYPE_DRV_CFG: - ADD_CONFIG_TERM_STR(DRV_CFG, term->val.str); + ADD_CONFIG_TERM_STR(DRV_CFG, term->val.str, term->weak); break; case PARSE_EVENTS__TERM_TYPE_PERCORE: ADD_CONFIG_TERM_VAL(PERCORE, percore, - term->val.num ? true : false); + term->val.num ? true : false, term->weak); break; case PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT: ADD_CONFIG_TERM_VAL(AUX_OUTPUT, aux_output, - term->val.num ? 1 : 0); + term->val.num ? 1 : 0, term->weak); break; case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE: ADD_CONFIG_TERM_VAL(AUX_SAMPLE_SIZE, aux_sample_size, - term->val.num); + term->val.num, term->weak); break; default: break; @@ -1339,7 +1339,7 @@ static int get_config_chgs(struct perf_pmu *pmu, struct list_head *head_config, } if (bits) - ADD_CONFIG_TERM_VAL(CFG_CHG, cfg_chg, bits); + ADD_CONFIG_TERM_VAL(CFG_CHG, cfg_chg, bits, false); #undef ADD_CONFIG_TERM return 0; From 6b8d68f1ce9266b05a55e93c62923ff51daae4c1 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 28 Feb 2020 16:57:42 +0900 Subject: [PATCH 084/372] perf probe: Fix to delete multiple probe event When we put an event with multiple probes, perf-probe fails to delete with filters. This comes from a failure to list up the event name because of overwrapping its name. To fix this issue, skip to list up the event which has same name. Without this patch: # perf probe -l \* probe_perf:map__map_ip (on perf_sample__fprintf_brstackoff:21@ probe_perf:map__map_ip (on perf_sample__fprintf_brstackoff:25@ probe_perf:map__map_ip (on append_inlines:12@util/machine.c in probe_perf:map__map_ip (on unwind_entry:19@util/machine.c in / probe_perf:map__map_ip (on map__map_ip@util/map.h in /home/mhi probe_perf:map__map_ip (on map__map_ip@util/map.h in /home/mhi # perf probe -d \* "*" does not hit any event. Error: Failed to delete events. Reason: No such file or directory (Code: -2) With it: # perf probe -d \* Removed event: probe_perf:map__map_ip # Fixes: 72363540c009 ("perf probe: Support multiprobe event") Reported-by: Arnaldo Carvalho de Melo Reported-by: He Zhe Signed-off-by: Masami Hiramatsu Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Cc: stable@vger.kernel.org Link: http://lore.kernel.org/lkml/158287666197.16697.7514373548551863562.stgit@devnote2 Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-file.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index 0f5fda11675f..8c852948513e 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -206,6 +206,9 @@ static struct strlist *__probe_file__get_namelist(int fd, bool include_group) } else ret = strlist__add(sl, tev.event); clear_probe_trace_event(&tev); + /* Skip if there is same name multi-probe event in the list */ + if (ret == -EEXIST) + ret = 0; if (ret < 0) break; } From 1efde2754275dbd9d11c6e0132a4f09facf297ab Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 28 Feb 2020 00:42:01 +0900 Subject: [PATCH 085/372] perf probe: Do not depend on dwfl_module_addrsym() Do not depend on dwfl_module_addrsym() because it can fail on user-space shared libraries. Actually, same bug was fixed by commit 664fee3dc379 ("perf probe: Do not use dwfl_module_addrsym if dwarf_diename finds symbol name"), but commit 07d369857808 ("perf probe: Fix wrong address verification) reverted to get actual symbol address from symtab. This fixes it again by getting symbol address from DIE, and only if the DIE has only address range, it uses dwfl_module_addrsym(). Fixes: 07d369857808 ("perf probe: Fix wrong address verification) Reported-by: Alexandre Ghiti Signed-off-by: Masami Hiramatsu Tested-by: Alexandre Ghiti Cc: Alexander Shishkin Cc: Greg Kroah-Hartman Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sasha Levin Link: http://lore.kernel.org/lkml/158281812176.476.14164573830975116234.stgit@devnote2 Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-finder.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 1c817add6ca4..e4cff49384f4 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -637,14 +637,19 @@ static int convert_to_trace_point(Dwarf_Die *sp_die, Dwfl_Module *mod, return -EINVAL; } - /* Try to get actual symbol name from symtab */ - symbol = dwfl_module_addrsym(mod, paddr, &sym, NULL); + if (dwarf_entrypc(sp_die, &eaddr) == 0) { + /* If the DIE has entrypc, use it. */ + symbol = dwarf_diename(sp_die); + } else { + /* Try to get actual symbol name and address from symtab */ + symbol = dwfl_module_addrsym(mod, paddr, &sym, NULL); + eaddr = sym.st_value; + } if (!symbol) { pr_warning("Failed to find symbol at 0x%lx\n", (unsigned long)paddr); return -ENOENT; } - eaddr = sym.st_value; tp->offset = (unsigned long)(paddr - eaddr); tp->address = (unsigned long)paddr; From 8d67743653dce5a0e7aa500fcccb237cde7ad88e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 8 Mar 2020 19:07:17 +0100 Subject: [PATCH 086/372] futex: Unbreak futex hashing The recent futex inode life time fix changed the ordering of the futex key union struct members, but forgot to adjust the hash function accordingly, As a result the hashing omits the leading 64bit and even hashes beyond the futex key causing a bad hash distribution which led to a ~100% performance regression. Hand in the futex key pointer instead of a random struct member and make the size calculation based of the struct offset. Fixes: 8019ad13ef7f ("futex: Fix inode life-time issue") Reported-by: Rong Chen Decoded-by: Linus Torvalds Signed-off-by: Thomas Gleixner Tested-by: Rong Chen Link: https://lkml.kernel.org/r/87h7yy90ve.fsf@nanos.tec.linutronix.de --- kernel/futex.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index e14f7cd45dbd..82dfacb3250e 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -385,9 +385,9 @@ static inline int hb_waiters_pending(struct futex_hash_bucket *hb) */ static struct futex_hash_bucket *hash_futex(union futex_key *key) { - u32 hash = jhash2((u32*)&key->both.word, - (sizeof(key->both.word)+sizeof(key->both.ptr))/4, + u32 hash = jhash2((u32 *)key, offsetof(typeof(*key), both.offset) / 4, key->both.offset); + return &futex_queues[hash & (futex_hashsize - 1)]; } From 62039c30c19dcab96621e074aeeb90da7100def7 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 9 Mar 2020 15:27:55 -0700 Subject: [PATCH 087/372] bpf: Initialize storage pointers to NULL to prevent freeing garbage pointer Local storage array isn't initialized, so if cgroup storage allocation fails for BPF_CGROUP_STORAGE_SHARED, error handling code will attempt to free uninitialized pointer for BPF_CGROUP_STORAGE_PERCPU storage type. Avoid this by always initializing storage pointers to NULLs. Fixes: 8bad74f9840f ("bpf: extend cgroup bpf core to allow multiple cgroup storage types") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200309222756.1018737-1-andriin@fb.com --- kernel/bpf/cgroup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 9a500fadbef5..b2bc4c335bb1 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -302,8 +302,8 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, u32 saved_flags = (flags & (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI)); struct list_head *progs = &cgrp->bpf.progs[type]; struct bpf_prog *old_prog = NULL; - struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE], - *old_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {NULL}; + struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {}; + struct bpf_cgroup_storage *old_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {}; struct bpf_prog_list *pl, *replace_pl = NULL; enum bpf_cgroup_storage_type stype; int err; From 1d8006abaab4cb90f81add86e8d1bf9411add05a Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 9 Mar 2020 15:40:17 -0700 Subject: [PATCH 088/372] bpf: Fix cgroup ref leak in cgroup_bpf_inherit on out-of-memory There is no compensating cgroup_bpf_put() for each ancestor cgroup in cgroup_bpf_inherit(). If compute_effective_progs returns error, those cgroups won't be freed ever. Fix it by putting them in cleanup code path. Fixes: e10360f815ca ("bpf: cgroup: prevent out-of-order release of cgroup bpf") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Roman Gushchin Link: https://lore.kernel.org/bpf/20200309224017.1063297-1-andriin@fb.com --- kernel/bpf/cgroup.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index b2bc4c335bb1..4f1472409ef8 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -227,6 +227,9 @@ cleanup: for (i = 0; i < NR; i++) bpf_prog_array_free(arrays[i]); + for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p)) + cgroup_bpf_put(p); + percpu_ref_exit(&cgrp->bpf.refcnt); return -ENOMEM; From 7ca6ee38909109751bfab79e9f6c570d2ed258c6 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 26 Feb 2020 16:21:20 +0300 Subject: [PATCH 089/372] watchdog: iTCO_wdt: Export vendorsupport In preparation for making ->smi_res optional the iTCO_wdt driver needs to know whether vendorsupport is being set to non-zero. For this reason export the variable. Signed-off-by: Mika Westerberg Reviewed-by: Guenter Roeck Signed-off-by: Wolfram Sang --- drivers/watchdog/iTCO_vendor.h | 2 ++ drivers/watchdog/iTCO_vendor_support.c | 16 +++++++++------- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/watchdog/iTCO_vendor.h b/drivers/watchdog/iTCO_vendor.h index 0f7373ba10d5..69e92e692ae0 100644 --- a/drivers/watchdog/iTCO_vendor.h +++ b/drivers/watchdog/iTCO_vendor.h @@ -1,10 +1,12 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* iTCO Vendor Specific Support hooks */ #ifdef CONFIG_ITCO_VENDOR_SUPPORT +extern int iTCO_vendorsupport; extern void iTCO_vendor_pre_start(struct resource *, unsigned int); extern void iTCO_vendor_pre_stop(struct resource *); extern int iTCO_vendor_check_noreboot_on(void); #else +#define iTCO_vendorsupport 0 #define iTCO_vendor_pre_start(acpibase, heartbeat) {} #define iTCO_vendor_pre_stop(acpibase) {} #define iTCO_vendor_check_noreboot_on() 1 diff --git a/drivers/watchdog/iTCO_vendor_support.c b/drivers/watchdog/iTCO_vendor_support.c index 4f1b96f59349..cf0eaa04b064 100644 --- a/drivers/watchdog/iTCO_vendor_support.c +++ b/drivers/watchdog/iTCO_vendor_support.c @@ -39,8 +39,10 @@ /* Broken BIOS */ #define BROKEN_BIOS 911 -static int vendorsupport; -module_param(vendorsupport, int, 0); +int iTCO_vendorsupport; +EXPORT_SYMBOL(iTCO_vendorsupport); + +module_param_named(vendorsupport, iTCO_vendorsupport, int, 0); MODULE_PARM_DESC(vendorsupport, "iTCO vendor specific support mode, default=" "0 (none), 1=SuperMicro Pent3, 911=Broken SMI BIOS"); @@ -152,7 +154,7 @@ static void broken_bios_stop(struct resource *smires) void iTCO_vendor_pre_start(struct resource *smires, unsigned int heartbeat) { - switch (vendorsupport) { + switch (iTCO_vendorsupport) { case SUPERMICRO_OLD_BOARD: supermicro_old_pre_start(smires); break; @@ -165,7 +167,7 @@ EXPORT_SYMBOL(iTCO_vendor_pre_start); void iTCO_vendor_pre_stop(struct resource *smires) { - switch (vendorsupport) { + switch (iTCO_vendorsupport) { case SUPERMICRO_OLD_BOARD: supermicro_old_pre_stop(smires); break; @@ -178,7 +180,7 @@ EXPORT_SYMBOL(iTCO_vendor_pre_stop); int iTCO_vendor_check_noreboot_on(void) { - switch (vendorsupport) { + switch (iTCO_vendorsupport) { case SUPERMICRO_OLD_BOARD: return 0; default: @@ -189,13 +191,13 @@ EXPORT_SYMBOL(iTCO_vendor_check_noreboot_on); static int __init iTCO_vendor_init_module(void) { - if (vendorsupport == SUPERMICRO_NEW_BOARD) { + if (iTCO_vendorsupport == SUPERMICRO_NEW_BOARD) { pr_warn("Option vendorsupport=%d is no longer supported, " "please use the w83627hf_wdt driver instead\n", SUPERMICRO_NEW_BOARD); return -EINVAL; } - pr_info("vendor-support=%d\n", vendorsupport); + pr_info("vendor-support=%d\n", iTCO_vendorsupport); return 0; } From e42b0c24389d5a1602e77db4f6def0d5a19e3e43 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 26 Feb 2020 16:21:21 +0300 Subject: [PATCH 090/372] watchdog: iTCO_wdt: Make ICH_RES_IO_SMI optional The iTCO_wdt driver only needs ICH_RES_IO_SMI I/O resource when either turn_SMI_watchdog_clear_off module parameter is set to match ->iTCO_version (or higher), and when legacy iTCO_vendorsupport is set. Modify the driver so that ICH_RES_IO_SMI is optional if the two conditions are not met. Signed-off-by: Mika Westerberg Reviewed-by: Guenter Roeck Signed-off-by: Wolfram Sang --- drivers/watchdog/iTCO_wdt.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/drivers/watchdog/iTCO_wdt.c b/drivers/watchdog/iTCO_wdt.c index 156360e37714..e707c4797f76 100644 --- a/drivers/watchdog/iTCO_wdt.c +++ b/drivers/watchdog/iTCO_wdt.c @@ -459,13 +459,25 @@ static int iTCO_wdt_probe(struct platform_device *pdev) if (!p->tco_res) return -ENODEV; - p->smi_res = platform_get_resource(pdev, IORESOURCE_IO, ICH_RES_IO_SMI); - if (!p->smi_res) - return -ENODEV; - p->iTCO_version = pdata->version; p->pci_dev = to_pci_dev(dev->parent); + p->smi_res = platform_get_resource(pdev, IORESOURCE_IO, ICH_RES_IO_SMI); + if (p->smi_res) { + /* The TCO logic uses the TCO_EN bit in the SMI_EN register */ + if (!devm_request_region(dev, p->smi_res->start, + resource_size(p->smi_res), + pdev->name)) { + pr_err("I/O address 0x%04llx already in use, device disabled\n", + (u64)SMI_EN(p)); + return -EBUSY; + } + } else if (iTCO_vendorsupport || + turn_SMI_watchdog_clear_off >= p->iTCO_version) { + pr_err("SMI I/O resource is missing\n"); + return -ENODEV; + } + iTCO_wdt_no_reboot_bit_setup(p, pdata); /* @@ -492,14 +504,6 @@ static int iTCO_wdt_probe(struct platform_device *pdev) /* Set the NO_REBOOT bit to prevent later reboots, just for sure */ p->update_no_reboot_bit(p->no_reboot_priv, true); - /* The TCO logic uses the TCO_EN bit in the SMI_EN register */ - if (!devm_request_region(dev, p->smi_res->start, - resource_size(p->smi_res), - pdev->name)) { - pr_err("I/O address 0x%04llx already in use, device disabled\n", - (u64)SMI_EN(p)); - return -EBUSY; - } if (turn_SMI_watchdog_clear_off >= p->iTCO_version) { /* * Bit 13: TCO_EN -> 0 From 04bbb97d1b732b2d197f103c5818f5c214a4cf81 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 26 Feb 2020 16:21:22 +0300 Subject: [PATCH 091/372] i2c: i801: Do not add ICH_RES_IO_SMI for the iTCO_wdt device Martin noticed that nct6775 driver does not load properly on his system in v5.4+ kernels. The issue was bisected to commit b84398d6d7f9 ("i2c: i801: Use iTCO version 6 in Cannon Lake PCH and beyond") but it is likely not the culprit because the faulty code has been in the driver already since commit 9424693035a5 ("i2c: i801: Create iTCO device on newer Intel PCHs"). So more likely some commit that added PCI IDs of recent chipsets made the driver to create the iTCO_wdt device on Martins system. The issue was debugged to be PCI configuration access to the PMC device that is not present. This returns all 1's when read and this caused the iTCO_wdt driver to accidentally request resourses used by nct6775. It turns out that the SMI resource is only required for some ancient systems, not the ones supported by this driver. For this reason do not populate the SMI resource at all and drop all the related code. The driver now always populates the main I/O resource and only in case of SPT (Intel Sunrisepoint) compatible devices it adds another resource for the NO_REBOOT bit. These two resources are of different types so platform_get_resource() used by the iTCO_wdt driver continues to find the both resources at index 0. Link: https://lore.kernel.org/linux-hwmon/CAM1AHpQ4196tyD=HhBu-2donSsuogabkfP03v1YF26Q7_BgvgA@mail.gmail.com/ Fixes: 9424693035a5 ("i2c: i801: Create iTCO device on newer Intel PCHs") [wsa: complete fix needs all of http://patchwork.ozlabs.org/project/linux-i2c/list/?series=160959&state=*] Reported-by: Martin Volf Signed-off-by: Mika Westerberg Reviewed-by: Guenter Roeck Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-i801.c | 45 ++++++++++------------------------- 1 file changed, 12 insertions(+), 33 deletions(-) diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index ca4f096fef74..a9c03f5c3482 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -132,11 +132,6 @@ #define TCOBASE 0x050 #define TCOCTL 0x054 -#define ACPIBASE 0x040 -#define ACPIBASE_SMI_OFF 0x030 -#define ACPICTRL 0x044 -#define ACPICTRL_EN 0x080 - #define SBREG_BAR 0x10 #define SBREG_SMBCTRL 0xc6000c #define SBREG_SMBCTRL_DNV 0xcf000c @@ -1553,7 +1548,7 @@ i801_add_tco_spt(struct i801_priv *priv, struct pci_dev *pci_dev, pci_bus_write_config_byte(pci_dev->bus, devfn, 0xe1, hidden); spin_unlock(&p2sb_spinlock); - res = &tco_res[ICH_RES_MEM_OFF]; + res = &tco_res[1]; if (pci_dev->device == PCI_DEVICE_ID_INTEL_DNV_SMBUS) res->start = (resource_size_t)base64_addr + SBREG_SMBCTRL_DNV; else @@ -1563,7 +1558,7 @@ i801_add_tco_spt(struct i801_priv *priv, struct pci_dev *pci_dev, res->flags = IORESOURCE_MEM; return platform_device_register_resndata(&pci_dev->dev, "iTCO_wdt", -1, - tco_res, 3, &spt_tco_platform_data, + tco_res, 2, &spt_tco_platform_data, sizeof(spt_tco_platform_data)); } @@ -1576,17 +1571,16 @@ static struct platform_device * i801_add_tco_cnl(struct i801_priv *priv, struct pci_dev *pci_dev, struct resource *tco_res) { - return platform_device_register_resndata(&pci_dev->dev, "iTCO_wdt", -1, - tco_res, 2, &cnl_tco_platform_data, - sizeof(cnl_tco_platform_data)); + return platform_device_register_resndata(&pci_dev->dev, + "iTCO_wdt", -1, tco_res, 1, &cnl_tco_platform_data, + sizeof(cnl_tco_platform_data)); } static void i801_add_tco(struct i801_priv *priv) { - u32 base_addr, tco_base, tco_ctl, ctrl_val; struct pci_dev *pci_dev = priv->pci_dev; - struct resource tco_res[3], *res; - unsigned int devfn; + struct resource tco_res[2], *res; + u32 tco_base, tco_ctl; /* If we have ACPI based watchdog use that instead */ if (acpi_has_watchdog()) @@ -1601,30 +1595,15 @@ static void i801_add_tco(struct i801_priv *priv) return; memset(tco_res, 0, sizeof(tco_res)); - - res = &tco_res[ICH_RES_IO_TCO]; + /* + * Always populate the main iTCO IO resource here. The second entry + * for NO_REBOOT MMIO is filled by the SPT specific function. + */ + res = &tco_res[0]; res->start = tco_base & ~1; res->end = res->start + 32 - 1; res->flags = IORESOURCE_IO; - /* - * Power Management registers. - */ - devfn = PCI_DEVFN(PCI_SLOT(pci_dev->devfn), 2); - pci_bus_read_config_dword(pci_dev->bus, devfn, ACPIBASE, &base_addr); - - res = &tco_res[ICH_RES_IO_SMI]; - res->start = (base_addr & ~1) + ACPIBASE_SMI_OFF; - res->end = res->start + 3; - res->flags = IORESOURCE_IO; - - /* - * Enable the ACPI I/O space. - */ - pci_bus_read_config_dword(pci_dev->bus, devfn, ACPICTRL, &ctrl_val); - ctrl_val |= ACPICTRL_EN; - pci_bus_write_config_dword(pci_dev->bus, devfn, ACPICTRL, ctrl_val); - if (priv->features & FEATURE_TCO_CNL) priv->tco_pdev = i801_add_tco_cnl(priv, pci_dev, tco_res); else From 2d48ea0efb8887ebba3e3720bb5b738aced4e574 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Thu, 5 Mar 2020 15:00:46 -0500 Subject: [PATCH 092/372] iommu/vt-d: Fix RCU-list bugs in intel_iommu_init() There are several places traverse RCU-list without holding any lock in intel_iommu_init(). Fix them by acquiring dmar_global_lock. WARNING: suspicious RCU usage ----------------------------- drivers/iommu/intel-iommu.c:5216 RCU-list traversed in non-reader section!! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 no locks held by swapper/0/1. Call Trace: dump_stack+0xa0/0xea lockdep_rcu_suspicious+0x102/0x10b intel_iommu_init+0x947/0xb13 pci_iommu_init+0x26/0x62 do_one_initcall+0xfe/0x500 kernel_init_freeable+0x45a/0x4f8 kernel_init+0x11/0x139 ret_from_fork+0x3a/0x50 DMAR: Intel(R) Virtualization Technology for Directed I/O Fixes: d8190dc63886 ("iommu/vt-d: Enable DMA remapping after rmrr mapped") Signed-off-by: Qian Cai Acked-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 33593fea0250..693380355dea 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5193,6 +5193,7 @@ int __init intel_iommu_init(void) init_iommu_pm_ops(); + down_read(&dmar_global_lock); for_each_active_iommu(iommu, drhd) { iommu_device_sysfs_add(&iommu->iommu, NULL, intel_iommu_groups, @@ -5200,6 +5201,7 @@ int __init intel_iommu_init(void) iommu_device_set_ops(&iommu->iommu, &intel_iommu_ops); iommu_device_register(&iommu->iommu); } + up_read(&dmar_global_lock); bus_set_iommu(&pci_bus_type, &intel_iommu_ops); if (si_domain && !hw_pass_through) @@ -5210,7 +5212,6 @@ int __init intel_iommu_init(void) down_read(&dmar_global_lock); if (probe_acpi_namespace_devices()) pr_warn("ACPI name space devices didn't probe correctly\n"); - up_read(&dmar_global_lock); /* Finally, we enable the DMA remapping hardware. */ for_each_iommu(iommu, drhd) { @@ -5219,6 +5220,8 @@ int __init intel_iommu_init(void) iommu_disable_protect_mem_regions(iommu); } + up_read(&dmar_global_lock); + pr_info("Intel(R) Virtualization Technology for Directed I/O\n"); intel_iommu_enabled = 1; From f5152416528c2295f35dd9c9bd4fb27c4032413d Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Thu, 5 Mar 2020 15:15:02 -0500 Subject: [PATCH 093/372] iommu/vt-d: Silence RCU-list debugging warnings Similar to the commit 02d715b4a818 ("iommu/vt-d: Fix RCU list debugging warnings"), there are several other places that call list_for_each_entry_rcu() outside of an RCU read side critical section but with dmar_global_lock held. Silence those false positives as well. drivers/iommu/intel-iommu.c:4288 RCU-list traversed in non-reader section!! 1 lock held by swapper/0/1: #0: ffffffff935892c8 (dmar_global_lock){+.+.}, at: intel_iommu_init+0x1ad/0xb97 drivers/iommu/dmar.c:366 RCU-list traversed in non-reader section!! 1 lock held by swapper/0/1: #0: ffffffff935892c8 (dmar_global_lock){+.+.}, at: intel_iommu_init+0x125/0xb97 drivers/iommu/intel-iommu.c:5057 RCU-list traversed in non-reader section!! 1 lock held by swapper/0/1: #0: ffffffffa71892c8 (dmar_global_lock){++++}, at: intel_iommu_init+0x61a/0xb13 Signed-off-by: Qian Cai Acked-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/dmar.c | 3 ++- include/linux/dmar.h | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 071bb42bbbc5..7b16c4db40b4 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -363,7 +363,8 @@ dmar_find_dmaru(struct acpi_dmar_hardware_unit *drhd) { struct dmar_drhd_unit *dmaru; - list_for_each_entry_rcu(dmaru, &dmar_drhd_units, list) + list_for_each_entry_rcu(dmaru, &dmar_drhd_units, list, + dmar_rcu_check()) if (dmaru->segment == drhd->segment && dmaru->reg_base_addr == drhd->address) return dmaru; diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 712be8bc6a7c..d7bf029df737 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -74,11 +74,13 @@ extern struct list_head dmar_drhd_units; dmar_rcu_check()) #define for_each_active_drhd_unit(drhd) \ - list_for_each_entry_rcu(drhd, &dmar_drhd_units, list) \ + list_for_each_entry_rcu(drhd, &dmar_drhd_units, list, \ + dmar_rcu_check()) \ if (drhd->ignored) {} else #define for_each_active_iommu(i, drhd) \ - list_for_each_entry_rcu(drhd, &dmar_drhd_units, list) \ + list_for_each_entry_rcu(drhd, &dmar_drhd_units, list, \ + dmar_rcu_check()) \ if (i=drhd->iommu, drhd->ignored) {} else #define for_each_iommu(i, drhd) \ From 9be8bc4dd6177cf992b93b0bd014c4f611283896 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Thu, 13 Feb 2020 17:15:03 +0200 Subject: [PATCH 094/372] i2c: designware-pci: Fix BUG_ON during device removal Function i2c_dw_pci_remove() -> pci_free_irq_vectors() -> pci_disable_msi() -> free_msi_irqs() will throw a BUG_ON() for MSI enabled device since the driver has not released the requested IRQ before calling the pci_free_irq_vectors(). Here driver requests an IRQ using devm_request_irq() but automatic release happens only after remove callback. Fix this by explicitly freeing the IRQ before calling pci_free_irq_vectors(). Fixes: 21aa3983d619 ("i2c: designware-pci: Switch over to MSI interrupts") Cc: stable@vger.kernel.org # v5.4+ Signed-off-by: Jarkko Nikula Reviewed-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-designware-pcidrv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/i2c-designware-pcidrv.c b/drivers/i2c/busses/i2c-designware-pcidrv.c index 050adda7c1bd..05b35ac33ce3 100644 --- a/drivers/i2c/busses/i2c-designware-pcidrv.c +++ b/drivers/i2c/busses/i2c-designware-pcidrv.c @@ -313,6 +313,7 @@ static void i2c_dw_pci_remove(struct pci_dev *pdev) pm_runtime_get_noresume(&pdev->dev); i2c_del_adapter(&dev->adapter); + devm_free_irq(&pdev->dev, dev->irq, dev); pci_free_irq_vectors(pdev); } From bcf3588d8ed3517e6ffaf083f034812aee9dc8e2 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 3 Mar 2020 13:50:46 +0100 Subject: [PATCH 095/372] macintosh: windfarm: fix MODINFO regression Commit af503716ac14 made sure OF devices get an OF style modalias with I2C events. It assumed all in-tree users were converted, yet it missed some Macintosh drivers. Add an OF module device table for all windfarm drivers to make them automatically load again. Fixes: af503716ac14 ("i2c: core: report OF style module alias for devices registered via OF") Link: https://bugzilla.kernel.org/show_bug.cgi?id=199471 Reported-by: Erhard Furtner Tested-by: Erhard Furtner Acked-by: Michael Ellerman (powerpc) Signed-off-by: Wolfram Sang Cc: stable@kernel.org # v4.17+ --- drivers/macintosh/windfarm_ad7417_sensor.c | 7 +++++++ drivers/macintosh/windfarm_fcu_controls.c | 7 +++++++ drivers/macintosh/windfarm_lm75_sensor.c | 16 +++++++++++++++- drivers/macintosh/windfarm_lm87_sensor.c | 7 +++++++ drivers/macintosh/windfarm_max6690_sensor.c | 7 +++++++ drivers/macintosh/windfarm_smu_sat.c | 7 +++++++ 6 files changed, 50 insertions(+), 1 deletion(-) diff --git a/drivers/macintosh/windfarm_ad7417_sensor.c b/drivers/macintosh/windfarm_ad7417_sensor.c index 125605987b44..e7dec328c7cf 100644 --- a/drivers/macintosh/windfarm_ad7417_sensor.c +++ b/drivers/macintosh/windfarm_ad7417_sensor.c @@ -312,9 +312,16 @@ static const struct i2c_device_id wf_ad7417_id[] = { }; MODULE_DEVICE_TABLE(i2c, wf_ad7417_id); +static const struct of_device_id wf_ad7417_of_id[] = { + { .compatible = "ad7417", }, + { } +}; +MODULE_DEVICE_TABLE(of, wf_ad7417_of_id); + static struct i2c_driver wf_ad7417_driver = { .driver = { .name = "wf_ad7417", + .of_match_table = wf_ad7417_of_id, }, .probe = wf_ad7417_probe, .remove = wf_ad7417_remove, diff --git a/drivers/macintosh/windfarm_fcu_controls.c b/drivers/macintosh/windfarm_fcu_controls.c index 67daeec94b44..2470e5a725c8 100644 --- a/drivers/macintosh/windfarm_fcu_controls.c +++ b/drivers/macintosh/windfarm_fcu_controls.c @@ -580,9 +580,16 @@ static const struct i2c_device_id wf_fcu_id[] = { }; MODULE_DEVICE_TABLE(i2c, wf_fcu_id); +static const struct of_device_id wf_fcu_of_id[] = { + { .compatible = "fcu", }, + { } +}; +MODULE_DEVICE_TABLE(of, wf_fcu_of_id); + static struct i2c_driver wf_fcu_driver = { .driver = { .name = "wf_fcu", + .of_match_table = wf_fcu_of_id, }, .probe = wf_fcu_probe, .remove = wf_fcu_remove, diff --git a/drivers/macintosh/windfarm_lm75_sensor.c b/drivers/macintosh/windfarm_lm75_sensor.c index 282c28a17ea1..1e5fa09845e7 100644 --- a/drivers/macintosh/windfarm_lm75_sensor.c +++ b/drivers/macintosh/windfarm_lm75_sensor.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -91,9 +92,14 @@ static int wf_lm75_probe(struct i2c_client *client, const struct i2c_device_id *id) { struct wf_lm75_sensor *lm; - int rc, ds1775 = id->driver_data; + int rc, ds1775; const char *name, *loc; + if (id) + ds1775 = id->driver_data; + else + ds1775 = !!of_device_get_match_data(&client->dev); + DBG("wf_lm75: creating %s device at address 0x%02x\n", ds1775 ? "ds1775" : "lm75", client->addr); @@ -164,9 +170,17 @@ static const struct i2c_device_id wf_lm75_id[] = { }; MODULE_DEVICE_TABLE(i2c, wf_lm75_id); +static const struct of_device_id wf_lm75_of_id[] = { + { .compatible = "lm75", .data = (void *)0}, + { .compatible = "ds1775", .data = (void *)1 }, + { } +}; +MODULE_DEVICE_TABLE(of, wf_lm75_of_id); + static struct i2c_driver wf_lm75_driver = { .driver = { .name = "wf_lm75", + .of_match_table = wf_lm75_of_id, }, .probe = wf_lm75_probe, .remove = wf_lm75_remove, diff --git a/drivers/macintosh/windfarm_lm87_sensor.c b/drivers/macintosh/windfarm_lm87_sensor.c index b03a33b803b7..d011899c0a8a 100644 --- a/drivers/macintosh/windfarm_lm87_sensor.c +++ b/drivers/macintosh/windfarm_lm87_sensor.c @@ -166,9 +166,16 @@ static const struct i2c_device_id wf_lm87_id[] = { }; MODULE_DEVICE_TABLE(i2c, wf_lm87_id); +static const struct of_device_id wf_lm87_of_id[] = { + { .compatible = "lm87cimt", }, + { } +}; +MODULE_DEVICE_TABLE(of, wf_lm87_of_id); + static struct i2c_driver wf_lm87_driver = { .driver = { .name = "wf_lm87", + .of_match_table = wf_lm87_of_id, }, .probe = wf_lm87_probe, .remove = wf_lm87_remove, diff --git a/drivers/macintosh/windfarm_max6690_sensor.c b/drivers/macintosh/windfarm_max6690_sensor.c index e666cc020683..1e7b03d44ad9 100644 --- a/drivers/macintosh/windfarm_max6690_sensor.c +++ b/drivers/macintosh/windfarm_max6690_sensor.c @@ -120,9 +120,16 @@ static const struct i2c_device_id wf_max6690_id[] = { }; MODULE_DEVICE_TABLE(i2c, wf_max6690_id); +static const struct of_device_id wf_max6690_of_id[] = { + { .compatible = "max6690", }, + { } +}; +MODULE_DEVICE_TABLE(of, wf_max6690_of_id); + static struct i2c_driver wf_max6690_driver = { .driver = { .name = "wf_max6690", + .of_match_table = wf_max6690_of_id, }, .probe = wf_max6690_probe, .remove = wf_max6690_remove, diff --git a/drivers/macintosh/windfarm_smu_sat.c b/drivers/macintosh/windfarm_smu_sat.c index c84ec49c3741..cb75dc035616 100644 --- a/drivers/macintosh/windfarm_smu_sat.c +++ b/drivers/macintosh/windfarm_smu_sat.c @@ -341,9 +341,16 @@ static const struct i2c_device_id wf_sat_id[] = { }; MODULE_DEVICE_TABLE(i2c, wf_sat_id); +static const struct of_device_id wf_sat_of_id[] = { + { .compatible = "smu-sat", }, + { } +}; +MODULE_DEVICE_TABLE(of, wf_sat_of_id); + static struct i2c_driver wf_sat_driver = { .driver = { .name = "wf_smu_sat", + .of_match_table = wf_sat_of_id, }, .probe = wf_sat_probe, .remove = wf_sat_remove, From 3747cd2efe7ecb9604972285ab3f60c96cb753a8 Mon Sep 17 00:00:00 2001 From: Hamish Martin Date: Tue, 10 Mar 2020 10:16:18 +1300 Subject: [PATCH 096/372] i2c: gpio: suppress error on probe defer If a GPIO we are trying to use is not available and we are deferring the probe, don't output an error message. This seems to have been the intent of commit 05c74778858d ("i2c: gpio: Add support for named gpios in DT") but the error was still output due to not checking the updated 'retdesc'. Fixes: 05c74778858d ("i2c: gpio: Add support for named gpios in DT") Signed-off-by: Hamish Martin Acked-by: Linus Walleij Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-gpio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-gpio.c b/drivers/i2c/busses/i2c-gpio.c index 3a9e840a3546..a4a6825c8758 100644 --- a/drivers/i2c/busses/i2c-gpio.c +++ b/drivers/i2c/busses/i2c-gpio.c @@ -348,7 +348,7 @@ static struct gpio_desc *i2c_gpio_get_desc(struct device *dev, if (ret == -ENOENT) retdesc = ERR_PTR(-EPROBE_DEFER); - if (ret != -EPROBE_DEFER) + if (PTR_ERR(retdesc) != -EPROBE_DEFER) dev_err(dev, "error trying to get descriptor: %d\n", ret); return retdesc; From da86cad0e011c0216bf2303bc924fa080e150bc8 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Thu, 5 Mar 2020 14:26:22 +0200 Subject: [PATCH 097/372] MAINTAINERS: update web URL for iwlwifi The current URL mentioned in iwlwifi's W entry is outdated and currently pointing to a dead link. Change it so that it points to the correct Wiki page directly. Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index b1935c2ae118..2e366d8f054f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8684,7 +8684,7 @@ M: Emmanuel Grumbach M: Luca Coelho M: Intel Linux Wireless L: linux-wireless@vger.kernel.org -W: http://intellinuxwireless.org +W: https://wireless.wiki.kernel.org/en/users/drivers/iwlwifi T: git git://git.kernel.org/pub/scm/linux/kernel/git/iwlwifi/iwlwifi.git S: Supported F: drivers/net/wireless/intel/iwlwifi/ From 045706bff837ee89c13f1ace173db71922c1c40b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 6 Mar 2020 17:08:57 +0200 Subject: [PATCH 098/372] xhci: Do not open code __print_symbolic() in xhci trace events libtraceevent (used by perf and trace-cmd) failed to parse the xhci_urb_dequeue trace event. This is because the user space trace event format parsing is not a full C compiler. It can handle some basic logic, but is not meant to be able to handle everything C can do. In cases where a trace event field needs to be converted from a number to a string, there's the __print_symbolic() macro that should be used: See samples/trace_events/trace-events-sample.h Some xhci trace events open coded the __print_symbolic() causing the user spaces tools to fail to parse it. This has to be replaced with __print_symbolic() instead. CC: stable@vger.kernel.org Reported-by: Tzvetomir Stoyanov Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=206531 Fixes: 5abdc2e6e12ff ("usb: host: xhci: add urb_enqueue/dequeue/giveback tracers") Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20200306150858.21904-2-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-trace.h | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/drivers/usb/host/xhci-trace.h b/drivers/usb/host/xhci-trace.h index 56eb867803a6..b19582b2a72c 100644 --- a/drivers/usb/host/xhci-trace.h +++ b/drivers/usb/host/xhci-trace.h @@ -289,23 +289,12 @@ DECLARE_EVENT_CLASS(xhci_log_urb, ), TP_printk("ep%d%s-%s: urb %p pipe %u slot %d length %d/%d sgs %d/%d stream %d flags %08x", __entry->epnum, __entry->dir_in ? "in" : "out", - ({ char *s; - switch (__entry->type) { - case USB_ENDPOINT_XFER_INT: - s = "intr"; - break; - case USB_ENDPOINT_XFER_CONTROL: - s = "control"; - break; - case USB_ENDPOINT_XFER_BULK: - s = "bulk"; - break; - case USB_ENDPOINT_XFER_ISOC: - s = "isoc"; - break; - default: - s = "UNKNOWN"; - } s; }), __entry->urb, __entry->pipe, __entry->slot_id, + __print_symbolic(__entry->type, + { USB_ENDPOINT_XFER_INT, "intr" }, + { USB_ENDPOINT_XFER_CONTROL, "control" }, + { USB_ENDPOINT_XFER_BULK, "bulk" }, + { USB_ENDPOINT_XFER_ISOC, "isoc" }), + __entry->urb, __entry->pipe, __entry->slot_id, __entry->actual, __entry->length, __entry->num_mapped_sgs, __entry->num_sgs, __entry->stream, __entry->flags ) From 16263abc12d09871156a1c8650fb651f0e552f5e Mon Sep 17 00:00:00 2001 From: Alberto Mattea Date: Fri, 6 Mar 2020 17:08:58 +0200 Subject: [PATCH 099/372] usb: xhci: apply XHCI_SUSPEND_DELAY to AMD XHCI controller 1022:145c This controller timeouts during suspend (S3) with [ 240.521724] xhci_hcd 0000:30:00.3: WARN: xHC save state timeout [ 240.521729] xhci_hcd 0000:30:00.3: ERROR mismatched command completion event thus preventing the system from entering S3. Moreover it remains in an undefined state where some connected devices stop working until a reboot. Apply the XHCI_SUSPEND_DELAY quirk to make it suspend properly. CC: stable@vger.kernel.org Signed-off-by: Alberto Mattea Signed-off-by: Mathias Nyman Link: https://lore.kernel.org/r/20200306150858.21904-3-mathias.nyman@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 5e9b537df631..1fddc41fa1f3 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -136,7 +136,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) xhci->quirks |= XHCI_AMD_PLL_FIX; if (pdev->vendor == PCI_VENDOR_ID_AMD && - (pdev->device == 0x15e0 || + (pdev->device == 0x145c || + pdev->device == 0x15e0 || pdev->device == 0x15e1 || pdev->device == 0x43bb)) xhci->quirks |= XHCI_SUSPEND_DELAY; From da6c7faeb103c493e505e87643272f70be586635 Mon Sep 17 00:00:00 2001 From: Yoshiki Komachi Date: Tue, 10 Mar 2020 16:32:29 +0900 Subject: [PATCH 100/372] bpf/btf: Fix BTF verification of enum members in struct/union btf_enum_check_member() was currently sure to recognize the size of "enum" type members in struct/union as the size of "int" even if its size was packed. This patch fixes BTF enum verification to use the correct size of member in BPF programs. Fixes: 179cde8cef7e ("bpf: btf: Check members of struct/union") Signed-off-by: Yoshiki Komachi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/1583825550-18606-2-git-send-email-komachi.yoshiki@gmail.com --- kernel/bpf/btf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 787140095e58..32ab9225026e 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -2418,7 +2418,7 @@ static int btf_enum_check_member(struct btf_verifier_env *env, struct_size = struct_type->size; bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off); - if (struct_size - bytes_offset < sizeof(int)) { + if (struct_size - bytes_offset < member_type->size) { btf_verifier_log_member(env, struct_type, member, "Member exceeds struct_size"); return -EINVAL; From 6ffe559a77d1c963a3567f7a39a5419bdcdc4f1c Mon Sep 17 00:00:00 2001 From: Yoshiki Komachi Date: Tue, 10 Mar 2020 16:32:30 +0900 Subject: [PATCH 101/372] selftests/bpf: Add test for the packed enum member in struct/union Add a simple test to the existing selftest program in order to make sure that a packed enum member in struct unexceeds the struct_size. Signed-off-by: Yoshiki Komachi Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/1583825550-18606-3-git-send-email-komachi.yoshiki@gmail.com --- tools/testing/selftests/bpf/test_btf.c | 42 ++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index 93040ca83e60..8da77cda5f4a 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -1062,6 +1062,48 @@ static struct btf_raw_test raw_tests[] = { .err_str = "Member exceeds struct_size", }, +/* Test member unexceeds the size of struct + * + * enum E { + * E0, + * E1, + * }; + * + * struct A { + * char m; + * enum E __attribute__((packed)) n; + * }; + */ +{ + .descr = "size check test #5", + .raw_types = { + /* int */ /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, sizeof(int)), + /* char */ /* [2] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), + /* enum E { */ /* [3] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 2), 1), + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_ENUM_ENC(NAME_TBD, 1), + /* } */ + /* struct A { */ /* [4] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 2), + BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* char m; */ + BTF_MEMBER_ENC(NAME_TBD, 3, 8),/* enum E __attribute__((packed)) n; */ + /* } */ + BTF_END_RAW, + }, + .str_sec = "\0E\0E0\0E1\0A\0m\0n", + .str_sec_size = sizeof("\0E\0E0\0E1\0A\0m\0n"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "size_check5_map", + .key_size = sizeof(int), + .value_size = 2, + .key_type_id = 1, + .value_type_id = 4, + .max_entries = 4, +}, + /* typedef const void * const_void_ptr; * struct A { * const_void_ptr m; From dcd6589b11d3b1e71f516a87a7b9646ed356b4c0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 10 Mar 2020 13:07:46 -0400 Subject: [PATCH 102/372] blk-iocost: fix incorrect vtime comparison in iocg_is_idle() vtimes may wrap and time_before/after64() should be used to determine whether a given vtime is before or after another. iocg_is_idle() was incorrectly using plain "<" comparison do determine whether done_vtime is before vtime. Here, the only thing we're interested in is whether done_vtime matches vtime which indicates that there's nothing in flight. Let's test for inequality instead. Signed-off-by: Tejun Heo Fixes: 7caa47151ab2 ("blkcg: implement blk-iocost") Cc: stable@vger.kernel.org # v5.4+ Signed-off-by: Jens Axboe --- block/blk-iocost.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-iocost.c b/block/blk-iocost.c index 27ca68621137..9a599cc28c29 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -1318,7 +1318,7 @@ static bool iocg_is_idle(struct ioc_gq *iocg) return false; /* is something in flight? */ - if (atomic64_read(&iocg->done_vtime) < atomic64_read(&iocg->vtime)) + if (atomic64_read(&iocg->done_vtime) != atomic64_read(&iocg->vtime)) return false; return true; From 9134ae2a2546cb96abddcd4469a79c77ee3a4480 Mon Sep 17 00:00:00 2001 From: Prabhath Sajeepa Date: Mon, 9 Mar 2020 15:07:53 -0600 Subject: [PATCH 103/372] nvme-rdma: Avoid double freeing of async event data The timeout of identify cmd, which is invoked as part of admin queue creation, can result in freeing of async event data both in nvme_rdma_timeout handler and error handling path of nvme_rdma_configure_admin queue thus causing NULL pointer reference. Call Trace: ? nvme_rdma_setup_ctrl+0x223/0x800 [nvme_rdma] nvme_rdma_create_ctrl+0x2ba/0x3f7 [nvme_rdma] nvmf_dev_write+0xa54/0xcc6 [nvme_fabrics] __vfs_write+0x1b/0x40 vfs_write+0xb2/0x1b0 ksys_write+0x61/0xd0 __x64_sys_write+0x1a/0x20 do_syscall_64+0x60/0x1e0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Reviewed-by: Roland Dreier Reviewed-by: Max Gurtovoy Reviewed-by: Christoph Hellwig Signed-off-by: Prabhath Sajeepa Signed-off-by: Keith Busch --- drivers/nvme/host/rdma.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 3e85c5cacefd..0fe08c4dfd2f 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -850,9 +850,11 @@ out_free_tagset: if (new) blk_mq_free_tag_set(ctrl->ctrl.admin_tagset); out_free_async_qe: - nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe, - sizeof(struct nvme_command), DMA_TO_DEVICE); - ctrl->async_event_sqe.data = NULL; + if (ctrl->async_event_sqe.data) { + nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe, + sizeof(struct nvme_command), DMA_TO_DEVICE); + ctrl->async_event_sqe.data = NULL; + } out_free_queue: nvme_rdma_free_queue(&ctrl->queues[0]); return error; From be7594a424b4de7473b3d726635c3bf6aa19a86e Mon Sep 17 00:00:00 2001 From: Can Guo Date: Thu, 5 Mar 2020 00:53:07 -0800 Subject: [PATCH 104/372] scsi: ufs: Fix possible unclocked access to auto hibern8 timer register Before access auto hibner8 timer register, make sure power and clock are properly configured to avoid unclocked register access. Link: https://lore.kernel.org/r/1583398391-14273-1-git-send-email-cang@codeaurora.org Fixes: ba7af5ec5126 ("scsi: ufs: export ufshcd_auto_hibern8_update for vendor usage") Reviewed-by: Stanley Chu Signed-off-by: Can Guo Signed-off-by: Martin K. Petersen --- drivers/scsi/ufs/ufshcd.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index abd0e6b05f79..2d705694636c 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -3884,18 +3884,25 @@ EXPORT_SYMBOL_GPL(ufshcd_uic_hibern8_exit); void ufshcd_auto_hibern8_update(struct ufs_hba *hba, u32 ahit) { unsigned long flags; + bool update = false; - if (!(hba->capabilities & MASK_AUTO_HIBERN8_SUPPORT)) + if (!ufshcd_is_auto_hibern8_supported(hba)) return; spin_lock_irqsave(hba->host->host_lock, flags); - if (hba->ahit == ahit) - goto out_unlock; - hba->ahit = ahit; - if (!pm_runtime_suspended(hba->dev)) - ufshcd_writel(hba, hba->ahit, REG_AUTO_HIBERNATE_IDLE_TIMER); -out_unlock: + if (hba->ahit != ahit) { + hba->ahit = ahit; + update = true; + } spin_unlock_irqrestore(hba->host->host_lock, flags); + + if (update && !pm_runtime_suspended(hba->dev)) { + pm_runtime_get_sync(hba->dev); + ufshcd_hold(hba, false); + ufshcd_auto_hibern8_enable(hba); + ufshcd_release(hba); + pm_runtime_put(hba->dev); + } } EXPORT_SYMBOL_GPL(ufshcd_auto_hibern8_update); From 394b61711f3ce33f75bf70a3e22938464a13b3ee Mon Sep 17 00:00:00 2001 From: Wen Xiong Date: Fri, 6 Mar 2020 09:57:28 -0600 Subject: [PATCH 105/372] scsi: ipr: Fix softlockup when rescanning devices in petitboot When trying to rescan disks in petitboot shell, we hit the following softlockup stacktrace: Kernel panic - not syncing: System is deadlocked on memory [ 241.223394] CPU: 32 PID: 693 Comm: sh Not tainted 5.4.16-openpower1 #1 [ 241.223406] Call Trace: [ 241.223415] [c0000003f07c3180] [c000000000493fc4] dump_stack+0xa4/0xd8 (unreliable) [ 241.223432] [c0000003f07c31c0] [c00000000007d4ac] panic+0x148/0x3cc [ 241.223446] [c0000003f07c3260] [c000000000114b10] out_of_memory+0x468/0x4c4 [ 241.223461] [c0000003f07c3300] [c0000000001472b0] __alloc_pages_slowpath+0x594/0x6d8 [ 241.223476] [c0000003f07c3420] [c00000000014757c] __alloc_pages_nodemask+0x188/0x1a4 [ 241.223492] [c0000003f07c34a0] [c000000000153e10] alloc_pages_current+0xcc/0xd8 [ 241.223508] [c0000003f07c34e0] [c0000000001577ac] alloc_slab_page+0x30/0x98 [ 241.223524] [c0000003f07c3520] [c0000000001597fc] new_slab+0x138/0x40c [ 241.223538] [c0000003f07c35f0] [c00000000015b204] ___slab_alloc+0x1e4/0x404 [ 241.223552] [c0000003f07c36c0] [c00000000015b450] __slab_alloc+0x2c/0x48 [ 241.223566] [c0000003f07c36f0] [c00000000015b754] kmem_cache_alloc_node+0x9c/0x1b4 [ 241.223582] [c0000003f07c3760] [c000000000218c48] blk_alloc_queue_node+0x34/0x270 [ 241.223599] [c0000003f07c37b0] [c000000000226574] blk_mq_init_queue+0x2c/0x78 [ 241.223615] [c0000003f07c37e0] [c0000000002ff710] scsi_mq_alloc_queue+0x28/0x70 [ 241.223631] [c0000003f07c3810] [c0000000003005b8] scsi_alloc_sdev+0x184/0x264 [ 241.223647] [c0000003f07c38a0] [c000000000300ba0] scsi_probe_and_add_lun+0x288/0xa3c [ 241.223663] [c0000003f07c3a00] [c000000000301768] __scsi_scan_target+0xcc/0x478 [ 241.223679] [c0000003f07c3b20] [c000000000301c64] scsi_scan_channel.part.9+0x74/0x7c [ 241.223696] [c0000003f07c3b70] [c000000000301df4] scsi_scan_host_selected+0xe0/0x158 [ 241.223712] [c0000003f07c3bd0] [c000000000303f04] store_scan+0x104/0x114 [ 241.223727] [c0000003f07c3cb0] [c0000000002d5ac4] dev_attr_store+0x30/0x4c [ 241.223741] [c0000003f07c3cd0] [c0000000001dbc34] sysfs_kf_write+0x64/0x78 [ 241.223756] [c0000003f07c3cf0] [c0000000001da858] kernfs_fop_write+0x170/0x1b8 [ 241.223773] [c0000003f07c3d40] [c0000000001621fc] __vfs_write+0x34/0x60 [ 241.223787] [c0000003f07c3d60] [c000000000163c2c] vfs_write+0xa8/0xcc [ 241.223802] [c0000003f07c3db0] [c000000000163df4] ksys_write+0x70/0xbc [ 241.223816] [c0000003f07c3e20] [c00000000000b40c] system_call+0x5c/0x68 As a part of the scan process Linux will allocate and configure a scsi_device for each target to be scanned. If the device is not present, then the scsi_device is torn down. As a part of scsi_device teardown a workqueue item will be scheduled and the lockups we see are because there are 250k workqueue items to be processed. Accoding to the specification of SIS-64 sas controller, max_channel should be decreased on SIS-64 adapters to 4. The patch fixes softlockup issue. Thanks for Oliver Halloran's help with debugging and explanation! Link: https://lore.kernel.org/r/1583510248-23672-1-git-send-email-wenxiong@linux.vnet.ibm.com Signed-off-by: Wen Xiong Signed-off-by: Martin K. Petersen --- drivers/scsi/ipr.c | 3 ++- drivers/scsi/ipr.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index ae45cbe98ae2..cd8db1349871 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -9950,6 +9950,7 @@ static void ipr_init_ioa_cfg(struct ipr_ioa_cfg *ioa_cfg, ioa_cfg->max_devs_supported = ipr_max_devs; if (ioa_cfg->sis64) { + host->max_channel = IPR_MAX_SIS64_BUSES; host->max_id = IPR_MAX_SIS64_TARGETS_PER_BUS; host->max_lun = IPR_MAX_SIS64_LUNS_PER_TARGET; if (ipr_max_devs > IPR_MAX_SIS64_DEVS) @@ -9958,6 +9959,7 @@ static void ipr_init_ioa_cfg(struct ipr_ioa_cfg *ioa_cfg, + ((sizeof(struct ipr_config_table_entry64) * ioa_cfg->max_devs_supported))); } else { + host->max_channel = IPR_VSET_BUS; host->max_id = IPR_MAX_NUM_TARGETS_PER_BUS; host->max_lun = IPR_MAX_NUM_LUNS_PER_TARGET; if (ipr_max_devs > IPR_MAX_PHYSICAL_DEVS) @@ -9967,7 +9969,6 @@ static void ipr_init_ioa_cfg(struct ipr_ioa_cfg *ioa_cfg, * ioa_cfg->max_devs_supported))); } - host->max_channel = IPR_VSET_BUS; host->unique_id = host->host_no; host->max_cmd_len = IPR_MAX_CDB_LEN; host->can_queue = ioa_cfg->max_cmds; diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h index a67baeb36d1f..b97aa9ac2ffe 100644 --- a/drivers/scsi/ipr.h +++ b/drivers/scsi/ipr.h @@ -1300,6 +1300,7 @@ struct ipr_resource_entry { #define IPR_ARRAY_VIRTUAL_BUS 0x1 #define IPR_VSET_VIRTUAL_BUS 0x2 #define IPR_IOAFP_VIRTUAL_BUS 0x3 +#define IPR_MAX_SIS64_BUSES 0x4 #define IPR_GET_RES_PHYS_LOC(res) \ (((res)->bus << 24) | ((res)->target << 8) | (res)->lun) From 3b36b13d5e69d6f51ff1c55d1b404a74646c9757 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 11 Mar 2020 14:13:28 +0800 Subject: [PATCH 106/372] ALSA: hda/realtek: Fix pop noise on ALC225 Commit 317d9313925c ("ALSA: hda/realtek - Set default power save node to 0") makes the ALC225 have pop noise on S3 resume and cold boot. So partially revert this commit for ALC225 to fix the regression. Fixes: 317d9313925c ("ALSA: hda/realtek - Set default power save node to 0") BugLink: https://bugs.launchpad.net/bugs/1866357 Signed-off-by: Kai-Heng Feng Link: https://lore.kernel.org/r/20200311061328.17614-1-kai.heng.feng@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 0ac06ff1a17c..7b83b020ac3c 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8051,6 +8051,8 @@ static int patch_alc269(struct hda_codec *codec) spec->gen.mixer_nid = 0; break; case 0x10ec0225: + codec->power_save_node = 1; + /* fall through */ case 0x10ec0295: case 0x10ec0299: spec->codec_variant = ALC269_TYPE_ALC225; From 9d32c0cde4e2d1343dfb88a67b2ec6397705b32b Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Fri, 6 Mar 2020 01:30:47 +0100 Subject: [PATCH 107/372] staging/speakup: fix get_word non-space look-ahead get_char was erroneously given the address of the pointer to the text instead of the address of the text, thus leading to random crashes when the user requests speaking a word while the current position is on a space character and say_word_ctl is not enabled. Reported-on: https://github.com/bytefire/speakup/issues/1 Reported-by: Kirk Reiser Reported-by: Janina Sajka Reported-by: Alexandr Epaneshnikov Reported-by: Gregory Nowak Reported-by: deedra waters Signed-off-by: Samuel Thibault Tested-by: Alexandr Epaneshnikov Tested-by: Gregory Nowak Tested-by: Michael Taboada Cc: stable Link: https://lore.kernel.org/r/20200306003047.thijtmqrnayd3dmw@function Signed-off-by: Greg Kroah-Hartman --- drivers/staging/speakup/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/speakup/main.c b/drivers/staging/speakup/main.c index 488f2539aa9a..81ecfd1a200d 100644 --- a/drivers/staging/speakup/main.c +++ b/drivers/staging/speakup/main.c @@ -561,7 +561,7 @@ static u_long get_word(struct vc_data *vc) return 0; } else if (tmpx < vc->vc_cols - 2 && (ch == SPACE || ch == 0 || (ch < 0x100 && IS_WDLM(ch))) && - get_char(vc, (u_short *)&tmp_pos + 1, &temp) > SPACE) { + get_char(vc, (u_short *)tmp_pos + 1, &temp) > SPACE) { tmp_pos += 2; tmpx++; } else { From bab0a0b03442a62fe3abefcb2169e0b9ff95990c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Pouiller?= Date: Tue, 10 Mar 2020 11:13:52 +0100 Subject: [PATCH 108/372] staging: wfx: fix warning about freeing in-use mutex during device unregister MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After hif_shutdown(), communication with the chip is no more possible. It the only request that never reply. Therefore, hif_cmd.lock is never unlocked. hif_shutdown() unlock itself hif_cmd.lock to avoid a potential warning during disposal of device. hif_cmd.key_renew_lock should also been unlocked for the same reason. Signed-off-by: Jérôme Pouiller Link: https://lore.kernel.org/r/20200310101356.182818-2-Jerome.Pouiller@silabs.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/wfx/hif_tx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/wfx/hif_tx.c b/drivers/staging/wfx/hif_tx.c index 2428363371fa..7b732c531a74 100644 --- a/drivers/staging/wfx/hif_tx.c +++ b/drivers/staging/wfx/hif_tx.c @@ -140,6 +140,7 @@ int hif_shutdown(struct wfx_dev *wdev) else control_reg_write(wdev, 0); mutex_unlock(&wdev->hif_cmd.lock); + mutex_unlock(&wdev->hif_cmd.key_renew_lock); kfree(hif); return ret; } From c918c27ac6f0252aaa2374cdaa0426df4d5df9df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Pouiller?= Date: Tue, 10 Mar 2020 11:13:53 +0100 Subject: [PATCH 109/372] staging: wfx: fix lines ending with a comma instead of a semicolon MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Obviously introduced by mistake. Fixes: 09779276f1ba ("staging: wfx: simplify hif_start() usage") Signed-off-by: Jérôme Pouiller Link: https://lore.kernel.org/r/20200310101356.182818-3-Jerome.Pouiller@silabs.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/wfx/hif_tx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/staging/wfx/hif_tx.c b/drivers/staging/wfx/hif_tx.c index 7b732c531a74..7a56e45bcdaa 100644 --- a/drivers/staging/wfx/hif_tx.c +++ b/drivers/staging/wfx/hif_tx.c @@ -428,9 +428,9 @@ int hif_start(struct wfx_vif *wvif, const struct ieee80211_bss_conf *conf, struct hif_msg *hif; struct hif_req_start *body = wfx_alloc_hif(sizeof(*body), &hif); - body->dtim_period = conf->dtim_period, - body->short_preamble = conf->use_short_preamble, - body->channel_number = cpu_to_le16(channel->hw_value), + body->dtim_period = conf->dtim_period; + body->short_preamble = conf->use_short_preamble; + body->channel_number = cpu_to_le16(channel->hw_value); body->beacon_interval = cpu_to_le32(conf->beacon_int); body->basic_rate_set = cpu_to_le32(wfx_rate_mask_to_hw(wvif->wdev, conf->basic_rates)); From 4bbc6a3e7ad0d0f31ae3ba9858dbca45eb7a848e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Pouiller?= Date: Tue, 10 Mar 2020 11:13:54 +0100 Subject: [PATCH 110/372] staging: wfx: make warning about pending frame less scary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Removing station while some traffic is in progress may happen. Signed-off-by: Jérôme Pouiller Link: https://lore.kernel.org/r/20200310101356.182818-4-Jerome.Pouiller@silabs.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/wfx/sta.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/staging/wfx/sta.c b/drivers/staging/wfx/sta.c index 03d0f224ffdb..010e13bcd33e 100644 --- a/drivers/staging/wfx/sta.c +++ b/drivers/staging/wfx/sta.c @@ -605,7 +605,9 @@ int wfx_sta_remove(struct ieee80211_hw *hw, struct ieee80211_vif *vif, int i; for (i = 0; i < ARRAY_SIZE(sta_priv->buffered); i++) - WARN(sta_priv->buffered[i], "release station while Tx is in progress"); + if (sta_priv->buffered[i]) + dev_warn(wvif->wdev->dev, "release station while %d pending frame on queue %d", + sta_priv->buffered[i], i); // FIXME: see note in wfx_sta_add() if (vif->type == NL80211_IFTYPE_STATION) return 0; From 046cc2effd1312a23b9e3d8363be7c68f6e91f3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Pouiller?= Date: Tue, 10 Mar 2020 11:13:55 +0100 Subject: [PATCH 111/372] staging: wfx: fix RCU usage in wfx_join_finalize() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Access to sta->ht_cap is protected by RCU. However, hif_set_association_mode() may sleep, so it can't be called in RCU. This patch fix this behavior by handling sta and its RCU directly from function hif_set_association_mode(). Signed-off-by: Jérôme Pouiller Fixes: d00149011066 ("staging: wfx: fix RCU usage") Link: https://lore.kernel.org/r/20200310101356.182818-5-Jerome.Pouiller@silabs.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/wfx/hif_tx_mib.h | 15 ++++++++++----- drivers/staging/wfx/sta.c | 4 ++-- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/staging/wfx/hif_tx_mib.h b/drivers/staging/wfx/hif_tx_mib.h index bf3769c2a9b6..26b1406f9f6c 100644 --- a/drivers/staging/wfx/hif_tx_mib.h +++ b/drivers/staging/wfx/hif_tx_mib.h @@ -191,10 +191,10 @@ static inline int hif_set_block_ack_policy(struct wfx_vif *wvif, } static inline int hif_set_association_mode(struct wfx_vif *wvif, - struct ieee80211_bss_conf *info, - struct ieee80211_sta_ht_cap *ht_cap) + struct ieee80211_bss_conf *info) { int basic_rates = wfx_rate_mask_to_hw(wvif->wdev, info->basic_rates); + struct ieee80211_sta *sta = NULL; struct hif_mib_set_association_mode val = { .preambtype_use = 1, .mode = 1, @@ -204,12 +204,17 @@ static inline int hif_set_association_mode(struct wfx_vif *wvif, .basic_rate_set = cpu_to_le32(basic_rates) }; + rcu_read_lock(); // protect sta + if (info->bssid && !info->ibss_joined) + sta = ieee80211_find_sta(wvif->vif, info->bssid); + // FIXME: it is strange to not retrieve all information from bss_info - if (ht_cap && ht_cap->ht_supported) { - val.mpdu_start_spacing = ht_cap->ampdu_density; + if (sta && sta->ht_cap.ht_supported) { + val.mpdu_start_spacing = sta->ht_cap.ampdu_density; if (!(info->ht_operation_mode & IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT)) - val.greenfield = !!(ht_cap->cap & IEEE80211_HT_CAP_GRN_FLD); + val.greenfield = !!(sta->ht_cap.cap & IEEE80211_HT_CAP_GRN_FLD); } + rcu_read_unlock(); return hif_write_mib(wvif->wdev, wvif->id, HIF_MIB_ID_SET_ASSOCIATION_MODE, &val, sizeof(val)); diff --git a/drivers/staging/wfx/sta.c b/drivers/staging/wfx/sta.c index 010e13bcd33e..ed16475c207c 100644 --- a/drivers/staging/wfx/sta.c +++ b/drivers/staging/wfx/sta.c @@ -691,6 +691,7 @@ static void wfx_join_finalize(struct wfx_vif *wvif, wfx_rate_mask_to_hw(wvif->wdev, sta->supp_rates[wvif->channel->band]); else wvif->bss_params.operational_rate_set = -1; + rcu_read_unlock(); if (sta && info->ht_operation_mode & IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT) hif_dual_cts_protection(wvif, true); @@ -703,8 +704,7 @@ static void wfx_join_finalize(struct wfx_vif *wvif, wvif->bss_params.beacon_lost_count = 20; wvif->bss_params.aid = info->aid; - hif_set_association_mode(wvif, info, sta ? &sta->ht_cap : NULL); - rcu_read_unlock(); + hif_set_association_mode(wvif, info); if (!info->ibss_joined) { hif_keep_alive_period(wvif, 30 /* sec */); From ac42c12dd752d315a7027dcb50421dbbd1af53bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Pouiller?= Date: Tue, 10 Mar 2020 11:13:56 +0100 Subject: [PATCH 112/372] staging: wfx: fix RCU usage between hif_join() and ieee80211_bss_get_ie() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Access to result of ieee80211_bss_get_ie() is protected by RCU. In other hand, function hif_join() can sleep and cannot be called with RCU locked. Provide a copy of "ssidie" to hif_join() to solve this behavior. Fixes: 9ced9b593741 ("staging: wfx: simplify hif_join()") Signed-off-by: Jérôme Pouiller Link: https://lore.kernel.org/r/20200310101356.182818-6-Jerome.Pouiller@silabs.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/wfx/hif_tx.c | 8 ++++---- drivers/staging/wfx/hif_tx.h | 2 +- drivers/staging/wfx/sta.c | 17 ++++++++++------- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/drivers/staging/wfx/hif_tx.c b/drivers/staging/wfx/hif_tx.c index 7a56e45bcdaa..77bca43aca42 100644 --- a/drivers/staging/wfx/hif_tx.c +++ b/drivers/staging/wfx/hif_tx.c @@ -290,7 +290,7 @@ int hif_stop_scan(struct wfx_vif *wvif) } int hif_join(struct wfx_vif *wvif, const struct ieee80211_bss_conf *conf, - const struct ieee80211_channel *channel, const u8 *ssidie) + struct ieee80211_channel *channel, const u8 *ssid, int ssidlen) { int ret; struct hif_msg *hif; @@ -308,9 +308,9 @@ int hif_join(struct wfx_vif *wvif, const struct ieee80211_bss_conf *conf, body->basic_rate_set = cpu_to_le32(wfx_rate_mask_to_hw(wvif->wdev, conf->basic_rates)); memcpy(body->bssid, conf->bssid, sizeof(body->bssid)); - if (!conf->ibss_joined && ssidie) { - body->ssid_length = cpu_to_le32(ssidie[1]); - memcpy(body->ssid, &ssidie[2], ssidie[1]); + if (!conf->ibss_joined && ssid) { + body->ssid_length = cpu_to_le32(ssidlen); + memcpy(body->ssid, ssid, ssidlen); } wfx_fill_header(hif, wvif->id, HIF_REQ_ID_JOIN, sizeof(*body)); ret = wfx_cmd_send(wvif->wdev, hif, NULL, 0, false); diff --git a/drivers/staging/wfx/hif_tx.h b/drivers/staging/wfx/hif_tx.h index 20977e461718..f8520a14c14c 100644 --- a/drivers/staging/wfx/hif_tx.h +++ b/drivers/staging/wfx/hif_tx.h @@ -46,7 +46,7 @@ int hif_scan(struct wfx_vif *wvif, struct cfg80211_scan_request *req80211, int chan_start, int chan_num); int hif_stop_scan(struct wfx_vif *wvif); int hif_join(struct wfx_vif *wvif, const struct ieee80211_bss_conf *conf, - const struct ieee80211_channel *channel, const u8 *ssidie); + struct ieee80211_channel *channel, const u8 *ssid, int ssidlen); int hif_set_pm(struct wfx_vif *wvif, bool ps, int dynamic_ps_timeout); int hif_set_bss_params(struct wfx_vif *wvif, const struct hif_req_set_bss_params *arg); diff --git a/drivers/staging/wfx/sta.c b/drivers/staging/wfx/sta.c index ed16475c207c..af4f4bbd0572 100644 --- a/drivers/staging/wfx/sta.c +++ b/drivers/staging/wfx/sta.c @@ -491,9 +491,11 @@ static void wfx_set_mfp(struct wfx_vif *wvif, static void wfx_do_join(struct wfx_vif *wvif) { int ret; - const u8 *ssidie; struct ieee80211_bss_conf *conf = &wvif->vif->bss_conf; struct cfg80211_bss *bss = NULL; + u8 ssid[IEEE80211_MAX_SSID_LEN]; + const u8 *ssidie = NULL; + int ssidlen = 0; wfx_tx_lock_flush(wvif->wdev); @@ -514,11 +516,14 @@ static void wfx_do_join(struct wfx_vif *wvif) if (!wvif->beacon_int) wvif->beacon_int = 1; - rcu_read_lock(); + rcu_read_lock(); // protect ssidie if (!conf->ibss_joined) ssidie = ieee80211_bss_get_ie(bss, WLAN_EID_SSID); - else - ssidie = NULL; + if (ssidie) { + ssidlen = ssidie[1]; + memcpy(ssid, &ssidie[2], ssidie[1]); + } + rcu_read_unlock(); wfx_tx_flush(wvif->wdev); @@ -527,10 +532,8 @@ static void wfx_do_join(struct wfx_vif *wvif) wfx_set_mfp(wvif, bss); - /* Perform actual join */ wvif->wdev->tx_burst_idx = -1; - ret = hif_join(wvif, conf, wvif->channel, ssidie); - rcu_read_unlock(); + ret = hif_join(wvif, conf, wvif->channel, ssid, ssidlen); if (ret) { ieee80211_connection_loss(wvif->vif); wvif->join_complete_status = -1; From e93fc7b4544a5475cfdbc22f87e89f9829bf801c Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 3 Mar 2020 03:10:57 -0500 Subject: [PATCH 113/372] KVM: s390: Also reset registers in sync regs for initial cpu reset When we do the initial CPU reset we must not only clear the registers in the internal data structures but also in kvm_run sync_regs. For modern userspace sync_regs is the only place that it looks at. Fixes: 7de3f1423ff9 ("KVM: s390: Add new reset vcpu API") Acked-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index d7ff30e45589..c2e6d4ba4e23 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -3268,7 +3268,10 @@ static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) /* Initial reset is a superset of the normal reset */ kvm_arch_vcpu_ioctl_normal_reset(vcpu); - /* this equals initial cpu reset in pop, but we don't switch to ESA */ + /* + * This equals initial cpu reset in pop, but we don't switch to ESA. + * We do not only reset the internal data, but also ... + */ vcpu->arch.sie_block->gpsw.mask = 0; vcpu->arch.sie_block->gpsw.addr = 0; kvm_s390_set_prefix(vcpu, 0); @@ -3278,6 +3281,19 @@ static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr)); vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK; vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK; + + /* ... the data in sync regs */ + memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs)); + vcpu->run->s.regs.ckc = 0; + vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK; + vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK; + vcpu->run->psw_addr = 0; + vcpu->run->psw_mask = 0; + vcpu->run->s.regs.todpr = 0; + vcpu->run->s.regs.cputm = 0; + vcpu->run->s.regs.ckc = 0; + vcpu->run->s.regs.pp = 0; + vcpu->run->s.regs.gbea = 1; vcpu->run->s.regs.fpc = 0; vcpu->arch.sie_block->gbea = 1; vcpu->arch.sie_block->pp = 0; From f91da3bd21721c05cc7054156fa993edbb16777a Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Fri, 6 Mar 2020 19:20:18 +0530 Subject: [PATCH 114/372] dmaengine: move .device_release missing log warning to debug level Dmaengine core warns the drivers registering for missing .device_release implementation. The warning is accurate for dmaengine controllers which hotplug but not for rest. So reduce this to a debug log. Link: https://lore.kernel.org/r/20200306135018.2286959-1-vkoul@kernel.org Signed-off-by: Vinod Koul --- drivers/dma/dmaengine.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index c3b1283b6d31..17909fd1820f 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -1151,7 +1151,7 @@ int dma_async_device_register(struct dma_device *device) } if (!device->device_release) - dev_warn(device->dev, + dev_dbg(device->dev, "WARN: Device release is not defined so it is not safe to unbind this driver while in use\n"); kref_init(&device->ref); From 195967c088aa856fc7a2698230cddef3b8d7cda9 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Sat, 7 Mar 2020 21:57:37 +0100 Subject: [PATCH 115/372] MAINTAINERS: rectify the INTEL IADX DRIVER entry Commit bfe1d56091c1 ("dmaengine: idxd: Init and probe for Intel data accelerators") added the INTEL IADX DRIVER entry in MAINTAINERS, which mentions include/linux/idxd.h as file entry. However, this header file was not added in this commit, nor in any later one. Hence, since then, ./scripts/get_maintainer.pl --self-test complains: warning: no file matches F: include/linux/idxd.h Drop the file entry to the non-existing file in INTEL IADX DRIVER now. Signed-off-by: Lukas Bulwahn Acked-by: Dave Jiang Link: https://lore.kernel.org/r/20200307205737.5829-1-lukas.bulwahn@gmail.com Signed-off-by: Vinod Koul --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 38fe2f3f7b6f..44c6a4abef2a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8477,7 +8477,6 @@ L: dmaengine@vger.kernel.org S: Supported F: drivers/dma/idxd/* F: include/uapi/linux/idxd.h -F: include/linux/idxd.h INTEL IDLE DRIVER M: Jacob Pan From 42f502dfe132edb8d7a47e6c0641ed82d718ad0b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 11 Mar 2020 08:37:19 +0100 Subject: [PATCH 116/372] HID: hid-picolcd_fb: Use scnprintf() for avoiding potential buffer overflow Since snprintf() returns the would-be-output size instead of the actual output size, the succeeding calls may go beyond the given buffer limit. Fix it by replacing with scnprintf(). Signed-off-by: Takashi Iwai Signed-off-by: Jiri Kosina --- drivers/hid/hid-picolcd_fb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hid/hid-picolcd_fb.c b/drivers/hid/hid-picolcd_fb.c index e162a668fb7e..37ba05e8d94d 100644 --- a/drivers/hid/hid-picolcd_fb.c +++ b/drivers/hid/hid-picolcd_fb.c @@ -459,9 +459,9 @@ static ssize_t picolcd_fb_update_rate_show(struct device *dev, if (ret >= PAGE_SIZE) break; else if (i == fb_update_rate) - ret += snprintf(buf+ret, PAGE_SIZE-ret, "[%u] ", i); + ret += scnprintf(buf+ret, PAGE_SIZE-ret, "[%u] ", i); else - ret += snprintf(buf+ret, PAGE_SIZE-ret, "%u ", i); + ret += scnprintf(buf+ret, PAGE_SIZE-ret, "%u ", i); if (ret > 0) buf[min(ret, (size_t)PAGE_SIZE)-1] = '\n'; return ret; From 62a1a58039595698f644f560f00105bb4d5f5c7f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 11 Mar 2020 08:38:24 +0100 Subject: [PATCH 117/372] HID: hid-sensor-custom: Use scnprintf() for avoiding potential buffer overflow Since snprintf() returns the would-be-output size instead of the actual output size, the succeeding calls may go beyond the given buffer limit. Fix it by replacing with scnprintf(). Signed-off-by: Takashi Iwai Signed-off-by: Jiri Kosina --- drivers/hid/hid-sensor-custom.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hid/hid-sensor-custom.c b/drivers/hid/hid-sensor-custom.c index fb827c295842..4d25577a8573 100644 --- a/drivers/hid/hid-sensor-custom.c +++ b/drivers/hid/hid-sensor-custom.c @@ -313,7 +313,7 @@ static ssize_t show_value(struct device *dev, struct device_attribute *attr, while (i < ret) { if (i + attribute->size > ret) { - len += snprintf(&buf[len], + len += scnprintf(&buf[len], PAGE_SIZE - len, "%d ", values[i]); break; @@ -336,10 +336,10 @@ static ssize_t show_value(struct device *dev, struct device_attribute *attr, ++i; break; } - len += snprintf(&buf[len], PAGE_SIZE - len, + len += scnprintf(&buf[len], PAGE_SIZE - len, "%lld ", value); } - len += snprintf(&buf[len], PAGE_SIZE - len, "\n"); + len += scnprintf(&buf[len], PAGE_SIZE - len, "\n"); return len; } else if (input) From 90db6d772f749e38171d04619a5e3cd8804a6d02 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 10 Mar 2020 09:41:48 -0700 Subject: [PATCH 118/372] bpf, sockmap: Remove bucket->lock from sock_{hash|map}_free The bucket->lock is not needed in the sock_hash_free and sock_map_free calls, in fact it is causing a splat due to being inside rcu block. | BUG: sleeping function called from invalid context at net/core/sock.c:2935 | in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 62, name: kworker/0:1 | 3 locks held by kworker/0:1/62: | #0: ffff88813b019748 ((wq_completion)events){+.+.}, at: process_one_work+0x1d7/0x5e0 | #1: ffffc900000abe50 ((work_completion)(&map->work)){+.+.}, at: process_one_work+0x1d7/0x5e0 | #2: ffff8881381f6df8 (&stab->lock){+...}, at: sock_map_free+0x26/0x180 | CPU: 0 PID: 62 Comm: kworker/0:1 Not tainted 5.5.0-04008-g7b083332376e #454 | Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20190727_073836-buildvm-ppc64le-16.ppc.fedoraproject.org-3.fc31 04/01/2014 | Workqueue: events bpf_map_free_deferred | Call Trace: | dump_stack+0x71/0xa0 | ___might_sleep.cold+0xa6/0xb6 | lock_sock_nested+0x28/0x90 | sock_map_free+0x5f/0x180 | bpf_map_free_deferred+0x58/0x80 | process_one_work+0x260/0x5e0 | worker_thread+0x4d/0x3e0 | kthread+0x108/0x140 | ? process_one_work+0x5e0/0x5e0 | ? kthread_park+0x90/0x90 | ret_from_fork+0x3a/0x50 The reason we have stab->lock and bucket->locks in sockmap code is to handle checking EEXIST in update/delete cases. We need to be careful during an update operation that we check for EEXIST and we need to ensure that the psock object is not in some partial state of removal/insertion while we do this. So both map_update_common and sock_map_delete need to guard from being run together potentially deleting an entry we are checking, etc. But by the time we get to the tear-down code in sock_{ma[|hash}_free we have already disconnected the map and we just did synchronize_rcu() in the line above so no updates/deletes should be in flight. Because of this we can drop the bucket locks from the map free'ing code, noting no update/deletes can be in-flight. Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface") Reported-by: Jakub Sitnicki Suggested-by: Jakub Sitnicki Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/158385850787.30597.8346421465837046618.stgit@john-Precision-5820-Tower --- net/core/sock_map.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 085cef5857bb..b70c844a88ec 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -233,8 +233,11 @@ static void sock_map_free(struct bpf_map *map) struct bpf_stab *stab = container_of(map, struct bpf_stab, map); int i; + /* After the sync no updates or deletes will be in-flight so it + * is safe to walk map and remove entries without risking a race + * in EEXIST update case. + */ synchronize_rcu(); - raw_spin_lock_bh(&stab->lock); for (i = 0; i < stab->map.max_entries; i++) { struct sock **psk = &stab->sks[i]; struct sock *sk; @@ -248,7 +251,6 @@ static void sock_map_free(struct bpf_map *map) release_sock(sk); } } - raw_spin_unlock_bh(&stab->lock); /* wait for psock readers accessing its map link */ synchronize_rcu(); @@ -863,10 +865,13 @@ static void sock_hash_free(struct bpf_map *map) struct hlist_node *node; int i; + /* After the sync no updates or deletes will be in-flight so it + * is safe to walk map and remove entries without risking a race + * in EEXIST update case. + */ synchronize_rcu(); for (i = 0; i < htab->buckets_num; i++) { bucket = sock_hash_select_bucket(htab, i); - raw_spin_lock_bh(&bucket->lock); hlist_for_each_entry_safe(elem, node, &bucket->head, node) { hlist_del_rcu(&elem->node); lock_sock(elem->sk); @@ -875,7 +880,6 @@ static void sock_hash_free(struct bpf_map *map) rcu_read_unlock(); release_sock(elem->sk); } - raw_spin_unlock_bh(&bucket->lock); } /* wait for psock readers accessing its map link */ From 985e537a4082b4635754a57f4f95430790afee6a Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Tue, 10 Mar 2020 18:35:57 +0100 Subject: [PATCH 119/372] x86/ioremap: Map EFI runtime services data as encrypted for SEV The dmidecode program fails to properly decode the SMBIOS data supplied by OVMF/UEFI when running in an SEV guest. The SMBIOS area, under SEV, is encrypted and resides in reserved memory that is marked as EFI runtime services data. As a result, when memremap() is attempted for the SMBIOS data, it can't be mapped as regular RAM (through try_ram_remap()) and, since the address isn't part of the iomem resources list, it isn't mapped encrypted through the fallback ioremap(). Add a new __ioremap_check_other() to deal with memory types like EFI_RUNTIME_SERVICES_DATA which are not covered by the resource ranges. This allows any runtime services data which has been created encrypted, to be mapped encrypted too. [ bp: Move functionality to a separate function. ] Signed-off-by: Tom Lendacky Signed-off-by: Borislav Petkov Reviewed-by: Joerg Roedel Tested-by: Joerg Roedel Cc: # 5.3 Link: https://lkml.kernel.org/r/2d9e16eb5b53dc82665c95c6764b7407719df7a0.1582645327.git.thomas.lendacky@amd.com --- arch/x86/mm/ioremap.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 44e4beb4239f..935a91e1fd77 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -106,6 +106,19 @@ static unsigned int __ioremap_check_encrypted(struct resource *res) return 0; } +/* + * The EFI runtime services data area is not covered by walk_mem_res(), but must + * be mapped encrypted when SEV is active. + */ +static void __ioremap_check_other(resource_size_t addr, struct ioremap_desc *desc) +{ + if (!sev_active()) + return; + + if (efi_mem_type(addr) == EFI_RUNTIME_SERVICES_DATA) + desc->flags |= IORES_MAP_ENCRYPTED; +} + static int __ioremap_collect_map_flags(struct resource *res, void *arg) { struct ioremap_desc *desc = arg; @@ -124,6 +137,9 @@ static int __ioremap_collect_map_flags(struct resource *res, void *arg) * To avoid multiple resource walks, this function walks resources marked as * IORESOURCE_MEM and IORESOURCE_BUSY and looking for system RAM and/or a * resource described not as IORES_DESC_NONE (e.g. IORES_DESC_ACPI_TABLES). + * + * After that, deal with misc other ranges in __ioremap_check_other() which do + * not fall into the above category. */ static void __ioremap_check_mem(resource_size_t addr, unsigned long size, struct ioremap_desc *desc) @@ -135,6 +151,8 @@ static void __ioremap_check_mem(resource_size_t addr, unsigned long size, memset(desc, 0, sizeof(struct ioremap_desc)); walk_mem_res(start, end, desc, __ioremap_collect_map_flags); + + __ioremap_check_other(addr, desc); } /* From 8959b304c7062889b1276092cc8590dc1ba98f65 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 6 Mar 2020 14:23:26 +0100 Subject: [PATCH 120/372] gpiolib: Fix irq_disable() semantics The implementation if .irq_disable() which kicks in between the gpiolib and the driver is not properly mimicking the expected semantics of the irqchip core: the irqchip will call .irq_disable() if that exists, else it will call mask_irq() which first checks if .irq_mask() is defined before calling it. Since we are calling it unconditionally, we get this bug from drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c, as it only defines .irq_mask_ack and not .irq_mask: Unable to handle kernel NULL pointer dereference at virtual address 00000000 pgd = (ptrval) (...) PC is at 0x0 LR is at gpiochip_irq_disable+0x20/0x30 Fix this by only calling .irq_mask() if it exists. Cc: Brian Masney Cc: Hans Verkuil Cc: stable@vger.kernel.org Reviewed-by: Bartosz Golaszewski Fixes: 461c1a7d4733 ("gpiolib: override irq_enable/disable") Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20200306132326.1329640-1-linus.walleij@linaro.org --- drivers/gpio/gpiolib.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 4d0106ceeba7..00fb91feba70 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -2306,9 +2306,16 @@ static void gpiochip_irq_disable(struct irq_data *d) { struct gpio_chip *chip = irq_data_get_irq_chip_data(d); + /* + * Since we override .irq_disable() we need to mimic the + * behaviour of __irq_disable() in irq/chip.c. + * First call .irq_disable() if it exists, else mimic the + * behaviour of mask_irq() which calls .irq_mask() if + * it exists. + */ if (chip->irq.irq_disable) chip->irq.irq_disable(d); - else + else if (chip->irq.chip->irq_mask) chip->irq.chip->irq_mask(d); gpiochip_disable_irq(chip, d->hwirq); } From efaa87fa0947d525cf7c075316adde4e3ac7720b Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 2 Mar 2020 12:12:22 +0100 Subject: [PATCH 121/372] gpiolib: acpi: Correct comment for HP x2 10 honor_wakeup quirk Commit aa23ca3d98f7 ("gpiolib: acpi: Add honor_wakeup module-option + quirk mechanism") added a quirk for some models of the HP x2 10 series. There are 2 issues with the comment describing the quirk: 1) The comment claims the DMI quirk applies to all Cherry Trail based HP x2 10 models. In the mean time I have learned that there are at least 3 models of the HP x2 10 models: Bay Trail SoC + AXP288 PMIC Cherry Trail SoC + AXP288 PMIC Cherry Trail SoC + TI PMIC And this quirk's DMI matches only match the Cherry Trail SoC + TI PMIC SoC, which is good because we want a slightly different quirk for the others. This commit updates the comment to make it clear that the quirk is only for the Cherry Trail SoC + TI PMIC models. 2) The comment says that it is ok to disable wakeup on all ACPI GPIO event handlers, because there is only the one for the embedded-controller events. This is not true, there also is a handler for the special INT0002 device which is related to USB wakeups. We need to also disable wakeups on that one because the device turns of the USB-keyboard built into the dock when closing the lid. The XHCI controller takes a while to notice this, so it only notices it when already suspended, causing a spurious wakeup because of this. So disabling wakeup on all handlers is the right thing to do, but not because there only is the one handler for the EC events. This commit updates the comment to correctly reflect this. Fixes: aa23ca3d98f7 ("gpiolib: acpi: Add honor_wakeup module-option + quirk mechanism") Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20200302111225.6641-1-hdegoede@redhat.com Acked-by: Mika Westerberg Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib-acpi.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index 31fee5e918b7..a77edd31dd60 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -1345,12 +1345,14 @@ static const struct dmi_system_id gpiolib_acpi_quirks[] = { }, { /* - * Various HP X2 10 Cherry Trail models use an external - * embedded-controller connected via I2C + an ACPI GPIO - * event handler. The embedded controller generates various - * spurious wakeup events when suspended. So disable wakeup - * for its handler (it uses the only ACPI GPIO event handler). - * This breaks wakeup when opening the lid, the user needs + * HP X2 10 models with Cherry Trail SoC + TI PMIC use an + * external embedded-controller connected via I2C + an ACPI GPIO + * event handler on INT33FF:01 pin 0, causing spurious wakeups. + * When suspending by closing the LID, the power to the USB + * keyboard is turned off, causing INT0002 ACPI events to + * trigger once the XHCI controller notices the keyboard is + * gone. So INT0002 events cause spurious wakeups too. Ignoring + * EC wakes breaks wakeup when opening the lid, the user needs * to press the power-button to wakeup the system. The * alternative is suspend simply not working, which is worse. */ From 2ccb21f5516afef5e251184eeefbf36db90206d7 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 2 Mar 2020 12:12:23 +0100 Subject: [PATCH 122/372] gpiolib: acpi: Rework honor_wakeup option into an ignore_wake option Commit aa23ca3d98f7 ("gpiolib: acpi: Add honor_wakeup module-option + quirk mechanism") was added to deal with spurious wakeups on one specific model of the HP x2 10 series. The approach taken there was to add a bool controlling wakeup support for all ACPI GPIO events. This was sufficient for the specific HP x2 10 model the commit was trying to fix, but in the mean time other models have turned up which need a similar workaround to avoid spurious wakeups from suspend, but only for one of the pins on which the ACPI tables request ACPI GPIO events. Since the honor_wakeup option was added to be able to ignore wake events, the name was perhaps not the best, this commit renames it to ignore_wake and changes it to a string with the following format: gpiolib_acpi.ignore_wake=controller@pin[,controller@pin[,...]] This allows working around spurious wakeup issues on a per pin basis. This commit also reworks the existing quirk for the HP x2 10 so that it functions as before. Note: -This removes the honor_wakeup parameter. This has only been upstream for a short time and to the best of my knowledge there are no users using this module parameter. -The controller@pin[,controller@pin[,...]] syntax is based on an existing kernel module parameter using the same controller@pin format. That version uses ';' as separator, but in practice that is problematic because grub2 cannot handle this without taking special care to escape the ';', so here we are using a ',' as separator instead which does not have this issue. Fixes: aa23ca3d98f7 ("gpiolib: acpi: Add honor_wakeup module-option + quirk mechanism") Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20200302111225.6641-2-hdegoede@redhat.com Acked-by: Mika Westerberg Reviewed-by: Andy Shevchenko Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib-acpi.c | 96 +++++++++++++++++++++++++++++-------- 1 file changed, 76 insertions(+), 20 deletions(-) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index a77edd31dd60..87bb0d3c69da 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -21,18 +21,21 @@ #include "gpiolib.h" #include "gpiolib-acpi.h" -#define QUIRK_NO_EDGE_EVENTS_ON_BOOT 0x01l -#define QUIRK_NO_WAKEUP 0x02l - static int run_edge_events_on_boot = -1; module_param(run_edge_events_on_boot, int, 0444); MODULE_PARM_DESC(run_edge_events_on_boot, "Run edge _AEI event-handlers at boot: 0=no, 1=yes, -1=auto"); -static int honor_wakeup = -1; -module_param(honor_wakeup, int, 0444); -MODULE_PARM_DESC(honor_wakeup, - "Honor the ACPI wake-capable flag: 0=no, 1=yes, -1=auto"); +static char *ignore_wake; +module_param(ignore_wake, charp, 0444); +MODULE_PARM_DESC(ignore_wake, + "controller@pin combos on which to ignore the ACPI wake flag " + "ignore_wake=controller@pin[,controller@pin[,...]]"); + +struct acpi_gpiolib_dmi_quirk { + bool no_edge_events_on_boot; + char *ignore_wake; +}; /** * struct acpi_gpio_event - ACPI GPIO event handler data @@ -202,6 +205,57 @@ static void acpi_gpiochip_request_irqs(struct acpi_gpio_chip *acpi_gpio) acpi_gpiochip_request_irq(acpi_gpio, event); } +static bool acpi_gpio_in_ignore_list(const char *controller_in, int pin_in) +{ + const char *controller, *pin_str; + int len, pin; + char *endp; + + controller = ignore_wake; + while (controller) { + pin_str = strchr(controller, '@'); + if (!pin_str) + goto err; + + len = pin_str - controller; + if (len == strlen(controller_in) && + strncmp(controller, controller_in, len) == 0) { + pin = simple_strtoul(pin_str + 1, &endp, 10); + if (*endp != 0 && *endp != ',') + goto err; + + if (pin == pin_in) + return true; + } + + controller = strchr(controller, ','); + if (controller) + controller++; + } + + return false; +err: + pr_err_once("Error invalid value for gpiolib_acpi.ignore_wake: %s\n", + ignore_wake); + return false; +} + +static bool acpi_gpio_irq_is_wake(struct device *parent, + struct acpi_resource_gpio *agpio) +{ + int pin = agpio->pin_table[0]; + + if (agpio->wake_capable != ACPI_WAKE_CAPABLE) + return false; + + if (acpi_gpio_in_ignore_list(dev_name(parent), pin)) { + dev_info(parent, "Ignoring wakeup on pin %d\n", pin); + return false; + } + + return true; +} + /* Always returns AE_OK so that we keep looping over the resources */ static acpi_status acpi_gpiochip_alloc_event(struct acpi_resource *ares, void *context) @@ -289,7 +343,7 @@ static acpi_status acpi_gpiochip_alloc_event(struct acpi_resource *ares, event->handle = evt_handle; event->handler = handler; event->irq = irq; - event->irq_is_wake = honor_wakeup && agpio->wake_capable == ACPI_WAKE_CAPABLE; + event->irq_is_wake = acpi_gpio_irq_is_wake(chip->parent, agpio); event->pin = pin; event->desc = desc; @@ -1328,7 +1382,9 @@ static const struct dmi_system_id gpiolib_acpi_quirks[] = { DMI_MATCH(DMI_SYS_VENDOR, "MINIX"), DMI_MATCH(DMI_PRODUCT_NAME, "Z83-4"), }, - .driver_data = (void *)QUIRK_NO_EDGE_EVENTS_ON_BOOT, + .driver_data = &(struct acpi_gpiolib_dmi_quirk) { + .no_edge_events_on_boot = true, + }, }, { /* @@ -1341,7 +1397,9 @@ static const struct dmi_system_id gpiolib_acpi_quirks[] = { DMI_MATCH(DMI_SYS_VENDOR, "Wortmann_AG"), DMI_MATCH(DMI_PRODUCT_NAME, "TERRA_PAD_1061"), }, - .driver_data = (void *)QUIRK_NO_EDGE_EVENTS_ON_BOOT, + .driver_data = &(struct acpi_gpiolib_dmi_quirk) { + .no_edge_events_on_boot = true, + }, }, { /* @@ -1360,33 +1418,31 @@ static const struct dmi_system_id gpiolib_acpi_quirks[] = { DMI_MATCH(DMI_SYS_VENDOR, "HP"), DMI_MATCH(DMI_PRODUCT_NAME, "HP x2 Detachable 10-p0XX"), }, - .driver_data = (void *)QUIRK_NO_WAKEUP, + .driver_data = &(struct acpi_gpiolib_dmi_quirk) { + .ignore_wake = "INT33FF:01@0,INT0002:00@2", + }, }, {} /* Terminating entry */ }; static int acpi_gpio_setup_params(void) { + const struct acpi_gpiolib_dmi_quirk *quirk = NULL; const struct dmi_system_id *id; - long quirks = 0; id = dmi_first_match(gpiolib_acpi_quirks); if (id) - quirks = (long)id->driver_data; + quirk = id->driver_data; if (run_edge_events_on_boot < 0) { - if (quirks & QUIRK_NO_EDGE_EVENTS_ON_BOOT) + if (quirk && quirk->no_edge_events_on_boot) run_edge_events_on_boot = 0; else run_edge_events_on_boot = 1; } - if (honor_wakeup < 0) { - if (quirks & QUIRK_NO_WAKEUP) - honor_wakeup = 0; - else - honor_wakeup = 1; - } + if (ignore_wake == NULL && quirk && quirk->ignore_wake) + ignore_wake = quirk->ignore_wake; return 0; } From 0e91506ba00730f088961a8d39f8693b0f8e3fea Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 2 Mar 2020 12:12:24 +0100 Subject: [PATCH 123/372] gpiolib: acpi: Add quirk to ignore EC wakeups on HP x2 10 BYT + AXP288 model Commit aa23ca3d98f7 ("gpiolib: acpi: Add honor_wakeup module-option + quirk mechanism") was added to deal with spurious wakeups on one specific model of the HP x2 10 series. In the mean time I have learned that there are at least 3 different HP x2 10 models: Bay Trail SoC + AXP288 PMIC Cherry Trail SoC + AXP288 PMIC Cherry Trail SoC + TI PMIC And the original quirk is only correct for (and only matches the) Cherry Trail SoC + TI PMIC model. The Bay Trail SoC + AXP288 PMIC model has different DMI strings, has the external EC interrupt on a different GPIO pin and only needs to ignore wakeups on the EC interrupt, the INT0002 device works fine on this model. This commit adds an extra DMI based quirk for the HP x2 10 BYT + AXP288 model, ignoring wakeups for ACPI GPIO events on the EC interrupt pin on this model. This fixes spurious wakeups from suspend on this model. Fixes: aa23ca3d98f7 ("gpiolib: acpi: Add honor_wakeup module-option + quirk mechanism") Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20200302111225.6641-3-hdegoede@redhat.com Acked-by: Mika Westerberg Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib-acpi.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index 87bb0d3c69da..d1ef060a5873 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -1422,6 +1422,21 @@ static const struct dmi_system_id gpiolib_acpi_quirks[] = { .ignore_wake = "INT33FF:01@0,INT0002:00@2", }, }, + { + /* + * HP X2 10 models with Bay Trail SoC + AXP288 PMIC use an + * external embedded-controller connected via I2C + an ACPI GPIO + * event handler on INT33FC:02 pin 28, causing spurious wakeups. + */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion x2 Detachable"), + DMI_MATCH(DMI_BOARD_NAME, "815D"), + }, + .driver_data = &(struct acpi_gpiolib_dmi_quirk) { + .ignore_wake = "INT33FC:02@28", + }, + }, {} /* Terminating entry */ }; From 1292e3efb149ee21d8d33d725eeed4e6b1ade963 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 10 Mar 2020 12:49:43 +0100 Subject: [PATCH 124/372] mmc: core: Allow host controllers to require R1B for CMD6 It has turned out that some host controllers can't use R1B for CMD6 and other commands that have R1B associated with them. Therefore invent a new host cap, MMC_CAP_NEED_RSP_BUSY to let them specify this. In __mmc_switch(), let's check the flag and use it to prevent R1B responses from being converted into R1. Note that, this also means that the host are on its own, when it comes to manage the busy timeout. Suggested-by: Sowjanya Komatineni Cc: Tested-by: Anders Roxell Tested-by: Sowjanya Komatineni Tested-by: Faiz Abbas Tested-By: Peter Geis Signed-off-by: Ulf Hansson --- drivers/mmc/core/mmc_ops.c | 6 ++++-- include/linux/mmc/host.h | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c index da425ee2d9bf..e025604e17d4 100644 --- a/drivers/mmc/core/mmc_ops.c +++ b/drivers/mmc/core/mmc_ops.c @@ -542,9 +542,11 @@ int __mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value, * If the max_busy_timeout of the host is specified, make sure it's * enough to fit the used timeout_ms. In case it's not, let's instruct * the host to avoid HW busy detection, by converting to a R1 response - * instead of a R1B. + * instead of a R1B. Note, some hosts requires R1B, which also means + * they are on their own when it comes to deal with the busy timeout. */ - if (host->max_busy_timeout && (timeout_ms > host->max_busy_timeout)) + if (!(host->caps & MMC_CAP_NEED_RSP_BUSY) && host->max_busy_timeout && + (timeout_ms > host->max_busy_timeout)) use_r1b_resp = false; cmd.opcode = MMC_SWITCH; diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index ba703384bea0..4c5eb3aa8e72 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -333,6 +333,7 @@ struct mmc_host { MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR104 | \ MMC_CAP_UHS_DDR50) #define MMC_CAP_SYNC_RUNTIME_PM (1 << 21) /* Synced runtime PM suspends. */ +#define MMC_CAP_NEED_RSP_BUSY (1 << 22) /* Commands with R1B can't use R1. */ #define MMC_CAP_DRIVER_TYPE_A (1 << 23) /* Host supports Driver Type A */ #define MMC_CAP_DRIVER_TYPE_C (1 << 24) /* Host supports Driver Type C */ #define MMC_CAP_DRIVER_TYPE_D (1 << 25) /* Host supports Driver Type D */ From 43cc64e5221cc6741252b64bc4531dd1eefb733d Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 10 Mar 2020 14:43:00 +0100 Subject: [PATCH 125/372] mmc: core: Respect MMC_CAP_NEED_RSP_BUSY for erase/trim/discard The busy timeout that is computed for each erase/trim/discard operation, can become quite long and may thus exceed the host->max_busy_timeout. If that becomes the case, mmc_do_erase() converts from using an R1B response to an R1 response, as to prevent the host from doing HW busy detection. However, it has turned out that some hosts requires an R1B response no matter what, so let's respect that via checking MMC_CAP_NEED_RSP_BUSY. Note that, if the R1B gets enforced, the host becomes fully responsible of managing the needed busy timeout, in one way or the other. Suggested-by: Sowjanya Komatineni Cc: Tested-by: Anders Roxell Tested-by: Sowjanya Komatineni Tested-by: Faiz Abbas Tested-By: Peter Geis Signed-off-by: Ulf Hansson --- drivers/mmc/core/core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index aa54d359dab7..a971c4bcc442 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -1732,8 +1732,11 @@ static int mmc_do_erase(struct mmc_card *card, unsigned int from, * the erase operation does not exceed the max_busy_timeout, we should * use R1B response. Or we need to prevent the host from doing hw busy * detection, which is done by converting to a R1 response instead. + * Note, some hosts requires R1B, which also means they are on their own + * when it comes to deal with the busy timeout. */ - if (card->host->max_busy_timeout && + if (!(card->host->caps & MMC_CAP_NEED_RSP_BUSY) && + card->host->max_busy_timeout && busy_timeout > card->host->max_busy_timeout) { cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_AC; } else { From 055e04830d4544c57f2a5192a26c9e25915c29c0 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 10 Mar 2020 15:05:02 +0100 Subject: [PATCH 126/372] mmc: sdhci-omap: Fix busy detection by enabling MMC_CAP_NEED_RSP_BUSY It has turned out that the sdhci-omap controller requires the R1B response, for commands that has this response associated with them. So, converting from an R1B to an R1 response for a CMD6 for example, leads to problems with the HW busy detection support. Fix this by informing the mmc core about the requirement, via setting the host cap, MMC_CAP_NEED_RSP_BUSY. Reported-by: Naresh Kamboju Reported-by: Anders Roxell Reported-by: Faiz Abbas Cc: Tested-by: Anders Roxell Tested-by: Faiz Abbas Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-omap.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mmc/host/sdhci-omap.c b/drivers/mmc/host/sdhci-omap.c index 882053151a47..c4978177ef88 100644 --- a/drivers/mmc/host/sdhci-omap.c +++ b/drivers/mmc/host/sdhci-omap.c @@ -1192,6 +1192,9 @@ static int sdhci_omap_probe(struct platform_device *pdev) if (of_find_property(dev->of_node, "dmas", NULL)) sdhci_switch_external_dma(host, true); + /* R1B responses is required to properly manage HW busy detection. */ + mmc->caps |= MMC_CAP_NEED_RSP_BUSY; + ret = sdhci_setup_host(host); if (ret) goto err_put_sync; From d2f8bfa4bff5028bc40ed56b4497c32e05b0178f Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 10 Mar 2020 15:50:11 +0100 Subject: [PATCH 127/372] mmc: sdhci-tegra: Fix busy detection by enabling MMC_CAP_NEED_RSP_BUSY It has turned out that the sdhci-tegra controller requires the R1B response, for commands that has this response associated with them. So, converting from an R1B to an R1 response for a CMD6 for example, leads to problems with the HW busy detection support. Fix this by informing the mmc core about the requirement, via setting the host cap, MMC_CAP_NEED_RSP_BUSY. Reported-by: Bitan Biswas Reported-by: Peter Geis Suggested-by: Sowjanya Komatineni Cc: Tested-by: Sowjanya Komatineni Tested-By: Peter Geis Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-tegra.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c index 403ac44a7378..a25c3a4d3f6c 100644 --- a/drivers/mmc/host/sdhci-tegra.c +++ b/drivers/mmc/host/sdhci-tegra.c @@ -1552,6 +1552,9 @@ static int sdhci_tegra_probe(struct platform_device *pdev) if (tegra_host->soc_data->nvquirks & NVQUIRK_ENABLE_DDR50) host->mmc->caps |= MMC_CAP_1_8V_DDR; + /* R1B responses is required to properly manage HW busy detection. */ + host->mmc->caps |= MMC_CAP_NEED_RSP_BUSY; + tegra_sdhci_parse_dt(host); tegra_host->power_gpio = devm_gpiod_get_optional(&pdev->dev, "power", From 204c7eceb76f348b89ccbf20fa3ba9a703ffada5 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 3 Mar 2020 22:10:01 -0800 Subject: [PATCH 128/372] ARC: show_regs: reduce lines of output Before ------ | CPU: 1 PID: 29061 Comm: tst-dynarray-at Not tainted 5.6.0-rc1-00002-g941fcc018ca6-dirty #12 | | [ECR ]: 0x00090000 => | [EFA ]: 0x00000000 | [ERET ]: 0x2004aa6c | @off 0x2aa6c in [/lib/libc-2.31.9000.so] VMA: 0x20020000 to 0x20122000 | [STAT32]: 0x80080a82 [IE U ] | BTA: 0x2004aa18 SP: 0x5ffff8a8 FP: 0x5ffff8fc | LPS: 0x2008788e LPE: 0x20087896 LPC: 0x00000000 | r00: 0x00000000 r01: 0x5ffff8a8 r02: 0x00000000 | r03: 0x00000008 r04: 0xffffffff r05: 0x00000000 | r06: 0x00000000 r07: 0x00000000 r08: 0x00000087 | r09: 0x00000000 r10: 0x2010691c r11: 0x00000020 | r12: 0x2003b214 r13: 0x5ffff8a8 r14: 0x20126e68 | r15: 0x2001f26c r16: 0x2012a000 r17: 0x00000001 | r18: 0x5ffff8fc r19: 0x00000000 r20: 0x5ffff948 | r21: 0x00000001 r22: 0xffffffff r23: 0x5fffff8c | r24: 0x4008c2a8 r25: 0x2001f6e0 After ----- | CPU: 1 PID: 29061 Comm: tst-dynarray-at Not tainted 5.6.0-rc1-00002-g941fcc018ca6-dirty #12 | @off 0x2aa6c in [/lib/libc-2.31.9000.so] VMA: 0x20020000 to 0x20122000 | ECR: 0x00090000 EFA: 0x00000000 ERET: 0x2004aa6c | STAT32: 0x80080a82 [IE U ] BTA: 0x2004aa18 | BLK: 0x2003b214 SP: 0x5ffff8a8 FP: 0x5ffff8fc | LPS: 0x2008788e LPE: 0x20087896 LPC: 0x00000000 | r00: 0x00000000 r01: 0x5ffff8a8 r02: 0x00000000 | r03: 0x00000008 r04: 0xffffffff r05: 0x00000000 | r06: 0x00000000 r07: 0x00000000 r08: 0x00000087 | r09: 0x00000000 r10: 0x2010691c r11: 0x00000020 | r12: 0x2003b214 r13: 0x5ffff8a8 r14: 0x20126e68 | r15: 0x2001f26c r16: 0x2012a000 r17: 0x00000001 | r18: 0x5ffff8fc r19: 0x00000000 r20: 0x5ffff948 | r21: 0x00000001 r22: 0xffffffff r23: 0x5fffff8c | r24: 0x4008c2a8 r25: 0x2001f6e0 BTA: 0x2004aa18 Signed-off-by: Vineet Gupta --- arch/arc/kernel/troubleshoot.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c index b79886a6cec8..d2999503fb8a 100644 --- a/arch/arc/kernel/troubleshoot.c +++ b/arch/arc/kernel/troubleshoot.c @@ -104,8 +104,7 @@ static void show_faulting_vma(unsigned long address) if (IS_ERR(nm)) nm = "?"; } - pr_info(" @off 0x%lx in [%s]\n" - " VMA: 0x%08lx to 0x%08lx\n", + pr_info(" @off 0x%lx in [%s] VMA: 0x%08lx to 0x%08lx\n", vma->vm_start < TASK_UNMAPPED_BASE ? address : address - vma->vm_start, nm, vma->vm_start, vma->vm_end); @@ -120,8 +119,6 @@ static void show_ecr_verbose(struct pt_regs *regs) unsigned int vec, cause_code; unsigned long address; - pr_info("\n[ECR ]: 0x%08lx => ", regs->event); - /* For Data fault, this is data address not instruction addr */ address = current->thread.fault_address; @@ -130,10 +127,10 @@ static void show_ecr_verbose(struct pt_regs *regs) /* For DTLB Miss or ProtV, display the memory involved too */ if (vec == ECR_V_DTLB_MISS) { - pr_cont("Invalid %s @ 0x%08lx by insn @ 0x%08lx\n", + pr_cont("Invalid %s @ 0x%08lx by insn @ %pS\n", (cause_code == 0x01) ? "Read" : ((cause_code == 0x02) ? "Write" : "EX"), - address, regs->ret); + address, (void *)regs->ret); } else if (vec == ECR_V_ITLB_MISS) { pr_cont("Insn could not be fetched\n"); } else if (vec == ECR_V_MACH_CHK) { @@ -191,31 +188,31 @@ void show_regs(struct pt_regs *regs) show_ecr_verbose(regs); - pr_info("[EFA ]: 0x%08lx\n[BLINK ]: %pS\n[ERET ]: %pS\n", - current->thread.fault_address, - (void *)regs->blink, (void *)regs->ret); - if (user_mode(regs)) show_faulting_vma(regs->ret); /* faulting code, not data */ - pr_info("[STAT32]: 0x%08lx", regs->status32); + pr_info("ECR: 0x%08lx EFA: 0x%08lx ERET: 0x%08lx\n", + regs->event, current->thread.fault_address, regs->ret); + + pr_info("STAT32: 0x%08lx", regs->status32); #define STS_BIT(r, bit) r->status32 & STATUS_##bit##_MASK ? #bit" " : "" #ifdef CONFIG_ISA_ARCOMPACT - pr_cont(" : %2s%2s%2s%2s%2s%2s%2s\n", + pr_cont(" [%2s%2s%2s%2s%2s%2s%2s]", (regs->status32 & STATUS_U_MASK) ? "U " : "K ", STS_BIT(regs, DE), STS_BIT(regs, AE), STS_BIT(regs, A2), STS_BIT(regs, A1), STS_BIT(regs, E2), STS_BIT(regs, E1)); #else - pr_cont(" : %2s%2s%2s%2s\n", + pr_cont(" [%2s%2s%2s%2s]", STS_BIT(regs, IE), (regs->status32 & STATUS_U_MASK) ? "U " : "K ", STS_BIT(regs, DE), STS_BIT(regs, AE)); #endif - pr_info("BTA: 0x%08lx\t SP: 0x%08lx\t FP: 0x%08lx\n", - regs->bta, regs->sp, regs->fp); + pr_cont(" BTA: 0x%08lx\n", regs->bta); + pr_info("BLK: %pS\n SP: 0x%08lx FP: 0x%08lx\n", + (void *)regs->blink, regs->sp, regs->fp); pr_info("LPS: 0x%08lx\tLPE: 0x%08lx\tLPC: 0x%08lx\n", regs->lp_start, regs->lp_end, regs->lp_count); From 8d92e992a785f35d23f845206cf8c6cafbc264e0 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Wed, 11 Mar 2020 19:26:43 +0300 Subject: [PATCH 129/372] ARC: define __ALIGN_STR and __ALIGN symbols for ARC The default defintions use fill pattern 0x90 for padding which for ARC generates unintended "ldh_s r12,[r0,0x20]" corresponding to opcode 0x9090 So use ".align 4" which insert a "nop_s" instruction instead. Cc: stable@vger.kernel.org Acked-by: Vineet Gupta Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta --- arch/arc/include/asm/linkage.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arc/include/asm/linkage.h b/arch/arc/include/asm/linkage.h index d9ee43c6b7db..fe19f1d412e7 100644 --- a/arch/arc/include/asm/linkage.h +++ b/arch/arc/include/asm/linkage.h @@ -29,6 +29,8 @@ .endm #define ASM_NL ` /* use '`' to mark new line in macro */ +#define __ALIGN .align 4 +#define __ALIGN_STR __stringify(__ALIGN) /* annotation for data we want in DCCM - if enabled in .config */ .macro ARCFP_DATA nm From 211b64e4b5b6bd5fdc19cd525c2cc9a90e6b0ec9 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 11 Mar 2020 11:53:09 +0100 Subject: [PATCH 130/372] binderfs: use refcount for binder control devices too Binderfs binder-control devices are cleaned up via binderfs_evict_inode too() which will use refcount_dec_and_test(). However, we missed to set the refcount for binderfs binder-control devices and so we underflowed when the binderfs instance got unmounted. Pretty obvious oversight and should have been part of the more general UAF fix. The good news is that having test cases (suprisingly) helps. Technically, we could detect that we're about to cleanup the binder-control dentry in binderfs_evict_inode() and then simply clean it up. But that makes the assumption that the binder driver itself will never make use of a binderfs binder-control device after the binderfs instance it belongs to has been unmounted and the superblock for it been destroyed. While it is unlikely to ever come to this let's be on the safe side. Performance-wise this also really doesn't matter since the binder-control device is only every really when creating the binderfs filesystem or creating additional binder devices. Both operations are pretty rare. Fixes: f0fe2c0f050d ("binder: prevent UAF for binderfs devices II") Link: https://lore.kernel.org/r/CA+G9fYusdfg7PMfC9Xce-xLT7NiyKSbgojpK35GOm=Pf9jXXrA@mail.gmail.com Reported-by: Naresh Kamboju Cc: stable@vger.kernel.org Signed-off-by: Christian Brauner Acked-by: Todd Kjos Link: https://lore.kernel.org/r/20200311105309.1742827-1-christian.brauner@ubuntu.com Signed-off-by: Greg Kroah-Hartman --- drivers/android/binderfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index 110e41f920c2..f303106b3362 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -448,6 +448,7 @@ static int binderfs_binder_ctl_create(struct super_block *sb) inode->i_uid = info->root_uid; inode->i_gid = info->root_gid; + refcount_set(&device->ref, 1); device->binderfs_inode = inode; device->miscdev.minor = minor; From 46b7c49254f89d54f11c58fa629f66e224a16034 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZ=20Lin=20=28=E6=9E=97=E4=B8=8A=E6=99=BA=29?= Date: Sun, 1 Mar 2020 00:09:58 +0800 Subject: [PATCH 131/372] kbuild: Fix inconsistent comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commit 2042b5486bd3 ("kbuild: unset variables in top Makefile instead of setting 0") renamed the variable from "config-targets" to "config-build", the comment should be consistent accordingly. Signed-off-by: Kaiden PK Yu (余泊鎧) Signed-off-by: SZ Lin (林上智) Signed-off-by: Masahiro Yamada --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 86035d866f2c..c9199ca3a8b0 100644 --- a/Makefile +++ b/Makefile @@ -1804,7 +1804,7 @@ existing-targets := $(wildcard $(sort $(targets))) -include $(foreach f,$(existing-targets),$(dir $(f)).$(notdir $(f)).cmd) -endif # config-targets +endif # config-build endif # mixed-build endif # need-sub-make From 8cc4fd73501d9f1370c3eebb70cfe8cc9e24062b Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 10 Mar 2020 19:12:49 +0900 Subject: [PATCH 132/372] kconfig: introduce m32-flag and m64-flag When a compiler supports multiple architectures, some compiler features can be dependent on the target architecture. This is typical for Clang, which supports multiple LLVM backends. Even for GCC, we need to take care of biarch compiler cases. It is not a problem when we evaluate cc-option in Makefiles because cc-option is tested against the flag in question + $(KBUILD_CFLAGS). The cc-option in Kconfig, on the other hand, does not accumulate tested flags. Due to this simplification, it could potentially test cc-option against a different target. At first, Kconfig always evaluated cc-option against the host architecture. Since commit e8de12fb7cde ("kbuild: Check for unknown options with cc-option usage in Kconfig and clang"), in case of cross-compiling with Clang, the target triple is correctly passed to Kconfig. The case with biarch GCC (and native build with Clang) is still not handled properly. We need to pass some flags to specify the target machine bit. Due to the design, all the macros in Kconfig are expanded in the parse stage, where we do not know the target bit size yet. For example, arch/x86/Kconfig allows a user to toggle CONFIG_64BIT. If a compiler flag -foo depends on the machine bit, it must be tested twice, one with -m32 and the other with -m64. However, -m32/-m64 are not always recognized. So, this commits adds m64-flag and m32-flag macros. They expand to -m32, -m64, respectively if supported. Or, they expand to an empty string if unsupported. The typical usage is like this: config FOO bool default $(cc-option,$(m64-flag) -foo) if 64BIT default $(cc-option,$(m32-flag) -foo) This is clumsy, but there is no elegant way to handle this in the current static macro expansion. There was discussion for static functions vs dynamic functions. The consensus was to go as far as possible with the static functions. (https://lkml.org/lkml/2018/3/2/22) Signed-off-by: Masahiro Yamada Tested-by: George Spelvin Reviewed-by: Nathan Chancellor --- scripts/Kconfig.include | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/scripts/Kconfig.include b/scripts/Kconfig.include index 85334dc8c997..496d11c92c97 100644 --- a/scripts/Kconfig.include +++ b/scripts/Kconfig.include @@ -44,3 +44,10 @@ $(error-if,$(success, $(LD) -v | grep -q gold), gold linker '$(LD)' not supporte # gcc version including patch level gcc-version := $(shell,$(srctree)/scripts/gcc-version.sh $(CC)) + +# machine bit flags +# $(m32-flag): -m32 if the compiler supports it, or an empty string otherwise. +# $(m64-flag): -m64 if the compiler supports it, or an empty string otherwise. +cc-option-bit = $(if-success,$(CC) -Werror $(1) -E -x c /dev/null -o /dev/null,$(1)) +m32-flag := $(cc-option-bit,-m32) +m64-flag := $(cc-option-bit,-m64) From 3a7c733165a4799fa1beb262fe244bfbcdd1c163 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 10 Mar 2020 19:12:50 +0900 Subject: [PATCH 133/372] int128: fix __uint128_t compiler test in Kconfig The support for __uint128_t is dependent on the target bit size. GCC that defaults to the 32-bit can still build the 64-bit kernel with -m64 flag passed. However, $(cc-option,-D__SIZEOF_INT128__=0) is evaluated against the default machine bit, which may not match to the kernel it is building. Theoretically, this could be evaluated separately for 64BIT/32BIT. config CC_HAS_INT128 bool default !$(cc-option,$(m64-flag) -D__SIZEOF_INT128__=0) if 64BIT default !$(cc-option,$(m32-flag) -D__SIZEOF_INT128__=0) I simplified it more because the 32-bit compiler is unlikely to support __uint128_t. Fixes: c12d3362a74b ("int128: move __uint128_t compiler test to Kconfig") Reported-by: George Spelvin Signed-off-by: Masahiro Yamada Tested-by: George Spelvin --- init/Kconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/init/Kconfig b/init/Kconfig index 20a6ac33761c..4f717bfdbfe2 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -767,8 +767,7 @@ config ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH bool config CC_HAS_INT128 - def_bool y - depends on !$(cc-option,-D__SIZEOF_INT128__=0) + def_bool !$(cc-option,$(m64-flag) -D__SIZEOF_INT128__=0) && 64BIT # # For architectures that know their GCC __int128 support is sound From 58d6fee50e67bb1c69977f1a534ccb17bf58b0f1 Mon Sep 17 00:00:00 2001 From: Michael Auchter Date: Tue, 10 Mar 2020 15:58:40 -0500 Subject: [PATCH 134/372] misc: eeprom: at24: fix regulator underflow The at24 driver attempts to read a byte from the device to validate that it's actually present, and if not, disables the vcc regulator and returns -ENODEV. However, between the read and the error handling path, pm_runtime_idle() is called and invokes the driver's suspend callback, which also disables the vcc regulator. This leads to an underflow of the regulator enable count if the EEPROM is not present. Move the pm_runtime_suspend() call to be after the error handling path to resolve this. Fixes: cd5676db0574 ("misc: eeprom: at24: support pm_runtime control") Signed-off-by: Michael Auchter Signed-off-by: Bartosz Golaszewski --- drivers/misc/eeprom/at24.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index 031eb64549af..282c9ef68ed2 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -712,13 +712,14 @@ static int at24_probe(struct i2c_client *client) * chip is functional. */ err = at24_read(at24, 0, &test_byte, 1); - pm_runtime_idle(dev); if (err) { pm_runtime_disable(dev); regulator_disable(at24->vcc_reg); return -ENODEV; } + pm_runtime_idle(dev); + if (writable) dev_info(dev, "%u byte %s EEPROM, writable, %u bytes/write\n", byte_len, client->name, at24->write_max); From b63e48fb50e1ca71db301ca9082befa6f16c55c4 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 5 Feb 2020 19:26:33 +0800 Subject: [PATCH 135/372] USB: Disable LPM on WD19's Realtek Hub Realtek Hub (0bda:0x0487) used in Dell Dock WD19 sometimes drops off the bus when bringing underlying ports from U3 to U0. Disabling LPM on the hub during setting link state is not enough, so let's disable LPM completely for this hub. Acked-by: Alan Stern Signed-off-by: Kai-Heng Feng Cc: stable Link: https://lore.kernel.org/r/20200205112633.25995-3-kai.heng.feng@canonical.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 2dac3e7cdd97..df6e6156e1d4 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -378,6 +378,9 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x0b05, 0x17e0), .driver_info = USB_QUIRK_IGNORE_REMOTE_WAKEUP }, + /* Realtek hub in Dell WD19 (Type-C) */ + { USB_DEVICE(0x0bda, 0x0487), .driver_info = USB_QUIRK_NO_LPM }, + /* Action Semiconductor flash disk */ { USB_DEVICE(0x10d6, 0x2200), .driver_info = USB_QUIRK_STRING_FETCH_255 }, From d16e7b62c5adcd13832c6b0ba364c3468d21b856 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Wed, 11 Mar 2020 16:00:05 +0300 Subject: [PATCH 136/372] usb: typec: ucsi: displayport: Fix NULL pointer dereference If the registration of the DisplayPort was not successful, or if the port does not support DisplayPort alt mode in the first place, the function ucsi_displayport_remove_partner() will fail with NULL pointer dereference when it attempts to access the driver data. Adding a check to the function to make sure there really is driver data for the device before modifying it. Fixes: af8622f6a585 ("usb: typec: ucsi: Support for DisplayPort alt mode") Reported-by: Andrea Gagliardi La Gala BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=206365 Cc: stable@vger.kernel.org Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20200311130006.41288-2-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/displayport.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/typec/ucsi/displayport.c b/drivers/usb/typec/ucsi/displayport.c index 0f1273ae086c..261131c9e37c 100644 --- a/drivers/usb/typec/ucsi/displayport.c +++ b/drivers/usb/typec/ucsi/displayport.c @@ -271,6 +271,9 @@ void ucsi_displayport_remove_partner(struct typec_altmode *alt) return; dp = typec_altmode_get_drvdata(alt); + if (!dp) + return; + dp->data.conf = 0; dp->data.status = 0; dp->initialized = false; From 081da1325d351ea8804cf74e65263ea120834f33 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Wed, 11 Mar 2020 16:00:06 +0300 Subject: [PATCH 137/372] usb: typec: ucsi: displayport: Fix a potential race during registration Locking the connector in ucsi_register_displayport() to make sure that nothing can access the displayport alternate mode before the function has finished and the alternate mode is actually ready. Fixes: af8622f6a585 ("usb: typec: ucsi: Support for DisplayPort alt mode") Cc: stable@vger.kernel.org Signed-off-by: Heikki Krogerus Link: https://lore.kernel.org/r/20200311130006.41288-3-heikki.krogerus@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/displayport.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/usb/typec/ucsi/displayport.c b/drivers/usb/typec/ucsi/displayport.c index 261131c9e37c..048381c058a5 100644 --- a/drivers/usb/typec/ucsi/displayport.c +++ b/drivers/usb/typec/ucsi/displayport.c @@ -288,6 +288,8 @@ struct typec_altmode *ucsi_register_displayport(struct ucsi_connector *con, struct typec_altmode *alt; struct ucsi_dp *dp; + mutex_lock(&con->lock); + /* We can't rely on the firmware with the capabilities. */ desc->vdo |= DP_CAP_DP_SIGNALING | DP_CAP_RECEPTACLE; @@ -296,12 +298,15 @@ struct typec_altmode *ucsi_register_displayport(struct ucsi_connector *con, desc->vdo |= all_assignments << 16; alt = typec_port_register_altmode(con->port, desc); - if (IS_ERR(alt)) + if (IS_ERR(alt)) { + mutex_unlock(&con->lock); return alt; + } dp = devm_kzalloc(&alt->dev, sizeof(*dp), GFP_KERNEL); if (!dp) { typec_unregister_altmode(alt); + mutex_unlock(&con->lock); return ERR_PTR(-ENOMEM); } @@ -314,5 +319,7 @@ struct typec_altmode *ucsi_register_displayport(struct ucsi_connector *con, alt->ops = &ucsi_displayport_ops; typec_altmode_set_drvdata(alt, dp); + mutex_unlock(&con->lock); + return alt; } From b433e340e7565110b0ce9ca4b3e26f4b97a1decf Mon Sep 17 00:00:00 2001 From: Ran Wang Date: Fri, 6 Mar 2020 17:23:28 +0800 Subject: [PATCH 138/372] usb: host: xhci-plat: add a shutdown When loading new kernel via kexec, we need to shutdown host controller to avoid any un-expected memory accessing during new kernel boot. Signed-off-by: Ran Wang Cc: stable Tested-by: Stephen Boyd Reviewed-by: Peter Chen Link: https://lore.kernel.org/r/20200306092328.41253-1-ran.wang_1@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-plat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index d90cd5ec09cf..315b4552693c 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -445,6 +445,7 @@ MODULE_DEVICE_TABLE(acpi, usb_xhci_acpi_match); static struct platform_driver usb_xhci_driver = { .probe = xhci_plat_probe, .remove = xhci_plat_remove, + .shutdown = usb_hcd_platform_shutdown, .driver = { .name = "xhci-hcd", .pm = &xhci_plat_pm_ops, From 8e852a7953be2a6ee371449f7257fe15ace6a1fc Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Wed, 4 Mar 2020 11:43:10 +0100 Subject: [PATCH 139/372] USB: serial: option: add ME910G1 ECM composition 0x110b Add ME910G1 ECM composition 0x110b: tty, tty, tty, ecm Signed-off-by: Daniele Palmas Link: https://lore.kernel.org/r/20200304104310.2938-1-dnlplm@gmail.com Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 084cc2fff3ae..0b5dcf973d94 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1183,6 +1183,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(0) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x110a, 0xff), /* Telit ME910G1 */ .driver_info = NCTRL(0) | RSVD(3) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x110b, 0xff), /* Telit ME910G1 (ECM) */ + .driver_info = NCTRL(0) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910), .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910_USBCFG4), From cecc113c1af0dd41ccf265c1fdb84dbd05e63423 Mon Sep 17 00:00:00 2001 From: Scott Chen Date: Wed, 11 Mar 2020 14:14:23 +0800 Subject: [PATCH 140/372] USB: serial: pl2303: add device-id for HP LD381 Add a device id for HP LD381 Display LD381: 03f0:0f7f Signed-off-by: Scott Chen Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/pl2303.c | 1 + drivers/usb/serial/pl2303.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index aab737e1e7b6..c5a2995dfa2e 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -99,6 +99,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(SUPERIAL_VENDOR_ID, SUPERIAL_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LD220_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LD220TA_PRODUCT_ID) }, + { USB_DEVICE(HP_VENDOR_ID, HP_LD381_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LD960_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LD960TA_PRODUCT_ID) }, { USB_DEVICE(HP_VENDOR_ID, HP_LCM220_PRODUCT_ID) }, diff --git a/drivers/usb/serial/pl2303.h b/drivers/usb/serial/pl2303.h index a019ea7e6e0e..52db5519aaf0 100644 --- a/drivers/usb/serial/pl2303.h +++ b/drivers/usb/serial/pl2303.h @@ -130,6 +130,7 @@ #define HP_LM920_PRODUCT_ID 0x026b #define HP_TD620_PRODUCT_ID 0x0956 #define HP_LD960_PRODUCT_ID 0x0b39 +#define HP_LD381_PRODUCT_ID 0x0f7f #define HP_LCM220_PRODUCT_ID 0x3139 #define HP_LCM960_PRODUCT_ID 0x3239 #define HP_LD220_PRODUCT_ID 0x3524 From 18d200460cd73636d4f20674085c39e32b4e0097 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 11 Mar 2020 10:20:36 +0100 Subject: [PATCH 141/372] mmc: core: Respect MMC_CAP_NEED_RSP_BUSY for eMMC sleep command The busy timeout for the CMD5 to put the eMMC into sleep state, is specific to the card. Potentially the timeout may exceed the host->max_busy_timeout. If that becomes the case, mmc_sleep() converts from using an R1B response to an R1 response, as to prevent the host from doing HW busy detection. However, it has turned out that some hosts requires an R1B response no matter what, so let's respect that via checking MMC_CAP_NEED_RSP_BUSY. Note that, if the R1B gets enforced, the host becomes fully responsible of managing the needed busy timeout, in one way or the other. Suggested-by: Sowjanya Komatineni Cc: Link: https://lore.kernel.org/r/20200311092036.16084-1-ulf.hansson@linaro.org Signed-off-by: Ulf Hansson --- drivers/mmc/core/mmc.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index f6912ded652d..de14b5845f52 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -1910,9 +1910,12 @@ static int mmc_sleep(struct mmc_host *host) * If the max_busy_timeout of the host is specified, validate it against * the sleep cmd timeout. A failure means we need to prevent the host * from doing hw busy detection, which is done by converting to a R1 - * response instead of a R1B. + * response instead of a R1B. Note, some hosts requires R1B, which also + * means they are on their own when it comes to deal with the busy + * timeout. */ - if (host->max_busy_timeout && (timeout_ms > host->max_busy_timeout)) { + if (!(host->caps & MMC_CAP_NEED_RSP_BUSY) && host->max_busy_timeout && + (timeout_ms > host->max_busy_timeout)) { cmd.flags = MMC_RSP_R1 | MMC_CMD_AC; } else { cmd.flags = MMC_RSP_R1B | MMC_CMD_AC; From f967140dfb7442e2db0868b03b961f9c59418a1b Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Wed, 11 Mar 2020 14:13:21 -0500 Subject: [PATCH 142/372] perf/amd/uncore: Replace manual sampling check with CAP_NO_INTERRUPT flag Enable the sampling check in kernel/events/core.c::perf_event_open(), which returns the more appropriate -EOPNOTSUPP. BEFORE: $ sudo perf record -a -e instructions,l3_request_g1.caching_l3_cache_accesses true Error: The sys_perf_event_open() syscall returned with 22 (Invalid argument) for event (l3_request_g1.caching_l3_cache_accesses). /bin/dmesg | grep -i perf may provide additional information. With nothing relevant in dmesg. AFTER: $ sudo perf record -a -e instructions,l3_request_g1.caching_l3_cache_accesses true Error: l3_request_g1.caching_l3_cache_accesses: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat' Fixes: c43ca5091a37 ("perf/x86/amd: Add support for AMD NB and L2I "uncore" counters") Signed-off-by: Kim Phillips Signed-off-by: Borislav Petkov Acked-by: Peter Zijlstra Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20200311191323.13124-1-kim.phillips@amd.com --- arch/x86/events/amd/uncore.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index a6ea07f2aa84..4d867a752f0e 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -190,15 +190,12 @@ static int amd_uncore_event_init(struct perf_event *event) /* * NB and Last level cache counters (MSRs) are shared across all cores - * that share the same NB / Last level cache. Interrupts can be directed - * to a single target core, however, event counts generated by processes - * running on other cores cannot be masked out. So we do not support - * sampling and per-thread events. + * that share the same NB / Last level cache. On family 16h and below, + * Interrupts can be directed to a single target core, however, event + * counts generated by processes running on other cores cannot be masked + * out. So we do not support sampling and per-thread events via + * CAP_NO_INTERRUPT, and we do not enable counter overflow interrupts: */ - if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) - return -EINVAL; - - /* and we do not enable counter overflow interrupts */ hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB; hwc->idx = -1; @@ -306,7 +303,7 @@ static struct pmu amd_nb_pmu = { .start = amd_uncore_start, .stop = amd_uncore_stop, .read = amd_uncore_read, - .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT, }; static struct pmu amd_llc_pmu = { @@ -317,7 +314,7 @@ static struct pmu amd_llc_pmu = { .start = amd_uncore_start, .stop = amd_uncore_stop, .read = amd_uncore_read, - .capabilities = PERF_PMU_CAP_NO_EXCLUDE, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT, }; static struct amd_uncore *amd_uncore_alloc(unsigned int cpu) From 5e6bdd37c5526ef01326df5dabb93011ee89237e Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Thu, 12 Mar 2020 14:17:15 +0100 Subject: [PATCH 143/372] s390/dasd: fix data corruption for thin provisioned devices Devices are formatted in multiple of tracks. For an Extent Space Efficient (ESE) volume we get errors when accessing unformatted tracks. In this case the driver either formats the track on the flight for write requests or returns zero data for read requests. In case a request spans multiple tracks, the indication of an unformatted track presented for the first track is incorrectly applied to all tracks covered by the request. As a result, tracks containing data will be handled as empty, resulting in zero data being returned on read, or overwriting existing data with zero on write. Fix by determining the track that gets the NRF error. For write requests only format the track that is surely not formatted. For Read requests all tracks before have returned valid data and should not be touched. All tracks after the unformatted track might be formatted or not. Those are returned to the blocklayer to build a new request. When using alias devices there is a chance that multiple write requests trigger a format of the same track which might lead to data loss. Ensure that a track is formatted only once by maintaining a list of currently processed tracks. Fixes: 5e2b17e712cf ("s390/dasd: Add dynamic formatting support for ESE volumes") Cc: stable@vger.kernel.org # 5.3+ Signed-off-by: Stefan Haberland Reviewed-by: Jan Hoeppner Reviewed-by: Peter Oberparleiter Signed-off-by: Jens Axboe --- drivers/s390/block/dasd.c | 27 +++++- drivers/s390/block/dasd_eckd.c | 163 +++++++++++++++++++++++++++++++-- drivers/s390/block/dasd_int.h | 15 ++- 3 files changed, 193 insertions(+), 12 deletions(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 6cca72782af6..cf87eb27879f 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -178,6 +178,8 @@ struct dasd_block *dasd_alloc_block(void) (unsigned long) block); INIT_LIST_HEAD(&block->ccw_queue); spin_lock_init(&block->queue_lock); + INIT_LIST_HEAD(&block->format_list); + spin_lock_init(&block->format_lock); timer_setup(&block->timer, dasd_block_timeout, 0); spin_lock_init(&block->profile.lock); @@ -1779,20 +1781,26 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm, if (dasd_ese_needs_format(cqr->block, irb)) { if (rq_data_dir((struct request *)cqr->callback_data) == READ) { - device->discipline->ese_read(cqr); + device->discipline->ese_read(cqr, irb); cqr->status = DASD_CQR_SUCCESS; cqr->stopclk = now; dasd_device_clear_timer(device); dasd_schedule_device_bh(device); return; } - fcqr = device->discipline->ese_format(device, cqr); + fcqr = device->discipline->ese_format(device, cqr, irb); if (IS_ERR(fcqr)) { + if (PTR_ERR(fcqr) == -EINVAL) { + cqr->status = DASD_CQR_ERROR; + return; + } /* * If we can't format now, let the request go * one extra round. Maybe we can format later. */ cqr->status = DASD_CQR_QUEUED; + dasd_schedule_device_bh(device); + return; } else { fcqr->status = DASD_CQR_QUEUED; cqr->status = DASD_CQR_QUEUED; @@ -2748,11 +2756,13 @@ static void __dasd_cleanup_cqr(struct dasd_ccw_req *cqr) { struct request *req; blk_status_t error = BLK_STS_OK; + unsigned int proc_bytes; int status; req = (struct request *) cqr->callback_data; dasd_profile_end(cqr->block, cqr, req); + proc_bytes = cqr->proc_bytes; status = cqr->block->base->discipline->free_cp(cqr, req); if (status < 0) error = errno_to_blk_status(status); @@ -2783,7 +2793,18 @@ static void __dasd_cleanup_cqr(struct dasd_ccw_req *cqr) blk_mq_end_request(req, error); blk_mq_run_hw_queues(req->q, true); } else { - blk_mq_complete_request(req); + /* + * Partial completed requests can happen with ESE devices. + * During read we might have gotten a NRF error and have to + * complete a request partially. + */ + if (proc_bytes) { + blk_update_request(req, BLK_STS_OK, + blk_rq_bytes(req) - proc_bytes); + blk_mq_requeue_request(req, true); + } else { + blk_mq_complete_request(req); + } } } diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index a28b9ff82378..ad44d22e8859 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -207,6 +207,45 @@ static void set_ch_t(struct ch_t *geo, __u32 cyl, __u8 head) geo->head |= head; } +/* + * calculate failing track from sense data depending if + * it is an EAV device or not + */ +static int dasd_eckd_track_from_irb(struct irb *irb, struct dasd_device *device, + sector_t *track) +{ + struct dasd_eckd_private *private = device->private; + u8 *sense = NULL; + u32 cyl; + u8 head; + + sense = dasd_get_sense(irb); + if (!sense) { + DBF_DEV_EVENT(DBF_WARNING, device, "%s", + "ESE error no sense data\n"); + return -EINVAL; + } + if (!(sense[27] & DASD_SENSE_BIT_2)) { + DBF_DEV_EVENT(DBF_WARNING, device, "%s", + "ESE error no valid track data\n"); + return -EINVAL; + } + + if (sense[27] & DASD_SENSE_BIT_3) { + /* enhanced addressing */ + cyl = sense[30] << 20; + cyl |= (sense[31] & 0xF0) << 12; + cyl |= sense[28] << 8; + cyl |= sense[29]; + } else { + cyl = sense[29] << 8; + cyl |= sense[30]; + } + head = sense[31] & 0x0F; + *track = cyl * private->rdc_data.trk_per_cyl + head; + return 0; +} + static int set_timestamp(struct ccw1 *ccw, struct DE_eckd_data *data, struct dasd_device *device) { @@ -2986,6 +3025,37 @@ static int dasd_eckd_format_device(struct dasd_device *base, 0, NULL); } +static bool test_and_set_format_track(struct dasd_format_entry *to_format, + struct dasd_block *block) +{ + struct dasd_format_entry *format; + unsigned long flags; + bool rc = false; + + spin_lock_irqsave(&block->format_lock, flags); + list_for_each_entry(format, &block->format_list, list) { + if (format->track == to_format->track) { + rc = true; + goto out; + } + } + list_add_tail(&to_format->list, &block->format_list); + +out: + spin_unlock_irqrestore(&block->format_lock, flags); + return rc; +} + +static void clear_format_track(struct dasd_format_entry *format, + struct dasd_block *block) +{ + unsigned long flags; + + spin_lock_irqsave(&block->format_lock, flags); + list_del_init(&format->list); + spin_unlock_irqrestore(&block->format_lock, flags); +} + /* * Callback function to free ESE format requests. */ @@ -2993,15 +3063,19 @@ static void dasd_eckd_ese_format_cb(struct dasd_ccw_req *cqr, void *data) { struct dasd_device *device = cqr->startdev; struct dasd_eckd_private *private = device->private; + struct dasd_format_entry *format = data; + clear_format_track(format, cqr->basedev->block); private->count--; dasd_ffree_request(cqr, device); } static struct dasd_ccw_req * -dasd_eckd_ese_format(struct dasd_device *startdev, struct dasd_ccw_req *cqr) +dasd_eckd_ese_format(struct dasd_device *startdev, struct dasd_ccw_req *cqr, + struct irb *irb) { struct dasd_eckd_private *private; + struct dasd_format_entry *format; struct format_data_t fdata; unsigned int recs_per_trk; struct dasd_ccw_req *fcqr; @@ -3011,23 +3085,39 @@ dasd_eckd_ese_format(struct dasd_device *startdev, struct dasd_ccw_req *cqr) struct request *req; sector_t first_trk; sector_t last_trk; + sector_t curr_trk; int rc; req = cqr->callback_data; - base = cqr->block->base; + block = cqr->block; + base = block->base; private = base->private; - block = base->block; blksize = block->bp_block; recs_per_trk = recs_per_track(&private->rdc_data, 0, blksize); + format = &startdev->format_entry; first_trk = blk_rq_pos(req) >> block->s2b_shift; sector_div(first_trk, recs_per_trk); last_trk = (blk_rq_pos(req) + blk_rq_sectors(req) - 1) >> block->s2b_shift; sector_div(last_trk, recs_per_trk); + rc = dasd_eckd_track_from_irb(irb, base, &curr_trk); + if (rc) + return ERR_PTR(rc); - fdata.start_unit = first_trk; - fdata.stop_unit = last_trk; + if (curr_trk < first_trk || curr_trk > last_trk) { + DBF_DEV_EVENT(DBF_WARNING, startdev, + "ESE error track %llu not within range %llu - %llu\n", + curr_trk, first_trk, last_trk); + return ERR_PTR(-EINVAL); + } + format->track = curr_trk; + /* test if track is already in formatting by another thread */ + if (test_and_set_format_track(format, block)) + return ERR_PTR(-EEXIST); + + fdata.start_unit = curr_trk; + fdata.stop_unit = curr_trk; fdata.blksize = blksize; fdata.intensity = private->uses_cdl ? DASD_FMT_INT_COMPAT : 0; @@ -3044,6 +3134,7 @@ dasd_eckd_ese_format(struct dasd_device *startdev, struct dasd_ccw_req *cqr) return fcqr; fcqr->callback = dasd_eckd_ese_format_cb; + fcqr->callback_data = (void *) format; return fcqr; } @@ -3051,29 +3142,87 @@ dasd_eckd_ese_format(struct dasd_device *startdev, struct dasd_ccw_req *cqr) /* * When data is read from an unformatted area of an ESE volume, this function * returns zeroed data and thereby mimics a read of zero data. + * + * The first unformatted track is the one that got the NRF error, the address is + * encoded in the sense data. + * + * All tracks before have returned valid data and should not be touched. + * All tracks after the unformatted track might be formatted or not. This is + * currently not known, remember the processed data and return the remainder of + * the request to the blocklayer in __dasd_cleanup_cqr(). */ -static void dasd_eckd_ese_read(struct dasd_ccw_req *cqr) +static int dasd_eckd_ese_read(struct dasd_ccw_req *cqr, struct irb *irb) { + struct dasd_eckd_private *private; + sector_t first_trk, last_trk; + sector_t first_blk, last_blk; unsigned int blksize, off; + unsigned int recs_per_trk; struct dasd_device *base; struct req_iterator iter; + struct dasd_block *block; + unsigned int skip_block; + unsigned int blk_count; struct request *req; struct bio_vec bv; + sector_t curr_trk; + sector_t end_blk; char *dst; + int rc; req = (struct request *) cqr->callback_data; base = cqr->block->base; blksize = base->block->bp_block; + block = cqr->block; + private = base->private; + skip_block = 0; + blk_count = 0; + + recs_per_trk = recs_per_track(&private->rdc_data, 0, blksize); + first_trk = first_blk = blk_rq_pos(req) >> block->s2b_shift; + sector_div(first_trk, recs_per_trk); + last_trk = last_blk = + (blk_rq_pos(req) + blk_rq_sectors(req) - 1) >> block->s2b_shift; + sector_div(last_trk, recs_per_trk); + rc = dasd_eckd_track_from_irb(irb, base, &curr_trk); + if (rc) + return rc; + + /* sanity check if the current track from sense data is valid */ + if (curr_trk < first_trk || curr_trk > last_trk) { + DBF_DEV_EVENT(DBF_WARNING, base, + "ESE error track %llu not within range %llu - %llu\n", + curr_trk, first_trk, last_trk); + return -EINVAL; + } + + /* + * if not the first track got the NRF error we have to skip over valid + * blocks + */ + if (curr_trk != first_trk) + skip_block = curr_trk * recs_per_trk - first_blk; + + /* we have no information beyond the current track */ + end_blk = (curr_trk + 1) * recs_per_trk; rq_for_each_segment(bv, req, iter) { dst = page_address(bv.bv_page) + bv.bv_offset; for (off = 0; off < bv.bv_len; off += blksize) { - if (dst && rq_data_dir(req) == READ) { + if (first_blk + blk_count >= end_blk) { + cqr->proc_bytes = blk_count * blksize; + return 0; + } + if (dst && !skip_block) { dst += off; memset(dst, 0, blksize); + } else { + skip_block--; } + blk_count++; } } + return 0; } /* diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 91c9f9586e0f..fa552f9f1666 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -187,6 +187,7 @@ struct dasd_ccw_req { void (*callback)(struct dasd_ccw_req *, void *data); void *callback_data; + unsigned int proc_bytes; /* bytes for partial completion */ }; /* @@ -387,8 +388,9 @@ struct dasd_discipline { int (*ext_pool_warn_thrshld)(struct dasd_device *); int (*ext_pool_oos)(struct dasd_device *); int (*ext_pool_exhaust)(struct dasd_device *, struct dasd_ccw_req *); - struct dasd_ccw_req *(*ese_format)(struct dasd_device *, struct dasd_ccw_req *); - void (*ese_read)(struct dasd_ccw_req *); + struct dasd_ccw_req *(*ese_format)(struct dasd_device *, + struct dasd_ccw_req *, struct irb *); + int (*ese_read)(struct dasd_ccw_req *, struct irb *); }; extern struct dasd_discipline *dasd_diag_discipline_pointer; @@ -474,6 +476,11 @@ struct dasd_profile { spinlock_t lock; }; +struct dasd_format_entry { + struct list_head list; + sector_t track; +}; + struct dasd_device { /* Block device stuff. */ struct dasd_block *block; @@ -539,6 +546,7 @@ struct dasd_device { struct dentry *debugfs_dentry; struct dentry *hosts_dentry; struct dasd_profile profile; + struct dasd_format_entry format_entry; }; struct dasd_block { @@ -564,6 +572,9 @@ struct dasd_block { struct dentry *debugfs_dentry; struct dasd_profile profile; + + struct list_head format_list; + spinlock_t format_lock; }; struct dasd_attention_data { From cc3200eac4c5eb11c3f34848a014d1f286316310 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 12 Mar 2020 17:15:48 +0800 Subject: [PATCH 144/372] blk-mq: insert flush request to the front of dispatch queue commit 01e99aeca397 ("blk-mq: insert passthrough request into hctx->dispatch directly") may change to add flush request to the tail of dispatch by applying the 'add_head' parameter of blk_mq_sched_insert_request. Turns out this way causes performance regression on NCQ controller because flush is non-NCQ command, which can't be queued when there is any in-flight NCQ command. When adding flush rq to the front of hctx->dispatch, it is easier to introduce extra time to flush rq's latency compared with adding to the tail of dispatch queue because of S_SCHED_RESTART, then chance of flush merge is increased, and less flush requests may be issued to controller. So always insert flush request to the front of dispatch queue just like before applying commit 01e99aeca397 ("blk-mq: insert passthrough request into hctx->dispatch directly"). Cc: Damien Le Moal Cc: Shinichiro Kawasaki Reported-by: Shinichiro Kawasaki Fixes: 01e99aeca397 ("blk-mq: insert passthrough request into hctx->dispatch directly") Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq-sched.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 856356b1619e..74cedea56034 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -398,6 +398,28 @@ void blk_mq_sched_insert_request(struct request *rq, bool at_head, WARN_ON(e && (rq->tag != -1)); if (blk_mq_sched_bypass_insert(hctx, !!e, rq)) { + /* + * Firstly normal IO request is inserted to scheduler queue or + * sw queue, meantime we add flush request to dispatch queue( + * hctx->dispatch) directly and there is at most one in-flight + * flush request for each hw queue, so it doesn't matter to add + * flush request to tail or front of the dispatch queue. + * + * Secondly in case of NCQ, flush request belongs to non-NCQ + * command, and queueing it will fail when there is any + * in-flight normal IO request(NCQ command). When adding flush + * rq to the front of hctx->dispatch, it is easier to introduce + * extra time to flush rq's latency because of S_SCHED_RESTART + * compared with adding to the tail of dispatch queue, then + * chance of flush merge is increased, and less flush requests + * will be issued to controller. It is observed that ~10% time + * is saved in blktests block/004 on disk attached to AHCI/NCQ + * drive when adding flush rq to the front of hctx->dispatch. + * + * Simply queue flush rq to the front of hctx->dispatch so that + * intensive flush workloads can benefit in case of NCQ HW. + */ + at_head = (rq->rq_flags & RQF_FLUSH_SEQ) ? true : at_head; blk_mq_request_bypass_insert(rq, at_head, false); goto run; } From c80b18cbb04b7b101af9bd14550f13d9866c646a Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Wed, 19 Feb 2020 14:00:41 -0600 Subject: [PATCH 145/372] rtlwifi: rtl8188ee: Fix regression due to commit d1d1a96bdb44 For some unexplained reason, commit d1d1a96bdb44 ("rtlwifi: rtl8188ee: Remove local configuration variable") broke at least one system. As the only net effect of the change was to remove 2 bytes from the start of struct phy_status_rpt, this patch adds 2 bytes of padding at the beginning of the struct. Fixes: d1d1a96bdb44 ("rtlwifi: rtl8188ee: Remove local configuration variable") Cc: Stable # V5.4+ Reported-by: Ashish Tested-by: Ashish Signed-off-by: Larry Finger Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtlwifi/rtl8188ee/trx.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/trx.h b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/trx.h index 917729807514..e17f70b4d199 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/trx.h +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/trx.h @@ -561,6 +561,7 @@ static inline void clear_pci_tx_desc_content(__le32 *__pdesc, int _size) rxmcs == DESC92C_RATE11M) struct phy_status_rpt { + u8 padding[2]; u8 ch_corr[2]; u8 cck_sig_qual_ofdm_pwdb_all; u8 cck_agc_rpt_ofdm_cfosho_a; From b53df2e7442c73a932fb74228147fb946e531585 Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Fri, 21 Feb 2020 10:37:08 +0900 Subject: [PATCH 146/372] block: Fix partition support for host aware zoned block devices Commit b72053072c0b ("block: allow partitions on host aware zone devices") introduced the helper function disk_has_partitions() to check if a given disk has valid partitions. However, since this function result directly depends on the disk partition table length rather than the actual existence of valid partitions in the table, it returns true even after all partitions are removed from the disk. For host aware zoned block devices, this results in zone management support to be kept disabled even after removing all partitions. Fix this by changing disk_has_partitions() to walk through the partition table entries and return true if and only if a valid non-zero size partition is found. Fixes: b72053072c0b ("block: allow partitions on host aware zone devices") Cc: stable@vger.kernel.org # 5.5 Reviewed-by: Damien Le Moal Reviewed-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Signed-off-by: Shin'ichiro Kawasaki Signed-off-by: Jens Axboe --- block/genhd.c | 36 ++++++++++++++++++++++++++++++++++++ include/linux/genhd.h | 13 +------------ 2 files changed, 37 insertions(+), 12 deletions(-) diff --git a/block/genhd.c b/block/genhd.c index ff6268970ddc..9c2e13ce0d19 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -301,6 +301,42 @@ struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector) } EXPORT_SYMBOL_GPL(disk_map_sector_rcu); +/** + * disk_has_partitions + * @disk: gendisk of interest + * + * Walk through the partition table and check if valid partition exists. + * + * CONTEXT: + * Don't care. + * + * RETURNS: + * True if the gendisk has at least one valid non-zero size partition. + * Otherwise false. + */ +bool disk_has_partitions(struct gendisk *disk) +{ + struct disk_part_tbl *ptbl; + int i; + bool ret = false; + + rcu_read_lock(); + ptbl = rcu_dereference(disk->part_tbl); + + /* Iterate partitions skipping the whole device at index 0 */ + for (i = 1; i < ptbl->len; i++) { + if (rcu_dereference(ptbl->part[i])) { + ret = true; + break; + } + } + + rcu_read_unlock(); + + return ret; +} +EXPORT_SYMBOL_GPL(disk_has_partitions); + /* * Can be deleted altogether. Later. * diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 6fbe58538ad6..07dc91835b98 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -245,18 +245,6 @@ static inline bool disk_part_scan_enabled(struct gendisk *disk) !(disk->flags & GENHD_FL_NO_PART_SCAN); } -static inline bool disk_has_partitions(struct gendisk *disk) -{ - bool ret = false; - - rcu_read_lock(); - if (rcu_dereference(disk->part_tbl)->len > 1) - ret = true; - rcu_read_unlock(); - - return ret; -} - static inline dev_t disk_devt(struct gendisk *disk) { return MKDEV(disk->major, disk->first_minor); @@ -298,6 +286,7 @@ extern void disk_part_iter_exit(struct disk_part_iter *piter); extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector); +bool disk_has_partitions(struct gendisk *disk); /* * Macros to operate on percpu disk statistics: From 531d3040bc5cf37dea01b118608347cca9325f9d Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Mon, 2 Mar 2020 15:03:35 +0200 Subject: [PATCH 147/372] ovl: fix lock in ovl_llseek() ovl_inode_lock() is interruptible. When inode_lock() in ovl_llseek() was replaced with ovl_inode_lock(), we did not add a check for error. Fix this by making ovl_inode_lock() uninterruptible and change the existing call sites to use an _interruptible variant. Reported-by: syzbot+66a9752fa927f745385e@syzkaller.appspotmail.com Fixes: b1f9d3858f72 ("ovl: use ovl_inode_lock in ovl_llseek()") Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/overlayfs.h | 7 ++++++- fs/overlayfs/util.c | 4 ++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 3623d28aa4fa..3d3f2b8bdae5 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -318,7 +318,12 @@ static inline unsigned int ovl_xino_bits(struct super_block *sb) return ovl_same_dev(sb) ? OVL_FS(sb)->xino_mode : 0; } -static inline int ovl_inode_lock(struct inode *inode) +static inline void ovl_inode_lock(struct inode *inode) +{ + mutex_lock(&OVL_I(inode)->lock); +} + +static inline int ovl_inode_lock_interruptible(struct inode *inode) { return mutex_lock_interruptible(&OVL_I(inode)->lock); } diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index ea005085803f..042f7eb4f7f4 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -509,7 +509,7 @@ int ovl_copy_up_start(struct dentry *dentry, int flags) struct inode *inode = d_inode(dentry); int err; - err = ovl_inode_lock(inode); + err = ovl_inode_lock_interruptible(inode); if (!err && ovl_already_copied_up_locked(dentry, flags)) { err = 1; /* Already copied up */ ovl_inode_unlock(inode); @@ -764,7 +764,7 @@ int ovl_nlink_start(struct dentry *dentry) return err; } - err = ovl_inode_lock(inode); + err = ovl_inode_lock_interruptible(inode); if (err) return err; From f17f06a0c7794d3a7c2425663738823354447472 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Thu, 12 Mar 2020 15:25:10 +0000 Subject: [PATCH 148/372] slimbus: ngd: add v2.1.0 compatible This patch adds compatible for SlimBus Controller on SDM845. Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20200312152510.12224-1-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/slimbus/qcom-ngd-ctrl.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/slimbus/qcom-ngd-ctrl.c b/drivers/slimbus/qcom-ngd-ctrl.c index e3f5ebc0c05e..fc2575fef51b 100644 --- a/drivers/slimbus/qcom-ngd-ctrl.c +++ b/drivers/slimbus/qcom-ngd-ctrl.c @@ -1320,6 +1320,9 @@ static const struct of_device_id qcom_slim_ngd_dt_match[] = { { .compatible = "qcom,slim-ngd-v1.5.0", .data = &ngd_v1_5_offset_info, + },{ + .compatible = "qcom,slim-ngd-v2.1.0", + .data = &ngd_v1_5_offset_info, }, {} }; From 5461e0530c222129dfc941058be114b5cbc00837 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 12 Mar 2020 16:57:30 +0100 Subject: [PATCH 149/372] ALSA: pcm: oss: Remove WARNING from snd_pcm_plug_alloc() checks The return value checks in snd_pcm_plug_alloc() are covered with snd_BUG_ON() macro that may trigger a kernel WARNING depending on the kconfig. But since the error condition can be triggered by a weird user space parameter passed to OSS layer, we shouldn't give the kernel stack trace just for that. As it's a normal error condition, let's remove snd_BUG_ON() macro usage there. Reported-by: syzbot+2a59ee7a9831b264f45e@syzkaller.appspotmail.com Cc: Link: https://lore.kernel.org/r/20200312155730.7520-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/oss/pcm_plugin.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/core/oss/pcm_plugin.c b/sound/core/oss/pcm_plugin.c index c9401832967c..752d078908e9 100644 --- a/sound/core/oss/pcm_plugin.c +++ b/sound/core/oss/pcm_plugin.c @@ -111,7 +111,7 @@ int snd_pcm_plug_alloc(struct snd_pcm_substream *plug, snd_pcm_uframes_t frames) while (plugin->next) { if (plugin->dst_frames) frames = plugin->dst_frames(plugin, frames); - if (snd_BUG_ON((snd_pcm_sframes_t)frames <= 0)) + if ((snd_pcm_sframes_t)frames <= 0) return -ENXIO; plugin = plugin->next; err = snd_pcm_plugin_alloc(plugin, frames); @@ -123,7 +123,7 @@ int snd_pcm_plug_alloc(struct snd_pcm_substream *plug, snd_pcm_uframes_t frames) while (plugin->prev) { if (plugin->src_frames) frames = plugin->src_frames(plugin, frames); - if (snd_BUG_ON((snd_pcm_sframes_t)frames <= 0)) + if ((snd_pcm_sframes_t)frames <= 0) return -ENXIO; plugin = plugin->prev; err = snd_pcm_plugin_alloc(plugin, frames); From e2e57291097b289f84e05fa58ab5e6c6f30cc6e2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 12 Mar 2020 09:04:08 +0100 Subject: [PATCH 150/372] wlcore: remove stray plus sign The commit mentioned below added a stray plus sign, likely due to some conflict resolution (i.e. as a leftover from a unified diff), which was harmless since it was just used as an integer constant modifier. Remove it anyway, now that I stumbled across it. Fixes: cf33a7728bf2 ("wlcore: mesh: Add support for RX Broadcast Key") Signed-off-by: Johannes Berg Signed-off-by: Kalle Valo --- drivers/net/wireless/ti/wlcore/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index ed049c9f7e29..f140f7d7f553 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -6274,7 +6274,7 @@ static int wl1271_init_ieee80211(struct wl1271 *wl) wl->hw->wiphy->flags |= WIPHY_FLAG_AP_UAPSD | WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL | WIPHY_FLAG_HAS_CHANNEL_SWITCH | -+ WIPHY_FLAG_IBSS_RSN; + WIPHY_FLAG_IBSS_RSN; wl->hw->wiphy->features |= NL80211_FEATURE_AP_SCAN; From 2eebb7abefb9f95b412c51add3d8216980bf6066 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 12 Mar 2020 07:50:44 +0900 Subject: [PATCH 151/372] kbuild: doc: fix references to other documents All the files in Documentation/kbuild/ were converted to reST. Signed-off-by: Masahiro Yamada --- Documentation/kbuild/kbuild.rst | 2 +- Documentation/kbuild/kconfig-macro-language.rst | 2 +- Documentation/kbuild/makefiles.rst | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Documentation/kbuild/kbuild.rst b/Documentation/kbuild/kbuild.rst index f1e5dce86af7..510f38d7e78a 100644 --- a/Documentation/kbuild/kbuild.rst +++ b/Documentation/kbuild/kbuild.rst @@ -237,7 +237,7 @@ This is solely useful to speed up test compiles. KBUILD_EXTRA_SYMBOLS -------------------- For modules that use symbols from other modules. -See more details in modules.txt. +See more details in modules.rst. ALLSOURCE_ARCHS --------------- diff --git a/Documentation/kbuild/kconfig-macro-language.rst b/Documentation/kbuild/kconfig-macro-language.rst index 35b3263b7e40..8b413ef9603d 100644 --- a/Documentation/kbuild/kconfig-macro-language.rst +++ b/Documentation/kbuild/kconfig-macro-language.rst @@ -44,7 +44,7 @@ intermediate:: def_bool y Then, Kconfig moves onto the evaluation stage to resolve inter-symbol -dependency as explained in kconfig-language.txt. +dependency as explained in kconfig-language.rst. Variables diff --git a/Documentation/kbuild/makefiles.rst b/Documentation/kbuild/makefiles.rst index 6bc126a14b3d..04d5c01a2e99 100644 --- a/Documentation/kbuild/makefiles.rst +++ b/Documentation/kbuild/makefiles.rst @@ -924,7 +924,7 @@ When kbuild executes, the following steps are followed (roughly): $(KBUILD_AFLAGS_MODULE) is used to add arch-specific options that are used for assembler. - From commandline AFLAGS_MODULE shall be used (see kbuild.txt). + From commandline AFLAGS_MODULE shall be used (see kbuild.rst). KBUILD_CFLAGS_KERNEL $(CC) options specific for built-in @@ -937,7 +937,7 @@ When kbuild executes, the following steps are followed (roughly): $(KBUILD_CFLAGS_MODULE) is used to add arch-specific options that are used for $(CC). - From commandline CFLAGS_MODULE shall be used (see kbuild.txt). + From commandline CFLAGS_MODULE shall be used (see kbuild.rst). KBUILD_LDFLAGS_MODULE Options for $(LD) when linking modules @@ -945,7 +945,7 @@ When kbuild executes, the following steps are followed (roughly): $(KBUILD_LDFLAGS_MODULE) is used to add arch-specific options used when linking modules. This is often a linker script. - From commandline LDFLAGS_MODULE shall be used (see kbuild.txt). + From commandline LDFLAGS_MODULE shall be used (see kbuild.rst). KBUILD_LDS From af3d0a68698c7e5df8b72267086b23422a3954bb Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 6 Mar 2020 16:49:49 +0000 Subject: [PATCH 152/372] powerpc/kasan: Fix shadow memory protection with CONFIG_KASAN_VMALLOC With CONFIG_KASAN_VMALLOC, new page tables are created at the time shadow memory for vmalloc area is unmapped. If some parts of the page table still have entries to the zero page shadow memory, the entries are wrongly marked RW. With CONFIG_KASAN_VMALLOC, almost the entire kernel address space is managed by KASAN. To make it simple, just create KASAN page tables for the entire kernel space at kasan_init(). That doesn't use much more space, and that's anyway already done for hash platforms. Fixes: 3d4247fcc938 ("powerpc/32: Add support of KASAN_VMALLOC") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/ef5248fc1f496c6b0dfdb59380f24968f25f75c5.1583513368.git.christophe.leroy@c-s.fr --- arch/powerpc/mm/kasan/kasan_init_32.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c index db5664dde5ff..d2bed3fcb719 100644 --- a/arch/powerpc/mm/kasan/kasan_init_32.c +++ b/arch/powerpc/mm/kasan/kasan_init_32.c @@ -120,12 +120,6 @@ static void __init kasan_unmap_early_shadow_vmalloc(void) unsigned long k_cur; phys_addr_t pa = __pa(kasan_early_shadow_page); - if (!early_mmu_has_feature(MMU_FTR_HPTE_TABLE)) { - int ret = kasan_init_shadow_page_tables(k_start, k_end); - - if (ret) - panic("kasan: kasan_init_shadow_page_tables() failed"); - } for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) { pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(k_cur), k_cur), k_cur); pte_t *ptep = pte_offset_kernel(pmd, k_cur); @@ -143,7 +137,8 @@ void __init kasan_mmu_init(void) int ret; struct memblock_region *reg; - if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE)) { + if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE) || + IS_ENABLED(CONFIG_KASAN_VMALLOC)) { ret = kasan_init_shadow_page_tables(KASAN_SHADOW_START, KASAN_SHADOW_END); if (ret) From b55dbe596942e15b3d18d36a41af4fde022c9d48 Mon Sep 17 00:00:00 2001 From: "Stanley.Yang" Date: Wed, 11 Mar 2020 16:39:47 +0800 Subject: [PATCH 153/372] drm/amd/display: fix typos for dcn20_funcs and dcn21_funcs struct In dcn20_funcs and dcn21_funcs struct, the member ".dsc_pg_control = NULL" should be removed due to .dsc_pg_control be assigned to dcn20_dsc_pg_control. Signed-off-by: Stanley.Yang Reviewed-by: Anthony Koo Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c | 1 - drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c index d51e02fdab4d..5e640f17d3d4 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_init.c @@ -108,7 +108,6 @@ static const struct hwseq_private_funcs dcn20_private_funcs = { .enable_power_gating_plane = dcn20_enable_power_gating_plane, .dpp_pg_control = dcn20_dpp_pg_control, .hubp_pg_control = dcn20_hubp_pg_control, - .dsc_pg_control = NULL, .update_odm = dcn20_update_odm, .dsc_pg_control = dcn20_dsc_pg_control, .get_surface_visual_confirm_color = dcn10_get_surface_visual_confirm_color, diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c index 4861aa5c59ae..fddbd59bf4f9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_init.c @@ -116,7 +116,6 @@ static const struct hwseq_private_funcs dcn21_private_funcs = { .enable_power_gating_plane = dcn20_enable_power_gating_plane, .dpp_pg_control = dcn20_dpp_pg_control, .hubp_pg_control = dcn20_hubp_pg_control, - .dsc_pg_control = NULL, .update_odm = dcn20_update_odm, .dsc_pg_control = dcn20_dsc_pg_control, .get_surface_visual_confirm_color = dcn10_get_surface_visual_confirm_color, From 5bbc6604a62814511c32f2e39bc9ffb2c1b92cbe Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Tue, 10 Mar 2020 08:40:41 -0400 Subject: [PATCH 154/372] drm/amd/amdgpu: Fix GPR read from debugfs (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The offset into the array was specified in bytes but should be in terms of 32-bit words. Also prevent large reads that would also cause a buffer overread. v2: Read from correct offset from internal storage buffer. Signed-off-by: Tom St Denis Acked-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index f24ed9a1a3e5..337d7cdce8e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -781,11 +781,11 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf, ssize_t result = 0; uint32_t offset, se, sh, cu, wave, simd, thread, bank, *data; - if (size & 3 || *pos & 3) + if (size > 4096 || size & 3 || *pos & 3) return -EINVAL; /* decode offset */ - offset = *pos & GENMASK_ULL(11, 0); + offset = (*pos & GENMASK_ULL(11, 0)) >> 2; se = (*pos & GENMASK_ULL(19, 12)) >> 12; sh = (*pos & GENMASK_ULL(27, 20)) >> 20; cu = (*pos & GENMASK_ULL(35, 28)) >> 28; @@ -823,7 +823,7 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf, while (size) { uint32_t value; - value = data[offset++]; + value = data[result >> 2]; r = put_user(value, (uint32_t *)buf); if (r) { result = r; From 063e768ebd27d3ec0d6908b7f8ea9b0a732b9949 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Wed, 11 Mar 2020 14:15:27 +0800 Subject: [PATCH 155/372] drm/amdgpu: add fbdev suspend/resume on gpu reset This can fix the baco reset failure seen on Navi10. And this should be a low risk fix as the same sequence is already used for system suspend/resume. Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 39cd545976b7..b8975857d60d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3913,6 +3913,8 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, if (r) goto out; + amdgpu_fbdev_set_suspend(tmp_adev, 0); + /* must succeed. */ amdgpu_ras_resume(tmp_adev); @@ -4086,6 +4088,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, */ amdgpu_unregister_gpu_instance(tmp_adev); + amdgpu_fbdev_set_suspend(adev, 1); + /* disable ras on ALL IPs */ if (!(in_ras_intr && !use_baco) && amdgpu_device_ip_need_full_reset(tmp_adev)) From dec9de2ada523b344eb2428abfedf9d6cd0a0029 Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Fri, 28 Feb 2020 22:36:07 +0100 Subject: [PATCH 156/372] drm/amd/display: Add link_rate quirk for Apple 15" MBP 2017 This fixes a problem found on the MacBookPro 2017 Retina panel: The panel reports 10 bpc color depth in its EDID, and the firmware chooses link settings at boot which support enough bandwidth for 10 bpc (324000 kbit/sec aka LINK_RATE_RBR2 aka 0xc), but the DP_MAX_LINK_RATE dpcd register only reports 2.7 Gbps (multiplier value 0xa) as possible, in direct contradiction of what the firmware successfully set up. This restricts the panel to 8 bpc, not providing the full color depth of the panel on Linux <= 5.5. Additionally, commit '4a8ca46bae8a ("drm/amd/display: Default max bpc to 16 for eDP")' introduced into Linux 5.6-rc1 will unclamp panel depth to its full 10 bpc, thereby requiring a eDP bandwidth for all modes that exceeds the bandwidth available and causes all modes to fail validation -> No modes for the laptop panel -> failure to set any mode -> Panel goes dark. This patch adds a quirk specific to the MBP 2017 15" Retina panel to override reported max link rate to the correct maximum of 0xc = LINK_RATE_RBR2 to fix the darkness and reduced display precision. Please apply for Linux 5.6+ to avoid regressing Apple MBP panel support. Signed-off-by: Mario Kleiner Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index cb731c1d30b1..fd9e69634c50 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -3401,6 +3401,17 @@ static bool retrieve_link_cap(struct dc_link *link) sink_id.ieee_device_id, sizeof(sink_id.ieee_device_id)); + /* Quirk Apple MBP 2017 15" Retina panel: Wrong DP_MAX_LINK_RATE */ + { + uint8_t str_mbp_2017[] = { 101, 68, 21, 101, 98, 97 }; + + if ((link->dpcd_caps.sink_dev_id == 0x0010fa) && + !memcmp(link->dpcd_caps.sink_dev_id_str, str_mbp_2017, + sizeof(str_mbp_2017))) { + link->reported_link_cap.link_rate = 0x0c; + } + } + core_link_read_dpcd( link, DP_SINK_HW_REVISION_START, From 59833696442c674acbbd297772ba89e7ad8c753d Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 9 Mar 2020 15:01:37 +0100 Subject: [PATCH 157/372] iommu/vt-d: dmar: replace WARN_TAINT with pr_warn + add_taint Quoting from the comment describing the WARN functions in include/asm-generic/bug.h: * WARN(), WARN_ON(), WARN_ON_ONCE, and so on can be used to report * significant kernel issues that need prompt attention if they should ever * appear at runtime. * * Do not use these macros when checking for invalid external inputs The (buggy) firmware tables which the dmar code was calling WARN_TAINT for really are invalid external inputs. They are not under the kernel's control and the issues in them cannot be fixed by a kernel update. So logging a backtrace, which invites bug reports to be filed about this, is not helpful. Some distros, e.g. Fedora, have tools watching for the kernel backtraces logged by the WARN macros and offer the user an option to file a bug for this when these are encountered. The WARN_TAINT in warn_invalid_dmar() + another iommu WARN_TAINT, addressed in another patch, have lead to over a 100 bugs being filed this way. This commit replaces the WARN_TAINT("...") calls, with pr_warn(FW_BUG "...") + add_taint(TAINT_FIRMWARE_WORKAROUND, ...) calls avoiding the backtrace and thus also avoiding bug-reports being filed about this against the kernel. Fixes: fd0c8894893c ("intel-iommu: Set a more specific taint flag for invalid BIOS DMAR tables") Fixes: e625b4a95d50 ("iommu/vt-d: Parse ANDD records") Signed-off-by: Hans de Goede Signed-off-by: Joerg Roedel Acked-by: Lu Baolu Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200309140138.3753-2-hdegoede@redhat.com BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1564895 --- drivers/iommu/dmar.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 7b16c4db40b4..c000ddff822e 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -441,12 +441,13 @@ static int __init dmar_parse_one_andd(struct acpi_dmar_header *header, /* Check for NUL termination within the designated length */ if (strnlen(andd->device_name, header->length - 8) == header->length - 8) { - WARN_TAINT(1, TAINT_FIRMWARE_WORKAROUND, + pr_warn(FW_BUG "Your BIOS is broken; ANDD object name is not NUL-terminated\n" "BIOS vendor: %s; Ver: %s; Product Version: %s\n", dmi_get_system_info(DMI_BIOS_VENDOR), dmi_get_system_info(DMI_BIOS_VERSION), dmi_get_system_info(DMI_PRODUCT_VERSION)); + add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); return -EINVAL; } pr_info("ANDD device: %x name: %s\n", andd->device_number, @@ -472,14 +473,14 @@ static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg) return 0; } } - WARN_TAINT( - 1, TAINT_FIRMWARE_WORKAROUND, + pr_warn(FW_BUG "Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n" "BIOS vendor: %s; Ver: %s; Product Version: %s\n", drhd->reg_base_addr, dmi_get_system_info(DMI_BIOS_VENDOR), dmi_get_system_info(DMI_BIOS_VERSION), dmi_get_system_info(DMI_PRODUCT_VERSION)); + add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); return 0; } @@ -828,14 +829,14 @@ int __init dmar_table_init(void) static void warn_invalid_dmar(u64 addr, const char *message) { - WARN_TAINT_ONCE( - 1, TAINT_FIRMWARE_WORKAROUND, + pr_warn_once(FW_BUG "Your BIOS is broken; DMAR reported at address %llx%s!\n" "BIOS vendor: %s; Ver: %s; Product Version: %s\n", addr, message, dmi_get_system_info(DMI_BIOS_VENDOR), dmi_get_system_info(DMI_BIOS_VERSION), dmi_get_system_info(DMI_PRODUCT_VERSION)); + add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); } static int __ref From 96788c7a7f1e7206519d4d736f89a2072dcfe0fc Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 9 Mar 2020 15:01:38 +0100 Subject: [PATCH 158/372] iommu/vt-d: dmar_parse_one_rmrr: replace WARN_TAINT with pr_warn + add_taint Quoting from the comment describing the WARN functions in include/asm-generic/bug.h: * WARN(), WARN_ON(), WARN_ON_ONCE, and so on can be used to report * significant kernel issues that need prompt attention if they should ever * appear at runtime. * * Do not use these macros when checking for invalid external inputs The (buggy) firmware tables which the dmar code was calling WARN_TAINT for really are invalid external inputs. They are not under the kernel's control and the issues in them cannot be fixed by a kernel update. So logging a backtrace, which invites bug reports to be filed about this, is not helpful. Some distros, e.g. Fedora, have tools watching for the kernel backtraces logged by the WARN macros and offer the user an option to file a bug for this when these are encountered. The WARN_TAINT in dmar_parse_one_rmrr + another iommu WARN_TAINT, addressed in another patch, have lead to over a 100 bugs being filed this way. This commit replaces the WARN_TAINT("...") call, with a pr_warn(FW_BUG "...") + add_taint(TAINT_FIRMWARE_WORKAROUND, ...) call avoiding the backtrace and thus also avoiding bug-reports being filed about this against the kernel. Fixes: f5a68bb0752e ("iommu/vt-d: Mark firmware tainted if RMRR fails sanity check") Signed-off-by: Hans de Goede Signed-off-by: Joerg Roedel Acked-by: Lu Baolu Cc: stable@vger.kernel.org Cc: Barret Rhoden Link: https://lore.kernel.org/r/20200309140138.3753-3-hdegoede@redhat.com BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1808874 --- drivers/iommu/intel-iommu.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 693380355dea..bdcdfff6015b 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -4460,14 +4460,16 @@ int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg) struct dmar_rmrr_unit *rmrru; rmrr = (struct acpi_dmar_reserved_memory *)header; - if (rmrr_sanity_check(rmrr)) - WARN_TAINT(1, TAINT_FIRMWARE_WORKAROUND, + if (rmrr_sanity_check(rmrr)) { + pr_warn(FW_BUG "Your BIOS is broken; bad RMRR [%#018Lx-%#018Lx]\n" "BIOS vendor: %s; Ver: %s; Product Version: %s\n", rmrr->base_address, rmrr->end_address, dmi_get_system_info(DMI_BIOS_VENDOR), dmi_get_system_info(DMI_BIOS_VERSION), dmi_get_system_info(DMI_PRODUCT_VERSION)); + add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); + } rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL); if (!rmrru) From 81ee85d0462410de8eeeec1b9761941fd6ed8c7b Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 9 Mar 2020 19:25:10 +0100 Subject: [PATCH 159/372] iommu/vt-d: quirk_ioat_snb_local_iommu: replace WARN_TAINT with pr_warn + add_taint Quoting from the comment describing the WARN functions in include/asm-generic/bug.h: * WARN(), WARN_ON(), WARN_ON_ONCE, and so on can be used to report * significant kernel issues that need prompt attention if they should ever * appear at runtime. * * Do not use these macros when checking for invalid external inputs The (buggy) firmware tables which the dmar code was calling WARN_TAINT for really are invalid external inputs. They are not under the kernel's control and the issues in them cannot be fixed by a kernel update. So logging a backtrace, which invites bug reports to be filed about this, is not helpful. Fixes: 556ab45f9a77 ("ioat2: catch and recover from broken vtd configurations v6") Signed-off-by: Hans de Goede Acked-by: Lu Baolu Link: https://lore.kernel.org/r/20200309182510.373875-1-hdegoede@redhat.com BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=701847 Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index bdcdfff6015b..2943d3600b7c 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -4261,10 +4261,11 @@ static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev) /* we know that the this iommu should be at offset 0xa000 from vtbar */ drhd = dmar_find_matched_drhd_unit(pdev); - if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000, - TAINT_FIRMWARE_WORKAROUND, - "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n")) + if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) { + pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"); + add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO; + } } DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu); From ba3b01d7a6f4ab9f8a0557044c9a7678f64ae070 Mon Sep 17 00:00:00 2001 From: Megha Dey Date: Mon, 9 Mar 2020 13:09:46 -0700 Subject: [PATCH 160/372] iommu/vt-d: Fix debugfs register reads Commit 6825d3ea6cde ("iommu/vt-d: Add debugfs support to show register contents") dumps the register contents for all IOMMU devices. Currently, a 64 bit read(dmar_readq) is done for all the IOMMU registers, even though some of the registers are 32 bits, which is incorrect. Use the correct read function variant (dmar_readl/dmar_readq) while reading the contents of 32/64 bit registers respectively. Signed-off-by: Megha Dey Link: https://lore.kernel.org/r/1583784587-26126-2-git-send-email-megha.dey@linux.intel.com Acked-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu-debugfs.c | 46 ++++++++++++++++++----------- include/linux/intel-iommu.h | 2 ++ 2 files changed, 30 insertions(+), 18 deletions(-) diff --git a/drivers/iommu/intel-iommu-debugfs.c b/drivers/iommu/intel-iommu-debugfs.c index c1257bef553c..0a7791934a16 100644 --- a/drivers/iommu/intel-iommu-debugfs.c +++ b/drivers/iommu/intel-iommu-debugfs.c @@ -33,38 +33,42 @@ struct iommu_regset { #define IOMMU_REGSET_ENTRY(_reg_) \ { DMAR_##_reg_##_REG, __stringify(_reg_) } -static const struct iommu_regset iommu_regs[] = { + +static const struct iommu_regset iommu_regs_32[] = { IOMMU_REGSET_ENTRY(VER), - IOMMU_REGSET_ENTRY(CAP), - IOMMU_REGSET_ENTRY(ECAP), IOMMU_REGSET_ENTRY(GCMD), IOMMU_REGSET_ENTRY(GSTS), - IOMMU_REGSET_ENTRY(RTADDR), - IOMMU_REGSET_ENTRY(CCMD), IOMMU_REGSET_ENTRY(FSTS), IOMMU_REGSET_ENTRY(FECTL), IOMMU_REGSET_ENTRY(FEDATA), IOMMU_REGSET_ENTRY(FEADDR), IOMMU_REGSET_ENTRY(FEUADDR), - IOMMU_REGSET_ENTRY(AFLOG), IOMMU_REGSET_ENTRY(PMEN), IOMMU_REGSET_ENTRY(PLMBASE), IOMMU_REGSET_ENTRY(PLMLIMIT), - IOMMU_REGSET_ENTRY(PHMBASE), - IOMMU_REGSET_ENTRY(PHMLIMIT), - IOMMU_REGSET_ENTRY(IQH), - IOMMU_REGSET_ENTRY(IQT), - IOMMU_REGSET_ENTRY(IQA), IOMMU_REGSET_ENTRY(ICS), - IOMMU_REGSET_ENTRY(IRTA), - IOMMU_REGSET_ENTRY(PQH), - IOMMU_REGSET_ENTRY(PQT), - IOMMU_REGSET_ENTRY(PQA), IOMMU_REGSET_ENTRY(PRS), IOMMU_REGSET_ENTRY(PECTL), IOMMU_REGSET_ENTRY(PEDATA), IOMMU_REGSET_ENTRY(PEADDR), IOMMU_REGSET_ENTRY(PEUADDR), +}; + +static const struct iommu_regset iommu_regs_64[] = { + IOMMU_REGSET_ENTRY(CAP), + IOMMU_REGSET_ENTRY(ECAP), + IOMMU_REGSET_ENTRY(RTADDR), + IOMMU_REGSET_ENTRY(CCMD), + IOMMU_REGSET_ENTRY(AFLOG), + IOMMU_REGSET_ENTRY(PHMBASE), + IOMMU_REGSET_ENTRY(PHMLIMIT), + IOMMU_REGSET_ENTRY(IQH), + IOMMU_REGSET_ENTRY(IQT), + IOMMU_REGSET_ENTRY(IQA), + IOMMU_REGSET_ENTRY(IRTA), + IOMMU_REGSET_ENTRY(PQH), + IOMMU_REGSET_ENTRY(PQT), + IOMMU_REGSET_ENTRY(PQA), IOMMU_REGSET_ENTRY(MTRRCAP), IOMMU_REGSET_ENTRY(MTRRDEF), IOMMU_REGSET_ENTRY(MTRR_FIX64K_00000), @@ -127,10 +131,16 @@ static int iommu_regset_show(struct seq_file *m, void *unused) * by adding the offset to the pointer (virtual address). */ raw_spin_lock_irqsave(&iommu->register_lock, flag); - for (i = 0 ; i < ARRAY_SIZE(iommu_regs); i++) { - value = dmar_readq(iommu->reg + iommu_regs[i].offset); + for (i = 0 ; i < ARRAY_SIZE(iommu_regs_32); i++) { + value = dmar_readl(iommu->reg + iommu_regs_32[i].offset); seq_printf(m, "%-16s\t0x%02x\t\t0x%016llx\n", - iommu_regs[i].regs, iommu_regs[i].offset, + iommu_regs_32[i].regs, iommu_regs_32[i].offset, + value); + } + for (i = 0 ; i < ARRAY_SIZE(iommu_regs_64); i++) { + value = dmar_readq(iommu->reg + iommu_regs_64[i].offset); + seq_printf(m, "%-16s\t0x%02x\t\t0x%016llx\n", + iommu_regs_64[i].regs, iommu_regs_64[i].offset, value); } raw_spin_unlock_irqrestore(&iommu->register_lock, flag); diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 4a16b39ae353..980234ae0312 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -123,6 +123,8 @@ #define dmar_readq(a) readq(a) #define dmar_writeq(a,v) writeq(v,a) +#define dmar_readl(a) readl(a) +#define dmar_writel(a, v) writel(v, a) #define DMAR_VER_MAJOR(v) (((v) & 0xf0) >> 4) #define DMAR_VER_MINOR(v) ((v) & 0x0f) From 8daee952b4389729358665fb91949460641659d4 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 12 Mar 2020 14:32:44 +0100 Subject: [PATCH 161/372] i2c: acpi: put device when verifying client fails i2c_verify_client() can fail, so we need to put the device when that happens. Fixes: 525e6fabeae2 ("i2c / ACPI: add support for ACPI reconfigure notifications") Reported-by: Geert Uytterhoeven Signed-off-by: Wolfram Sang Reviewed-by: Geert Uytterhoeven Reviewed-by: Andy Shevchenko Acked-by: Mika Westerberg Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core-acpi.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c index 8f3dbc97a057..8b0ff780919b 100644 --- a/drivers/i2c/i2c-core-acpi.c +++ b/drivers/i2c/i2c-core-acpi.c @@ -394,9 +394,17 @@ EXPORT_SYMBOL_GPL(i2c_acpi_find_adapter_by_handle); static struct i2c_client *i2c_acpi_find_client_by_adev(struct acpi_device *adev) { struct device *dev; + struct i2c_client *client; dev = bus_find_device_by_acpi_dev(&i2c_bus_type, adev); - return dev ? i2c_verify_client(dev) : NULL; + if (!dev) + return NULL; + + client = i2c_verify_client(dev); + if (!client) + put_device(dev); + + return client; } static int i2c_acpi_notify(struct notifier_block *nb, unsigned long value, From 469ff207b4c4033540b50bc59587dc915faa1367 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 12 Mar 2020 16:58:30 -0400 Subject: [PATCH 162/372] x86/vector: Remove warning on managed interrupt migration The vector management code assumes that managed interrupts cannot be migrated away from an online CPU. free_moved_vector() has a WARN_ON_ONCE() which triggers when a managed interrupt vector association on a online CPU is cleared. The CPU offline code uses a different mechanism which cannot trigger this. This assumption is not longer correct because the new CPU isolation feature which affects the placement of managed interrupts must be able to move a managed interrupt away from an online CPU. There are two reasons why this can happen: 1) When the interrupt is activated the affinity mask which was established in irq_create_affinity_masks() is handed in to the vector allocation code. This mask contains all CPUs to which the interrupt can be made affine to, but this does not take the CPU isolation 'managed_irq' mask into account. When the interrupt is finally requested by the device driver then the affinity is checked again and the CPU isolation 'managed_irq' mask is taken into account, which moves the interrupt to a non-isolated CPU if possible. 2) The interrupt can be affine to an isolated CPU because the non-isolated CPUs in the calculated affinity mask are not online. Once a non-isolated CPU which is in the mask comes online the interrupt is migrated to this non-isolated CPU In both cases the regular online migration mechanism is used which triggers the WARN_ON_ONCE() in free_moved_vector(). Case #1 could have been addressed by taking the isolation mask into account, but that would require a massive code change in the activation logic and the eventual migration event was accepted as a reasonable tradeoff when the isolation feature was developed. But even if #1 would be addressed, #2 would still trigger it. Of course the warning in free_moved_vector() was overlooked at that time and the above two cases which have been discussed during patch review have obviously never been tested before the final submission. So keep it simple and remove the warning. [ tglx: Rewrote changelog and added a comment to free_moved_vector() ] Fixes: 11ea68f553e2 ("genirq, sched/isolation: Isolate from handling managed interrupts") Signed-off-by: Peter Xu Signed-off-by: Thomas Gleixner Reviewed-by: Ming Lei Link: https://lkml.kernel.org/r/20200312205830.81796-1-peterx@redhat.com --- arch/x86/kernel/apic/vector.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 2c5676b0a6e7..48293d15f1e1 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -838,13 +838,15 @@ static void free_moved_vector(struct apic_chip_data *apicd) bool managed = apicd->is_managed; /* - * This should never happen. Managed interrupts are not - * migrated except on CPU down, which does not involve the - * cleanup vector. But try to keep the accounting correct - * nevertheless. + * Managed interrupts are usually not migrated away + * from an online CPU, but CPU isolation 'managed_irq' + * can make that happen. + * 1) Activation does not take the isolation into account + * to keep the code simple + * 2) Migration away from an isolated CPU can happen when + * a non-isolated CPU which is in the calculated + * affinity mask comes online. */ - WARN_ON_ONCE(managed); - trace_vector_free_moved(apicd->irq, cpu, vector, managed); irq_matrix_free(vector_matrix, cpu, vector, managed); per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED; From 8c34cd1a7f089dc03933289c5d4a4d1489549828 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Fri, 13 Mar 2020 09:41:52 +0100 Subject: [PATCH 163/372] drm/bochs: downgrade pci_request_region failure from error to warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Shutdown of firmware framebuffer has a bunch of problems. Because of this the framebuffer region might still be reserved even after drm_fb_helper_remove_conflicting_pci_framebuffers() returned. Don't consider pci_request_region() failure for the framebuffer region as fatal error to workaround this issue. Reported-by: Marek Marczykowski-Górecki Signed-off-by: Gerd Hoffmann Acked-by: Sam Ravnborg Link: http://patchwork.freedesktop.org/patch/msgid/20200313084152.2734-1-kraxel@redhat.com --- drivers/gpu/drm/bochs/bochs_hw.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/bochs/bochs_hw.c b/drivers/gpu/drm/bochs/bochs_hw.c index b615b7dfdd9d..a4fc4e6aee39 100644 --- a/drivers/gpu/drm/bochs/bochs_hw.c +++ b/drivers/gpu/drm/bochs/bochs_hw.c @@ -156,10 +156,8 @@ int bochs_hw_init(struct drm_device *dev) size = min(size, mem); } - if (pci_request_region(pdev, 0, "bochs-drm") != 0) { - DRM_ERROR("Cannot request framebuffer\n"); - return -EBUSY; - } + if (pci_request_region(pdev, 0, "bochs-drm") != 0) + DRM_WARN("Cannot request framebuffer, boot fb still active?\n"); bochs->fb_map = ioremap(addr, size); if (bochs->fb_map == NULL) { From 53afcd310e867d25e394718558783c476301205c Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 21 Feb 2020 16:34:42 +0200 Subject: [PATCH 164/372] ovl: fix some xino configurations Fix up two bugs in the coversion to xino_mode: 1. xino=off does not always end up in disabled mode 2. xino=auto on 32bit arch should end up in disabled mode Take a proactive approach to disabling xino on 32bit kernel: 1. Disable XINO_AUTO config during build time 2. Disable xino with a warning on mount time As a by product, xino=on on 32bit arch also ends up in disabled mode. We never intended to enable xino on 32bit arch and this will make the rest of the logic simpler. Fixes: 0f831ec85eda ("ovl: simplify ovl_same_sb() helper") Signed-off-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/Kconfig | 1 + fs/overlayfs/super.c | 9 ++++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig index 444e2da4f60e..714c14c47ca5 100644 --- a/fs/overlayfs/Kconfig +++ b/fs/overlayfs/Kconfig @@ -93,6 +93,7 @@ config OVERLAY_FS_XINO_AUTO bool "Overlayfs: auto enable inode number mapping" default n depends on OVERLAY_FS + depends on 64BIT help If this config option is enabled then overlay filesystems will use unused high bits in undelying filesystem inode numbers to map all diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 319fe0d355b0..ac967f1cb6e5 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1411,6 +1411,8 @@ static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs, if (ofs->config.xino == OVL_XINO_ON) pr_info("\"xino=on\" is useless with all layers on same fs, ignore.\n"); ofs->xino_mode = 0; + } else if (ofs->config.xino == OVL_XINO_OFF) { + ofs->xino_mode = -1; } else if (ofs->config.xino == OVL_XINO_ON && ofs->xino_mode < 0) { /* * This is a roundup of number of bits needed for encoding @@ -1623,8 +1625,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) sb->s_stack_depth = 0; sb->s_maxbytes = MAX_LFS_FILESIZE; /* Assume underlaying fs uses 32bit inodes unless proven otherwise */ - if (ofs->config.xino != OVL_XINO_OFF) + if (ofs->config.xino != OVL_XINO_OFF) { ofs->xino_mode = BITS_PER_LONG - 32; + if (!ofs->xino_mode) { + pr_warn("xino not supported on 32bit kernel, falling back to xino=off.\n"); + ofs->config.xino = OVL_XINO_OFF; + } + } /* alloc/destroy_inode needed for setting up traps in inode cache */ sb->s_op = &ovl_super_operations; From c853680453ac235e9010987a8bdaaba0e116d3c8 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 13 Mar 2020 15:42:20 +0100 Subject: [PATCH 165/372] ovl: fix lockdep warning for async write Lockdep reports "WARNING: lock held when returning to user space!" due to async write holding freeze lock over the write. Apparently aio.c already deals with this by lying to lockdep about the state of the lock. Do the same here. No need to check for S_IFREG() here since these file ops are regular-only. Reported-by: syzbot+9331a354f4f624a52a55@syzkaller.appspotmail.com Fixes: 2406a307ac7d ("ovl: implement async IO routines") Signed-off-by: Miklos Szeredi --- fs/overlayfs/file.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index a5317216de73..87c362f65448 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -244,6 +244,9 @@ static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req) if (iocb->ki_flags & IOCB_WRITE) { struct inode *inode = file_inode(orig_iocb->ki_filp); + /* Actually acquired in ovl_write_iter() */ + __sb_writers_acquired(file_inode(iocb->ki_filp)->i_sb, + SB_FREEZE_WRITE); file_end_write(iocb->ki_filp); ovl_copyattr(ovl_inode_real(inode), inode); } @@ -346,6 +349,9 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) goto out; file_start_write(real.file); + /* Pacify lockdep, same trick as done in aio_write() */ + __sb_writers_release(file_inode(real.file)->i_sb, + SB_FREEZE_WRITE); aio_req->fd = real; real.flags = 0; aio_req->orig_iocb = iocb; From ddd2b85ff73bb60061a9fb08ac1f5a03a2d4bce0 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Thu, 12 Mar 2020 21:36:53 +0000 Subject: [PATCH 166/372] afs: Use kfree_rcu() instead of casting kfree() to rcu_callback_t afs_put_addrlist() casts kfree() to rcu_callback_t. Apart from being wrong in theory, this might also blow up when people start enforcing function types via compiler instrumentation, and it means the rcu_head has to be first in struct afs_addr_list. Use kfree_rcu() instead, it's simpler and more correct. Signed-off-by: Jann Horn Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- fs/afs/addr_list.c | 2 +- fs/afs/internal.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c index df415c05939e..de1ae0bead3b 100644 --- a/fs/afs/addr_list.c +++ b/fs/afs/addr_list.c @@ -19,7 +19,7 @@ void afs_put_addrlist(struct afs_addr_list *alist) { if (alist && refcount_dec_and_test(&alist->usage)) - call_rcu(&alist->rcu, (rcu_callback_t)kfree); + kfree_rcu(alist, rcu); } /* diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 1d81fc4c3058..35f951ac296f 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -81,7 +81,7 @@ enum afs_call_state { * List of server addresses. */ struct afs_addr_list { - struct rcu_head rcu; /* Must be first */ + struct rcu_head rcu; refcount_t usage; u32 version; /* Version */ unsigned char max_addrs; From 236ebc20d9afc5e9ff52f3cf3f365a91583aac10 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 10 Mar 2020 12:13:53 +0000 Subject: [PATCH 167/372] btrfs: fix log context list corruption after rename whiteout error During a rename whiteout, if btrfs_whiteout_for_rename() returns an error we can end up returning from btrfs_rename() with the log context object still in the root's log context list - this happens if 'sync_log' was set to true before we called btrfs_whiteout_for_rename() and it is dangerous because we end up with a corrupt linked list (root->log_ctxs) as the log context object was allocated on the stack. After btrfs_rename() returns, any task that is running btrfs_sync_log() concurrently can end up crashing because that linked list is traversed by btrfs_sync_log() (through btrfs_remove_all_log_ctxs()). That results in the same issue that commit e6c617102c7e4 ("Btrfs: fix log context list corruption after rename exchange operation") fixed. Fixes: d4682ba03ef618 ("Btrfs: sync log after logging new name") CC: stable@vger.kernel.org # 4.19+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/inode.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 27076ebadb36..d267eb5caa7b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9496,6 +9496,10 @@ out_fail: ret = btrfs_sync_log(trans, BTRFS_I(old_inode)->root, &ctx); if (ret) commit_transaction = true; + } else if (sync_log) { + mutex_lock(&root->log_mutex); + list_del(&ctx.list); + mutex_unlock(&root->log_mutex); } if (commit_transaction) { ret = btrfs_commit_transaction(trans); From 158fe6665389964a1de212818b4a5c52b7f7aff4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 13 Mar 2020 09:05:38 +0000 Subject: [PATCH 168/372] rxrpc: Abstract out the calculation of whether there's Tx space Abstract out the calculation of there being sufficient Tx buffer space. This is reproduced several times in the rxrpc sendmsg code. Signed-off-by: David Howells --- net/rxrpc/sendmsg.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 813fd6888142..a13051d38097 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -17,6 +17,21 @@ #include #include "ar-internal.h" +/* + * Return true if there's sufficient Tx queue space. + */ +static bool rxrpc_check_tx_space(struct rxrpc_call *call, rxrpc_seq_t *_tx_win) +{ + unsigned int win_size = + min_t(unsigned int, call->tx_winsize, + call->cong_cwnd + call->cong_extra); + rxrpc_seq_t tx_win = READ_ONCE(call->tx_hard_ack); + + if (_tx_win) + *_tx_win = tx_win; + return call->tx_top - tx_win < win_size; +} + /* * Wait for space to appear in the Tx queue or a signal to occur. */ @@ -26,9 +41,7 @@ static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx, { for (;;) { set_current_state(TASK_INTERRUPTIBLE); - if (call->tx_top - call->tx_hard_ack < - min_t(unsigned int, call->tx_winsize, - call->cong_cwnd + call->cong_extra)) + if (rxrpc_check_tx_space(call, NULL)) return 0; if (call->state >= RXRPC_CALL_COMPLETE) @@ -68,9 +81,7 @@ static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx, set_current_state(TASK_UNINTERRUPTIBLE); tx_win = READ_ONCE(call->tx_hard_ack); - if (call->tx_top - tx_win < - min_t(unsigned int, call->tx_winsize, - call->cong_cwnd + call->cong_extra)) + if (rxrpc_check_tx_space(call, &tx_win)) return 0; if (call->state >= RXRPC_CALL_COMPLETE) @@ -302,9 +313,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, _debug("alloc"); - if (call->tx_top - call->tx_hard_ack >= - min_t(unsigned int, call->tx_winsize, - call->cong_cwnd + call->cong_extra)) { + if (!rxrpc_check_tx_space(call, NULL)) { ret = -EAGAIN; if (msg->msg_flags & MSG_DONTWAIT) goto maybe_error; From e138aa7d3271ac1b0690ae2c9b04d51468dce1d6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 13 Mar 2020 09:22:09 +0000 Subject: [PATCH 169/372] rxrpc: Fix call interruptibility handling Fix the interruptibility of kernel-initiated client calls so that they're either only interruptible when they're waiting for a call slot to come available or they're not interruptible at all. Either way, they're not interruptible during transmission. This should help prevent StoreData calls from being interrupted when writeback is in progress. It doesn't, however, handle interruption during the receive phase. Userspace-initiated calls are still interruptable. After the signal has been handled, sendmsg() will return the amount of data copied out of the buffer and userspace can perform another sendmsg() call to continue transmission. Fixes: bc5e3a546d55 ("rxrpc: Use MSG_WAITALL to tell sendmsg() to temporarily ignore signals") Signed-off-by: David Howells --- fs/afs/rxrpc.c | 3 ++- include/net/af_rxrpc.h | 8 +++++++- net/rxrpc/af_rxrpc.c | 4 ++-- net/rxrpc/ar-internal.h | 4 ++-- net/rxrpc/call_object.c | 3 +-- net/rxrpc/conn_client.c | 13 +++++++++--- net/rxrpc/sendmsg.c | 44 +++++++++++++++++++++++++++++++++-------- 7 files changed, 60 insertions(+), 19 deletions(-) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 58d396592250..4c28712bb7f6 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -413,7 +413,8 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp) afs_wake_up_async_call : afs_wake_up_call_waiter), call->upgrade, - call->intr, + (call->intr ? RXRPC_PREINTERRUPTIBLE : + RXRPC_UNINTERRUPTIBLE), call->debug_id); if (IS_ERR(rxcall)) { ret = PTR_ERR(rxcall); diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index 1abae3c340a5..8e547b4d88c8 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -16,6 +16,12 @@ struct sock; struct socket; struct rxrpc_call; +enum rxrpc_interruptibility { + RXRPC_INTERRUPTIBLE, /* Call is interruptible */ + RXRPC_PREINTERRUPTIBLE, /* Call can be cancelled whilst waiting for a slot */ + RXRPC_UNINTERRUPTIBLE, /* Call should not be interruptible at all */ +}; + /* * Debug ID counter for tracing. */ @@ -41,7 +47,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *, gfp_t, rxrpc_notify_rx_t, bool, - bool, + enum rxrpc_interruptibility, unsigned int); int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *, struct msghdr *, size_t, diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index fe42f986cd94..7603cf811f75 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -285,7 +285,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, gfp_t gfp, rxrpc_notify_rx_t notify_rx, bool upgrade, - bool intr, + enum rxrpc_interruptibility interruptibility, unsigned int debug_id) { struct rxrpc_conn_parameters cp; @@ -310,7 +310,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, memset(&p, 0, sizeof(p)); p.user_call_ID = user_call_ID; p.tx_total_len = tx_total_len; - p.intr = intr; + p.interruptibility = interruptibility; memset(&cp, 0, sizeof(cp)); cp.local = rx->local; diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 7d730c438404..1f72f43b082d 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -489,7 +489,6 @@ enum rxrpc_call_flag { RXRPC_CALL_BEGAN_RX_TIMER, /* We began the expect_rx_by timer */ RXRPC_CALL_RX_HEARD, /* The peer responded at least once to this call */ RXRPC_CALL_RX_UNDERRUN, /* Got data underrun */ - RXRPC_CALL_IS_INTR, /* The call is interruptible */ RXRPC_CALL_DISCONNECTED, /* The call has been disconnected */ }; @@ -598,6 +597,7 @@ struct rxrpc_call { atomic_t usage; u16 service_id; /* service ID */ u8 security_ix; /* Security type */ + enum rxrpc_interruptibility interruptibility; /* At what point call may be interrupted */ u32 call_id; /* call ID on connection */ u32 cid; /* connection ID plus channel index */ int debug_id; /* debug ID for printks */ @@ -721,7 +721,7 @@ struct rxrpc_call_params { u32 normal; /* Max time since last call packet (msec) */ } timeouts; u8 nr_timeouts; /* Number of timeouts specified */ - bool intr; /* The call is interruptible */ + enum rxrpc_interruptibility interruptibility; /* How is interruptible is the call? */ }; struct rxrpc_send_params { diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index c9f34b0a11df..f07970207b54 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -237,8 +237,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, return call; } - if (p->intr) - __set_bit(RXRPC_CALL_IS_INTR, &call->flags); + call->interruptibility = p->interruptibility; call->tx_total_len = p->tx_total_len; trace_rxrpc_call(call->debug_id, rxrpc_call_new_client, atomic_read(&call->usage), diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index ea7d4c21f889..f2a1a5dbb5a7 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -655,13 +655,20 @@ static int rxrpc_wait_for_channel(struct rxrpc_call *call, gfp_t gfp) add_wait_queue_exclusive(&call->waitq, &myself); for (;;) { - if (test_bit(RXRPC_CALL_IS_INTR, &call->flags)) + switch (call->interruptibility) { + case RXRPC_INTERRUPTIBLE: + case RXRPC_PREINTERRUPTIBLE: set_current_state(TASK_INTERRUPTIBLE); - else + break; + case RXRPC_UNINTERRUPTIBLE: + default: set_current_state(TASK_UNINTERRUPTIBLE); + break; + } if (call->call_id) break; - if (test_bit(RXRPC_CALL_IS_INTR, &call->flags) && + if ((call->interruptibility == RXRPC_INTERRUPTIBLE || + call->interruptibility == RXRPC_PREINTERRUPTIBLE) && signal_pending(current)) { ret = -ERESTARTSYS; break; diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index a13051d38097..1eccfb92c9e1 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -62,7 +62,7 @@ static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx, * Wait for space to appear in the Tx queue uninterruptibly, but with * a timeout of 2*RTT if no progress was made and a signal occurred. */ -static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx, +static int rxrpc_wait_for_tx_window_waitall(struct rxrpc_sock *rx, struct rxrpc_call *call) { rxrpc_seq_t tx_start, tx_win; @@ -87,8 +87,7 @@ static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx, if (call->state >= RXRPC_CALL_COMPLETE) return call->error; - if (test_bit(RXRPC_CALL_IS_INTR, &call->flags) && - timeout == 0 && + if (timeout == 0 && tx_win == tx_start && signal_pending(current)) return -EINTR; @@ -102,6 +101,26 @@ static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx, } } +/* + * Wait for space to appear in the Tx queue uninterruptibly. + */ +static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx, + struct rxrpc_call *call, + long *timeo) +{ + for (;;) { + set_current_state(TASK_UNINTERRUPTIBLE); + if (rxrpc_check_tx_space(call, NULL)) + return 0; + + if (call->state >= RXRPC_CALL_COMPLETE) + return call->error; + + trace_rxrpc_transmit(call, rxrpc_transmit_wait); + *timeo = schedule_timeout(*timeo); + } +} + /* * wait for space to appear in the transmit/ACK window * - caller holds the socket locked @@ -119,10 +138,19 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx, add_wait_queue(&call->waitq, &myself); - if (waitall) - ret = rxrpc_wait_for_tx_window_nonintr(rx, call); - else - ret = rxrpc_wait_for_tx_window_intr(rx, call, timeo); + switch (call->interruptibility) { + case RXRPC_INTERRUPTIBLE: + if (waitall) + ret = rxrpc_wait_for_tx_window_waitall(rx, call); + else + ret = rxrpc_wait_for_tx_window_intr(rx, call, timeo); + break; + case RXRPC_PREINTERRUPTIBLE: + case RXRPC_UNINTERRUPTIBLE: + default: + ret = rxrpc_wait_for_tx_window_nonintr(rx, call, timeo); + break; + } remove_wait_queue(&call->waitq, &myself); set_current_state(TASK_RUNNING); @@ -628,7 +656,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) .call.tx_total_len = -1, .call.user_call_ID = 0, .call.nr_timeouts = 0, - .call.intr = true, + .call.interruptibility = RXRPC_INTERRUPTIBLE, .abort_code = 0, .command = RXRPC_CMD_SEND_DATA, .exclusive = false, From 498b577660f08cef5d9e78e0ed6dcd4c0939e98c Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 13 Mar 2020 17:30:27 +0000 Subject: [PATCH 170/372] rxrpc: Fix sendmsg(MSG_WAITALL) handling Fix the handling of sendmsg() with MSG_WAITALL for userspace to round the timeout for when a signal occurs up to at least two jiffies as a 1 jiffy timeout may end up being effectively 0 if jiffies wraps at the wrong time. Fixes: bc5e3a546d55 ("rxrpc: Use MSG_WAITALL to tell sendmsg() to temporarily ignore signals") Signed-off-by: David Howells --- net/rxrpc/sendmsg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 1eccfb92c9e1..0fcf157aa09f 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -71,8 +71,8 @@ static int rxrpc_wait_for_tx_window_waitall(struct rxrpc_sock *rx, rtt = READ_ONCE(call->peer->rtt); rtt2 = nsecs_to_jiffies64(rtt) * 2; - if (rtt2 < 1) - rtt2 = 1; + if (rtt2 < 2) + rtt2 = 2; timeout = rtt2; tx_start = READ_ONCE(call->tx_hard_ack); From 4636cf184d6d9a92a56c2554681ea520dd4fe49a Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 13 Mar 2020 13:36:01 +0000 Subject: [PATCH 171/372] afs: Fix some tracing details Fix a couple of tracelines to indicate the usage count after the atomic op, not the usage count before it to be consistent with other afs and rxrpc trace lines. Change the wording of the afs_call_trace_work trace ID label from "WORK" to "QUEUE" to reflect the fact that it's queueing work, not doing work. Fixes: 341f741f04be ("afs: Refcount the afs_call struct") Signed-off-by: David Howells --- fs/afs/rxrpc.c | 4 ++-- include/trace/events/afs.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 4c28712bb7f6..907d5948564a 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -169,7 +169,7 @@ void afs_put_call(struct afs_call *call) int n = atomic_dec_return(&call->usage); int o = atomic_read(&net->nr_outstanding_calls); - trace_afs_call(call, afs_call_trace_put, n + 1, o, + trace_afs_call(call, afs_call_trace_put, n, o, __builtin_return_address(0)); ASSERTCMP(n, >=, 0); @@ -736,7 +736,7 @@ static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall, u = atomic_fetch_add_unless(&call->usage, 1, 0); if (u != 0) { - trace_afs_call(call, afs_call_trace_wake, u, + trace_afs_call(call, afs_call_trace_wake, u + 1, atomic_read(&call->net->nr_outstanding_calls), __builtin_return_address(0)); diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index 564ba1b5cf57..c612cabbc378 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -233,7 +233,7 @@ enum afs_cb_break_reason { EM(afs_call_trace_get, "GET ") \ EM(afs_call_trace_put, "PUT ") \ EM(afs_call_trace_wake, "WAKE ") \ - E_(afs_call_trace_work, "WORK ") + E_(afs_call_trace_work, "QUEUE") #define afs_server_traces \ EM(afs_server_trace_alloc, "ALLOC ") \ From dde9f095583b3f375ba23979045ee10dfcebec2f Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 13 Mar 2020 13:46:08 +0000 Subject: [PATCH 172/372] afs: Fix handling of an abort from a service handler When an AFS service handler function aborts a call, AF_RXRPC marks the call as complete - which means that it's not going to get any more packets from the receiver. This is a problem because reception of the final ACK is what triggers afs_deliver_to_call() to drop the final ref on the afs_call object. Instead, aborted AFS service calls may then just sit around waiting for ever or until they're displaced by a new call on the same connection channel or a connection-level abort. Fix this by calling afs_set_call_complete() to finalise the afs_call struct representing the call. However, we then need to drop the ref that stops the call from being deallocated. We can do this in afs_set_call_complete(), as the work queue is holding a separate ref of its own, but then we shouldn't do it in afs_process_async_call() and afs_delete_async_call(). call->drop_ref is set to indicate that a ref needs dropping for a call and this is dealt with when we transition a call to AFS_CALL_COMPLETE. But then we also need to get rid of the ref that pins an asynchronous client call. We can do this by the same mechanism, setting call->drop_ref for an async client call too. We can also get rid of call->incoming since nothing ever sets it and only one thing ever checks it (futilely). A trace of the rxrpc_call and afs_call struct ref counting looks like: -0 [001] ..s5 164.764892: rxrpc_call: c=00000002 SEE u=3 sp=rxrpc_new_incoming_call+0x473/0xb34 a=00000000442095b5 -0 [001] .Ns5 164.766001: rxrpc_call: c=00000002 QUE u=4 sp=rxrpc_propose_ACK+0xbe/0x551 a=00000000442095b5 -0 [001] .Ns4 164.766005: rxrpc_call: c=00000002 PUT u=3 sp=rxrpc_new_incoming_call+0xa3f/0xb34 a=00000000442095b5 -0 [001] .Ns7 164.766433: afs_call: c=00000002 WAKE u=2 o=11 sp=rxrpc_notify_socket+0x196/0x33c kworker/1:2-1810 [001] ...1 164.768409: rxrpc_call: c=00000002 SEE u=3 sp=rxrpc_process_call+0x25/0x7ae a=00000000442095b5 kworker/1:2-1810 [001] ...1 164.769439: rxrpc_tx_packet: c=00000002 e9f1a7a8:95786a88:00000008:09c5 00000001 00000000 02 22 ACK CallAck kworker/1:2-1810 [001] ...1 164.769459: rxrpc_call: c=00000002 PUT u=2 sp=rxrpc_process_call+0x74f/0x7ae a=00000000442095b5 kworker/1:2-1810 [001] ...1 164.770794: afs_call: c=00000002 QUEUE u=3 o=12 sp=afs_deliver_to_call+0x449/0x72c kworker/1:2-1810 [001] ...1 164.770829: afs_call: c=00000002 PUT u=2 o=12 sp=afs_process_async_call+0xdb/0x11e kworker/1:2-1810 [001] ...2 164.771084: rxrpc_abort: c=00000002 95786a88:00000008 s=0 a=1 e=1 K-1 kworker/1:2-1810 [001] ...1 164.771461: rxrpc_tx_packet: c=00000002 e9f1a7a8:95786a88:00000008:09c5 00000002 00000000 04 00 ABORT CallAbort kworker/1:2-1810 [001] ...1 164.771466: afs_call: c=00000002 PUT u=1 o=12 sp=SRXAFSCB_ProbeUuid+0xc1/0x106 The abort generated in SRXAFSCB_ProbeUuid(), labelled "K-1", indicates that the local filesystem/cache manager didn't recognise the UUID as its own. Fixes: 2067b2b3f484 ("afs: Fix the CB.ProbeUuid service handler to reply correctly") Signed-off-by: David Howells --- fs/afs/cmservice.c | 14 ++++++++++++-- fs/afs/internal.h | 12 ++++++++++-- fs/afs/rxrpc.c | 33 ++++----------------------------- 3 files changed, 26 insertions(+), 33 deletions(-) diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index ff3994a6be23..6765949b3aab 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -243,6 +243,17 @@ static void afs_cm_destructor(struct afs_call *call) call->buffer = NULL; } +/* + * Abort a service call from within an action function. + */ +static void afs_abort_service_call(struct afs_call *call, u32 abort_code, int error, + const char *why) +{ + rxrpc_kernel_abort_call(call->net->socket, call->rxcall, + abort_code, error, why); + afs_set_call_complete(call, error, 0); +} + /* * The server supplied a list of callbacks that it wanted to break. */ @@ -510,8 +521,7 @@ static void SRXAFSCB_ProbeUuid(struct work_struct *work) if (memcmp(r, &call->net->uuid, sizeof(call->net->uuid)) == 0) afs_send_empty_reply(call); else - rxrpc_kernel_abort_call(call->net->socket, call->rxcall, - 1, 1, "K-1"); + afs_abort_service_call(call, 1, 1, "K-1"); afs_put_call(call); _leave(""); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 1d81fc4c3058..52de2112e1b1 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -154,7 +154,7 @@ struct afs_call { }; unsigned char unmarshall; /* unmarshalling phase */ unsigned char addr_ix; /* Address in ->alist */ - bool incoming; /* T if incoming call */ + bool drop_ref; /* T if need to drop ref for incoming call */ bool send_pages; /* T if data from mapping should be sent */ bool need_attention; /* T if RxRPC poked us */ bool async; /* T if asynchronous */ @@ -1209,8 +1209,16 @@ static inline void afs_set_call_complete(struct afs_call *call, ok = true; } spin_unlock_bh(&call->state_lock); - if (ok) + if (ok) { trace_afs_call_done(call); + + /* Asynchronous calls have two refs to release - one from the alloc and + * one queued with the work item - and we can't just deallocate the + * call because the work item may be queued again. + */ + if (call->drop_ref) + afs_put_call(call); + } } /* diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 907d5948564a..972e3aafa361 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -18,7 +18,6 @@ struct workqueue_struct *afs_async_calls; static void afs_wake_up_call_waiter(struct sock *, struct rxrpc_call *, unsigned long); static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned long); -static void afs_delete_async_call(struct work_struct *); static void afs_process_async_call(struct work_struct *); static void afs_rx_new_call(struct sock *, struct rxrpc_call *, unsigned long); static void afs_rx_discard_new_call(struct rxrpc_call *, unsigned long); @@ -402,8 +401,10 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp) /* If the call is going to be asynchronous, we need an extra ref for * the call to hold itself so the caller need not hang on to its ref. */ - if (call->async) + if (call->async) { afs_get_call(call, afs_call_trace_get); + call->drop_ref = true; + } /* create a call */ rxcall = rxrpc_kernel_begin_call(call->net->socket, srx, call->key, @@ -585,8 +586,6 @@ static void afs_deliver_to_call(struct afs_call *call) done: if (call->type->done) call->type->done(call); - if (state == AFS_CALL_COMPLETE && call->incoming) - afs_put_call(call); out: _leave(""); return; @@ -745,21 +744,6 @@ static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall, } } -/* - * Delete an asynchronous call. The work item carries a ref to the call struct - * that we need to release. - */ -static void afs_delete_async_call(struct work_struct *work) -{ - struct afs_call *call = container_of(work, struct afs_call, async_work); - - _enter(""); - - afs_put_call(call); - - _leave(""); -} - /* * Perform I/O processing on an asynchronous call. The work item carries a ref * to the call struct that we either need to release or to pass on. @@ -775,16 +759,6 @@ static void afs_process_async_call(struct work_struct *work) afs_deliver_to_call(call); } - if (call->state == AFS_CALL_COMPLETE) { - /* We have two refs to release - one from the alloc and one - * queued with the work item - and we can't just deallocate the - * call because the work item may be queued again. - */ - call->async_work.func = afs_delete_async_call; - if (!queue_work(afs_async_calls, &call->async_work)) - afs_put_call(call); - } - afs_put_call(call); _leave(""); } @@ -811,6 +785,7 @@ void afs_charge_preallocation(struct work_struct *work) if (!call) break; + call->drop_ref = true; call->async = true; call->state = AFS_CALL_SV_AWAIT_OP_ID; init_waitqueue_head(&call->waitq); From 7d7587db0d7fd1138f2afcffdc46a8e15630b944 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 12 Mar 2020 21:40:06 +0000 Subject: [PATCH 173/372] afs: Fix client call Rx-phase signal handling Fix the handling of signals in client rxrpc calls made by the afs filesystem. Ignore signals completely, leaving call abandonment or connection loss to be detected by timeouts inside AF_RXRPC. Allowing a filesystem call to be interrupted after the entire request has been transmitted and an abort sent means that the server may or may not have done the action - and we don't know. It may even be worse than that for older servers. Fixes: bc5e3a546d55 ("rxrpc: Use MSG_WAITALL to tell sendmsg() to temporarily ignore signals") Signed-off-by: David Howells --- fs/afs/rxrpc.c | 34 ++-------------------------------- include/net/af_rxrpc.h | 4 +--- net/rxrpc/af_rxrpc.c | 33 +++------------------------------ net/rxrpc/ar-internal.h | 1 - net/rxrpc/input.c | 1 - 5 files changed, 6 insertions(+), 67 deletions(-) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 972e3aafa361..1ecc67da6c1a 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -604,11 +604,7 @@ call_complete: long afs_wait_for_call_to_complete(struct afs_call *call, struct afs_addr_cursor *ac) { - signed long rtt2, timeout; long ret; - bool stalled = false; - u64 rtt; - u32 life, last_life; bool rxrpc_complete = false; DECLARE_WAITQUEUE(myself, current); @@ -619,14 +615,6 @@ long afs_wait_for_call_to_complete(struct afs_call *call, if (ret < 0) goto out; - rtt = rxrpc_kernel_get_rtt(call->net->socket, call->rxcall); - rtt2 = nsecs_to_jiffies64(rtt) * 2; - if (rtt2 < 2) - rtt2 = 2; - - timeout = rtt2; - rxrpc_kernel_check_life(call->net->socket, call->rxcall, &last_life); - add_wait_queue(&call->waitq, &myself); for (;;) { set_current_state(TASK_UNINTERRUPTIBLE); @@ -637,37 +625,19 @@ long afs_wait_for_call_to_complete(struct afs_call *call, call->need_attention = false; __set_current_state(TASK_RUNNING); afs_deliver_to_call(call); - timeout = rtt2; continue; } if (afs_check_call_state(call, AFS_CALL_COMPLETE)) break; - if (!rxrpc_kernel_check_life(call->net->socket, call->rxcall, &life)) { + if (!rxrpc_kernel_check_life(call->net->socket, call->rxcall)) { /* rxrpc terminated the call. */ rxrpc_complete = true; break; } - if (call->intr && timeout == 0 && - life == last_life && signal_pending(current)) { - if (stalled) - break; - __set_current_state(TASK_RUNNING); - rxrpc_kernel_probe_life(call->net->socket, call->rxcall); - timeout = rtt2; - stalled = true; - continue; - } - - if (life != last_life) { - timeout = rtt2; - last_life = life; - stalled = false; - } - - timeout = schedule_timeout(timeout); + schedule(); } remove_wait_queue(&call->waitq, &myself); diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index 8e547b4d88c8..04e97bab6f28 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -64,9 +64,7 @@ int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t, rxrpc_user_attach_call_t, unsigned long, gfp_t, unsigned int); void rxrpc_kernel_set_tx_length(struct socket *, struct rxrpc_call *, s64); -bool rxrpc_kernel_check_life(const struct socket *, const struct rxrpc_call *, - u32 *); -void rxrpc_kernel_probe_life(struct socket *, struct rxrpc_call *); +bool rxrpc_kernel_check_life(const struct socket *, const struct rxrpc_call *); u32 rxrpc_kernel_get_epoch(struct socket *, struct rxrpc_call *); bool rxrpc_kernel_get_reply_time(struct socket *, struct rxrpc_call *, ktime_t *); diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 7603cf811f75..15ee92d79581 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -371,44 +371,17 @@ EXPORT_SYMBOL(rxrpc_kernel_end_call); * rxrpc_kernel_check_life - Check to see whether a call is still alive * @sock: The socket the call is on * @call: The call to check - * @_life: Where to store the life value * - * Allow a kernel service to find out whether a call is still alive - ie. we're - * getting ACKs from the server. Passes back in *_life a number representing - * the life state which can be compared to that returned by a previous call and - * return true if the call is still alive. - * - * If the life state stalls, rxrpc_kernel_probe_life() should be called and - * then 2RTT waited. + * Allow a kernel service to find out whether a call is still alive - + * ie. whether it has completed. */ bool rxrpc_kernel_check_life(const struct socket *sock, - const struct rxrpc_call *call, - u32 *_life) + const struct rxrpc_call *call) { - *_life = call->acks_latest; return call->state != RXRPC_CALL_COMPLETE; } EXPORT_SYMBOL(rxrpc_kernel_check_life); -/** - * rxrpc_kernel_probe_life - Poke the peer to see if it's still alive - * @sock: The socket the call is on - * @call: The call to check - * - * In conjunction with rxrpc_kernel_check_life(), allow a kernel service to - * find out whether a call is still alive by pinging it. This should cause the - * life state to be bumped in about 2*RTT. - * - * The must be called in TASK_RUNNING state on pain of might_sleep() objecting. - */ -void rxrpc_kernel_probe_life(struct socket *sock, struct rxrpc_call *call) -{ - rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, true, false, - rxrpc_propose_ack_ping_for_check_life); - rxrpc_send_ack_packet(call, true, NULL); -} -EXPORT_SYMBOL(rxrpc_kernel_probe_life); - /** * rxrpc_kernel_get_epoch - Retrieve the epoch value from a call. * @sock: The socket the call is on diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 1f72f43b082d..3eb1ab40ca5c 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -675,7 +675,6 @@ struct rxrpc_call { /* transmission-phase ACK management */ ktime_t acks_latest_ts; /* Timestamp of latest ACK received */ - rxrpc_serial_t acks_latest; /* serial number of latest ACK received */ rxrpc_seq_t acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */ rxrpc_seq_t acks_lost_top; /* tx_top at the time lost-ack ping sent */ rxrpc_serial_t acks_lost_ping; /* Serial number of probe ACK */ diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index ef10fbf71b15..69e09d69c896 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -882,7 +882,6 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) before(prev_pkt, call->ackr_prev_seq)) goto out; call->acks_latest_ts = skb->tstamp; - call->acks_latest = sp->hdr.serial; call->ackr_first_seq = first_soft_ack; call->ackr_prev_seq = prev_pkt; From 82f2bc2fcc0160d6f82dd1ac64518ae0a4dd183f Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 11 Mar 2020 12:41:21 -0700 Subject: [PATCH 174/372] kbuild: Disable -Wpointer-to-enum-cast Clang's -Wpointer-to-int-cast deviates from GCC in that it warns when casting to enums. The kernel does this in certain places, such as device tree matches to set the version of the device being used, which allows the kernel to avoid using a gigantic union. https://elixir.bootlin.com/linux/v5.5.8/source/drivers/ata/ahci_brcm.c#L428 https://elixir.bootlin.com/linux/v5.5.8/source/drivers/ata/ahci_brcm.c#L402 https://elixir.bootlin.com/linux/v5.5.8/source/include/linux/mod_devicetable.h#L264 To avoid a ton of false positive warnings, disable this particular part of the warning, which has been split off into a separate diagnostic so that the entire warning does not need to be turned off for clang. It will be visible under W=1 in case people want to go about fixing these easily and enabling the warning treewide. Cc: stable@vger.kernel.org Link: https://github.com/ClangBuiltLinux/linux/issues/887 Link: https://github.com/llvm/llvm-project/commit/2a41b31fcdfcb67ab7038fc2ffb606fd50b83a84 Signed-off-by: Nathan Chancellor Signed-off-by: Masahiro Yamada --- scripts/Makefile.extrawarn | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index ecddf83ac142..ca08f2fe7c34 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -48,6 +48,7 @@ KBUILD_CFLAGS += -Wno-initializer-overrides KBUILD_CFLAGS += -Wno-format KBUILD_CFLAGS += -Wno-sign-compare KBUILD_CFLAGS += -Wno-format-zero-length +KBUILD_CFLAGS += $(call cc-disable-warning, pointer-to-enum-cast) endif endif From b0bb0c22c4db623f2e7b1a471596fbf1c22c6dc5 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Thu, 12 Mar 2020 14:09:54 +0800 Subject: [PATCH 175/372] iommu/vt-d: Fix the wrong printing in RHSA parsing When base address in RHSA structure doesn't match base address in each DRHD structure, the base address in last DRHD is printed out. This doesn't make sense when there are multiple DRHD units, fix it by printing the buggy RHSA's base address. Signed-off-by: Lu Baolu Signed-off-by: Zhenzhong Duan Fixes: fd0c8894893cb ("intel-iommu: Set a more specific taint flag for invalid BIOS DMAR tables") Signed-off-by: Joerg Roedel --- drivers/iommu/dmar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index c000ddff822e..c7b1461e8d0a 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -476,7 +476,7 @@ static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg) pr_warn(FW_BUG "Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n" "BIOS vendor: %s; Ver: %s; Product Version: %s\n", - drhd->reg_base_addr, + rhsa->base_address, dmi_get_system_info(DMI_BIOS_VENDOR), dmi_get_system_info(DMI_BIOS_VERSION), dmi_get_system_info(DMI_PRODUCT_VERSION)); From da72a379b2ec0bad3eb265787f7008bead0b040c Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Thu, 12 Mar 2020 14:09:55 +0800 Subject: [PATCH 176/372] iommu/vt-d: Ignore devices with out-of-spec domain number MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VMD subdevices are created with a PCI domain ID of 0x10000 or higher. These subdevices are also handled like all other PCI devices by dmar_pci_bus_notifier(). However, when dmar_alloc_pci_notify_info() take records of such devices, it will truncate the domain ID to a u16 value (in info->seg). The device at (e.g.) 10000:00:02.0 is then treated by the DMAR code as if it is 0000:00:02.0. In the unlucky event that a real device also exists at 0000:00:02.0 and also has a device-specific entry in the DMAR table, dmar_insert_dev_scope() will crash on:   BUG_ON(i >= devices_cnt); That's basically a sanity check that only one PCI device matches a single DMAR entry; in this case we seem to have two matching devices. Fix this by ignoring devices that have a domain number higher than what can be looked up in the DMAR table. This problem was carefully diagnosed by Jian-Hong Pan. Signed-off-by: Lu Baolu Signed-off-by: Daniel Drake Fixes: 59ce0515cdaf3 ("iommu/vt-d: Update DRHD/RMRR/ATSR device scope caches when PCI hotplug happens") Signed-off-by: Joerg Roedel --- drivers/iommu/dmar.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index c7b1461e8d0a..f77dae7ba7d4 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -128,6 +129,13 @@ dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event) BUG_ON(dev->is_virtfn); + /* + * Ignore devices that have a domain number higher than what can + * be looked up in DMAR, e.g. VMD subdevices with domain 0x10000 + */ + if (pci_domain_nr(dev->bus) > U16_MAX) + return NULL; + /* Only generate path[] for device addition event */ if (event == BUS_NOTIFY_ADD_DEVICE) for (tmp = dev; tmp; tmp = tmp->bus->self) From 730ad0ede130015a773229573559e97ba0943065 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Thu, 12 Mar 2020 05:18:39 -0500 Subject: [PATCH 177/372] iommu/amd: Fix IOMMU AVIC not properly update the is_run bit in IRTE Commit b9c6ff94e43a ("iommu/amd: Re-factor guest virtual APIC (de-)activation code") accidentally left out the ir_data pointer when calling modity_irte_ga(), which causes the function amd_iommu_update_ga() to return prematurely due to struct amd_ir_data.ref is NULL and the "is_run" bit of IRTE does not get updated properly. This results in bad I/O performance since IOMMU AVIC always generate GA Log entry and notify IOMMU driver and KVM when it receives interrupt from the PCI pass-through device instead of directly inject interrupt to the vCPU. Fixes by passing ir_data when calling modify_irte_ga() as done previously. Fixes: b9c6ff94e43a ("iommu/amd: Re-factor guest virtual APIC (de-)activation code") Signed-off-by: Suravee Suthikulpanit Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index aac132bd1ef0..20cce366e951 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -3826,7 +3826,7 @@ int amd_iommu_activate_guest_mode(void *data) entry->lo.fields_vapic.ga_tag = ir_data->ga_tag; return modify_irte_ga(ir_data->irq_2_irte.devid, - ir_data->irq_2_irte.index, entry, NULL); + ir_data->irq_2_irte.index, entry, ir_data); } EXPORT_SYMBOL(amd_iommu_activate_guest_mode); @@ -3852,7 +3852,7 @@ int amd_iommu_deactivate_guest_mode(void *data) APICID_TO_IRTE_DEST_HI(cfg->dest_apicid); return modify_irte_ga(ir_data->irq_2_irte.devid, - ir_data->irq_2_irte.index, entry, NULL); + ir_data->irq_2_irte.index, entry, ir_data); } EXPORT_SYMBOL(amd_iommu_deactivate_guest_mode); From 7a57c09bb1cb89239f38f690b87cdf2c7db76c34 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 12 Mar 2020 11:04:16 -0700 Subject: [PATCH 178/372] KVM: VMX: Condition ENCLS-exiting enabling on CPU support for SGX1 Enable ENCLS-exiting (and thus set vmcs.ENCLS_EXITING_BITMAP) only if the CPU supports SGX1. Per Intel's SDM, all ENCLS leafs #UD if SGX1 is not supported[*], i.e. intercepting ENCLS to inject a #UD is unnecessary. Avoiding ENCLS-exiting even when it is reported as supported by the CPU works around a reported issue where SGX is "hard" disabled after an S3 suspend/resume cycle, i.e. CPUID.0x7.SGX=0 and the VMCS field/control are enumerated as unsupported. While the root cause of the S3 issue is unknown, it's definitely _not_ a KVM (or kernel) bug, i.e. this is a workaround for what is most likely a hardware or firmware issue. As a bonus side effect, KVM saves a VMWRITE when first preparing vmcs01 and vmcs02. Note, SGX must be disabled in BIOS to take advantage of this workaround [*] The additional ENCLS CPUID check on SGX1 exists so that SGX can be globally "soft" disabled post-reset, e.g. if #MC bits in MCi_CTL are cleared. Soft disabled meaning disabling SGX without clearing the primary CPUID bit (in leaf 0x7) and without poking into non-SGX CPU paths, e.g. for the VMCS controls. Fixes: 0b665d304028 ("KVM: vmx: Inject #UD for SGX ENCLS instruction in guest") Reported-by: Toni Spets Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 40b1e6138cd5..26f8f31563e9 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2338,6 +2338,17 @@ static void hardware_disable(void) kvm_cpu_vmxoff(); } +/* + * There is no X86_FEATURE for SGX yet, but anyway we need to query CPUID + * directly instead of going through cpu_has(), to ensure KVM is trapping + * ENCLS whenever it's supported in hardware. It does not matter whether + * the host OS supports or has enabled SGX. + */ +static bool cpu_has_sgx(void) +{ + return cpuid_eax(0) >= 0x12 && (cpuid_eax(0x12) & BIT(0)); +} + static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, u32 msr, u32 *result) { @@ -2418,8 +2429,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE | SECONDARY_EXEC_PT_USE_GPA | SECONDARY_EXEC_PT_CONCEAL_VMX | - SECONDARY_EXEC_ENABLE_VMFUNC | - SECONDARY_EXEC_ENCLS_EXITING; + SECONDARY_EXEC_ENABLE_VMFUNC; + if (cpu_has_sgx()) + opt2 |= SECONDARY_EXEC_ENCLS_EXITING; if (adjust_vmx_controls(min2, opt2, MSR_IA32_VMX_PROCBASED_CTLS2, &_cpu_based_2nd_exec_control) < 0) From 0c22056f8c43265da21a3dfe7b7e1379e5ff7c72 Mon Sep 17 00:00:00 2001 From: Nitesh Narayan Lal Date: Fri, 13 Mar 2020 09:16:24 -0400 Subject: [PATCH 179/372] KVM: x86: Initializing all kvm_lapic_irq fields in ioapic_write_indirect Previously all fields of structure kvm_lapic_irq were not initialized before it was passed to kvm_bitmap_or_dest_vcpus(). Which will cause an issue when any of those fields are used for processing a request. For example not initializing the msi_redir_hint field before passing to the kvm_bitmap_or_dest_vcpus(), may lead to a misbehavior of kvm_apic_map_get_dest_lapic(). This will specifically happen when the kvm_lowest_prio_delivery() returns TRUE due to a non-zero garbage value of msi_redir_hint, which should not happen as the request belongs to APIC fixed delivery mode and we do not want to deliver the interrupt only to the lowest priority candidate. This patch initializes all the fields of kvm_lapic_irq based on the values of ioapic redirect_entry object before passing it on to kvm_bitmap_or_dest_vcpus(). Fixes: 7ee30bc132c6 ("KVM: x86: deliver KVM IOAPIC scan request to target vCPUs") Signed-off-by: Nitesh Narayan Lal Reviewed-by: Vitaly Kuznetsov [Set level to false since the value doesn't really matter. Suggested by Vitaly Kuznetsov. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/kvm/ioapic.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 7668fed1ce65..750ff0b29404 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -378,12 +378,15 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) if (e->fields.delivery_mode == APIC_DM_FIXED) { struct kvm_lapic_irq irq; - irq.shorthand = APIC_DEST_NOSHORT; irq.vector = e->fields.vector; irq.delivery_mode = e->fields.delivery_mode << 8; - irq.dest_id = e->fields.dest_id; irq.dest_mode = kvm_lapic_irq_dest_mode(!!e->fields.dest_mode); + irq.level = false; + irq.trig_mode = e->fields.trig_mode; + irq.shorthand = APIC_DEST_NOSHORT; + irq.dest_id = e->fields.dest_id; + irq.msi_redir_hint = false; bitmap_zero(&vcpu_bitmap, 16); kvm_bitmap_or_dest_vcpus(ioapic->kvm, &irq, &vcpu_bitmap); From d01fd161e85904064290435f67f4ed59af5daf74 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 11 Mar 2020 11:56:49 +0000 Subject: [PATCH 180/372] irqchip/gic-v3: Workaround Cavium erratum 38539 when reading GICD_TYPER2 Despite the architecture spec requiring that reserved registers in the GIC distributor memory map are RES0 (and thus are not allowed to generate an exception), the Cavium ThunderX (aka TX1) SoC explodes as such: [ 0.000000] GICv3: GIC: Using split EOI/Deactivate mode [ 0.000000] GICv3: 128 SPIs implemented [ 0.000000] GICv3: 0 Extended SPIs implemented [ 0.000000] Internal error: synchronous external abort: 96000210 [#1] SMP [ 0.000000] Modules linked in: [ 0.000000] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.4.0-rc4-00035-g3cf6a3d5725f #7956 [ 0.000000] Hardware name: cavium,thunder-88xx (DT) [ 0.000000] pstate: 60000085 (nZCv daIf -PAN -UAO) [ 0.000000] pc : __raw_readl+0x0/0x8 [ 0.000000] lr : gic_init_bases+0x110/0x560 [ 0.000000] sp : ffff800011243d90 [ 0.000000] x29: ffff800011243d90 x28: 0000000000000000 [ 0.000000] x27: 0000000000000018 x26: 0000000000000002 [ 0.000000] x25: ffff8000116f0000 x24: ffff000fbe6a2c80 [ 0.000000] x23: 0000000000000000 x22: ffff010fdc322b68 [ 0.000000] x21: ffff800010a7a208 x20: 00000000009b0404 [ 0.000000] x19: ffff80001124dad0 x18: 0000000000000010 [ 0.000000] x17: 000000004d8d492b x16: 00000000f67eb9af [ 0.000000] x15: ffffffffffffffff x14: ffff800011249908 [ 0.000000] x13: ffff800091243ae7 x12: ffff800011243af4 [ 0.000000] x11: ffff80001126e000 x10: ffff800011243a70 [ 0.000000] x9 : 00000000ffffffd0 x8 : ffff80001069c828 [ 0.000000] x7 : 0000000000000059 x6 : ffff8000113fb4d1 [ 0.000000] x5 : 0000000000000001 x4 : 0000000000000000 [ 0.000000] x3 : 0000000000000000 x2 : 0000000000000000 [ 0.000000] x1 : 0000000000000000 x0 : ffff8000116f000c [ 0.000000] Call trace: [ 0.000000] __raw_readl+0x0/0x8 [ 0.000000] gic_of_init+0x188/0x224 [ 0.000000] of_irq_init+0x200/0x3cc [ 0.000000] irqchip_init+0x1c/0x40 [ 0.000000] init_IRQ+0x160/0x1d0 [ 0.000000] start_kernel+0x2ec/0x4b8 [ 0.000000] Code: a8c47bfd d65f03c0 d538d080 d65f03c0 (b9400000) when reading the GICv4.1 GICD_TYPER2 register, which is unexpected... Work around it by adding a new quirk for the following variants: ThunderX: CN88xx OCTEON TX: CN83xx, CN81xx OCTEON TX2: CN93xx, CN96xx, CN98xx, CNF95xx* and use this flag to avoid accessing GICD_TYPER2. Note that all reserved registers (including redistributors and ITS) are impacted by this erratum, but that only GICD_TYPER2 has to be worked around so far. Signed-off-by: Marc Zyngier Tested-by: Robert Richter Tested-by: Mark Salter Tested-by: Tim Harvey Acked-by: Catalin Marinas Acked-by: Robert Richter Link: https://lore.kernel.org/r/20191027144234.8395-11-maz@kernel.org Link: https://lore.kernel.org/r/20200311115649.26060-1-maz@kernel.org --- Documentation/arm64/silicon-errata.rst | 2 ++ drivers/irqchip/irq-gic-v3.c | 30 +++++++++++++++++++++++++- 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index 99b2545455ff..a1563da07b2c 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -108,6 +108,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | Cavium | ThunderX GICv3 | #23154 | CAVIUM_ERRATUM_23154 | +----------------+-----------------+-----------------+-----------------------------+ +| Cavium | ThunderX GICv3 | #38539 | N/A | ++----------------+-----------------+-----------------+-----------------------------+ | Cavium | ThunderX Core | #27456 | CAVIUM_ERRATUM_27456 | +----------------+-----------------+-----------------+-----------------------------+ | Cavium | ThunderX Core | #30115 | CAVIUM_ERRATUM_30115 | diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index c1f7af9d9ae7..1eec9d4649d5 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -34,6 +34,7 @@ #define GICD_INT_NMI_PRI (GICD_INT_DEF_PRI & ~0x80) #define FLAGS_WORKAROUND_GICR_WAKER_MSM8996 (1ULL << 0) +#define FLAGS_WORKAROUND_CAVIUM_ERRATUM_38539 (1ULL << 1) struct redist_region { void __iomem *redist_base; @@ -1464,6 +1465,15 @@ static bool gic_enable_quirk_msm8996(void *data) return true; } +static bool gic_enable_quirk_cavium_38539(void *data) +{ + struct gic_chip_data *d = data; + + d->flags |= FLAGS_WORKAROUND_CAVIUM_ERRATUM_38539; + + return true; +} + static bool gic_enable_quirk_hip06_07(void *data) { struct gic_chip_data *d = data; @@ -1502,6 +1512,19 @@ static const struct gic_quirk gic_quirks[] = { .mask = 0xffffffff, .init = gic_enable_quirk_hip06_07, }, + { + /* + * Reserved register accesses generate a Synchronous + * External Abort. This erratum applies to: + * - ThunderX: CN88xx + * - OCTEON TX: CN83xx, CN81xx + * - OCTEON TX2: CN93xx, CN96xx, CN98xx, CNF95xx* + */ + .desc = "GICv3: Cavium erratum 38539", + .iidr = 0xa000034c, + .mask = 0xe8f00fff, + .init = gic_enable_quirk_cavium_38539, + }, { } }; @@ -1577,7 +1600,12 @@ static int __init gic_init_bases(void __iomem *dist_base, pr_info("%d SPIs implemented\n", GIC_LINE_NR - 32); pr_info("%d Extended SPIs implemented\n", GIC_ESPI_NR); - gic_data.rdists.gicd_typer2 = readl_relaxed(gic_data.dist_base + GICD_TYPER2); + /* + * ThunderX1 explodes on reading GICD_TYPER2, in violation of the + * architecture spec (which says that reserved registers are RES0). + */ + if (!(gic_data.flags & FLAGS_WORKAROUND_CAVIUM_ERRATUM_38539)) + gic_data.rdists.gicd_typer2 = readl_relaxed(gic_data.dist_base + GICD_TYPER2); gic_data.domain = irq_domain_create_tree(handle, &gic_irq_domain_ops, &gic_data); From 95fa10103dabc38be5de8efdfced5e67576ed896 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 9 Mar 2020 16:52:11 +0100 Subject: [PATCH 181/372] KVM: nVMX: avoid NULL pointer dereference with incorrect EVMCS GPAs When an EVMCS enabled L1 guest on KVM will tries doing enlightened VMEnter with EVMCS GPA = 0 the host crashes because the evmcs_gpa != vmx->nested.hv_evmcs_vmptr condition in nested_vmx_handle_enlightened_vmptrld() will evaluate to false (as nested.hv_evmcs_vmptr is zeroed after init). The crash will happen on vmx->nested.hv_evmcs pointer dereference. Another problematic EVMCS ptr value is '-1' but it only causes host crash after nested_release_evmcs() invocation. The problem is exactly the same as with '0', we mistakenly think that the EVMCS pointer hasn't changed and thus nested.hv_evmcs_vmptr is valid. Resolve the issue by adding an additional !vmx->nested.hv_evmcs check to nested_vmx_handle_enlightened_vmptrld(), this way we will always be trying kvm_vcpu_map() when nested.hv_evmcs is NULL and this is supposed to catch all invalid EVMCS GPAs. Also, initialize hv_evmcs_vmptr to '0' in nested_release_evmcs() to be consistent with initialization where we don't currently set hv_evmcs_vmptr to '-1'. Cc: stable@vger.kernel.org Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index e920d7834d73..9750e590c89d 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -224,7 +224,7 @@ static inline void nested_release_evmcs(struct kvm_vcpu *vcpu) return; kvm_vcpu_unmap(vcpu, &vmx->nested.hv_evmcs_map, true); - vmx->nested.hv_evmcs_vmptr = -1ull; + vmx->nested.hv_evmcs_vmptr = 0; vmx->nested.hv_evmcs = NULL; } @@ -1923,7 +1923,8 @@ static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu, if (!nested_enlightened_vmentry(vcpu, &evmcs_gpa)) return 1; - if (unlikely(evmcs_gpa != vmx->nested.hv_evmcs_vmptr)) { + if (unlikely(!vmx->nested.hv_evmcs || + evmcs_gpa != vmx->nested.hv_evmcs_vmptr)) { if (!vmx->nested.hv_evmcs) vmx->nested.current_vmptr = -1ull; From 1da8347d8505c137fb07ff06bbcd3f2bf37409bc Mon Sep 17 00:00:00 2001 From: Megha Dey Date: Sat, 14 Mar 2020 11:39:59 +0800 Subject: [PATCH 182/372] iommu/vt-d: Populate debugfs if IOMMUs are detected Currently, the intel iommu debugfs directory(/sys/kernel/debug/iommu/intel) gets populated only when DMA remapping is enabled (dmar_disabled = 0) irrespective of whether interrupt remapping is enabled or not. Instead, populate the intel iommu debugfs directory if any IOMMUs are detected. Cc: Dan Carpenter Fixes: ee2636b8670b1 ("iommu/vt-d: Enable base Intel IOMMU debugfs support") Signed-off-by: Megha Dey Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu-debugfs.c | 11 ++++++++++- drivers/iommu/intel-iommu.c | 4 +++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel-iommu-debugfs.c b/drivers/iommu/intel-iommu-debugfs.c index 0a7791934a16..3eb1fe240fb0 100644 --- a/drivers/iommu/intel-iommu-debugfs.c +++ b/drivers/iommu/intel-iommu-debugfs.c @@ -282,9 +282,16 @@ static int dmar_translation_struct_show(struct seq_file *m, void *unused) { struct dmar_drhd_unit *drhd; struct intel_iommu *iommu; + u32 sts; rcu_read_lock(); for_each_active_iommu(iommu, drhd) { + sts = dmar_readl(iommu->reg + DMAR_GSTS_REG); + if (!(sts & DMA_GSTS_TES)) { + seq_printf(m, "DMA Remapping is not enabled on %s\n", + iommu->name); + continue; + } root_tbl_walk(m, iommu); seq_putc(m, '\n'); } @@ -425,6 +432,7 @@ static int ir_translation_struct_show(struct seq_file *m, void *unused) struct dmar_drhd_unit *drhd; struct intel_iommu *iommu; u64 irta; + u32 sts; rcu_read_lock(); for_each_active_iommu(iommu, drhd) { @@ -434,7 +442,8 @@ static int ir_translation_struct_show(struct seq_file *m, void *unused) seq_printf(m, "Remapped Interrupt supported on IOMMU: %s\n", iommu->name); - if (iommu->ir_table) { + sts = dmar_readl(iommu->reg + DMAR_GSTS_REG); + if (iommu->ir_table && (sts & DMA_GSTS_IRES)) { irta = virt_to_phys(iommu->ir_table->base); seq_printf(m, " IR table address:%llx\n", irta); ir_tbl_remap_entry_show(m, iommu); diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 2943d3600b7c..4be549478691 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5133,6 +5133,9 @@ int __init intel_iommu_init(void) down_write(&dmar_global_lock); + if (!no_iommu) + intel_iommu_debugfs_init(); + if (no_iommu || dmar_disabled) { /* * We exit the function here to ensure IOMMU's remapping and @@ -5228,7 +5231,6 @@ int __init intel_iommu_init(void) pr_info("Intel(R) Virtualization Technology for Directed I/O\n"); intel_iommu_enabled = 1; - intel_iommu_debugfs_init(); return 0; From f1d96a8fcbbbb22d4fbc1d69eaaa678bbb0ff6e2 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 13 Mar 2020 22:29:14 +0300 Subject: [PATCH 183/372] io_uring: NULL-deref for IOSQE_{ASYNC,DRAIN} Processing links, io_submit_sqe() prepares requests, drops sqes, and passes them with sqe=NULL to io_queue_sqe(). There IOSQE_DRAIN and/or IOSQE_ASYNC requests will go through the same prep, which doesn't expect sqe=NULL and fail with NULL pointer deference. Always do full prepare including io_alloc_async_ctx() for linked requests, and then it can skip the second preparation. Cc: stable@vger.kernel.org # 5.5 Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/io_uring.c b/fs/io_uring.c index 1b2517291b78..b1fbc4424aa6 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -4131,6 +4131,9 @@ static int io_req_defer_prep(struct io_kiocb *req, { ssize_t ret = 0; + if (!sqe) + return 0; + if (io_op_defs[req->opcode].file_table) { ret = io_grab_files(req); if (unlikely(ret)) @@ -4907,6 +4910,11 @@ err_req: if (sqe_flags & (IOSQE_IO_LINK|IOSQE_IO_HARDLINK)) { req->flags |= REQ_F_LINK; INIT_LIST_HEAD(&req->link_list); + + if (io_alloc_async_ctx(req)) { + ret = -EAGAIN; + goto err_req; + } ret = io_req_defer_prep(req, sqe); if (ret) req->flags |= REQ_F_FAIL_LINK; From b1be2e8cd290f620777bfdb8aa00890cd2fa02b5 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 11 Mar 2020 22:42:27 -0700 Subject: [PATCH 184/372] net_sched: hold rtnl lock in tcindex_partial_destroy_work() syzbot reported a use-after-free in tcindex_dump(). This is due to the lack of RTNL in the deferred rcu work. We queue this work with RTNL in tcindex_change(), later, tcindex_dump() is called: fh = tp->ops->get(tp, t->tcm_handle); ... err = tp->ops->change(..., &fh, ...); tfilter_notify(..., fh, ...); but there is nothing to serialize the pending tcindex_partial_destroy_work() with tcindex_dump(). Fix this by simply holding RTNL in tcindex_partial_destroy_work(), so that it won't be called until RTNL is released after tc_new_tfilter() is completed. Reported-and-tested-by: syzbot+653090db2562495901dc@syzkaller.appspotmail.com Fixes: 3d210534cc93 ("net_sched: fix a race condition in tcindex_destroy()") Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_tcindex.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 09b7dc5fe7e0..f2cb24b6f0cf 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -261,8 +261,10 @@ static void tcindex_partial_destroy_work(struct work_struct *work) struct tcindex_data, rwork); + rtnl_lock(); kfree(p->perfect); kfree(p); + rtnl_unlock(); } static void tcindex_free_perfect_hash(struct tcindex_data *cp) From 0d1c3530e1bd38382edef72591b78e877e0edcd3 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 11 Mar 2020 22:42:28 -0700 Subject: [PATCH 185/372] net_sched: keep alloc_hash updated after hash allocation In commit 599be01ee567 ("net_sched: fix an OOB access in cls_tcindex") I moved cp->hash calculation before the first tcindex_alloc_perfect_hash(), but cp->alloc_hash is left untouched. This difference could lead to another out of bound access. cp->alloc_hash should always be the size allocated, we should update it after this tcindex_alloc_perfect_hash(). Reported-and-tested-by: syzbot+dcc34d54d68ef7d2d53d@syzkaller.appspotmail.com Reported-and-tested-by: syzbot+c72da7b9ed57cde6fca2@syzkaller.appspotmail.com Fixes: 599be01ee567 ("net_sched: fix an OOB access in cls_tcindex") Cc: Jamal Hadi Salim Cc: Jiri Pirko Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_tcindex.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index f2cb24b6f0cf..9904299424a1 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -359,6 +359,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, if (tcindex_alloc_perfect_hash(net, cp) < 0) goto errout; + cp->alloc_hash = cp->hash; for (i = 0; i < min(cp->hash, p->hash); i++) cp->perfect[i].res = p->perfect[i].res; balloc = 1; From 13d0f7b814d9b4c67e60d8c2820c86ea181e7d99 Mon Sep 17 00:00:00 2001 From: Bruno Meneguele Date: Thu, 12 Mar 2020 20:08:20 -0300 Subject: [PATCH 186/372] net/bpfilter: fix dprintf usage for /dev/kmsg The bpfilter UMH code was recently changed to log its informative messages to /dev/kmsg, however this interface doesn't support SEEK_CUR yet, used by dprintf(). As result dprintf() returns -EINVAL and doesn't log anything. However there already had some discussions about supporting SEEK_CUR into /dev/kmsg interface in the past it wasn't concluded. Since the only user of that from userspace perspective inside the kernel is the bpfilter UMH (userspace) module it's better to correct it here instead waiting a conclusion on the interface. Fixes: 36c4357c63f3 ("net: bpfilter: print umh messages to /dev/kmsg") Signed-off-by: Bruno Meneguele Signed-off-by: David S. Miller --- net/bpfilter/main.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/net/bpfilter/main.c b/net/bpfilter/main.c index 77396a098fbe..efea4874743e 100644 --- a/net/bpfilter/main.c +++ b/net/bpfilter/main.c @@ -10,7 +10,7 @@ #include #include "msgfmt.h" -int debug_fd; +FILE *debug_f; static int handle_get_cmd(struct mbox_request *cmd) { @@ -35,9 +35,10 @@ static void loop(void) struct mbox_reply reply; int n; + fprintf(debug_f, "testing the buffer\n"); n = read(0, &req, sizeof(req)); if (n != sizeof(req)) { - dprintf(debug_fd, "invalid request %d\n", n); + fprintf(debug_f, "invalid request %d\n", n); return; } @@ -47,7 +48,7 @@ static void loop(void) n = write(1, &reply, sizeof(reply)); if (n != sizeof(reply)) { - dprintf(debug_fd, "reply failed %d\n", n); + fprintf(debug_f, "reply failed %d\n", n); return; } } @@ -55,9 +56,10 @@ static void loop(void) int main(void) { - debug_fd = open("/dev/kmsg", 00000002); - dprintf(debug_fd, "Started bpfilter\n"); + debug_f = fopen("/dev/kmsg", "w"); + setvbuf(debug_f, 0, _IOLBF, 0); + fprintf(debug_f, "Started bpfilter\n"); loop(); - close(debug_fd); + fclose(debug_f); return 0; } From fc191af1bb0d069dc7e981076e8b80af21f1e61d Mon Sep 17 00:00:00 2001 From: Markus Fuchs Date: Fri, 6 Mar 2020 17:38:48 +0100 Subject: [PATCH 187/372] net: stmmac: platform: Fix misleading interrupt error msg Not every stmmac based platform makes use of the eth_wake_irq or eth_lpi interrupts. Use the platform_get_irq_byname_optional variant for these interrupts, so no error message is displayed, if they can't be found. Rather print an information to hint something might be wrong to assist debugging on platforms which use these interrupts. Signed-off-by: Markus Fuchs Signed-off-by: David S. Miller --- .../net/ethernet/stmicro/stmmac/stmmac_platform.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index d10ac54bf385..13fafd905db8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -663,16 +663,22 @@ int stmmac_get_platform_resources(struct platform_device *pdev, * In case the wake up interrupt is not passed from the platform * so the driver will continue to use the mac irq (ndev->irq) */ - stmmac_res->wol_irq = platform_get_irq_byname(pdev, "eth_wake_irq"); + stmmac_res->wol_irq = + platform_get_irq_byname_optional(pdev, "eth_wake_irq"); if (stmmac_res->wol_irq < 0) { if (stmmac_res->wol_irq == -EPROBE_DEFER) return -EPROBE_DEFER; + dev_info(&pdev->dev, "IRQ eth_wake_irq not found\n"); stmmac_res->wol_irq = stmmac_res->irq; } - stmmac_res->lpi_irq = platform_get_irq_byname(pdev, "eth_lpi"); - if (stmmac_res->lpi_irq == -EPROBE_DEFER) - return -EPROBE_DEFER; + stmmac_res->lpi_irq = + platform_get_irq_byname_optional(pdev, "eth_lpi"); + if (stmmac_res->lpi_irq < 0) { + if (stmmac_res->lpi_irq == -EPROBE_DEFER) + return -EPROBE_DEFER; + dev_info(&pdev->dev, "IRQ eth_lpi not found\n"); + } res = platform_get_resource(pdev, IORESOURCE_MEM, 0); stmmac_res->addr = devm_ioremap_resource(&pdev->dev, res); From 46ea929b2b3f66e6a9bc91adbb9ca2157065f9b2 Mon Sep 17 00:00:00 2001 From: Shahjada Abul Husain Date: Fri, 13 Mar 2020 14:32:57 +0530 Subject: [PATCH 188/372] cxgb4: fix delete filter entry fail in unload path Currently, the hardware TID index is assumed to start from index 0. However, with the following changeset, commit c21939998802 ("cxgb4: add support for high priority filters") hardware TID index can start after the high priority region, which has introduced a regression resulting in remove filters entry failure for cxgb4 unload path. This patch fix that. Fixes: c21939998802 ("cxgb4: add support for high priority filters") Signed-off-by: Shahjada Abul Husain Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index 2a2938bbb93a..fc05248984fc 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -902,7 +902,7 @@ void clear_all_filters(struct adapter *adapter) adapter->tids.tid_tab[i]; if (f && (f->valid || f->pending)) - cxgb4_del_filter(dev, i, &f->fs); + cxgb4_del_filter(dev, f->tid, &f->fs); } sb = t4_read_reg(adapter, LE_DB_SRVR_START_INDEX_A); @@ -910,7 +910,7 @@ void clear_all_filters(struct adapter *adapter) f = (struct filter_entry *)adapter->tids.tid_tab[i]; if (f && (f->valid || f->pending)) - cxgb4_del_filter(dev, i, &f->fs); + cxgb4_del_filter(dev, f->tid, &f->fs); } } } From e1f8f78ffe9854308b9e12a73ebe4e909074fc33 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 13 Mar 2020 13:39:36 +0200 Subject: [PATCH 189/372] net: ip_gre: Separate ERSPAN newlink / changelink callbacks ERSPAN shares most of the code path with GRE and gretap code. While that helps keep the code compact, it is also error prone. Currently a broken userspace can turn a gretap tunnel into a de facto ERSPAN one by passing IFLA_GRE_ERSPAN_VER. There has been a similar issue in ip6gretap in the past. To prevent these problems in future, split the newlink and changelink code paths. Split the ERSPAN code out of ipgre_netlink_parms() into a new function erspan_netlink_parms(). Extract a piece of common logic from ipgre_newlink() and ipgre_changelink() into ipgre_newlink_encap_setup(). Add erspan_newlink() and erspan_changelink(). Fixes: 84e54fe0a5ea ("gre: introduce native tunnel support for ERSPAN") Signed-off-by: Petr Machata Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 103 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 85 insertions(+), 18 deletions(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 8274f98c511c..7765c65fc7d2 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1153,6 +1153,22 @@ static int ipgre_netlink_parms(struct net_device *dev, if (data[IFLA_GRE_FWMARK]) *fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]); + return 0; +} + +static int erspan_netlink_parms(struct net_device *dev, + struct nlattr *data[], + struct nlattr *tb[], + struct ip_tunnel_parm *parms, + __u32 *fwmark) +{ + struct ip_tunnel *t = netdev_priv(dev); + int err; + + err = ipgre_netlink_parms(dev, data, tb, parms, fwmark); + if (err) + return err; + if (data[IFLA_GRE_ERSPAN_VER]) { t->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]); @@ -1276,45 +1292,70 @@ static void ipgre_tap_setup(struct net_device *dev) ip_tunnel_setup(dev, gre_tap_net_id); } -static int ipgre_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[], - struct netlink_ext_ack *extack) +static int +ipgre_newlink_encap_setup(struct net_device *dev, struct nlattr *data[]) { - struct ip_tunnel_parm p; struct ip_tunnel_encap ipencap; - __u32 fwmark = 0; - int err; if (ipgre_netlink_encap_parms(data, &ipencap)) { struct ip_tunnel *t = netdev_priv(dev); - err = ip_tunnel_encap_setup(t, &ipencap); + int err = ip_tunnel_encap_setup(t, &ipencap); if (err < 0) return err; } + return 0; +} + +static int ipgre_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + struct ip_tunnel_parm p; + __u32 fwmark = 0; + int err; + + err = ipgre_newlink_encap_setup(dev, data); + if (err) + return err; + err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark); if (err < 0) return err; return ip_tunnel_newlink(dev, tb, &p, fwmark); } +static int erspan_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + struct ip_tunnel_parm p; + __u32 fwmark = 0; + int err; + + err = ipgre_newlink_encap_setup(dev, data); + if (err) + return err; + + err = erspan_netlink_parms(dev, data, tb, &p, &fwmark); + if (err) + return err; + return ip_tunnel_newlink(dev, tb, &p, fwmark); +} + static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct ip_tunnel *t = netdev_priv(dev); - struct ip_tunnel_encap ipencap; __u32 fwmark = t->fwmark; struct ip_tunnel_parm p; int err; - if (ipgre_netlink_encap_parms(data, &ipencap)) { - err = ip_tunnel_encap_setup(t, &ipencap); - - if (err < 0) - return err; - } + err = ipgre_newlink_encap_setup(dev, data); + if (err) + return err; err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark); if (err < 0) @@ -1327,8 +1368,34 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[], t->parms.i_flags = p.i_flags; t->parms.o_flags = p.o_flags; - if (strcmp(dev->rtnl_link_ops->kind, "erspan")) - ipgre_link_update(dev, !tb[IFLA_MTU]); + ipgre_link_update(dev, !tb[IFLA_MTU]); + + return 0; +} + +static int erspan_changelink(struct net_device *dev, struct nlattr *tb[], + struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + struct ip_tunnel *t = netdev_priv(dev); + __u32 fwmark = t->fwmark; + struct ip_tunnel_parm p; + int err; + + err = ipgre_newlink_encap_setup(dev, data); + if (err) + return err; + + err = erspan_netlink_parms(dev, data, tb, &p, &fwmark); + if (err < 0) + return err; + + err = ip_tunnel_changelink(dev, tb, &p, fwmark); + if (err < 0) + return err; + + t->parms.i_flags = p.i_flags; + t->parms.o_flags = p.o_flags; return 0; } @@ -1519,8 +1586,8 @@ static struct rtnl_link_ops erspan_link_ops __read_mostly = { .priv_size = sizeof(struct ip_tunnel), .setup = erspan_setup, .validate = erspan_validate, - .newlink = ipgre_newlink, - .changelink = ipgre_changelink, + .newlink = erspan_newlink, + .changelink = erspan_changelink, .dellink = ip_tunnel_dellink, .get_size = ipgre_get_size, .fill_info = ipgre_fill_info, From 61fad6816fc10fb8793a925d5c1256d1c3db0cd2 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 13 Mar 2020 12:18:09 -0400 Subject: [PATCH 190/372] net/packet: tpacket_rcv: avoid a producer race condition PACKET_RX_RING can cause multiple writers to access the same slot if a fast writer wraps the ring while a slow writer is still copying. This is particularly likely with few, large, slots (e.g., GSO packets). Synchronize kernel thread ownership of rx ring slots with a bitmap. Writers acquire a slot race-free by testing tp_status TP_STATUS_KERNEL while holding the sk receive queue lock. They release this lock before copying and set tp_status to TP_STATUS_USER to release to userspace when done. During copying, another writer may take the lock, also see TP_STATUS_KERNEL, and start writing to the same slot. Introduce a new rx_owner_map bitmap with a bit per slot. To acquire a slot, test and set with the lock held. To release race-free, update tp_status and owner bit as a transaction, so take the lock again. This is the one of a variety of discussed options (see Link below): * instead of a shadow ring, embed the data in the slot itself, such as in tp_padding. But any test for this field may match a value left by userspace, causing deadlock. * avoid the lock on release. This leaves a small race if releasing the shadow slot before setting TP_STATUS_USER. The below reproducer showed that this race is not academic. If releasing the slot after tp_status, the race is more subtle. See the first link for details. * add a new tp_status TP_KERNEL_OWNED to avoid the transactional store of two fields. But, legacy applications may interpret all non-zero tp_status as owned by the user. As libpcap does. So this is possible only opt-in by newer processes. It can be added as an optional mode. * embed the struct at the tail of pg_vec to avoid extra allocation. The implementation proved no less complex than a separate field. The additional locking cost on release adds contention, no different than scaling on multicore or multiqueue h/w. In practice, below reproducer nor small packet tcpdump showed a noticeable change in perf report in cycles spent in spinlock. Where contention is problematic, packet sockets support mitigation through PACKET_FANOUT. And we can consider adding opt-in state TP_KERNEL_OWNED. Easy to reproduce by running multiple netperf or similar TCP_STREAM flows concurrently with `tcpdump -B 129 -n greater 60000`. Based on an earlier patchset by Jon Rosen. See links below. I believe this issue goes back to the introduction of tpacket_rcv, which predates git history. Link: https://www.mail-archive.com/netdev@vger.kernel.org/msg237222.html Suggested-by: Jon Rosen Signed-off-by: Willem de Bruijn Signed-off-by: Jon Rosen Signed-off-by: David S. Miller --- net/packet/af_packet.c | 21 +++++++++++++++++++++ net/packet/internal.h | 5 ++++- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index e5b0986215d2..29bd405adbbd 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2173,6 +2173,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct timespec64 ts; __u32 ts_status; bool is_drop_n_account = false; + unsigned int slot_id = 0; bool do_vnet = false; /* struct tpacket{2,3}_hdr is aligned to a multiple of TPACKET_ALIGNMENT. @@ -2275,6 +2276,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, if (!h.raw) goto drop_n_account; + if (po->tp_version <= TPACKET_V2) { + slot_id = po->rx_ring.head; + if (test_bit(slot_id, po->rx_ring.rx_owner_map)) + goto drop_n_account; + __set_bit(slot_id, po->rx_ring.rx_owner_map); + } + if (do_vnet && virtio_net_hdr_from_skb(skb, h.raw + macoff - sizeof(struct virtio_net_hdr), @@ -2380,7 +2388,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, #endif if (po->tp_version <= TPACKET_V2) { + spin_lock(&sk->sk_receive_queue.lock); __packet_set_status(po, h.raw, status); + __clear_bit(slot_id, po->rx_ring.rx_owner_map); + spin_unlock(&sk->sk_receive_queue.lock); sk->sk_data_ready(sk); } else { prb_clear_blk_fill_status(&po->rx_ring); @@ -4277,6 +4288,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, { struct pgv *pg_vec = NULL; struct packet_sock *po = pkt_sk(sk); + unsigned long *rx_owner_map = NULL; int was_running, order = 0; struct packet_ring_buffer *rb; struct sk_buff_head *rb_queue; @@ -4362,6 +4374,12 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, } break; default: + if (!tx_ring) { + rx_owner_map = bitmap_alloc(req->tp_frame_nr, + GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO); + if (!rx_owner_map) + goto out_free_pg_vec; + } break; } } @@ -4391,6 +4409,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, err = 0; spin_lock_bh(&rb_queue->lock); swap(rb->pg_vec, pg_vec); + if (po->tp_version <= TPACKET_V2) + swap(rb->rx_owner_map, rx_owner_map); rb->frame_max = (req->tp_frame_nr - 1); rb->head = 0; rb->frame_size = req->tp_frame_size; @@ -4422,6 +4442,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, } out_free_pg_vec: + bitmap_free(rx_owner_map); if (pg_vec) free_pg_vec(pg_vec, order, req->tp_block_nr); out: diff --git a/net/packet/internal.h b/net/packet/internal.h index 82fb2b10f790..907f4cd2a718 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -70,7 +70,10 @@ struct packet_ring_buffer { unsigned int __percpu *pending_refcnt; - struct tpacket_kbdq_core prb_bdqc; + union { + unsigned long *rx_owner_map; + struct tpacket_kbdq_core prb_bdqc; + }; }; extern struct mutex fanout_mutex; From 0fda7600c2e174fe27e9cf02e78e345226e441fa Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 14 Mar 2020 08:18:42 +0100 Subject: [PATCH 191/372] geneve: move debug check after netdev unregister The debug check must be done after unregister_netdevice_many() call -- the list_del() for this is done inside .ndo_stop. Fixes: 2843a25348f8 ("geneve: speedup geneve tunnels dismantle") Reported-and-tested-by: Cc: Haishuang Yan Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- drivers/net/geneve.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 75757e9954ba..09f279c0182b 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1845,8 +1845,6 @@ static void geneve_destroy_tunnels(struct net *net, struct list_head *head) if (!net_eq(dev_net(geneve->dev), net)) unregister_netdevice_queue(geneve->dev, head); } - - WARN_ON_ONCE(!list_empty(&gn->sock_list)); } static void __net_exit geneve_exit_batch_net(struct list_head *net_list) @@ -1861,6 +1859,12 @@ static void __net_exit geneve_exit_batch_net(struct list_head *net_list) /* unregister the devices gathered above */ unregister_netdevice_many(&list); rtnl_unlock(); + + list_for_each_entry(net, net_list, exit_list) { + const struct geneve_net *gn = net_generic(net, geneve_net_id); + + WARN_ON_ONCE(!list_empty(&gn->sock_list)); + } } static struct pernet_operations geneve_net_ops = { From fb33c6510d5595144d585aa194d377cf74d31911 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 15 Mar 2020 15:01:23 -0700 Subject: [PATCH 192/372] Linux 5.6-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index e25db579ce74..171f2b004c8a 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 6 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Kleptomaniac Octopus # *DOCUMENTATION* From cb851c01b51bb610a9093d01624565ce1d4e38fa Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 15 Mar 2020 10:07:35 +0200 Subject: [PATCH 193/372] mlxsw: reg: Increase register field length to 31 bits The cited commit set a value of 2^31-1 in order to "disable" the shaper on a given a port. However, the length of the maximum shaper rate field was not updated from 28 bits to 31 bits, which means ports are still limited to ~268Gbps despite supporting speeds of 400Gbps. Fix this by increasing the field's length. Fixes: 92afbfedb77d ("mlxsw: reg: Increase MLXSW_REG_QEEC_MAS_DIS") Signed-off-by: Ido Schimmel Reviewed-by: Jiri Pirko Reviewed-by: Petr Machata Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index dd6685156396..e05d1d1be2fd 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -3572,7 +3572,7 @@ MLXSW_ITEM32(reg, qeec, mase, 0x10, 31, 1); * When in bytes mode, value is specified in units of 1000bps. * Access: RW */ -MLXSW_ITEM32(reg, qeec, max_shaper_rate, 0x10, 0, 28); +MLXSW_ITEM32(reg, qeec, max_shaper_rate, 0x10, 0, 31); /* reg_qeec_de * DWRR configuration enable. Enables configuration of the dwrr and From 13bde56c5b7cc6489eb50c40449d461c7ca4da2d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 15 Mar 2020 10:34:58 +0100 Subject: [PATCH 194/372] net: caif: Use scnprintf() for avoiding potential buffer overflow Since snprintf() returns the would-be-output size instead of the actual output size, the succeeding calls may go beyond the given buffer limit. Fix it by replacing with scnprintf(). Cc: "David S . Miller" Cc: netdev@vger.kernel.org Signed-off-by: Takashi Iwai Signed-off-by: David S. Miller --- drivers/net/caif/caif_spi.c | 68 ++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/drivers/net/caif/caif_spi.c b/drivers/net/caif/caif_spi.c index 8e81bdf98ac6..63f2548f5b1b 100644 --- a/drivers/net/caif/caif_spi.c +++ b/drivers/net/caif/caif_spi.c @@ -141,29 +141,29 @@ static ssize_t dbgfs_state(struct file *file, char __user *user_buf, return 0; /* Print out debug information. */ - len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len), - "CAIF SPI debug information:\n"); + len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len), + "CAIF SPI debug information:\n"); - len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len), FLAVOR); + len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len), FLAVOR); - len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len), - "STATE: %d\n", cfspi->dbg_state); - len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len), - "Previous CMD: 0x%x\n", cfspi->pcmd); - len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len), - "Current CMD: 0x%x\n", cfspi->cmd); - len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len), - "Previous TX len: %d\n", cfspi->tx_ppck_len); - len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len), - "Previous RX len: %d\n", cfspi->rx_ppck_len); - len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len), - "Current TX len: %d\n", cfspi->tx_cpck_len); - len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len), - "Current RX len: %d\n", cfspi->rx_cpck_len); - len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len), - "Next TX len: %d\n", cfspi->tx_npck_len); - len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len), - "Next RX len: %d\n", cfspi->rx_npck_len); + len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len), + "STATE: %d\n", cfspi->dbg_state); + len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len), + "Previous CMD: 0x%x\n", cfspi->pcmd); + len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len), + "Current CMD: 0x%x\n", cfspi->cmd); + len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len), + "Previous TX len: %d\n", cfspi->tx_ppck_len); + len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len), + "Previous RX len: %d\n", cfspi->rx_ppck_len); + len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len), + "Current TX len: %d\n", cfspi->tx_cpck_len); + len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len), + "Current RX len: %d\n", cfspi->rx_cpck_len); + len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len), + "Next TX len: %d\n", cfspi->tx_npck_len); + len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len), + "Next RX len: %d\n", cfspi->rx_npck_len); if (len > DEBUGFS_BUF_SIZE) len = DEBUGFS_BUF_SIZE; @@ -180,23 +180,23 @@ static ssize_t print_frame(char *buf, size_t size, char *frm, int len = 0; int i; for (i = 0; i < count; i++) { - len += snprintf((buf + len), (size - len), + len += scnprintf((buf + len), (size - len), "[0x" BYTE_HEX_FMT "]", frm[i]); if ((i == cut) && (count > (cut * 2))) { /* Fast forward. */ i = count - cut; - len += snprintf((buf + len), (size - len), - "--- %zu bytes skipped ---\n", - count - (cut * 2)); + len += scnprintf((buf + len), (size - len), + "--- %zu bytes skipped ---\n", + count - (cut * 2)); } if ((!(i % 10)) && i) { - len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len), - "\n"); + len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len), + "\n"); } } - len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len), "\n"); + len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len), "\n"); return len; } @@ -214,18 +214,18 @@ static ssize_t dbgfs_frame(struct file *file, char __user *user_buf, return 0; /* Print out debug information. */ - len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len), - "Current frame:\n"); + len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len), + "Current frame:\n"); - len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len), - "Tx data (Len: %d):\n", cfspi->tx_cpck_len); + len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len), + "Tx data (Len: %d):\n", cfspi->tx_cpck_len); len += print_frame((buf + len), (DEBUGFS_BUF_SIZE - len), cfspi->xfer.va_tx[0], (cfspi->tx_cpck_len + SPI_CMD_SZ), 100); - len += snprintf((buf + len), (DEBUGFS_BUF_SIZE - len), - "Rx data (Len: %d):\n", cfspi->rx_cpck_len); + len += scnprintf((buf + len), (DEBUGFS_BUF_SIZE - len), + "Rx data (Len: %d):\n", cfspi->rx_cpck_len); len += print_frame((buf + len), (DEBUGFS_BUF_SIZE - len), cfspi->xfer.va_rx, From 4a348601eb9131893c22b6ed2d3b6ba2bafc2391 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 15 Mar 2020 10:34:59 +0100 Subject: [PATCH 195/372] net: mlx4: Use scnprintf() for avoiding potential buffer overflow Since snprintf() returns the would-be-output size instead of the actual output size, the succeeding calls may go beyond the given buffer limit. Fix it by replacing with scnprintf(). Cc: "David S . Miller" Cc: Tariq Toukan To: netdev@vger.kernel.org Signed-off-by: Takashi Iwai Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/mcg.c | 62 ++++++++++++------------ 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index 9c481823b3e8..9486caecfbdc 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -906,59 +906,59 @@ static void mlx4_err_rule(struct mlx4_dev *dev, char *str, int len = 0; mlx4_err(dev, "%s", str); - len += snprintf(buf + len, BUF_SIZE - len, - "port = %d prio = 0x%x qp = 0x%x ", - rule->port, rule->priority, rule->qpn); + len += scnprintf(buf + len, BUF_SIZE - len, + "port = %d prio = 0x%x qp = 0x%x ", + rule->port, rule->priority, rule->qpn); list_for_each_entry(cur, &rule->list, list) { switch (cur->id) { case MLX4_NET_TRANS_RULE_ID_ETH: - len += snprintf(buf + len, BUF_SIZE - len, - "dmac = %pM ", &cur->eth.dst_mac); + len += scnprintf(buf + len, BUF_SIZE - len, + "dmac = %pM ", &cur->eth.dst_mac); if (cur->eth.ether_type) - len += snprintf(buf + len, BUF_SIZE - len, - "ethertype = 0x%x ", - be16_to_cpu(cur->eth.ether_type)); + len += scnprintf(buf + len, BUF_SIZE - len, + "ethertype = 0x%x ", + be16_to_cpu(cur->eth.ether_type)); if (cur->eth.vlan_id) - len += snprintf(buf + len, BUF_SIZE - len, - "vlan-id = %d ", - be16_to_cpu(cur->eth.vlan_id)); + len += scnprintf(buf + len, BUF_SIZE - len, + "vlan-id = %d ", + be16_to_cpu(cur->eth.vlan_id)); break; case MLX4_NET_TRANS_RULE_ID_IPV4: if (cur->ipv4.src_ip) - len += snprintf(buf + len, BUF_SIZE - len, - "src-ip = %pI4 ", - &cur->ipv4.src_ip); + len += scnprintf(buf + len, BUF_SIZE - len, + "src-ip = %pI4 ", + &cur->ipv4.src_ip); if (cur->ipv4.dst_ip) - len += snprintf(buf + len, BUF_SIZE - len, - "dst-ip = %pI4 ", - &cur->ipv4.dst_ip); + len += scnprintf(buf + len, BUF_SIZE - len, + "dst-ip = %pI4 ", + &cur->ipv4.dst_ip); break; case MLX4_NET_TRANS_RULE_ID_TCP: case MLX4_NET_TRANS_RULE_ID_UDP: if (cur->tcp_udp.src_port) - len += snprintf(buf + len, BUF_SIZE - len, - "src-port = %d ", - be16_to_cpu(cur->tcp_udp.src_port)); + len += scnprintf(buf + len, BUF_SIZE - len, + "src-port = %d ", + be16_to_cpu(cur->tcp_udp.src_port)); if (cur->tcp_udp.dst_port) - len += snprintf(buf + len, BUF_SIZE - len, - "dst-port = %d ", - be16_to_cpu(cur->tcp_udp.dst_port)); + len += scnprintf(buf + len, BUF_SIZE - len, + "dst-port = %d ", + be16_to_cpu(cur->tcp_udp.dst_port)); break; case MLX4_NET_TRANS_RULE_ID_IB: - len += snprintf(buf + len, BUF_SIZE - len, - "dst-gid = %pI6\n", cur->ib.dst_gid); - len += snprintf(buf + len, BUF_SIZE - len, - "dst-gid-mask = %pI6\n", - cur->ib.dst_gid_msk); + len += scnprintf(buf + len, BUF_SIZE - len, + "dst-gid = %pI6\n", cur->ib.dst_gid); + len += scnprintf(buf + len, BUF_SIZE - len, + "dst-gid-mask = %pI6\n", + cur->ib.dst_gid_msk); break; case MLX4_NET_TRANS_RULE_ID_VXLAN: - len += snprintf(buf + len, BUF_SIZE - len, - "VNID = %d ", be32_to_cpu(cur->vxlan.vni)); + len += scnprintf(buf + len, BUF_SIZE - len, + "VNID = %d ", be32_to_cpu(cur->vxlan.vni)); break; case MLX4_NET_TRANS_RULE_ID_IPV6: break; @@ -967,7 +967,7 @@ static void mlx4_err_rule(struct mlx4_dev *dev, char *str, break; } } - len += snprintf(buf + len, BUF_SIZE - len, "\n"); + len += scnprintf(buf + len, BUF_SIZE - len, "\n"); mlx4_err(dev, "%s", buf); if (len >= BUF_SIZE) From 413ae546f8726ac0652e59fcf97561fc21f52653 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 15 Mar 2020 10:35:00 +0100 Subject: [PATCH 196/372] net: nfp: Use scnprintf() for avoiding potential buffer overflow Since snprintf() returns the would-be-output size instead of the actual output size, the succeeding calls may go beyond the given buffer limit. Fix it by replacing with scnprintf(). Reviewed-by: Simon Horman Cc: "David S . Miller" Cc: Jakub Kicinski Cc: oss-drivers@netronome.com To: netdev@vger.kernel.org Signed-off-by: Takashi Iwai Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c index b454db283aef..684e4e036c55 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp6000_pcie.c @@ -616,7 +616,7 @@ static int enable_bars(struct nfp6000_pcie *nfp, u16 interface) if (bar->iomem) { int pf; - msg += snprintf(msg, end - msg, "0.0: General/MSI-X SRAM, "); + msg += scnprintf(msg, end - msg, "0.0: General/MSI-X SRAM, "); atomic_inc(&bar->refcnt); bars_free--; @@ -661,7 +661,7 @@ static int enable_bars(struct nfp6000_pcie *nfp, u16 interface) /* Configure, and lock, BAR0.1 for PCIe XPB (MSI-X PBA) */ bar = &nfp->bar[1]; - msg += snprintf(msg, end - msg, "0.1: PCIe XPB/MSI-X PBA, "); + msg += scnprintf(msg, end - msg, "0.1: PCIe XPB/MSI-X PBA, "); atomic_inc(&bar->refcnt); bars_free--; @@ -680,8 +680,8 @@ static int enable_bars(struct nfp6000_pcie *nfp, u16 interface) bar->iomem = ioremap(nfp_bar_resource_start(bar), nfp_bar_resource_len(bar)); if (bar->iomem) { - msg += snprintf(msg, end - msg, - "0.%d: Explicit%d, ", 4 + i, i); + msg += scnprintf(msg, end - msg, + "0.%d: Explicit%d, ", 4 + i, i); atomic_inc(&bar->refcnt); bars_free--; From 38e0f746c456b2f7d64b37aef3b6bd9ad73508c0 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 15 Mar 2020 10:35:01 +0100 Subject: [PATCH 197/372] net: ionic: Use scnprintf() for avoiding potential buffer overflow Since snprintf() returns the would-be-output size instead of the actual output size, the succeeding calls may go beyond the given buffer limit. Fix it by replacing with scnprintf(). Reviewed-by: Simon Horman Acked-by: Shannon Nelson Cc: "David S . Miller" Cc: Jakub Kicinski Cc: oss-drivers@netronome.com Cc: netdev@vger.kernel.org Signed-off-by: Takashi Iwai Signed-off-by: David S. Miller --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index c2f5b691e0fa..938e19ee0bcd 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -948,18 +948,18 @@ static void ionic_lif_rx_mode(struct ionic_lif *lif, unsigned int rx_mode) int i; #define REMAIN(__x) (sizeof(buf) - (__x)) - i = snprintf(buf, sizeof(buf), "rx_mode 0x%04x -> 0x%04x:", - lif->rx_mode, rx_mode); + i = scnprintf(buf, sizeof(buf), "rx_mode 0x%04x -> 0x%04x:", + lif->rx_mode, rx_mode); if (rx_mode & IONIC_RX_MODE_F_UNICAST) - i += snprintf(&buf[i], REMAIN(i), " RX_MODE_F_UNICAST"); + i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_UNICAST"); if (rx_mode & IONIC_RX_MODE_F_MULTICAST) - i += snprintf(&buf[i], REMAIN(i), " RX_MODE_F_MULTICAST"); + i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_MULTICAST"); if (rx_mode & IONIC_RX_MODE_F_BROADCAST) - i += snprintf(&buf[i], REMAIN(i), " RX_MODE_F_BROADCAST"); + i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_BROADCAST"); if (rx_mode & IONIC_RX_MODE_F_PROMISC) - i += snprintf(&buf[i], REMAIN(i), " RX_MODE_F_PROMISC"); + i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_PROMISC"); if (rx_mode & IONIC_RX_MODE_F_ALLMULTI) - i += snprintf(&buf[i], REMAIN(i), " RX_MODE_F_ALLMULTI"); + i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_ALLMULTI"); netdev_dbg(lif->netdev, "lif%d %s\n", lif->index, buf); err = ionic_adminq_post_wait(lif, &ctx); From 5e892880e14fbc1944f49aa42a67496516fe5dac Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 15 Mar 2020 10:35:02 +0100 Subject: [PATCH 198/372] net: sfc: Use scnprintf() for avoiding potential buffer overflow Since snprintf() returns the would-be-output size instead of the actual output size, the succeeding calls may go beyond the given buffer limit. Fix it by replacing with scnprintf(). Cc: "David S . Miller" Cc: Edward Cree Cc: Martin Habets Cc: Solarflare linux maintainers Cc: netdev@vger.kernel.org Signed-off-by: Takashi Iwai Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/mcdi.c | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c index 2713300343c7..15c731d04065 100644 --- a/drivers/net/ethernet/sfc/mcdi.c +++ b/drivers/net/ethernet/sfc/mcdi.c @@ -212,12 +212,14 @@ static void efx_mcdi_send_request(struct efx_nic *efx, unsigned cmd, * progress on a NIC at any one time. So no need for locking. */ for (i = 0; i < hdr_len / 4 && bytes < PAGE_SIZE; i++) - bytes += snprintf(buf + bytes, PAGE_SIZE - bytes, - " %08x", le32_to_cpu(hdr[i].u32[0])); + bytes += scnprintf(buf + bytes, PAGE_SIZE - bytes, + " %08x", + le32_to_cpu(hdr[i].u32[0])); for (i = 0; i < inlen / 4 && bytes < PAGE_SIZE; i++) - bytes += snprintf(buf + bytes, PAGE_SIZE - bytes, - " %08x", le32_to_cpu(inbuf[i].u32[0])); + bytes += scnprintf(buf + bytes, PAGE_SIZE - bytes, + " %08x", + le32_to_cpu(inbuf[i].u32[0])); netif_info(efx, hw, efx->net_dev, "MCDI RPC REQ:%s\n", buf); } @@ -302,15 +304,15 @@ static void efx_mcdi_read_response_header(struct efx_nic *efx) */ for (i = 0; i < hdr_len && bytes < PAGE_SIZE; i++) { efx->type->mcdi_read_response(efx, &hdr, (i * 4), 4); - bytes += snprintf(buf + bytes, PAGE_SIZE - bytes, - " %08x", le32_to_cpu(hdr.u32[0])); + bytes += scnprintf(buf + bytes, PAGE_SIZE - bytes, + " %08x", le32_to_cpu(hdr.u32[0])); } for (i = 0; i < data_len && bytes < PAGE_SIZE; i++) { efx->type->mcdi_read_response(efx, &hdr, mcdi->resp_hdr_len + (i * 4), 4); - bytes += snprintf(buf + bytes, PAGE_SIZE - bytes, - " %08x", le32_to_cpu(hdr.u32[0])); + bytes += scnprintf(buf + bytes, PAGE_SIZE - bytes, + " %08x", le32_to_cpu(hdr.u32[0])); } netif_info(efx, hw, efx->net_dev, "MCDI RPC RESP:%s\n", buf); @@ -1417,9 +1419,11 @@ void efx_mcdi_print_fwver(struct efx_nic *efx, char *buf, size_t len) } ver_words = (__le16 *)MCDI_PTR(outbuf, GET_VERSION_OUT_VERSION); - offset = snprintf(buf, len, "%u.%u.%u.%u", - le16_to_cpu(ver_words[0]), le16_to_cpu(ver_words[1]), - le16_to_cpu(ver_words[2]), le16_to_cpu(ver_words[3])); + offset = scnprintf(buf, len, "%u.%u.%u.%u", + le16_to_cpu(ver_words[0]), + le16_to_cpu(ver_words[1]), + le16_to_cpu(ver_words[2]), + le16_to_cpu(ver_words[3])); /* EF10 may have multiple datapath firmware variants within a * single version. Report which variants are running. @@ -1427,9 +1431,9 @@ void efx_mcdi_print_fwver(struct efx_nic *efx, char *buf, size_t len) if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0) { struct efx_ef10_nic_data *nic_data = efx->nic_data; - offset += snprintf(buf + offset, len - offset, " rx%x tx%x", - nic_data->rx_dpcpu_fw_id, - nic_data->tx_dpcpu_fw_id); + offset += scnprintf(buf + offset, len - offset, " rx%x tx%x", + nic_data->rx_dpcpu_fw_id, + nic_data->tx_dpcpu_fw_id); /* It's theoretically possible for the string to exceed 31 * characters, though in practice the first three version From 2da222f612b58b6e6b41972f46005b58aac1699e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 15 Mar 2020 10:35:03 +0100 Subject: [PATCH 199/372] net: netdevsim: Use scnprintf() for avoiding potential buffer overflow Since snprintf() returns the would-be-output size instead of the actual output size, the succeeding calls may go beyond the given buffer limit. Fix it by replacing with scnprintf(). Cc: "David S . Miller" Cc: Jakub Kicinski Cc: netdev@vger.kernel.org Signed-off-by: Takashi Iwai Signed-off-by: David S. Miller --- drivers/net/netdevsim/ipsec.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/net/netdevsim/ipsec.c b/drivers/net/netdevsim/ipsec.c index e27fc1a4516d..3811f1bde84e 100644 --- a/drivers/net/netdevsim/ipsec.c +++ b/drivers/net/netdevsim/ipsec.c @@ -29,9 +29,9 @@ static ssize_t nsim_dbg_netdev_ops_read(struct file *filp, return -ENOMEM; p = buf; - p += snprintf(p, bufsize - (p - buf), - "SA count=%u tx=%u\n", - ipsec->count, ipsec->tx); + p += scnprintf(p, bufsize - (p - buf), + "SA count=%u tx=%u\n", + ipsec->count, ipsec->tx); for (i = 0; i < NSIM_IPSEC_MAX_SA_COUNT; i++) { struct nsim_sa *sap = &ipsec->sa[i]; @@ -39,18 +39,18 @@ static ssize_t nsim_dbg_netdev_ops_read(struct file *filp, if (!sap->used) continue; - p += snprintf(p, bufsize - (p - buf), - "sa[%i] %cx ipaddr=0x%08x %08x %08x %08x\n", - i, (sap->rx ? 'r' : 't'), sap->ipaddr[0], - sap->ipaddr[1], sap->ipaddr[2], sap->ipaddr[3]); - p += snprintf(p, bufsize - (p - buf), - "sa[%i] spi=0x%08x proto=0x%x salt=0x%08x crypt=%d\n", - i, be32_to_cpu(sap->xs->id.spi), - sap->xs->id.proto, sap->salt, sap->crypt); - p += snprintf(p, bufsize - (p - buf), - "sa[%i] key=0x%08x %08x %08x %08x\n", - i, sap->key[0], sap->key[1], - sap->key[2], sap->key[3]); + p += scnprintf(p, bufsize - (p - buf), + "sa[%i] %cx ipaddr=0x%08x %08x %08x %08x\n", + i, (sap->rx ? 'r' : 't'), sap->ipaddr[0], + sap->ipaddr[1], sap->ipaddr[2], sap->ipaddr[3]); + p += scnprintf(p, bufsize - (p - buf), + "sa[%i] spi=0x%08x proto=0x%x salt=0x%08x crypt=%d\n", + i, be32_to_cpu(sap->xs->id.spi), + sap->xs->id.proto, sap->salt, sap->crypt); + p += scnprintf(p, bufsize - (p - buf), + "sa[%i] key=0x%08x %08x %08x %08x\n", + i, sap->key[0], sap->key[1], + sap->key[2], sap->key[3]); } len = simple_read_from_buffer(buffer, count, ppos, buf, p - buf); From 173756b86803655d70af7732079b3aa935e6ab68 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Fri, 13 Mar 2020 06:50:14 +0000 Subject: [PATCH 200/372] hsr: use rcu_read_lock() in hsr_get_node_{list/status}() hsr_get_node_{list/status}() are not under rtnl_lock() because they are callback functions of generic netlink. But they use __dev_get_by_index() without rtnl_lock(). So, it would use unsafe data. In order to fix it, rcu_read_lock() and dev_get_by_index_rcu() are used instead of __dev_get_by_index(). Fixes: f421436a591d ("net/hsr: Add support for the High-availability Seamless Redundancy protocol (HSRv0)") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- net/hsr/hsr_framereg.c | 9 ++------- net/hsr/hsr_netlink.c | 39 +++++++++++++++++++++------------------ 2 files changed, 23 insertions(+), 25 deletions(-) diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 3ba7f61be107..a64bb64935a6 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -482,12 +482,9 @@ int hsr_get_node_data(struct hsr_priv *hsr, struct hsr_port *port; unsigned long tdiff; - rcu_read_lock(); node = find_node_by_addr_A(&hsr->node_db, addr); - if (!node) { - rcu_read_unlock(); - return -ENOENT; /* No such entry */ - } + if (!node) + return -ENOENT; ether_addr_copy(addr_b, node->macaddress_B); @@ -522,7 +519,5 @@ int hsr_get_node_data(struct hsr_priv *hsr, *addr_b_ifindex = -1; } - rcu_read_unlock(); - return 0; } diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index 8dc0547f01d0..d6760df2ad1f 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -251,15 +251,16 @@ static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info) if (!na) goto invalid; - hsr_dev = __dev_get_by_index(genl_info_net(info), - nla_get_u32(info->attrs[HSR_A_IFINDEX])); + rcu_read_lock(); + hsr_dev = dev_get_by_index_rcu(genl_info_net(info), + nla_get_u32(info->attrs[HSR_A_IFINDEX])); if (!hsr_dev) - goto invalid; + goto rcu_unlock; if (!is_hsr_master(hsr_dev)) - goto invalid; + goto rcu_unlock; /* Send reply */ - skb_out = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + skb_out = genlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); if (!skb_out) { res = -ENOMEM; goto fail; @@ -313,12 +314,10 @@ static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info) res = nla_put_u16(skb_out, HSR_A_IF1_SEQ, hsr_node_if1_seq); if (res < 0) goto nla_put_failure; - rcu_read_lock(); port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_A); if (port) res = nla_put_u32(skb_out, HSR_A_IF1_IFINDEX, port->dev->ifindex); - rcu_read_unlock(); if (res < 0) goto nla_put_failure; @@ -328,20 +327,22 @@ static int hsr_get_node_status(struct sk_buff *skb_in, struct genl_info *info) res = nla_put_u16(skb_out, HSR_A_IF2_SEQ, hsr_node_if2_seq); if (res < 0) goto nla_put_failure; - rcu_read_lock(); port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_B); if (port) res = nla_put_u32(skb_out, HSR_A_IF2_IFINDEX, port->dev->ifindex); - rcu_read_unlock(); if (res < 0) goto nla_put_failure; + rcu_read_unlock(); + genlmsg_end(skb_out, msg_head); genlmsg_unicast(genl_info_net(info), skb_out, info->snd_portid); return 0; +rcu_unlock: + rcu_read_unlock(); invalid: netlink_ack(skb_in, nlmsg_hdr(skb_in), -EINVAL, NULL); return 0; @@ -351,6 +352,7 @@ nla_put_failure: /* Fall through */ fail: + rcu_read_unlock(); return res; } @@ -377,15 +379,16 @@ static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info) if (!na) goto invalid; - hsr_dev = __dev_get_by_index(genl_info_net(info), - nla_get_u32(info->attrs[HSR_A_IFINDEX])); + rcu_read_lock(); + hsr_dev = dev_get_by_index_rcu(genl_info_net(info), + nla_get_u32(info->attrs[HSR_A_IFINDEX])); if (!hsr_dev) - goto invalid; + goto rcu_unlock; if (!is_hsr_master(hsr_dev)) - goto invalid; + goto rcu_unlock; /* Send reply */ - skb_out = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + skb_out = genlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); if (!skb_out) { res = -ENOMEM; goto fail; @@ -405,14 +408,11 @@ static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info) hsr = netdev_priv(hsr_dev); - rcu_read_lock(); pos = hsr_get_next_node(hsr, NULL, addr); while (pos) { res = nla_put(skb_out, HSR_A_NODE_ADDR, ETH_ALEN, addr); - if (res < 0) { - rcu_read_unlock(); + if (res < 0) goto nla_put_failure; - } pos = hsr_get_next_node(hsr, pos, addr); } rcu_read_unlock(); @@ -422,6 +422,8 @@ static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info) return 0; +rcu_unlock: + rcu_read_unlock(); invalid: netlink_ack(skb_in, nlmsg_hdr(skb_in), -EINVAL, NULL); return 0; @@ -431,6 +433,7 @@ nla_put_failure: /* Fall through */ fail: + rcu_read_unlock(); return res; } From ca19c70f5225771c05bcdcb832b4eb84d7271c5e Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Fri, 13 Mar 2020 06:50:24 +0000 Subject: [PATCH 201/372] hsr: add restart routine into hsr_get_node_list() The hsr_get_node_list() is to send node addresses to the userspace. If there are so many nodes, it could fail because of buffer size. In order to avoid this failure, the restart routine is added. Fixes: f421436a591d ("net/hsr: Add support for the High-availability Seamless Redundancy protocol (HSRv0)") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- net/hsr/hsr_netlink.c | 42 ++++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index d6760df2ad1f..726bfe923999 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -360,16 +360,14 @@ fail: */ static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info) { - /* For receiving */ - struct nlattr *na; - struct net_device *hsr_dev; - - /* For sending */ - struct sk_buff *skb_out; - void *msg_head; - struct hsr_priv *hsr; - void *pos; unsigned char addr[ETH_ALEN]; + struct net_device *hsr_dev; + struct sk_buff *skb_out; + struct hsr_priv *hsr; + bool restart = false; + struct nlattr *na; + void *pos = NULL; + void *msg_head; int res; if (!info) @@ -387,8 +385,9 @@ static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info) if (!is_hsr_master(hsr_dev)) goto rcu_unlock; +restart: /* Send reply */ - skb_out = genlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); + skb_out = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!skb_out) { res = -ENOMEM; goto fail; @@ -402,17 +401,28 @@ static int hsr_get_node_list(struct sk_buff *skb_in, struct genl_info *info) goto nla_put_failure; } - res = nla_put_u32(skb_out, HSR_A_IFINDEX, hsr_dev->ifindex); - if (res < 0) - goto nla_put_failure; + if (!restart) { + res = nla_put_u32(skb_out, HSR_A_IFINDEX, hsr_dev->ifindex); + if (res < 0) + goto nla_put_failure; + } hsr = netdev_priv(hsr_dev); - pos = hsr_get_next_node(hsr, NULL, addr); + if (!pos) + pos = hsr_get_next_node(hsr, NULL, addr); while (pos) { res = nla_put(skb_out, HSR_A_NODE_ADDR, ETH_ALEN, addr); - if (res < 0) + if (res < 0) { + if (res == -EMSGSIZE) { + genlmsg_end(skb_out, msg_head); + genlmsg_unicast(genl_info_net(info), skb_out, + info->snd_portid); + restart = true; + goto restart; + } goto nla_put_failure; + } pos = hsr_get_next_node(hsr, pos, addr); } rcu_read_unlock(); @@ -429,7 +439,7 @@ invalid: return 0; nla_put_failure: - kfree_skb(skb_out); + nlmsg_free(skb_out); /* Fall through */ fail: From 09e91dbea0aa32be02d8877bd50490813de56b9a Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Fri, 13 Mar 2020 06:50:33 +0000 Subject: [PATCH 202/372] hsr: set .netnsok flag The hsr module has been supporting the list and status command. (HSR_C_GET_NODE_LIST and HSR_C_GET_NODE_STATUS) These commands send node information to the user-space via generic netlink. But, in the non-init_net namespace, these commands are not allowed because .netnsok flag is false. So, there is no way to get node information in the non-init_net namespace. Fixes: f421436a591d ("net/hsr: Add support for the High-availability Seamless Redundancy protocol (HSRv0)") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- net/hsr/hsr_netlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index 726bfe923999..fae21c863b1f 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -470,6 +470,7 @@ static struct genl_family hsr_genl_family __ro_after_init = { .version = 1, .maxattr = HSR_A_MAX, .policy = hsr_genl_policy, + .netnsok = true, .module = THIS_MODULE, .ops = hsr_ops, .n_ops = ARRAY_SIZE(hsr_ops), From ef299cc3fa1a9e1288665a9fdc8bff55629fd359 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 13 Mar 2020 22:29:54 -0700 Subject: [PATCH 203/372] net_sched: cls_route: remove the right filter from hashtable route4_change() allocates a new filter and copies values from the old one. After the new filter is inserted into the hash table, the old filter should be removed and freed, as the final step of the update. However, the current code mistakenly removes the new one. This looks apparently wrong to me, and it causes double "free" and use-after-free too, as reported by syzbot. Reported-and-tested-by: syzbot+f9b32aaacd60305d9687@syzkaller.appspotmail.com Reported-and-tested-by: syzbot+2f8c233f131943d6056d@syzkaller.appspotmail.com Reported-and-tested-by: syzbot+9c2df9fd5e9445b74e01@syzkaller.appspotmail.com Fixes: 1109c00547fc ("net: sched: RCU cls_route") Cc: Jamal Hadi Salim Cc: Jiri Pirko Cc: John Fastabend Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_route.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 6f8786b06bde..5efa3e7ace15 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -534,8 +534,8 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, fp = &b->ht[h]; for (pfp = rtnl_dereference(*fp); pfp; fp = &pfp->next, pfp = rtnl_dereference(*fp)) { - if (pfp == f) { - *fp = f->next; + if (pfp == fold) { + rcu_assign_pointer(*fp, fold->next); break; } } From fe2a31d790f81bd14a76de3d3b87f4f1362f60cd Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Sun, 15 Mar 2020 18:17:43 +0100 Subject: [PATCH 204/372] netlink: allow extack cookie also for error messages Commit ba0dc5f6e0ba ("netlink: allow sending extended ACK with cookie on success") introduced a cookie which can be sent to userspace as part of extended ack message in the form of NLMSGERR_ATTR_COOKIE attribute. Currently the cookie is ignored if error code is non-zero but there is no technical reason for such limitation and it can be useful to provide machine parseable information as part of an error message. Include NLMSGERR_ATTR_COOKIE whenever the cookie has been set, regardless of error code. Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 43 ++++++++++++++++------------------------ 1 file changed, 17 insertions(+), 26 deletions(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 5313f1cec170..2f234791b879 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2392,19 +2392,14 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, if (nlk_has_extack && extack && extack->_msg) tlvlen += nla_total_size(strlen(extack->_msg) + 1); - if (err) { - if (!(nlk->flags & NETLINK_F_CAP_ACK)) - payload += nlmsg_len(nlh); - else - flags |= NLM_F_CAPPED; - if (nlk_has_extack && extack && extack->bad_attr) - tlvlen += nla_total_size(sizeof(u32)); - } else { + if (err && !(nlk->flags & NETLINK_F_CAP_ACK)) + payload += nlmsg_len(nlh); + else flags |= NLM_F_CAPPED; - - if (nlk_has_extack && extack && extack->cookie_len) - tlvlen += nla_total_size(extack->cookie_len); - } + if (err && nlk_has_extack && extack && extack->bad_attr) + tlvlen += nla_total_size(sizeof(u32)); + if (nlk_has_extack && extack && extack->cookie_len) + tlvlen += nla_total_size(extack->cookie_len); if (tlvlen) flags |= NLM_F_ACK_TLVS; @@ -2427,20 +2422,16 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG, extack->_msg)); } - if (err) { - if (extack->bad_attr && - !WARN_ON((u8 *)extack->bad_attr < in_skb->data || - (u8 *)extack->bad_attr >= in_skb->data + - in_skb->len)) - WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_OFFS, - (u8 *)extack->bad_attr - - (u8 *)nlh)); - } else { - if (extack->cookie_len) - WARN_ON(nla_put(skb, NLMSGERR_ATTR_COOKIE, - extack->cookie_len, - extack->cookie)); - } + if (err && extack->bad_attr && + !WARN_ON((u8 *)extack->bad_attr < in_skb->data || + (u8 *)extack->bad_attr >= in_skb->data + + in_skb->len)) + WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_OFFS, + (u8 *)extack->bad_attr - + (u8 *)nlh)); + if (extack->cookie_len) + WARN_ON(nla_put(skb, NLMSGERR_ATTR_COOKIE, + extack->cookie_len, extack->cookie)); } nlmsg_end(skb, rep); From f1388ec4a144f40348321a0915c5535d623e165c Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Sun, 15 Mar 2020 18:17:48 +0100 Subject: [PATCH 205/372] netlink: add nl_set_extack_cookie_u32() Similar to existing nl_set_extack_cookie_u64(), add new helper nl_set_extack_cookie_u32() which sets extack cookie to a u32 value. Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller --- include/linux/netlink.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 205fa7b1f07a..4090524c3462 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -119,6 +119,15 @@ static inline void nl_set_extack_cookie_u64(struct netlink_ext_ack *extack, extack->cookie_len = sizeof(__cookie); } +static inline void nl_set_extack_cookie_u32(struct netlink_ext_ack *extack, + u32 cookie) +{ + u32 __cookie = cookie; + + memcpy(extack->cookie, &__cookie, sizeof(__cookie)); + extack->cookie_len = sizeof(__cookie); +} + void netlink_kernel_release(struct sock *sk); int __netlink_change_ngroups(struct sock *sk, unsigned int groups); int netlink_change_ngroups(struct sock *sk, unsigned int groups); From 2363d73a2f3e92787f336721c40918ba2eb0c74c Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Sun, 15 Mar 2020 18:17:53 +0100 Subject: [PATCH 206/372] ethtool: reject unrecognized request flags As pointed out by Jakub Kicinski, we ethtool netlink code should respond with an error if request head has flags set which are not recognized by kernel, either as a mistake or because it expects functionality introduced in later kernel versions. To avoid unnecessary roundtrips, use extack cookie to provide the information about supported request flags. Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller --- net/ethtool/netlink.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 180c194fab07..fc9e0b806889 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -40,6 +40,7 @@ int ethnl_parse_header(struct ethnl_req_info *req_info, struct nlattr *tb[ETHTOOL_A_HEADER_MAX + 1]; const struct nlattr *devname_attr; struct net_device *dev = NULL; + u32 flags = 0; int ret; if (!header) { @@ -50,8 +51,17 @@ int ethnl_parse_header(struct ethnl_req_info *req_info, ethnl_header_policy, extack); if (ret < 0) return ret; - devname_attr = tb[ETHTOOL_A_HEADER_DEV_NAME]; + if (tb[ETHTOOL_A_HEADER_FLAGS]) { + flags = nla_get_u32(tb[ETHTOOL_A_HEADER_FLAGS]); + if (flags & ~ETHTOOL_FLAG_ALL) { + NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_HEADER_FLAGS], + "unrecognized request flags"); + nl_set_extack_cookie_u32(extack, ETHTOOL_FLAG_ALL); + return -EOPNOTSUPP; + } + } + devname_attr = tb[ETHTOOL_A_HEADER_DEV_NAME]; if (tb[ETHTOOL_A_HEADER_DEV_INDEX]) { u32 ifindex = nla_get_u32(tb[ETHTOOL_A_HEADER_DEV_INDEX]); @@ -90,9 +100,7 @@ int ethnl_parse_header(struct ethnl_req_info *req_info, } req_info->dev = dev; - if (tb[ETHTOOL_A_HEADER_FLAGS]) - req_info->flags = nla_get_u32(tb[ETHTOOL_A_HEADER_FLAGS]); - + req_info->flags = flags; return 0; } From 4384f167ce5fa7241b61bb0984d651bc528ddebe Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 16 Mar 2020 10:05:05 +0100 Subject: [PATCH 207/372] ALSA: seq: virmidi: Fix running status after receiving sysex The virmidi driver handles sysex event exceptionally in a short-cut snd_seq_dump_var_event() call, but this missed the reset of the running status. As a result, it may lead to an incomplete command right after the sysex when an event with the same running status was queued. Fix it by clearing the running status properly via alling snd_midi_event_reset_decode() for that code path. Reported-by: Andreas Steinmetz Cc: Link: https://lore.kernel.org/r/3b4a4e0f232b7afbaf0a843f63d0e538e3029bfd.camel@domdv.de Link: https://lore.kernel.org/r/20200316090506.23966-2-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/seq/seq_virmidi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/core/seq/seq_virmidi.c b/sound/core/seq/seq_virmidi.c index 626d87c1539b..77d7037d1476 100644 --- a/sound/core/seq/seq_virmidi.c +++ b/sound/core/seq/seq_virmidi.c @@ -81,6 +81,7 @@ static int snd_virmidi_dev_receive_event(struct snd_virmidi_dev *rdev, if ((ev->flags & SNDRV_SEQ_EVENT_LENGTH_MASK) != SNDRV_SEQ_EVENT_LENGTH_VARIABLE) continue; snd_seq_dump_var_event(ev, (snd_seq_dump_func_t)snd_rawmidi_receive, vmidi->substream); + snd_midi_event_reset_decode(vmidi->parser); } else { len = snd_midi_event_decode(vmidi->parser, msg, sizeof(msg), ev); if (len > 0) From 6c3171ef76a0bad892050f6959a7eac02fb16df7 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 16 Mar 2020 10:05:06 +0100 Subject: [PATCH 208/372] ALSA: seq: oss: Fix running status after receiving sysex This is a similar bug like the previous case for virmidi: the invalid running status is kept after receiving a sysex message. Again the fix is to clear the running status after handling the sysex. Cc: Link: https://lore.kernel.org/r/3b4a4e0f232b7afbaf0a843f63d0e538e3029bfd.camel@domdv.de Link: https://lore.kernel.org/r/20200316090506.23966-3-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/core/seq/oss/seq_oss_midi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/core/seq/oss/seq_oss_midi.c b/sound/core/seq/oss/seq_oss_midi.c index a88c235b2ea3..2ddfe2226651 100644 --- a/sound/core/seq/oss/seq_oss_midi.c +++ b/sound/core/seq/oss/seq_oss_midi.c @@ -602,6 +602,7 @@ send_midi_event(struct seq_oss_devinfo *dp, struct snd_seq_event *ev, struct seq len = snd_seq_oss_timer_start(dp->timer); if (ev->type == SNDRV_SEQ_EVENT_SYSEX) { snd_seq_oss_readq_sysex(dp->readq, mdev->seq_device, ev); + snd_midi_event_reset_decode(mdev->coder); } else { len = snd_midi_event_decode(mdev->coder, msg, sizeof(msg), ev); if (len > 0) From 9777d8b2d2a148bc5d46694ec4f2559282fec8cf Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 11 Mar 2020 09:26:23 +0000 Subject: [PATCH 209/372] drm/i915/execlists: Track active elements during dequeue Record the initial active element we use when building the next ELSP submission, so that we can compare against it latter to see if there's no change. Fixes: 44d0a9c05bc0 ("drm/i915/execlists: Skip redundant resubmission") Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200311092624.10012-2-chris@chris-wilson.co.uk (cherry picked from commit 60ef5b7ac6a131f09d287a5f156c878c2c926a30) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_lrc.c | 32 +++++++++++------------------ 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 940e7f7df69a..5e8928edf376 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1600,17 +1600,6 @@ static void virtual_xfer_breadcrumbs(struct virtual_engine *ve, spin_unlock(&old->breadcrumbs.irq_lock); } -static struct i915_request * -last_active(const struct intel_engine_execlists *execlists) -{ - struct i915_request * const *last = READ_ONCE(execlists->active); - - while (*last && i915_request_completed(*last)) - last++; - - return *last; -} - #define for_each_waiter(p__, rq__) \ list_for_each_entry_lockless(p__, \ &(rq__)->sched.waiters_list, \ @@ -1740,11 +1729,9 @@ static void record_preemption(struct intel_engine_execlists *execlists) (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++); } -static unsigned long active_preempt_timeout(struct intel_engine_cs *engine) +static unsigned long active_preempt_timeout(struct intel_engine_cs *engine, + const struct i915_request *rq) { - struct i915_request *rq; - - rq = last_active(&engine->execlists); if (!rq) return 0; @@ -1755,13 +1742,14 @@ static unsigned long active_preempt_timeout(struct intel_engine_cs *engine) return READ_ONCE(engine->props.preempt_timeout_ms); } -static void set_preempt_timeout(struct intel_engine_cs *engine) +static void set_preempt_timeout(struct intel_engine_cs *engine, + const struct i915_request *rq) { if (!intel_engine_has_preempt_reset(engine)) return; set_timer_ms(&engine->execlists.preempt, - active_preempt_timeout(engine)); + active_preempt_timeout(engine, rq)); } static inline void clear_ports(struct i915_request **ports, int count) @@ -1774,6 +1762,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) struct intel_engine_execlists * const execlists = &engine->execlists; struct i915_request **port = execlists->pending; struct i915_request ** const last_port = port + execlists->port_mask; + struct i915_request * const *active; struct i915_request *last; struct rb_node *rb; bool submit = false; @@ -1828,7 +1817,10 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * i.e. we will retrigger preemption following the ack in case * of trouble. */ - last = last_active(execlists); + active = READ_ONCE(execlists->active); + while ((last = *active) && i915_request_completed(last)) + active++; + if (last) { if (need_preempt(engine, last, rb)) { ENGINE_TRACE(engine, @@ -2110,7 +2102,7 @@ done: * Skip if we ended up with exactly the same set of requests, * e.g. trying to timeslice a pair of ordered contexts */ - if (!memcmp(execlists->active, execlists->pending, + if (!memcmp(active, execlists->pending, (port - execlists->pending + 1) * sizeof(*port))) { do execlists_schedule_out(fetch_and_zero(port)); @@ -2121,7 +2113,7 @@ done: clear_ports(port + 1, last_port - port); execlists_submit_ports(engine); - set_preempt_timeout(engine); + set_preempt_timeout(engine, *active); } else { skip_submit: ring_set_paused(engine, 0); From c09f6b4d0883dfb859c1ddcfb04c3260ef310ce0 Mon Sep 17 00:00:00 2001 From: Caz Yokoyama Date: Wed, 4 Mar 2020 14:13:59 -0800 Subject: [PATCH 210/372] Revert "drm/i915/tgl: Add extra hdc flush workaround" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 36a6b5d964d995b536b1925ec42052ee40ba92c4. The commit takes care Wa_1604544889 which was fixed on a0 stepping based on a0 replan. So no SW workaround is required on any stepping now. Reviewed-by: Matt Roper Signed-off-by: Caz Yokoyama Signed-off-by: José Roberto de Souza Fixes: 36a6b5d964d9 ("drm/i915/tgl: Add extra hdc flush workaround") Link: https://patchwork.freedesktop.org/patch/msgid/1c751032ce79c80c5485cae315f1a9904ce07cac.1583359940.git.caz.yokoyama@intel.com (cherry picked from commit 175c4d9b3b9a60b4ea0b8cd034011808c6a03b05) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_lrc.c | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 5e8928edf376..31455eceeb0c 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -4000,26 +4000,6 @@ static int gen12_emit_flush_render(struct i915_request *request, *cs++ = preparser_disable(false); intel_ring_advance(request, cs); - - /* - * Wa_1604544889:tgl - */ - if (IS_TGL_REVID(request->i915, TGL_REVID_A0, TGL_REVID_A0)) { - flags = 0; - flags |= PIPE_CONTROL_CS_STALL; - flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH; - - flags |= PIPE_CONTROL_STORE_DATA_INDEX; - flags |= PIPE_CONTROL_QW_WRITE; - - cs = intel_ring_begin(request, 6); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - cs = gen8_emit_pipe_control(cs, flags, - LRC_PPHWSP_SCRATCH_ADDR); - intel_ring_advance(request, cs); - } } return 0; From fe8b7085cac3b0db03cdbb26d9309bc27325df0a Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 11 Mar 2020 09:22:55 -0700 Subject: [PATCH 211/372] drm/i915: Handle all MCR ranges The bspec documents multiple MCR ranges; make sure they're all captured by the driver. Bspec: 13991, 52079 Fixes: 592a7c5e082e ("drm/i915: Extend non readable mcr range") Cc: Mika Kuoppala Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20200311162300.1838847-2-matthew.d.roper@intel.com Reviewed-by: Mika Kuoppala (cherry picked from commit 415d1269975d3fc21c13a6ae8de7b5fe0e6febb1) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_workarounds.c | 25 ++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 173a7f2d109f..6c2f8462e0f3 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1529,15 +1529,34 @@ err_obj: return ERR_PTR(err); } +static const struct { + u32 start; + u32 end; +} mcr_ranges_gen8[] = { + { .start = 0x5500, .end = 0x55ff }, + { .start = 0x7000, .end = 0x7fff }, + { .start = 0x9400, .end = 0x97ff }, + { .start = 0xb000, .end = 0xb3ff }, + { .start = 0xe000, .end = 0xe7ff }, + {}, +}; + static bool mcr_range(struct drm_i915_private *i915, u32 offset) { + int i; + + if (INTEL_GEN(i915) < 8) + return false; + /* - * Registers in this range are affected by the MCR selector + * Registers in these ranges are affected by the MCR selector * which only controls CPU initiated MMIO. Routing does not * work for CS access so we cannot verify them on this path. */ - if (INTEL_GEN(i915) >= 8 && (offset >= 0xb000 && offset <= 0xb4ff)) - return true; + for (i = 0; mcr_ranges_gen8[i].start; i++) + if (offset >= mcr_ranges_gen8[i].start && + offset <= mcr_ranges_gen8[i].end) + return true; return false; } From 58322a1590fc189a8e1e349d309637d4a4942840 Mon Sep 17 00:00:00 2001 From: Chen-Tsung Hsieh Date: Mon, 16 Mar 2020 15:24:19 +0800 Subject: [PATCH 212/372] HID: google: add moonball USB id Add 1 additional hammer-like device. Signed-off-by: Chen-Tsung Hsieh Reviewed-by: Nicolas Boichat Signed-off-by: Jiri Kosina --- drivers/hid/hid-google-hammer.c | 2 ++ drivers/hid/hid-ids.h | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/hid/hid-google-hammer.c b/drivers/hid/hid-google-hammer.c index 2aa4ed157aec..85a054f1ce38 100644 --- a/drivers/hid/hid-google-hammer.c +++ b/drivers/hid/hid-google-hammer.c @@ -532,6 +532,8 @@ static const struct hid_device_id hammer_devices[] = { USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_MAGNEMITE) }, { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_MASTERBALL) }, + { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, + USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_MOONBALL) }, { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_STAFF) }, { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 3a400ce603c4..33fddab41722 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -478,6 +478,7 @@ #define USB_DEVICE_ID_GOOGLE_WHISKERS 0x5030 #define USB_DEVICE_ID_GOOGLE_MASTERBALL 0x503c #define USB_DEVICE_ID_GOOGLE_MAGNEMITE 0x503d +#define USB_DEVICE_ID_GOOGLE_MOONBALL 0x5044 #define USB_VENDOR_ID_GOTOP 0x08f2 #define USB_DEVICE_ID_SUPER_Q2 0x007f From 819d578d51d0ce73f06e35d69395ef55cd683a74 Mon Sep 17 00:00:00 2001 From: Tony Fischetti Date: Thu, 12 Mar 2020 12:16:06 -0400 Subject: [PATCH 213/372] HID: add ALWAYS_POLL quirk to lenovo pixart mouse A lenovo pixart mouse (17ef:608d) is afflicted common the the malfunction where it disconnects and reconnects every minute--each time incrementing the device number. This patch adds the device id of the device and specifies that it needs the HID_QUIRK_ALWAYS_POLL quirk in order to work properly. Signed-off-by: Tony Fischetti Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-quirks.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 33fddab41722..9f2213426556 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -727,6 +727,7 @@ #define USB_DEVICE_ID_LENOVO_X1_COVER 0x6085 #define USB_DEVICE_ID_LENOVO_X1_TAB 0x60a3 #define USB_DEVICE_ID_LENOVO_X1_TAB3 0x60b5 +#define USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_608D 0x608d #define USB_VENDOR_ID_LG 0x1fd2 #define USB_DEVICE_ID_LG_MULTITOUCH 0x0064 diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index 0e7b2d998395..3735546bb524 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -103,6 +103,7 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_PENSKETCH_M912), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_M406XE), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_PIXART_USB_OPTICAL_MOUSE_ID2), HID_QUIRK_ALWAYS_POLL }, + { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_608D), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_C007), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_C077), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_KEYBOARD_G710_PLUS), HID_QUIRK_NOGET }, From 0fe1568061be372a908b408b93c2f64d1d95b6f6 Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Mon, 16 Mar 2020 14:05:56 +0200 Subject: [PATCH 214/372] net: fsl/fman: treat all RGMII modes in memac_adjust_link() Treat all internal delay variants the same as RGMII. Signed-off-by: Madalin Bucur Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/fman/fman_memac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.c b/drivers/net/ethernet/freescale/fman/fman_memac.c index e1901874c19f..0d2b4ab01f24 100644 --- a/drivers/net/ethernet/freescale/fman/fman_memac.c +++ b/drivers/net/ethernet/freescale/fman/fman_memac.c @@ -782,7 +782,7 @@ int memac_adjust_link(struct fman_mac *memac, u16 speed) /* Set full duplex */ tmp &= ~IF_MODE_HD; - if (memac->phy_if == PHY_INTERFACE_MODE_RGMII) { + if (phy_interface_mode_is_rgmii(memac->phy_if)) { /* Configure RGMII in manual mode */ tmp &= ~IF_MODE_RGMII_AUTO; tmp &= ~IF_MODE_RGMII_SP_MASK; From 4022d808c45277693ea86478fab1f081ebf997e8 Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Mon, 16 Mar 2020 14:05:57 +0200 Subject: [PATCH 215/372] arm64: dts: ls1043a-rdb: correct RGMII delay mode to rgmii-id The correct setting for the RGMII ports on LS1043ARDB is to enable delay on both Rx and Tx so the interface mode used must be PHY_INTERFACE_MODE_RGMII_ID. Since commit 1b3047b5208a80 ("net: phy: realtek: add support for configuring the RX delay on RTL8211F") the Realtek 8211F PHY driver has control over the RGMII RX delay and it is disabling it for RGMII_TXID. The LS1043ARDB uses two such PHYs in RGMII_ID mode but in the device tree the mode was described as "rgmii_txid". This issue was not apparent at the time as the PHY driver took the same action for RGMII_TXID and RGMII_ID back then but it became visible (RX no longer working) after the above patch. Changing the phy-connection-type to "rgmii-id" to address the issue. Fixes: bf02f2ffe59c ("arm64: dts: add LS1043A DPAA FMan support") Signed-off-by: Madalin Bucur Signed-off-by: David S. Miller --- arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts b/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts index 4223a2352d45..dde50c88f5e3 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1043a-rdb.dts @@ -119,12 +119,12 @@ ethernet@e4000 { phy-handle = <&rgmii_phy1>; - phy-connection-type = "rgmii-txid"; + phy-connection-type = "rgmii-id"; }; ethernet@e6000 { phy-handle = <&rgmii_phy2>; - phy-connection-type = "rgmii-txid"; + phy-connection-type = "rgmii-id"; }; ethernet@e8000 { From d79e9d7c1e4ba5f95f2ff3541880c40ea9722212 Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Mon, 16 Mar 2020 14:05:58 +0200 Subject: [PATCH 216/372] arm64: dts: ls1046ardb: set RGMII interfaces to RGMII_ID mode The correct setting for the RGMII ports on LS1046ARDB is to enable delay on both Rx and Tx so the interface mode used must be PHY_INTERFACE_MODE_RGMII_ID. Since commit 1b3047b5208a80 ("net: phy: realtek: add support for configuring the RX delay on RTL8211F") the Realtek 8211F PHY driver has control over the RGMII RX delay and it is disabling it for RGMII_TXID. The LS1046ARDB uses two such PHYs in RGMII_ID mode but in the device tree the mode was described as "rgmii". Changing the phy-connection-type to "rgmii-id" to address the issue. Fixes: 3fa395d2c48a ("arm64: dts: add LS1046A DPAA FMan nodes") Signed-off-by: Madalin Bucur Signed-off-by: David S. Miller --- arch/arm64/boot/dts/freescale/fsl-ls1046a-rdb.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1046a-rdb.dts b/arch/arm64/boot/dts/freescale/fsl-ls1046a-rdb.dts index dbc23d6cd3b4..d53ccc56bb63 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1046a-rdb.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1046a-rdb.dts @@ -131,12 +131,12 @@ &fman0 { ethernet@e4000 { phy-handle = <&rgmii_phy1>; - phy-connection-type = "rgmii"; + phy-connection-type = "rgmii-id"; }; ethernet@e6000 { phy-handle = <&rgmii_phy2>; - phy-connection-type = "rgmii"; + phy-connection-type = "rgmii-id"; }; ethernet@e8000 { From b317538c47943f9903860d83cc0060409e12d2ff Mon Sep 17 00:00:00 2001 From: Zheng Wei Date: Mon, 16 Mar 2020 22:23:47 +0800 Subject: [PATCH 217/372] net: vxge: fix wrong __VA_ARGS__ usage printk in macro vxge_debug_ll uses __VA_ARGS__ without "##" prefix, it causes a build error when there is no variable arguments(e.g. only fmt is specified.). Signed-off-by: Zheng Wei Signed-off-by: David S. Miller --- drivers/net/ethernet/neterion/vxge/vxge-config.h | 2 +- drivers/net/ethernet/neterion/vxge/vxge-main.h | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/neterion/vxge/vxge-config.h b/drivers/net/ethernet/neterion/vxge/vxge-config.h index e678ba379598..628fa9b2f741 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-config.h +++ b/drivers/net/ethernet/neterion/vxge/vxge-config.h @@ -2045,7 +2045,7 @@ vxge_hw_vpath_strip_fcs_check(struct __vxge_hw_device *hldev, u64 vpath_mask); if ((level >= VXGE_ERR && VXGE_COMPONENT_LL & VXGE_DEBUG_ERR_MASK) || \ (level >= VXGE_TRACE && VXGE_COMPONENT_LL & VXGE_DEBUG_TRACE_MASK))\ if ((mask & VXGE_DEBUG_MASK) == mask) \ - printk(fmt "\n", __VA_ARGS__); \ + printk(fmt "\n", ##__VA_ARGS__); \ } while (0) #else #define vxge_debug_ll(level, mask, fmt, ...) diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.h b/drivers/net/ethernet/neterion/vxge/vxge-main.h index 59a57ff5e96a..9c86f4f9cd42 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.h +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.h @@ -452,49 +452,49 @@ int vxge_fw_upgrade(struct vxgedev *vdev, char *fw_name, int override); #if (VXGE_DEBUG_LL_CONFIG & VXGE_DEBUG_MASK) #define vxge_debug_ll_config(level, fmt, ...) \ - vxge_debug_ll(level, VXGE_DEBUG_LL_CONFIG, fmt, __VA_ARGS__) + vxge_debug_ll(level, VXGE_DEBUG_LL_CONFIG, fmt, ##__VA_ARGS__) #else #define vxge_debug_ll_config(level, fmt, ...) #endif #if (VXGE_DEBUG_INIT & VXGE_DEBUG_MASK) #define vxge_debug_init(level, fmt, ...) \ - vxge_debug_ll(level, VXGE_DEBUG_INIT, fmt, __VA_ARGS__) + vxge_debug_ll(level, VXGE_DEBUG_INIT, fmt, ##__VA_ARGS__) #else #define vxge_debug_init(level, fmt, ...) #endif #if (VXGE_DEBUG_TX & VXGE_DEBUG_MASK) #define vxge_debug_tx(level, fmt, ...) \ - vxge_debug_ll(level, VXGE_DEBUG_TX, fmt, __VA_ARGS__) + vxge_debug_ll(level, VXGE_DEBUG_TX, fmt, ##__VA_ARGS__) #else #define vxge_debug_tx(level, fmt, ...) #endif #if (VXGE_DEBUG_RX & VXGE_DEBUG_MASK) #define vxge_debug_rx(level, fmt, ...) \ - vxge_debug_ll(level, VXGE_DEBUG_RX, fmt, __VA_ARGS__) + vxge_debug_ll(level, VXGE_DEBUG_RX, fmt, ##__VA_ARGS__) #else #define vxge_debug_rx(level, fmt, ...) #endif #if (VXGE_DEBUG_MEM & VXGE_DEBUG_MASK) #define vxge_debug_mem(level, fmt, ...) \ - vxge_debug_ll(level, VXGE_DEBUG_MEM, fmt, __VA_ARGS__) + vxge_debug_ll(level, VXGE_DEBUG_MEM, fmt, ##__VA_ARGS__) #else #define vxge_debug_mem(level, fmt, ...) #endif #if (VXGE_DEBUG_ENTRYEXIT & VXGE_DEBUG_MASK) #define vxge_debug_entryexit(level, fmt, ...) \ - vxge_debug_ll(level, VXGE_DEBUG_ENTRYEXIT, fmt, __VA_ARGS__) + vxge_debug_ll(level, VXGE_DEBUG_ENTRYEXIT, fmt, ##__VA_ARGS__) #else #define vxge_debug_entryexit(level, fmt, ...) #endif #if (VXGE_DEBUG_INTR & VXGE_DEBUG_MASK) #define vxge_debug_intr(level, fmt, ...) \ - vxge_debug_ll(level, VXGE_DEBUG_INTR, fmt, __VA_ARGS__) + vxge_debug_ll(level, VXGE_DEBUG_INTR, fmt, ##__VA_ARGS__) #else #define vxge_debug_intr(level, fmt, ...) #endif From 065fd83e1be2e1ba0d446a257fd86a3cc7bddb51 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Mon, 16 Mar 2020 22:56:36 +0800 Subject: [PATCH 218/372] net: mvneta: Fix the case where the last poll did not process all rx For the case where the last mvneta_poll did not process all RX packets, we need to xor the pp->cause_rx_tx or port->cause_rx_tx before claculating the rx_queue. Fixes: 2dcf75e2793c ("net: mvneta: Associate RX queues with each CPU") Signed-off-by: Jisheng Zhang Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvneta.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 98017e7d5dd0..11babc79dc6c 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -3036,11 +3036,10 @@ static int mvneta_poll(struct napi_struct *napi, int budget) /* For the case where the last mvneta_poll did not process all * RX packets */ - rx_queue = fls(((cause_rx_tx >> 8) & 0xff)); - cause_rx_tx |= pp->neta_armada3700 ? pp->cause_rx_tx : port->cause_rx_tx; + rx_queue = fls(((cause_rx_tx >> 8) & 0xff)); if (rx_queue) { rx_queue = rx_queue - 1; if (pp->bm_priv) From 5190044c2965514a973184ca68ef5fad57a24670 Mon Sep 17 00:00:00 2001 From: Jessica Yu Date: Wed, 11 Mar 2020 18:01:20 +0100 Subject: [PATCH 219/372] modpost: move the namespace field in Module.symvers last In order to preserve backwards compatability with kmod tools, we have to move the namespace field in Module.symvers last, as the depmod -e -E option looks at the first three fields in Module.symvers to check symbol versions (and it's expected they stay in the original order of crc, symbol, module). In addition, update an ancient comment above read_dump() in modpost that suggested that the export type field in Module.symvers was optional. I suspect that there were historical reasons behind that comment that are no longer accurate. We have been unconditionally printing the export type since 2.6.18 (commit bd5cbcedf44), which is over a decade ago now. Fix up read_dump() to treat each field as non-optional. I suspect the original read_dump() code treated the export field as optional in order to support pre <= 2.6.18 Module.symvers (which did not have the export type field). Note that although symbol namespaces are optional, the field will not be omitted from Module.symvers if a symbol does not have a namespace. In this case, the field will simply be empty and the next delimiter or end of line will follow. Cc: stable@vger.kernel.org Fixes: cb9b55d21fe0 ("modpost: add support for symbol namespaces") Tested-by: Matthias Maennich Reviewed-by: Matthias Maennich Reviewed-by: Lucas De Marchi Signed-off-by: Jessica Yu Signed-off-by: Masahiro Yamada --- Documentation/kbuild/modules.rst | 4 ++-- scripts/export_report.pl | 2 +- scripts/mod/modpost.c | 24 ++++++++++++------------ 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/Documentation/kbuild/modules.rst b/Documentation/kbuild/modules.rst index 69fa48ee93d6..e0b45a257f21 100644 --- a/Documentation/kbuild/modules.rst +++ b/Documentation/kbuild/modules.rst @@ -470,9 +470,9 @@ build. The syntax of the Module.symvers file is:: - + - 0xe1cc2a05 usb_stor_suspend USB_STORAGE drivers/usb/storage/usb-storage EXPORT_SYMBOL_GPL + 0xe1cc2a05 usb_stor_suspend drivers/usb/storage/usb-storage EXPORT_SYMBOL_GPL USB_STORAGE The fields are separated by tabs and values may be empty (e.g. if no namespace is defined for an exported symbol). diff --git a/scripts/export_report.pl b/scripts/export_report.pl index 548330e8c4e7..feb3d5542a62 100755 --- a/scripts/export_report.pl +++ b/scripts/export_report.pl @@ -94,7 +94,7 @@ if (defined $opt{'o'}) { # while ( <$module_symvers> ) { chomp; - my (undef, $symbol, $namespace, $module, $gpl) = split('\t'); + my (undef, $symbol, $module, $gpl, $namespace) = split('\t'); $SYMBOL { $symbol } = [ $module , "0" , $symbol, $gpl]; } close($module_symvers); diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 7edfdb2f4497..6ab235354f36 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -2427,7 +2427,7 @@ static void write_if_changed(struct buffer *b, const char *fname) } /* parse Module.symvers file. line format: - * 0x12345678symbolmodule[[export]something] + * 0x12345678symbolmoduleexportnamespace **/ static void read_dump(const char *fname, unsigned int kernel) { @@ -2440,7 +2440,7 @@ static void read_dump(const char *fname, unsigned int kernel) return; while ((line = get_next_line(&pos, file, size))) { - char *symname, *namespace, *modname, *d, *export, *end; + char *symname, *namespace, *modname, *d, *export; unsigned int crc; struct module *mod; struct symbol *s; @@ -2448,16 +2448,16 @@ static void read_dump(const char *fname, unsigned int kernel) if (!(symname = strchr(line, '\t'))) goto fail; *symname++ = '\0'; - if (!(namespace = strchr(symname, '\t'))) - goto fail; - *namespace++ = '\0'; - if (!(modname = strchr(namespace, '\t'))) + if (!(modname = strchr(symname, '\t'))) goto fail; *modname++ = '\0'; - if ((export = strchr(modname, '\t')) != NULL) - *export++ = '\0'; - if (export && ((end = strchr(export, '\t')) != NULL)) - *end = '\0'; + if (!(export = strchr(modname, '\t'))) + goto fail; + *export++ = '\0'; + if (!(namespace = strchr(export, '\t'))) + goto fail; + *namespace++ = '\0'; + crc = strtoul(line, &d, 16); if (*symname == '\0' || *modname == '\0' || *d != '\0') goto fail; @@ -2508,9 +2508,9 @@ static void write_dump(const char *fname) namespace = symbol->namespace; buf_printf(&buf, "0x%08x\t%s\t%s\t%s\t%s\n", symbol->crc, symbol->name, - namespace ? namespace : "", symbol->module->name, - export_str(symbol->export)); + export_str(symbol->export), + namespace ? namespace : ""); } symbol = symbol->next; } From 32ca98feab8c9076c89c0697c5a85e46fece809d Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Mon, 16 Mar 2020 19:53:00 +0200 Subject: [PATCH 220/372] net: ip_gre: Accept IFLA_INFO_DATA-less configuration The fix referenced below causes a crash when an ERSPAN tunnel is created without passing IFLA_INFO_DATA. Fix by validating passed-in data in the same way as ipgre does. Fixes: e1f8f78ffe98 ("net: ip_gre: Separate ERSPAN newlink / changelink callbacks") Reported-by: syzbot+1b4ebf4dae4e510dd219@syzkaller.appspotmail.com Signed-off-by: Petr Machata Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 7765c65fc7d2..029b24eeafba 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1168,6 +1168,8 @@ static int erspan_netlink_parms(struct net_device *dev, err = ipgre_netlink_parms(dev, data, tb, parms, fwmark); if (err) return err; + if (!data) + return 0; if (data[IFLA_GRE_ERSPAN_VER]) { t->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]); From 785d74ec3bbf26ac7f6e92e6e96a259aec0f107a Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Mon, 16 Mar 2020 14:25:19 +0300 Subject: [PATCH 221/372] initramfs: restore default compression behavior Even though INITRAMFS_SOURCE kconfig option isn't set in most of defconfigs it is used (set) extensively by various build systems. Commit f26661e12765 ("initramfs: make initramfs compression choice non-optional") has changed default compression mode. Previously we compress initramfs using available compression algorithm. Now we don't use any compression at all by default. It significantly increases the image size in case of build system chooses embedded initramfs. Initially I faced with this issue while using buildroot. As of today it's not possible to set preferred compression mode in target defconfig as this option depends on INITRAMFS_SOURCE being set. Modification of all build systems either doesn't look like good option. Let's instead rewrite initramfs compression mode choices list the way that "INITRAMFS_COMPRESSION_NONE" will be the last option in the list. In that case it will be chosen only if all other options (which implements any compression) are not available. Signed-off-by: Eugeniy Paltsev Signed-off-by: Masahiro Yamada --- usr/Kconfig | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/usr/Kconfig b/usr/Kconfig index bdf5bbd40727..96afb03b65f9 100644 --- a/usr/Kconfig +++ b/usr/Kconfig @@ -124,17 +124,6 @@ choice If in doubt, select 'None' -config INITRAMFS_COMPRESSION_NONE - bool "None" - help - Do not compress the built-in initramfs at all. This may sound wasteful - in space, but, you should be aware that the built-in initramfs will be - compressed at a later stage anyways along with the rest of the kernel, - on those architectures that support this. However, not compressing the - initramfs may lead to slightly higher memory consumption during a - short time at boot, while both the cpio image and the unpacked - filesystem image will be present in memory simultaneously - config INITRAMFS_COMPRESSION_GZIP bool "Gzip" depends on RD_GZIP @@ -207,4 +196,15 @@ config INITRAMFS_COMPRESSION_LZ4 If you choose this, keep in mind that most distros don't provide lz4 by default which could cause a build failure. +config INITRAMFS_COMPRESSION_NONE + bool "None" + help + Do not compress the built-in initramfs at all. This may sound wasteful + in space, but, you should be aware that the built-in initramfs will be + compressed at a later stage anyways along with the rest of the kernel, + on those architectures that support this. However, not compressing the + initramfs may lead to slightly higher memory consumption during a + short time at boot, while both the cpio image and the unpacked + filesystem image will be present in memory simultaneously + endchoice From 84d49b3d08a1d33690cc159036f381c31c27c17b Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 16 Mar 2020 19:47:52 +0100 Subject: [PATCH 222/372] mmc: sdhci-acpi: Switch signal voltage back to 3.3V on suspend on external microSD on Lenovo Miix 320 Based on a sample of 7 DSDTs from Cherry Trail devices using an AXP288 PMIC depending on the design one of 2 possible LDOs on the PMIC is used for the MMC signalling voltage, either DLDO3 or GPIO1LDO (GPIO1 pin in low noise LDO mode). The Lenovo Miix 320-10ICR uses GPIO1LDO in the SHC1 ACPI device's DSM methods to set 3.3 or 1.8 signalling voltage and this appears to work as advertised, so presumably the device is actually using GPIO1LDO for the external microSD signalling voltage. But this device has a bug in the _PS0 method of the SHC1 ACPI device, the DSM remembers the last set signalling voltage and the _PS0 restores this after a (runtime) suspend-resume cycle, but it "restores" the voltage on DLDO3 instead of setting it on GPIO1LDO as the DSM method does. DLDO3 is used for the LCD and setting it to 1.8V causes the LCD to go black. This commit works around this issue by calling the Intel DSM to reset the signal voltage to 3.3V after the host has been runtime suspended. This will make the _PS0 method reprogram the DLDO3 voltage to 3.3V, which leaves it at its original setting fixing the LCD going black. This commit adds and uses a DMI quirk mechanism to only trigger this workaround on the Lenovo Miix 320 while leaving the behavior of the driver unchanged on other devices. BugLink: https://bugs.freedesktop.org/show_bug.cgi?id=111294 BugLink: https://gitlab.freedesktop.org/drm/intel/issues/355 Reported-by: russianneuromancer Signed-off-by: Hans de Goede Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200316184753.393458-1-hdegoede@redhat.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-acpi.c | 68 +++++++++++++++++++++++++++++++++-- 1 file changed, 66 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c index 9651dca6863e..b4c1b2367066 100644 --- a/drivers/mmc/host/sdhci-acpi.c +++ b/drivers/mmc/host/sdhci-acpi.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -72,9 +73,15 @@ struct sdhci_acpi_host { const struct sdhci_acpi_slot *slot; struct platform_device *pdev; bool use_runtime_pm; + bool is_intel; + bool reset_signal_volt_on_suspend; unsigned long private[0] ____cacheline_aligned; }; +enum { + DMI_QUIRK_RESET_SD_SIGNAL_VOLT_ON_SUSP = BIT(0), +}; + static inline void *sdhci_acpi_priv(struct sdhci_acpi_host *c) { return (void *)c->private; @@ -391,6 +398,8 @@ static int intel_probe_slot(struct platform_device *pdev, struct acpi_device *ad host->mmc_host_ops.start_signal_voltage_switch = intel_start_signal_voltage_switch; + c->is_intel = true; + return 0; } @@ -647,6 +656,24 @@ static const struct acpi_device_id sdhci_acpi_ids[] = { }; MODULE_DEVICE_TABLE(acpi, sdhci_acpi_ids); +static const struct dmi_system_id sdhci_acpi_quirks[] = { + { + /* + * The Lenovo Miix 320-10ICR has a bug in the _PS0 method of + * the SHC1 ACPI device, this bug causes it to reprogram the + * wrong LDO (DLDO3) to 1.8V if 1.8V modes are used and the + * card is (runtime) suspended + resumed. DLDO3 is used for + * the LCD and setting it to 1.8V causes the LCD to go black. + */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo MIIX 320-10ICR"), + }, + .driver_data = (void *)DMI_QUIRK_RESET_SD_SIGNAL_VOLT_ON_SUSP, + }, + {} /* Terminating entry */ +}; + static const struct sdhci_acpi_slot *sdhci_acpi_get_slot(struct acpi_device *adev) { const struct sdhci_acpi_uid_slot *u; @@ -663,17 +690,23 @@ static int sdhci_acpi_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; const struct sdhci_acpi_slot *slot; struct acpi_device *device, *child; + const struct dmi_system_id *id; struct sdhci_acpi_host *c; struct sdhci_host *host; struct resource *iomem; resource_size_t len; size_t priv_size; + int quirks = 0; int err; device = ACPI_COMPANION(dev); if (!device) return -ENODEV; + id = dmi_first_match(sdhci_acpi_quirks); + if (id) + quirks = (long)id->driver_data; + slot = sdhci_acpi_get_slot(device); /* Power on the SDHCI controller and its children */ @@ -759,6 +792,9 @@ static int sdhci_acpi_probe(struct platform_device *pdev) dev_warn(dev, "failed to setup card detect gpio\n"); c->use_runtime_pm = false; } + + if (quirks & DMI_QUIRK_RESET_SD_SIGNAL_VOLT_ON_SUSP) + c->reset_signal_volt_on_suspend = true; } err = sdhci_setup_host(host); @@ -823,17 +859,39 @@ static int sdhci_acpi_remove(struct platform_device *pdev) return 0; } +static void __maybe_unused sdhci_acpi_reset_signal_voltage_if_needed( + struct device *dev) +{ + struct sdhci_acpi_host *c = dev_get_drvdata(dev); + struct sdhci_host *host = c->host; + + if (c->is_intel && c->reset_signal_volt_on_suspend && + host->mmc->ios.signal_voltage != MMC_SIGNAL_VOLTAGE_330) { + struct intel_host *intel_host = sdhci_acpi_priv(c); + unsigned int fn = INTEL_DSM_V33_SWITCH; + u32 result = 0; + + intel_dsm(intel_host, dev, fn, &result); + } +} + #ifdef CONFIG_PM_SLEEP static int sdhci_acpi_suspend(struct device *dev) { struct sdhci_acpi_host *c = dev_get_drvdata(dev); struct sdhci_host *host = c->host; + int ret; if (host->tuning_mode != SDHCI_TUNING_MODE_3) mmc_retune_needed(host->mmc); - return sdhci_suspend_host(host); + ret = sdhci_suspend_host(host); + if (ret) + return ret; + + sdhci_acpi_reset_signal_voltage_if_needed(dev); + return 0; } static int sdhci_acpi_resume(struct device *dev) @@ -853,11 +911,17 @@ static int sdhci_acpi_runtime_suspend(struct device *dev) { struct sdhci_acpi_host *c = dev_get_drvdata(dev); struct sdhci_host *host = c->host; + int ret; if (host->tuning_mode != SDHCI_TUNING_MODE_3) mmc_retune_needed(host->mmc); - return sdhci_runtime_suspend_host(host); + ret = sdhci_runtime_suspend_host(host); + if (ret) + return ret; + + sdhci_acpi_reset_signal_voltage_if_needed(dev); + return 0; } static int sdhci_acpi_runtime_resume(struct device *dev) From 3397b251ea02003f47f0b1667f3fe30bb4f9ce90 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 16 Mar 2020 19:47:53 +0100 Subject: [PATCH 223/372] mmc: sdhci-acpi: Disable write protect detection on Acer Aspire Switch 10 (SW5-012) On the Acer Aspire Switch 10 (SW5-012) microSD slot always reports the card being write-protected even though microSD cards do not have a write-protect switch at all. Add a new DMI_QUIRK_SD_NO_WRITE_PROTECT quirk which when set sets the MMC_CAP2_NO_WRITE_PROTECT flag on the controller for the external SD slot; and add a DMI quirk table entry which selects this quirk for the Acer SW5-012. Signed-off-by: Hans de Goede Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200316184753.393458-2-hdegoede@redhat.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-acpi.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c index b4c1b2367066..2a2173d953f5 100644 --- a/drivers/mmc/host/sdhci-acpi.c +++ b/drivers/mmc/host/sdhci-acpi.c @@ -80,6 +80,7 @@ struct sdhci_acpi_host { enum { DMI_QUIRK_RESET_SD_SIGNAL_VOLT_ON_SUSP = BIT(0), + DMI_QUIRK_SD_NO_WRITE_PROTECT = BIT(1), }; static inline void *sdhci_acpi_priv(struct sdhci_acpi_host *c) @@ -671,6 +672,18 @@ static const struct dmi_system_id sdhci_acpi_quirks[] = { }, .driver_data = (void *)DMI_QUIRK_RESET_SD_SIGNAL_VOLT_ON_SUSP, }, + { + /* + * The Acer Aspire Switch 10 (SW5-012) microSD slot always + * reports the card being write-protected even though microSD + * cards do not have a write-protect switch at all. + */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "Aspire SW5-012"), + }, + .driver_data = (void *)DMI_QUIRK_SD_NO_WRITE_PROTECT, + }, {} /* Terminating entry */ }; @@ -795,6 +808,9 @@ static int sdhci_acpi_probe(struct platform_device *pdev) if (quirks & DMI_QUIRK_RESET_SD_SIGNAL_VOLT_ON_SUSP) c->reset_signal_volt_on_suspend = true; + + if (quirks & DMI_QUIRK_SD_NO_WRITE_PROTECT) + host->mmc->caps2 |= MMC_CAP2_NO_WRITE_PROTECT; } err = sdhci_setup_host(host); From 18b587b45c13bb6a07ed0edac15f06892593d07a Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 12 Mar 2020 19:42:57 +0900 Subject: [PATCH 224/372] mmc: sdhci-cadence: set SDHCI_QUIRK2_PRESET_VALUE_BROKEN for UniPhier The SDHCI_PRESET_FOR_* registers are not set for the UniPhier platform integration. (They are all read as zeros). Set the SDHCI_QUIRK2_PRESET_VALUE_BROKEN quirk flag. Otherwise, the High Speed DDR mode on the eMMC controller (MMC_TIMING_MMC_DDR52) would not work. I split the platform data to give no impact to other platforms, although the UniPhier platform is currently only the upstream user of this IP. The SDHCI_QUIRK2_PRESET_VALUE_BROKEN flag is set if the compatible string matches to "socionext,uniphier-sd4hc". Signed-off-by: Masahiro Yamada Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200312104257.21017-1-yamada.masahiro@socionext.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-cadence.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/sdhci-cadence.c b/drivers/mmc/host/sdhci-cadence.c index 5827d3751b81..e573495f8726 100644 --- a/drivers/mmc/host/sdhci-cadence.c +++ b/drivers/mmc/host/sdhci-cadence.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "sdhci-pltfm.h" @@ -235,6 +236,11 @@ static const struct sdhci_ops sdhci_cdns_ops = { .set_uhs_signaling = sdhci_cdns_set_uhs_signaling, }; +static const struct sdhci_pltfm_data sdhci_cdns_uniphier_pltfm_data = { + .ops = &sdhci_cdns_ops, + .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN, +}; + static const struct sdhci_pltfm_data sdhci_cdns_pltfm_data = { .ops = &sdhci_cdns_ops, }; @@ -334,6 +340,7 @@ static void sdhci_cdns_hs400_enhanced_strobe(struct mmc_host *mmc, static int sdhci_cdns_probe(struct platform_device *pdev) { struct sdhci_host *host; + const struct sdhci_pltfm_data *data; struct sdhci_pltfm_host *pltfm_host; struct sdhci_cdns_priv *priv; struct clk *clk; @@ -350,8 +357,12 @@ static int sdhci_cdns_probe(struct platform_device *pdev) if (ret) return ret; + data = of_device_get_match_data(dev); + if (!data) + data = &sdhci_cdns_pltfm_data; + nr_phy_params = sdhci_cdns_phy_param_count(dev->of_node); - host = sdhci_pltfm_init(pdev, &sdhci_cdns_pltfm_data, + host = sdhci_pltfm_init(pdev, data, struct_size(priv, phy_params, nr_phy_params)); if (IS_ERR(host)) { ret = PTR_ERR(host); @@ -431,7 +442,10 @@ static const struct dev_pm_ops sdhci_cdns_pm_ops = { }; static const struct of_device_id sdhci_cdns_match[] = { - { .compatible = "socionext,uniphier-sd4hc" }, + { + .compatible = "socionext,uniphier-sd4hc", + .data = &sdhci_cdns_uniphier_pltfm_data, + }, { .compatible = "cdns,sd4hc" }, { /* sentinel */ } }; From 53dd0a7cd65edc83b0c243d1c08377c8b876b2ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Sun, 15 Mar 2020 17:44:25 +0100 Subject: [PATCH 225/372] mmc: sdhci-of-at91: fix cd-gpios for SAMA5D2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SAMA5D2x doesn't drive CMD line if GPIO is used as CD line (at least SAMA5D27 doesn't). Fix this by forcing card-detect in the module if module-controlled CD is not used. Fixed commit addresses the problem only for non-removable cards. This amends it to also cover gpio-cd case. Cc: stable@vger.kernel.org Fixes: 7a1e3f143176 ("mmc: sdhci-of-at91: force card detect value for non removable devices") Signed-off-by: Michał Mirosław Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/8d10950d9940468577daef4772b82a071b204716.1584290561.git.mirq-linux@rere.qmqm.pl Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-of-at91.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c index ab2bd314a390..fcef5c0d0908 100644 --- a/drivers/mmc/host/sdhci-of-at91.c +++ b/drivers/mmc/host/sdhci-of-at91.c @@ -132,7 +132,8 @@ static void sdhci_at91_reset(struct sdhci_host *host, u8 mask) sdhci_reset(host, mask); - if (host->mmc->caps & MMC_CAP_NONREMOVABLE) + if ((host->mmc->caps & MMC_CAP_NONREMOVABLE) + || mmc_gpio_get_cd(host->mmc) >= 0) sdhci_at91_set_force_card_detect(host); if (priv->cal_always_on && (mask & SDHCI_RESET_ALL)) @@ -427,8 +428,11 @@ static int sdhci_at91_probe(struct platform_device *pdev) * detection procedure using the SDMCC_CD signal is bypassed. * This bit is reset when a software reset for all command is performed * so we need to implement our own reset function to set back this bit. + * + * WA: SAMA5D2 doesn't drive CMD if using CD GPIO line. */ - if (host->mmc->caps & MMC_CAP_NONREMOVABLE) + if ((host->mmc->caps & MMC_CAP_NONREMOVABLE) + || mmc_gpio_get_cd(host->mmc) >= 0) sdhci_at91_set_force_card_detect(host); pm_runtime_put_autosuspend(&pdev->dev); From 8f3675be4bda33adbdc1dd2ab3b6c76a7599a79e Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 12 Mar 2020 12:01:49 +0100 Subject: [PATCH 226/372] staging: greybus: loopback_test: fix poll-mask build breakage A scripted conversion from userland POLL* to kernel EPOLL* constants mistakingly replaced the poll flags in the loopback_test tool, which therefore no longer builds. Fixes: a9a08845e9ac ("vfs: do bulk POLL* -> EPOLL* replacement") Cc: stable # 4.16 Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20200312110151.22028-2-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/staging/greybus/tools/loopback_test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/greybus/tools/loopback_test.c b/drivers/staging/greybus/tools/loopback_test.c index ba6f905f26fa..41e1820d9ac9 100644 --- a/drivers/staging/greybus/tools/loopback_test.c +++ b/drivers/staging/greybus/tools/loopback_test.c @@ -655,7 +655,7 @@ static int open_poll_files(struct loopback_test *t) goto err; } read(t->fds[fds_idx].fd, &dummy, 1); - t->fds[fds_idx].events = EPOLLERR|EPOLLPRI; + t->fds[fds_idx].events = POLLERR | POLLPRI; t->fds[fds_idx].revents = 0; fds_idx++; } @@ -748,7 +748,7 @@ static int wait_for_complete(struct loopback_test *t) } for (i = 0; i < t->poll_count; i++) { - if (t->fds[i].revents & EPOLLPRI) { + if (t->fds[i].revents & POLLPRI) { /* Dummy read to clear the event */ read(t->fds[i].fd, &dummy, 1); number_of_events++; From f16023834863932f95dfad13fac3fc47f77d2f29 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 12 Mar 2020 12:01:50 +0100 Subject: [PATCH 227/372] staging: greybus: loopback_test: fix potential path truncation Newer GCC warns about a possible truncation of a generated sysfs path name as we're concatenating a directory path with a file name and placing the result in a buffer that is half the size of the maximum length of the directory path (which is user controlled). loopback_test.c: In function 'open_poll_files': loopback_test.c:651:31: warning: '%s' directive output may be truncated writing up to 511 bytes into a region of size 255 [-Wformat-truncation=] 651 | snprintf(buf, sizeof(buf), "%s%s", dev->sysfs_entry, "iteration_count"); | ^~ loopback_test.c:651:3: note: 'snprintf' output between 16 and 527 bytes into a destination of size 255 651 | snprintf(buf, sizeof(buf), "%s%s", dev->sysfs_entry, "iteration_count"); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Fix this by making sure the buffer is large enough the concatenated strings. Fixes: 6b0658f68786 ("greybus: tools: Add tools directory to greybus repo and add loopback") Fixes: 9250c0ee2626 ("greybus: Loopback_test: use poll instead of inotify") Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20200312110151.22028-3-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/staging/greybus/tools/loopback_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/greybus/tools/loopback_test.c b/drivers/staging/greybus/tools/loopback_test.c index 41e1820d9ac9..d38bb4fbd6b9 100644 --- a/drivers/staging/greybus/tools/loopback_test.c +++ b/drivers/staging/greybus/tools/loopback_test.c @@ -637,7 +637,7 @@ baddir: static int open_poll_files(struct loopback_test *t) { struct loopback_device *dev; - char buf[MAX_STR_LEN]; + char buf[MAX_SYSFS_PATH + MAX_STR_LEN]; char dummy; int fds_idx = 0; int i; From ae62cf5eb2792d9a818c2d93728ed92119357017 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 12 Mar 2020 12:01:51 +0100 Subject: [PATCH 228/372] staging: greybus: loopback_test: fix potential path truncations Newer GCC warns about possible truncations of two generated path names as we're concatenating the configurable sysfs and debugfs path prefixes with a filename and placing the results in buffers of the same size as the maximum length of the prefixes. snprintf(d->name, MAX_STR_LEN, "gb_loopback%u", dev_id); snprintf(d->sysfs_entry, MAX_SYSFS_PATH, "%s%s/", t->sysfs_prefix, d->name); snprintf(d->debugfs_entry, MAX_SYSFS_PATH, "%sraw_latency_%s", t->debugfs_prefix, d->name); Fix this by separating the maximum path length from the maximum prefix length and reducing the latter enough to fit the generated strings. Note that we also need to reduce the device-name buffer size as GCC isn't smart enough to figure out that we ever only used MAX_STR_LEN bytes of it. Fixes: 6b0658f68786 ("greybus: tools: Add tools directory to greybus repo and add loopback") Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20200312110151.22028-4-johan@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/staging/greybus/tools/loopback_test.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/staging/greybus/tools/loopback_test.c b/drivers/staging/greybus/tools/loopback_test.c index d38bb4fbd6b9..69c6dce9be31 100644 --- a/drivers/staging/greybus/tools/loopback_test.c +++ b/drivers/staging/greybus/tools/loopback_test.c @@ -19,6 +19,7 @@ #include #define MAX_NUM_DEVICES 10 +#define MAX_SYSFS_PREFIX 0x80 #define MAX_SYSFS_PATH 0x200 #define CSV_MAX_LINE 0x1000 #define SYSFS_MAX_INT 0x20 @@ -67,7 +68,7 @@ struct loopback_results { }; struct loopback_device { - char name[MAX_SYSFS_PATH]; + char name[MAX_STR_LEN]; char sysfs_entry[MAX_SYSFS_PATH]; char debugfs_entry[MAX_SYSFS_PATH]; struct loopback_results results; @@ -93,8 +94,8 @@ struct loopback_test { int stop_all; int poll_count; char test_name[MAX_STR_LEN]; - char sysfs_prefix[MAX_SYSFS_PATH]; - char debugfs_prefix[MAX_SYSFS_PATH]; + char sysfs_prefix[MAX_SYSFS_PREFIX]; + char debugfs_prefix[MAX_SYSFS_PREFIX]; struct timespec poll_timeout; struct loopback_device devices[MAX_NUM_DEVICES]; struct loopback_results aggregate_results; @@ -907,10 +908,10 @@ int main(int argc, char *argv[]) t.iteration_max = atoi(optarg); break; case 'S': - snprintf(t.sysfs_prefix, MAX_SYSFS_PATH, "%s", optarg); + snprintf(t.sysfs_prefix, MAX_SYSFS_PREFIX, "%s", optarg); break; case 'D': - snprintf(t.debugfs_prefix, MAX_SYSFS_PATH, "%s", optarg); + snprintf(t.debugfs_prefix, MAX_SYSFS_PREFIX, "%s", optarg); break; case 'm': t.mask = atol(optarg); @@ -961,10 +962,10 @@ int main(int argc, char *argv[]) } if (!strcmp(t.sysfs_prefix, "")) - snprintf(t.sysfs_prefix, MAX_SYSFS_PATH, "%s", sysfs_prefix); + snprintf(t.sysfs_prefix, MAX_SYSFS_PREFIX, "%s", sysfs_prefix); if (!strcmp(t.debugfs_prefix, "")) - snprintf(t.debugfs_prefix, MAX_SYSFS_PATH, "%s", debugfs_prefix); + snprintf(t.debugfs_prefix, MAX_SYSFS_PREFIX, "%s", debugfs_prefix); ret = find_loopback_devices(&t); if (ret) From bb5786b9286c253557a0115bc8d21879e61b7b94 Mon Sep 17 00:00:00 2001 From: Michael Straube Date: Thu, 12 Mar 2020 10:36:52 +0100 Subject: [PATCH 229/372] staging: rtl8188eu: Add device id for MERCUSYS MW150US v2 This device was added to the stand-alone driver on github. Add it to the staging driver as well. Link: https://github.com/lwfinger/rtl8188eu/commit/2141f244c3e7 Signed-off-by: Michael Straube Cc: stable Link: https://lore.kernel.org/r/20200312093652.13918-1-straube.linux@gmail.com Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8188eu/os_dep/usb_intf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/rtl8188eu/os_dep/usb_intf.c b/drivers/staging/rtl8188eu/os_dep/usb_intf.c index b5d42f411dd8..845c8817281c 100644 --- a/drivers/staging/rtl8188eu/os_dep/usb_intf.c +++ b/drivers/staging/rtl8188eu/os_dep/usb_intf.c @@ -38,6 +38,7 @@ static const struct usb_device_id rtw_usb_id_tbl[] = { {USB_DEVICE(0x2001, 0x331B)}, /* D-Link DWA-121 rev B1 */ {USB_DEVICE(0x2357, 0x010c)}, /* TP-Link TL-WN722N v2 */ {USB_DEVICE(0x2357, 0x0111)}, /* TP-Link TL-WN727N v5.21 */ + {USB_DEVICE(0x2C4E, 0x0102)}, /* MERCUSYS MW150US v2 */ {USB_DEVICE(0x0df6, 0x0076)}, /* Sitecom N150 v2 */ {USB_DEVICE(USB_VENDER_ID_REALTEK, 0xffef)}, /* Rosewill RNX-N150NUB */ {} /* Terminating entry */ From d858c706bdca97698752bd26b60c21ec07ef04f2 Mon Sep 17 00:00:00 2001 From: Jian-Hong Pan Date: Tue, 17 Mar 2020 16:28:07 +0800 Subject: [PATCH 230/372] ALSA: hda/realtek - Enable headset mic of Acer X2660G with ALC662 The Acer desktop X2660G with ALC662 can't detect the headset microphone until ALC662_FIXUP_ACER_X2660G_HEADSET_MODE quirk applied. Signed-off-by: Jian-Hong Pan Cc: Link: https://lore.kernel.org/r/20200317082806.73194-2-jian-hong@endlessm.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 7b83b020ac3c..a08481f358e9 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8612,6 +8612,7 @@ enum { ALC669_FIXUP_ACER_ASPIRE_ETHOS, ALC669_FIXUP_ACER_ASPIRE_ETHOS_HEADSET, ALC671_FIXUP_HP_HEADSET_MIC2, + ALC662_FIXUP_ACER_X2660G_HEADSET_MODE, }; static const struct hda_fixup alc662_fixups[] = { @@ -8957,6 +8958,15 @@ static const struct hda_fixup alc662_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc671_fixup_hp_headset_mic2, }, + [ALC662_FIXUP_ACER_X2660G_HEADSET_MODE] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x1a, 0x02a1113c }, /* use as headset mic, without its own jack detect */ + { } + }, + .chained = true, + .chain_id = ALC662_FIXUP_USI_FUNC + }, }; static const struct snd_pci_quirk alc662_fixup_tbl[] = { @@ -8968,6 +8978,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x0349, "eMachines eM250", ALC662_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x1025, 0x034a, "Gateway LT27", ALC662_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x1025, 0x038b, "Acer Aspire 8943G", ALC662_FIXUP_ASPIRE), + SND_PCI_QUIRK(0x1025, 0x124e, "Acer 2660G", ALC662_FIXUP_ACER_X2660G_HEADSET_MODE), SND_PCI_QUIRK(0x1028, 0x05d8, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x05db, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x05fe, "Dell XPS 15", ALC668_FIXUP_DELL_XPS13), From a124458a127ccd7629e20cd7bae3e1f758ed32aa Mon Sep 17 00:00:00 2001 From: Jian-Hong Pan Date: Tue, 17 Mar 2020 16:28:09 +0800 Subject: [PATCH 231/372] ALSA: hda/realtek - Enable the headset of Acer N50-600 with ALC662 A headset on the desktop like Acer N50-600 does not work, until quirk ALC662_FIXUP_ACER_NITRO_HEADSET_MODE is applied. Signed-off-by: Jian-Hong Pan Cc: Link: https://lore.kernel.org/r/20200317082806.73194-3-jian-hong@endlessm.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index a08481f358e9..63e1a56f705b 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8613,6 +8613,7 @@ enum { ALC669_FIXUP_ACER_ASPIRE_ETHOS_HEADSET, ALC671_FIXUP_HP_HEADSET_MIC2, ALC662_FIXUP_ACER_X2660G_HEADSET_MODE, + ALC662_FIXUP_ACER_NITRO_HEADSET_MODE, }; static const struct hda_fixup alc662_fixups[] = { @@ -8967,6 +8968,16 @@ static const struct hda_fixup alc662_fixups[] = { .chained = true, .chain_id = ALC662_FIXUP_USI_FUNC }, + [ALC662_FIXUP_ACER_NITRO_HEADSET_MODE] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x1a, 0x01a11140 }, /* use as headset mic, without its own jack detect */ + { 0x1b, 0x0221144f }, + { } + }, + .chained = true, + .chain_id = ALC662_FIXUP_USI_FUNC + }, }; static const struct snd_pci_quirk alc662_fixup_tbl[] = { @@ -8978,6 +8989,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x0349, "eMachines eM250", ALC662_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x1025, 0x034a, "Gateway LT27", ALC662_FIXUP_INV_DMIC), SND_PCI_QUIRK(0x1025, 0x038b, "Acer Aspire 8943G", ALC662_FIXUP_ASPIRE), + SND_PCI_QUIRK(0x1025, 0x123c, "Acer Nitro N50-600", ALC662_FIXUP_ACER_NITRO_HEADSET_MODE), SND_PCI_QUIRK(0x1025, 0x124e, "Acer 2660G", ALC662_FIXUP_ACER_X2660G_HEADSET_MODE), SND_PCI_QUIRK(0x1028, 0x05d8, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x05db, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE), From 39946886fc865a4c26f1b3ea0805936db2d8986d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 28 Feb 2020 12:22:59 +0300 Subject: [PATCH 232/372] cifs: potential unintitliazed error code in cifs_getattr() Smatch complains that "rc" could be uninitialized. fs/cifs/inode.c:2206 cifs_getattr() error: uninitialized symbol 'rc'. Changing it to "return 0;" improves readability as well. Fixes: cc1baf98c8f6 ("cifs: do not ignore the SYNC flags in getattr") Signed-off-by: Dan Carpenter Signed-off-by: Steve French Acked-by: Ronnie Sahlberg --- fs/cifs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 1e8a4b1579db..b16f8d23e97b 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -2191,7 +2191,7 @@ int cifs_getattr(const struct path *path, struct kstat *stat, if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID)) stat->gid = current_fsgid(); } - return rc; + return 0; } int cifs_fiemap(struct inode *inode, struct fiemap_extent_info *fei, u64 start, From 1be1fa42ebb73ad8fd67d2c846931361b4e3dd0a Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Mon, 9 Mar 2020 01:35:09 -0700 Subject: [PATCH 233/372] CIFS: Increment num_remote_opens stats counter even in case of smb2_query_dir_first The num_remote_opens counter keeps track of the number of open files which must be maintained by the server at any point. This is a per-tree-connect counter, and the value of this counter gets displayed in the /proc/fs/cifs/Stats output as a following... Open files: 0 total (local), 1 open on server ^^^^^^^^^^^^^^^^ As a thumb-rule, we want to increment this counter for each open/create that we successfully execute on the server. Similarly, we should decrement the counter when we successfully execute a close. In this case, an increment was being missed in case of smb2_query_dir_first, in case of successful open. As a result, we would underflow the counter and we could even see the counter go to negative after sufficient smb2_query_dir_first calls. I tested the stats counter for a bunch of filesystem operations with the fix. And it looks like the counter looks correct to me. I also check if we missed the increments and decrements elsewhere. It does not seem so. Few other cases where an open is done and we don't increment the counter are the compound calls where the corresponding close is also sent in the request. Signed-off-by: Shyam Prasad N CC: Stable Signed-off-by: Steve French Reviewed-by: Aurelien Aptel Reviewed-by: Pavel Shilovsky --- fs/cifs/smb2ops.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index c31e84ee3c39..3dddd20c5e2b 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -2222,6 +2222,8 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon, goto qdf_free; } + atomic_inc(&tcon->num_remote_opens); + qd_rsp = (struct smb2_query_directory_rsp *)rsp_iov[1].iov_base; if (qd_rsp->sync_hdr.Status == STATUS_NO_MORE_FILES) { trace_smb3_query_dir_done(xid, fid->persistent_fid, From 979a2665eb6c603ddce0ab374041ab101827b2e7 Mon Sep 17 00:00:00 2001 From: Murphy Zhou Date: Sat, 14 Mar 2020 11:38:31 +0800 Subject: [PATCH 234/372] CIFS: fiemap: do not return EINVAL if get nothing If we call fiemap on a truncated file with none blocks allocated, it makes sense we get nothing from this call. No output means no blocks have been counted, but the call succeeded. It's a valid response. Simple example reproducer: xfs_io -f 'truncate 2M' -c 'fiemap -v' /cifssch/testfile xfs_io: ioctl(FS_IOC_FIEMAP) ["/cifssch/testfile"]: Invalid argument Signed-off-by: Murphy Zhou Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky CC: Stable --- fs/cifs/smb2ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 3dddd20c5e2b..cfe9b800ea8c 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -3419,7 +3419,7 @@ static int smb3_fiemap(struct cifs_tcon *tcon, if (rc) goto out; - if (out_data_len < sizeof(struct file_allocated_range_buffer)) { + if (out_data_len && out_data_len < sizeof(struct file_allocated_range_buffer)) { rc = -EINVAL; goto out; } From 7368760d1bcdabf515c41a502568b489de3da683 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Mon, 16 Mar 2020 11:10:34 +0800 Subject: [PATCH 235/372] usb: chipidea: udc: fix sleeping function called from invalid context The code calls pm_runtime_get_sync with irq disabled, it causes below warning: BUG: sleeping function called from invalid context at wer/runtime.c:1075 in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: er/u8:1 CPU: 1 PID: 37 Comm: kworker/u8:1 Not tainted 20200304-00181-gbebfd2a5be98 #1588 Hardware name: NVIDIA Tegra SoC (Flattened Device Tree) Workqueue: ci_otg ci_otg_work [] (unwind_backtrace) from [] 1/0x14) [] (show_stack) from [] 5/0x94) [] (dump_stack) from [] +0xeb/0x118) [] (___might_sleep) from [] esume+0x75/0x78) [] (__pm_runtime_resume) from [] 0x23/0x74) [] (ci_udc_pullup) from [] nect+0x2b/0xcc) [] (usb_gadget_connect) from [] _connect+0x59/0x104) [] (ci_hdrc_gadget_connect) from [] ssion+0x43/0x48) [] (ci_udc_vbus_session) from [] s_connect+0x17/0x9c) [] (usb_gadget_vbus_connect) from [] bd/0x128) [] (ci_otg_work) from [] rk+0x149/0x404) [] (process_one_work) from [] 0xf7/0x3bc) [] (worker_thread) from [] x118) [] (kthread) from [] (ret_from_fork+0x11/0x34) Tested-by: Dmitry Osipenko Cc: #v5.5 Fixes: 72dc8df7920f ("usb: chipidea: udc: protect usb interrupt enable") Reported-by: Dmitry Osipenko Signed-off-by: Peter Chen Link: https://lore.kernel.org/r/20200316031034.17847-2-peter.chen@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/udc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c index ffaf46f5d062..4c4ac30db498 100644 --- a/drivers/usb/chipidea/udc.c +++ b/drivers/usb/chipidea/udc.c @@ -1530,18 +1530,19 @@ static const struct usb_ep_ops usb_ep_ops = { static void ci_hdrc_gadget_connect(struct usb_gadget *_gadget, int is_active) { struct ci_hdrc *ci = container_of(_gadget, struct ci_hdrc, gadget); - unsigned long flags; if (is_active) { pm_runtime_get_sync(&_gadget->dev); hw_device_reset(ci); - spin_lock_irqsave(&ci->lock, flags); + spin_lock_irq(&ci->lock); if (ci->driver) { hw_device_state(ci, ci->ep0out->qh.dma); usb_gadget_set_state(_gadget, USB_STATE_POWERED); + spin_unlock_irq(&ci->lock); usb_udc_vbus_handler(_gadget, true); + } else { + spin_unlock_irq(&ci->lock); } - spin_unlock_irqrestore(&ci->lock, flags); } else { usb_udc_vbus_handler(_gadget, false); if (ci->driver) From 75d7676ead19b1fbb5e0ee934c9ccddcb666b68c Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 13 Mar 2020 13:07:08 +0100 Subject: [PATCH 236/372] usb: quirks: add NO_LPM quirk for RTL8153 based ethernet adapters We have been receiving bug reports that ethernet connections over RTL8153 based ethernet adapters stops working after a while with errors like these showing up in dmesg when the ethernet stops working: [12696.189484] r8152 6-1:1.0 enp10s0u1: Tx timeout [12702.333456] r8152 6-1:1.0 enp10s0u1: Tx timeout [12707.965422] r8152 6-1:1.0 enp10s0u1: Tx timeout This has been reported on Dell WD15 docks, Belkin USB-C Express Dock 3.1 docks and with generic USB to ethernet dongles using the RTL8153 chipsets. Some users have tried adding usbcore.quirks=0bda:8153:k to the kernel commandline and all users who have tried this report that this fixes this. Also note that we already have an existing NO_LPM quirk for the RTL8153 used in the Microsoft Surface Dock (where it uses a different usb-id). This commit adds a NO_LPM quirk for the generic Realtek RTL8153 0bda:8153 usb-id, fixing the Tx timeout errors on these devices. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=198931 Cc: stable@vger.kernel.org Cc: russianneuromancer@ya.ru Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20200313120708.100339-1-hdegoede@redhat.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index df6e6156e1d4..da30b5664ff3 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -381,6 +381,9 @@ static const struct usb_device_id usb_quirk_list[] = { /* Realtek hub in Dell WD19 (Type-C) */ { USB_DEVICE(0x0bda, 0x0487), .driver_info = USB_QUIRK_NO_LPM }, + /* Generic RTL8153 based ethernet adapters */ + { USB_DEVICE(0x0bda, 0x8153), .driver_info = USB_QUIRK_NO_LPM }, + /* Action Semiconductor flash disk */ { USB_DEVICE(0x10d6, 0x2200), .driver_info = USB_QUIRK_STRING_FETCH_255 }, From 633e2b2ded739a34bd0fb1d8b5b871f7e489ea29 Mon Sep 17 00:00:00 2001 From: Anthony Mallet Date: Thu, 12 Mar 2020 14:31:00 +0100 Subject: [PATCH 237/372] USB: cdc-acm: fix close_delay and closing_wait units in TIOCSSERIAL close_delay and closing_wait are specified in hundredth of a second but stored internally in jiffies. Use the jiffies_to_msecs() and msecs_to_jiffies() functions to convert from each other. Signed-off-by: Anthony Mallet Cc: stable Link: https://lore.kernel.org/r/20200312133101.7096-1-anthony.mallet@laas.fr Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 62f4fb9b362f..da619176deca 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -896,10 +896,10 @@ static int get_serial_info(struct tty_struct *tty, struct serial_struct *ss) ss->xmit_fifo_size = acm->writesize; ss->baud_base = le32_to_cpu(acm->line.dwDTERate); - ss->close_delay = acm->port.close_delay / 10; + ss->close_delay = jiffies_to_msecs(acm->port.close_delay) / 10; ss->closing_wait = acm->port.closing_wait == ASYNC_CLOSING_WAIT_NONE ? ASYNC_CLOSING_WAIT_NONE : - acm->port.closing_wait / 10; + jiffies_to_msecs(acm->port.closing_wait) / 10; return 0; } @@ -909,9 +909,10 @@ static int set_serial_info(struct tty_struct *tty, struct serial_struct *ss) unsigned int closing_wait, close_delay; int retval = 0; - close_delay = ss->close_delay * 10; + close_delay = msecs_to_jiffies(ss->close_delay * 10); closing_wait = ss->closing_wait == ASYNC_CLOSING_WAIT_NONE ? - ASYNC_CLOSING_WAIT_NONE : ss->closing_wait * 10; + ASYNC_CLOSING_WAIT_NONE : + msecs_to_jiffies(ss->closing_wait * 10); mutex_lock(&acm->port.mutex); From b401f8c4f492cbf74f3f59c9141e5be3071071bb Mon Sep 17 00:00:00 2001 From: Anthony Mallet Date: Thu, 12 Mar 2020 14:31:01 +0100 Subject: [PATCH 238/372] USB: cdc-acm: fix rounding error in TIOCSSERIAL By default, tty_port_init() initializes those parameters to a multiple of HZ. For instance in line 69 of tty_port.c: port->close_delay = (50 * HZ) / 100; https://github.com/torvalds/linux/blob/master/drivers/tty/tty_port.c#L69 With e.g. CONFIG_HZ = 250 (as this is the case for Ubuntu 18.04 linux-image-4.15.0-37-generic), the default setting for close_delay is thus 125. When ioctl(fd, TIOCGSERIAL, &s) is executed, the setting returned in user space is '12' (125/10). When ioctl(fd, TIOCSSERIAL, &s) is then executed with the same setting '12', the value is interpreted as '120' which is different from the current setting and a EPERM error may be raised by set_serial_info() if !CAP_SYS_ADMIN. https://github.com/torvalds/linux/blob/master/drivers/usb/class/cdc-acm.c#L919 Fixes: ba2d8ce9db0a6 ("cdc-acm: implement TIOCSSERIAL to avoid blocking close(2)") Signed-off-by: Anthony Mallet Cc: stable Link: https://lore.kernel.org/r/20200312133101.7096-2-anthony.mallet@laas.fr Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index da619176deca..47f09a6ce7bd 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -907,6 +907,7 @@ static int set_serial_info(struct tty_struct *tty, struct serial_struct *ss) { struct acm *acm = tty->driver_data; unsigned int closing_wait, close_delay; + unsigned int old_closing_wait, old_close_delay; int retval = 0; close_delay = msecs_to_jiffies(ss->close_delay * 10); @@ -914,18 +915,24 @@ static int set_serial_info(struct tty_struct *tty, struct serial_struct *ss) ASYNC_CLOSING_WAIT_NONE : msecs_to_jiffies(ss->closing_wait * 10); + /* we must redo the rounding here, so that the values match */ + old_close_delay = jiffies_to_msecs(acm->port.close_delay) / 10; + old_closing_wait = acm->port.closing_wait == ASYNC_CLOSING_WAIT_NONE ? + ASYNC_CLOSING_WAIT_NONE : + jiffies_to_msecs(acm->port.closing_wait) / 10; + mutex_lock(&acm->port.mutex); - if (!capable(CAP_SYS_ADMIN)) { - if ((close_delay != acm->port.close_delay) || - (closing_wait != acm->port.closing_wait)) + if ((ss->close_delay != old_close_delay) || + (ss->closing_wait != old_closing_wait)) { + if (!capable(CAP_SYS_ADMIN)) retval = -EPERM; - else - retval = -EOPNOTSUPP; - } else { - acm->port.close_delay = close_delay; - acm->port.closing_wait = closing_wait; - } + else { + acm->port.close_delay = close_delay; + acm->port.closing_wait = closing_wait; + } + } else + retval = -EOPNOTSUPP; mutex_unlock(&acm->port.mutex); return retval; From d0bab0c39e32d39a8c5cddca72e5b4a3059fe050 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Wed, 11 Mar 2020 17:12:44 +0000 Subject: [PATCH 239/372] arm64: smp: fix smp_send_stop() behaviour On a system with only one CPU online, when another one CPU panics while starting-up, smp_send_stop() will fail to send any STOP message to the other already online core, resulting in a system still responsive and alive at the end of the panic procedure. [ 186.700083] CPU3: shutdown [ 187.075462] CPU2: shutdown [ 187.162869] CPU1: shutdown [ 188.689998] ------------[ cut here ]------------ [ 188.691645] kernel BUG at arch/arm64/kernel/cpufeature.c:886! [ 188.692079] Internal error: Oops - BUG: 0 [#1] PREEMPT SMP [ 188.692444] Modules linked in: [ 188.693031] CPU: 3 PID: 0 Comm: swapper/3 Not tainted 5.6.0-rc4-00001-g338d25c35a98 #104 [ 188.693175] Hardware name: Foundation-v8A (DT) [ 188.693492] pstate: 200001c5 (nzCv dAIF -PAN -UAO) [ 188.694183] pc : has_cpuid_feature+0xf0/0x348 [ 188.694311] lr : verify_local_elf_hwcaps+0x84/0xe8 [ 188.694410] sp : ffff800011b1bf60 [ 188.694536] x29: ffff800011b1bf60 x28: 0000000000000000 [ 188.694707] x27: 0000000000000000 x26: 0000000000000000 [ 188.694801] x25: 0000000000000000 x24: ffff80001189a25c [ 188.694905] x23: 0000000000000000 x22: 0000000000000000 [ 188.694996] x21: ffff8000114aa018 x20: ffff800011156a38 [ 188.695089] x19: ffff800010c944a0 x18: 0000000000000004 [ 188.695187] x17: 0000000000000000 x16: 0000000000000000 [ 188.695280] x15: 0000249dbde5431e x14: 0262cbe497efa1fa [ 188.695371] x13: 0000000000000002 x12: 0000000000002592 [ 188.695472] x11: 0000000000000080 x10: 00400032b5503510 [ 188.695572] x9 : 0000000000000000 x8 : ffff800010c80204 [ 188.695659] x7 : 00000000410fd0f0 x6 : 0000000000000001 [ 188.695750] x5 : 00000000410fd0f0 x4 : 0000000000000000 [ 188.695836] x3 : 0000000000000000 x2 : ffff8000100939d8 [ 188.695919] x1 : 0000000000180420 x0 : 0000000000180480 [ 188.696253] Call trace: [ 188.696410] has_cpuid_feature+0xf0/0x348 [ 188.696504] verify_local_elf_hwcaps+0x84/0xe8 [ 188.696591] check_local_cpu_capabilities+0x44/0x128 [ 188.696666] secondary_start_kernel+0xf4/0x188 [ 188.697150] Code: 52805001 72a00301 6b01001f 54000ec0 (d4210000) [ 188.698639] ---[ end trace 3f12ca47652f7b72 ]--- [ 188.699160] Kernel panic - not syncing: Attempted to kill the idle task! [ 188.699546] Kernel Offset: disabled [ 188.699828] CPU features: 0x00004,20c02008 [ 188.700012] Memory Limit: none [ 188.700538] ---[ end Kernel panic - not syncing: Attempted to kill the idle task! ]--- [root@arch ~]# echo Helo Helo [root@arch ~]# cat /proc/cpuinfo | grep proce processor : 0 Make smp_send_stop() account also for the online status of the calling CPU while evaluating how many CPUs are effectively online: this way, the right number of STOPs is sent, so enforcing a proper freeze of the system at the end of panic even under the above conditions. Fixes: 08e875c16a16c ("arm64: SMP support") Reported-by: Dave Martin Acked-by: Mark Rutland Signed-off-by: Cristian Marussi Signed-off-by: Will Deacon --- arch/arm64/kernel/smp.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index d4ed9a19d8fe..e4dc241c5a8e 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -958,11 +958,22 @@ void tick_broadcast(const struct cpumask *mask) } #endif +/* + * The number of CPUs online, not counting this CPU (which may not be + * fully online and so not counted in num_online_cpus()). + */ +static inline unsigned int num_other_online_cpus(void) +{ + unsigned int this_cpu_online = cpu_online(smp_processor_id()); + + return num_online_cpus() - this_cpu_online; +} + void smp_send_stop(void) { unsigned long timeout; - if (num_online_cpus() > 1) { + if (num_other_online_cpus()) { cpumask_t mask; cpumask_copy(&mask, cpu_online_mask); @@ -975,10 +986,10 @@ void smp_send_stop(void) /* Wait up to one second for other CPUs to stop */ timeout = USEC_PER_SEC; - while (num_online_cpus() > 1 && timeout--) + while (num_other_online_cpus() && timeout--) udelay(1); - if (num_online_cpus() > 1) + if (num_other_online_cpus()) pr_warn("SMP: failed to stop secondary CPUs %*pbl\n", cpumask_pr_args(cpu_online_mask)); From f50b7dacccbab2b9e3ef18f52a6dcc18ed2050b9 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Wed, 11 Mar 2020 17:12:45 +0000 Subject: [PATCH 240/372] arm64: smp: fix crash_smp_send_stop() behaviour On a system configured to trigger a crash_kexec() reboot, when only one CPU is online and another CPU panics while starting-up, crash_smp_send_stop() will fail to send any STOP message to the other already online core, resulting in fail to freeze and registers not properly saved. Moreover even if the proper messages are sent (case CPUs > 2) it will similarly fail to account for the booting CPU when executing the final stop wait-loop, so potentially resulting in some CPU not been waited for shutdown before rebooting. A tangible effect of this behaviour can be observed when, after a panic with kexec enabled and loaded, on the following reboot triggered by kexec, the cpu that could not be successfully stopped fails to come back online: [ 362.291022] ------------[ cut here ]------------ [ 362.291525] kernel BUG at arch/arm64/kernel/cpufeature.c:886! [ 362.292023] Internal error: Oops - BUG: 0 [#1] PREEMPT SMP [ 362.292400] Modules linked in: [ 362.292970] CPU: 3 PID: 0 Comm: swapper/3 Kdump: loaded Not tainted 5.6.0-rc4-00003-gc780b890948a #105 [ 362.293136] Hardware name: Foundation-v8A (DT) [ 362.293382] pstate: 200001c5 (nzCv dAIF -PAN -UAO) [ 362.294063] pc : has_cpuid_feature+0xf0/0x348 [ 362.294177] lr : verify_local_elf_hwcaps+0x84/0xe8 [ 362.294280] sp : ffff800011b1bf60 [ 362.294362] x29: ffff800011b1bf60 x28: 0000000000000000 [ 362.294534] x27: 0000000000000000 x26: 0000000000000000 [ 362.294631] x25: 0000000000000000 x24: ffff80001189a25c [ 362.294718] x23: 0000000000000000 x22: 0000000000000000 [ 362.294803] x21: ffff8000114aa018 x20: ffff800011156a00 [ 362.294897] x19: ffff800010c944a0 x18: 0000000000000004 [ 362.294987] x17: 0000000000000000 x16: 0000000000000000 [ 362.295073] x15: 00004e53b831ae3c x14: 00004e53b831ae3c [ 362.295165] x13: 0000000000000384 x12: 0000000000000000 [ 362.295251] x11: 0000000000000000 x10: 00400032b5503510 [ 362.295334] x9 : 0000000000000000 x8 : ffff800010c7e204 [ 362.295426] x7 : 00000000410fd0f0 x6 : 0000000000000001 [ 362.295508] x5 : 00000000410fd0f0 x4 : 0000000000000000 [ 362.295592] x3 : 0000000000000000 x2 : ffff8000100939d8 [ 362.295683] x1 : 0000000000180420 x0 : 0000000000180480 [ 362.296011] Call trace: [ 362.296257] has_cpuid_feature+0xf0/0x348 [ 362.296350] verify_local_elf_hwcaps+0x84/0xe8 [ 362.296424] check_local_cpu_capabilities+0x44/0x128 [ 362.296497] secondary_start_kernel+0xf4/0x188 [ 362.296998] Code: 52805001 72a00301 6b01001f 54000ec0 (d4210000) [ 362.298652] SMP: stopping secondary CPUs [ 362.300615] Starting crashdump kernel... [ 362.301168] Bye! [ 0.000000] Booting Linux on physical CPU 0x0000000003 [0x410fd0f0] [ 0.000000] Linux version 5.6.0-rc4-00003-gc780b890948a (crimar01@e120937-lin) (gcc version 8.3.0 (GNU Toolchain for the A-profile Architecture 8.3-2019.03 (arm-rel-8.36))) #105 SMP PREEMPT Fri Mar 6 17:00:42 GMT 2020 [ 0.000000] Machine model: Foundation-v8A [ 0.000000] earlycon: pl11 at MMIO 0x000000001c090000 (options '') [ 0.000000] printk: bootconsole [pl11] enabled ..... [ 0.138024] rcu: Hierarchical SRCU implementation. [ 0.153472] its@2f020000: unable to locate ITS domain [ 0.154078] its@2f020000: Unable to locate ITS domain [ 0.157541] EFI services will not be available. [ 0.175395] smp: Bringing up secondary CPUs ... [ 0.209182] psci: failed to boot CPU1 (-22) [ 0.209377] CPU1: failed to boot: -22 [ 0.274598] Detected PIPT I-cache on CPU2 [ 0.278707] GICv3: CPU2: found redistributor 1 region 0:0x000000002f120000 [ 0.285212] CPU2: Booted secondary processor 0x0000000001 [0x410fd0f0] [ 0.369053] Detected PIPT I-cache on CPU3 [ 0.372947] GICv3: CPU3: found redistributor 2 region 0:0x000000002f140000 [ 0.378664] CPU3: Booted secondary processor 0x0000000002 [0x410fd0f0] [ 0.401707] smp: Brought up 1 node, 3 CPUs [ 0.404057] SMP: Total of 3 processors activated. Make crash_smp_send_stop() account also for the online status of the calling CPU while evaluating how many CPUs are effectively online: this way the right number of STOPs is sent and all other stopped-cores's registers are properly saved. Fixes: 78fd584cdec05 ("arm64: kdump: implement machine_crash_shutdown()") Acked-by: Mark Rutland Signed-off-by: Cristian Marussi Signed-off-by: Will Deacon --- arch/arm64/kernel/smp.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index e4dc241c5a8e..5407bf5d98ac 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -1012,7 +1012,11 @@ void crash_smp_send_stop(void) cpus_stopped = 1; - if (num_online_cpus() == 1) { + /* + * If this cpu is the only one alive at this point in time, online or + * not, there are no stop messages to be sent around, so just back out. + */ + if (num_other_online_cpus() == 0) { sdei_mask_local_cpu(); return; } @@ -1020,7 +1024,7 @@ void crash_smp_send_stop(void) cpumask_copy(&mask, cpu_online_mask); cpumask_clear_cpu(smp_processor_id(), &mask); - atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); + atomic_set(&waiting_for_crash_ipi, num_other_online_cpus()); pr_crit("SMP: stopping secondary CPUs\n"); smp_cross_call(&mask, IPI_CPU_CRASH_STOP); From 028fd76b9b1cc9e195ce60790791fd3f37b9b5d7 Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Mon, 16 Mar 2020 20:49:06 +1300 Subject: [PATCH 241/372] Revert "net: mvmdio: avoid error message for optional IRQ" This reverts commit e1f550dc44a4d535da4e25ada1b0eaf8f3417929. platform_get_irq_optional() will still return -ENXIO when no interrupt is provided so the additional error handling caused the driver prone to fail when no interrupt was specified. Revert the change so we can apply the correct minimal fix. Signed-off-by: Chris Packham Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvmdio.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvmdio.c b/drivers/net/ethernet/marvell/mvmdio.c index d2e2dc538428..0b9e851f3da4 100644 --- a/drivers/net/ethernet/marvell/mvmdio.c +++ b/drivers/net/ethernet/marvell/mvmdio.c @@ -347,7 +347,7 @@ static int orion_mdio_probe(struct platform_device *pdev) } - dev->err_interrupt = platform_get_irq_optional(pdev, 0); + dev->err_interrupt = platform_get_irq(pdev, 0); if (dev->err_interrupt > 0 && resource_size(r) < MVMDIO_ERR_INT_MASK + 4) { dev_err(&pdev->dev, @@ -364,8 +364,8 @@ static int orion_mdio_probe(struct platform_device *pdev) writel(MVMDIO_ERR_INT_SMI_DONE, dev->regs + MVMDIO_ERR_INT_MASK); - } else if (dev->err_interrupt < 0) { - ret = dev->err_interrupt; + } else if (dev->err_interrupt == -EPROBE_DEFER) { + ret = -EPROBE_DEFER; goto out_mdio; } From fa2632f74e57bbc869c8ad37751a11b6147a3acc Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Mon, 16 Mar 2020 20:49:07 +1300 Subject: [PATCH 242/372] net: mvmdio: avoid error message for optional IRQ Per the dt-binding the interrupt is optional so use platform_get_irq_optional() instead of platform_get_irq(). Since commit 7723f4c5ecdb ("driver core: platform: Add an error message to platform_get_irq*()") platform_get_irq() produces an error message orion-mdio f1072004.mdio: IRQ index 0 not found which is perfectly normal if one hasn't specified the optional property in the device tree. Signed-off-by: Chris Packham Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvmdio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvmdio.c b/drivers/net/ethernet/marvell/mvmdio.c index 0b9e851f3da4..d14762d93640 100644 --- a/drivers/net/ethernet/marvell/mvmdio.c +++ b/drivers/net/ethernet/marvell/mvmdio.c @@ -347,7 +347,7 @@ static int orion_mdio_probe(struct platform_device *pdev) } - dev->err_interrupt = platform_get_irq(pdev, 0); + dev->err_interrupt = platform_get_irq_optional(pdev, 0); if (dev->err_interrupt > 0 && resource_size(r) < MVMDIO_ERR_INT_MASK + 4) { dev_err(&pdev->dev, From 612eb1c3b9e504de24136c947ed7c07bc342f3aa Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Mon, 16 Mar 2020 14:44:55 -0700 Subject: [PATCH 243/372] Revert "net: bcmgenet: use RGMII loopback for MAC reset" This reverts commit 3a55402c93877d291b0a612d25edb03d1b4b93ac. This is not a good solution when connecting to an external switch that may not support the isolation of the TXC signal resulting in output driver contention on the pin. A different solution is necessary. Signed-off-by: Doug Berger Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/genet/bcmgenet.c | 2 ++ drivers/net/ethernet/broadcom/genet/bcmmii.c | 34 ------------------- 2 files changed, 2 insertions(+), 34 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index e50a15397e11..c8ac2d83208f 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1989,6 +1989,8 @@ static void reset_umac(struct bcmgenet_priv *priv) /* issue soft reset with (rg)mii loopback to ensure a stable rxclk */ bcmgenet_umac_writel(priv, CMD_SW_RESET | CMD_LCL_LOOP_EN, UMAC_CMD); + udelay(2); + bcmgenet_umac_writel(priv, 0, UMAC_CMD); } static void bcmgenet_intr_disable(struct bcmgenet_priv *priv) diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 10244941a7a6..69e80fb6e039 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -181,38 +181,8 @@ int bcmgenet_mii_config(struct net_device *dev, bool init) const char *phy_name = NULL; u32 id_mode_dis = 0; u32 port_ctrl; - int bmcr = -1; - int ret; u32 reg; - /* MAC clocking workaround during reset of umac state machines */ - reg = bcmgenet_umac_readl(priv, UMAC_CMD); - if (reg & CMD_SW_RESET) { - /* An MII PHY must be isolated to prevent TXC contention */ - if (priv->phy_interface == PHY_INTERFACE_MODE_MII) { - ret = phy_read(phydev, MII_BMCR); - if (ret >= 0) { - bmcr = ret; - ret = phy_write(phydev, MII_BMCR, - bmcr | BMCR_ISOLATE); - } - if (ret) { - netdev_err(dev, "failed to isolate PHY\n"); - return ret; - } - } - /* Switch MAC clocking to RGMII generated clock */ - bcmgenet_sys_writel(priv, PORT_MODE_EXT_GPHY, SYS_PORT_CTRL); - /* Ensure 5 clks with Rx disabled - * followed by 5 clks with Reset asserted - */ - udelay(4); - reg &= ~(CMD_SW_RESET | CMD_LCL_LOOP_EN); - bcmgenet_umac_writel(priv, reg, UMAC_CMD); - /* Ensure 5 more clocks before Rx is enabled */ - udelay(2); - } - switch (priv->phy_interface) { case PHY_INTERFACE_MODE_INTERNAL: phy_name = "internal PHY"; @@ -282,10 +252,6 @@ int bcmgenet_mii_config(struct net_device *dev, bool init) bcmgenet_sys_writel(priv, port_ctrl, SYS_PORT_CTRL); - /* Restore the MII PHY after isolation */ - if (bmcr >= 0) - phy_write(phydev, MII_BMCR, bmcr); - priv->ext_phy = !priv->internal_phy && (priv->phy_interface != PHY_INTERFACE_MODE_MOCA); From 88f6c8bf1aaed5039923fb4c701cab4d42176275 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Mon, 16 Mar 2020 14:44:56 -0700 Subject: [PATCH 244/372] net: bcmgenet: keep MAC in reset until PHY is up As noted in commit 28c2d1a7a0bf ("net: bcmgenet: enable loopback during UniMAC sw_reset") the UniMAC must be clocked at least 5 cycles while the sw_reset is asserted to ensure a clean reset. That commit enabled local loopback to provide an Rx clock from the GENET sourced Tx clk. However, when connected in MII mode the Tx clk is sourced by the PHY so if an EPHY is not supplying clocks (e.g. when the link is down) the UniMAC does not receive the necessary clocks. This commit extends the sw_reset window until the PHY reports that the link is up thereby ensuring that the clocks are being provided to the MAC to produce a clean reset. One consequence is that if the system attempts to enter a Wake on LAN suspend state when the PHY link has not been active the MAC may not have had a chance to initialize cleanly. In this case, we remove the sw_reset and enable the WoL reception path as normal with the hope that the PHY will provide the necessary clocks to drive the WoL blocks if the link becomes active after the system has entered suspend. Fixes: 1c1008c793fa ("net: bcmgenet: add main driver file") Signed-off-by: Doug Berger Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 10 ++++------ drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c | 6 +++++- drivers/net/ethernet/broadcom/genet/bcmmii.c | 6 ++++++ 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index c8ac2d83208f..ce64b4e47feb 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1965,6 +1965,8 @@ static void umac_enable_set(struct bcmgenet_priv *priv, u32 mask, bool enable) u32 reg; reg = bcmgenet_umac_readl(priv, UMAC_CMD); + if (reg & CMD_SW_RESET) + return; if (enable) reg |= mask; else @@ -1984,13 +1986,9 @@ static void reset_umac(struct bcmgenet_priv *priv) bcmgenet_rbuf_ctrl_set(priv, 0); udelay(10); - /* disable MAC while updating its registers */ - bcmgenet_umac_writel(priv, 0, UMAC_CMD); - - /* issue soft reset with (rg)mii loopback to ensure a stable rxclk */ - bcmgenet_umac_writel(priv, CMD_SW_RESET | CMD_LCL_LOOP_EN, UMAC_CMD); + /* issue soft reset and disable MAC while updating its registers */ + bcmgenet_umac_writel(priv, CMD_SW_RESET, UMAC_CMD); udelay(2); - bcmgenet_umac_writel(priv, 0, UMAC_CMD); } static void bcmgenet_intr_disable(struct bcmgenet_priv *priv) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c index ea20d94bd050..c9a43695b182 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c @@ -132,8 +132,12 @@ int bcmgenet_wol_power_down_cfg(struct bcmgenet_priv *priv, return -EINVAL; } - /* disable RX */ + /* Can't suspend with WoL if MAC is still in reset */ reg = bcmgenet_umac_readl(priv, UMAC_CMD); + if (reg & CMD_SW_RESET) + reg &= ~CMD_SW_RESET; + + /* disable RX */ reg &= ~CMD_RX_EN; bcmgenet_umac_writel(priv, reg, UMAC_CMD); mdelay(10); diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index 69e80fb6e039..b5930f80039d 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -95,6 +95,12 @@ void bcmgenet_mii_setup(struct net_device *dev) CMD_HD_EN | CMD_RX_PAUSE_IGNORE | CMD_TX_PAUSE_IGNORE); reg |= cmd_bits; + if (reg & CMD_SW_RESET) { + reg &= ~CMD_SW_RESET; + bcmgenet_umac_writel(priv, reg, UMAC_CMD); + udelay(2); + reg |= CMD_TX_EN | CMD_RX_EN; + } bcmgenet_umac_writel(priv, reg, UMAC_CMD); } else { /* done if nothing has changed */ From 872307abbd0d9afd72171929806c2fa33dc34179 Mon Sep 17 00:00:00 2001 From: Rayagonda Kokatanur Date: Tue, 17 Mar 2020 10:24:35 +0530 Subject: [PATCH 245/372] net: phy: mdio-mux-bcm-iproc: check clk_prepare_enable() return value Check clk_prepare_enable() return value. Fixes: 2c7230446bc9 ("net: phy: Add pm support to Broadcom iProc mdio mux driver") Signed-off-by: Rayagonda Kokatanur Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/phy/mdio-mux-bcm-iproc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/mdio-mux-bcm-iproc.c b/drivers/net/phy/mdio-mux-bcm-iproc.c index 88d409e48c1f..aad6809ebe39 100644 --- a/drivers/net/phy/mdio-mux-bcm-iproc.c +++ b/drivers/net/phy/mdio-mux-bcm-iproc.c @@ -288,8 +288,13 @@ static int mdio_mux_iproc_suspend(struct device *dev) static int mdio_mux_iproc_resume(struct device *dev) { struct iproc_mdiomux_desc *md = dev_get_drvdata(dev); + int rc; - clk_prepare_enable(md->core_clk); + rc = clk_prepare_enable(md->core_clk); + if (rc) { + dev_err(md->dev, "failed to enable core clk\n"); + return rc; + } mdio_mux_iproc_config(md); return 0; From ce1f352162828ba07470328828a32f47aa759020 Mon Sep 17 00:00:00 2001 From: Arthur Kiyanovski Date: Tue, 17 Mar 2020 09:06:39 +0200 Subject: [PATCH 246/372] net: ena: fix incorrect setting of the number of msix vectors Overview: We don't frequently change the msix vectors throughout the life cycle of the driver. We do so in two functions: ena_probe() and ena_restore(). ena_probe() is only called when the driver is loaded. ena_restore() on the other hand is called during device reset / resume operations. We use num_io_queues for calculating and allocating the number of msix vectors. At ena_probe() this value is equal to max_num_io_queues and thus this is not an issue, however ena_restore() might be called after the number of io queues has changed. A possible bug scenario is as follows: * Change number of queues from 8 to 4. (num_io_queues = 4, max_num_io_queues = 8, msix_vecs = 9,) * Trigger reset occurs -> ena_restore is called. (num_io_queues = 4, max_num_io_queues =8 , msix_vecs = 5) * Change number of queues from 4 to 6. (num_io_queues = 6, max_num_io_queues = 8, msix_vecs = 5) * The driver will reset due to failure of check_for_rx_interrupt_queue() Fix: This can be easily fixed by always using max_num_io_queues to init the msix_vecs, since this number won't change as opposed to num_io_queues. Fixes: 4d19266022ec ("net: ena: multiple queue creation related cleanups") Signed-off-by: Sameeh Jubran Signed-off-by: Arthur Kiyanovski Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 0b2fd96b93d7..39f290f4458f 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1968,7 +1968,7 @@ static int ena_enable_msix(struct ena_adapter *adapter) } /* Reserved the max msix vectors we might need */ - msix_vecs = ENA_MAX_MSIX_VEC(adapter->num_io_queues); + msix_vecs = ENA_MAX_MSIX_VEC(adapter->max_num_io_queues); netif_dbg(adapter, probe, adapter->netdev, "trying to enable MSI-X, vectors %d\n", msix_vecs); From e02ae6ed51be3d28923bfd318ae57000f5643da5 Mon Sep 17 00:00:00 2001 From: Arthur Kiyanovski Date: Tue, 17 Mar 2020 09:06:40 +0200 Subject: [PATCH 247/372] net: ena: fix request of incorrect number of IRQ vectors Bug: In short the main issue is caused by the fact that the number of queues is changed using ethtool after ena_probe() has been called and before ena_up() was executed. Here is the full scenario in detail: * ena_probe() is called when the driver is loaded, the driver is not up yet at the end of ena_probe(). * The number of queues is changed -> io_queue_count is changed as well - ena_up() is not called since the "dev_was_up" boolean in ena_update_queue_count() is false. * ena_up() is called by the kernel (it's called asynchronously some time after ena_probe()). ena_setup_io_intr() is called by ena_up() and it uses io_queue_count to get the suitable irq lines for each msix vector. The function ena_request_io_irq() is called right after that and it uses msix_vecs - This value only changes during ena_probe() and ena_restore() - to request the irq vectors. This results in "Failed to request I/O IRQ" error for i > io_queue_count. Numeric example: * After ena_probe() io_queue_count = 8, msix_vecs = 9. * The number of queues changes to 4 -> io_queue_count = 4, msix_vecs = 9. * ena_up() is executed for the first time: ** ena_setup_io_intr() inits the vectors only up to io_queue_count. ** ena_request_io_irq() calls request_irq() and fails for i = 5. How to reproduce: simply run the following commands: sudo rmmod ena && sudo insmod ena.ko; sudo ethtool -L eth1 combined 3; Fix: Use ENA_MAX_MSIX_VEC(adapter->num_io_queues + adapter->xdp_num_queues) instead of adapter->msix_vecs. We need to take XDP queues into consideration as they need to have msix vectors assigned to them as well. Note that the XDP cannot be attached before the driver is up and running but in XDP mode the issue might occur when the number of queues changes right after a reset trigger. The ENA_MAX_MSIX_VEC simply adds one to the argument since the first msix vector is reserved for management queue. Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: Sameeh Jubran Signed-off-by: Arthur Kiyanovski Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 39f290f4458f..836fda585391 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -2068,6 +2068,7 @@ static int ena_request_mgmnt_irq(struct ena_adapter *adapter) static int ena_request_io_irq(struct ena_adapter *adapter) { + u32 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues; unsigned long flags = 0; struct ena_irq *irq; int rc = 0, i, k; @@ -2078,7 +2079,7 @@ static int ena_request_io_irq(struct ena_adapter *adapter) return -EINVAL; } - for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) { + for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++) { irq = &adapter->irq_tbl[i]; rc = request_irq(irq->vector, irq->handler, flags, irq->name, irq->data); @@ -2119,6 +2120,7 @@ static void ena_free_mgmnt_irq(struct ena_adapter *adapter) static void ena_free_io_irq(struct ena_adapter *adapter) { + u32 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues; struct ena_irq *irq; int i; @@ -2129,7 +2131,7 @@ static void ena_free_io_irq(struct ena_adapter *adapter) } #endif /* CONFIG_RFS_ACCEL */ - for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) { + for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++) { irq = &adapter->irq_tbl[i]; irq_set_affinity_hint(irq->vector, NULL); free_irq(irq->vector, irq->data); @@ -2144,12 +2146,13 @@ static void ena_disable_msix(struct ena_adapter *adapter) static void ena_disable_io_intr_sync(struct ena_adapter *adapter) { + u32 io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues; int i; if (!netif_running(adapter->netdev)) return; - for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) + for (i = ENA_IO_IRQ_FIRST_IDX; i < ENA_MAX_MSIX_VEC(io_queue_count); i++) synchronize_irq(adapter->irq_tbl[i].vector); } From 30623e1ed116bcd1785217d0a98eec643687e091 Mon Sep 17 00:00:00 2001 From: Arthur Kiyanovski Date: Tue, 17 Mar 2020 09:06:41 +0200 Subject: [PATCH 248/372] net: ena: avoid memory access violation by validating req_id properly Rx req_id is an index in struct ena_eth_io_rx_cdesc_base. The driver should validate that the Rx req_id it received from the device is in range [0, ring_size -1]. Failure to do so could yield to potential memory access violoation. The validation was mistakenly done when refilling the Rx submission queue and not in Rx completion queue. Fixes: ad974baef2a1 ("net: ena: add support for out of order rx buffers refill") Signed-off-by: Noam Dagan Signed-off-by: Arthur Kiyanovski Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 836fda585391..51333a05c14d 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1018,13 +1018,9 @@ static int ena_refill_rx_bufs(struct ena_ring *rx_ring, u32 num) struct ena_rx_buffer *rx_info; req_id = rx_ring->free_ids[next_to_use]; - rc = validate_rx_req_id(rx_ring, req_id); - if (unlikely(rc < 0)) - break; rx_info = &rx_ring->rx_buffer_info[req_id]; - rc = ena_alloc_rx_page(rx_ring, rx_info, GFP_ATOMIC | __GFP_COMP); if (unlikely(rc < 0)) { @@ -1379,9 +1375,15 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, struct ena_rx_buffer *rx_info; u16 len, req_id, buf = 0; void *va; + int rc; len = ena_bufs[buf].len; req_id = ena_bufs[buf].req_id; + + rc = validate_rx_req_id(rx_ring, req_id); + if (unlikely(rc < 0)) + return NULL; + rx_info = &rx_ring->rx_buffer_info[req_id]; if (unlikely(!rx_info->page)) { @@ -1454,6 +1456,11 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, buf++; len = ena_bufs[buf].len; req_id = ena_bufs[buf].req_id; + + rc = validate_rx_req_id(rx_ring, req_id); + if (unlikely(rc < 0)) + return NULL; + rx_info = &rx_ring->rx_buffer_info[req_id]; } while (1); From dfdde1345bc124816f0fd42fa91b8748051e758e Mon Sep 17 00:00:00 2001 From: Arthur Kiyanovski Date: Tue, 17 Mar 2020 09:06:42 +0200 Subject: [PATCH 249/372] net: ena: fix continuous keep-alive resets last_keep_alive_jiffies is updated in probe and when a keep-alive event is received. In case the driver times-out on a keep-alive event, it has high chances of continuously timing-out on keep-alive events. This is because when the driver recovers from the keep-alive-timeout reset the value of last_keep_alive_jiffies is very old, and if a keep-alive event is not received before the next timer expires, the value of last_keep_alive_jiffies will cause another keep-alive-timeout reset and so forth in a loop. Solution: Update last_keep_alive_jiffies whenever the device is restored after reset. Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: Noam Dagan Signed-off-by: Arthur Kiyanovski Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 51333a05c14d..4647d7656761 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -3486,6 +3486,7 @@ static int ena_restore_device(struct ena_adapter *adapter) netif_carrier_on(adapter->netdev); mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ)); + adapter->last_keep_alive_jiffies = jiffies; dev_err(&pdev->dev, "Device reset completed successfully, Driver info: %s\n", version); From 6497ca07f5e91131c6c05e4564d7f98a780aa02b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 17 Mar 2020 15:54:19 +0100 Subject: [PATCH 250/372] net: phy: sfp-bus.c: get rid of docs warnings The indentation for the returned values are weird, causing those warnings: ./drivers/net/phy/sfp-bus.c:579: WARNING: Unexpected indentation. ./drivers/net/phy/sfp-bus.c:619: WARNING: Unexpected indentation. Use a list and change the identation for it to be properly parsed by the documentation toolchain. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: David S. Miller --- drivers/net/phy/sfp-bus.c | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c index d949ea7b4f8c..6900c68260e0 100644 --- a/drivers/net/phy/sfp-bus.c +++ b/drivers/net/phy/sfp-bus.c @@ -572,13 +572,15 @@ static void sfp_upstream_clear(struct sfp_bus *bus) * the sfp_bus structure, incrementing its reference count. This must * be put via sfp_bus_put() when done. * - * Returns: on success, a pointer to the sfp_bus structure, - * %NULL if no SFP is specified, - * on failure, an error pointer value: - * corresponding to the errors detailed for - * fwnode_property_get_reference_args(). - * %-ENOMEM if we failed to allocate the bus. - * an error from the upstream's connect_phy() method. + * Returns: + * - on success, a pointer to the sfp_bus structure, + * - %NULL if no SFP is specified, + * - on failure, an error pointer value: + * + * - corresponding to the errors detailed for + * fwnode_property_get_reference_args(). + * - %-ENOMEM if we failed to allocate the bus. + * - an error from the upstream's connect_phy() method. */ struct sfp_bus *sfp_bus_find_fwnode(struct fwnode_handle *fwnode) { @@ -612,13 +614,15 @@ EXPORT_SYMBOL_GPL(sfp_bus_find_fwnode); * the SFP bus using sfp_register_upstream(). This takes a reference on the * bus, so it is safe to put the bus after this call. * - * Returns: on success, a pointer to the sfp_bus structure, - * %NULL if no SFP is specified, - * on failure, an error pointer value: - * corresponding to the errors detailed for - * fwnode_property_get_reference_args(). - * %-ENOMEM if we failed to allocate the bus. - * an error from the upstream's connect_phy() method. + * Returns: + * - on success, a pointer to the sfp_bus structure, + * - %NULL if no SFP is specified, + * - on failure, an error pointer value: + * + * - corresponding to the errors detailed for + * fwnode_property_get_reference_args(). + * - %-ENOMEM if we failed to allocate the bus. + * - an error from the upstream's connect_phy() method. */ int sfp_bus_add_upstream(struct sfp_bus *bus, void *upstream, const struct sfp_upstream_ops *ops) From 2de9780f75076c1a1f122cbd39df0fa545284724 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 17 Mar 2020 15:54:20 +0100 Subject: [PATCH 251/372] net: core: dev.c: fix a documentation warning There's a markup for link with is "foo_". On this kernel-doc comment, we don't want this, but instead, place a literal reference. So, escape the literal with ``foo``, in order to avoid this warning: ./net/core/dev.c:5195: WARNING: Unknown target name: "page_is". Signed-off-by: Mauro Carvalho Chehab Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index c6c985fe7b1b..402a986659cf 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5195,7 +5195,7 @@ static int __netif_receive_skb_one_core(struct sk_buff *skb, bool pfmemalloc) * * More direct receive version of netif_receive_skb(). It should * only be used by callers that have a need to skip RPS and Generic XDP. - * Caller must also take care of handling if (page_is_)pfmemalloc. + * Caller must also take care of handling if ``(page_is_)pfmemalloc``. * * This function may only be called from softirq context and interrupts * should be enabled. From 283f87c0d5d32b4a5c22636adc559bca82196ed3 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 17 Mar 2020 08:22:11 +0200 Subject: [PATCH 252/372] stm class: sys-t: Fix the use of time_after() The operands of time_after() are in a wrong order in both instances in the sys-t driver. Fix that. Signed-off-by: Alexander Shishkin Reviewed-by: Andy Shevchenko Fixes: 39f10239df75 ("stm class: p_sys-t: Add support for CLOCKSYNC packets") Fixes: d69d5e83110f ("stm class: Add MIPI SyS-T protocol support") Cc: stable@vger.kernel.org # v4.20+ Link: https://lore.kernel.org/r/20200317062215.15598-3-alexander.shishkin@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/stm/p_sys-t.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hwtracing/stm/p_sys-t.c b/drivers/hwtracing/stm/p_sys-t.c index b178a5495b67..360b5c03df95 100644 --- a/drivers/hwtracing/stm/p_sys-t.c +++ b/drivers/hwtracing/stm/p_sys-t.c @@ -238,7 +238,7 @@ static struct configfs_attribute *sys_t_policy_attrs[] = { static inline bool sys_t_need_ts(struct sys_t_output *op) { if (op->node.ts_interval && - time_after(op->ts_jiffies + op->node.ts_interval, jiffies)) { + time_after(jiffies, op->ts_jiffies + op->node.ts_interval)) { op->ts_jiffies = jiffies; return true; @@ -250,8 +250,8 @@ static inline bool sys_t_need_ts(struct sys_t_output *op) static bool sys_t_need_clock_sync(struct sys_t_output *op) { if (op->node.clocksync_interval && - time_after(op->clocksync_jiffies + op->node.clocksync_interval, - jiffies)) { + time_after(jiffies, + op->clocksync_jiffies + op->node.clocksync_interval)) { op->clocksync_jiffies = jiffies; return true; From 885f123554bbdc1807ca25a374be6e9b3bddf4de Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 17 Mar 2020 08:22:13 +0200 Subject: [PATCH 253/372] intel_th: msu: Fix the unexpected state warning The unexpected state warning should only warn on illegal state transitions. Fix that. Signed-off-by: Alexander Shishkin Reviewed-by: Andy Shevchenko Fixes: 615c164da0eb4 ("intel_th: msu: Introduce buffer interface") Cc: stable@vger.kernel.org # v5.4+ Link: https://lore.kernel.org/r/20200317062215.15598-5-alexander.shishkin@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/msu.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c index 8e48c7458aa3..43e70507c949 100644 --- a/drivers/hwtracing/intel_th/msu.c +++ b/drivers/hwtracing/intel_th/msu.c @@ -718,9 +718,6 @@ static int msc_win_set_lockout(struct msc_window *win, if (old != expect) { ret = -EINVAL; - dev_warn_ratelimited(msc_dev(win->msc), - "expected lockout state %d, got %d\n", - expect, old); goto unlock; } @@ -741,6 +738,10 @@ unlock: /* from intel_th_msc_window_unlock(), don't warn if not locked */ if (expect == WIN_LOCKED && old == new) return 0; + + dev_warn_ratelimited(msc_dev(win->msc), + "expected lockout state %d, got %d\n", + expect, old); } return ret; From ce666be89a8a09c5924ff08fc32e119f974bdab6 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 17 Mar 2020 08:22:14 +0200 Subject: [PATCH 254/372] intel_th: Fix user-visible error codes There are a few places in the driver that end up returning ENOTSUPP to the user, replace those with EINVAL. Signed-off-by: Alexander Shishkin Reviewed-by: Andy Shevchenko Fixes: ba82664c134ef ("intel_th: Add Memory Storage Unit driver") Cc: stable@vger.kernel.org # v4.4+ Link: https://lore.kernel.org/r/20200317062215.15598-6-alexander.shishkin@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/msu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c index 43e70507c949..255f8f41c8ff 100644 --- a/drivers/hwtracing/intel_th/msu.c +++ b/drivers/hwtracing/intel_th/msu.c @@ -761,7 +761,7 @@ static int msc_configure(struct msc *msc) lockdep_assert_held(&msc->buf_mutex); if (msc->mode > MSC_MODE_MULTI) - return -ENOTSUPP; + return -EINVAL; if (msc->mode == MSC_MODE_MULTI) { if (msc_win_set_lockout(msc->cur_win, WIN_READY, WIN_INUSE)) @@ -1295,7 +1295,7 @@ static int msc_buffer_alloc(struct msc *msc, unsigned long *nr_pages, } else if (msc->mode == MSC_MODE_MULTI) { ret = msc_buffer_multi_alloc(msc, nr_pages, nr_wins); } else { - ret = -ENOTSUPP; + ret = -EINVAL; } if (!ret) { @@ -1531,7 +1531,7 @@ static ssize_t intel_th_msc_read(struct file *file, char __user *buf, if (ret >= 0) *ppos = iter->offset; } else { - ret = -ENOTSUPP; + ret = -EINVAL; } put_count: From add492d2e9446a77ede9bb43699ec85ca8fc1aba Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 17 Mar 2020 08:22:15 +0200 Subject: [PATCH 255/372] intel_th: pci: Add Elkhart Lake CPU support This adds support for the Trace Hub in Elkhart Lake CPU. Signed-off-by: Alexander Shishkin Reviewed-by: Andy Shevchenko Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200317062215.15598-7-alexander.shishkin@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/hwtracing/intel_th/pci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c index e9d90b53bbc4..86aa6a46bcba 100644 --- a/drivers/hwtracing/intel_th/pci.c +++ b/drivers/hwtracing/intel_th/pci.c @@ -234,6 +234,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x4da6), .driver_data = (kernel_ulong_t)&intel_th_2x, }, + { + /* Elkhart Lake CPU */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x4529), + .driver_data = (kernel_ulong_t)&intel_th_2x, + }, { /* Elkhart Lake */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x4b26), From 4686392c32361c97e8434adf9cc77ad7991bfa81 Mon Sep 17 00:00:00 2001 From: Ricky Wu Date: Mon, 16 Mar 2020 10:52:32 +0800 Subject: [PATCH 256/372] mmc: rtsx_pci: Fix support for speed-modes that relies on tuning The TX/RX register should not be treated the same way to allow for better support of tuning. Fix this by using a default initial value for TX. Signed-off-by: Ricky Wu Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200316025232.1167-1-ricky_wu@realtek.com [Ulf: Updated changelog] Signed-off-by: Ulf Hansson Acked-by: Greg Kroah-Hartman --- drivers/misc/cardreader/rts5227.c | 2 +- drivers/misc/cardreader/rts5249.c | 2 ++ drivers/misc/cardreader/rts5260.c | 2 +- drivers/misc/cardreader/rts5261.c | 2 +- drivers/mmc/host/rtsx_pci_sdmmc.c | 13 ++++++++----- 5 files changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/misc/cardreader/rts5227.c b/drivers/misc/cardreader/rts5227.c index 4feed296a327..423fecc19fc4 100644 --- a/drivers/misc/cardreader/rts5227.c +++ b/drivers/misc/cardreader/rts5227.c @@ -394,7 +394,7 @@ static const struct pcr_ops rts522a_pcr_ops = { void rts522a_init_params(struct rtsx_pcr *pcr) { rts5227_init_params(pcr); - + pcr->tx_initial_phase = SET_CLOCK_PHASE(20, 20, 11); pcr->reg_pm_ctrl3 = RTS522A_PM_CTRL3; pcr->option.ocp_en = 1; diff --git a/drivers/misc/cardreader/rts5249.c b/drivers/misc/cardreader/rts5249.c index db936e4d6e56..1a81cda948c1 100644 --- a/drivers/misc/cardreader/rts5249.c +++ b/drivers/misc/cardreader/rts5249.c @@ -618,6 +618,7 @@ static const struct pcr_ops rts524a_pcr_ops = { void rts524a_init_params(struct rtsx_pcr *pcr) { rts5249_init_params(pcr); + pcr->tx_initial_phase = SET_CLOCK_PHASE(27, 29, 11); pcr->option.ltr_l1off_sspwrgate = LTR_L1OFF_SSPWRGATE_5250_DEF; pcr->option.ltr_l1off_snooze_sspwrgate = LTR_L1OFF_SNOOZE_SSPWRGATE_5250_DEF; @@ -733,6 +734,7 @@ static const struct pcr_ops rts525a_pcr_ops = { void rts525a_init_params(struct rtsx_pcr *pcr) { rts5249_init_params(pcr); + pcr->tx_initial_phase = SET_CLOCK_PHASE(25, 29, 11); pcr->option.ltr_l1off_sspwrgate = LTR_L1OFF_SSPWRGATE_5250_DEF; pcr->option.ltr_l1off_snooze_sspwrgate = LTR_L1OFF_SNOOZE_SSPWRGATE_5250_DEF; diff --git a/drivers/misc/cardreader/rts5260.c b/drivers/misc/cardreader/rts5260.c index 4214f02a17fd..711054ebad74 100644 --- a/drivers/misc/cardreader/rts5260.c +++ b/drivers/misc/cardreader/rts5260.c @@ -662,7 +662,7 @@ void rts5260_init_params(struct rtsx_pcr *pcr) pcr->sd30_drive_sel_1v8 = CFG_DRIVER_TYPE_B; pcr->sd30_drive_sel_3v3 = CFG_DRIVER_TYPE_B; pcr->aspm_en = ASPM_L1_EN; - pcr->tx_initial_phase = SET_CLOCK_PHASE(1, 29, 16); + pcr->tx_initial_phase = SET_CLOCK_PHASE(27, 29, 11); pcr->rx_initial_phase = SET_CLOCK_PHASE(24, 6, 5); pcr->ic_version = rts5260_get_ic_version(pcr); diff --git a/drivers/misc/cardreader/rts5261.c b/drivers/misc/cardreader/rts5261.c index bc4967a6efa1..78c3b1d424c3 100644 --- a/drivers/misc/cardreader/rts5261.c +++ b/drivers/misc/cardreader/rts5261.c @@ -764,7 +764,7 @@ void rts5261_init_params(struct rtsx_pcr *pcr) pcr->sd30_drive_sel_1v8 = CFG_DRIVER_TYPE_B; pcr->sd30_drive_sel_3v3 = CFG_DRIVER_TYPE_B; pcr->aspm_en = ASPM_L1_EN; - pcr->tx_initial_phase = SET_CLOCK_PHASE(20, 27, 16); + pcr->tx_initial_phase = SET_CLOCK_PHASE(27, 27, 11); pcr->rx_initial_phase = SET_CLOCK_PHASE(24, 6, 5); pcr->ic_version = rts5261_get_ic_version(pcr); diff --git a/drivers/mmc/host/rtsx_pci_sdmmc.c b/drivers/mmc/host/rtsx_pci_sdmmc.c index bd50935dc37d..11087976ab19 100644 --- a/drivers/mmc/host/rtsx_pci_sdmmc.c +++ b/drivers/mmc/host/rtsx_pci_sdmmc.c @@ -606,19 +606,22 @@ static int sd_change_phase(struct realtek_pci_sdmmc *host, u8 sample_point, bool rx) { struct rtsx_pcr *pcr = host->pcr; - + u16 SD_VP_CTL = 0; dev_dbg(sdmmc_dev(host), "%s(%s): sample_point = %d\n", __func__, rx ? "RX" : "TX", sample_point); rtsx_pci_write_register(pcr, CLK_CTL, CHANGE_CLK, CHANGE_CLK); - if (rx) + if (rx) { + SD_VP_CTL = SD_VPRX_CTL; rtsx_pci_write_register(pcr, SD_VPRX_CTL, PHASE_SELECT_MASK, sample_point); - else + } else { + SD_VP_CTL = SD_VPTX_CTL; rtsx_pci_write_register(pcr, SD_VPTX_CTL, PHASE_SELECT_MASK, sample_point); - rtsx_pci_write_register(pcr, SD_VPCLK0_CTL, PHASE_NOT_RESET, 0); - rtsx_pci_write_register(pcr, SD_VPCLK0_CTL, PHASE_NOT_RESET, + } + rtsx_pci_write_register(pcr, SD_VP_CTL, PHASE_NOT_RESET, 0); + rtsx_pci_write_register(pcr, SD_VP_CTL, PHASE_NOT_RESET, PHASE_NOT_RESET); rtsx_pci_write_register(pcr, CLK_CTL, CHANGE_CLK, 0); rtsx_pci_write_register(pcr, SD_CFG1, SD_ASYNC_FIFO_NOT_RST, 0); From ed069827ca70af057caf21c395f76c2c0b82d429 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 23 Feb 2020 23:33:59 -0800 Subject: [PATCH 257/372] tty: drop outdated comments about release_tty() locking The current version of the TTY code unlocks the tty_struct(s) before release_tty() rather than after. Moreover, tty_unlock_pair() no longer exists. Thus, remove the outdated comments regarding tty_unlock_pair(). Signed-off-by: Eric Biggers Link: https://lore.kernel.org/r/20200224073359.292795-1-ebiggers@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_io.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index a1453fe10862..1fcf7ad83dfa 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -1589,9 +1589,7 @@ void tty_kclose(struct tty_struct *tty) tty_debug_hangup(tty, "freeing structure\n"); /* * The release_tty function takes care of the details of clearing - * the slots and preserving the termios structure. The tty_unlock_pair - * should be safe as we keep a kref while the tty is locked (so the - * unlock never unlocks a freed tty). + * the slots and preserving the termios structure. */ mutex_lock(&tty_mutex); tty_port_set_kopened(tty->port, 0); @@ -1621,9 +1619,7 @@ void tty_release_struct(struct tty_struct *tty, int idx) tty_debug_hangup(tty, "freeing structure\n"); /* * The release_tty function takes care of the details of clearing - * the slots and preserving the termios structure. The tty_unlock_pair - * should be safe as we keep a kref while the tty is locked (so the - * unlock never unlocks a freed tty). + * the slots and preserving the termios structure. */ mutex_lock(&tty_mutex); release_tty(tty, idx); From 17329563a97df3ba474eca5037c1336e46e14ff8 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 24 Feb 2020 10:20:43 -0800 Subject: [PATCH 258/372] tty: fix compat TIOCGSERIAL leaking uninitialized memory Commit 77654350306a ("take compat TIOC[SG]SERIAL treatment into tty_compat_ioctl()") changed the compat version of TIOCGSERIAL to start copying a whole 'serial_struct32' to userspace rather than individual fields, but failed to initialize all padding and fields -- namely the hole after the 'iomem_reg_shift' field, and the 'reserved' field. Fix this by initializing the struct to zero. [v2: use sizeof, and convert the adjacent line for consistency.] Reported-by: syzbot+8da9175e28eadcb203ce@syzkaller.appspotmail.com Fixes: 77654350306a ("take compat TIOC[SG]SERIAL treatment into tty_compat_ioctl()") Cc: # v4.20+ Signed-off-by: Eric Biggers Acked-by: Jiri Slaby Link: https://lore.kernel.org/r/20200224182044.234553-2-ebiggers@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_io.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 1fcf7ad83dfa..db4a13bc855e 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -2730,7 +2730,9 @@ static int compat_tty_tiocgserial(struct tty_struct *tty, struct serial_struct32 v32; struct serial_struct v; int err; - memset(&v, 0, sizeof(struct serial_struct)); + + memset(&v, 0, sizeof(v)); + memset(&v32, 0, sizeof(v32)); if (!tty->ops->set_serial) return -ENOTTY; From 6e622cd8bd888c7fa3ee2b7dfb3514ab53b21570 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 24 Feb 2020 10:20:44 -0800 Subject: [PATCH 259/372] tty: fix compat TIOCGSERIAL checking wrong function ptr Commit 77654350306a ("take compat TIOC[SG]SERIAL treatment into tty_compat_ioctl()") changed the compat version of TIOCGSERIAL to start checking for the presence of the ->set_serial function pointer rather than ->get_serial. This appears to be a copy-and-paste error, since ->get_serial is the function pointer that is called as well as the pointer that is checked by the non-compat version of TIOCGSERIAL. Fix this by checking the correct function pointer. Fixes: 77654350306a ("take compat TIOC[SG]SERIAL treatment into tty_compat_ioctl()") Cc: # v4.20+ Signed-off-by: Eric Biggers Acked-by: Jiri Slaby Link: https://lore.kernel.org/r/20200224182044.234553-3-ebiggers@kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index db4a13bc855e..5a6f36b391d9 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -2734,7 +2734,7 @@ static int compat_tty_tiocgserial(struct tty_struct *tty, memset(&v, 0, sizeof(v)); memset(&v32, 0, sizeof(v32)); - if (!tty->ops->set_serial) + if (!tty->ops->get_serial) return -ENOTTY; err = tty->ops->get_serial(tty, &v); if (!err) { From b216a8e7908cd750550c0480cf7d2b3a37f06954 Mon Sep 17 00:00:00 2001 From: Qiujun Huang Date: Wed, 18 Mar 2020 15:53:50 +0800 Subject: [PATCH 260/372] drm/lease: fix WARNING in idr_destroy drm_lease_create takes ownership of leases. And leases will be released by drm_master_put. drm_master_put ->drm_master_destroy ->idr_destroy So we needn't call idr_destroy again. Reported-and-tested-by: syzbot+05835159fe322770fe3d@syzkaller.appspotmail.com Signed-off-by: Qiujun Huang Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/1584518030-4173-1-git-send-email-hqjagain@gmail.com --- drivers/gpu/drm/drm_lease.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_lease.c b/drivers/gpu/drm/drm_lease.c index b481cafdde28..825abe38201a 100644 --- a/drivers/gpu/drm/drm_lease.c +++ b/drivers/gpu/drm/drm_lease.c @@ -542,10 +542,12 @@ int drm_mode_create_lease_ioctl(struct drm_device *dev, } DRM_DEBUG_LEASE("Creating lease\n"); + /* lessee will take the ownership of leases */ lessee = drm_lease_create(lessor, &leases); if (IS_ERR(lessee)) { ret = PTR_ERR(lessee); + idr_destroy(&leases); goto out_leases; } @@ -580,7 +582,6 @@ out_lessee: out_leases: put_unused_fd(fd); - idr_destroy(&leases); DRM_DEBUG_LEASE("drm_mode_create_lease_ioctl failed: %d\n", ret); return ret; From fd4d9c7d0c71866ec0c2825189ebd2ce35bd95b8 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 17 Mar 2020 01:28:45 +0100 Subject: [PATCH 261/372] mm: slub: add missing TID bump in kmem_cache_alloc_bulk() When kmem_cache_alloc_bulk() attempts to allocate N objects from a percpu freelist of length M, and N > M > 0, it will first remove the M elements from the percpu freelist, then call ___slab_alloc() to allocate the next element and repopulate the percpu freelist. ___slab_alloc() can re-enable IRQs via allocate_slab(), so the TID must be bumped before ___slab_alloc() to properly commit the freelist head change. Fix it by unconditionally bumping c->tid when entering the slowpath. Cc: stable@vger.kernel.org Fixes: ebe909e0fdb3 ("slub: improve bulk alloc strategy") Signed-off-by: Jann Horn Signed-off-by: Linus Torvalds --- mm/slub.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/mm/slub.c b/mm/slub.c index 17dc00e33115..eae5bb47b22f 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3174,6 +3174,15 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, void *object = c->freelist; if (unlikely(!object)) { + /* + * We may have removed an object from c->freelist using + * the fastpath in the previous iteration; in that case, + * c->tid has not been bumped yet. + * Since ___slab_alloc() may reenable interrupts while + * allocating memory, we should bump c->tid now. + */ + c->tid = next_tid(c->tid); + /* * Invoking slow path likely have side-effect * of re-populating per CPU c->freelist From 5076190daded2197f62fe92cf69674488be44175 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 17 Mar 2020 11:04:09 -0700 Subject: [PATCH 262/372] mm: slub: be more careful about the double cmpxchg of freelist This is just a cleanup addition to Jann's fix to properly update the transaction ID for the slub slowpath in commit fd4d9c7d0c71 ("mm: slub: add missing TID bump.."). The transaction ID is what protects us against any concurrent accesses, but we should really also make sure to make the 'freelist' comparison itself always use the same freelist value that we then used as the new next free pointer. Jann points out that if we do all of this carefully, we could skip the transaction ID update for all the paths that only remove entries from the lists, and only update the TID when adding entries (to avoid the ABA issue with cmpxchg and list handling re-adding a previously seen value). But this patch just does the "make sure to cmpxchg the same value we used" rather than then try to be clever. Acked-by: Jann Horn Signed-off-by: Linus Torvalds --- mm/slub.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index eae5bb47b22f..97580b41a24b 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2997,11 +2997,13 @@ redo: barrier(); if (likely(page == c->page)) { - set_freepointer(s, tail_obj, c->freelist); + void **freelist = READ_ONCE(c->freelist); + + set_freepointer(s, tail_obj, freelist); if (unlikely(!this_cpu_cmpxchg_double( s->cpu_slab->freelist, s->cpu_slab->tid, - c->freelist, tid, + freelist, tid, head, next_tid(tid)))) { note_cmpxchg_failure("slab_free", s, tid); From 4b8a5cfb5fd375cf4c7502a18f0096ed2881be27 Mon Sep 17 00:00:00 2001 From: Xiao Yang Date: Wed, 18 Mar 2020 18:34:16 +0800 Subject: [PATCH 263/372] modpost: Get proper section index by get_secindex() instead of st_shndx (uint16_t) st_shndx is limited to 65535(i.e. SHN_XINDEX) so sym_get_data() gets wrong section index by st_shndx if requested symbol contains extended section index that is more than 65535. In this case, we need to get proper section index by .symtab_shndx section. Module.symvers generated by building kernel with "-ffunction-sections -fdata-sections" shows the issue. Fixes: 56067812d5b0 ("kbuild: modversions: add infrastructure for emitting relative CRCs") Fixes: e84f9fbbece1 ("modpost: refactor namespace_from_kstrtabns() to not hard-code section name") Signed-off-by: Xiao Yang Signed-off-by: Masahiro Yamada --- scripts/mod/modpost.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 6ab235354f36..55a0a2eccbd2 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -308,7 +308,8 @@ static const char *sec_name(struct elf_info *elf, int secindex) static void *sym_get_data(const struct elf_info *info, const Elf_Sym *sym) { - Elf_Shdr *sechdr = &info->sechdrs[sym->st_shndx]; + unsigned int secindex = get_secindex(info, sym); + Elf_Shdr *sechdr = &info->sechdrs[secindex]; unsigned long offset; offset = sym->st_value; From dcf23ac3e846ca0cf626c155a0e3fcbbcf4fae8a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 18 Mar 2020 07:52:21 -0400 Subject: [PATCH 264/372] locks: reinstate locks_delete_block optimization There is measurable performance impact in some synthetic tests due to commit 6d390e4b5d48 (locks: fix a potential use-after-free problem when wakeup a waiter). Fix the race condition instead by clearing the fl_blocker pointer after the wake_up, using explicit acquire/release semantics. This does mean that we can no longer use the clearing of fl_blocker as the wait condition, so switch the waiters over to checking whether the fl_blocked_member list_head is empty. Reviewed-by: yangerkun Reviewed-by: NeilBrown Fixes: 6d390e4b5d48 (locks: fix a potential use-after-free problem when wakeup a waiter) Signed-off-by: Jeff Layton Signed-off-by: Linus Torvalds --- fs/cifs/file.c | 3 ++- fs/locks.c | 54 ++++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 50 insertions(+), 7 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 3b942ecdd4be..8f9d849a0012 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1169,7 +1169,8 @@ try_again: rc = posix_lock_file(file, flock, NULL); up_write(&cinode->lock_sem); if (rc == FILE_LOCK_DEFERRED) { - rc = wait_event_interruptible(flock->fl_wait, !flock->fl_blocker); + rc = wait_event_interruptible(flock->fl_wait, + list_empty(&flock->fl_blocked_member)); if (!rc) goto try_again; locks_delete_block(flock); diff --git a/fs/locks.c b/fs/locks.c index 426b55d333d5..b8a31c1c4fff 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -725,7 +725,6 @@ static void __locks_delete_block(struct file_lock *waiter) { locks_delete_global_blocked(waiter); list_del_init(&waiter->fl_blocked_member); - waiter->fl_blocker = NULL; } static void __locks_wake_up_blocks(struct file_lock *blocker) @@ -740,6 +739,13 @@ static void __locks_wake_up_blocks(struct file_lock *blocker) waiter->fl_lmops->lm_notify(waiter); else wake_up(&waiter->fl_wait); + + /* + * The setting of fl_blocker to NULL marks the "done" + * point in deleting a block. Paired with acquire at the top + * of locks_delete_block(). + */ + smp_store_release(&waiter->fl_blocker, NULL); } } @@ -753,11 +759,42 @@ int locks_delete_block(struct file_lock *waiter) { int status = -ENOENT; + /* + * If fl_blocker is NULL, it won't be set again as this thread "owns" + * the lock and is the only one that might try to claim the lock. + * + * We use acquire/release to manage fl_blocker so that we can + * optimize away taking the blocked_lock_lock in many cases. + * + * The smp_load_acquire guarantees two things: + * + * 1/ that fl_blocked_requests can be tested locklessly. If something + * was recently added to that list it must have been in a locked region + * *before* the locked region when fl_blocker was set to NULL. + * + * 2/ that no other thread is accessing 'waiter', so it is safe to free + * it. __locks_wake_up_blocks is careful not to touch waiter after + * fl_blocker is released. + * + * If a lockless check of fl_blocker shows it to be NULL, we know that + * no new locks can be inserted into its fl_blocked_requests list, and + * can avoid doing anything further if the list is empty. + */ + if (!smp_load_acquire(&waiter->fl_blocker) && + list_empty(&waiter->fl_blocked_requests)) + return status; + spin_lock(&blocked_lock_lock); if (waiter->fl_blocker) status = 0; __locks_wake_up_blocks(waiter); __locks_delete_block(waiter); + + /* + * The setting of fl_blocker to NULL marks the "done" point in deleting + * a block. Paired with acquire at the top of this function. + */ + smp_store_release(&waiter->fl_blocker, NULL); spin_unlock(&blocked_lock_lock); return status; } @@ -1350,7 +1387,8 @@ static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl) error = posix_lock_inode(inode, fl, NULL); if (error != FILE_LOCK_DEFERRED) break; - error = wait_event_interruptible(fl->fl_wait, !fl->fl_blocker); + error = wait_event_interruptible(fl->fl_wait, + list_empty(&fl->fl_blocked_member)); if (error) break; } @@ -1435,7 +1473,8 @@ int locks_mandatory_area(struct inode *inode, struct file *filp, loff_t start, error = posix_lock_inode(inode, &fl, NULL); if (error != FILE_LOCK_DEFERRED) break; - error = wait_event_interruptible(fl.fl_wait, !fl.fl_blocker); + error = wait_event_interruptible(fl.fl_wait, + list_empty(&fl.fl_blocked_member)); if (!error) { /* * If we've been sleeping someone might have @@ -1638,7 +1677,8 @@ restart: locks_dispose_list(&dispose); error = wait_event_interruptible_timeout(new_fl->fl_wait, - !new_fl->fl_blocker, break_time); + list_empty(&new_fl->fl_blocked_member), + break_time); percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); @@ -2122,7 +2162,8 @@ static int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl) error = flock_lock_inode(inode, fl); if (error != FILE_LOCK_DEFERRED) break; - error = wait_event_interruptible(fl->fl_wait, !fl->fl_blocker); + error = wait_event_interruptible(fl->fl_wait, + list_empty(&fl->fl_blocked_member)); if (error) break; } @@ -2399,7 +2440,8 @@ static int do_lock_file_wait(struct file *filp, unsigned int cmd, error = vfs_lock_file(filp, cmd, fl, NULL); if (error != FILE_LOCK_DEFERRED) break; - error = wait_event_interruptible(fl->fl_wait, !fl->fl_blocker); + error = wait_event_interruptible(fl->fl_wait, + list_empty(&fl->fl_blocked_member)); if (error) break; } From acfc62dc68770aa665cc606891f6df7d6d1e52c0 Mon Sep 17 00:00:00 2001 From: James Zhu Date: Wed, 18 Mar 2020 17:09:05 -0400 Subject: [PATCH 265/372] drm/amdgpu: fix typo for vcn1 idle check fix typo for vcn1 idle check Signed-off-by: James Zhu Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 71f61afdc655..09b0572b838d 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -1352,7 +1352,7 @@ static int vcn_v1_0_set_clockgating_state(void *handle, if (enable) { /* wait for STATUS to clear */ - if (vcn_v1_0_is_idle(handle)) + if (!vcn_v1_0_is_idle(handle)) return -EBUSY; vcn_v1_0_enable_clock_gating(adev); } else { From b5689d22aa6d815f29d34f8cbd708f9d34eed70a Mon Sep 17 00:00:00 2001 From: James Zhu Date: Wed, 18 Mar 2020 17:10:56 -0400 Subject: [PATCH 266/372] drm/amdgpu: fix typo for vcn2/jpeg2 idle check fix typo for vcn2/jpeg2 idle check Signed-off-by: James Zhu Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index ff2e6e1ccde7..6173951db7b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -693,7 +693,7 @@ static int jpeg_v2_0_set_clockgating_state(void *handle, bool enable = (state == AMD_CG_STATE_GATE); if (enable) { - if (jpeg_v2_0_is_idle(handle)) + if (!jpeg_v2_0_is_idle(handle)) return -EBUSY; jpeg_v2_0_enable_clock_gating(adev); } else { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index c387c81f8695..b7f17342bbf0 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -1217,7 +1217,7 @@ static int vcn_v2_0_set_clockgating_state(void *handle, if (enable) { /* wait for STATUS to clear */ - if (vcn_v2_0_is_idle(handle)) + if (!vcn_v2_0_is_idle(handle)) return -EBUSY; vcn_v2_0_enable_clock_gating(adev); } else { From a3c33e7a4a116f8715c0ef0e668e6aeff009c762 Mon Sep 17 00:00:00 2001 From: James Zhu Date: Wed, 18 Mar 2020 17:12:12 -0400 Subject: [PATCH 267/372] drm/amdgpu: fix typo for vcn2.5/jpeg2.5 idle check fix typo for vcn2.5/jpeg2.5 idle check Signed-off-by: James Zhu Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c | 2 +- drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index c6d046df4b70..c04c2078a7c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -477,7 +477,7 @@ static int jpeg_v2_5_set_clockgating_state(void *handle, continue; if (enable) { - if (jpeg_v2_5_is_idle(handle)) + if (!jpeg_v2_5_is_idle(handle)) return -EBUSY; jpeg_v2_5_enable_clock_gating(adev, i); } else { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 2d64ba1adf99..678253d81154 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -1672,7 +1672,7 @@ static int vcn_v2_5_set_clockgating_state(void *handle, return 0; if (enable) { - if (vcn_v2_5_is_idle(handle)) + if (!vcn_v2_5_is_idle(handle)) return -EBUSY; vcn_v2_5_enable_clock_gating(adev); } else { From dd2af10402684cb5840a127caec9e7cdcff6d167 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Wed, 18 Mar 2020 12:50:33 +0200 Subject: [PATCH 268/372] net/sched: act_ct: Fix leak of ct zone template on replace Currently, on replace, the previous action instance params is swapped with a newly allocated params. The old params is only freed (via kfree_rcu), without releasing the allocated ct zone template related to it. Call tcf_ct_params_free (via call_rcu) for the old params, so it will release it. Fixes: b57dc7c13ea9 ("net/sched: Introduce action ct") Signed-off-by: Paul Blakey Signed-off-by: David S. Miller --- net/sched/act_ct.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index f685c0d73708..41114b463161 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -739,7 +739,7 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla, if (goto_ch) tcf_chain_put_by_act(goto_ch); if (params) - kfree_rcu(params, rcu); + call_rcu(¶ms->rcu, tcf_ct_params_free); if (res == ACT_P_CREATED) tcf_idr_insert(tn, *a); From 384d91c267e621e0926062cfb3f20cb72dc16928 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Wed, 18 Mar 2020 13:28:09 +0000 Subject: [PATCH 269/372] vxlan: check return value of gro_cells_init() gro_cells_init() returns error if memory allocation is failed. But the vxlan module doesn't check the return value of gro_cells_init(). Fixes: 58ce31cca1ff ("vxlan: GRO support at tunnel layer")` Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index d3b08b76b1ec..45308b3350cf 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2779,10 +2779,19 @@ static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan, /* Setup stats when device is created */ static int vxlan_init(struct net_device *dev) { + struct vxlan_dev *vxlan = netdev_priv(dev); + int err; + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; + err = gro_cells_init(&vxlan->gro_cells, dev); + if (err) { + free_percpu(dev->tstats); + return err; + } + return 0; } @@ -3043,8 +3052,6 @@ static void vxlan_setup(struct net_device *dev) vxlan->dev = dev; - gro_cells_init(&vxlan->gro_cells, dev); - for (h = 0; h < FDB_HASH_SIZE; ++h) { spin_lock_init(&vxlan->hash_lock[h]); INIT_HLIST_HEAD(&vxlan->fdb_head[h]); From 166391159c5deb84795d2ff46e95f276177fa5fb Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 18 Mar 2020 18:30:43 -0600 Subject: [PATCH 270/372] wireguard: selftests: remove duplicated include This commit removes a duplicated include. Signed-off-by: YueHaibing Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. Miller --- tools/testing/selftests/wireguard/qemu/init.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/testing/selftests/wireguard/qemu/init.c b/tools/testing/selftests/wireguard/qemu/init.c index 90bc9813cadc..c9698120ac9d 100644 --- a/tools/testing/selftests/wireguard/qemu/init.c +++ b/tools/testing/selftests/wireguard/qemu/init.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include From 551599edbfff2431cef943a772fbde1c3e26eaf8 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 18 Mar 2020 18:30:44 -0600 Subject: [PATCH 271/372] wireguard: selftests: test using new 64-bit time_t In case this helps expose bugs with the newer 64-bit time_t types, we do our testing with the newer musl that supports this as well as CONFIG_COMPAT_32BIT_TIME=n. This matters to us, since wireguard does in fact deal with timestamps. Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. Miller --- tools/testing/selftests/wireguard/qemu/Makefile | 2 +- tools/testing/selftests/wireguard/qemu/kernel.config | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile index 28d477683e8a..90598a425c18 100644 --- a/tools/testing/selftests/wireguard/qemu/Makefile +++ b/tools/testing/selftests/wireguard/qemu/Makefile @@ -41,7 +41,7 @@ $(DISTFILES_PATH)/$(1): flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp; [ -f $$@.tmp ] || exit 1; if echo "$(3) $$@.tmp" | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi' endef -$(eval $(call tar_download,MUSL,musl,1.1.24,.tar.gz,https://www.musl-libc.org/releases/,1370c9a812b2cf2a7d92802510cca0058cc37e66a7bedd70051f0a34015022a3)) +$(eval $(call tar_download,MUSL,musl,1.2.0,.tar.gz,https://musl.libc.org/releases/,c6de7b191139142d3f9a7b5b702c9cae1b5ee6e7f57e582da9328629408fd4e8)) $(eval $(call tar_download,IPERF,iperf,3.7,.tar.gz,https://downloads.es.net/pub/iperf/,d846040224317caf2f75c843d309a950a7db23f9b44b94688ccbe557d6d1710c)) $(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/,b4a80f2ac66170b2913efbfb9f2594f1f76c7b1afd11f799e22035d63077fb4d)) $(eval $(call tar_download,IPROUTE2,iproute2,5.4.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,fe97aa60a0d4c5ac830be18937e18dc3400ca713a33a89ad896ff1e3d46086ae)) diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config index af9323a0b6e0..d531de13c95b 100644 --- a/tools/testing/selftests/wireguard/qemu/kernel.config +++ b/tools/testing/selftests/wireguard/qemu/kernel.config @@ -56,7 +56,6 @@ CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n CONFIG_HZ_PERIODIC=n CONFIG_HIGH_RES_TIMERS=y -CONFIG_COMPAT_32BIT_TIME=y CONFIG_ARCH_RANDOM=y CONFIG_FILE_LOCKING=y CONFIG_POSIX_TIMERS=y From a5588604af448664e796daf3c1d5a4523c60667b Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 18 Mar 2020 18:30:45 -0600 Subject: [PATCH 272/372] wireguard: queueing: account for skb->protocol==0 We carry out checks to the effect of: if (skb->protocol != wg_examine_packet_protocol(skb)) goto err; By having wg_skb_examine_untrusted_ip_hdr return 0 on failure, this means that the check above still passes in the case where skb->protocol is zero, which is possible to hit with AF_PACKET: struct sockaddr_pkt saddr = { .spkt_device = "wg0" }; unsigned char buffer[5] = { 0 }; sendto(socket(AF_PACKET, SOCK_PACKET, /* skb->protocol = */ 0), buffer, sizeof(buffer), 0, (const struct sockaddr *)&saddr, sizeof(saddr)); Additional checks mean that this isn't actually a problem in the code base, but I could imagine it becoming a problem later if the function is used more liberally. I would prefer to fix this by having wg_examine_packet_protocol return a 32-bit ~0 value on failure, which will never match any value of skb->protocol, which would simply change the generated code from a mov to a movzx. However, sparse complains, and adding __force casts doesn't seem like a good idea, so instead we just add a simple helper function to check for the zero return value. Since wg_examine_packet_protocol itself gets inlined, this winds up not adding an additional branch to the generated code, since the 0 return value already happens in a mergable branch. Reported-by: Fabian Freyer Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. Miller --- drivers/net/wireguard/device.c | 2 +- drivers/net/wireguard/queueing.h | 8 +++++++- drivers/net/wireguard/receive.c | 4 ++-- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c index cdc96968b0f4..3ac3f8570ca1 100644 --- a/drivers/net/wireguard/device.c +++ b/drivers/net/wireguard/device.c @@ -122,7 +122,7 @@ static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev) u32 mtu; int ret; - if (unlikely(wg_skb_examine_untrusted_ip_hdr(skb) != skb->protocol)) { + if (unlikely(!wg_check_packet_protocol(skb))) { ret = -EPROTONOSUPPORT; net_dbg_ratelimited("%s: Invalid IP packet\n", dev->name); goto err; diff --git a/drivers/net/wireguard/queueing.h b/drivers/net/wireguard/queueing.h index fecb559cbdb6..cf1e0e2376d8 100644 --- a/drivers/net/wireguard/queueing.h +++ b/drivers/net/wireguard/queueing.h @@ -66,7 +66,7 @@ struct packet_cb { #define PACKET_PEER(skb) (PACKET_CB(skb)->keypair->entry.peer) /* Returns either the correct skb->protocol value, or 0 if invalid. */ -static inline __be16 wg_skb_examine_untrusted_ip_hdr(struct sk_buff *skb) +static inline __be16 wg_examine_packet_protocol(struct sk_buff *skb) { if (skb_network_header(skb) >= skb->head && (skb_network_header(skb) + sizeof(struct iphdr)) <= @@ -81,6 +81,12 @@ static inline __be16 wg_skb_examine_untrusted_ip_hdr(struct sk_buff *skb) return 0; } +static inline bool wg_check_packet_protocol(struct sk_buff *skb) +{ + __be16 real_protocol = wg_examine_packet_protocol(skb); + return real_protocol && skb->protocol == real_protocol; +} + static inline void wg_reset_packet(struct sk_buff *skb) { skb_scrub_packet(skb, true); diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c index 4a153894cee2..243ed7172dd2 100644 --- a/drivers/net/wireguard/receive.c +++ b/drivers/net/wireguard/receive.c @@ -56,7 +56,7 @@ static int prepare_skb_header(struct sk_buff *skb, struct wg_device *wg) size_t data_offset, data_len, header_len; struct udphdr *udp; - if (unlikely(wg_skb_examine_untrusted_ip_hdr(skb) != skb->protocol || + if (unlikely(!wg_check_packet_protocol(skb) || skb_transport_header(skb) < skb->head || (skb_transport_header(skb) + sizeof(struct udphdr)) > skb_tail_pointer(skb))) @@ -388,7 +388,7 @@ static void wg_packet_consume_data_done(struct wg_peer *peer, */ skb->ip_summed = CHECKSUM_UNNECESSARY; skb->csum_level = ~0; /* All levels */ - skb->protocol = wg_skb_examine_untrusted_ip_hdr(skb); + skb->protocol = wg_examine_packet_protocol(skb); if (skb->protocol == htons(ETH_P_IP)) { len = ntohs(ip_hdr(skb)->tot_len); if (unlikely(len < sizeof(struct iphdr))) From 2b8765c52db24c0fbcc81bac9b5e8390f2c7d3c8 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 18 Mar 2020 18:30:46 -0600 Subject: [PATCH 273/372] wireguard: receive: remove dead code from default packet type case The situation in which we wind up hitting the default case here indicates a major bug in earlier parsing code. It is not a usual thing that should ever happen, which means a "friendly" message for it doesn't make sense. Rather, replace this with a WARN_ON, just like we do earlier in the file for a similar situation, so that somebody sends us a bug report and we can fix it. Reported-by: Fabian Freyer Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. Miller --- drivers/net/wireguard/receive.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c index 243ed7172dd2..da3b782ab7d3 100644 --- a/drivers/net/wireguard/receive.c +++ b/drivers/net/wireguard/receive.c @@ -587,8 +587,7 @@ void wg_packet_receive(struct wg_device *wg, struct sk_buff *skb) wg_packet_consume_data(wg, skb); break; default: - net_dbg_skb_ratelimited("%s: Invalid packet from %pISpfsc\n", - wg->dev->name, skb); + WARN(1, "Non-exhaustive parsing of packet header lead to unknown packet type!\n"); goto err; } return; From 11a7686aa99c7fe4b3f80f6dcccd54129817984d Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 18 Mar 2020 18:30:47 -0600 Subject: [PATCH 274/372] wireguard: noise: error out precomputed DH during handshake rather than config We precompute the static-static ECDH during configuration time, in order to save an expensive computation later when receiving network packets. However, not all ECDH computations yield a contributory result. Prior, we were just not letting those peers be added to the interface. However, this creates a strange inconsistency, since it was still possible to add other weird points, like a valid public key plus a low-order point, and, like points that result in zeros, a handshake would not complete. In order to make the behavior more uniform and less surprising, simply allow all peers to be added. Then, we'll error out later when doing the crypto if there's an issue. This also adds more separation between the crypto layer and the configuration layer. Discussed-with: Mathias Hall-Andersen Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. Miller --- drivers/net/wireguard/netlink.c | 8 +--- drivers/net/wireguard/noise.c | 55 ++++++++++++---------- drivers/net/wireguard/noise.h | 12 ++--- drivers/net/wireguard/peer.c | 7 +-- tools/testing/selftests/wireguard/netns.sh | 15 ++++-- 5 files changed, 49 insertions(+), 48 deletions(-) diff --git a/drivers/net/wireguard/netlink.c b/drivers/net/wireguard/netlink.c index bda26405497c..802099c8828a 100644 --- a/drivers/net/wireguard/netlink.c +++ b/drivers/net/wireguard/netlink.c @@ -411,11 +411,7 @@ static int set_peer(struct wg_device *wg, struct nlattr **attrs) peer = wg_peer_create(wg, public_key, preshared_key); if (IS_ERR(peer)) { - /* Similar to the above, if the key is invalid, we skip - * it without fanfare, so that services don't need to - * worry about doing key validation themselves. - */ - ret = PTR_ERR(peer) == -EKEYREJECTED ? 0 : PTR_ERR(peer); + ret = PTR_ERR(peer); peer = NULL; goto out; } @@ -569,7 +565,7 @@ static int wg_set_device(struct sk_buff *skb, struct genl_info *info) private_key); list_for_each_entry_safe(peer, temp, &wg->peer_list, peer_list) { - BUG_ON(!wg_noise_precompute_static_static(peer)); + wg_noise_precompute_static_static(peer); wg_noise_expire_current_peer_keypairs(peer); } wg_cookie_checker_precompute_device_keys(&wg->cookie_checker); diff --git a/drivers/net/wireguard/noise.c b/drivers/net/wireguard/noise.c index 919d9d866446..708dc61c974f 100644 --- a/drivers/net/wireguard/noise.c +++ b/drivers/net/wireguard/noise.c @@ -44,32 +44,23 @@ void __init wg_noise_init(void) } /* Must hold peer->handshake.static_identity->lock */ -bool wg_noise_precompute_static_static(struct wg_peer *peer) +void wg_noise_precompute_static_static(struct wg_peer *peer) { - bool ret; - down_write(&peer->handshake.lock); - if (peer->handshake.static_identity->has_identity) { - ret = curve25519( - peer->handshake.precomputed_static_static, + if (!peer->handshake.static_identity->has_identity || + !curve25519(peer->handshake.precomputed_static_static, peer->handshake.static_identity->static_private, - peer->handshake.remote_static); - } else { - u8 empty[NOISE_PUBLIC_KEY_LEN] = { 0 }; - - ret = curve25519(empty, empty, peer->handshake.remote_static); + peer->handshake.remote_static)) memset(peer->handshake.precomputed_static_static, 0, NOISE_PUBLIC_KEY_LEN); - } up_write(&peer->handshake.lock); - return ret; } -bool wg_noise_handshake_init(struct noise_handshake *handshake, - struct noise_static_identity *static_identity, - const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN], - const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN], - struct wg_peer *peer) +void wg_noise_handshake_init(struct noise_handshake *handshake, + struct noise_static_identity *static_identity, + const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN], + const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN], + struct wg_peer *peer) { memset(handshake, 0, sizeof(*handshake)); init_rwsem(&handshake->lock); @@ -81,7 +72,7 @@ bool wg_noise_handshake_init(struct noise_handshake *handshake, NOISE_SYMMETRIC_KEY_LEN); handshake->static_identity = static_identity; handshake->state = HANDSHAKE_ZEROED; - return wg_noise_precompute_static_static(peer); + wg_noise_precompute_static_static(peer); } static void handshake_zero(struct noise_handshake *handshake) @@ -403,6 +394,19 @@ static bool __must_check mix_dh(u8 chaining_key[NOISE_HASH_LEN], return true; } +static bool __must_check mix_precomputed_dh(u8 chaining_key[NOISE_HASH_LEN], + u8 key[NOISE_SYMMETRIC_KEY_LEN], + const u8 precomputed[NOISE_PUBLIC_KEY_LEN]) +{ + static u8 zero_point[NOISE_PUBLIC_KEY_LEN]; + if (unlikely(!crypto_memneq(precomputed, zero_point, NOISE_PUBLIC_KEY_LEN))) + return false; + kdf(chaining_key, key, NULL, precomputed, NOISE_HASH_LEN, + NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN, + chaining_key); + return true; +} + static void mix_hash(u8 hash[NOISE_HASH_LEN], const u8 *src, size_t src_len) { struct blake2s_state blake; @@ -531,10 +535,9 @@ wg_noise_handshake_create_initiation(struct message_handshake_initiation *dst, NOISE_PUBLIC_KEY_LEN, key, handshake->hash); /* ss */ - kdf(handshake->chaining_key, key, NULL, - handshake->precomputed_static_static, NOISE_HASH_LEN, - NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN, - handshake->chaining_key); + if (!mix_precomputed_dh(handshake->chaining_key, key, + handshake->precomputed_static_static)) + goto out; /* {t} */ tai64n_now(timestamp); @@ -595,9 +598,9 @@ wg_noise_handshake_consume_initiation(struct message_handshake_initiation *src, handshake = &peer->handshake; /* ss */ - kdf(chaining_key, key, NULL, handshake->precomputed_static_static, - NOISE_HASH_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN, - chaining_key); + if (!mix_precomputed_dh(chaining_key, key, + handshake->precomputed_static_static)) + goto out; /* {t} */ if (!message_decrypt(t, src->encrypted_timestamp, diff --git a/drivers/net/wireguard/noise.h b/drivers/net/wireguard/noise.h index 138a07bb817c..f532d59d3f19 100644 --- a/drivers/net/wireguard/noise.h +++ b/drivers/net/wireguard/noise.h @@ -94,11 +94,11 @@ struct noise_handshake { struct wg_device; void wg_noise_init(void); -bool wg_noise_handshake_init(struct noise_handshake *handshake, - struct noise_static_identity *static_identity, - const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN], - const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN], - struct wg_peer *peer); +void wg_noise_handshake_init(struct noise_handshake *handshake, + struct noise_static_identity *static_identity, + const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN], + const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN], + struct wg_peer *peer); void wg_noise_handshake_clear(struct noise_handshake *handshake); static inline void wg_noise_reset_last_sent_handshake(atomic64_t *handshake_ns) { @@ -116,7 +116,7 @@ void wg_noise_expire_current_peer_keypairs(struct wg_peer *peer); void wg_noise_set_static_identity_private_key( struct noise_static_identity *static_identity, const u8 private_key[NOISE_PUBLIC_KEY_LEN]); -bool wg_noise_precompute_static_static(struct wg_peer *peer); +void wg_noise_precompute_static_static(struct wg_peer *peer); bool wg_noise_handshake_create_initiation(struct message_handshake_initiation *dst, diff --git a/drivers/net/wireguard/peer.c b/drivers/net/wireguard/peer.c index 071eedf33f5a..1d634bd3038f 100644 --- a/drivers/net/wireguard/peer.c +++ b/drivers/net/wireguard/peer.c @@ -34,11 +34,8 @@ struct wg_peer *wg_peer_create(struct wg_device *wg, return ERR_PTR(ret); peer->device = wg; - if (!wg_noise_handshake_init(&peer->handshake, &wg->static_identity, - public_key, preshared_key, peer)) { - ret = -EKEYREJECTED; - goto err_1; - } + wg_noise_handshake_init(&peer->handshake, &wg->static_identity, + public_key, preshared_key, peer); if (dst_cache_init(&peer->endpoint_cache, GFP_KERNEL)) goto err_1; if (wg_packet_queue_init(&peer->tx_queue, wg_packet_tx_worker, false, diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh index 138d46b3f330..936e1ca9410e 100755 --- a/tools/testing/selftests/wireguard/netns.sh +++ b/tools/testing/selftests/wireguard/netns.sh @@ -527,11 +527,16 @@ n0 wg set wg0 peer "$pub2" allowed-ips 0.0.0.0/0 n0 wg set wg0 peer "$pub2" allowed-ips ::/0,1700::/111,5000::/4,e000::/37,9000::/75 n0 wg set wg0 peer "$pub2" allowed-ips ::/0 n0 wg set wg0 peer "$pub2" remove -low_order_points=( AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= AQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= 4Ot6fDtBuK4WVuP68Z/EatoJjeucMrH9hmIFFl9JuAA= X5yVvKNQjCSx0LFVnIPvWwREXMRYHI6G2CJO3dCfEVc= 7P///////////////////////////////////////38= 7f///////////////////////////////////////38= 7v///////////////////////////////////////38= ) -n0 wg set wg0 private-key /dev/null ${low_order_points[@]/#/peer } -[[ -z $(n0 wg show wg0 peers) ]] -n0 wg set wg0 private-key <(echo "$key1") ${low_order_points[@]/#/peer } -[[ -z $(n0 wg show wg0 peers) ]] +for low_order_point in AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= AQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA= 4Ot6fDtBuK4WVuP68Z/EatoJjeucMrH9hmIFFl9JuAA= X5yVvKNQjCSx0LFVnIPvWwREXMRYHI6G2CJO3dCfEVc= 7P///////////////////////////////////////38= 7f///////////////////////////////////////38= 7v///////////////////////////////////////38=; do + n0 wg set wg0 peer "$low_order_point" persistent-keepalive 1 endpoint 127.0.0.1:1111 +done +[[ -n $(n0 wg show wg0 peers) ]] +exec 4< <(n0 ncat -l -u -p 1111) +ncat_pid=$! +waitncatudp $netns0 $ncat_pid +ip0 link set wg0 up +! read -r -n 1 -t 2 <&4 || false +kill $ncat_pid ip0 link del wg0 declare -A objects From c83557859eaa1286330a4d3d2e1ea0c0988c4604 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 18 Mar 2020 20:38:29 +0000 Subject: [PATCH 275/372] arm64: kpti: Fix "kpti=off" when KASLR is enabled Enabling KASLR forces the use of non-global page-table entries for kernel mappings, as this is a decision that we have to make very early on before mapping the kernel proper. When used in conjunction with the "kpti=off" command-line option, it is possible to use non-global kernel mappings but with the kpti trampoline disabled. Since commit 09e3c22a86f6 ("arm64: Use a variable to store non-global mappings decision"), arm64_kernel_unmapped_at_el0() reflects only the use of non-global mappings and does not take into account whether the kpti trampoline is enabled. This breaks context switching of the TPIDRRO_EL0 register for 64-bit tasks, where the clearing of the register is deferred to the ret-to-user code, but it also breaks the ARM SPE PMU driver which helpfully recommends passing "kpti=off" on the command line! Report whether or not KPTI is actually enabled in arm64_kernel_unmapped_at_el0() and check the 'arm64_use_ng_mappings' global variable directly when determining the protection flags for kernel mappings. Cc: Mark Brown Reported-by: Hongbo Yao Tested-by: Hongbo Yao Fixes: 09e3c22a86f6 ("arm64: Use a variable to store non-global mappings decision") Signed-off-by: Will Deacon --- arch/arm64/include/asm/mmu.h | 4 +--- arch/arm64/include/asm/pgtable-prot.h | 6 ++++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index e4d862420bb4..d79ce6df9e12 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -29,11 +29,9 @@ typedef struct { */ #define ASID(mm) ((mm)->context.id.counter & 0xffff) -extern bool arm64_use_ng_mappings; - static inline bool arm64_kernel_unmapped_at_el0(void) { - return arm64_use_ng_mappings; + return cpus_have_const_cap(ARM64_UNMAP_KERNEL_AT_EL0); } typedef void (*bp_hardening_cb_t)(void); diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 6f87839f0249..1305e28225fc 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -23,11 +23,13 @@ #include +extern bool arm64_use_ng_mappings; + #define _PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) #define _PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) -#define PTE_MAYBE_NG (arm64_kernel_unmapped_at_el0() ? PTE_NG : 0) -#define PMD_MAYBE_NG (arm64_kernel_unmapped_at_el0() ? PMD_SECT_NG : 0) +#define PTE_MAYBE_NG (arm64_use_ng_mappings ? PTE_NG : 0) +#define PMD_MAYBE_NG (arm64_use_ng_mappings ? PMD_SECT_NG : 0) #define PROT_DEFAULT (_PROT_DEFAULT | PTE_MAYBE_NG) #define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_MAYBE_NG) From 870b4333a62e45b0b2000d14b301b7b8b8cad9da Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 18 Mar 2020 19:27:48 +0100 Subject: [PATCH 276/372] x86/ioremap: Fix CONFIG_EFI=n build In order to use efi_mem_type(), one needs CONFIG_EFI enabled. Otherwise that function is undefined. Use IS_ENABLED() to check and avoid the ifdeffery as the compiler optimizes away the following unreachable code then. Fixes: 985e537a4082 ("x86/ioremap: Map EFI runtime services data as encrypted for SEV") Reported-by: Randy Dunlap Signed-off-by: Borislav Petkov Tested-by: Randy Dunlap Cc: Tom Lendacky Cc: Link: https://lkml.kernel.org/r/7561e981-0d9b-d62c-0ef2-ce6007aff1ab@infradead.org --- arch/x86/mm/ioremap.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 935a91e1fd77..18c637c0dc6f 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -115,6 +115,9 @@ static void __ioremap_check_other(resource_size_t addr, struct ioremap_desc *des if (!sev_active()) return; + if (!IS_ENABLED(CONFIG_EFI)) + return; + if (efi_mem_type(addr) == EFI_RUNTIME_SERVICES_DATA) desc->flags |= IORES_MAP_ENCRYPTED; } From 7883a14339299773b2ce08dcfd97c63c199a9289 Mon Sep 17 00:00:00 2001 From: Mikhail Petrov Date: Wed, 11 Mar 2020 23:37:09 +0300 Subject: [PATCH 277/372] scripts/kallsyms: fix wrong kallsyms_relative_base There is the code in the read_symbol function in 'scripts/kallsyms.c': if (is_ignored_symbol(name, type)) return NULL; /* Ignore most absolute/undefined (?) symbols. */ if (strcmp(name, "_text") == 0) _text = addr; But the is_ignored_symbol function returns true for name="_text" and type='A'. So the next condition is not executed and the _text variable is always zero. It makes the wrong kallsyms_relative_base symbol as a result of the code (CONFIG_KALLSYMS_BASE_RELATIVE is defined): if (base_relative) { output_label("kallsyms_relative_base"); output_address(relative_base); printf("\n"); } Because the output_address function uses the _text variable. So the kallsyms_lookup function and all related functions in the kernel do not work properly. For example, the stack trace in oops: Call Trace: [aa095e58] [809feab8] kobj_ns_ops_tbl+0x7ff09ac8/0x7ff1c1c4 (unreliable) [aa095e98] [80002b64] kobj_ns_ops_tbl+0x7f50db74/0x80000010 [aa095ef8] [809c3d24] kobj_ns_ops_tbl+0x7feced34/0x7ff1c1c4 [aa095f28] [80002ed0] kobj_ns_ops_tbl+0x7f50dee0/0x80000010 [aa095f38] [8000f238] kobj_ns_ops_tbl+0x7f51a248/0x80000010 The right stack trace: Call Trace: [aa095e58] [809feab8] module_vdu_video_init+0x2fc/0x3bc (unreliable) [aa095e98] [80002b64] do_one_initcall+0x40/0x1f0 [aa095ef8] [809c3d24] kernel_init_freeable+0x164/0x1d8 [aa095f28] [80002ed0] kernel_init+0x14/0x124 [aa095f38] [8000f238] ret_from_kernel_thread+0x14/0x1c [masahiroy@kernel.org: This issue happens on binutils <= 2.22 The following commit fixed it: https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=d2667025dd30611514810c28bee9709e4623012a The symbol type of _text is 'T' on binutils >= 2.23 The minimal supported binutils version for the kernel build is 2.21 ] Signed-off-by: Mikhail Petrov Signed-off-by: Masahiro Yamada --- scripts/kallsyms.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index 0133dfaaf352..3e8dea6e0a95 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c @@ -195,13 +195,13 @@ static struct sym_entry *read_symbol(FILE *in) return NULL; } - if (is_ignored_symbol(name, type)) - return NULL; - - /* Ignore most absolute/undefined (?) symbols. */ if (strcmp(name, "_text") == 0) _text = addr; + /* Ignore most absolute/undefined (?) symbols. */ + if (is_ignored_symbol(name, type)) + return NULL; + check_symbol_range(name, addr, text_ranges, ARRAY_SIZE(text_ranges)); check_symbol_range(name, addr, &percpu_range, 1); From 564200ed8e71d91327d337e46bc778cee02da866 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 19 Mar 2020 11:42:56 -0300 Subject: [PATCH 278/372] tools headers uapi: Update linux/in.h copy To get the changes in: 267762538705 ("seg6: fix SRv6 L2 tunnels to use IANA-assigned protocol number") That ends up automatically adding the new IPPROTO_ETHERNET to the socket args beautifiers: $ tools/perf/trace/beauty/socket_ipproto.sh > before Apply this patch: $ tools/perf/trace/beauty/socket_ipproto.sh > after $ diff -u before after --- before 2020-03-19 11:48:36.876673819 -0300 +++ after 2020-03-19 11:49:00.148541377 -0300 @@ -6,6 +6,7 @@ [132] = "SCTP", [136] = "UDPLITE", [137] = "MPLS", + [143] = "ETHERNET", [17] = "UDP", [1] = "ICMP", [22] = "IDP", $ Addresses this tools/perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/in.h' differs from latest version at 'include/uapi/linux/in.h' diff -u tools/include/uapi/linux/in.h include/uapi/linux/in.h Cc: David S. Miller Cc: Paolo Lungaroni Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/in.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h index 1521073b6348..8533bf07450f 100644 --- a/tools/include/uapi/linux/in.h +++ b/tools/include/uapi/linux/in.h @@ -74,6 +74,8 @@ enum { #define IPPROTO_UDPLITE IPPROTO_UDPLITE IPPROTO_MPLS = 137, /* MPLS in IP (RFC 4023) */ #define IPPROTO_MPLS IPPROTO_MPLS + IPPROTO_ETHERNET = 143, /* Ethernet-within-IPv6 Encapsulation */ +#define IPPROTO_ETHERNET IPPROTO_ETHERNET IPPROTO_RAW = 255, /* Raw IP packets */ #define IPPROTO_RAW IPPROTO_RAW IPPROTO_MPTCP = 262, /* Multipath TCP connection */ From 5d892919fdd0cefd361697472d4e1b174a594991 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Wed, 18 Mar 2020 15:26:49 +0000 Subject: [PATCH 279/372] rtc: max8907: add missing select REGMAP_IRQ I have hit the following build error: armv7a-hardfloat-linux-gnueabi-ld: drivers/rtc/rtc-max8907.o: in function `max8907_rtc_probe': rtc-max8907.c:(.text+0x400): undefined reference to `regmap_irq_get_virq' max8907 should select REGMAP_IRQ Fixes: 94c01ab6d7544 ("rtc: add MAX8907 RTC driver") Cc: stable Signed-off-by: Corentin Labbe Signed-off-by: Linus Torvalds --- drivers/rtc/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 34c8b6c7e095..8e503881d9d6 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -327,6 +327,7 @@ config RTC_DRV_MAX6900 config RTC_DRV_MAX8907 tristate "Maxim MAX8907" depends on MFD_MAX8907 || COMPILE_TEST + select REGMAP_IRQ help If you say yes here you will get support for the RTC of Maxim MAX8907 PMIC. From 3568b88944fef28db3ee989b957da49ffc627ede Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Thu, 19 Mar 2020 14:11:38 +0000 Subject: [PATCH 280/372] arm64: compat: Fix syscall number of compat_clock_getres The syscall number of compat_clock_getres was erroneously set to 247 (__NR_io_cancel!) instead of 264. This causes the vDSO fallback of clock_getres() to land on the wrong syscall for compat tasks. Fix the numbering. Cc: Fixes: 53c489e1dfeb6 ("arm64: compat: Add missing syscall numbers") Acked-by: Catalin Marinas Reviewed-by: Nick Desaulniers Signed-off-by: Vincenzo Frascino Signed-off-by: Will Deacon --- arch/arm64/include/asm/unistd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 1dd22da1c3a9..803039d504de 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -25,8 +25,8 @@ #define __NR_compat_gettimeofday 78 #define __NR_compat_sigreturn 119 #define __NR_compat_rt_sigreturn 173 -#define __NR_compat_clock_getres 247 #define __NR_compat_clock_gettime 263 +#define __NR_compat_clock_getres 264 #define __NR_compat_clock_gettime64 403 #define __NR_compat_clock_getres_time64 406 From 61abaf02d2ec3d4575c66fa1b4a863877736b932 Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Tue, 17 Mar 2020 10:02:52 +0800 Subject: [PATCH 281/372] netfilter: flowtable: reload ip{v6}h in nf_flow_nat_ip{v6} Since nf_flow_snat_port and nf_flow_snat_ip{v6} call pskb_may_pull() which may change skb->data, so we need to reload ip{v6}h at the right place. Fixes: a908fdec3dda ("netfilter: nf_flow_table: move ipv6 offload hook code to nf_flow_table") Fixes: 7d2086871762 ("netfilter: nf_flow_table: move ipv4 offload hook code to nf_flow_table") Signed-off-by: Haishuang Yan Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_ip.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 9e563fd3da0f..22caab7bb755 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -146,11 +146,13 @@ static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb, if (test_bit(NF_FLOW_SNAT, &flow->flags) && (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 || - nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0)) + nf_flow_snat_ip(flow, skb, ip_hdr(skb), thoff, dir) < 0)) return -1; + + iph = ip_hdr(skb); if (test_bit(NF_FLOW_DNAT, &flow->flags) && (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 || - nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0)) + nf_flow_dnat_ip(flow, skb, ip_hdr(skb), thoff, dir) < 0)) return -1; return 0; @@ -426,11 +428,13 @@ static int nf_flow_nat_ipv6(const struct flow_offload *flow, if (test_bit(NF_FLOW_SNAT, &flow->flags) && (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 || - nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0)) + nf_flow_snat_ipv6(flow, skb, ipv6_hdr(skb), thoff, dir) < 0)) return -1; + + ip6h = ipv6_hdr(skb); if (test_bit(NF_FLOW_DNAT, &flow->flags) && (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 || - nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0)) + nf_flow_dnat_ipv6(flow, skb, ipv6_hdr(skb), thoff, dir) < 0)) return -1; return 0; From 41e9ec5a54f95eee1a57c8d26ab70e0492548c1b Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Tue, 17 Mar 2020 10:02:53 +0800 Subject: [PATCH 282/372] netfilter: flowtable: reload ip{v6}h in nf_flow_tuple_ip{v6} Since pskb_may_pull may change skb->data, so we need to reload ip{v6}h at the right place. Fixes: a908fdec3dda ("netfilter: nf_flow_table: move ipv6 offload hook code to nf_flow_table") Fixes: 7d2086871762 ("netfilter: nf_flow_table: move ipv4 offload hook code to nf_flow_table") Signed-off-by: Haishuang Yan Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_ip.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index 22caab7bb755..ba775aecd89a 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -191,6 +191,7 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev, if (!pskb_may_pull(skb, thoff + sizeof(*ports))) return -1; + iph = ip_hdr(skb); ports = (struct flow_ports *)(skb_network_header(skb) + thoff); tuple->src_v4.s_addr = iph->saddr; @@ -463,6 +464,7 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev, if (!pskb_may_pull(skb, thoff + sizeof(*ports))) return -1; + ip6h = ipv6_hdr(skb); ports = (struct flow_ports *)(skb_network_header(skb) + thoff); tuple->src_v6 = ip6h->saddr; From c921ffe853332584eae4f5905cb2a14a7b3c9932 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Thu, 19 Mar 2020 11:52:25 +0200 Subject: [PATCH 283/372] netfilter: flowtable: Fix flushing of offloaded flows on free Freeing a flowtable with offloaded flows, the flow are deleted from hardware but are not deleted from the flow table, leaking them, and leaving their offload bit on. Add a second pass of the disabled gc to delete the these flows from the flow table before freeing it. Fixes: c29f74e0df7a ("netfilter: nf_flow_table: hardware offload support") Signed-off-by: Paul Blakey Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 8af28e10b4e6..70ebebaf5bc1 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -554,6 +554,9 @@ void nf_flow_table_free(struct nf_flowtable *flow_table) nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL); nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table); nf_flow_table_offload_flush(flow_table); + if (nf_flowtable_hw_offload(flow_table)) + nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, + flow_table); rhashtable_destroy(&flow_table->rhashtable); } EXPORT_SYMBOL_GPL(nf_flow_table_free); From eb916a5a93a64c182b0a8f43886aa6bb4c3e52b0 Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Mon, 2 Mar 2020 07:17:32 +0100 Subject: [PATCH 284/372] drm/amd/display: Fix pageflip event race condition for DCN. Commit '16f17eda8bad ("drm/amd/display: Send vblank and user events at vsartup for DCN")' introduces a new way of pageflip completion handling for DCN, and some trouble. The current implementation introduces a race condition, which can cause pageflip completion events to be sent out one vblank too early, thereby confusing userspace and causing flicker: prepare_flip_isr(): 1. Pageflip programming takes the ddev->event_lock. 2. Sets acrtc->pflip_status == AMDGPU_FLIP_SUBMITTED 3. Releases ddev->event_lock. --> Deadline for surface address regs double-buffering passes on target pipe. 4. dc_commit_updates_for_stream() MMIO programs the new pageflip into hw, but too late for current vblank. => pflip_status == AMDGPU_FLIP_SUBMITTED, but flip won't complete in current vblank due to missing the double-buffering deadline by a tiny bit. 5. VSTARTUP trigger point in vblank is reached, VSTARTUP irq fires, dm_dcn_crtc_high_irq() gets called. 6. Detects pflip_status == AMDGPU_FLIP_SUBMITTED and assumes the pageflip has been completed/will complete in this vblank and sends out pageflip completion event to userspace and resets pflip_status = AMDGPU_FLIP_NONE. => Flip completion event sent out one vblank too early. This behaviour has been observed during my testing with measurement hardware a couple of time. The commit message says that the extra flip event code was added to dm_dcn_crtc_high_irq() to prevent missing to send out pageflip events in case the pflip irq doesn't fire, because the "DCH HUBP" component is clock gated and doesn't fire pflip irqs in that state. Also that this clock gating may happen if no planes are active. This suggests that the problem addressed by that commit can't happen if planes are active. The proposed solution is therefore to only execute the extra pflip completion code iff the count of active planes is zero and otherwise leave pflip completion handling to the pflip irq handler, for a more race-free experience. Note that i don't know if this fixes the problem the original commit tried to address, as i don't know what the test scenario was. It does fix the observed too early pageflip events though and points out the problem introduced. Fixes: 16f17eda8bad ("drm/amd/display: Send vblank and user events at vsartup for DCN") Reviewed-by: Nicholas Kazlauskas Signed-off-by: Mario Kleiner Signed-off-by: Alex Deucher --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index e997251a8b57..6240259b3a93 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -522,8 +522,9 @@ static void dm_dcn_crtc_high_irq(void *interrupt_params) acrtc_state = to_dm_crtc_state(acrtc->base.state); - DRM_DEBUG_DRIVER("crtc:%d, vupdate-vrr:%d\n", acrtc->crtc_id, - amdgpu_dm_vrr_active(acrtc_state)); + DRM_DEBUG_DRIVER("crtc:%d, vupdate-vrr:%d, planes:%d\n", acrtc->crtc_id, + amdgpu_dm_vrr_active(acrtc_state), + acrtc_state->active_planes); amdgpu_dm_crtc_handle_crc_irq(&acrtc->base); drm_crtc_handle_vblank(&acrtc->base); @@ -543,7 +544,18 @@ static void dm_dcn_crtc_high_irq(void *interrupt_params) &acrtc_state->vrr_params.adjust); } - if (acrtc->pflip_status == AMDGPU_FLIP_SUBMITTED) { + /* + * If there aren't any active_planes then DCH HUBP may be clock-gated. + * In that case, pageflip completion interrupts won't fire and pageflip + * completion events won't get delivered. Prevent this by sending + * pending pageflip events from here if a flip is still pending. + * + * If any planes are enabled, use dm_pflip_high_irq() instead, to + * avoid race conditions between flip programming and completion, + * which could cause too early flip completion events. + */ + if (acrtc->pflip_status == AMDGPU_FLIP_SUBMITTED && + acrtc_state->active_planes == 0) { if (acrtc->event) { drm_crtc_send_vblank_event(&acrtc->base, acrtc->event); acrtc->event = NULL; From 15ff197237e76c4dab06b7b518afaa4ebb1c43e0 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Thu, 19 Mar 2020 19:37:21 +0000 Subject: [PATCH 285/372] netfilter: flowtable: populate addr_type mask nf_flow_rule_match() sets control.addr_type in key, so needs to also set the corresponding mask. An exact match is wanted, so mask is all ones. Fixes: c29f74e0df7a ("netfilter: nf_flow_table: hardware offload support") Signed-off-by: Edward Cree Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_flow_table_offload.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index 06f00cdc3891..f2c22c682851 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -87,6 +87,7 @@ static int nf_flow_rule_match(struct nf_flow_match *match, default: return -EOPNOTSUPP; } + mask->control.addr_type = 0xffff; match->dissector.used_keys |= BIT(key->control.addr_type); mask->basic.n_proto = 0xffff; From 1d0c32ec3b860a32df593a22bad0d1dbc5546a59 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Wed, 18 Mar 2020 18:43:30 +0100 Subject: [PATCH 286/372] KVM: PPC: Fix kernel crash with PR KVM With PR KVM, shutting down a VM causes the host kernel to crash: [ 314.219284] BUG: Unable to handle kernel data access on read at 0xc00800000176c638 [ 314.219299] Faulting instruction address: 0xc008000000d4ddb0 cpu 0x0: Vector: 300 (Data Access) at [c00000036da077a0] pc: c008000000d4ddb0: kvmppc_mmu_pte_flush_all+0x68/0xd0 [kvm_pr] lr: c008000000d4dd94: kvmppc_mmu_pte_flush_all+0x4c/0xd0 [kvm_pr] sp: c00000036da07a30 msr: 900000010280b033 dar: c00800000176c638 dsisr: 40000000 current = 0xc00000036d4c0000 paca = 0xc000000001a00000 irqmask: 0x03 irq_happened: 0x01 pid = 1992, comm = qemu-system-ppc Linux version 5.6.0-master-gku+ (greg@palmb) (gcc version 7.5.0 (Ubuntu 7.5.0-3ubuntu1~18.04)) #17 SMP Wed Mar 18 13:49:29 CET 2020 enter ? for help [c00000036da07ab0] c008000000d4fbe0 kvmppc_mmu_destroy_pr+0x28/0x60 [kvm_pr] [c00000036da07ae0] c0080000009eab8c kvmppc_mmu_destroy+0x34/0x50 [kvm] [c00000036da07b00] c0080000009e50c0 kvm_arch_vcpu_destroy+0x108/0x140 [kvm] [c00000036da07b30] c0080000009d1b50 kvm_vcpu_destroy+0x28/0x80 [kvm] [c00000036da07b60] c0080000009e4434 kvm_arch_destroy_vm+0xbc/0x190 [kvm] [c00000036da07ba0] c0080000009d9c2c kvm_put_kvm+0x1d4/0x3f0 [kvm] [c00000036da07c00] c0080000009da760 kvm_vm_release+0x38/0x60 [kvm] [c00000036da07c30] c000000000420be0 __fput+0xe0/0x310 [c00000036da07c90] c0000000001747a0 task_work_run+0x150/0x1c0 [c00000036da07cf0] c00000000014896c do_exit+0x44c/0xd00 [c00000036da07dc0] c0000000001492f4 do_group_exit+0x64/0xd0 [c00000036da07e00] c000000000149384 sys_exit_group+0x24/0x30 [c00000036da07e20] c00000000000b9d0 system_call+0x5c/0x68 This is caused by a use-after-free in kvmppc_mmu_pte_flush_all() which dereferences vcpu->arch.book3s which was previously freed by kvmppc_core_vcpu_free_pr(). This happens because kvmppc_mmu_destroy() is called after kvmppc_core_vcpu_free() since commit ff030fdf5573 ("KVM: PPC: Move kvm_vcpu_init() invocation to common code"). The kvmppc_mmu_destroy() helper calls one of the following depending on the KVM backend: - kvmppc_mmu_destroy_hv() which does nothing (Book3s HV) - kvmppc_mmu_destroy_pr() which undoes the effects of kvmppc_mmu_init() (Book3s PR 32-bit) - kvmppc_mmu_destroy_pr() which undoes the effects of kvmppc_mmu_init() (Book3s PR 64-bit) - kvmppc_mmu_destroy_e500() which does nothing (BookE e500/e500mc) It turns out that this is only relevant to PR KVM actually. And both 32 and 64 backends need vcpu->arch.book3s to be valid when calling kvmppc_mmu_destroy_pr(). So instead of calling kvmppc_mmu_destroy() from kvm_arch_vcpu_destroy(), call kvmppc_mmu_destroy_pr() at the beginning of kvmppc_core_vcpu_free_pr(). This is consistent with kvmppc_mmu_init() being the last call in kvmppc_core_vcpu_create_pr(). For the same reason, if kvmppc_core_vcpu_create_pr() returns an error then this means that kvmppc_mmu_init() was either not called or failed, in which case kvmppc_mmu_destroy() should not be called. Drop the line in the error path of kvm_arch_vcpu_create(). Fixes: ff030fdf5573 ("KVM: PPC: Move kvm_vcpu_init() invocation to common code") Signed-off-by: Greg Kurz Reviewed-by: Sean Christopherson Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/158455341029.178873.15248663726399374882.stgit@bahia.lan --- arch/powerpc/kvm/book3s_pr.c | 1 + arch/powerpc/kvm/powerpc.c | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 729a0f12a752..db3a87319642 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1817,6 +1817,7 @@ static void kvmppc_core_vcpu_free_pr(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); + kvmppc_mmu_destroy_pr(vcpu); free_page((unsigned long)vcpu->arch.shared & PAGE_MASK); #ifdef CONFIG_KVM_BOOK3S_32_HANDLER kfree(vcpu->arch.shadow_vcpu); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 1af96fb5dc6f..302e9dccdd6d 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -759,7 +759,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) return 0; out_vcpu_uninit: - kvmppc_mmu_destroy(vcpu); kvmppc_subarch_vcpu_uninit(vcpu); return err; } @@ -792,7 +791,6 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) kvmppc_core_vcpu_free(vcpu); - kvmppc_mmu_destroy(vcpu); kvmppc_subarch_vcpu_uninit(vcpu); } From 6002059d7882c3512e6ac52fa82424272ddfcd5c Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 19 Mar 2020 13:25:39 +0200 Subject: [PATCH 287/372] mlxsw: pci: Only issue reset when system is ready During initialization the driver issues a software reset command and then waits for the system status to change back to "ready" state. However, before issuing the reset command the driver does not check that the system is actually in "ready" state. On Spectrum-{1,2} systems this was always the case as the hardware initialization time is very short. On Spectrum-3 systems this is no longer the case. This results in the software reset command timing-out and the driver failing to load: [ 6.347591] mlxsw_spectrum3 0000:06:00.0: Cmd exec timed-out (opcode=40(ACCESS_REG),opcode_mod=0,in_mod=0) [ 6.358382] mlxsw_spectrum3 0000:06:00.0: Reg cmd access failed (reg_id=9023(mrsr),type=write) [ 6.368028] mlxsw_spectrum3 0000:06:00.0: cannot register bus device [ 6.375274] mlxsw_spectrum3: probe of 0000:06:00.0 failed with error -110 Fix this by waiting for the system to become ready both before issuing the reset command and afterwards. In case of failure, print the last system status to aid in debugging. Fixes: da382875c616 ("mlxsw: spectrum: Extend to support Spectrum-3 ASIC") Signed-off-by: Ido Schimmel Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/pci.c | 50 ++++++++++++++++++----- 1 file changed, 39 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 914c33e46fb4..e9ded1a6e131 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -1322,36 +1322,64 @@ static void mlxsw_pci_mbox_free(struct mlxsw_pci *mlxsw_pci, mbox->mapaddr); } -static int mlxsw_pci_sw_reset(struct mlxsw_pci *mlxsw_pci, - const struct pci_device_id *id) +static int mlxsw_pci_sys_ready_wait(struct mlxsw_pci *mlxsw_pci, + const struct pci_device_id *id, + u32 *p_sys_status) { unsigned long end; - char mrsr_pl[MLXSW_REG_MRSR_LEN]; - int err; + u32 val; - mlxsw_reg_mrsr_pack(mrsr_pl); - err = mlxsw_reg_write(mlxsw_pci->core, MLXSW_REG(mrsr), mrsr_pl); - if (err) - return err; if (id->device == PCI_DEVICE_ID_MELLANOX_SWITCHX2) { msleep(MLXSW_PCI_SW_RESET_TIMEOUT_MSECS); return 0; } - /* We must wait for the HW to become responsive once again. */ + /* We must wait for the HW to become responsive. */ msleep(MLXSW_PCI_SW_RESET_WAIT_MSECS); end = jiffies + msecs_to_jiffies(MLXSW_PCI_SW_RESET_TIMEOUT_MSECS); do { - u32 val = mlxsw_pci_read32(mlxsw_pci, FW_READY); - + val = mlxsw_pci_read32(mlxsw_pci, FW_READY); if ((val & MLXSW_PCI_FW_READY_MASK) == MLXSW_PCI_FW_READY_MAGIC) return 0; cond_resched(); } while (time_before(jiffies, end)); + + *p_sys_status = val & MLXSW_PCI_FW_READY_MASK; + return -EBUSY; } +static int mlxsw_pci_sw_reset(struct mlxsw_pci *mlxsw_pci, + const struct pci_device_id *id) +{ + struct pci_dev *pdev = mlxsw_pci->pdev; + char mrsr_pl[MLXSW_REG_MRSR_LEN]; + u32 sys_status; + int err; + + err = mlxsw_pci_sys_ready_wait(mlxsw_pci, id, &sys_status); + if (err) { + dev_err(&pdev->dev, "Failed to reach system ready status before reset. Status is 0x%x\n", + sys_status); + return err; + } + + mlxsw_reg_mrsr_pack(mrsr_pl); + err = mlxsw_reg_write(mlxsw_pci->core, MLXSW_REG(mrsr), mrsr_pl); + if (err) + return err; + + err = mlxsw_pci_sys_ready_wait(mlxsw_pci, id, &sys_status); + if (err) { + dev_err(&pdev->dev, "Failed to reach system ready status after reset. Status is 0x%x\n", + sys_status); + return err; + } + + return 0; +} + static int mlxsw_pci_alloc_irq_vectors(struct mlxsw_pci *mlxsw_pci) { int err; From c8cfcb78c65877313cda7bcbace624d3dbd1f3b3 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 18 Mar 2020 20:27:32 -0600 Subject: [PATCH 288/372] crypto: arm64/chacha - correctly walk through blocks Prior, passing in chunks of 2, 3, or 4, followed by any additional chunks would result in the chacha state counter getting out of sync, resulting in incorrect encryption/decryption, which is a pretty nasty crypto vuln: "why do images look weird on webpages?" WireGuard users never experienced this prior, because we have always, out of tree, used a different crypto library, until the recent Frankenzinc addition. This commit fixes the issue by advancing the pointers and state counter by the actual size processed. It also fixes up a bug in the (optional, costly) stride test that prevented it from running on arm64. Fixes: b3aad5bad26a ("crypto: arm64/chacha - expose arm64 ChaCha routine as library function") Reported-and-tested-by: Emil Renner Berthing Cc: Ard Biesheuvel Cc: stable@vger.kernel.org # v5.5+ Signed-off-by: Jason A. Donenfeld Reviewed-by: Eric Biggers Signed-off-by: Herbert Xu --- arch/arm64/crypto/chacha-neon-glue.c | 8 ++++---- lib/crypto/chacha20poly1305-selftest.c | 11 ++++++++--- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c index c1f9660d104c..37ca3e889848 100644 --- a/arch/arm64/crypto/chacha-neon-glue.c +++ b/arch/arm64/crypto/chacha-neon-glue.c @@ -55,10 +55,10 @@ static void chacha_doneon(u32 *state, u8 *dst, const u8 *src, break; } chacha_4block_xor_neon(state, dst, src, nrounds, l); - bytes -= CHACHA_BLOCK_SIZE * 5; - src += CHACHA_BLOCK_SIZE * 5; - dst += CHACHA_BLOCK_SIZE * 5; - state[12] += 5; + bytes -= l; + src += l; + dst += l; + state[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE); } } diff --git a/lib/crypto/chacha20poly1305-selftest.c b/lib/crypto/chacha20poly1305-selftest.c index c391a91364e9..fa43deda2660 100644 --- a/lib/crypto/chacha20poly1305-selftest.c +++ b/lib/crypto/chacha20poly1305-selftest.c @@ -9028,10 +9028,15 @@ bool __init chacha20poly1305_selftest(void) && total_len <= 1 << 10; ++total_len) { for (i = 0; i <= total_len; ++i) { for (j = i; j <= total_len; ++j) { + k = 0; sg_init_table(sg_src, 3); - sg_set_buf(&sg_src[0], input, i); - sg_set_buf(&sg_src[1], input + i, j - i); - sg_set_buf(&sg_src[2], input + j, total_len - j); + if (i) + sg_set_buf(&sg_src[k++], input, i); + if (j - i) + sg_set_buf(&sg_src[k++], input + i, j - i); + if (total_len - j) + sg_set_buf(&sg_src[k++], input + j, total_len - j); + sg_init_marker(sg_src, k); memset(computed_output, 0, total_len); memset(input, 0, total_len); From 22259471b51925353bd7b16f864c79fdd76e425e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20van=20Dorst?= Date: Thu, 19 Mar 2020 14:47:56 +0100 Subject: [PATCH 289/372] net: dsa: mt7530: Change the LINK bit to reflect the link status MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Andrew reported: After a number of network port link up/down changes, sometimes the switch port gets stuck in a state where it thinks it is still transmitting packets but the cpu port is not actually transmitting anymore. In this state you will see a message on the console "mtk_soc_eth 1e100000.ethernet eth0: transmit timed out" and the Tx counter in ifconfig will be incrementing on virtual port, but not incrementing on cpu port. The issue is that MAC TX/RX status has no impact on the link status or queue manager of the switch. So the queue manager just queues up packets of a disabled port and sends out pause frames when the queue is full. Change the LINK bit to reflect the link status. Fixes: b8f126a8d543 ("net-next: dsa: add dsa support for Mediatek MT7530 switch") Reported-by: Andrew Smith Signed-off-by: René van Dorst Reviewed-by: Vivien Didelot Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/mt7530.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 022466ca1c19..7cbd1bd4c5a6 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -566,7 +566,7 @@ mt7530_mib_reset(struct dsa_switch *ds) static void mt7530_port_set_status(struct mt7530_priv *priv, int port, int enable) { - u32 mask = PMCR_TX_EN | PMCR_RX_EN; + u32 mask = PMCR_TX_EN | PMCR_RX_EN | PMCR_FORCE_LNK; if (enable) mt7530_set(priv, MT7530_PMCR_P(port), mask); @@ -1444,7 +1444,7 @@ static void mt7530_phylink_mac_config(struct dsa_switch *ds, int port, mcr_new &= ~(PMCR_FORCE_SPEED_1000 | PMCR_FORCE_SPEED_100 | PMCR_FORCE_FDX | PMCR_TX_FC_EN | PMCR_RX_FC_EN); mcr_new |= PMCR_IFG_XMIT(1) | PMCR_MAC_MODE | PMCR_BACKOFF_EN | - PMCR_BACKPR_EN | PMCR_FORCE_MODE | PMCR_FORCE_LNK; + PMCR_BACKPR_EN | PMCR_FORCE_MODE; /* Are we connected to external phy */ if (port == 5 && dsa_is_user_port(ds, 5)) From 7affd80802afb6ca92dba47d768632fbde365241 Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Thu, 19 Mar 2020 23:08:09 +0530 Subject: [PATCH 290/372] cxgb4: fix throughput drop during Tx backpressure commit 7c3bebc3d868 ("cxgb4: request the TX CIDX updates to status page") reverted back to getting Tx CIDX updates via DMA, instead of interrupts, introduced by commit d429005fdf2c ("cxgb4/cxgb4vf: Add support for SGE doorbell queue timer") However, it missed reverting back several code changes where Tx CIDX updates are not explicitly requested during backpressure when using interrupt mode. These missed changes cause slow recovery during backpressure because the corresponding interrupt no longer comes and hence results in Tx throughput drop. So, revert back these missed code changes, as well, which will allow explicitly requesting Tx CIDX updates when backpressure happens. This enables the corresponding interrupt with Tx CIDX update message to get generated and hence speed up recovery and restore back throughput. Fixes: 7c3bebc3d868 ("cxgb4: request the TX CIDX updates to status page") Fixes: d429005fdf2c ("cxgb4/cxgb4vf: Add support for SGE doorbell queue timer") Signed-off-by: Rahul Lakkireddy Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/sge.c | 42 ++---------------------- 1 file changed, 2 insertions(+), 40 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 97cda501e7e8..c816837fbd85 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -1486,16 +1486,7 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev) * has opened up. */ eth_txq_stop(q); - - /* If we're using the SGE Doorbell Queue Timer facility, we - * don't need to ask the Firmware to send us Egress Queue CIDX - * Updates: the Hardware will do this automatically. And - * since we send the Ingress Queue CIDX Updates to the - * corresponding Ethernet Response Queue, we'll get them very - * quickly. - */ - if (!q->dbqt) - wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F; + wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F; } wr = (void *)&q->q.desc[q->q.pidx]; @@ -1805,16 +1796,7 @@ static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb, * has opened up. */ eth_txq_stop(txq); - - /* If we're using the SGE Doorbell Queue Timer facility, we - * don't need to ask the Firmware to send us Egress Queue CIDX - * Updates: the Hardware will do this automatically. And - * since we send the Ingress Queue CIDX Updates to the - * corresponding Ethernet Response Queue, we'll get them very - * quickly. - */ - if (!txq->dbqt) - wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F; + wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F; } /* Start filling in our Work Request. Note that we do _not_ handle @@ -3370,26 +3352,6 @@ static void t4_tx_completion_handler(struct sge_rspq *rspq, } txq = &s->ethtxq[pi->first_qset + rspq->idx]; - - /* We've got the Hardware Consumer Index Update in the Egress Update - * message. If we're using the SGE Doorbell Queue Timer mechanism, - * these Egress Update messages will be our sole CIDX Updates we get - * since we don't want to chew up PCIe bandwidth for both Ingress - * Messages and Status Page writes. However, The code which manages - * reclaiming successfully DMA'ed TX Work Requests uses the CIDX value - * stored in the Status Page at the end of the TX Queue. It's easiest - * to simply copy the CIDX Update value from the Egress Update message - * to the Status Page. Also note that no Endian issues need to be - * considered here since both are Big Endian and we're just copying - * bytes consistently ... - */ - if (txq->dbqt) { - struct cpl_sge_egr_update *egr; - - egr = (struct cpl_sge_egr_update *)rsp; - WRITE_ONCE(txq->q.stat->cidx, egr->cidx); - } - t4_sge_eth_txq_egress_update(adapter, txq, -1); } From f1f20a8666c55cb534b8f3fc1130eebf01a06155 Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Thu, 19 Mar 2020 23:08:10 +0530 Subject: [PATCH 291/372] cxgb4: fix Txq restart check during backpressure Driver reclaims descriptors in much smaller batches, even if hardware indicates more to reclaim, during backpressure. So, fix the check to restart the Txq during backpressure, by looking at how many descriptors hardware had indicated to reclaim, and not on how many descriptors that driver had actually reclaimed. Once the Txq is restarted, driver will reclaim even more descriptors when Tx path is entered again. Fixes: d429005fdf2c ("cxgb4/cxgb4vf: Add support for SGE doorbell queue timer") Signed-off-by: Rahul Lakkireddy Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/sge.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index c816837fbd85..cab3d17e0e1a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -1307,8 +1307,9 @@ static inline void *write_tso_wr(struct adapter *adap, struct sk_buff *skb, int t4_sge_eth_txq_egress_update(struct adapter *adap, struct sge_eth_txq *eq, int maxreclaim) { + unsigned int reclaimed, hw_cidx; struct sge_txq *q = &eq->q; - unsigned int reclaimed; + int hw_in_use; if (!q->in_use || !__netif_tx_trylock(eq->txq)) return 0; @@ -1316,12 +1317,17 @@ int t4_sge_eth_txq_egress_update(struct adapter *adap, struct sge_eth_txq *eq, /* Reclaim pending completed TX Descriptors. */ reclaimed = reclaim_completed_tx(adap, &eq->q, maxreclaim, true); + hw_cidx = ntohs(READ_ONCE(q->stat->cidx)); + hw_in_use = q->pidx - hw_cidx; + if (hw_in_use < 0) + hw_in_use += q->size; + /* If the TX Queue is currently stopped and there's now more than half * the queue available, restart it. Otherwise bail out since the rest * of what we want do here is with the possibility of shipping any * currently buffered Coalesced TX Work Request. */ - if (netif_tx_queue_stopped(eq->txq) && txq_avail(q) > (q->size / 2)) { + if (netif_tx_queue_stopped(eq->txq) && hw_in_use < (q->size / 2)) { netif_tx_wake_queue(eq->txq); eq->q.restarts++; } From 081c54323b27d8d4b40df6b2375b9e1f6846d827 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Thu, 31 Oct 2019 21:14:22 +0800 Subject: [PATCH 292/372] tools/power turbostat: Support Cometlake From a turbostat point of view, Cometlake is like Kabylake. Suggested-by: Rui Zhang Signed-off-by: Chen Yu Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 31c1ca0bb3ee..dd5ac9f52ac5 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4610,6 +4610,8 @@ unsigned int intel_model_duplicates(unsigned int model) case INTEL_FAM6_SKYLAKE: case INTEL_FAM6_KABYLAKE_L: case INTEL_FAM6_KABYLAKE: + case INTEL_FAM6_COMETLAKE_L: + case INTEL_FAM6_COMETLAKE: return INTEL_FAM6_SKYLAKE_L; case INTEL_FAM6_ICELAKE_L: From d8d005ba6afa502ca37ced5782f672c4d2fc1515 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 19 Mar 2020 18:33:12 -0400 Subject: [PATCH 293/372] tools/power turbostat: Fix gcc build warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Warning: ‘__builtin_strncpy’ specified bound 20 equals destination size [-Wstringop-truncation] reduce param to strncpy, to guarantee that a null byte is always copied into destination buffer. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index dd5ac9f52ac5..fa95a8ca5565 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -5325,9 +5325,9 @@ int add_counter(unsigned int msr_num, char *path, char *name, } msrp->msr_num = msr_num; - strncpy(msrp->name, name, NAME_BYTES); + strncpy(msrp->name, name, NAME_BYTES - 1); if (path) - strncpy(msrp->path, path, PATH_BYTES); + strncpy(msrp->path, path, PATH_BYTES - 1); msrp->width = width; msrp->type = type; msrp->format = format; From 4bf7132a0ace8888398af8cec6485ee4c6db5ea8 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Tue, 14 Jan 2020 12:04:55 +0800 Subject: [PATCH 294/372] tools/power turbostat: Support Tiger Lake From a turbostat point of view, Tiger Lake looks like Ice Lake. Signed-off-by: Chen Yu Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index fa95a8ca5565..d2c3f294da2d 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4616,6 +4616,8 @@ unsigned int intel_model_duplicates(unsigned int model) case INTEL_FAM6_ICELAKE_L: case INTEL_FAM6_ICELAKE_NNPI: + case INTEL_FAM6_TIGERLAKE_L: + case INTEL_FAM6_TIGERLAKE: return INTEL_FAM6_CANNONLAKE_L; case INTEL_FAM6_ATOM_TREMONT_D: From 23274faf96500700da83c4f0ff12d78ae03d5604 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Tue, 14 Jan 2020 12:06:49 +0800 Subject: [PATCH 295/372] tools/power turbostat: Support Ice Lake server From a turbostat point of view, Ice Lake server looks like Sky Lake server. Signed-off-by: Chen Yu Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index d2c3f294da2d..26088b2a27cc 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4622,6 +4622,9 @@ unsigned int intel_model_duplicates(unsigned int model) case INTEL_FAM6_ATOM_TREMONT_D: return INTEL_FAM6_ATOM_GOLDMONT_D; + + case INTEL_FAM6_ICELAKE_X: + return INTEL_FAM6_SKYLAKE_X; } return model; } From d7814c3098ddb2780bb66e787aa3949110dd4a41 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Tue, 14 Jan 2020 12:09:45 +0800 Subject: [PATCH 296/372] tools/power turbostat: Support Jasper Lake Jasper Lake, like Elkhart Lake, uses a Tremont CPU. So reuse the code. Signed-off-by: Chen Yu Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 26088b2a27cc..e953afb2e7a1 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4623,6 +4623,9 @@ unsigned int intel_model_duplicates(unsigned int model) case INTEL_FAM6_ATOM_TREMONT_D: return INTEL_FAM6_ATOM_GOLDMONT_D; + case INTEL_FAM6_ATOM_TREMONT_L: + return INTEL_FAM6_ATOM_TREMONT; + case INTEL_FAM6_ICELAKE_X: return INTEL_FAM6_SKYLAKE_X; } From f6708400707fec5cb56600710a9be59266df09ad Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Wed, 18 Mar 2020 18:55:48 +0800 Subject: [PATCH 297/372] tools/power turbostat: Support Elkhart Lake From a turbostat point of view the Tremont-based Elkhart Lake is very similar to Goldmont, reuse the code of Goldmont. Elkhart Lake does not support 'group turbo limit counter' nor C3, adjust the code accordingly. Signed-off-by: Chen Yu Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 31 ++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index e953afb2e7a1..761146c4f9bc 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -3265,6 +3265,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ case INTEL_FAM6_ATOM_GOLDMONT_PLUS: case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */ + case INTEL_FAM6_ATOM_TREMONT: /* EHL */ pkg_cstate_limits = glm_pkg_cstate_limits; break; default: @@ -3336,6 +3337,17 @@ int is_skx(unsigned int family, unsigned int model) } return 0; } +int is_ehl(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + + switch (model) { + case INTEL_FAM6_ATOM_TREMONT: + return 1; + } + return 0; +} int has_turbo_ratio_limit(unsigned int family, unsigned int model) { @@ -3894,6 +3906,20 @@ void rapl_probe_intel(unsigned int family, unsigned int model) else BIC_PRESENT(BIC_PkgWatt); break; + case INTEL_FAM6_ATOM_TREMONT: /* EHL */ + do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO; + if (rapl_joules) { + BIC_PRESENT(BIC_Pkg_J); + BIC_PRESENT(BIC_Cor_J); + BIC_PRESENT(BIC_RAM_J); + BIC_PRESENT(BIC_GFX_J); + } else { + BIC_PRESENT(BIC_PkgWatt); + BIC_PRESENT(BIC_CorWatt); + BIC_PRESENT(BIC_RAMWatt); + BIC_PRESENT(BIC_GFXWatt); + } + break; case INTEL_FAM6_SKYLAKE_L: /* SKL */ case INTEL_FAM6_CANNONLAKE_L: /* CNL */ do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO; @@ -4295,6 +4321,7 @@ int has_snb_msrs(unsigned int family, unsigned int model) case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ case INTEL_FAM6_ATOM_GOLDMONT_PLUS: case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */ + case INTEL_FAM6_ATOM_TREMONT: /* EHL */ return 1; } return 0; @@ -4324,6 +4351,7 @@ int has_c8910_msrs(unsigned int family, unsigned int model) case INTEL_FAM6_CANNONLAKE_L: /* CNL */ case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ case INTEL_FAM6_ATOM_GOLDMONT_PLUS: + case INTEL_FAM6_ATOM_TREMONT: /* EHL */ return 1; } return 0; @@ -4882,7 +4910,8 @@ void process_cpuid() do_slm_cstates = is_slm(family, model); do_knl_cstates = is_knl(family, model); - if (do_slm_cstates || do_knl_cstates || is_cnl(family, model)) + if (do_slm_cstates || do_knl_cstates || is_cnl(family, model) || + is_ehl(family, model)) BIC_NOT_PRESENT(BIC_CPU_c3); if (!quiet) From 1f81c5efc020314b2db30d77efe228b7e117750d Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 19 Mar 2020 18:26:05 -0400 Subject: [PATCH 298/372] tools/power turbostat: Fix missing SYS_LPI counter on some Chromebooks Some Chromebook BIOS' do not export an ACPI LPIT, which is how Linux finds the residency counter for CPU and SYSTEM low power states, that is exports in /sys/devices/system/cpu/cpuidle/*residency_us When these sysfs attributes are missing, check the debugfs attrubte from the pmc_core driver, which accesses the same counter value. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 761146c4f9bc..3ecbf709a48c 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -304,6 +304,10 @@ int *irqs_per_cpu; /* indexed by cpu_num */ void setup_all_buffers(void); +char *sys_lpi_file; +char *sys_lpi_file_sysfs = "/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us"; +char *sys_lpi_file_debugfs = "/sys/kernel/debug/pmc_core/slp_s0_residency_usec"; + int cpu_is_not_present(int cpu) { return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set); @@ -2916,8 +2920,6 @@ int snapshot_gfx_mhz(void) * * record snapshot of * /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us - * - * return 1 if config change requires a restart, else return 0 */ int snapshot_cpu_lpi_us(void) { @@ -2941,17 +2943,14 @@ int snapshot_cpu_lpi_us(void) /* * snapshot_sys_lpi() * - * record snapshot of - * /sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us - * - * return 1 if config change requires a restart, else return 0 + * record snapshot of sys_lpi_file */ int snapshot_sys_lpi_us(void) { FILE *fp; int retval; - fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us", "r"); + fp = fopen_or_die(sys_lpi_file, "r"); retval = fscanf(fp, "%lld", &cpuidle_cur_sys_lpi_us); if (retval != 1) { @@ -4946,10 +4945,16 @@ void process_cpuid() else BIC_NOT_PRESENT(BIC_CPU_LPI); - if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us", R_OK)) + if (!access(sys_lpi_file_sysfs, R_OK)) { + sys_lpi_file = sys_lpi_file_sysfs; BIC_PRESENT(BIC_SYS_LPI); - else + } else if (!access(sys_lpi_file_debugfs, R_OK)) { + sys_lpi_file = sys_lpi_file_debugfs; + BIC_PRESENT(BIC_SYS_LPI); + } else { + sys_lpi_file_sysfs = NULL; BIC_NOT_PRESENT(BIC_SYS_LPI); + } if (!quiet) decode_misc_feature_control(); From fcaa681c03ea82193e60d7f2cdfd94fbbcd4cae9 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 19 Mar 2020 23:24:17 -0400 Subject: [PATCH 299/372] tools/power turbostat: Fix 32-bit capabilities warning warning: `turbostat' uses 32-bit capabilities (legacy support in use) Signed-off-by: Len Brown --- tools/power/x86/turbostat/Makefile | 2 +- tools/power/x86/turbostat/turbostat.c | 48 +++++++++++++++++---------- 2 files changed, 32 insertions(+), 18 deletions(-) diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile index 13f1e8b9ac52..2b6551269e43 100644 --- a/tools/power/x86/turbostat/Makefile +++ b/tools/power/x86/turbostat/Makefile @@ -16,7 +16,7 @@ override CFLAGS += -D_FORTIFY_SOURCE=2 %: %.c @mkdir -p $(BUILD_OUTPUT) - $(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@ $(LDFLAGS) + $(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@ $(LDFLAGS) -lcap .PHONY : clean clean : diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 3ecbf709a48c..77f89371ec5f 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include #include @@ -3150,27 +3150,41 @@ void check_dev_msr() err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" "); } -void check_permissions() +/* + * check for CAP_SYS_RAWIO + * return 0 on success + * return 1 on fail + */ +int check_for_cap_sys_rawio(void) +{ + cap_t caps; + cap_flag_value_t cap_flag_value; + + caps = cap_get_proc(); + if (caps == NULL) + err(-6, "cap_get_proc\n"); + + if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value)) + err(-6, "cap_get\n"); + + if (cap_flag_value != CAP_SET) { + warnx("capget(CAP_SYS_RAWIO) failed," + " try \"# setcap cap_sys_rawio=ep %s\"", progname); + return 1; + } + + if (cap_free(caps) == -1) + err(-6, "cap_free\n"); + + return 0; +} +void check_permissions(void) { - struct __user_cap_header_struct cap_header_data; - cap_user_header_t cap_header = &cap_header_data; - struct __user_cap_data_struct cap_data_data; - cap_user_data_t cap_data = &cap_data_data; - extern int capget(cap_user_header_t hdrp, cap_user_data_t datap); int do_exit = 0; char pathname[32]; /* check for CAP_SYS_RAWIO */ - cap_header->pid = getpid(); - cap_header->version = _LINUX_CAPABILITY_VERSION; - if (capget(cap_header, cap_data) < 0) - err(-6, "capget(2) failed"); - - if ((cap_data->effective & (1 << CAP_SYS_RAWIO)) == 0) { - do_exit++; - warnx("capget(CAP_SYS_RAWIO) failed," - " try \"# setcap cap_sys_rawio=ep %s\"", progname); - } + do_exit += check_for_cap_sys_rawio(); /* test file permissions */ sprintf(pathname, "/dev/cpu/%d/msr", base_cpu); From b738a185beaab8728943acdb3e67371b8a88185e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 19 Mar 2020 12:49:55 -0700 Subject: [PATCH 300/372] tcp: ensure skb->dev is NULL before leaving TCP stack skb->rbnode is sharing three skb fields : next, prev, dev When a packet is sent, TCP keeps the original skb (master) in a rtx queue, which was converted to rbtree a while back. __tcp_transmit_skb() is responsible to clone the master skb, and add the TCP header to the clone before sending it to network layer. skb_clone() already clears skb->next and skb->prev, but copies the master oskb->dev into the clone. We need to clear skb->dev, otherwise lower layers could interpret the value as a pointer to a netdev. This old bug surfaced recently when commit 28f8bfd1ac94 ("netfilter: Support iif matches in POSTROUTING") was merged. Before this netfilter commit, skb->dev value was ignored and changed before reaching dev_queue_xmit() Fixes: 75c119afe14f ("tcp: implement rb-tree based retransmit queue") Fixes: 28f8bfd1ac94 ("netfilter: Support iif matches in POSTROUTING") Signed-off-by: Eric Dumazet Reported-by: Martin Zaharinov Cc: Florian Westphal Cc: Pablo Neira Ayuso Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 306e25d743e8..e8cf8fde3d37 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1109,6 +1109,10 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, if (unlikely(!skb)) return -ENOBUFS; + /* retransmit skbs might have a non zero value in skb->dev + * because skb->dev is aliased with skb->rbnode.rb_left + */ + skb->dev = NULL; } inet = inet_sk(sk); From 4022e7af86be2dd62975dedb6b7ea551d108695e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 19 Mar 2020 19:23:18 -0600 Subject: [PATCH 301/372] io_uring: make sure openat/openat2 honor rlimit nofile Dmitry reports that a test case shows that io_uring isn't honoring a modified rlimit nofile setting. get_unused_fd_flags() checks the task signal->rlimi[] for the limits. As this isn't easily inheritable, provide a __get_unused_fd_flags() that takes the value instead. Then we can grab it when the request is prepared (from the original task), and pass that in when we do the async part part of the open. Reported-by: Dmitry Kadashev Tested-by: Dmitry Kadashev Acked-by: David S. Miller Signed-off-by: Jens Axboe --- fs/file.c | 7 ++++++- fs/io_uring.c | 5 ++++- include/linux/file.h | 1 + 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/fs/file.c b/fs/file.c index a364e1a9b7e8..c8a4e4c86e55 100644 --- a/fs/file.c +++ b/fs/file.c @@ -540,9 +540,14 @@ static int alloc_fd(unsigned start, unsigned flags) return __alloc_fd(current->files, start, rlimit(RLIMIT_NOFILE), flags); } +int __get_unused_fd_flags(unsigned flags, unsigned long nofile) +{ + return __alloc_fd(current->files, 0, nofile, flags); +} + int get_unused_fd_flags(unsigned flags) { - return __alloc_fd(current->files, 0, rlimit(RLIMIT_NOFILE), flags); + return __get_unused_fd_flags(flags, rlimit(RLIMIT_NOFILE)); } EXPORT_SYMBOL(get_unused_fd_flags); diff --git a/fs/io_uring.c b/fs/io_uring.c index b1fbc4424aa6..fe5ded7c74ef 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -397,6 +397,7 @@ struct io_open { struct filename *filename; struct statx __user *buffer; struct open_how how; + unsigned long nofile; }; struct io_files_update { @@ -2577,6 +2578,7 @@ static int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) return ret; } + req->open.nofile = rlimit(RLIMIT_NOFILE); req->flags |= REQ_F_NEED_CLEANUP; return 0; } @@ -2618,6 +2620,7 @@ static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) return ret; } + req->open.nofile = rlimit(RLIMIT_NOFILE); req->flags |= REQ_F_NEED_CLEANUP; return 0; } @@ -2636,7 +2639,7 @@ static int io_openat2(struct io_kiocb *req, struct io_kiocb **nxt, if (ret) goto err; - ret = get_unused_fd_flags(req->open.how.flags); + ret = __get_unused_fd_flags(req->open.how.flags, req->open.nofile); if (ret < 0) goto err; diff --git a/include/linux/file.h b/include/linux/file.h index c6c7b24ea9f7..142d102f285e 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -85,6 +85,7 @@ extern int f_dupfd(unsigned int from, struct file *file, unsigned flags); extern int replace_fd(unsigned fd, struct file *file, unsigned flags); extern void set_close_on_exec(unsigned int fd, int flag); extern bool get_close_on_exec(unsigned int fd); +extern int __get_unused_fd_flags(unsigned flags, unsigned long nofile); extern int get_unused_fd_flags(unsigned flags); extern void put_unused_fd(unsigned int fd); From 09952e3e7826119ddd4357c453d54bcc7ef25156 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 19 Mar 2020 20:16:56 -0600 Subject: [PATCH 302/372] io_uring: make sure accept honor rlimit nofile Just like commit 4022e7af86be, this fixes the fact that IORING_OP_ACCEPT ends up using get_unused_fd_flags(), which checks current->signal->rlim[] for limits. Add an extra argument to __sys_accept4_file() that allows us to pass in the proper nofile limit, and grab it at request prep time. Acked-by: David S. Miller Signed-off-by: Jens Axboe --- fs/io_uring.c | 5 ++++- include/linux/socket.h | 3 ++- net/socket.c | 8 +++++--- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index fe5ded7c74ef..3affd96a98ba 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -343,6 +343,7 @@ struct io_accept { struct sockaddr __user *addr; int __user *addr_len; int flags; + unsigned long nofile; }; struct io_sync { @@ -3324,6 +3325,7 @@ static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr)); accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2)); accept->flags = READ_ONCE(sqe->accept_flags); + accept->nofile = rlimit(RLIMIT_NOFILE); return 0; #else return -EOPNOTSUPP; @@ -3340,7 +3342,8 @@ static int __io_accept(struct io_kiocb *req, struct io_kiocb **nxt, file_flags = force_nonblock ? O_NONBLOCK : 0; ret = __sys_accept4_file(req->file, file_flags, accept->addr, - accept->addr_len, accept->flags); + accept->addr_len, accept->flags, + accept->nofile); if (ret == -EAGAIN && force_nonblock) return -EAGAIN; if (ret == -ERESTARTSYS) diff --git a/include/linux/socket.h b/include/linux/socket.h index 2d2313403101..15f3412d481e 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -401,7 +401,8 @@ extern int __sys_sendto(int fd, void __user *buff, size_t len, int addr_len); extern int __sys_accept4_file(struct file *file, unsigned file_flags, struct sockaddr __user *upeer_sockaddr, - int __user *upeer_addrlen, int flags); + int __user *upeer_addrlen, int flags, + unsigned long nofile); extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen, int flags); extern int __sys_socket(int family, int type, int protocol); diff --git a/net/socket.c b/net/socket.c index b79a05de7c6e..2eecf1517f76 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1707,7 +1707,8 @@ SYSCALL_DEFINE2(listen, int, fd, int, backlog) int __sys_accept4_file(struct file *file, unsigned file_flags, struct sockaddr __user *upeer_sockaddr, - int __user *upeer_addrlen, int flags) + int __user *upeer_addrlen, int flags, + unsigned long nofile) { struct socket *sock, *newsock; struct file *newfile; @@ -1738,7 +1739,7 @@ int __sys_accept4_file(struct file *file, unsigned file_flags, */ __module_get(newsock->ops->owner); - newfd = get_unused_fd_flags(flags); + newfd = __get_unused_fd_flags(flags, nofile); if (unlikely(newfd < 0)) { err = newfd; sock_release(newsock); @@ -1807,7 +1808,8 @@ int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, f = fdget(fd); if (f.file) { ret = __sys_accept4_file(f.file, 0, upeer_sockaddr, - upeer_addrlen, flags); + upeer_addrlen, flags, + rlimit(RLIMIT_NOFILE)); if (f.flags) fput(f.file); } From 98fd5c723730f560e5bea919a64ac5b83d45eb72 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 12 Mar 2020 16:06:38 -0700 Subject: [PATCH 303/372] nvmet-tcp: set MSG_MORE only if we actually have more to send When we send PDU data, we want to optimize the tcp stack operation if we have more data to send. So when we set MSG_MORE when: - We have more fragments coming in the batch, or - We have a more data to send in this PDU - We don't have a data digest trailer - We optimize with the SUCCESS flag and omit the NVMe completion (used if sq_head pointer update is disabled) This addresses a regression in QD=1 with SUCCESS flag optimization as we unconditionally set MSG_MORE when we didn't actually have more data to send. Fixes: 70583295388a ("nvmet-tcp: implement C2HData SUCCESS optimization") Reported-by: Mark Wunderlich Tested-by: Mark Wunderlich Signed-off-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/target/tcp.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index af674fc0bb1e..5bb5342b8d0c 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -515,7 +515,7 @@ static int nvmet_try_send_data_pdu(struct nvmet_tcp_cmd *cmd) return 1; } -static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd) +static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd, bool last_in_batch) { struct nvmet_tcp_queue *queue = cmd->queue; int ret; @@ -523,9 +523,15 @@ static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd) while (cmd->cur_sg) { struct page *page = sg_page(cmd->cur_sg); u32 left = cmd->cur_sg->length - cmd->offset; + int flags = MSG_DONTWAIT; + + if ((!last_in_batch && cmd->queue->send_list_len) || + cmd->wbytes_done + left < cmd->req.transfer_len || + queue->data_digest || !queue->nvme_sq.sqhd_disabled) + flags |= MSG_MORE; ret = kernel_sendpage(cmd->queue->sock, page, cmd->offset, - left, MSG_DONTWAIT | MSG_MORE); + left, flags); if (ret <= 0) return ret; @@ -660,7 +666,7 @@ static int nvmet_tcp_try_send_one(struct nvmet_tcp_queue *queue, } if (cmd->state == NVMET_TCP_SEND_DATA) { - ret = nvmet_try_send_data(cmd); + ret = nvmet_try_send_data(cmd, last_in_batch); if (ret <= 0) goto done_send; } From d8e6fd5c7991033037842b32c9774370a038e902 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 20 Mar 2020 18:43:48 +0000 Subject: [PATCH 304/372] btrfs: fix removal of raid[56|1c34} incompat flags after removing block group We are incorrectly dropping the raid56 and raid1c34 incompat flags when there are still raid56 and raid1c34 block groups, not when we do not any of those anymore. The logic just got unintentionally broken after adding the support for the raid1c34 modes. Fix this by clear the flags only if we do not have block groups with the respective profiles. Fixes: 9c907446dce3 ("btrfs: drop incompat bit for raid1c34 after last block group is gone") Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/block-group.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 404e050ce8ee..7f09147872dc 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -856,9 +856,9 @@ static void clear_incompat_bg_bits(struct btrfs_fs_info *fs_info, u64 flags) found_raid1c34 = true; up_read(&sinfo->groups_sem); } - if (found_raid56) + if (!found_raid56) btrfs_clear_fs_incompat(fs_info, RAID56); - if (found_raid1c34) + if (!found_raid1c34) btrfs_clear_fs_incompat(fs_info, RAID1C34); } } From 07f8e4d0fddbf2f87e4cefb551278abc38db8cdd Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 20 Mar 2020 16:52:02 +0100 Subject: [PATCH 305/372] tcp: also NULL skb->dev when copy was needed In rare cases retransmit logic will make a full skb copy, which will not trigger the zeroing added in recent change b738a185beaa ("tcp: ensure skb->dev is NULL before leaving TCP stack"). Cc: Eric Dumazet Fixes: 75c119afe14f ("tcp: implement rb-tree based retransmit queue") Fixes: 28f8bfd1ac94 ("netfilter: Support iif matches in POSTROUTING") Signed-off-by: Florian Westphal Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e8cf8fde3d37..2f45cde168c4 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3041,8 +3041,12 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) tcp_skb_tsorted_save(skb) { nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC); - err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : - -ENOBUFS; + if (nskb) { + nskb->dev = NULL; + err = tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC); + } else { + err = -ENOBUFS; + } } tcp_skb_tsorted_restore(skb); if (!err) { From abdcbdb265264f736df316622a695ad30019c05f Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 21 Mar 2020 00:47:47 -0400 Subject: [PATCH 306/372] tools/power turbostat: Print cpuidle information Print cpuidle driver and governor. Originally-by: Antti Laakso Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 77f89371ec5f..05dbe23570d4 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -3503,6 +3503,23 @@ dump_cstate_pstate_config_info(unsigned int family, unsigned int model) dump_nhm_cst_cfg(); } +static void dump_sysfs_file(char *path) +{ + FILE *input; + char cpuidle_buf[64]; + + input = fopen(path, "r"); + if (input == NULL) { + if (debug) + fprintf(outf, "NSFOD %s\n", path); + return; + } + if (!fgets(cpuidle_buf, sizeof(cpuidle_buf), input)) + err(1, "%s: failed to read file", path); + fclose(input); + + fprintf(outf, "%s: %s", strrchr(path, '/') + 1, cpuidle_buf); +} static void dump_sysfs_cstate_config(void) { @@ -3516,6 +3533,15 @@ dump_sysfs_cstate_config(void) if (!DO_BIC(BIC_sysfs)) return; + if (access("/sys/devices/system/cpu/cpuidle", R_OK)) { + fprintf(outf, "cpuidle not loaded\n"); + return; + } + + dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_driver"); + dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor"); + dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor_ro"); + for (state = 0; state < 10; ++state) { sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", From b95fffb9b4afa8b9aa4a389ec7a0c578811eaf42 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sun, 17 Nov 2019 21:18:31 -0500 Subject: [PATCH 307/372] tools/power turbostat: update version A stitch in time saves nine. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 05dbe23570d4..33b370865d16 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -5390,7 +5390,7 @@ int get_and_dump_counters(void) } void print_version() { - fprintf(outf, "turbostat version 19.08.31" + fprintf(outf, "turbostat version 20.03.20" " - Len Brown \n"); } From 7d36665a5886c27ca4c4d0afd3ecc50b400f3587 Mon Sep 17 00:00:00 2001 From: Chunguang Xu Date: Sat, 21 Mar 2020 18:22:10 -0700 Subject: [PATCH 308/372] memcg: fix NULL pointer dereference in __mem_cgroup_usage_unregister_event MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit An eventfd monitors multiple memory thresholds of the cgroup, closes them, the kernel deletes all events related to this eventfd. Before all events are deleted, another eventfd monitors the memory threshold of this cgroup, leading to a crash: BUG: kernel NULL pointer dereference, address: 0000000000000004 #PF: supervisor write access in kernel mode #PF: error_code(0x0002) - not-present page PGD 800000033058e067 P4D 800000033058e067 PUD 3355ce067 PMD 0 Oops: 0002 [#1] SMP PTI CPU: 2 PID: 14012 Comm: kworker/2:6 Kdump: loaded Not tainted 5.6.0-rc4 #3 Hardware name: LENOVO 20AWS01K00/20AWS01K00, BIOS GLET70WW (2.24 ) 05/21/2014 Workqueue: events memcg_event_remove RIP: 0010:__mem_cgroup_usage_unregister_event+0xb3/0x190 RSP: 0018:ffffb47e01c4fe18 EFLAGS: 00010202 RAX: 0000000000000001 RBX: ffff8bb223a8a000 RCX: 0000000000000001 RDX: 0000000000000001 RSI: ffff8bb22fb83540 RDI: 0000000000000001 RBP: ffffb47e01c4fe48 R08: 0000000000000000 R09: 0000000000000010 R10: 000000000000000c R11: 071c71c71c71c71c R12: ffff8bb226aba880 R13: ffff8bb223a8a480 R14: 0000000000000000 R15: 0000000000000000 FS:  0000000000000000(0000) GS:ffff8bb242680000(0000) knlGS:0000000000000000 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000004 CR3: 000000032c29c003 CR4: 00000000001606e0 Call Trace: memcg_event_remove+0x32/0x90 process_one_work+0x172/0x380 worker_thread+0x49/0x3f0 kthread+0xf8/0x130 ret_from_fork+0x35/0x40 CR2: 0000000000000004 We can reproduce this problem in the following ways: 1. We create a new cgroup subdirectory and a new eventfd, and then we monitor multiple memory thresholds of the cgroup through this eventfd. 2. closing this eventfd, and __mem_cgroup_usage_unregister_event () will be called multiple times to delete all events related to this eventfd. The first time __mem_cgroup_usage_unregister_event() is called, the kernel will clear all items related to this eventfd in thresholds-> primary. Since there is currently only one eventfd, thresholds-> primary becomes empty, so the kernel will set thresholds-> primary and hresholds-> spare to NULL. If at this time, the user creates a new eventfd and monitor the memory threshold of this cgroup, kernel will re-initialize thresholds-> primary. Then when __mem_cgroup_usage_unregister_event () is called for the second time, because thresholds-> primary is not empty, the system will access thresholds-> spare, but thresholds-> spare is NULL, which will trigger a crash. In general, the longer it takes to delete all events related to this eventfd, the easier it is to trigger this problem. The solution is to check whether the thresholds associated with the eventfd has been cleared when deleting the event. If so, we do nothing. [akpm@linux-foundation.org: fix comment, per Kirill] Fixes: 907860ed381a ("cgroups: make cftype.unregister_event() void-returning") Signed-off-by: Chunguang Xu Signed-off-by: Andrew Morton Acked-by: Michal Hocko Acked-by: Kirill A. Shutemov Cc: Johannes Weiner Cc: Vladimir Davydov Cc: Link: http://lkml.kernel.org/r/077a6f67-aefa-4591-efec-f2f3af2b0b02@gmail.com Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 2058b8da18db..50492aa9d61b 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4027,7 +4027,7 @@ static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg, struct mem_cgroup_thresholds *thresholds; struct mem_cgroup_threshold_ary *new; unsigned long usage; - int i, j, size; + int i, j, size, entries; mutex_lock(&memcg->thresholds_lock); @@ -4047,14 +4047,20 @@ static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg, __mem_cgroup_threshold(memcg, type == _MEMSWAP); /* Calculate new number of threshold */ - size = 0; + size = entries = 0; for (i = 0; i < thresholds->primary->size; i++) { if (thresholds->primary->entries[i].eventfd != eventfd) size++; + else + entries++; } new = thresholds->spare; + /* If no items related to eventfd have been cleared, nothing to do */ + if (!entries) + goto unlock; + /* Set thresholds array to NULL if we don't have thresholds */ if (!size) { kfree(new); From d41e2f3bd54699f85b3d6f45abd09fa24a222cb9 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Sat, 21 Mar 2020 18:22:13 -0700 Subject: [PATCH 309/372] mm/hotplug: fix hot remove failure in SPARSEMEM|!VMEMMAP case In section_deactivate(), pfn_to_page() doesn't work any more after ms->section_mem_map is resetting to NULL in SPARSEMEM|!VMEMMAP case. It causes a hot remove failure: kernel BUG at mm/page_alloc.c:4806! invalid opcode: 0000 [#1] SMP PTI CPU: 3 PID: 8 Comm: kworker/u16:0 Tainted: G W 5.5.0-next-20200205+ #340 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.0.0 02/06/2015 Workqueue: kacpi_hotplug acpi_hotplug_work_fn RIP: 0010:free_pages+0x85/0xa0 Call Trace: __remove_pages+0x99/0xc0 arch_remove_memory+0x23/0x4d try_remove_memory+0xc8/0x130 __remove_memory+0xa/0x11 acpi_memory_device_remove+0x72/0x100 acpi_bus_trim+0x55/0x90 acpi_device_hotplug+0x2eb/0x3d0 acpi_hotplug_work_fn+0x1a/0x30 process_one_work+0x1a7/0x370 worker_thread+0x30/0x380 kthread+0x112/0x130 ret_from_fork+0x35/0x40 Let's move the ->section_mem_map resetting after depopulate_section_memmap() to fix it. [akpm@linux-foundation.org: remove unneeded initialization, per David] Fixes: ba72b4c8cf60 ("mm/sparsemem: support sub-section hotplug") Signed-off-by: Baoquan He Signed-off-by: Andrew Morton Reviewed-by: Pankaj Gupta Reviewed-by: David Hildenbrand Acked-by: Michal Hocko Cc: Wei Yang Cc: Oscar Salvador Cc: Mike Rapoport Cc: Link: http://lkml.kernel.org/r/20200307084229.28251-2-bhe@redhat.com Signed-off-by: Linus Torvalds --- mm/sparse.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/mm/sparse.c b/mm/sparse.c index 596b2a45b100..aadb7298dcef 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -734,6 +734,7 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages, struct mem_section *ms = __pfn_to_section(pfn); bool section_is_early = early_section(ms); struct page *memmap = NULL; + bool empty; unsigned long *subsection_map = ms->usage ? &ms->usage->subsection_map[0] : NULL; @@ -764,7 +765,8 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages, * For 2/ and 3/ the SPARSEMEM_VMEMMAP={y,n} cases are unified */ bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION); - if (bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION)) { + empty = bitmap_empty(subsection_map, SUBSECTIONS_PER_SECTION); + if (empty) { unsigned long section_nr = pfn_to_section_nr(pfn); /* @@ -779,13 +781,15 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages, ms->usage = NULL; } memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr); - ms->section_mem_map = (unsigned long)NULL; } if (section_is_early && memmap) free_map_bootmem(memmap); else depopulate_section_memmap(pfn, nr_pages, altmap); + + if (empty) + ms->section_mem_map = (unsigned long)NULL; } static struct page * __meminit section_activate(int nid, unsigned long pfn, From d72520ad004a8ce18a6ba6cde317f0081b27365a Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Sat, 21 Mar 2020 18:22:17 -0700 Subject: [PATCH 310/372] page-flags: fix a crash at SetPageError(THP_SWAP) Commit bd4c82c22c36 ("mm, THP, swap: delay splitting THP after swapped out") supported writing THP to a swap device but forgot to upgrade an older commit df8c94d13c7e ("page-flags: define behavior of FS/IO-related flags on compound pages") which could trigger a crash during THP swapping out with DEBUG_VM_PGFLAGS=y, kernel BUG at include/linux/page-flags.h:317! page dumped because: VM_BUG_ON_PAGE(1 && PageCompound(page)) page:fffff3b2ec3a8000 refcount:512 mapcount:0 mapping:000000009eb0338c index:0x7f6e58200 head:fffff3b2ec3a8000 order:9 compound_mapcount:0 compound_pincount:0 anon flags: 0x45fffe0000d8454(uptodate|lru|workingset|owner_priv_1|writeback|head|reclaim|swapbacked) end_swap_bio_write() SetPageError(page) VM_BUG_ON_PAGE(1 && PageCompound(page)) bio_endio+0x297/0x560 dec_pending+0x218/0x430 [dm_mod] clone_endio+0xe4/0x2c0 [dm_mod] bio_endio+0x297/0x560 blk_update_request+0x201/0x920 scsi_end_request+0x6b/0x4b0 scsi_io_completion+0x509/0x7e0 scsi_finish_command+0x1ed/0x2a0 scsi_softirq_done+0x1c9/0x1d0 __blk_mqnterrupt+0xf/0x20 Fix by checking PF_NO_TAIL in those places instead. Fixes: bd4c82c22c36 ("mm, THP, swap: delay splitting THP after swapped out") Signed-off-by: Qian Cai Signed-off-by: Andrew Morton Reviewed-by: David Hildenbrand Acked-by: "Huang, Ying" Acked-by: Rafael Aquini Cc: Link: http://lkml.kernel.org/r/20200310235846.1319-1-cai@lca.pw Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 1bf83c8fcaa7..77de28bfefb0 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -311,7 +311,7 @@ static inline int TestClearPage##uname(struct page *page) { return 0; } __PAGEFLAG(Locked, locked, PF_NO_TAIL) PAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) __CLEARPAGEFLAG(Waiters, waiters, PF_ONLY_HEAD) -PAGEFLAG(Error, error, PF_NO_COMPOUND) TESTCLEARFLAG(Error, error, PF_NO_COMPOUND) +PAGEFLAG(Error, error, PF_NO_TAIL) TESTCLEARFLAG(Error, error, PF_NO_TAIL) PAGEFLAG(Referenced, referenced, PF_HEAD) TESTCLEARFLAG(Referenced, referenced, PF_HEAD) __SETPAGEFLAG(Referenced, referenced, PF_HEAD) From d397a45fc741c80c32a14e2de008441e9976f50c Mon Sep 17 00:00:00 2001 From: Chris Down Date: Sat, 21 Mar 2020 18:22:20 -0700 Subject: [PATCH 311/372] mm, memcg: fix corruption on 64-bit divisor in memory.high throttling Commit 0e4b01df8659 had a bunch of fixups to use the right division method. However, it seems that after all that it still wasn't right -- div_u64 takes a 32-bit divisor. The headroom is still large (2^32 pages), so on mundane systems you won't hit this, but this should definitely be fixed. Fixes: 0e4b01df8659 ("mm, memcg: throttle allocators when failing reclaim over memory.high") Reported-by: Johannes Weiner Signed-off-by: Chris Down Signed-off-by: Andrew Morton Acked-by: Johannes Weiner Cc: Tejun Heo Cc: Roman Gushchin Cc: Michal Hocko Cc: Nathan Chancellor Cc: [5.4.x+] Link: http://lkml.kernel.org/r/80780887060514967d414b3cd91f9a316a16ab98.1584036142.git.chris@chrisdown.name Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 50492aa9d61b..6d276e1a3321 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2339,7 +2339,7 @@ void mem_cgroup_handle_over_high(void) */ clamped_high = max(high, 1UL); - overage = div_u64((u64)(usage - high) << MEMCG_DELAY_PRECISION_SHIFT, + overage = div64_u64((u64)(usage - high) << MEMCG_DELAY_PRECISION_SHIFT, clamped_high); penalty_jiffies = ((u64)overage * overage * HZ) From e26733e0d0ec6798eca93daa300bc3f43616127f Mon Sep 17 00:00:00 2001 From: Chris Down Date: Sat, 21 Mar 2020 18:22:23 -0700 Subject: [PATCH 312/372] mm, memcg: throttle allocators based on ancestral memory.high Prior to this commit, we only directly check the affected cgroup's memory.high against its usage. However, it's possible that we are being reclaimed as a result of hitting an ancestor memory.high and should be penalised based on that, instead. This patch changes memory.high overage throttling to use the largest overage in its ancestors when considering how many penalty jiffies to charge. This makes sure that we penalise poorly behaving cgroups in the same way regardless of at what level of the hierarchy memory.high was breached. Fixes: 0e4b01df8659 ("mm, memcg: throttle allocators when failing reclaim over memory.high") Reported-by: Johannes Weiner Signed-off-by: Chris Down Signed-off-by: Andrew Morton Acked-by: Johannes Weiner Cc: Tejun Heo Cc: Michal Hocko Cc: Nathan Chancellor Cc: Roman Gushchin Cc: [5.4.x+] Link: http://lkml.kernel.org/r/8cd132f84bd7e16cdb8fde3378cdbf05ba00d387.1584036142.git.chris@chrisdown.name Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 93 ++++++++++++++++++++++++++++++------------------- 1 file changed, 58 insertions(+), 35 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 6d276e1a3321..7a4bd8b9adc2 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2297,28 +2297,41 @@ static void high_work_func(struct work_struct *work) #define MEMCG_DELAY_SCALING_SHIFT 14 /* - * Scheduled by try_charge() to be executed from the userland return path - * and reclaims memory over the high limit. + * Get the number of jiffies that we should penalise a mischievous cgroup which + * is exceeding its memory.high by checking both it and its ancestors. */ -void mem_cgroup_handle_over_high(void) +static unsigned long calculate_high_delay(struct mem_cgroup *memcg, + unsigned int nr_pages) { - unsigned long usage, high, clamped_high; - unsigned long pflags; - unsigned long penalty_jiffies, overage; - unsigned int nr_pages = current->memcg_nr_pages_over_high; - struct mem_cgroup *memcg; + unsigned long penalty_jiffies; + u64 max_overage = 0; - if (likely(!nr_pages)) - return; + do { + unsigned long usage, high; + u64 overage; - memcg = get_mem_cgroup_from_mm(current->mm); - reclaim_high(memcg, nr_pages, GFP_KERNEL); - current->memcg_nr_pages_over_high = 0; + usage = page_counter_read(&memcg->memory); + high = READ_ONCE(memcg->high); + + /* + * Prevent division by 0 in overage calculation by acting as if + * it was a threshold of 1 page + */ + high = max(high, 1UL); + + overage = usage - high; + overage <<= MEMCG_DELAY_PRECISION_SHIFT; + overage = div64_u64(overage, high); + + if (overage > max_overage) + max_overage = overage; + } while ((memcg = parent_mem_cgroup(memcg)) && + !mem_cgroup_is_root(memcg)); + + if (!max_overage) + return 0; /* - * memory.high is breached and reclaim is unable to keep up. Throttle - * allocators proactively to slow down excessive growth. - * * We use overage compared to memory.high to calculate the number of * jiffies to sleep (penalty_jiffies). Ideally this value should be * fairly lenient on small overages, and increasingly harsh when the @@ -2326,24 +2339,9 @@ void mem_cgroup_handle_over_high(void) * its crazy behaviour, so we exponentially increase the delay based on * overage amount. */ - - usage = page_counter_read(&memcg->memory); - high = READ_ONCE(memcg->high); - - if (usage <= high) - goto out; - - /* - * Prevent division by 0 in overage calculation by acting as if it was a - * threshold of 1 page - */ - clamped_high = max(high, 1UL); - - overage = div64_u64((u64)(usage - high) << MEMCG_DELAY_PRECISION_SHIFT, - clamped_high); - - penalty_jiffies = ((u64)overage * overage * HZ) - >> (MEMCG_DELAY_PRECISION_SHIFT + MEMCG_DELAY_SCALING_SHIFT); + penalty_jiffies = max_overage * max_overage * HZ; + penalty_jiffies >>= MEMCG_DELAY_PRECISION_SHIFT; + penalty_jiffies >>= MEMCG_DELAY_SCALING_SHIFT; /* * Factor in the task's own contribution to the overage, such that four @@ -2360,7 +2358,32 @@ void mem_cgroup_handle_over_high(void) * application moving forwards and also permit diagnostics, albeit * extremely slowly. */ - penalty_jiffies = min(penalty_jiffies, MEMCG_MAX_HIGH_DELAY_JIFFIES); + return min(penalty_jiffies, MEMCG_MAX_HIGH_DELAY_JIFFIES); +} + +/* + * Scheduled by try_charge() to be executed from the userland return path + * and reclaims memory over the high limit. + */ +void mem_cgroup_handle_over_high(void) +{ + unsigned long penalty_jiffies; + unsigned long pflags; + unsigned int nr_pages = current->memcg_nr_pages_over_high; + struct mem_cgroup *memcg; + + if (likely(!nr_pages)) + return; + + memcg = get_mem_cgroup_from_mm(current->mm); + reclaim_high(memcg, nr_pages, GFP_KERNEL); + current->memcg_nr_pages_over_high = 0; + + /* + * memory.high is breached and reclaim is unable to keep up. Throttle + * allocators proactively to slow down excessive growth. + */ + penalty_jiffies = calculate_high_delay(memcg, nr_pages); /* * Don't sleep if the amount of jiffies this memcg owes us is so low From 12e967fd8e4e6c3d275b4c69c890adc838891300 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Sat, 21 Mar 2020 18:22:26 -0700 Subject: [PATCH 313/372] mm: do not allow MADV_PAGEOUT for CoW pages Jann has brought up a very interesting point [1]. While shared pages are excluded from MADV_PAGEOUT normally, CoW pages can be easily reclaimed that way. This can lead to all sorts of hard to debug problems. E.g. performance problems outlined by Daniel [2]. There are runtime environments where there is a substantial memory shared among security domains via CoW memory and a easy to reclaim way of that memory, which MADV_{COLD,PAGEOUT} offers, can lead to either performance degradation in for the parent process which might be more privileged or even open side channel attacks. The feasibility of the latter is not really clear to me TBH but there is no real reason for exposure at this stage. It seems there is no real use case to depend on reclaiming CoW memory via madvise at this stage so it is much easier to simply disallow it and this is what this patch does. Put it simply MADV_{PAGEOUT,COLD} can operate only on the exclusively owned memory which is a straightforward semantic. [1] http://lkml.kernel.org/r/CAG48ez0G3JkMq61gUmyQAaCq=_TwHbi1XKzWRooxZkv08PQKuw@mail.gmail.com [2] http://lkml.kernel.org/r/CAKOZueua_v8jHCpmEtTB6f3i9e2YnmX4mqdYVWhV4E=Z-n+zRQ@mail.gmail.com Fixes: 9c276cc65a58 ("mm: introduce MADV_COLD") Reported-by: Jann Horn Signed-off-by: Michal Hocko Signed-off-by: Andrew Morton Acked-by: Vlastimil Babka Cc: Minchan Kim Cc: Daniel Colascione Cc: Dave Hansen Cc: "Joel Fernandes (Google)" Cc: Link: http://lkml.kernel.org/r/20200312082248.GS23944@dhcp22.suse.cz Signed-off-by: Linus Torvalds --- mm/madvise.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/mm/madvise.c b/mm/madvise.c index 43b47d3fae02..4bb30ed6c8d2 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -335,12 +335,14 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd, } page = pmd_page(orig_pmd); + + /* Do not interfere with other mappings of this page */ + if (page_mapcount(page) != 1) + goto huge_unlock; + if (next - addr != HPAGE_PMD_SIZE) { int err; - if (page_mapcount(page) != 1) - goto huge_unlock; - get_page(page); spin_unlock(ptl); lock_page(page); @@ -426,6 +428,10 @@ regular_page: continue; } + /* Do not interfere with other mappings of this page */ + if (page_mapcount(page) != 1) + continue; + VM_BUG_ON_PAGE(PageTransCompound(page), page); if (pte_young(ptent)) { From 1b53734bd0b2feed8e7761771b2e76fc9126ea0c Mon Sep 17 00:00:00 2001 From: Roman Penyaev Date: Sat, 21 Mar 2020 18:22:30 -0700 Subject: [PATCH 314/372] epoll: fix possible lost wakeup on epoll_ctl() path This fixes possible lost wakeup introduced by commit a218cc491420. Originally modifications to ep->wq were serialized by ep->wq.lock, but in commit a218cc491420 ("epoll: use rwlock in order to reduce ep_poll_callback() contention") a new rw lock was introduced in order to relax fd event path, i.e. callers of ep_poll_callback() function. After the change ep_modify and ep_insert (both are called on epoll_ctl() path) were switched to ep->lock, but ep_poll (epoll_wait) was using ep->wq.lock on wqueue list modification. The bug doesn't lead to any wqueue list corruptions, because wake up path and list modifications were serialized by ep->wq.lock internally, but actual waitqueue_active() check prior wake_up() call can be reordered with modifications of ep ready list, thus wake up can be lost. And yes, can be healed by explicit smp_mb(): list_add_tail(&epi->rdlink, &ep->rdllist); smp_mb(); if (waitqueue_active(&ep->wq)) wake_up(&ep->wp); But let's make it simple, thus current patch replaces ep->wq.lock with the ep->lock for wqueue modifications, thus wake up path always observes activeness of the wqueue correcty. Fixes: a218cc491420 ("epoll: use rwlock in order to reduce ep_poll_callback() contention") Reported-by: Max Neunhoeffer Signed-off-by: Roman Penyaev Signed-off-by: Andrew Morton Tested-by: Max Neunhoeffer Cc: Jakub Kicinski Cc: Christopher Kohlhoff Cc: Davidlohr Bueso Cc: Jason Baron Cc: Jes Sorensen Cc: [5.1+] Link: http://lkml.kernel.org/r/20200214170211.561524-1-rpenyaev@suse.de References: https://bugzilla.kernel.org/show_bug.cgi?id=205933 Bisected-by: Max Neunhoeffer Signed-off-by: Linus Torvalds --- fs/eventpoll.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index b041b66002db..eee3c92a9ebf 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1854,9 +1854,9 @@ fetch_events: waiter = true; init_waitqueue_entry(&wait, current); - spin_lock_irq(&ep->wq.lock); + write_lock_irq(&ep->lock); __add_wait_queue_exclusive(&ep->wq, &wait); - spin_unlock_irq(&ep->wq.lock); + write_unlock_irq(&ep->lock); } for (;;) { @@ -1904,9 +1904,9 @@ send_events: goto fetch_events; if (waiter) { - spin_lock_irq(&ep->wq.lock); + write_lock_irq(&ep->lock); __remove_wait_queue(&ep->wq, &wait); - spin_unlock_irq(&ep->wq.lock); + write_unlock_irq(&ep->lock); } return res; From 63886bad904b73f7470fd582fbc41c5ae04d6785 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Sat, 21 Mar 2020 18:22:34 -0700 Subject: [PATCH 315/372] mm/mmu_notifier: silence PROVE_RCU_LIST warnings It is safe to traverse mm->notifier_subscriptions->list either under SRCU read lock or mm->notifier_subscriptions->lock using hlist_for_each_entry_rcu(). Silence the PROVE_RCU_LIST false positives, for example, WARNING: suspicious RCU usage ----------------------------- mm/mmu_notifier.c:484 RCU-list traversed in non-reader section!! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 3 locks held by libvirtd/802: #0: ffff9321e3f58148 (&mm->mmap_sem#2){++++}, at: do_mprotect_pkey+0xe1/0x3e0 #1: ffffffff91ae6160 (mmu_notifier_invalidate_range_start){+.+.}, at: change_p4d_range+0x5fa/0x800 #2: ffffffff91ae6e08 (srcu){....}, at: __mmu_notifier_invalidate_range_start+0x178/0x460 stack backtrace: CPU: 7 PID: 802 Comm: libvirtd Tainted: G I 5.6.0-rc6-next-20200317+ #2 Hardware name: HP ProLiant BL460c Gen8, BIOS I31 11/02/2014 Call Trace: dump_stack+0xa4/0xfe lockdep_rcu_suspicious+0xeb/0xf5 __mmu_notifier_invalidate_range_start+0x3ff/0x460 change_p4d_range+0x746/0x800 change_protection+0x1df/0x300 mprotect_fixup+0x245/0x3e0 do_mprotect_pkey+0x23b/0x3e0 __x64_sys_mprotect+0x51/0x70 do_syscall_64+0x91/0xae8 entry_SYSCALL_64_after_hwframe+0x49/0xb3 Signed-off-by: Qian Cai Signed-off-by: Andrew Morton Reviewed-by: Paul E. McKenney Reviewed-by: Jason Gunthorpe Link: http://lkml.kernel.org/r/20200317175640.2047-1-cai@lca.pw Signed-off-by: Linus Torvalds --- mm/mmu_notifier.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c index ef3973a5d34a..06852b896fa6 100644 --- a/mm/mmu_notifier.c +++ b/mm/mmu_notifier.c @@ -307,7 +307,8 @@ static void mn_hlist_release(struct mmu_notifier_subscriptions *subscriptions, * ->release returns. */ id = srcu_read_lock(&srcu); - hlist_for_each_entry_rcu(subscription, &subscriptions->list, hlist) + hlist_for_each_entry_rcu(subscription, &subscriptions->list, hlist, + srcu_read_lock_held(&srcu)) /* * If ->release runs before mmu_notifier_unregister it must be * handled, as it's the only way for the driver to flush all @@ -370,7 +371,8 @@ int __mmu_notifier_clear_flush_young(struct mm_struct *mm, id = srcu_read_lock(&srcu); hlist_for_each_entry_rcu(subscription, - &mm->notifier_subscriptions->list, hlist) { + &mm->notifier_subscriptions->list, hlist, + srcu_read_lock_held(&srcu)) { if (subscription->ops->clear_flush_young) young |= subscription->ops->clear_flush_young( subscription, mm, start, end); @@ -389,7 +391,8 @@ int __mmu_notifier_clear_young(struct mm_struct *mm, id = srcu_read_lock(&srcu); hlist_for_each_entry_rcu(subscription, - &mm->notifier_subscriptions->list, hlist) { + &mm->notifier_subscriptions->list, hlist, + srcu_read_lock_held(&srcu)) { if (subscription->ops->clear_young) young |= subscription->ops->clear_young(subscription, mm, start, end); @@ -407,7 +410,8 @@ int __mmu_notifier_test_young(struct mm_struct *mm, id = srcu_read_lock(&srcu); hlist_for_each_entry_rcu(subscription, - &mm->notifier_subscriptions->list, hlist) { + &mm->notifier_subscriptions->list, hlist, + srcu_read_lock_held(&srcu)) { if (subscription->ops->test_young) { young = subscription->ops->test_young(subscription, mm, address); @@ -428,7 +432,8 @@ void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address, id = srcu_read_lock(&srcu); hlist_for_each_entry_rcu(subscription, - &mm->notifier_subscriptions->list, hlist) { + &mm->notifier_subscriptions->list, hlist, + srcu_read_lock_held(&srcu)) { if (subscription->ops->change_pte) subscription->ops->change_pte(subscription, mm, address, pte); @@ -476,7 +481,8 @@ static int mn_hlist_invalidate_range_start( int id; id = srcu_read_lock(&srcu); - hlist_for_each_entry_rcu(subscription, &subscriptions->list, hlist) { + hlist_for_each_entry_rcu(subscription, &subscriptions->list, hlist, + srcu_read_lock_held(&srcu)) { const struct mmu_notifier_ops *ops = subscription->ops; if (ops->invalidate_range_start) { @@ -528,7 +534,8 @@ mn_hlist_invalidate_end(struct mmu_notifier_subscriptions *subscriptions, int id; id = srcu_read_lock(&srcu); - hlist_for_each_entry_rcu(subscription, &subscriptions->list, hlist) { + hlist_for_each_entry_rcu(subscription, &subscriptions->list, hlist, + srcu_read_lock_held(&srcu)) { /* * Call invalidate_range here too to avoid the need for the * subsystem of having to register an invalidate_range_end @@ -582,7 +589,8 @@ void __mmu_notifier_invalidate_range(struct mm_struct *mm, id = srcu_read_lock(&srcu); hlist_for_each_entry_rcu(subscription, - &mm->notifier_subscriptions->list, hlist) { + &mm->notifier_subscriptions->list, hlist, + srcu_read_lock_held(&srcu)) { if (subscription->ops->invalidate_range) subscription->ops->invalidate_range(subscription, mm, start, end); @@ -714,7 +722,8 @@ find_get_mmu_notifier(struct mm_struct *mm, const struct mmu_notifier_ops *ops) spin_lock(&mm->notifier_subscriptions->lock); hlist_for_each_entry_rcu(subscription, - &mm->notifier_subscriptions->list, hlist) { + &mm->notifier_subscriptions->list, hlist, + lockdep_is_held(&mm->notifier_subscriptions->lock)) { if (subscription->ops != ops) continue; From 0715e6c516f106ed553828a671d30ad9a3431536 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Sat, 21 Mar 2020 18:22:37 -0700 Subject: [PATCH 316/372] mm, slub: prevent kmalloc_node crashes and memory leaks Sachin reports [1] a crash in SLUB __slab_alloc(): BUG: Kernel NULL pointer dereference on read at 0x000073b0 Faulting instruction address: 0xc0000000003d55f4 Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries Modules linked in: CPU: 19 PID: 1 Comm: systemd Not tainted 5.6.0-rc2-next-20200218-autotest #1 NIP: c0000000003d55f4 LR: c0000000003d5b94 CTR: 0000000000000000 REGS: c0000008b37836d0 TRAP: 0300 Not tainted (5.6.0-rc2-next-20200218-autotest) MSR: 8000000000009033 CR: 24004844 XER: 00000000 CFAR: c00000000000dec4 DAR: 00000000000073b0 DSISR: 40000000 IRQMASK: 1 GPR00: c0000000003d5b94 c0000008b3783960 c00000000155d400 c0000008b301f500 GPR04: 0000000000000dc0 0000000000000002 c0000000003443d8 c0000008bb398620 GPR08: 00000008ba2f0000 0000000000000001 0000000000000000 0000000000000000 GPR12: 0000000024004844 c00000001ec52a00 0000000000000000 0000000000000000 GPR16: c0000008a1b20048 c000000001595898 c000000001750c18 0000000000000002 GPR20: c000000001750c28 c000000001624470 0000000fffffffe0 5deadbeef0000122 GPR24: 0000000000000001 0000000000000dc0 0000000000000002 c0000000003443d8 GPR28: c0000008b301f500 c0000008bb398620 0000000000000000 c00c000002287180 NIP ___slab_alloc+0x1f4/0x760 LR __slab_alloc+0x34/0x60 Call Trace: ___slab_alloc+0x334/0x760 (unreliable) __slab_alloc+0x34/0x60 __kmalloc_node+0x110/0x490 kvmalloc_node+0x58/0x110 mem_cgroup_css_online+0x108/0x270 online_css+0x48/0xd0 cgroup_apply_control_enable+0x2ec/0x4d0 cgroup_mkdir+0x228/0x5f0 kernfs_iop_mkdir+0x90/0xf0 vfs_mkdir+0x110/0x230 do_mkdirat+0xb0/0x1a0 system_call+0x5c/0x68 This is a PowerPC platform with following NUMA topology: available: 2 nodes (0-1) node 0 cpus: node 0 size: 0 MB node 0 free: 0 MB node 1 cpus: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 node 1 size: 35247 MB node 1 free: 30907 MB node distances: node 0 1 0: 10 40 1: 40 10 possible numa nodes: 0-31 This only happens with a mmotm patch "mm/memcontrol.c: allocate shrinker_map on appropriate NUMA node" [2] which effectively calls kmalloc_node for each possible node. SLUB however only allocates kmem_cache_node on online N_NORMAL_MEMORY nodes, and relies on node_to_mem_node to return such valid node for other nodes since commit a561ce00b09e ("slub: fall back to node_to_mem_node() node if allocating on memoryless node"). This is however not true in this configuration where the _node_numa_mem_ array is not initialized for nodes 0 and 2-31, thus it contains zeroes and get_partial() ends up accessing non-allocated kmem_cache_node. A related issue was reported by Bharata (originally by Ramachandran) [3] where a similar PowerPC configuration, but with mainline kernel without patch [2] ends up allocating large amounts of pages by kmalloc-1k kmalloc-512. This seems to have the same underlying issue with node_to_mem_node() not behaving as expected, and might probably also lead to an infinite loop with CONFIG_SLUB_CPU_PARTIAL [4]. This patch should fix both issues by not relying on node_to_mem_node() anymore and instead simply falling back to NUMA_NO_NODE, when kmalloc_node(node) is attempted for a node that's not online, or has no usable memory. The "usable memory" condition is also changed from node_present_pages() to N_NORMAL_MEMORY node state, as that is exactly the condition that SLUB uses to allocate kmem_cache_node structures. The check in get_partial() is removed completely, as the checks in ___slab_alloc() are now sufficient to prevent get_partial() being reached with an invalid node. [1] https://lore.kernel.org/linux-next/3381CD91-AB3D-4773-BA04-E7A072A63968@linux.vnet.ibm.com/ [2] https://lore.kernel.org/linux-mm/fff0e636-4c36-ed10-281c-8cdb0687c839@virtuozzo.com/ [3] https://lore.kernel.org/linux-mm/20200317092624.GB22538@in.ibm.com/ [4] https://lore.kernel.org/linux-mm/088b5996-faae-8a56-ef9c-5b567125ae54@suse.cz/ Fixes: a561ce00b09e ("slub: fall back to node_to_mem_node() node if allocating on memoryless node") Reported-by: Sachin Sant Reported-by: PUVICHAKRAVARTHY RAMACHANDRAN Signed-off-by: Vlastimil Babka Signed-off-by: Andrew Morton Tested-by: Sachin Sant Tested-by: Bharata B Rao Reviewed-by: Srikar Dronamraju Cc: Mel Gorman Cc: Michael Ellerman Cc: Michal Hocko Cc: Christopher Lameter Cc: linuxppc-dev@lists.ozlabs.org Cc: Joonsoo Kim Cc: Pekka Enberg Cc: David Rientjes Cc: Kirill Tkhai Cc: Vlastimil Babka Cc: Nathan Lynch Cc: Link: http://lkml.kernel.org/r/20200320115533.9604-1-vbabka@suse.cz Debugged-by: Srikar Dronamraju Signed-off-by: Linus Torvalds --- mm/slub.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 97580b41a24b..6589b41d5a60 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1973,8 +1973,6 @@ static void *get_partial(struct kmem_cache *s, gfp_t flags, int node, if (node == NUMA_NO_NODE) searchnode = numa_mem_id(); - else if (!node_present_pages(node)) - searchnode = node_to_mem_node(node); object = get_partial_node(s, get_node(s, searchnode), c, flags); if (object || node != NUMA_NO_NODE) @@ -2563,17 +2561,27 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, struct page *page; page = c->page; - if (!page) + if (!page) { + /* + * if the node is not online or has no normal memory, just + * ignore the node constraint + */ + if (unlikely(node != NUMA_NO_NODE && + !node_state(node, N_NORMAL_MEMORY))) + node = NUMA_NO_NODE; goto new_slab; + } redo: if (unlikely(!node_match(page, node))) { - int searchnode = node; - - if (node != NUMA_NO_NODE && !node_present_pages(node)) - searchnode = node_to_mem_node(node); - - if (unlikely(!node_match(page, searchnode))) { + /* + * same as above but node_match() being false already + * implies node != NUMA_NO_NODE + */ + if (!node_state(node, N_NORMAL_MEMORY)) { + node = NUMA_NO_NODE; + goto redo; + } else { stat(s, ALLOC_NODE_MISMATCH); deactivate_slab(s, page, c->freelist, c); goto new_slab; From 763802b53a427ed3cbd419dbba255c414fdd9e7c Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Sat, 21 Mar 2020 18:22:41 -0700 Subject: [PATCH 317/372] x86/mm: split vmalloc_sync_all() Commit 3f8fd02b1bf1 ("mm/vmalloc: Sync unmappings in __purge_vmap_area_lazy()") introduced a call to vmalloc_sync_all() in the vunmap() code-path. While this change was necessary to maintain correctness on x86-32-pae kernels, it also adds additional cycles for architectures that don't need it. Specifically on x86-64 with CONFIG_VMAP_STACK=y some people reported severe performance regressions in micro-benchmarks because it now also calls the x86-64 implementation of vmalloc_sync_all() on vunmap(). But the vmalloc_sync_all() implementation on x86-64 is only needed for newly created mappings. To avoid the unnecessary work on x86-64 and to gain the performance back, split up vmalloc_sync_all() into two functions: * vmalloc_sync_mappings(), and * vmalloc_sync_unmappings() Most call-sites to vmalloc_sync_all() only care about new mappings being synchronized. The only exception is the new call-site added in the above mentioned commit. Shile Zhang directed us to a report of an 80% regression in reaim throughput. Fixes: 3f8fd02b1bf1 ("mm/vmalloc: Sync unmappings in __purge_vmap_area_lazy()") Reported-by: kernel test robot Reported-by: Shile Zhang Signed-off-by: Joerg Roedel Signed-off-by: Andrew Morton Tested-by: Borislav Petkov Acked-by: Rafael J. Wysocki [GHES] Cc: Dave Hansen Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Link: http://lkml.kernel.org/r/20191009124418.8286-1-joro@8bytes.org Link: https://lists.01.org/hyperkitty/list/lkp@lists.01.org/thread/4D3JPPHBNOSPFK2KEPC6KGKS6J25AIDB/ Link: http://lkml.kernel.org/r/20191113095530.228959-1-shile.zhang@linux.alibaba.com Signed-off-by: Linus Torvalds --- arch/x86/mm/fault.c | 26 ++++++++++++++++++++++++-- drivers/acpi/apei/ghes.c | 2 +- include/linux/vmalloc.h | 5 +++-- kernel/notifier.c | 2 +- mm/nommu.c | 10 +++++++--- mm/vmalloc.c | 11 +++++++---- 6 files changed, 43 insertions(+), 13 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index fa4ea09593ab..629fdf13f846 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -190,7 +190,7 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) return pmd_k; } -void vmalloc_sync_all(void) +static void vmalloc_sync(void) { unsigned long address; @@ -217,6 +217,16 @@ void vmalloc_sync_all(void) } } +void vmalloc_sync_mappings(void) +{ + vmalloc_sync(); +} + +void vmalloc_sync_unmappings(void) +{ + vmalloc_sync(); +} + /* * 32-bit: * @@ -319,11 +329,23 @@ out: #else /* CONFIG_X86_64: */ -void vmalloc_sync_all(void) +void vmalloc_sync_mappings(void) { + /* + * 64-bit mappings might allocate new p4d/pud pages + * that need to be propagated to all tasks' PGDs. + */ sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END); } +void vmalloc_sync_unmappings(void) +{ + /* + * Unmappings never allocate or free p4d/pud pages. + * No work is required here. + */ +} + /* * 64-bit: * diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 103acbbfcf9a..24c9642e8fc7 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -171,7 +171,7 @@ int ghes_estatus_pool_init(int num_ghes) * New allocation must be visible in all pgd before it can be found by * an NMI allocating from the pool. */ - vmalloc_sync_all(); + vmalloc_sync_mappings(); rc = gen_pool_add(ghes_estatus_pool, addr, PAGE_ALIGN(len), -1); if (rc) diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index ec3813236699..0507a162ccd0 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -141,8 +141,9 @@ extern int remap_vmalloc_range_partial(struct vm_area_struct *vma, extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, unsigned long pgoff); -void vmalloc_sync_all(void); - +void vmalloc_sync_mappings(void); +void vmalloc_sync_unmappings(void); + /* * Lowlevel-APIs (not for driver use!) */ diff --git a/kernel/notifier.c b/kernel/notifier.c index 63d7501ac638..5989bbb93039 100644 --- a/kernel/notifier.c +++ b/kernel/notifier.c @@ -519,7 +519,7 @@ NOKPROBE_SYMBOL(notify_die); int register_die_notifier(struct notifier_block *nb) { - vmalloc_sync_all(); + vmalloc_sync_mappings(); return atomic_notifier_chain_register(&die_chain, nb); } EXPORT_SYMBOL_GPL(register_die_notifier); diff --git a/mm/nommu.c b/mm/nommu.c index bd2b4e5ef144..318df4e236c9 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -370,10 +370,14 @@ void vm_unmap_aliases(void) EXPORT_SYMBOL_GPL(vm_unmap_aliases); /* - * Implement a stub for vmalloc_sync_all() if the architecture chose not to - * have one. + * Implement a stub for vmalloc_sync_[un]mapping() if the architecture + * chose not to have one. */ -void __weak vmalloc_sync_all(void) +void __weak vmalloc_sync_mappings(void) +{ +} + +void __weak vmalloc_sync_unmappings(void) { } diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 1f46c3b86f9f..6b8eeb0ecee5 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1295,7 +1295,7 @@ static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end) * First make sure the mappings are removed from all page-tables * before they are freed. */ - vmalloc_sync_all(); + vmalloc_sync_unmappings(); /* * TODO: to calculate a flush range without looping. @@ -3128,16 +3128,19 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, EXPORT_SYMBOL(remap_vmalloc_range); /* - * Implement a stub for vmalloc_sync_all() if the architecture chose not to - * have one. + * Implement stubs for vmalloc_sync_[un]mappings () if the architecture chose + * not to have one. * * The purpose of this function is to make sure the vmalloc area * mappings are identical in all page-tables in the system. */ -void __weak vmalloc_sync_all(void) +void __weak vmalloc_sync_mappings(void) { } +void __weak vmalloc_sync_unmappings(void) +{ +} static int f(pte_t *pte, unsigned long addr, void *data) { From 96758117dc528e6d84bd23d205e8cf7f31eda029 Mon Sep 17 00:00:00 2001 From: Luo bin Date: Fri, 20 Mar 2020 23:13:16 +0000 Subject: [PATCH 318/372] hinic: fix a bug of waitting for IO stopped it's unreliable for fw to check whether IO is stopped, so driver wait for enough time to ensure IO process is done in hw before freeing resources Signed-off-by: Luo bin Signed-off-by: David S. Miller --- .../net/ethernet/huawei/hinic/hinic_hw_dev.c | 51 +------------------ 1 file changed, 2 insertions(+), 49 deletions(-) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c index 79b3d53f2fbf..c7c75b772a86 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c @@ -360,50 +360,6 @@ static int wait_for_db_state(struct hinic_hwdev *hwdev) return -EFAULT; } -static int wait_for_io_stopped(struct hinic_hwdev *hwdev) -{ - struct hinic_cmd_io_status cmd_io_status; - struct hinic_hwif *hwif = hwdev->hwif; - struct pci_dev *pdev = hwif->pdev; - struct hinic_pfhwdev *pfhwdev; - unsigned long end; - u16 out_size; - int err; - - if (!HINIC_IS_PF(hwif) && !HINIC_IS_PPF(hwif)) { - dev_err(&pdev->dev, "Unsupported PCI Function type\n"); - return -EINVAL; - } - - pfhwdev = container_of(hwdev, struct hinic_pfhwdev, hwdev); - - cmd_io_status.func_idx = HINIC_HWIF_FUNC_IDX(hwif); - - end = jiffies + msecs_to_jiffies(IO_STATUS_TIMEOUT); - do { - err = hinic_msg_to_mgmt(&pfhwdev->pf_to_mgmt, HINIC_MOD_COMM, - HINIC_COMM_CMD_IO_STATUS_GET, - &cmd_io_status, sizeof(cmd_io_status), - &cmd_io_status, &out_size, - HINIC_MGMT_MSG_SYNC); - if ((err) || (out_size != sizeof(cmd_io_status))) { - dev_err(&pdev->dev, "Failed to get IO status, ret = %d\n", - err); - return err; - } - - if (cmd_io_status.status == IO_STOPPED) { - dev_info(&pdev->dev, "IO stopped\n"); - return 0; - } - - msleep(20); - } while (time_before(jiffies, end)); - - dev_err(&pdev->dev, "Wait for IO stopped - Timeout\n"); - return -ETIMEDOUT; -} - /** * clear_io_resource - set the IO resources as not active in the NIC * @hwdev: the NIC HW device @@ -423,11 +379,8 @@ static int clear_io_resources(struct hinic_hwdev *hwdev) return -EINVAL; } - err = wait_for_io_stopped(hwdev); - if (err) { - dev_err(&pdev->dev, "IO has not stopped yet\n"); - return err; - } + /* sleep 100ms to wait for firmware stopping I/O */ + msleep(100); cmd_clear_io_res.func_idx = HINIC_HWIF_FUNC_IDX(hwif); From 614eaa943e9fc3fcdbd4aa0692ae84973d363333 Mon Sep 17 00:00:00 2001 From: Luo bin Date: Fri, 20 Mar 2020 23:13:17 +0000 Subject: [PATCH 319/372] hinic: fix the bug of clearing event queue should disable eq irq before freeing it, must clear event queue depth in hw before freeing relevant memory to avoid illegal memory access and update consumer idx to avoid invalid interrupt Signed-off-by: Luo bin Signed-off-by: David S. Miller --- .../net/ethernet/huawei/hinic/hinic_hw_eqs.c | 24 +++++++++++++------ 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c index 79243b626ddb..6a723c4757bc 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c @@ -188,7 +188,7 @@ static u8 eq_cons_idx_checksum_set(u32 val) * eq_update_ci - update the HW cons idx of event queue * @eq: the event queue to update the cons idx for **/ -static void eq_update_ci(struct hinic_eq *eq) +static void eq_update_ci(struct hinic_eq *eq, u32 arm_state) { u32 val, addr = EQ_CONS_IDX_REG_ADDR(eq); @@ -202,7 +202,7 @@ static void eq_update_ci(struct hinic_eq *eq) val |= HINIC_EQ_CI_SET(eq->cons_idx, IDX) | HINIC_EQ_CI_SET(eq->wrapped, WRAPPED) | - HINIC_EQ_CI_SET(EQ_ARMED, INT_ARMED); + HINIC_EQ_CI_SET(arm_state, INT_ARMED); val |= HINIC_EQ_CI_SET(eq_cons_idx_checksum_set(val), XOR_CHKSUM); @@ -347,7 +347,7 @@ static void eq_irq_handler(void *data) else if (eq->type == HINIC_CEQ) ceq_irq_handler(eq); - eq_update_ci(eq); + eq_update_ci(eq, EQ_ARMED); } /** @@ -702,7 +702,7 @@ static int init_eq(struct hinic_eq *eq, struct hinic_hwif *hwif, } set_eq_ctrls(eq); - eq_update_ci(eq); + eq_update_ci(eq, EQ_ARMED); err = alloc_eq_pages(eq); if (err) { @@ -752,18 +752,28 @@ err_req_irq: **/ static void remove_eq(struct hinic_eq *eq) { - struct msix_entry *entry = &eq->msix_entry; - - free_irq(entry->vector, eq); + hinic_set_msix_state(eq->hwif, eq->msix_entry.entry, + HINIC_MSIX_DISABLE); + free_irq(eq->msix_entry.vector, eq); if (eq->type == HINIC_AEQ) { struct hinic_eq_work *aeq_work = &eq->aeq_work; cancel_work_sync(&aeq_work->work); + /* clear aeq_len to avoid hw access host memory */ + hinic_hwif_write_reg(eq->hwif, + HINIC_CSR_AEQ_CTRL_1_ADDR(eq->q_id), 0); } else if (eq->type == HINIC_CEQ) { tasklet_kill(&eq->ceq_tasklet); + /* clear ceq_len to avoid hw access host memory */ + hinic_hwif_write_reg(eq->hwif, + HINIC_CSR_CEQ_CTRL_1_ADDR(eq->q_id), 0); } + /* update cons_idx to avoid invalid interrupt */ + eq->cons_idx = hinic_hwif_read_reg(eq->hwif, EQ_PROD_IDX_REG_ADDR(eq)); + eq_update_ci(eq, EQ_NOT_ARMED); + free_eq_pages(eq); } From 33f15da216a1f4566b4ec880942556ace30615df Mon Sep 17 00:00:00 2001 From: Luo bin Date: Fri, 20 Mar 2020 23:13:18 +0000 Subject: [PATCH 320/372] hinic: fix out-of-order excution in arm cpu add read barrier in driver code to keep from reading other fileds in dma memory which is writable for hw until we have verified the memory is valid for driver Signed-off-by: Luo bin Signed-off-by: David S. Miller --- drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c | 2 ++ drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c | 2 ++ drivers/net/ethernet/huawei/hinic/hinic_rx.c | 3 +++ drivers/net/ethernet/huawei/hinic/hinic_tx.c | 2 ++ 4 files changed, 9 insertions(+) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c index eb53c15b13f3..33f93cc25193 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c @@ -623,6 +623,8 @@ static int cmdq_cmd_ceq_handler(struct hinic_cmdq *cmdq, u16 ci, if (!CMDQ_WQE_COMPLETED(be32_to_cpu(ctrl->ctrl_info))) return -EBUSY; + dma_rmb(); + errcode = CMDQ_WQE_ERRCODE_GET(be32_to_cpu(status->status_info), VAL); cmdq_sync_cmd_handler(cmdq, ci, errcode); diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c index 6a723c4757bc..c0b6bcb067cd 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c @@ -235,6 +235,8 @@ static void aeq_irq_handler(struct hinic_eq *eq) if (HINIC_EQ_ELEM_DESC_GET(aeqe_desc, WRAPPED) == eq->wrapped) break; + dma_rmb(); + event = HINIC_EQ_ELEM_DESC_GET(aeqe_desc, TYPE); if (event >= HINIC_MAX_AEQ_EVENTS) { dev_err(&pdev->dev, "Unknown AEQ Event %d\n", event); diff --git a/drivers/net/ethernet/huawei/hinic/hinic_rx.c b/drivers/net/ethernet/huawei/hinic/hinic_rx.c index 2695ad69fca6..815649e37cb1 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_rx.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.c @@ -350,6 +350,9 @@ static int rxq_recv(struct hinic_rxq *rxq, int budget) if (!rq_wqe) break; + /* make sure we read rx_done before packet length */ + dma_rmb(); + cqe = rq->cqe[ci]; status = be32_to_cpu(cqe->status); hinic_rq_get_sge(rxq->rq, rq_wqe, ci, &sge); diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c index 0e13d1c7e474..375d81d03e86 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c @@ -622,6 +622,8 @@ static int free_tx_poll(struct napi_struct *napi, int budget) do { hw_ci = HW_CONS_IDX(sq) & wq->mask; + dma_rmb(); + /* Reading a WQEBB to get real WQE size and consumer index. */ sq_wqe = hinic_sq_read_wqebb(sq, &skb, &wqe_size, &sw_ci); if ((!sq_wqe) || From 0da7c322f116210ebfdda59c7da663a6fc5e9cc8 Mon Sep 17 00:00:00 2001 From: Luo bin Date: Fri, 20 Mar 2020 23:13:19 +0000 Subject: [PATCH 321/372] hinic: fix wrong para of wait_for_completion_timeout the second input parameter of wait_for_completion_timeout should be jiffies instead of millisecond Signed-off-by: Luo bin Signed-off-by: David S. Miller --- drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c | 3 ++- drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c index 33f93cc25193..5f2d57d1b2d3 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c @@ -389,7 +389,8 @@ static int cmdq_sync_cmd_direct_resp(struct hinic_cmdq *cmdq, spin_unlock_bh(&cmdq->cmdq_lock); - if (!wait_for_completion_timeout(&done, CMDQ_TIMEOUT)) { + if (!wait_for_completion_timeout(&done, + msecs_to_jiffies(CMDQ_TIMEOUT))) { spin_lock_bh(&cmdq->cmdq_lock); if (cmdq->errcode[curr_prod_idx] == &errcode) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c index c1a6be6bf6a8..8995e32dd1c0 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c @@ -43,7 +43,7 @@ #define MSG_NOT_RESP 0xFFFF -#define MGMT_MSG_TIMEOUT 1000 +#define MGMT_MSG_TIMEOUT 5000 #define mgmt_to_pfhwdev(pf_mgmt) \ container_of(pf_mgmt, struct hinic_pfhwdev, pf_to_mgmt) @@ -267,7 +267,8 @@ static int msg_to_mgmt_sync(struct hinic_pf_to_mgmt *pf_to_mgmt, goto unlock_sync_msg; } - if (!wait_for_completion_timeout(recv_done, MGMT_MSG_TIMEOUT)) { + if (!wait_for_completion_timeout(recv_done, + msecs_to_jiffies(MGMT_MSG_TIMEOUT))) { dev_err(&pdev->dev, "MGMT timeout, MSG id = %d\n", msg_id); err = -ETIMEDOUT; goto unlock_sync_msg; From 7296695fc16dd1761dbba8b68a9181c71cef0633 Mon Sep 17 00:00:00 2001 From: Luo bin Date: Fri, 20 Mar 2020 23:13:20 +0000 Subject: [PATCH 322/372] hinic: fix wrong value of MIN_SKB_LEN the minimum value of skb len that hw supports is 32 rather than 17 Signed-off-by: Luo bin Signed-off-by: David S. Miller --- drivers/net/ethernet/huawei/hinic/hinic_tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c index 375d81d03e86..365016450bdb 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c @@ -45,7 +45,7 @@ #define HW_CONS_IDX(sq) be16_to_cpu(*(u16 *)((sq)->hw_ci_addr)) -#define MIN_SKB_LEN 17 +#define MIN_SKB_LEN 32 #define MAX_PAYLOAD_OFFSET 221 #define TRANSPORT_OFFSET(l4_hdr, skb) ((u32)((l4_hdr) - (skb)->data)) From 3a303cfdd28d5f930a307c82e8a9d996394d5ebd Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sat, 21 Mar 2020 06:46:50 +0000 Subject: [PATCH 323/372] hsr: fix general protection fault in hsr_addr_is_self() The port->hsr is used in the hsr_handle_frame(), which is a callback of rx_handler. hsr master and slaves are initialized in hsr_add_port(). This function initializes several pointers, which includes port->hsr after registering rx_handler. So, in the rx_handler routine, un-initialized pointer would be used. In order to fix this, pointers should be initialized before registering rx_handler. Test commands: ip netns del left ip netns del right modprobe -rv veth modprobe -rv hsr killall ping modprobe hsr ip netns add left ip netns add right ip link add veth0 type veth peer name veth1 ip link add veth2 type veth peer name veth3 ip link add veth4 type veth peer name veth5 ip link set veth1 netns left ip link set veth3 netns right ip link set veth4 netns left ip link set veth5 netns right ip link set veth0 up ip link set veth2 up ip link set veth0 address fc:00:00:00:00:01 ip link set veth2 address fc:00:00:00:00:02 ip netns exec left ip link set veth1 up ip netns exec left ip link set veth4 up ip netns exec right ip link set veth3 up ip netns exec right ip link set veth5 up ip link add hsr0 type hsr slave1 veth0 slave2 veth2 ip a a 192.168.100.1/24 dev hsr0 ip link set hsr0 up ip netns exec left ip link add hsr1 type hsr slave1 veth1 slave2 veth4 ip netns exec left ip a a 192.168.100.2/24 dev hsr1 ip netns exec left ip link set hsr1 up ip netns exec left ip n a 192.168.100.1 dev hsr1 lladdr \ fc:00:00:00:00:01 nud permanent ip netns exec left ip n r 192.168.100.1 dev hsr1 lladdr \ fc:00:00:00:00:01 nud permanent for i in {1..100} do ip netns exec left ping 192.168.100.1 & done ip netns exec left hping3 192.168.100.1 -2 --flood & ip netns exec right ip link add hsr2 type hsr slave1 veth3 slave2 veth5 ip netns exec right ip a a 192.168.100.3/24 dev hsr2 ip netns exec right ip link set hsr2 up ip netns exec right ip n a 192.168.100.1 dev hsr2 lladdr \ fc:00:00:00:00:02 nud permanent ip netns exec right ip n r 192.168.100.1 dev hsr2 lladdr \ fc:00:00:00:00:02 nud permanent for i in {1..100} do ip netns exec right ping 192.168.100.1 & done ip netns exec right hping3 192.168.100.1 -2 --flood & while : do ip link add hsr0 type hsr slave1 veth0 slave2 veth2 ip a a 192.168.100.1/24 dev hsr0 ip link set hsr0 up ip link del hsr0 done Splat looks like: [ 120.954938][ C0] general protection fault, probably for non-canonical address 0xdffffc0000000006: 0000 [#1]I [ 120.957761][ C0] KASAN: null-ptr-deref in range [0x0000000000000030-0x0000000000000037] [ 120.959064][ C0] CPU: 0 PID: 1511 Comm: hping3 Not tainted 5.6.0-rc5+ #460 [ 120.960054][ C0] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 120.962261][ C0] RIP: 0010:hsr_addr_is_self+0x65/0x2a0 [hsr] [ 120.963149][ C0] Code: 44 24 18 70 73 2f c0 48 c1 eb 03 48 8d 04 13 c7 00 f1 f1 f1 f1 c7 40 04 00 f2 f2 f2 4 [ 120.966277][ C0] RSP: 0018:ffff8880d9c09af0 EFLAGS: 00010206 [ 120.967293][ C0] RAX: 0000000000000006 RBX: 1ffff1101b38135f RCX: 0000000000000000 [ 120.968516][ C0] RDX: dffffc0000000000 RSI: ffff8880d17cb208 RDI: 0000000000000000 [ 120.969718][ C0] RBP: 0000000000000030 R08: ffffed101b3c0e3c R09: 0000000000000001 [ 120.972203][ C0] R10: 0000000000000001 R11: ffffed101b3c0e3b R12: 0000000000000000 [ 120.973379][ C0] R13: ffff8880aaf80100 R14: ffff8880aaf800f2 R15: ffff8880aaf80040 [ 120.974410][ C0] FS: 00007f58e693f740(0000) GS:ffff8880d9c00000(0000) knlGS:0000000000000000 [ 120.979794][ C0] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 120.980773][ C0] CR2: 00007ffcb8b38f29 CR3: 00000000afe8e001 CR4: 00000000000606f0 [ 120.981945][ C0] Call Trace: [ 120.982411][ C0] [ 120.982848][ C0] ? hsr_add_node+0x8c0/0x8c0 [hsr] [ 120.983522][ C0] ? rcu_read_lock_held+0x90/0xa0 [ 120.984159][ C0] ? rcu_read_lock_sched_held+0xc0/0xc0 [ 120.984944][ C0] hsr_handle_frame+0x1db/0x4e0 [hsr] [ 120.985597][ C0] ? hsr_nl_nodedown+0x2b0/0x2b0 [hsr] [ 120.986289][ C0] __netif_receive_skb_core+0x6bf/0x3170 [ 120.992513][ C0] ? check_chain_key+0x236/0x5d0 [ 120.993223][ C0] ? do_xdp_generic+0x1460/0x1460 [ 120.993875][ C0] ? register_lock_class+0x14d0/0x14d0 [ 120.994609][ C0] ? __netif_receive_skb_one_core+0x8d/0x160 [ 120.995377][ C0] __netif_receive_skb_one_core+0x8d/0x160 [ 120.996204][ C0] ? __netif_receive_skb_core+0x3170/0x3170 [ ... ] Reported-by: syzbot+fcf5dd39282ceb27108d@syzkaller.appspotmail.com Fixes: c5a759117210 ("net/hsr: Use list_head (and rcu) instead of array for slave devices.") Signed-off-by: Taehee Yoo Signed-off-by: David S. Miller --- net/hsr/hsr_slave.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c index fbfd0db182b7..a9104d42aafb 100644 --- a/net/hsr/hsr_slave.c +++ b/net/hsr/hsr_slave.c @@ -145,16 +145,16 @@ int hsr_add_port(struct hsr_priv *hsr, struct net_device *dev, if (!port) return -ENOMEM; + port->hsr = hsr; + port->dev = dev; + port->type = type; + if (type != HSR_PT_MASTER) { res = hsr_portdev_setup(dev, port); if (res) goto fail_dev_setup; } - port->hsr = hsr; - port->dev = dev; - port->type = type; - list_add_tail_rcu(&port->port_list, &hsr->ports); synchronize_rcu(); From 06e9bfc1e57daeead541c9f02968412cd284b189 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Sat, 21 Mar 2020 13:05:14 +0100 Subject: [PATCH 324/372] ionic: make spdxcheck.py happy Headers ionic_if.h and ionic_regs.h are licensed under three alternative licenses and the used SPDX-License-Identifier expression makes ./scripts/spdxcheck.py complain: drivers/net/ethernet/pensando/ionic/ionic_if.h: 1:52 Syntax error: OR drivers/net/ethernet/pensando/ionic/ionic_regs.h: 1:52 Syntax error: OR As OR is associative, it is irrelevant if the parentheses are put around the first or the second OR-expression. Simply add parentheses to make spdxcheck.py happy. Signed-off-by: Lukas Bulwahn Acked-by: Shannon Nelson Signed-off-by: David S. Miller --- drivers/net/ethernet/pensando/ionic/ionic_if.h | 2 +- drivers/net/ethernet/pensando/ionic/ionic_regs.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_if.h b/drivers/net/ethernet/pensando/ionic/ionic_if.h index 54547d53b0f2..51adf5059834 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_if.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_if.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB OR BSD-2-Clause */ +/* SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) OR BSD-2-Clause */ /* Copyright (c) 2017-2019 Pensando Systems, Inc. All rights reserved. */ #ifndef _IONIC_IF_H_ diff --git a/drivers/net/ethernet/pensando/ionic/ionic_regs.h b/drivers/net/ethernet/pensando/ionic/ionic_regs.h index 03ee5a36472b..2e174f45c030 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_regs.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_regs.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB OR BSD-2-Clause */ +/* SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) OR BSD-2-Clause */ /* Copyright (c) 2018-2019 Pensando Systems, Inc. All rights reserved. */ #ifndef IONIC_REGS_H From 2091a3d42b4f339eaeed11228e0cbe9d4f92f558 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Sat, 21 Mar 2020 14:08:29 +0100 Subject: [PATCH 325/372] slcan: not call free_netdev before rtnl_unlock in slcan_open As the description before netdev_run_todo, we cannot call free_netdev before rtnl_unlock, fix it by reorder the code. This patch is a 1:1 copy of upstream slip.c commit f596c87005f7 ("slip: not call free_netdev before rtnl_unlock in slip_open"). Reported-by: yangerkun Signed-off-by: Oliver Hartkopp Signed-off-by: David S. Miller --- drivers/net/can/slcan.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c index 2f5c287eac95..a3664281a33f 100644 --- a/drivers/net/can/slcan.c +++ b/drivers/net/can/slcan.c @@ -625,7 +625,10 @@ err_free_chan: tty->disc_data = NULL; clear_bit(SLF_INUSE, &sl->flags); slc_free_netdev(sl->dev); + /* do not call free_netdev before rtnl_unlock */ + rtnl_unlock(); free_netdev(sl->dev); + return err; err_exit: rtnl_unlock(); From 9de9aa487daff7a5c73434c24269b44ed6a428e6 Mon Sep 17 00:00:00 2001 From: Emil Renner Berthing Date: Sat, 21 Mar 2020 15:36:19 +0100 Subject: [PATCH 326/372] net: stmmac: dwmac-rk: fix error path in rk_gmac_probe Make sure we clean up devicetree related configuration also when clock init fails. Fixes: fecd4d7eef8b ("net: stmmac: dwmac-rk: Add integrated PHY support") Signed-off-by: Emil Renner Berthing Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index dc50ba13a746..2d5573b3dee1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -1411,7 +1411,7 @@ static int rk_gmac_probe(struct platform_device *pdev) ret = rk_gmac_clk_init(plat_dat); if (ret) - return ret; + goto err_remove_config_dt; ret = rk_gmac_powerup(plat_dat->bsp_priv); if (ret) From 749f6f6843115b424680f1aada3c0dd613ad807c Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Tue, 17 Mar 2020 20:04:54 +0200 Subject: [PATCH 327/372] net: phy: dp83867: w/a for fld detect threshold bootstrapping issue When the DP83867 PHY is strapped to enable Fast Link Drop (FLD) feature STRAP_STS2.STRAP_ FLD (reg 0x006F bit 10), the Energy Lost Threshold for FLD Energy Lost Mode FLD_THR_CFG.ENERGY_LOST_FLD_THR (reg 0x002e bits 2:0) will be defaulted to 0x2. This may cause the phy link to be unstable. The new DP83867 DM recommends to always restore ENERGY_LOST_FLD_THR to 0x1. Hence, restore default value of FLD_THR_CFG.ENERGY_LOST_FLD_THR to 0x1 when FLD is enabled by bootstrapping as recommended by DM. Signed-off-by: Grygorii Strashko Signed-off-by: David S. Miller --- drivers/net/phy/dp83867.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c index 967f57ed0b65..9a07ad137c2e 100644 --- a/drivers/net/phy/dp83867.c +++ b/drivers/net/phy/dp83867.c @@ -28,7 +28,8 @@ #define DP83867_CTRL 0x1f /* Extended Registers */ -#define DP83867_CFG4 0x0031 +#define DP83867_FLD_THR_CFG 0x002e +#define DP83867_CFG4 0x0031 #define DP83867_CFG4_SGMII_ANEG_MASK (BIT(5) | BIT(6)) #define DP83867_CFG4_SGMII_ANEG_TIMER_11MS (3 << 5) #define DP83867_CFG4_SGMII_ANEG_TIMER_800US (2 << 5) @@ -91,6 +92,7 @@ #define DP83867_STRAP_STS2_CLK_SKEW_RX_MASK GENMASK(2, 0) #define DP83867_STRAP_STS2_CLK_SKEW_RX_SHIFT 0 #define DP83867_STRAP_STS2_CLK_SKEW_NONE BIT(2) +#define DP83867_STRAP_STS2_STRAP_FLD BIT(10) /* PHY CTRL bits */ #define DP83867_PHYCR_TX_FIFO_DEPTH_SHIFT 14 @@ -125,6 +127,9 @@ /* CFG4 bits */ #define DP83867_CFG4_PORT_MIRROR_EN BIT(0) +/* FLD_THR_CFG */ +#define DP83867_FLD_THR_CFG_ENERGY_LOST_THR_MASK 0x7 + enum { DP83867_PORT_MIRROING_KEEP, DP83867_PORT_MIRROING_EN, @@ -476,6 +481,20 @@ static int dp83867_config_init(struct phy_device *phydev) phy_clear_bits_mmd(phydev, DP83867_DEVADDR, DP83867_CFG4, BIT(7)); + bs = phy_read_mmd(phydev, DP83867_DEVADDR, DP83867_STRAP_STS2); + if (bs & DP83867_STRAP_STS2_STRAP_FLD) { + /* When using strap to enable FLD, the ENERGY_LOST_FLD_THR will + * be set to 0x2. This may causes the PHY link to be unstable - + * the default value 0x1 need to be restored. + */ + ret = phy_modify_mmd(phydev, DP83867_DEVADDR, + DP83867_FLD_THR_CFG, + DP83867_FLD_THR_CFG_ENERGY_LOST_THR_MASK, + 0x1); + if (ret) + return ret; + } + if (phy_interface_is_rgmii(phydev) || phydev->interface == PHY_INTERFACE_MODE_SGMII) { val = phy_read(phydev, MII_DP83867_PHYCTRL); From 9a9ba2a4aaaa4e75a5f118b8ab642a55c34f95cb Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Tue, 17 Mar 2020 17:05:36 -0700 Subject: [PATCH 328/372] net: bcmgenet: always enable status blocks The hardware offloading of the NETIF_F_HW_CSUM and NETIF_F_RXCSUM features requires the use of Transmit Status Blocks before transmit frame data and Receive Status Blocks before receive frame data to carry the checksum information. Unfortunately, these status blocks are currently only enabled when the NETIF_F_HW_CSUM feature is enabled. As a result NETIF_F_RXCSUM will not actually be offloaded to the hardware unless both it and NETIF_F_HW_CSUM are enabled. Fortunately, that is the default configuration. This commit addresses this issue by always enabling the use of status blocks on both transmit and receive frames. Further, it replaces the use of a dedicated flag within the driver private data structure with direct use of the netdev features flags. Fixes: 810155397890 ("net: bcmgenet: use CHECKSUM_COMPLETE for NETIF_F_RXCSUM") Signed-off-by: Doug Berger Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- .../net/ethernet/broadcom/genet/bcmgenet.c | 132 +++++------------- .../net/ethernet/broadcom/genet/bcmgenet.h | 3 +- 2 files changed, 38 insertions(+), 97 deletions(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index ce64b4e47feb..1d678bee2cc9 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -94,12 +94,6 @@ static inline void dmadesc_set_length_status(struct bcmgenet_priv *priv, bcmgenet_writel(value, d + DMA_DESC_LENGTH_STATUS); } -static inline u32 dmadesc_get_length_status(struct bcmgenet_priv *priv, - void __iomem *d) -{ - return bcmgenet_readl(d + DMA_DESC_LENGTH_STATUS); -} - static inline void dmadesc_set_addr(struct bcmgenet_priv *priv, void __iomem *d, dma_addr_t addr) @@ -508,61 +502,6 @@ static int bcmgenet_set_link_ksettings(struct net_device *dev, return phy_ethtool_ksettings_set(dev->phydev, cmd); } -static void bcmgenet_set_rx_csum(struct net_device *dev, - netdev_features_t wanted) -{ - struct bcmgenet_priv *priv = netdev_priv(dev); - u32 rbuf_chk_ctrl; - bool rx_csum_en; - - rx_csum_en = !!(wanted & NETIF_F_RXCSUM); - - rbuf_chk_ctrl = bcmgenet_rbuf_readl(priv, RBUF_CHK_CTRL); - - /* enable rx checksumming */ - if (rx_csum_en) - rbuf_chk_ctrl |= RBUF_RXCHK_EN | RBUF_L3_PARSE_DIS; - else - rbuf_chk_ctrl &= ~RBUF_RXCHK_EN; - priv->desc_rxchk_en = rx_csum_en; - - /* If UniMAC forwards CRC, we need to skip over it to get - * a valid CHK bit to be set in the per-packet status word - */ - if (rx_csum_en && priv->crc_fwd_en) - rbuf_chk_ctrl |= RBUF_SKIP_FCS; - else - rbuf_chk_ctrl &= ~RBUF_SKIP_FCS; - - bcmgenet_rbuf_writel(priv, rbuf_chk_ctrl, RBUF_CHK_CTRL); -} - -static void bcmgenet_set_tx_csum(struct net_device *dev, - netdev_features_t wanted) -{ - struct bcmgenet_priv *priv = netdev_priv(dev); - bool desc_64b_en; - u32 tbuf_ctrl, rbuf_ctrl; - - tbuf_ctrl = bcmgenet_tbuf_ctrl_get(priv); - rbuf_ctrl = bcmgenet_rbuf_readl(priv, RBUF_CTRL); - - desc_64b_en = !!(wanted & NETIF_F_HW_CSUM); - - /* enable 64 bytes descriptor in both directions (RBUF and TBUF) */ - if (desc_64b_en) { - tbuf_ctrl |= RBUF_64B_EN; - rbuf_ctrl |= RBUF_64B_EN; - } else { - tbuf_ctrl &= ~RBUF_64B_EN; - rbuf_ctrl &= ~RBUF_64B_EN; - } - priv->desc_64b_en = desc_64b_en; - - bcmgenet_tbuf_ctrl_set(priv, tbuf_ctrl); - bcmgenet_rbuf_writel(priv, rbuf_ctrl, RBUF_CTRL); -} - static int bcmgenet_set_features(struct net_device *dev, netdev_features_t features) { @@ -578,9 +517,6 @@ static int bcmgenet_set_features(struct net_device *dev, reg = bcmgenet_umac_readl(priv, UMAC_CMD); priv->crc_fwd_en = !!(reg & CMD_CRC_FWD); - bcmgenet_set_tx_csum(dev, features); - bcmgenet_set_rx_csum(dev, features); - clk_disable_unprepare(priv->clk); return ret; @@ -1475,8 +1411,8 @@ static void bcmgenet_tx_reclaim_all(struct net_device *dev) /* Reallocate the SKB to put enough headroom in front of it and insert * the transmit checksum offsets in the descriptors */ -static struct sk_buff *bcmgenet_put_tx_csum(struct net_device *dev, - struct sk_buff *skb) +static struct sk_buff *bcmgenet_add_tsb(struct net_device *dev, + struct sk_buff *skb) { struct bcmgenet_priv *priv = netdev_priv(dev); struct status_64 *status = NULL; @@ -1590,13 +1526,11 @@ static netdev_tx_t bcmgenet_xmit(struct sk_buff *skb, struct net_device *dev) */ GENET_CB(skb)->bytes_sent = skb->len; - /* set the SKB transmit checksum */ - if (priv->desc_64b_en) { - skb = bcmgenet_put_tx_csum(dev, skb); - if (!skb) { - ret = NETDEV_TX_OK; - goto out; - } + /* add the Transmit Status Block */ + skb = bcmgenet_add_tsb(dev, skb); + if (!skb) { + ret = NETDEV_TX_OK; + goto out; } for (i = 0; i <= nr_frags; i++) { @@ -1775,6 +1709,9 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring, while ((rxpktprocessed < rxpkttoprocess) && (rxpktprocessed < budget)) { + struct status_64 *status; + __be16 rx_csum; + cb = &priv->rx_cbs[ring->read_ptr]; skb = bcmgenet_rx_refill(priv, cb); @@ -1783,20 +1720,12 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring, goto next; } - if (!priv->desc_64b_en) { - dma_length_status = - dmadesc_get_length_status(priv, cb->bd_addr); - } else { - struct status_64 *status; - __be16 rx_csum; - - status = (struct status_64 *)skb->data; - dma_length_status = status->length_status; + status = (struct status_64 *)skb->data; + dma_length_status = status->length_status; + if (dev->features & NETIF_F_RXCSUM) { rx_csum = (__force __be16)(status->rx_csum & 0xffff); - if (priv->desc_rxchk_en) { - skb->csum = (__force __wsum)ntohs(rx_csum); - skb->ip_summed = CHECKSUM_COMPLETE; - } + skb->csum = (__force __wsum)ntohs(rx_csum); + skb->ip_summed = CHECKSUM_COMPLETE; } /* DMA flags and length are still valid no matter how @@ -1840,14 +1769,10 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring, } /* error packet */ skb_put(skb, len); - if (priv->desc_64b_en) { - skb_pull(skb, 64); - len -= 64; - } - /* remove hardware 2bytes added for IP alignment */ - skb_pull(skb, 2); - len -= 2; + /* remove RSB and hardware 2bytes added for IP alignment */ + skb_pull(skb, 66); + len -= 66; if (priv->crc_fwd_en) { skb_trim(skb, len - ETH_FCS_LEN); @@ -2038,11 +1963,28 @@ static void init_umac(struct bcmgenet_priv *priv) bcmgenet_umac_writel(priv, ENET_MAX_MTU_SIZE, UMAC_MAX_FRAME_LEN); - /* init rx registers, enable ip header optimization */ + /* init tx registers, enable TSB */ + reg = bcmgenet_tbuf_ctrl_get(priv); + reg |= TBUF_64B_EN; + bcmgenet_tbuf_ctrl_set(priv, reg); + + /* init rx registers, enable ip header optimization and RSB */ reg = bcmgenet_rbuf_readl(priv, RBUF_CTRL); - reg |= RBUF_ALIGN_2B; + reg |= RBUF_ALIGN_2B | RBUF_64B_EN; bcmgenet_rbuf_writel(priv, reg, RBUF_CTRL); + /* enable rx checksumming */ + reg = bcmgenet_rbuf_readl(priv, RBUF_CHK_CTRL); + reg |= RBUF_RXCHK_EN | RBUF_L3_PARSE_DIS; + /* If UniMAC forwards CRC, we need to skip over it to get + * a valid CHK bit to be set in the per-packet status word + */ + if (priv->crc_fwd_en) + reg |= RBUF_SKIP_FCS; + else + reg &= ~RBUF_SKIP_FCS; + bcmgenet_rbuf_writel(priv, reg, RBUF_CHK_CTRL); + if (!GENET_IS_V1(priv) && !GENET_IS_V2(priv)) bcmgenet_rbuf_writel(priv, 1, RBUF_TBUF_SIZE_CTRL); diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index 61a6fe9f4cec..daf8fb2c39b6 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -273,6 +273,7 @@ struct bcmgenet_mib_counters { #define RBUF_FLTR_LEN_SHIFT 8 #define TBUF_CTRL 0x00 +#define TBUF_64B_EN (1 << 0) #define TBUF_BP_MC 0x0C #define TBUF_ENERGY_CTRL 0x14 #define TBUF_EEE_EN (1 << 0) @@ -662,8 +663,6 @@ struct bcmgenet_priv { unsigned int irq0_stat; /* HW descriptors/checksum variables */ - bool desc_64b_en; - bool desc_rxchk_en; bool crc_fwd_en; u32 dma_max_burst_length; From 83a9b6f639e9f6b632337f9776de17d51d969c77 Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Wed, 18 Mar 2020 18:53:21 +0000 Subject: [PATCH 329/372] selftests/net: add definition for SOL_DCCP to fix compilation errors for old libc Many systems build/test up-to-date kernels with older libcs, and an older glibc (2.17) lacks the definition of SOL_DCCP in /usr/include/bits/socket.h (it was added in the 4.6 timeframe). Adding the definition to the test program avoids a compilation failure that gets in the way of building tools/testing/selftests/net. The test itself will work once the definition is added; either skipping due to DCCP not being configured in the kernel under test or passing, so there are no other more up-to-date glibc dependencies here it seems beyond that missing definition. Fixes: 11fb60d1089f ("selftests: net: reuseport_addr_any: add DCCP") Signed-off-by: Alan Maguire Signed-off-by: David S. Miller --- tools/testing/selftests/net/reuseport_addr_any.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/net/reuseport_addr_any.c b/tools/testing/selftests/net/reuseport_addr_any.c index c6233935fed1..b8475cb29be7 100644 --- a/tools/testing/selftests/net/reuseport_addr_any.c +++ b/tools/testing/selftests/net/reuseport_addr_any.c @@ -21,6 +21,10 @@ #include #include +#ifndef SOL_DCCP +#define SOL_DCCP 269 +#endif + static const char *IP4_ADDR = "127.0.0.1"; static const char *IP6_ADDR = "::1"; static const char *IP4_MAPPED6 = "::ffff:127.0.0.1"; From 16fbf79b0f83bc752cee8589279f1ebfe57b3b6e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 22 Mar 2020 18:31:56 -0700 Subject: [PATCH 330/372] Linux 5.6-rc7 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 16d8271192d1..e56bf7ef182d 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 6 SUBLEVEL = 0 -EXTRAVERSION = -rc6 +EXTRAVERSION = -rc7 NAME = Kleptomaniac Octopus # *DOCUMENTATION* From 988aad2f111c768c66efc87d073c73ecb32b682c Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 12 Mar 2020 09:23:53 -0700 Subject: [PATCH 331/372] dmaengine: idxd: fix off by one on cdev dwq refcount The refcount check for dedicated workqueue (dwq) is off by one and allows more than 1 user to open the char device. Fix check so only a single user can open the device. Fixes: 42d279f9137a ("dmaengine: idxd: add char driver to expose submission portal to userland") Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/158403020187.10208.14117394394540710774.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/cdev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index df47be612ebb..989b7a25ca61 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -81,9 +81,9 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp) dev = &idxd->pdev->dev; idxd_cdev = &wq->idxd_cdev; - dev_dbg(dev, "%s called\n", __func__); + dev_dbg(dev, "%s called: %d\n", __func__, idxd_wq_refcount(wq)); - if (idxd_wq_refcount(wq) > 1 && wq_dedicated(wq)) + if (idxd_wq_refcount(wq) > 0 && wq_dedicated(wq)) return -EBUSY; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); From 01c4df39a2bb732e0593ba3cdacc1552fd07f7b9 Mon Sep 17 00:00:00 2001 From: Zhou Wang Date: Fri, 13 Mar 2020 09:23:44 +0800 Subject: [PATCH 332/372] MAINTAINERS: Add maintainer for HiSilicon DMA engine driver Add myself as the maintainer of HiSilicon DMA engine driver. Signed-off-by: Zhou Wang Link: https://lore.kernel.org/r/1584062624-196854-1-git-send-email-wangzhou1@hisilicon.com Signed-off-by: Vinod Koul --- MAINTAINERS | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 44c6a4abef2a..d8cbbbd8429d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7517,6 +7517,12 @@ F: include/uapi/linux/if_hippi.h F: net/802/hippi.c F: drivers/net/hippi/ +HISILICON DMA DRIVER +M: Zhou Wang +L: dmaengine@vger.kernel.org +S: Maintained +F: drivers/dma/hisi_dma.c + HISILICON SECURITY ENGINE V2 DRIVER (SEC2) M: Zaibo Xu L: linux-crypto@vger.kernel.org From 018af9be3dd54e6f24f828966bdd873f4d63ad9b Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 18 Mar 2020 20:12:09 +0100 Subject: [PATCH 333/372] dmaengine: ti: k3-udma-glue: Fix an error handling path in 'k3_udma_glue_cfg_rx_flow()' All but one error handling paths in the 'k3_udma_glue_cfg_rx_flow()' function 'goto err' and call 'k3_udma_glue_release_rx_flow()'. This not correct because this function has a 'channel->flows_ready--;' at the end, but 'flows_ready' has not been incremented here, when we branch to the error handling path. In order to keep a correct value in 'flows_ready', un-roll 'k3_udma_glue_release_rx_flow()', simplify it, add some labels and branch at the correct places when an error is detected. Doing so, we also NULLify 'flow->udma_rflow' in a path that was lacking it. Fixes: d70241913413 ("dmaengine: ti: k3-udma: Add glue layer for non DMAengine user") Signed-off-by: Christophe JAILLET Acked-by: Peter Ujfalusi Link: https://lore.kernel.org/r/20200318191209.1267-1-christophe.jaillet@wanadoo.fr Signed-off-by: Vinod Koul --- drivers/dma/ti/k3-udma-glue.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/drivers/dma/ti/k3-udma-glue.c b/drivers/dma/ti/k3-udma-glue.c index c1511298ece2..4d7561a1b3e3 100644 --- a/drivers/dma/ti/k3-udma-glue.c +++ b/drivers/dma/ti/k3-udma-glue.c @@ -564,12 +564,12 @@ static int k3_udma_glue_cfg_rx_flow(struct k3_udma_glue_rx_channel *rx_chn, if (IS_ERR(flow->udma_rflow)) { ret = PTR_ERR(flow->udma_rflow); dev_err(dev, "UDMAX rflow get err %d\n", ret); - goto err; + return ret; } if (flow->udma_rflow_id != xudma_rflow_get_id(flow->udma_rflow)) { - xudma_rflow_put(rx_chn->common.udmax, flow->udma_rflow); - return -ENODEV; + ret = -ENODEV; + goto err_rflow_put; } /* request and cfg rings */ @@ -578,7 +578,7 @@ static int k3_udma_glue_cfg_rx_flow(struct k3_udma_glue_rx_channel *rx_chn, if (!flow->ringrx) { ret = -ENODEV; dev_err(dev, "Failed to get RX ring\n"); - goto err; + goto err_rflow_put; } flow->ringrxfdq = k3_ringacc_request_ring(rx_chn->common.ringacc, @@ -586,19 +586,19 @@ static int k3_udma_glue_cfg_rx_flow(struct k3_udma_glue_rx_channel *rx_chn, if (!flow->ringrxfdq) { ret = -ENODEV; dev_err(dev, "Failed to get RXFDQ ring\n"); - goto err; + goto err_ringrx_free; } ret = k3_ringacc_ring_cfg(flow->ringrx, &flow_cfg->rx_cfg); if (ret) { dev_err(dev, "Failed to cfg ringrx %d\n", ret); - goto err; + goto err_ringrxfdq_free; } ret = k3_ringacc_ring_cfg(flow->ringrxfdq, &flow_cfg->rxfdq_cfg); if (ret) { dev_err(dev, "Failed to cfg ringrxfdq %d\n", ret); - goto err; + goto err_ringrxfdq_free; } if (rx_chn->remote) { @@ -648,7 +648,7 @@ static int k3_udma_glue_cfg_rx_flow(struct k3_udma_glue_rx_channel *rx_chn, if (ret) { dev_err(dev, "flow%d config failed: %d\n", flow->udma_rflow_id, ret); - goto err; + goto err_ringrxfdq_free; } rx_chn->flows_ready++; @@ -656,8 +656,17 @@ static int k3_udma_glue_cfg_rx_flow(struct k3_udma_glue_rx_channel *rx_chn, flow->udma_rflow_id, rx_chn->flows_ready); return 0; -err: - k3_udma_glue_release_rx_flow(rx_chn, flow_idx); + +err_ringrxfdq_free: + k3_ringacc_ring_free(flow->ringrxfdq); + +err_ringrx_free: + k3_ringacc_ring_free(flow->ringrx); + +err_rflow_put: + xudma_rflow_put(rx_chn->common.udmax, flow->udma_rflow); + flow->udma_rflow = NULL; + return ret; } From cf52c8a776d1b31493a987a0fb3c4dbaa0f7f2cc Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Tue, 17 Mar 2020 12:33:41 +0200 Subject: [PATCH 334/372] iwlwifi: pcie: add 0x2526/0x401* devices back to cfg detection Three devices, with PCI device ID 0x2526 and subdevice IDs 0x4010, 0x4018 and 0x401C were removed accidentally. Add them back. Reported-by: Brett Hassal Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=206661 Fixes: 0b295a1eb81f ("iwlwifi: add device name to device_info") Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20200317123331.16762b29f26c.I928bcaa799e7b3d33838c0667714eeb9fa665290@changeid --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 97f227f3cbc3..f441b20e1642 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -981,6 +981,9 @@ static const struct iwl_dev_info iwl_dev_info_table[] = { IWL_DEV_INFO(0x2526, 0x0014, iwl9260_2ac_160_cfg, iwl9260_160_name), IWL_DEV_INFO(0x2526, 0x0018, iwl9260_2ac_160_cfg, iwl9260_160_name), IWL_DEV_INFO(0x2526, 0x001C, iwl9260_2ac_160_cfg, iwl9260_160_name), + IWL_DEV_INFO(0x2526, 0x4010, iwl9260_2ac_160_cfg, iwl9260_160_name), + IWL_DEV_INFO(0x2526, 0x4018, iwl9260_2ac_160_cfg, iwl9260_160_name), + IWL_DEV_INFO(0x2526, 0x401C, iwl9260_2ac_160_cfg, iwl9260_160_name), IWL_DEV_INFO(0x2526, 0x6010, iwl9260_2ac_160_cfg, iwl9260_160_name), IWL_DEV_INFO(0x2526, 0x6014, iwl9260_2ac_160_cfg, iwl9260_160_name), IWL_DEV_INFO(0x2526, 0x8014, iwl9260_2ac_160_cfg, iwl9260_160_name), From 0433ae556ec8fb588a0735ddb09d3eb9806df479 Mon Sep 17 00:00:00 2001 From: Golan Ben Ami Date: Wed, 18 Mar 2020 08:12:54 +0200 Subject: [PATCH 335/372] iwlwifi: don't send GEO_TX_POWER_LIMIT if no wgds table The GEO_TX_POWER_LIMIT command was sent although there is no wgds table, so the fw got wrong SAR values from the driver. Fix this by avoiding sending the command if no wgds tables are available. Signed-off-by: Golan Ben Ami Fixes: 39c1a9728f93 ("iwlwifi: refactor the SAR tables from mvm to acpi") Signed-off-by: Luca Coelho Tested-By: Jonathan McDowell Tested-by: Len Brown Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20200318081237.46db40617cc6.Id5cf852ec8c5dbf20ba86bad7b165a0c828f8b2e@changeid --- drivers/net/wireless/intel/iwlwifi/fw/acpi.c | 14 ++++++++------ drivers/net/wireless/intel/iwlwifi/fw/acpi.h | 14 ++++++++------ drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 9 ++++++++- 3 files changed, 24 insertions(+), 13 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c index 48d375a86d86..ba2aff3af0fe 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c @@ -6,7 +6,7 @@ * GPL LICENSE SUMMARY * * Copyright(c) 2017 Intel Deutschland GmbH - * Copyright (C) 2019 Intel Corporation + * Copyright (C) 2019 - 2020 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -27,7 +27,7 @@ * BSD LICENSE * * Copyright(c) 2017 Intel Deutschland GmbH - * Copyright (C) 2019 Intel Corporation + * Copyright (C) 2019 - 2020 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -491,13 +491,13 @@ int iwl_validate_sar_geo_profile(struct iwl_fw_runtime *fwrt, } IWL_EXPORT_SYMBOL(iwl_validate_sar_geo_profile); -void iwl_sar_geo_init(struct iwl_fw_runtime *fwrt, - struct iwl_per_chain_offset_group *table) +int iwl_sar_geo_init(struct iwl_fw_runtime *fwrt, + struct iwl_per_chain_offset_group *table) { int ret, i, j; if (!iwl_sar_geo_support(fwrt)) - return; + return -EOPNOTSUPP; ret = iwl_sar_get_wgds_table(fwrt); if (ret < 0) { @@ -505,7 +505,7 @@ void iwl_sar_geo_init(struct iwl_fw_runtime *fwrt, "Geo SAR BIOS table invalid or unavailable. (%d)\n", ret); /* we don't fail if the table is not available */ - return; + return -ENOENT; } BUILD_BUG_ON(ACPI_NUM_GEO_PROFILES * ACPI_WGDS_NUM_BANDS * @@ -530,5 +530,7 @@ void iwl_sar_geo_init(struct iwl_fw_runtime *fwrt, i, j, value[1], value[2], value[0]); } } + + return 0; } IWL_EXPORT_SYMBOL(iwl_sar_geo_init); diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h index 4a6e8262974b..5590e5cc8fbb 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h @@ -6,7 +6,7 @@ * GPL LICENSE SUMMARY * * Copyright(c) 2017 Intel Deutschland GmbH - * Copyright(c) 2018 - 2019 Intel Corporation + * Copyright(c) 2018 - 2020 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -27,7 +27,7 @@ * BSD LICENSE * * Copyright(c) 2017 Intel Deutschland GmbH - * Copyright(c) 2018 - 2019 Intel Corporation + * Copyright(c) 2018 - 2020 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -171,8 +171,9 @@ bool iwl_sar_geo_support(struct iwl_fw_runtime *fwrt); int iwl_validate_sar_geo_profile(struct iwl_fw_runtime *fwrt, struct iwl_host_cmd *cmd); -void iwl_sar_geo_init(struct iwl_fw_runtime *fwrt, - struct iwl_per_chain_offset_group *table); +int iwl_sar_geo_init(struct iwl_fw_runtime *fwrt, + struct iwl_per_chain_offset_group *table); + #else /* CONFIG_ACPI */ static inline void *iwl_acpi_get_object(struct device *dev, acpi_string method) @@ -243,9 +244,10 @@ static inline int iwl_validate_sar_geo_profile(struct iwl_fw_runtime *fwrt, return -ENOENT; } -static inline void iwl_sar_geo_init(struct iwl_fw_runtime *fwrt, - struct iwl_per_chain_offset_group *table) +static inline int iwl_sar_geo_init(struct iwl_fw_runtime *fwrt, + struct iwl_per_chain_offset_group *table) { + return -ENOENT; } #endif /* CONFIG_ACPI */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 54c094e88474..98263cd37944 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -762,10 +762,17 @@ static int iwl_mvm_sar_geo_init(struct iwl_mvm *mvm) u16 cmd_wide_id = WIDE_ID(PHY_OPS_GROUP, GEO_TX_POWER_LIMIT); union geo_tx_power_profiles_cmd cmd; u16 len; + int ret; cmd.geo_cmd.ops = cpu_to_le32(IWL_PER_CHAIN_OFFSET_SET_TABLES); - iwl_sar_geo_init(&mvm->fwrt, cmd.geo_cmd.table); + ret = iwl_sar_geo_init(&mvm->fwrt, cmd.geo_cmd.table); + /* + * It is a valid scenario to not support SAR, or miss wgds table, + * but in that case there is no need to send the command. + */ + if (ret) + return 0; cmd.geo_cmd.table_revision = cpu_to_le32(mvm->fwrt.geo_rev); From 6cd6cbf593bfa3ae6fc3ed34ac21da4d35045425 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Mar 2020 19:21:02 -0700 Subject: [PATCH 336/372] tcp: repair: fix TCP_QUEUE_SEQ implementation When application uses TCP_QUEUE_SEQ socket option to change tp->rcv_next, we must also update tp->copied_seq. Otherwise, stuff relying on tcp_inq() being precise can eventually be confused. For example, tcp_zerocopy_receive() might crash because it does not expect tcp_recv_skb() to return NULL. We could add tests in various places to fix the issue, or simply make sure tcp_inq() wont return a random value, and leave fast path as it is. Note that this fixes ioctl(fd, SIOCINQ, &val) at the same time. Fixes: ee9952831cfd ("tcp: Initial repair mode") Fixes: 05255b823a61 ("tcp: add TCP_ZEROCOPY_RECEIVE support for zerocopy receive") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index eb2d80519f8e..dc77c303e6f7 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2948,8 +2948,10 @@ static int do_tcp_setsockopt(struct sock *sk, int level, err = -EPERM; else if (tp->repair_queue == TCP_SEND_QUEUE) WRITE_ONCE(tp->write_seq, val); - else if (tp->repair_queue == TCP_RECV_QUEUE) + else if (tp->repair_queue == TCP_RECV_QUEUE) { WRITE_ONCE(tp->rcv_nxt, val); + WRITE_ONCE(tp->copied_seq, val); + } else err = -EINVAL; break; From dddeb30bfc43926620f954266fd12c65a7206f07 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Thu, 19 Mar 2020 22:54:21 -0400 Subject: [PATCH 337/372] ipv4: fix a RCU-list lock in inet_dump_fib() There is a place, inet_dump_fib() fib_table_dump fn_trie_dump_leaf() hlist_for_each_entry_rcu() without rcu_read_lock() will trigger a warning, WARNING: suspicious RCU usage ----------------------------- net/ipv4/fib_trie.c:2216 RCU-list traversed in non-reader section!! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 1 lock held by ip/1923: #0: ffffffff8ce76e40 (rtnl_mutex){+.+.}, at: netlink_dump+0xd6/0x840 Call Trace: dump_stack+0xa1/0xea lockdep_rcu_suspicious+0x103/0x10d fn_trie_dump_leaf+0x581/0x590 fib_table_dump+0x15f/0x220 inet_dump_fib+0x4ad/0x5d0 netlink_dump+0x350/0x840 __netlink_dump_start+0x315/0x3e0 rtnetlink_rcv_msg+0x4d1/0x720 netlink_rcv_skb+0xf0/0x220 rtnetlink_rcv+0x15/0x20 netlink_unicast+0x306/0x460 netlink_sendmsg+0x44b/0x770 __sys_sendto+0x259/0x270 __x64_sys_sendto+0x80/0xa0 do_syscall_64+0x69/0xf4 entry_SYSCALL_64_after_hwframe+0x49/0xb3 Fixes: 18a8021a7be3 ("net/ipv4: Plumb support for filtering route dumps") Signed-off-by: Qian Cai Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/fib_frontend.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 577db1d50a24..213be9c050ad 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -997,7 +997,9 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) return -ENOENT; } + rcu_read_lock(); err = fib_table_dump(tb, skb, cb, &filter); + rcu_read_unlock(); return skb->len ? : err; } From 0dcdf9f64028ec3b75db6b691560f8286f3898bf Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 20 Mar 2020 16:21:17 +0300 Subject: [PATCH 338/372] NFC: fdp: Fix a signedness bug in fdp_nci_send_patch() The nci_conn_max_data_pkt_payload_size() function sometimes returns -EPROTO so "max_size" needs to be signed for the error handling to work. We can make "payload_size" an int as well. Fixes: a06347c04c13 ("NFC: Add Intel Fields Peak NFC solution driver") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/nfc/fdp/fdp.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/nfc/fdp/fdp.c b/drivers/nfc/fdp/fdp.c index 0cc9ac856fe2..ed2123129e0e 100644 --- a/drivers/nfc/fdp/fdp.c +++ b/drivers/nfc/fdp/fdp.c @@ -184,7 +184,7 @@ static int fdp_nci_send_patch(struct nci_dev *ndev, u8 conn_id, u8 type) const struct firmware *fw; struct sk_buff *skb; unsigned long len; - u8 max_size, payload_size; + int max_size, payload_size; int rc = 0; if ((type == NCI_PATCH_TYPE_OTP && !info->otp_patch) || @@ -207,8 +207,7 @@ static int fdp_nci_send_patch(struct nci_dev *ndev, u8 conn_id, u8 type) while (len) { - payload_size = min_t(unsigned long, (unsigned long) max_size, - len); + payload_size = min_t(unsigned long, max_size, len); skb = nci_skb_alloc(ndev, (NCI_CTRL_HDR_SIZE + payload_size), GFP_KERNEL); From 12a5ba5a1994568d4ceaff9e78c6b0329d953386 Mon Sep 17 00:00:00 2001 From: Pawel Dembicki Date: Fri, 20 Mar 2020 21:46:14 +0100 Subject: [PATCH 339/372] net: qmi_wwan: add support for ASKEY WWHC050 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ASKEY WWHC050 is a mcie LTE modem. The oem configuration states: T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=480 MxCh= 0 D: Ver= 2.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1690 ProdID=7588 Rev=ff.ff S: Manufacturer=Android S: Product=Android S: SerialNumber=813f0eef6e6e C:* #Ifs= 6 Cfg#= 1 Atr=80 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option E: Ad=86(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan E: Ad=88(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 2 Cls=08(stor.) Sub=06 Prot=50 Driver=(none) E: Ad=89(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=125us Tested on openwrt distribution. Signed-off-by: Cezary Jackiewicz Signed-off-by: Pawel Dembicki Acked-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 5754bb6ca0ee..6c738a271257 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1210,6 +1210,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x1435, 0xd182, 5)}, /* Wistron NeWeb D18 */ {QMI_FIXED_INTF(0x1435, 0xd191, 4)}, /* Wistron NeWeb D19Q1 */ {QMI_QUIRK_SET_DTR(0x1508, 0x1001, 4)}, /* Fibocom NL668 series */ + {QMI_FIXED_INTF(0x1690, 0x7588, 4)}, /* ASKEY WWHC050 */ {QMI_FIXED_INTF(0x16d8, 0x6003, 0)}, /* CMOTech 6003 */ {QMI_FIXED_INTF(0x16d8, 0x6007, 0)}, /* CMOTech CHE-628S */ {QMI_FIXED_INTF(0x16d8, 0x6008, 0)}, /* CMOTech CMU-301 */ From 55b474c41e586a5c21c7ab81ff474eb6bacb4322 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Sat, 21 Mar 2020 00:46:50 +0100 Subject: [PATCH 340/372] netlink: check for null extack in cookie helpers Unlike NL_SET_ERR_* macros, nl_set_extack_cookie_u64() and nl_set_extack_cookie_u32() helpers do not check extack argument for null and neither do their callers, as syzbot recently discovered for ethnl_parse_header(). Instead of fixing the callers and leaving the trap in place, add check of null extack to both helpers to make them consistent with NL_SET_ERR_* macros. v2: drop incorrect second Fixes tag Fixes: 2363d73a2f3e ("ethtool: reject unrecognized request flags") Reported-by: syzbot+258a9089477493cea67b@syzkaller.appspotmail.com Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller --- include/linux/netlink.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 4090524c3462..60739d0cbf93 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -115,6 +115,8 @@ static inline void nl_set_extack_cookie_u64(struct netlink_ext_ack *extack, { u64 __cookie = cookie; + if (!extack) + return; memcpy(extack->cookie, &__cookie, sizeof(__cookie)); extack->cookie_len = sizeof(__cookie); } @@ -124,6 +126,8 @@ static inline void nl_set_extack_cookie_u32(struct netlink_ext_ack *extack, { u32 __cookie = cookie; + if (!extack) + return; memcpy(extack->cookie, &__cookie, sizeof(__cookie)); extack->cookie_len = sizeof(__cookie); } From b06d072ccc4b1acd0147b17914b7ad1caa1818bb Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sun, 22 Mar 2020 13:51:13 -0400 Subject: [PATCH 341/372] macsec: restrict to ethernet devices Only attach macsec to ethernet devices. Syzbot was able to trigger a KMSAN warning in macsec_handle_frame by attaching to a phonet device. Macvlan has a similar check in macvlan_port_create. v1->v2 - fix commit message typo Reported-by: syzbot Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- drivers/net/macsec.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 6ec6fc191a6e..92bc2b2df660 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -3665,6 +3666,8 @@ static int macsec_newlink(struct net *net, struct net_device *dev, real_dev = __dev_get_by_index(net, nla_get_u32(tb[IFLA_LINK])); if (!real_dev) return -ENODEV; + if (real_dev->type != ARPHRD_ETHER) + return -EINVAL; dev->priv_flags |= IFF_MACSEC; From a24ec3220f369aa0b94c863b6b310685a727151c Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sun, 22 Mar 2020 16:40:01 -0400 Subject: [PATCH 342/372] bnxt_en: Fix Priority Bytes and Packets counters in ethtool -S. There is an indexing bug in determining these ethtool priority counters. Instead of using the queue ID to index, we need to normalize by modulo 10 to get the index. This index is then used to obtain the proper CoS queue counter. Rename bp->pri2cos to bp->pri2cos_idx to make this more clear. Fixes: e37fed790335 ("bnxt_en: Add ethtool -S priority counters.") Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 10 +++++++++- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 8 ++++---- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index c5c8effc0139..b66ee1dcd417 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7406,14 +7406,22 @@ static int bnxt_hwrm_port_qstats_ext(struct bnxt *bp) pri2cos = &resp2->pri0_cos_queue_id; for (i = 0; i < 8; i++) { u8 queue_id = pri2cos[i]; + u8 queue_idx; + /* Per port queue IDs start from 0, 10, 20, etc */ + queue_idx = queue_id % 10; + if (queue_idx > BNXT_MAX_QUEUE) { + bp->pri2cos_valid = false; + goto qstats_done; + } for (j = 0; j < bp->max_q; j++) { if (bp->q_ids[j] == queue_id) - bp->pri2cos[i] = j; + bp->pri2cos_idx[i] = queue_idx; } } bp->pri2cos_valid = 1; } +qstats_done: mutex_unlock(&bp->hwrm_cmd_lock); return rc; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index cabef0b4f5fb..63b170658532 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1716,7 +1716,7 @@ struct bnxt { u16 fw_rx_stats_ext_size; u16 fw_tx_stats_ext_size; u16 hw_ring_stats_size; - u8 pri2cos[8]; + u8 pri2cos_idx[8]; u8 pri2cos_valid; u16 hwrm_max_req_len; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 1f67e6729a2c..3f8a1ded662a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -589,25 +589,25 @@ skip_ring_stats: if (bp->pri2cos_valid) { for (i = 0; i < 8; i++, j++) { long n = bnxt_rx_bytes_pri_arr[i].base_off + - bp->pri2cos[i]; + bp->pri2cos_idx[i]; buf[j] = le64_to_cpu(*(rx_port_stats_ext + n)); } for (i = 0; i < 8; i++, j++) { long n = bnxt_rx_pkts_pri_arr[i].base_off + - bp->pri2cos[i]; + bp->pri2cos_idx[i]; buf[j] = le64_to_cpu(*(rx_port_stats_ext + n)); } for (i = 0; i < 8; i++, j++) { long n = bnxt_tx_bytes_pri_arr[i].base_off + - bp->pri2cos[i]; + bp->pri2cos_idx[i]; buf[j] = le64_to_cpu(*(tx_port_stats_ext + n)); } for (i = 0; i < 8; i++, j++) { long n = bnxt_tx_pkts_pri_arr[i].base_off + - bp->pri2cos[i]; + bp->pri2cos_idx[i]; buf[j] = le64_to_cpu(*(tx_port_stats_ext + n)); } From 62d4073e86e62e316bea2c53e77db10418fd5dd7 Mon Sep 17 00:00:00 2001 From: Edwin Peer Date: Sun, 22 Mar 2020 16:40:02 -0400 Subject: [PATCH 343/372] bnxt_en: fix memory leaks in bnxt_dcbnl_ieee_getets() The allocated ieee_ets structure goes out of scope without being freed, leaking memory. Appropriate result codes should be returned so that callers do not rely on invalid data passed by reference. Also cache the ETS config retrieved from the device so that it doesn't need to be freed. The balance of the code was clearly written with the intent of having the results of querying the hardware cached in the device structure. The commensurate store was evidently missed though. Fixes: 7df4ae9fe855 ("bnxt_en: Implement DCBNL to support host-based DCBX.") Signed-off-by: Edwin Peer Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c index fb6f30d0d1d0..b1511bcffb1b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c @@ -479,24 +479,26 @@ static int bnxt_dcbnl_ieee_getets(struct net_device *dev, struct ieee_ets *ets) { struct bnxt *bp = netdev_priv(dev); struct ieee_ets *my_ets = bp->ieee_ets; + int rc; ets->ets_cap = bp->max_tc; if (!my_ets) { - int rc; - if (bp->dcbx_cap & DCB_CAP_DCBX_HOST) return 0; my_ets = kzalloc(sizeof(*my_ets), GFP_KERNEL); if (!my_ets) - return 0; + return -ENOMEM; rc = bnxt_hwrm_queue_cos2bw_qcfg(bp, my_ets); if (rc) - return 0; + goto error; rc = bnxt_hwrm_queue_pri2cos_qcfg(bp, my_ets); if (rc) - return 0; + goto error; + + /* cache result */ + bp->ieee_ets = my_ets; } ets->cbs = my_ets->cbs; @@ -505,6 +507,9 @@ static int bnxt_dcbnl_ieee_getets(struct net_device *dev, struct ieee_ets *ets) memcpy(ets->tc_tsa, my_ets->tc_tsa, sizeof(ets->tc_tsa)); memcpy(ets->prio_tc, my_ets->prio_tc, sizeof(ets->prio_tc)); return 0; +error: + kfree(my_ets); + return rc; } static int bnxt_dcbnl_ieee_setets(struct net_device *dev, struct ieee_ets *ets) From 0b5b561cea32d5bb1e0a82d65b755a3cb5212141 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sun, 22 Mar 2020 16:40:03 -0400 Subject: [PATCH 344/372] bnxt_en: Return error if bnxt_alloc_ctx_mem() fails. The current code ignores the return value from bnxt_hwrm_func_backing_store_cfg(), causing the driver to proceed in the init path even when this vital firmware call has failed. Fix it by propagating the error code to the caller. Fixes: 1b9394e5a2ad ("bnxt_en: Configure context memory on new devices.") Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index b66ee1dcd417..0628a6abcc3e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -6880,12 +6880,12 @@ skip_rdma: } ena |= FUNC_BACKING_STORE_CFG_REQ_DFLT_ENABLES; rc = bnxt_hwrm_func_backing_store_cfg(bp, ena); - if (rc) + if (rc) { netdev_err(bp->dev, "Failed configuring context mem, rc = %d.\n", rc); - else - ctx->flags |= BNXT_CTX_FLAG_INITED; - + return rc; + } + ctx->flags |= BNXT_CTX_FLAG_INITED; return 0; } From 62bfb932a51f6d08eb409248e69f8d6428c2cabd Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Sun, 22 Mar 2020 16:40:04 -0400 Subject: [PATCH 345/372] bnxt_en: Free context memory after disabling PCI in probe error path. Other shutdown code paths will always disable PCI first to shutdown DMA before freeing context memory. Do the same sequence in the error path of probe to be safe and consistent. Fixes: c20dc142dd7b ("bnxt_en: Disable bus master during PCI shutdown and driver unload.") Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 0628a6abcc3e..95f4c02aa7e1 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -11970,12 +11970,12 @@ init_err_pci_clean: bnxt_hwrm_func_drv_unrgtr(bp); bnxt_free_hwrm_short_cmd_req(bp); bnxt_free_hwrm_resources(bp); - bnxt_free_ctx_mem(bp); - kfree(bp->ctx); - bp->ctx = NULL; kfree(bp->fw_health); bp->fw_health = NULL; bnxt_cleanup_pci(bp); + bnxt_free_ctx_mem(bp); + kfree(bp->ctx); + bp->ctx = NULL; init_err_free: free_netdev(dev); From 5d765a5e4bd7c368e564e11402bba74cf7f03ac1 Mon Sep 17 00:00:00 2001 From: Vasundhara Volam Date: Sun, 22 Mar 2020 16:40:05 -0400 Subject: [PATCH 346/372] bnxt_en: Reset rings if ring reservation fails during open() If ring counts are not reset when ring reservation fails, bnxt_init_dflt_ring_mode() will not be called again to reinitialise IRQs when open() is called and results in system crash as napi will also be not initialised. This patch fixes it by resetting the ring counts. Fixes: 47558acd56a7 ("bnxt_en: Reserve rings at driver open if none was reserved at probe time.") Signed-off-by: Vasundhara Volam Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 95f4c02aa7e1..d28b406a26b1 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -11677,6 +11677,10 @@ static int bnxt_set_dflt_rings(struct bnxt *bp, bool sh) bp->rx_nr_rings++; bp->cp_nr_rings++; } + if (rc) { + bp->tx_nr_rings = 0; + bp->rx_nr_rings = 0; + } return rc; } From 0e62f543bed03a64495bd2651d4fe1aa4bcb7fe5 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Sun, 22 Mar 2020 13:58:50 -0700 Subject: [PATCH 347/372] net: dsa: Fix duplicate frames flooded by learning When both the switch and the bridge are learning about new addresses, switch ports attached to the bridge would see duplicate ARP frames because both entities would attempt to send them. Fixes: 5037d532b83d ("net: dsa: add Broadcom tag RX/TX handler") Reported-by: Maxime Bizon Signed-off-by: Florian Fainelli Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- net/dsa/tag_brcm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index 9c3114179690..9169b63a89e3 100644 --- a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -140,6 +140,8 @@ static struct sk_buff *brcm_tag_rcv_ll(struct sk_buff *skb, /* Remove Broadcom tag and update checksum */ skb_pull_rcsum(skb, BRCM_TAG_LEN); + skb->offload_fwd_mark = 1; + return skb; } #endif From 2f599ec422ad6634fb5ad43748b9969ca9d742bd Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Sun, 22 Mar 2020 22:24:21 +0100 Subject: [PATCH 348/372] ethtool: fix reference leak in some *_SET handlers Andrew noticed that some handlers for *_SET commands leak a netdev reference if required ethtool_ops callbacks do not exist. A simple reproducer would be e.g. ip link add veth1 type veth peer name veth2 ethtool -s veth1 wol g ip link del veth1 Make sure dev_put() is called when ethtool_ops check fails. v2: add Fixes tags Fixes: a53f3d41e4d3 ("ethtool: set link settings with LINKINFO_SET request") Fixes: bfbcfe2032e7 ("ethtool: set link modes related data with LINKMODES_SET request") Fixes: e54d04e3afea ("ethtool: set message mask with DEBUG_SET request") Fixes: 8d425b19b305 ("ethtool: set wake-on-lan settings with WOL_SET request") Reported-by: Andrew Lunn Signed-off-by: Michal Kubecek Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/ethtool/debug.c | 4 +++- net/ethtool/linkinfo.c | 4 +++- net/ethtool/linkmodes.c | 4 +++- net/ethtool/wol.c | 4 +++- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/net/ethtool/debug.c b/net/ethtool/debug.c index aaef4843e6ba..92599ad7b3c2 100644 --- a/net/ethtool/debug.c +++ b/net/ethtool/debug.c @@ -107,8 +107,9 @@ int ethnl_set_debug(struct sk_buff *skb, struct genl_info *info) if (ret < 0) return ret; dev = req_info.dev; + ret = -EOPNOTSUPP; if (!dev->ethtool_ops->get_msglevel || !dev->ethtool_ops->set_msglevel) - return -EOPNOTSUPP; + goto out_dev; rtnl_lock(); ret = ethnl_ops_begin(dev); @@ -129,6 +130,7 @@ out_ops: ethnl_ops_complete(dev); out_rtnl: rtnl_unlock(); +out_dev: dev_put(dev); return ret; } diff --git a/net/ethtool/linkinfo.c b/net/ethtool/linkinfo.c index 5d16cb4e8693..6e9e0b590bb5 100644 --- a/net/ethtool/linkinfo.c +++ b/net/ethtool/linkinfo.c @@ -126,9 +126,10 @@ int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info) if (ret < 0) return ret; dev = req_info.dev; + ret = -EOPNOTSUPP; if (!dev->ethtool_ops->get_link_ksettings || !dev->ethtool_ops->set_link_ksettings) - return -EOPNOTSUPP; + goto out_dev; rtnl_lock(); ret = ethnl_ops_begin(dev); @@ -162,6 +163,7 @@ out_ops: ethnl_ops_complete(dev); out_rtnl: rtnl_unlock(); +out_dev: dev_put(dev); return ret; } diff --git a/net/ethtool/linkmodes.c b/net/ethtool/linkmodes.c index 96f20be64553..18cc37be2d9c 100644 --- a/net/ethtool/linkmodes.c +++ b/net/ethtool/linkmodes.c @@ -338,9 +338,10 @@ int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info) if (ret < 0) return ret; dev = req_info.dev; + ret = -EOPNOTSUPP; if (!dev->ethtool_ops->get_link_ksettings || !dev->ethtool_ops->set_link_ksettings) - return -EOPNOTSUPP; + goto out_dev; rtnl_lock(); ret = ethnl_ops_begin(dev); @@ -370,6 +371,7 @@ out_ops: ethnl_ops_complete(dev); out_rtnl: rtnl_unlock(); +out_dev: dev_put(dev); return ret; } diff --git a/net/ethtool/wol.c b/net/ethtool/wol.c index e1b8a65b64c4..55e1ecaaf739 100644 --- a/net/ethtool/wol.c +++ b/net/ethtool/wol.c @@ -128,8 +128,9 @@ int ethnl_set_wol(struct sk_buff *skb, struct genl_info *info) if (ret < 0) return ret; dev = req_info.dev; + ret = -EOPNOTSUPP; if (!dev->ethtool_ops->get_wol || !dev->ethtool_ops->set_wol) - return -EOPNOTSUPP; + goto out_dev; rtnl_lock(); ret = ethnl_ops_begin(dev); @@ -172,6 +173,7 @@ out_ops: ethnl_ops_complete(dev); out_rtnl: rtnl_unlock(); +out_dev: dev_put(dev); return ret; } From 81573b18f26defe672a7d960f9af9ac2c97f324d Mon Sep 17 00:00:00 2001 From: Vadym Kochan Date: Mon, 23 Mar 2020 16:24:04 +0200 Subject: [PATCH 349/372] selftests/net/forwarding: add Makefile to install tests Add missing Makefile for net/forwarding tests and include it to the targets list, otherwise forwarding tests are not installed in case of cross-compilation. Signed-off-by: Vadym Kochan Signed-off-by: David S. Miller --- tools/testing/selftests/Makefile | 1 + .../testing/selftests/net/forwarding/Makefile | 75 +++++++++++++++++++ 2 files changed, 76 insertions(+) create mode 100644 tools/testing/selftests/net/forwarding/Makefile diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 6ec503912bea..b93fa645ee54 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -33,6 +33,7 @@ TARGETS += memory-hotplug TARGETS += mount TARGETS += mqueue TARGETS += net +TARGETS += net/forwarding TARGETS += net/mptcp TARGETS += netfilter TARGETS += networking/timestamping diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile new file mode 100644 index 000000000000..44616103508b --- /dev/null +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -0,0 +1,75 @@ +# SPDX-License-Identifier: GPL-2.0+ OR MIT + +TEST_PROGS = bridge_igmp.sh \ + bridge_port_isolation.sh \ + bridge_sticky_fdb.sh \ + bridge_vlan_aware.sh \ + bridge_vlan_unaware.sh \ + devlink_lib.sh \ + ethtool_lib.sh \ + ethtool.sh \ + fib_offload_lib.sh \ + forwarding.config.sample \ + gre_inner_v4_multipath.sh \ + gre_inner_v6_multipath.sh \ + gre_multipath.sh \ + ip6gre_inner_v4_multipath.sh \ + ip6gre_inner_v6_multipath.sh \ + ipip_flat_gre_key.sh \ + ipip_flat_gre_keys.sh \ + ipip_flat_gre.sh \ + ipip_hier_gre_key.sh \ + ipip_hier_gre_keys.sh \ + ipip_hier_gre.sh \ + ipip_lib.sh \ + lib.sh \ + loopback.sh \ + mirror_gre_bound.sh \ + mirror_gre_bridge_1d.sh \ + mirror_gre_bridge_1d_vlan.sh \ + mirror_gre_bridge_1q_lag.sh \ + mirror_gre_bridge_1q.sh \ + mirror_gre_changes.sh \ + mirror_gre_flower.sh \ + mirror_gre_lag_lacp.sh \ + mirror_gre_lib.sh \ + mirror_gre_neigh.sh \ + mirror_gre_nh.sh \ + mirror_gre.sh \ + mirror_gre_topo_lib.sh \ + mirror_gre_vlan_bridge_1q.sh \ + mirror_gre_vlan.sh \ + mirror_lib.sh \ + mirror_topo_lib.sh \ + mirror_vlan.sh \ + router_bridge.sh \ + router_bridge_vlan.sh \ + router_broadcast.sh \ + router_mpath_nh.sh \ + router_multicast.sh \ + router_multipath.sh \ + router.sh \ + router_vid_1.sh \ + sch_ets_core.sh \ + sch_ets.sh \ + sch_ets_tests.sh \ + sch_tbf_core.sh \ + sch_tbf_etsprio.sh \ + sch_tbf_ets.sh \ + sch_tbf_prio.sh \ + sch_tbf_root.sh \ + tc_actions.sh \ + tc_chains.sh \ + tc_common.sh \ + tc_flower_router.sh \ + tc_flower.sh \ + tc_shblocks.sh \ + tc_vlan_modify.sh \ + vxlan_asymmetric.sh \ + vxlan_bridge_1d_port_8472.sh \ + vxlan_bridge_1d.sh \ + vxlan_bridge_1q_port_8472.sh \ + vxlan_bridge_1q.sh \ + vxlan_symmetric.sh + +include ../../lib.mk From 0c625ccfe6f754d0896b8881f5c85bcb81699f1f Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Mon, 2 Mar 2020 12:12:25 +0100 Subject: [PATCH 350/372] gpiolib: acpi: Add quirk to ignore EC wakeups on HP x2 10 CHT + AXP288 model There are at least 3 models of the HP x2 10 models: Bay Trail SoC + AXP288 PMIC Cherry Trail SoC + AXP288 PMIC Cherry Trail SoC + TI PMIC Like on the other HP x2 10 models we need to ignore wakeup for ACPI GPIO events on the external embedded-controller pin to avoid spurious wakeups on the HP x2 10 CHT + AXP288 model too. This commit adds an extra DMI based quirk for the HP x2 10 CHT + AXP288 model, ignoring wakeups for ACPI GPIO events on the EC interrupt pin on this model. This fixes spurious wakeups from suspend on this model. Fixes: aa23ca3d98f7 ("gpiolib: acpi: Add honor_wakeup module-option + quirk mechanism") Reported-and-tested-by: Marc Lehmann Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20200302111225.6641-4-hdegoede@redhat.com Acked-by: Mika Westerberg Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib-acpi.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index d1ef060a5873..0017367e94ee 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -1437,6 +1437,21 @@ static const struct dmi_system_id gpiolib_acpi_quirks[] = { .ignore_wake = "INT33FC:02@28", }, }, + { + /* + * HP X2 10 models with Cherry Trail SoC + AXP288 PMIC use an + * external embedded-controller connected via I2C + an ACPI GPIO + * event handler on INT33FF:01 pin 0, causing spurious wakeups. + */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion x2 Detachable"), + DMI_MATCH(DMI_BOARD_NAME, "813E"), + }, + .driver_data = &(struct acpi_gpiolib_dmi_quirk) { + .ignore_wake = "INT33FF:01@0", + }, + }, {} /* Terminating entry */ }; From 8c2d45b2b65ca1f215244be1c600236e83f9815f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 22 Mar 2020 03:21:58 +0100 Subject: [PATCH 351/372] netfilter: nf_tables: Allow set back-ends to report partial overlaps on insertion Currently, the -EEXIST return code of ->insert() callbacks is ambiguous: it might indicate that a given element (including intervals) already exists as such, or that the new element would clash with existing ones. If identical elements already exist, the front-end is ignoring this without returning error, in case NLM_F_EXCL is not set. However, if the new element can't be inserted due an overlap, we should report this to the user. To this purpose, allow set back-ends to return -ENOTEMPTY on collision with existing elements, translate that to -EEXIST, and return that to userspace, no matter if NLM_F_EXCL was set. Reported-by: Phil Sutter Signed-off-by: Stefano Brivio Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 38c680f28f15..d11f1a74d43c 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -5082,6 +5082,11 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, err = -EBUSY; else if (!(nlmsg_flags & NLM_F_EXCL)) err = 0; + } else if (err == -ENOTEMPTY) { + /* ENOTEMPTY reports overlapping between this element + * and an existing one. + */ + err = -EEXIST; } goto err_element_clash; } From 0eb4b5ee33f2461d149408a247af8ae24756a6ca Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Sun, 22 Mar 2020 03:21:59 +0100 Subject: [PATCH 352/372] netfilter: nft_set_pipapo: Separate partial and complete overlap cases on insertion ...and return -ENOTEMPTY to the front-end on collision, -EEXIST if an identical element already exists. Together with the previous patch, element collision will now be returned to the user as -EEXIST. Reported-by: Phil Sutter Signed-off-by: Stefano Brivio Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_pipapo.c | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 4fc0c924ed5d..ef7e8ad2e344 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1098,21 +1098,41 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, struct nft_pipapo_field *f; int i, bsize_max, err = 0; + if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY_END)) + end = (const u8 *)nft_set_ext_key_end(ext)->data; + else + end = start; + dup = pipapo_get(net, set, start, genmask); - if (PTR_ERR(dup) == -ENOENT) { - if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY_END)) { - end = (const u8 *)nft_set_ext_key_end(ext)->data; - dup = pipapo_get(net, set, end, nft_genmask_next(net)); - } else { - end = start; + if (!IS_ERR(dup)) { + /* Check if we already have the same exact entry */ + const struct nft_data *dup_key, *dup_end; + + dup_key = nft_set_ext_key(&dup->ext); + if (nft_set_ext_exists(&dup->ext, NFT_SET_EXT_KEY_END)) + dup_end = nft_set_ext_key_end(&dup->ext); + else + dup_end = dup_key; + + if (!memcmp(start, dup_key->data, sizeof(*dup_key->data)) && + !memcmp(end, dup_end->data, sizeof(*dup_end->data))) { + *ext2 = &dup->ext; + return -EEXIST; } + + return -ENOTEMPTY; + } + + if (PTR_ERR(dup) == -ENOENT) { + /* Look for partially overlapping entries */ + dup = pipapo_get(net, set, end, nft_genmask_next(net)); } if (PTR_ERR(dup) != -ENOENT) { if (IS_ERR(dup)) return PTR_ERR(dup); *ext2 = &dup->ext; - return -EEXIST; + return -ENOTEMPTY; } /* Validate */ From 6f7c9caf017be8ab0fe3b99509580d0793bf0833 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Sun, 22 Mar 2020 03:22:00 +0100 Subject: [PATCH 353/372] netfilter: nft_set_rbtree: Introduce and use nft_rbtree_interval_start() Replace negations of nft_rbtree_interval_end() with a new helper, nft_rbtree_interval_start(), wherever this helps to visualise the problem at hand, that is, for all the occurrences except for the comparison against given flags in __nft_rbtree_get(). This gets especially useful in the next patch. Signed-off-by: Stefano Brivio Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_rbtree.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 5000b938ab1e..85572b2a6051 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -33,6 +33,11 @@ static bool nft_rbtree_interval_end(const struct nft_rbtree_elem *rbe) (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END); } +static bool nft_rbtree_interval_start(const struct nft_rbtree_elem *rbe) +{ + return !nft_rbtree_interval_end(rbe); +} + static bool nft_rbtree_equal(const struct nft_set *set, const void *this, const struct nft_rbtree_elem *interval) { @@ -64,7 +69,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set if (interval && nft_rbtree_equal(set, this, interval) && nft_rbtree_interval_end(rbe) && - !nft_rbtree_interval_end(interval)) + nft_rbtree_interval_start(interval)) continue; interval = rbe; } else if (d > 0) @@ -89,7 +94,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set if (set->flags & NFT_SET_INTERVAL && interval != NULL && nft_set_elem_active(&interval->ext, genmask) && - !nft_rbtree_interval_end(interval)) { + nft_rbtree_interval_start(interval)) { *ext = &interval->ext; return true; } @@ -224,9 +229,9 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, p = &parent->rb_right; else { if (nft_rbtree_interval_end(rbe) && - !nft_rbtree_interval_end(new)) { + nft_rbtree_interval_start(new)) { p = &parent->rb_left; - } else if (!nft_rbtree_interval_end(rbe) && + } else if (nft_rbtree_interval_start(rbe) && nft_rbtree_interval_end(new)) { p = &parent->rb_right; } else if (nft_set_elem_active(&rbe->ext, genmask)) { @@ -317,10 +322,10 @@ static void *nft_rbtree_deactivate(const struct net *net, parent = parent->rb_right; else { if (nft_rbtree_interval_end(rbe) && - !nft_rbtree_interval_end(this)) { + nft_rbtree_interval_start(this)) { parent = parent->rb_left; continue; - } else if (!nft_rbtree_interval_end(rbe) && + } else if (nft_rbtree_interval_start(rbe) && nft_rbtree_interval_end(this)) { parent = parent->rb_right; continue; From 7c84d41416d836ef7e533bd4d64ccbdf40c5ac70 Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Sun, 22 Mar 2020 03:22:01 +0100 Subject: [PATCH 354/372] netfilter: nft_set_rbtree: Detect partial overlaps on insertion ...and return -ENOTEMPTY to the front-end in this case, instead of proceeding. Currently, nft takes care of checking for these cases and not sending them to the kernel, but if we drop the set_overlap() call in nft we can end up in situations like: # nft add table t # nft add set t s '{ type inet_service ; flags interval ; }' # nft add element t s '{ 1 - 5 }' # nft add element t s '{ 6 - 10 }' # nft add element t s '{ 4 - 7 }' # nft list set t s table ip t { set s { type inet_service flags interval elements = { 1-3, 4-5, 6-7 } } } This change has the primary purpose of making the behaviour consistent with nft_set_pipapo, but is also functional to avoid inconsistent behaviour if userspace sends overlapping elements for any reason. v2: When we meet the same key data in the tree, as start element while inserting an end element, or as end element while inserting a start element, actually check that the existing element is active, before resetting the overlap flag (Pablo Neira Ayuso) Signed-off-by: Stefano Brivio Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_rbtree.c | 70 ++++++++++++++++++++++++++++++++-- 1 file changed, 67 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 85572b2a6051..8617fc16a1ed 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -213,8 +213,43 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, u8 genmask = nft_genmask_next(net); struct nft_rbtree_elem *rbe; struct rb_node *parent, **p; + bool overlap = false; int d; + /* Detect overlaps as we descend the tree. Set the flag in these cases: + * + * a1. |__ _ _? >|__ _ _ (insert start after existing start) + * a2. _ _ __>| ?_ _ __| (insert end before existing end) + * a3. _ _ ___| ?_ _ _>| (insert end after existing end) + * a4. >|__ _ _ _ _ __| (insert start before existing end) + * + * and clear it later on, as we eventually reach the points indicated by + * '?' above, in the cases described below. We'll always meet these + * later, locally, due to tree ordering, and overlaps for the intervals + * that are the closest together are always evaluated last. + * + * b1. |__ _ _! >|__ _ _ (insert start after existing end) + * b2. _ _ __>| !_ _ __| (insert end before existing start) + * b3. !_____>| (insert end after existing start) + * + * Case a4. resolves to b1.: + * - if the inserted start element is the leftmost, because the '0' + * element in the tree serves as end element + * - otherwise, if an existing end is found. Note that end elements are + * always inserted after corresponding start elements. + * + * For a new, rightmost pair of elements, we'll hit cases b1. and b3., + * in that order. + * + * The flag is also cleared in two special cases: + * + * b4. |__ _ _!|<_ _ _ (insert start right before existing end) + * b5. |__ _ >|!__ _ _ (insert end right after existing start) + * + * which always happen as last step and imply that no further + * overlapping is possible. + */ + parent = NULL; p = &priv->root.rb_node; while (*p != NULL) { @@ -223,17 +258,42 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, d = memcmp(nft_set_ext_key(&rbe->ext), nft_set_ext_key(&new->ext), set->klen); - if (d < 0) + if (d < 0) { p = &parent->rb_left; - else if (d > 0) + + if (nft_rbtree_interval_start(new)) { + overlap = nft_rbtree_interval_start(rbe) && + nft_set_elem_active(&rbe->ext, + genmask); + } else { + overlap = nft_rbtree_interval_end(rbe) && + nft_set_elem_active(&rbe->ext, + genmask); + } + } else if (d > 0) { p = &parent->rb_right; - else { + + if (nft_rbtree_interval_end(new)) { + overlap = nft_rbtree_interval_end(rbe) && + nft_set_elem_active(&rbe->ext, + genmask); + } else if (nft_rbtree_interval_end(rbe) && + nft_set_elem_active(&rbe->ext, genmask)) { + overlap = true; + } + } else { if (nft_rbtree_interval_end(rbe) && nft_rbtree_interval_start(new)) { p = &parent->rb_left; + + if (nft_set_elem_active(&rbe->ext, genmask)) + overlap = false; } else if (nft_rbtree_interval_start(rbe) && nft_rbtree_interval_end(new)) { p = &parent->rb_right; + + if (nft_set_elem_active(&rbe->ext, genmask)) + overlap = false; } else if (nft_set_elem_active(&rbe->ext, genmask)) { *ext = &rbe->ext; return -EEXIST; @@ -242,6 +302,10 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, } } } + + if (overlap) + return -ENOTEMPTY; + rb_link_node_rcu(&new->node, parent, p); rb_insert_color(&new->node, &priv->root); return 0; From 76a109fac206e158eb3c967af98c178cff738e6a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 23 Mar 2020 14:27:16 +0100 Subject: [PATCH 355/372] netfilter: nft_fwd_netdev: validate family and chain type Make sure the forward action is only used from ingress. Fixes: 39e6dea28adc ("netfilter: nf_tables: add forward expression to the netdev family") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_fwd_netdev.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c index aba11c2333f3..ddd28de810b6 100644 --- a/net/netfilter/nft_fwd_netdev.c +++ b/net/netfilter/nft_fwd_netdev.c @@ -190,6 +190,13 @@ nla_put_failure: return -1; } +static int nft_fwd_validate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nft_data **data) +{ + return nft_chain_validate_hooks(ctx->chain, (1 << NF_NETDEV_INGRESS)); +} + static struct nft_expr_type nft_fwd_netdev_type; static const struct nft_expr_ops nft_fwd_neigh_netdev_ops = { .type = &nft_fwd_netdev_type, @@ -197,6 +204,7 @@ static const struct nft_expr_ops nft_fwd_neigh_netdev_ops = { .eval = nft_fwd_neigh_eval, .init = nft_fwd_neigh_init, .dump = nft_fwd_neigh_dump, + .validate = nft_fwd_validate, }; static const struct nft_expr_ops nft_fwd_netdev_ops = { @@ -205,6 +213,7 @@ static const struct nft_expr_ops nft_fwd_netdev_ops = { .eval = nft_fwd_netdev_eval, .init = nft_fwd_netdev_init, .dump = nft_fwd_netdev_dump, + .validate = nft_fwd_validate, .offload = nft_fwd_netdev_offload, }; From bcfabee1afd99484b6ba067361b8678e28bbc065 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 23 Mar 2020 19:53:10 +0100 Subject: [PATCH 356/372] netfilter: nft_fwd_netdev: allow to redirect to ifb via ingress Set skb->tc_redirected to 1, otherwise the ifb driver drops the packet. Set skb->tc_from_ingress to 1 to reinject the packet back to the ingress path after leaving the ifb egress path. This patch inconditionally sets on these two skb fields that are meaningful to the ifb driver. The existing forward action is guaranteed to run from ingress path. Fixes: 39e6dea28adc ("netfilter: nf_tables: add forward expression to the netdev family") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_fwd_netdev.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c index ddd28de810b6..74f050ba6bad 100644 --- a/net/netfilter/nft_fwd_netdev.c +++ b/net/netfilter/nft_fwd_netdev.c @@ -28,6 +28,10 @@ static void nft_fwd_netdev_eval(const struct nft_expr *expr, struct nft_fwd_netdev *priv = nft_expr_priv(expr); int oif = regs->data[priv->sreg_dev]; + /* These are used by ifb only. */ + pkt->skb->tc_redirected = 1; + pkt->skb->tc_from_ingress = 1; + nf_fwd_netdev_egress(pkt, oif); regs->verdict.code = NF_STOLEN; } From a64d558d8cf98424cc5eb9ae6631782cd8bf789c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 23 Mar 2020 17:34:30 +0100 Subject: [PATCH 357/372] selftests: netfilter: add nfqueue test case Add a test case to check nf queue infrastructure. Could be extended in the future to also cover serialization of conntrack, uid and secctx attributes in nfqueue. For now, this checks that 'queue bypass' works, that a queue rule with no bypass option blocks traffic and that userspace receives the expected number of packets. For this we add two queues and hook all of prerouting/input/forward/output/postrouting. Packets get queued twice with a dummy base chain in between: This passes with current nf tree, but reverting commit 946c0d8e6ed4 ("netfilter: nf_queue: fix reinject verdict handling") makes this trip (it processes 30 instead of expected 20 packets). v2: update config file with queue and other options missing/needed for other tests. v3: also test with tcp, this reveals problem with commit 28f8bfd1ac94 ("netfilter: Support iif matches in POSTROUTING"), due to skb->dev pointing at another skb in the retransmit rbtree (skb->dev aliases to rbnode child). Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- tools/testing/selftests/netfilter/Makefile | 6 +- tools/testing/selftests/netfilter/config | 6 + tools/testing/selftests/netfilter/nf-queue.c | 352 ++++++++++++++++++ .../testing/selftests/netfilter/nft_queue.sh | 332 +++++++++++++++++ 4 files changed, 695 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/netfilter/nf-queue.c create mode 100755 tools/testing/selftests/netfilter/nft_queue.sh diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index 08194aa44006..9c0f758310fe 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -3,6 +3,10 @@ TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \ conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \ - nft_concat_range.sh + nft_concat_range.sh \ + nft_queue.sh + +LDLIBS = -lmnl +TEST_GEN_FILES = nf-queue include ../lib.mk diff --git a/tools/testing/selftests/netfilter/config b/tools/testing/selftests/netfilter/config index 59caa8f71cd8..4faf2ce021d9 100644 --- a/tools/testing/selftests/netfilter/config +++ b/tools/testing/selftests/netfilter/config @@ -1,2 +1,8 @@ CONFIG_NET_NS=y CONFIG_NF_TABLES_INET=y +CONFIG_NFT_QUEUE=m +CONFIG_NFT_NAT=m +CONFIG_NFT_REDIR=m +CONFIG_NFT_MASQ=m +CONFIG_NFT_FLOW_OFFLOAD=m +CONFIG_NF_CT_NETLINK=m diff --git a/tools/testing/selftests/netfilter/nf-queue.c b/tools/testing/selftests/netfilter/nf-queue.c new file mode 100644 index 000000000000..29c73bce38fa --- /dev/null +++ b/tools/testing/selftests/netfilter/nf-queue.c @@ -0,0 +1,352 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +struct options { + bool count_packets; + int verbose; + unsigned int queue_num; + unsigned int timeout; +}; + +static unsigned int queue_stats[5]; +static struct options opts; + +static void help(const char *p) +{ + printf("Usage: %s [-c|-v [-vv] ] [-t timeout] [-q queue_num]\n", p); +} + +static int parse_attr_cb(const struct nlattr *attr, void *data) +{ + const struct nlattr **tb = data; + int type = mnl_attr_get_type(attr); + + /* skip unsupported attribute in user-space */ + if (mnl_attr_type_valid(attr, NFQA_MAX) < 0) + return MNL_CB_OK; + + switch (type) { + case NFQA_MARK: + case NFQA_IFINDEX_INDEV: + case NFQA_IFINDEX_OUTDEV: + case NFQA_IFINDEX_PHYSINDEV: + case NFQA_IFINDEX_PHYSOUTDEV: + if (mnl_attr_validate(attr, MNL_TYPE_U32) < 0) { + perror("mnl_attr_validate"); + return MNL_CB_ERROR; + } + break; + case NFQA_TIMESTAMP: + if (mnl_attr_validate2(attr, MNL_TYPE_UNSPEC, + sizeof(struct nfqnl_msg_packet_timestamp)) < 0) { + perror("mnl_attr_validate2"); + return MNL_CB_ERROR; + } + break; + case NFQA_HWADDR: + if (mnl_attr_validate2(attr, MNL_TYPE_UNSPEC, + sizeof(struct nfqnl_msg_packet_hw)) < 0) { + perror("mnl_attr_validate2"); + return MNL_CB_ERROR; + } + break; + case NFQA_PAYLOAD: + break; + } + tb[type] = attr; + return MNL_CB_OK; +} + +static int queue_cb(const struct nlmsghdr *nlh, void *data) +{ + struct nlattr *tb[NFQA_MAX+1] = { 0 }; + struct nfqnl_msg_packet_hdr *ph = NULL; + uint32_t id = 0; + + (void)data; + + mnl_attr_parse(nlh, sizeof(struct nfgenmsg), parse_attr_cb, tb); + if (tb[NFQA_PACKET_HDR]) { + ph = mnl_attr_get_payload(tb[NFQA_PACKET_HDR]); + id = ntohl(ph->packet_id); + + if (opts.verbose > 0) + printf("packet hook=%u, hwproto 0x%x", + ntohs(ph->hw_protocol), ph->hook); + + if (ph->hook >= 5) { + fprintf(stderr, "Unknown hook %d\n", ph->hook); + return MNL_CB_ERROR; + } + + if (opts.verbose > 0) { + uint32_t skbinfo = 0; + + if (tb[NFQA_SKB_INFO]) + skbinfo = ntohl(mnl_attr_get_u32(tb[NFQA_SKB_INFO])); + if (skbinfo & NFQA_SKB_CSUMNOTREADY) + printf(" csumnotready"); + if (skbinfo & NFQA_SKB_GSO) + printf(" gso"); + if (skbinfo & NFQA_SKB_CSUM_NOTVERIFIED) + printf(" csumnotverified"); + puts(""); + } + + if (opts.count_packets) + queue_stats[ph->hook]++; + } + + return MNL_CB_OK + id; +} + +static struct nlmsghdr * +nfq_build_cfg_request(char *buf, uint8_t command, int queue_num) +{ + struct nlmsghdr *nlh = mnl_nlmsg_put_header(buf); + struct nfqnl_msg_config_cmd cmd = { + .command = command, + .pf = htons(AF_INET), + }; + struct nfgenmsg *nfg; + + nlh->nlmsg_type = (NFNL_SUBSYS_QUEUE << 8) | NFQNL_MSG_CONFIG; + nlh->nlmsg_flags = NLM_F_REQUEST; + + nfg = mnl_nlmsg_put_extra_header(nlh, sizeof(*nfg)); + + nfg->nfgen_family = AF_UNSPEC; + nfg->version = NFNETLINK_V0; + nfg->res_id = htons(queue_num); + + mnl_attr_put(nlh, NFQA_CFG_CMD, sizeof(cmd), &cmd); + + return nlh; +} + +static struct nlmsghdr * +nfq_build_cfg_params(char *buf, uint8_t mode, int range, int queue_num) +{ + struct nlmsghdr *nlh = mnl_nlmsg_put_header(buf); + struct nfqnl_msg_config_params params = { + .copy_range = htonl(range), + .copy_mode = mode, + }; + struct nfgenmsg *nfg; + + nlh->nlmsg_type = (NFNL_SUBSYS_QUEUE << 8) | NFQNL_MSG_CONFIG; + nlh->nlmsg_flags = NLM_F_REQUEST; + + nfg = mnl_nlmsg_put_extra_header(nlh, sizeof(*nfg)); + nfg->nfgen_family = AF_UNSPEC; + nfg->version = NFNETLINK_V0; + nfg->res_id = htons(queue_num); + + mnl_attr_put(nlh, NFQA_CFG_PARAMS, sizeof(params), ¶ms); + + return nlh; +} + +static struct nlmsghdr * +nfq_build_verdict(char *buf, int id, int queue_num, int verd) +{ + struct nfqnl_msg_verdict_hdr vh = { + .verdict = htonl(verd), + .id = htonl(id), + }; + struct nlmsghdr *nlh; + struct nfgenmsg *nfg; + + nlh = mnl_nlmsg_put_header(buf); + nlh->nlmsg_type = (NFNL_SUBSYS_QUEUE << 8) | NFQNL_MSG_VERDICT; + nlh->nlmsg_flags = NLM_F_REQUEST; + nfg = mnl_nlmsg_put_extra_header(nlh, sizeof(*nfg)); + nfg->nfgen_family = AF_UNSPEC; + nfg->version = NFNETLINK_V0; + nfg->res_id = htons(queue_num); + + mnl_attr_put(nlh, NFQA_VERDICT_HDR, sizeof(vh), &vh); + + return nlh; +} + +static void print_stats(void) +{ + unsigned int last, total; + int i; + + if (!opts.count_packets) + return; + + total = 0; + last = queue_stats[0]; + + for (i = 0; i < 5; i++) { + printf("hook %d packets %08u\n", i, queue_stats[i]); + last = queue_stats[i]; + total += last; + } + + printf("%u packets total\n", total); +} + +struct mnl_socket *open_queue(void) +{ + char buf[MNL_SOCKET_BUFFER_SIZE]; + unsigned int queue_num; + struct mnl_socket *nl; + struct nlmsghdr *nlh; + struct timeval tv; + uint32_t flags; + + nl = mnl_socket_open(NETLINK_NETFILTER); + if (nl == NULL) { + perror("mnl_socket_open"); + exit(EXIT_FAILURE); + } + + if (mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID) < 0) { + perror("mnl_socket_bind"); + exit(EXIT_FAILURE); + } + + queue_num = opts.queue_num; + nlh = nfq_build_cfg_request(buf, NFQNL_CFG_CMD_BIND, queue_num); + + if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) { + perror("mnl_socket_sendto"); + exit(EXIT_FAILURE); + } + + nlh = nfq_build_cfg_params(buf, NFQNL_COPY_PACKET, 0xFFFF, queue_num); + + flags = NFQA_CFG_F_GSO | NFQA_CFG_F_UID_GID; + mnl_attr_put_u32(nlh, NFQA_CFG_FLAGS, htonl(flags)); + mnl_attr_put_u32(nlh, NFQA_CFG_MASK, htonl(flags)); + + if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) { + perror("mnl_socket_sendto"); + exit(EXIT_FAILURE); + } + + memset(&tv, 0, sizeof(tv)); + tv.tv_sec = opts.timeout; + if (opts.timeout && setsockopt(mnl_socket_get_fd(nl), + SOL_SOCKET, SO_RCVTIMEO, + &tv, sizeof(tv))) { + perror("setsockopt(SO_RCVTIMEO)"); + exit(EXIT_FAILURE); + } + + return nl; +} + +static int mainloop(void) +{ + unsigned int buflen = 64 * 1024 + MNL_SOCKET_BUFFER_SIZE; + struct mnl_socket *nl; + struct nlmsghdr *nlh; + unsigned int portid; + char *buf; + int ret; + + buf = malloc(buflen); + if (!buf) { + perror("malloc"); + exit(EXIT_FAILURE); + } + + nl = open_queue(); + portid = mnl_socket_get_portid(nl); + + for (;;) { + uint32_t id; + + ret = mnl_socket_recvfrom(nl, buf, buflen); + if (ret == -1) { + if (errno == ENOBUFS) + continue; + + if (errno == EAGAIN) { + errno = 0; + ret = 0; + break; + } + + perror("mnl_socket_recvfrom"); + exit(EXIT_FAILURE); + } + + ret = mnl_cb_run(buf, ret, 0, portid, queue_cb, NULL); + if (ret < 0) { + perror("mnl_cb_run"); + exit(EXIT_FAILURE); + } + + id = ret - MNL_CB_OK; + nlh = nfq_build_verdict(buf, id, opts.queue_num, NF_ACCEPT); + if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) { + perror("mnl_socket_sendto"); + exit(EXIT_FAILURE); + } + } + + mnl_socket_close(nl); + + return ret; +} + +static void parse_opts(int argc, char **argv) +{ + int c; + + while ((c = getopt(argc, argv, "chvt:q:")) != -1) { + switch (c) { + case 'c': + opts.count_packets = true; + break; + case 'h': + help(argv[0]); + exit(0); + break; + case 'q': + opts.queue_num = atoi(optarg); + if (opts.queue_num > 0xffff) + opts.queue_num = 0; + break; + case 't': + opts.timeout = atoi(optarg); + break; + case 'v': + opts.verbose++; + break; + } + } +} + +int main(int argc, char *argv[]) +{ + int ret; + + parse_opts(argc, argv); + + ret = mainloop(); + if (opts.count_packets) + print_stats(); + + return ret; +} diff --git a/tools/testing/selftests/netfilter/nft_queue.sh b/tools/testing/selftests/netfilter/nft_queue.sh new file mode 100755 index 000000000000..6898448b4266 --- /dev/null +++ b/tools/testing/selftests/netfilter/nft_queue.sh @@ -0,0 +1,332 @@ +#!/bin/bash +# +# This tests nf_queue: +# 1. can process packets from all hooks +# 2. support running nfqueue from more than one base chain +# +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +ret=0 + +sfx=$(mktemp -u "XXXXXXXX") +ns1="ns1-$sfx" +ns2="ns2-$sfx" +nsrouter="nsrouter-$sfx" + +cleanup() +{ + ip netns del ${ns1} + ip netns del ${ns2} + ip netns del ${nsrouter} + rm -f "$TMPFILE0" + rm -f "$TMPFILE1" +} + +nft --version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without nft tool" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +ip netns add ${nsrouter} +if [ $? -ne 0 ];then + echo "SKIP: Could not create net namespace" + exit $ksft_skip +fi + +TMPFILE0=$(mktemp) +TMPFILE1=$(mktemp) +trap cleanup EXIT + +ip netns add ${ns1} +ip netns add ${ns2} + +ip link add veth0 netns ${nsrouter} type veth peer name eth0 netns ${ns1} > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: No virtual ethernet pair device support in kernel" + exit $ksft_skip +fi +ip link add veth1 netns ${nsrouter} type veth peer name eth0 netns ${ns2} + +ip -net ${nsrouter} link set lo up +ip -net ${nsrouter} link set veth0 up +ip -net ${nsrouter} addr add 10.0.1.1/24 dev veth0 +ip -net ${nsrouter} addr add dead:1::1/64 dev veth0 + +ip -net ${nsrouter} link set veth1 up +ip -net ${nsrouter} addr add 10.0.2.1/24 dev veth1 +ip -net ${nsrouter} addr add dead:2::1/64 dev veth1 + +ip -net ${ns1} link set lo up +ip -net ${ns1} link set eth0 up + +ip -net ${ns2} link set lo up +ip -net ${ns2} link set eth0 up + +ip -net ${ns1} addr add 10.0.1.99/24 dev eth0 +ip -net ${ns1} addr add dead:1::99/64 dev eth0 +ip -net ${ns1} route add default via 10.0.1.1 +ip -net ${ns1} route add default via dead:1::1 + +ip -net ${ns2} addr add 10.0.2.99/24 dev eth0 +ip -net ${ns2} addr add dead:2::99/64 dev eth0 +ip -net ${ns2} route add default via 10.0.2.1 +ip -net ${ns2} route add default via dead:2::1 + +load_ruleset() { + local name=$1 + local prio=$2 + +ip netns exec ${nsrouter} nft -f - < /dev/null + if [ $? -ne 0 ];then + return 1 + fi + + ip netns exec ${ns1} ping -c 1 -q dead:2::99 > /dev/null + if [ $? -ne 0 ];then + return 1 + fi + + return 0 +} + +test_ping_router() { + ip netns exec ${ns1} ping -c 1 -q 10.0.2.1 > /dev/null + if [ $? -ne 0 ];then + return 1 + fi + + ip netns exec ${ns1} ping -c 1 -q dead:2::1 > /dev/null + if [ $? -ne 0 ];then + return 1 + fi + + return 0 +} + +test_queue_blackhole() { + local proto=$1 + +ip netns exec ${nsrouter} nft -f - < /dev/null + lret=$? + elif [ $proto = "ip6" ]; then + ip netns exec ${ns1} ping -c 1 -q dead:2::99 > /dev/null + lret=$? + else + lret=111 + fi + + # queue without bypass keyword should drop traffic if no listener exists. + if [ $lret -eq 0 ];then + echo "FAIL: $proto expected failure, got $lret" 1>&2 + exit 1 + fi + + ip netns exec ${nsrouter} nft delete table $proto blackh + if [ $? -ne 0 ] ;then + echo "FAIL: $proto: Could not delete blackh table" + exit 1 + fi + + echo "PASS: $proto: statement with no listener results in packet drop" +} + +test_queue() +{ + local expected=$1 + local last="" + + # spawn nf-queue listeners + ip netns exec ${nsrouter} ./nf-queue -c -q 0 -t 3 > "$TMPFILE0" & + ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t 3 > "$TMPFILE1" & + sleep 1 + test_ping + ret=$? + if [ $ret -ne 0 ];then + echo "FAIL: netns routing/connectivity with active listener on queue $queue: $ret" 1>&2 + exit $ret + fi + + test_ping_router + ret=$? + if [ $ret -ne 0 ];then + echo "FAIL: netns router unreachable listener on queue $queue: $ret" 1>&2 + exit $ret + fi + + wait + ret=$? + + for file in $TMPFILE0 $TMPFILE1; do + last=$(tail -n1 "$file") + if [ x"$last" != x"$expected packets total" ]; then + echo "FAIL: Expected $expected packets total, but got $last" 1>&2 + cat "$file" 1>&2 + + ip netns exec ${nsrouter} nft list ruleset + exit 1 + fi + done + + echo "PASS: Expected and received $last" +} + +test_tcp_forward() +{ + ip netns exec ${nsrouter} ./nf-queue -q 2 -t 10 & + local nfqpid=$! + + tmpfile=$(mktemp) || exit 1 + dd conv=sparse status=none if=/dev/zero bs=1M count=100 of=$tmpfile + ip netns exec ${ns2} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null & + local rpid=$! + + sleep 1 + ip netns exec ${ns1} nc -w 5 10.0.2.99 12345 <"$tmpfile" >/dev/null & + + rm -f "$tmpfile" + + wait $rpid + wait $lpid + [ $? -eq 0 ] && echo "PASS: tcp and nfqueue in forward chain" +} + +test_tcp_localhost() +{ + tc -net "${nsrouter}" qdisc add dev lo root netem loss random 1% + + tmpfile=$(mktemp) || exit 1 + + dd conv=sparse status=none if=/dev/zero bs=1M count=900 of=$tmpfile + ip netns exec ${nsrouter} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null & + local rpid=$! + + ip netns exec ${nsrouter} ./nf-queue -q 3 -t 30 & + local nfqpid=$! + + sleep 1 + ip netns exec ${nsrouter} nc -w 5 127.0.0.1 12345 <"$tmpfile" > /dev/null + rm -f "$tmpfile" + + wait $rpid + [ $? -eq 0 ] && echo "PASS: tcp via loopback" +} + +ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null +ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null +ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null + +load_ruleset "filter" 0 + +sleep 3 + +test_ping +ret=$? +if [ $ret -eq 0 ];then + # queue bypass works (rules were skipped, no listener) + echo "PASS: ${ns1} can reach ${ns2}" +else + echo "FAIL: ${ns1} cannot reach ${ns2}: $ret" 1>&2 + exit $ret +fi + +test_queue_blackhole ip +test_queue_blackhole ip6 + +# dummy ruleset to add base chains between the +# queueing rules. We don't want the second reinject +# to re-execute the old hooks. +load_counter_ruleset 10 + +# we are hooking all: prerouting/input/forward/output/postrouting. +# we ping ${ns2} from ${ns1} via ${nsrouter} using ipv4 and ipv6, so: +# 1x icmp prerouting,forward,postrouting -> 3 queue events (6 incl. reply). +# 1x icmp prerouting,input,output postrouting -> 4 queue events incl. reply. +# so we expect that userspace program receives 10 packets. +test_queue 10 + +# same. We queue to a second program as well. +load_ruleset "filter2" 20 +test_queue 20 + +test_tcp_forward +test_tcp_localhost + +exit $ret From 306f354c67397b3138300cde875c5cab45b857f7 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 16 Mar 2020 09:31:03 +0200 Subject: [PATCH 358/372] net/mlx5_core: Set IB capability mask1 to fix ib_srpt connection failure The cap_mask1 isn't protected by field_select and not listed among RW fields, but it is required to be written to properly initialize ports in IB virtualization mode. Link: https://lore.kernel.org/linux-rdma/88bab94d2fd72f3145835b4518bc63dda587add6.camel@redhat.com Fixes: ab118da4c10a ("net/mlx5: Don't write read-only fields in MODIFY_HCA_VPORT_CONTEXT command") Signed-off-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/vport.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 1faac31f74d0..23f879da9104 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -1071,6 +1071,9 @@ int mlx5_core_modify_hca_vport_context(struct mlx5_core_dev *dev, MLX5_SET64(hca_vport_context, ctx, port_guid, req->port_guid); if (req->field_select & MLX5_HCA_VPORT_SEL_NODE_GUID) MLX5_SET64(hca_vport_context, ctx, node_guid, req->node_guid); + MLX5_SET(hca_vport_context, ctx, cap_mask1, req->cap_mask1); + MLX5_SET(hca_vport_context, ctx, cap_mask1_field_select, + req->cap_mask1_perm); err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out)); ex: kfree(in); From 1de0306c3a05d305e45b1f1fabe2f4e94222eb6b Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Mon, 9 Mar 2020 09:44:18 +0200 Subject: [PATCH 359/372] net/mlx5e: Enhance ICOSQ WQE info fields Add number of WQEBBs (WQE's Basic Block) to WQE info struct. Set the number of WQEBBs on WQE post, and increment the consumer counter (cc) on completion. In case of error completions, the cc was mistakenly not incremented, keeping a gap between cc and pc (producer counter). This failed the recovery flow on the ICOSQ from a CQE error which timed-out waiting for the cc and pc to meet. Fixes: be5323c8379f ("net/mlx5e: Report and recover from CQE error on ICOSQ") Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 11 +++++------ drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c | 1 + 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 220ef9f06f84..571ac5976549 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -371,6 +371,7 @@ enum { struct mlx5e_sq_wqe_info { u8 opcode; + u8 num_wqebbs; /* Auxiliary data for different opcodes. */ union { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 1c3ab69cbd96..312d4692425b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -477,6 +477,7 @@ static inline void mlx5e_fill_icosq_frag_edge(struct mlx5e_icosq *sq, /* fill sq frag edge with nops to avoid wqe wrapping two pages */ for (; wi < edge_wi; wi++) { wi->opcode = MLX5_OPCODE_NOP; + wi->num_wqebbs = 1; mlx5e_post_nop(wq, sq->sqn, &sq->pc); } } @@ -525,6 +526,7 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) umr_wqe->uctrl.xlt_offset = cpu_to_be16(xlt_offset); sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_UMR; + sq->db.ico_wqe[pi].num_wqebbs = MLX5E_UMR_WQEBBS; sq->db.ico_wqe[pi].umr.rq = rq; sq->pc += MLX5E_UMR_WQEBBS; @@ -621,6 +623,7 @@ void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc); wi = &sq->db.ico_wqe[ci]; + sqcc += wi->num_wqebbs; if (last_wqe && unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) { netdev_WARN_ONCE(cq->channel->netdev, @@ -631,16 +634,12 @@ void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) break; } - if (likely(wi->opcode == MLX5_OPCODE_UMR)) { - sqcc += MLX5E_UMR_WQEBBS; + if (likely(wi->opcode == MLX5_OPCODE_UMR)) wi->umr.rq->mpwqe.umr_completed++; - } else if (likely(wi->opcode == MLX5_OPCODE_NOP)) { - sqcc++; - } else { + else if (unlikely(wi->opcode != MLX5_OPCODE_NOP)) netdev_WARN_ONCE(cq->channel->netdev, "Bad OPCODE in ICOSQ WQE info: 0x%x\n", wi->opcode); - } } while (!last_wqe); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 257a7c9f7a14..800d34ed8a96 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -78,6 +78,7 @@ void mlx5e_trigger_irq(struct mlx5e_icosq *sq) u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP; + sq->db.ico_wqe[pi].num_wqebbs = 1; nopwqe = mlx5e_post_nop(wq, sq->sqn, &sq->pc); mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &nopwqe->ctrl); } From 39369fd536d485a99a59d8e357c0d4d3ce19a3b8 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Thu, 12 Mar 2020 12:35:32 +0200 Subject: [PATCH 360/372] net/mlx5e: Fix missing reset of SW metadata in Striding RQ reset When resetting the RQ (moving RQ state from RST to RDY), the driver resets the WQ's SW metadata. In striding RQ mode, we maintain a field that reflects the actual expected WQ head (including in progress WQEs posted to the ICOSQ). It was mistakenly not reset together with the WQ. Fix this here. Fixes: 8276ea1353a4 ("net/mlx5e: Report and recover from CQE with error on RQ") Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index a226277b0980..f07b1399744e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -181,10 +181,12 @@ mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma) static inline void mlx5e_rqwq_reset(struct mlx5e_rq *rq) { - if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) + if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { mlx5_wq_ll_reset(&rq->mpwqe.wq); - else + rq->mpwqe.actual_wq_head = 0; + } else { mlx5_wq_cyc_reset(&rq->wqe.wq); + } } /* SW parser related functions */ From e239c6d686e1c37fb2ab143162dfb57471a8643f Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Mon, 16 Mar 2020 16:53:10 +0200 Subject: [PATCH 361/372] net/mlx5e: Fix ICOSQ recovery flow with Striding RQ In striding RQ mode, the buffers of an RX WQE are first prepared and posted to the HW using a UMR WQEs via the ICOSQ. We maintain the state of these in-progress WQEs in the RQ SW struct. In the flow of ICOSQ recovery, the corresponding RQ is not in error state, hence: - The buffers of the in-progress WQEs must be released and the RQ metadata should reflect it. - Existing RX WQEs in the RQ should not be affected. For this, wrap the dealloc of the in-progress WQEs in a function, and use it in the ICOSQ recovery flow instead of mlx5e_free_rx_descs(). Fixes: be5323c8379f ("net/mlx5e: Report and recover from CQE error on ICOSQ") Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + .../mellanox/mlx5/core/en/reporter_rx.c | 2 +- .../net/ethernet/mellanox/mlx5/core/en_main.c | 31 ++++++++++++++----- 3 files changed, 26 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 571ac5976549..c9606b8ab6ef 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -1060,6 +1060,7 @@ int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state); void mlx5e_activate_rq(struct mlx5e_rq *rq); void mlx5e_deactivate_rq(struct mlx5e_rq *rq); void mlx5e_free_rx_descs(struct mlx5e_rq *rq); +void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq); void mlx5e_activate_icosq(struct mlx5e_icosq *icosq); void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index 6c72b592315b..a01e2de2488f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -90,7 +90,7 @@ static int mlx5e_rx_reporter_err_icosq_cqe_recover(void *ctx) goto out; mlx5e_reset_icosq_cc_pc(icosq); - mlx5e_free_rx_descs(rq); + mlx5e_free_rx_in_progress_descs(rq); clear_bit(MLX5E_SQ_STATE_RECOVERING, &icosq->state); mlx5e_activate_icosq(icosq); mlx5e_activate_rq(rq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 21de4764d4c0..4ef3dc79f73c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -813,6 +813,29 @@ int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time) return -ETIMEDOUT; } +void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq) +{ + struct mlx5_wq_ll *wq; + u16 head; + int i; + + if (rq->wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) + return; + + wq = &rq->mpwqe.wq; + head = wq->head; + + /* Outstanding UMR WQEs (in progress) start at wq->head */ + for (i = 0; i < rq->mpwqe.umr_in_progress; i++) { + rq->dealloc_wqe(rq, head); + head = mlx5_wq_ll_get_wqe_next_ix(wq, head); + } + + rq->mpwqe.actual_wq_head = wq->head; + rq->mpwqe.umr_in_progress = 0; + rq->mpwqe.umr_completed = 0; +} + void mlx5e_free_rx_descs(struct mlx5e_rq *rq) { __be16 wqe_ix_be; @@ -820,14 +843,8 @@ void mlx5e_free_rx_descs(struct mlx5e_rq *rq) if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { struct mlx5_wq_ll *wq = &rq->mpwqe.wq; - u16 head = wq->head; - int i; - /* Outstanding UMR WQEs (in progress) start at wq->head */ - for (i = 0; i < rq->mpwqe.umr_in_progress; i++) { - rq->dealloc_wqe(rq, head); - head = mlx5_wq_ll_get_wqe_next_ix(wq, head); - } + mlx5e_free_rx_in_progress_descs(rq); while (!mlx5_wq_ll_is_empty(wq)) { struct mlx5e_rx_wqe_ll *wqe; From 187a9830c921d92c4a9a8e2921ecc4b35a97532c Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Thu, 19 Mar 2020 13:25:17 +0200 Subject: [PATCH 362/372] net/mlx5e: Do not recover from a non-fatal syndrome For non-fatal syndromes like LOCAL_LENGTH_ERR, recovery shouldn't be triggered. In these scenarios, the RQ is not actually in ERR state. This misleads the recovery flow which assumes that the RQ is really in error state and no more completions arrive, causing crashes on bad page state. Fixes: 8276ea1353a4 ("net/mlx5e: Report and recover from CQE with error on RQ") Signed-off-by: Aya Levin Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/health.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h index d3693fa547ac..e54f70d9af22 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h @@ -10,8 +10,7 @@ static inline bool cqe_syndrome_needs_recover(u8 syndrome) { - return syndrome == MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR || - syndrome == MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR || + return syndrome == MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR || syndrome == MLX5_CQE_SYNDROME_LOCAL_PROT_ERR || syndrome == MLX5_CQE_SYNDROME_WR_FLUSH_ERR; } From 961d0e5b32946703125964f9f5b6321d60f4d706 Mon Sep 17 00:00:00 2001 From: Zh-yuan Ye Date: Tue, 24 Mar 2020 17:28:25 +0900 Subject: [PATCH 363/372] net: cbs: Fix software cbs to consider packet sending time Currently the software CBS does not consider the packet sending time when depleting the credits. It caused the throughput to be Idleslope[kbps] * (Port transmit rate[kbps] / |Sendslope[kbps]|) where Idleslope * (Port transmit rate / (Idleslope + |Sendslope|)) = Idleslope is expected. In order to fix the issue above, this patch takes the time when the packet sending completes into account by moving the anchor time variable "last" ahead to the send completion time upon transmission and adding wait when the next dequeue request comes before the send completion time of the previous packet. changelog: V2->V3: - remove unnecessary whitespace cleanup - add the checks if port_rate is 0 before division V1->V2: - combine variable "send_completed" into "last" - add the comment for estimate of the packet sending Fixes: 585d763af09c ("net/sched: Introduce Credit Based Shaper (CBS) qdisc") Signed-off-by: Zh-yuan Ye Reviewed-by: Vinicius Costa Gomes Signed-off-by: David S. Miller --- net/sched/sch_cbs.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c index b2905b03a432..2eaac2ff380f 100644 --- a/net/sched/sch_cbs.c +++ b/net/sched/sch_cbs.c @@ -181,6 +181,11 @@ static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch) s64 credits; int len; + /* The previous packet is still being sent */ + if (now < q->last) { + qdisc_watchdog_schedule_ns(&q->watchdog, q->last); + return NULL; + } if (q->credits < 0) { credits = timediff_to_credits(now - q->last, q->idleslope); @@ -212,7 +217,12 @@ static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch) credits += q->credits; q->credits = max_t(s64, credits, q->locredit); - q->last = now; + /* Estimate of the transmission of the last byte of the packet in ns */ + if (unlikely(atomic64_read(&q->port_rate) == 0)) + q->last = now; + else + q->last = now + div64_s64(len * NSEC_PER_SEC, + atomic64_read(&q->port_rate)); return skb; } From e80f40cbe4dd51371818e967d40da8fe305db5e4 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 24 Mar 2020 11:45:34 +0200 Subject: [PATCH 364/372] net: dsa: tag_8021q: replace dsa_8021q_remove_header with __skb_vlan_pop Not only did this wheel did not need reinventing, but there is also an issue with it: It doesn't remove the VLAN header in a way that preserves the L2 payload checksum when that is being provided by the DSA master hw. It should recalculate checksum both for the push, before removing the header, and for the pull afterwards. But the current implementation is quite dizzying, with pulls followed immediately afterwards by pushes, the memmove is done before the push, etc. This makes a DSA master with RX checksumming offload to print stack traces with the infamous 'hw csum failure' message. So remove the dsa_8021q_remove_header function and replace it with something that actually works with inet checksumming. Fixes: d461933638ae ("net: dsa: tag_8021q: Create helper function for removing VLAN header") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- include/linux/dsa/8021q.h | 7 ------- net/dsa/tag_8021q.c | 43 --------------------------------------- net/dsa/tag_sja1105.c | 19 ++++++++--------- 3 files changed, 9 insertions(+), 60 deletions(-) diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h index 0aa803c451a3..c620d9139c28 100644 --- a/include/linux/dsa/8021q.h +++ b/include/linux/dsa/8021q.h @@ -28,8 +28,6 @@ int dsa_8021q_rx_switch_id(u16 vid); int dsa_8021q_rx_source_port(u16 vid); -struct sk_buff *dsa_8021q_remove_header(struct sk_buff *skb); - #else int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int index, @@ -64,11 +62,6 @@ int dsa_8021q_rx_source_port(u16 vid) return 0; } -struct sk_buff *dsa_8021q_remove_header(struct sk_buff *skb) -{ - return NULL; -} - #endif /* IS_ENABLED(CONFIG_NET_DSA_TAG_8021Q) */ #endif /* _NET_DSA_8021Q_H */ diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c index 2fb6c26294b5..b97ad93d1c1a 100644 --- a/net/dsa/tag_8021q.c +++ b/net/dsa/tag_8021q.c @@ -298,47 +298,4 @@ struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev, } EXPORT_SYMBOL_GPL(dsa_8021q_xmit); -/* In the DSA packet_type handler, skb->data points in the middle of the VLAN - * tag, after tpid and before tci. This is because so far, ETH_HLEN - * (DMAC, SMAC, EtherType) bytes were pulled. - * There are 2 bytes of VLAN tag left in skb->data, and upper - * layers expect the 'real' EtherType to be consumed as well. - * Coincidentally, a VLAN header is also of the same size as - * the number of bytes that need to be pulled. - * - * skb_mac_header skb->data - * | | - * v v - * | | | | | | | | | | | | | | | | | | | - * +-----------------------+-----------------------+-------+-------+-------+ - * | Destination MAC | Source MAC | TPID | TCI | EType | - * +-----------------------+-----------------------+-------+-------+-------+ - * ^ | | - * |<--VLAN_HLEN-->to <---VLAN_HLEN---> - * from | - * >>>>>>> v - * >>>>>>> | | | | | | | | | | | | | | | - * >>>>>>> +-----------------------+-----------------------+-------+ - * >>>>>>> | Destination MAC | Source MAC | EType | - * +-----------------------+-----------------------+-------+ - * ^ ^ - * (now part of | | - * skb->head) skb_mac_header skb->data - */ -struct sk_buff *dsa_8021q_remove_header(struct sk_buff *skb) -{ - u8 *from = skb_mac_header(skb); - u8 *dest = from + VLAN_HLEN; - - memmove(dest, from, ETH_HLEN - VLAN_HLEN); - skb_pull(skb, VLAN_HLEN); - skb_push(skb, ETH_HLEN); - skb_reset_mac_header(skb); - skb_reset_mac_len(skb); - skb_pull_rcsum(skb, ETH_HLEN); - - return skb; -} -EXPORT_SYMBOL_GPL(dsa_8021q_remove_header); - MODULE_LICENSE("GPL v2"); diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index 5366ea430349..d553bf36bd41 100644 --- a/net/dsa/tag_sja1105.c +++ b/net/dsa/tag_sja1105.c @@ -250,14 +250,14 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb, { struct sja1105_meta meta = {0}; int source_port, switch_id; - struct vlan_ethhdr *hdr; + struct ethhdr *hdr; u16 tpid, vid, tci; bool is_link_local; bool is_tagged; bool is_meta; - hdr = vlan_eth_hdr(skb); - tpid = ntohs(hdr->h_vlan_proto); + hdr = eth_hdr(skb); + tpid = ntohs(hdr->h_proto); is_tagged = (tpid == ETH_P_SJA1105); is_link_local = sja1105_is_link_local(skb); is_meta = sja1105_is_meta_frame(skb); @@ -266,7 +266,12 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb, if (is_tagged) { /* Normal traffic path. */ - tci = ntohs(hdr->h_vlan_TCI); + skb_push_rcsum(skb, ETH_HLEN); + __skb_vlan_pop(skb, &tci); + skb_pull_rcsum(skb, ETH_HLEN); + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + vid = tci & VLAN_VID_MASK; source_port = dsa_8021q_rx_source_port(vid); switch_id = dsa_8021q_rx_switch_id(vid); @@ -295,12 +300,6 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb, return NULL; } - /* Delete/overwrite fake VLAN header, DSA expects to not find - * it there, see dsa_switch_rcv: skb_push(skb, ETH_HLEN). - */ - if (is_tagged) - skb = dsa_8021q_remove_header(skb); - return sja1105_rcv_meta_state_machine(skb, &meta, is_link_local, is_meta); } From 50e0d28d3808146cc19b0d5564ef4ba9e5bf3846 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Tue, 24 Mar 2020 17:10:00 +0530 Subject: [PATCH 365/372] cxgb4/ptp: pass the sign of offset delta in FW CMD cxgb4_ptp_fineadjtime() doesn't pass the signedness of offset delta in FW_PTP_CMD. Fix it by passing correct sign. Signed-off-by: Raju Rangoju Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c index 58a039c3224a..af1f40cbccc8 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ptp.c @@ -246,6 +246,9 @@ static int cxgb4_ptp_fineadjtime(struct adapter *adapter, s64 delta) FW_PTP_CMD_PORTID_V(0)); c.retval_len16 = cpu_to_be32(FW_CMD_LEN16_V(sizeof(c) / 16)); c.u.ts.sc = FW_PTP_SC_ADJ_FTIME; + c.u.ts.sign = (delta < 0) ? 1 : 0; + if (delta < 0) + delta = -delta; c.u.ts.tm = cpu_to_be64(delta); err = t4_wr_mbox(adapter, adapter->mbox, &c, sizeof(c), NULL); From c312c7818b86b663d32ec5d4b512abf06b23899a Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Tue, 24 Mar 2020 16:10:10 +0000 Subject: [PATCH 366/372] net: phy: mdio-bcm-unimac: Fix clock handling The DT binding for this PHY describes an *optional* clock property. Due to a bug in the error handling logic, we are actually ignoring this clock *all* of the time so far. Fix this by using devm_clk_get_optional() to handle this clock properly. Fixes: b78ac6ecd1b6b ("net: phy: mdio-bcm-unimac: Allow configuring MDIO clock divider") Signed-off-by: Andre Przywara Reviewed-by: Andrew Lunn Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/mdio-bcm-unimac.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/phy/mdio-bcm-unimac.c b/drivers/net/phy/mdio-bcm-unimac.c index 4a28fb29adaa..fbd36891ee64 100644 --- a/drivers/net/phy/mdio-bcm-unimac.c +++ b/drivers/net/phy/mdio-bcm-unimac.c @@ -242,11 +242,9 @@ static int unimac_mdio_probe(struct platform_device *pdev) return -ENOMEM; } - priv->clk = devm_clk_get(&pdev->dev, NULL); - if (PTR_ERR(priv->clk) == -EPROBE_DEFER) + priv->clk = devm_clk_get_optional(&pdev->dev, NULL); + if (IS_ERR(priv->clk)) return PTR_ERR(priv->clk); - else - priv->clk = NULL; ret = clk_prepare_enable(priv->clk); if (ret) From f13bc68131b0c0d67a77fb43444e109828a983bf Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Tue, 24 Mar 2020 20:58:29 +0100 Subject: [PATCH 367/372] r8169: re-enable MSI on RTL8168c The original change fixed an issue on RTL8168b by mimicking the vendor driver behavior to disable MSI on chip versions before RTL8168d. This however now caused an issue on a system with RTL8168c, see [0]. Therefore leave MSI disabled on RTL8168b, but re-enable it on RTL8168c. [0] https://bugzilla.redhat.com/show_bug.cgi?id=1792839 Fixes: 003bd5b4a7b4 ("r8169: don't use MSI before RTL8168d") Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index a2168a14794c..a9bdafd15a35 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -5194,7 +5194,7 @@ static int rtl_alloc_irq(struct rtl8169_private *tp) RTL_W8(tp, Config2, RTL_R8(tp, Config2) & ~MSIEnable); rtl_lock_config_regs(tp); /* fall through */ - case RTL_GIGA_MAC_VER_07 ... RTL_GIGA_MAC_VER_24: + case RTL_GIGA_MAC_VER_07 ... RTL_GIGA_MAC_VER_17: flags = PCI_IRQ_LEGACY; break; default: From ccf4ad7da0d9c30a962a116cb55bcd7d8c44b0fe Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 20 Mar 2020 21:36:54 +0900 Subject: [PATCH 368/372] zonfs: Fix handling of read-only zones The write pointer of zones in the read-only consition is defined as invalid by the SCSI ZBC and ATA ZAC specifications. It is thus not possible to determine the correct size of a read-only zone file on mount. Fix this by handling read-only zones in the same manner as offline zones by disabling all accesses to the zone (read and write) and initializing the inode size of the read-only zone to 0). For zones found to be in the read-only condition at runtime, only disable write access to the zone and keep the size of the zone file to its last updated value to allow the user to recover previously written data. Also fix zonefs documentation file to reflect this change. Signed-off-by: Damien Le Moal Reviewed-by: Johannes Thumshirn --- Documentation/filesystems/zonefs.txt | 18 +++++++++++++----- fs/zonefs/super.c | 28 +++++++++++++++++++++------- 2 files changed, 34 insertions(+), 12 deletions(-) diff --git a/Documentation/filesystems/zonefs.txt b/Documentation/filesystems/zonefs.txt index d54fa98ac158..78813c34ec47 100644 --- a/Documentation/filesystems/zonefs.txt +++ b/Documentation/filesystems/zonefs.txt @@ -258,11 +258,11 @@ conditions. | option | condition | size read write read write | +--------------+-----------+-----------------------------------------+ | | good | fixed yes no yes yes | - | remount-ro | read-only | fixed yes no yes no | + | remount-ro | read-only | as is yes no yes no | | (default) | offline | 0 no no no no | +--------------+-----------+-----------------------------------------+ | | good | fixed yes no yes yes | - | zone-ro | read-only | fixed yes no yes no | + | zone-ro | read-only | as is yes no yes no | | | offline | 0 no no no no | +--------------+-----------+-----------------------------------------+ | | good | 0 no no yes yes | @@ -270,7 +270,7 @@ conditions. | | offline | 0 no no no no | +--------------+-----------+-----------------------------------------+ | | good | fixed yes yes yes yes | - | repair | read-only | fixed yes no yes no | + | repair | read-only | as is yes no yes no | | | offline | 0 no no no no | +--------------+-----------+-----------------------------------------+ @@ -307,8 +307,16 @@ condition changes. The defined behaviors are as follow: * zone-offline * repair -The I/O error actions defined for each behavior are detailed in the previous -section. +The run-time I/O error actions defined for each behavior are detailed in the +previous section. Mount time I/O errors will cause the mount operation to fail. +The handling of read-only zones also differs between mount-time and run-time. +If a read-only zone is found at mount time, the zone is always treated in the +same manner as offline zones, that is, all accesses are disabled and the zone +file size set to 0. This is necessary as the write pointer of read-only zones +is defined as invalib by the ZBC and ZAC standards, making it impossible to +discover the amount of data that has been written to the zone. In the case of a +read-only zone discovered at run-time, as indicated in the previous section. +the size of the zone file is left unchanged from its last updated value. Zonefs User Space Tools ======================= diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index 69aee3dfb660..3ce9829a6936 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -178,7 +178,8 @@ static void zonefs_update_stats(struct inode *inode, loff_t new_isize) * amount of readable data in the zone. */ static loff_t zonefs_check_zone_condition(struct inode *inode, - struct blk_zone *zone, bool warn) + struct blk_zone *zone, bool warn, + bool mount) { struct zonefs_inode_info *zi = ZONEFS_I(inode); @@ -196,13 +197,26 @@ static loff_t zonefs_check_zone_condition(struct inode *inode, zone->wp = zone->start; return 0; case BLK_ZONE_COND_READONLY: - /* Do not allow writes in read-only zones */ + /* + * The write pointer of read-only zones is invalid. If such a + * zone is found during mount, the file size cannot be retrieved + * so we treat the zone as offline (mount == true case). + * Otherwise, keep the file size as it was when last updated + * so that the user can recover data. In both cases, writes are + * always disabled for the zone. + */ if (warn) zonefs_warn(inode->i_sb, "inode %lu: read-only zone\n", inode->i_ino); inode->i_flags |= S_IMMUTABLE; + if (mount) { + zone->cond = BLK_ZONE_COND_OFFLINE; + inode->i_mode &= ~0777; + zone->wp = zone->start; + return 0; + } inode->i_mode &= ~0222; - /* fallthrough */ + return i_size_read(inode); default: if (zi->i_ztype == ZONEFS_ZTYPE_CNV) return zi->i_max_size; @@ -231,7 +245,7 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, * as there is no inconsistency between the inode size and the amount of * data writen in the zone (data_size). */ - data_size = zonefs_check_zone_condition(inode, zone, true); + data_size = zonefs_check_zone_condition(inode, zone, true, false); isize = i_size_read(inode); if (zone->cond != BLK_ZONE_COND_OFFLINE && zone->cond != BLK_ZONE_COND_READONLY && @@ -274,7 +288,7 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, if (zone->cond != BLK_ZONE_COND_OFFLINE) { zone->cond = BLK_ZONE_COND_OFFLINE; data_size = zonefs_check_zone_condition(inode, zone, - false); + false, false); } } else if (zone->cond == BLK_ZONE_COND_READONLY || sbi->s_mount_opts & ZONEFS_MNTOPT_ERRORS_ZRO) { @@ -283,7 +297,7 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx, if (zone->cond != BLK_ZONE_COND_READONLY) { zone->cond = BLK_ZONE_COND_READONLY; data_size = zonefs_check_zone_condition(inode, zone, - false); + false, false); } } @@ -975,7 +989,7 @@ static void zonefs_init_file_inode(struct inode *inode, struct blk_zone *zone, zi->i_zsector = zone->start; zi->i_max_size = min_t(loff_t, MAX_LFS_FILESIZE, zone->len << SECTOR_SHIFT); - zi->i_wpoffset = zonefs_check_zone_condition(inode, zone, true); + zi->i_wpoffset = zonefs_check_zone_condition(inode, zone, true, true); inode->i_uid = sbi->s_uid; inode->i_gid = sbi->s_gid; From 919a23e9d6ccf8b30e6b4c7aa36f8904c99d2dd7 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 25 Mar 2020 16:07:01 +0800 Subject: [PATCH 369/372] selftests/net: add missing tests to Makefile Find some tests are missed in Makefile by running: for file in $(ls *.sh); do grep -q $file Makefile || echo $file; done Signed-off-by: Hangbin Liu Reviewed-by: David Ahern Signed-off-by: David S. Miller --- tools/testing/selftests/net/Makefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 287ae916ec0b..4c1bd03ffa1c 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -11,7 +11,9 @@ TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh TEST_PROGS += test_vxlan_fdb_changelink.sh so_txtime.sh ipv6_flowlabel.sh TEST_PROGS += tcp_fastopen_backup_key.sh fcnal-test.sh l2tp.sh traceroute.sh -TEST_PROGS += fin_ack_lat.sh +TEST_PROGS += fin_ack_lat.sh fib_nexthop_multiprefix.sh fib_nexthops.sh +TEST_PROGS += altnames.sh icmp_redirect.sh ip6_gre_headroom.sh +TEST_PROGS += route_localnet.sh TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any From c085dbfb1cfcf74e2ef2ef435291e7e63f046d6a Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 25 Mar 2020 16:41:01 +0800 Subject: [PATCH 370/372] selftests/net/forwarding: define libs as TEST_PROGS_EXTENDED The lib files should not be defined as TEST_PROGS, or we will run them in run_kselftest.sh. Also remove ethtool_lib.sh exec permission. Fixes: 81573b18f26d ("selftests/net/forwarding: add Makefile to install tests") Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- .../testing/selftests/net/forwarding/Makefile | 31 ++++++++++--------- .../selftests/net/forwarding/ethtool_lib.sh | 0 2 files changed, 16 insertions(+), 15 deletions(-) mode change 100755 => 100644 tools/testing/selftests/net/forwarding/ethtool_lib.sh diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index 44616103508b..250fbb2d1625 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -5,11 +5,7 @@ TEST_PROGS = bridge_igmp.sh \ bridge_sticky_fdb.sh \ bridge_vlan_aware.sh \ bridge_vlan_unaware.sh \ - devlink_lib.sh \ - ethtool_lib.sh \ ethtool.sh \ - fib_offload_lib.sh \ - forwarding.config.sample \ gre_inner_v4_multipath.sh \ gre_inner_v6_multipath.sh \ gre_multipath.sh \ @@ -21,8 +17,6 @@ TEST_PROGS = bridge_igmp.sh \ ipip_hier_gre_key.sh \ ipip_hier_gre_keys.sh \ ipip_hier_gre.sh \ - ipip_lib.sh \ - lib.sh \ loopback.sh \ mirror_gre_bound.sh \ mirror_gre_bridge_1d.sh \ @@ -32,15 +26,11 @@ TEST_PROGS = bridge_igmp.sh \ mirror_gre_changes.sh \ mirror_gre_flower.sh \ mirror_gre_lag_lacp.sh \ - mirror_gre_lib.sh \ mirror_gre_neigh.sh \ mirror_gre_nh.sh \ mirror_gre.sh \ - mirror_gre_topo_lib.sh \ mirror_gre_vlan_bridge_1q.sh \ mirror_gre_vlan.sh \ - mirror_lib.sh \ - mirror_topo_lib.sh \ mirror_vlan.sh \ router_bridge.sh \ router_bridge_vlan.sh \ @@ -50,17 +40,12 @@ TEST_PROGS = bridge_igmp.sh \ router_multipath.sh \ router.sh \ router_vid_1.sh \ - sch_ets_core.sh \ sch_ets.sh \ - sch_ets_tests.sh \ - sch_tbf_core.sh \ - sch_tbf_etsprio.sh \ sch_tbf_ets.sh \ sch_tbf_prio.sh \ sch_tbf_root.sh \ tc_actions.sh \ tc_chains.sh \ - tc_common.sh \ tc_flower_router.sh \ tc_flower.sh \ tc_shblocks.sh \ @@ -72,4 +57,20 @@ TEST_PROGS = bridge_igmp.sh \ vxlan_bridge_1q.sh \ vxlan_symmetric.sh +TEST_PROGS_EXTENDED := devlink_lib.sh \ + ethtool_lib.sh \ + fib_offload_lib.sh \ + forwarding.config.sample \ + ipip_lib.sh \ + lib.sh \ + mirror_gre_lib.sh \ + mirror_gre_topo_lib.sh \ + mirror_lib.sh \ + mirror_topo_lib.sh \ + sch_ets_core.sh \ + sch_ets_tests.sh \ + sch_tbf_core.sh \ + sch_tbf_etsprio.sh \ + tc_common.sh + include ../../lib.mk diff --git a/tools/testing/selftests/net/forwarding/ethtool_lib.sh b/tools/testing/selftests/net/forwarding/ethtool_lib.sh old mode 100755 new mode 100644 From 428c491332bca498c8eb2127669af51506c346c7 Mon Sep 17 00:00:00 2001 From: "Guilherme G. Piccoli" Date: Fri, 20 Mar 2020 09:55:34 -0300 Subject: [PATCH 371/372] net: ena: Add PCI shutdown handler to allow safe kexec Currently ENA only provides the PCI remove() handler, used during rmmod for example. This is not called on shutdown/kexec path; we are potentially creating a failure scenario on kexec: (a) Kexec is triggered, no shutdown() / remove() handler is called for ENA; instead pci_device_shutdown() clears the master bit of the PCI device, stopping all DMA transactions; (b) Kexec reboot happens and the device gets enabled again, likely having its FW with that DMA transaction buffered; then it may trigger the (now invalid) memory operation in the new kernel, corrupting kernel memory area. This patch aims to prevent this, by implementing a shutdown() handler quite similar to the remove() one - the difference being the handling of the netdev, which is unregistered on remove(), but following the convention observed in other drivers, it's only detached on shutdown(). This prevents an odd issue in AWS Nitro instances, in which after the 2nd kexec the next one will fail with an initrd corruption, caused by a wild DMA write to invalid kernel memory. The lspci output for the adapter present in my instance is: 00:05.0 Ethernet controller [0200]: Amazon.com, Inc. Elastic Network Adapter (ENA) [1d0f:ec20] Suggested-by: Gavin Shan Signed-off-by: Guilherme G. Piccoli Acked-by: Sameeh Jubran Signed-off-by: David S. Miller --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 51 ++++++++++++++++---- 1 file changed, 41 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 4647d7656761..cada6e7e30f4 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -4336,13 +4336,15 @@ err_disable_device: /*****************************************************************************/ -/* ena_remove - Device Removal Routine +/* __ena_shutoff - Helper used in both PCI remove/shutdown routines * @pdev: PCI device information struct + * @shutdown: Is it a shutdown operation? If false, means it is a removal * - * ena_remove is called by the PCI subsystem to alert the driver - * that it should release a PCI device. + * __ena_shutoff is a helper routine that does the real work on shutdown and + * removal paths; the difference between those paths is with regards to whether + * dettach or unregister the netdevice. */ -static void ena_remove(struct pci_dev *pdev) +static void __ena_shutoff(struct pci_dev *pdev, bool shutdown) { struct ena_adapter *adapter = pci_get_drvdata(pdev); struct ena_com_dev *ena_dev; @@ -4361,13 +4363,17 @@ static void ena_remove(struct pci_dev *pdev) cancel_work_sync(&adapter->reset_task); - rtnl_lock(); + rtnl_lock(); /* lock released inside the below if-else block */ ena_destroy_device(adapter, true); - rtnl_unlock(); - - unregister_netdev(netdev); - - free_netdev(netdev); + if (shutdown) { + netif_device_detach(netdev); + dev_close(netdev); + rtnl_unlock(); + } else { + rtnl_unlock(); + unregister_netdev(netdev); + free_netdev(netdev); + } ena_com_rss_destroy(ena_dev); @@ -4382,6 +4388,30 @@ static void ena_remove(struct pci_dev *pdev) vfree(ena_dev); } +/* ena_remove - Device Removal Routine + * @pdev: PCI device information struct + * + * ena_remove is called by the PCI subsystem to alert the driver + * that it should release a PCI device. + */ + +static void ena_remove(struct pci_dev *pdev) +{ + __ena_shutoff(pdev, false); +} + +/* ena_shutdown - Device Shutdown Routine + * @pdev: PCI device information struct + * + * ena_shutdown is called by the PCI subsystem to alert the driver that + * a shutdown/reboot (or kexec) is happening and device must be disabled. + */ + +static void ena_shutdown(struct pci_dev *pdev) +{ + __ena_shutoff(pdev, true); +} + #ifdef CONFIG_PM /* ena_suspend - PM suspend callback * @pdev: PCI device information struct @@ -4431,6 +4461,7 @@ static struct pci_driver ena_pci_driver = { .id_table = ena_pci_tbl, .probe = ena_probe, .remove = ena_remove, + .shutdown = ena_shutdown, #ifdef CONFIG_PM .suspend = ena_suspend, .resume = ena_resume, From 2c64605b590edadb3fb46d1ec6badb49e940b479 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 25 Mar 2020 13:47:18 +0100 Subject: [PATCH 372/372] net: Fix CONFIG_NET_CLS_ACT=n and CONFIG_NFT_FWD_NETDEV={y, m} build MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit net/netfilter/nft_fwd_netdev.c: In function ‘nft_fwd_netdev_eval’: net/netfilter/nft_fwd_netdev.c:32:10: error: ‘struct sk_buff’ has no member named ‘tc_redirected’ pkt->skb->tc_redirected = 1; ^~ net/netfilter/nft_fwd_netdev.c:33:10: error: ‘struct sk_buff’ has no member named ‘tc_from_ingress’ pkt->skb->tc_from_ingress = 1; ^~ To avoid a direct dependency with tc actions from netfilter, wrap the redirect bits around CONFIG_NET_REDIRECT and move helpers to include/linux/skbuff.h. Turn on this toggle from the ifb driver, the only existing client of these bits in the tree. This patch adds skb_set_redirected() that sets on the redirected bit on the skbuff, it specifies if the packet was redirect from ingress and resets the timestamp (timestamp reset was originally missing in the netfilter bugfix). Fixes: bcfabee1afd99484 ("netfilter: nft_fwd_netdev: allow to redirect to ifb via ingress") Reported-by: noreply@ellerman.id.au Reported-by: Geert Uytterhoeven Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- drivers/net/Kconfig | 1 + drivers/net/ifb.c | 6 +++--- drivers/net/wireguard/queueing.h | 2 +- include/linux/skbuff.h | 36 ++++++++++++++++++++++++++++---- include/net/sch_generic.h | 16 -------------- net/Kconfig | 3 +++ net/core/dev.c | 4 ++-- net/core/pktgen.c | 2 +- net/netfilter/nft_fwd_netdev.c | 5 ++--- net/sched/act_mirred.c | 6 ++---- 10 files changed, 47 insertions(+), 34 deletions(-) diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 25a8f9387d5a..db8884ad6d40 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -149,6 +149,7 @@ config NET_FC config IFB tristate "Intermediate Functional Block support" depends on NET_CLS_ACT + select NET_REDIRECT ---help--- This is an intermediate driver that allows sharing of resources. diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index 242b9b0943f8..7fe306e76281 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -75,7 +75,7 @@ static void ifb_ri_tasklet(unsigned long _txp) } while ((skb = __skb_dequeue(&txp->tq)) != NULL) { - skb->tc_redirected = 0; + skb->redirected = 0; skb->tc_skip_classify = 1; u64_stats_update_begin(&txp->tsync); @@ -96,7 +96,7 @@ static void ifb_ri_tasklet(unsigned long _txp) rcu_read_unlock(); skb->skb_iif = txp->dev->ifindex; - if (!skb->tc_from_ingress) { + if (!skb->from_ingress) { dev_queue_xmit(skb); } else { skb_pull_rcsum(skb, skb->mac_len); @@ -243,7 +243,7 @@ static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev) txp->rx_bytes += skb->len; u64_stats_update_end(&txp->rsync); - if (!skb->tc_redirected || !skb->skb_iif) { + if (!skb->redirected || !skb->skb_iif) { dev_kfree_skb(skb); dev->stats.rx_dropped++; return NETDEV_TX_OK; diff --git a/drivers/net/wireguard/queueing.h b/drivers/net/wireguard/queueing.h index cf1e0e2376d8..3432232afe06 100644 --- a/drivers/net/wireguard/queueing.h +++ b/drivers/net/wireguard/queueing.h @@ -100,8 +100,8 @@ static inline void wg_reset_packet(struct sk_buff *skb) skb->dev = NULL; #ifdef CONFIG_NET_SCHED skb->tc_index = 0; - skb_reset_tc(skb); #endif + skb_reset_redirect(skb); skb->hdr_len = skb_headroom(skb); skb_reset_mac_header(skb); skb_reset_network_header(skb); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 5b50278c4bc8..e59620234415 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -645,8 +645,8 @@ typedef unsigned char *sk_buff_data_t; * @offload_l3_fwd_mark: Packet was L3-forwarded in hardware * @tc_skip_classify: do not classify packet. set by IFB device * @tc_at_ingress: used within tc_classify to distinguish in/egress - * @tc_redirected: packet was redirected by a tc action - * @tc_from_ingress: if tc_redirected, tc_at_ingress at time of redirect + * @redirected: packet was redirected by packet classifier + * @from_ingress: packet was redirected from the ingress path * @peeked: this packet has been seen already, so stats have been * done for it, don't do them again * @nf_trace: netfilter packet trace flag @@ -848,8 +848,10 @@ struct sk_buff { #ifdef CONFIG_NET_CLS_ACT __u8 tc_skip_classify:1; __u8 tc_at_ingress:1; - __u8 tc_redirected:1; - __u8 tc_from_ingress:1; +#endif +#ifdef CONFIG_NET_REDIRECT + __u8 redirected:1; + __u8 from_ingress:1; #endif #ifdef CONFIG_TLS_DEVICE __u8 decrypted:1; @@ -4579,5 +4581,31 @@ static inline __wsum lco_csum(struct sk_buff *skb) return csum_partial(l4_hdr, csum_start - l4_hdr, partial); } +static inline bool skb_is_redirected(const struct sk_buff *skb) +{ +#ifdef CONFIG_NET_REDIRECT + return skb->redirected; +#else + return false; +#endif +} + +static inline void skb_set_redirected(struct sk_buff *skb, bool from_ingress) +{ +#ifdef CONFIG_NET_REDIRECT + skb->redirected = 1; + skb->from_ingress = from_ingress; + if (skb->from_ingress) + skb->tstamp = 0; +#endif +} + +static inline void skb_reset_redirect(struct sk_buff *skb) +{ +#ifdef CONFIG_NET_REDIRECT + skb->redirected = 0; +#endif +} + #endif /* __KERNEL__ */ #endif /* _LINUX_SKBUFF_H */ diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 151208704ed2..c30f914867e6 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -675,22 +675,6 @@ void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab); int skb_do_redirect(struct sk_buff *); -static inline void skb_reset_tc(struct sk_buff *skb) -{ -#ifdef CONFIG_NET_CLS_ACT - skb->tc_redirected = 0; -#endif -} - -static inline bool skb_is_tc_redirected(const struct sk_buff *skb) -{ -#ifdef CONFIG_NET_CLS_ACT - return skb->tc_redirected; -#else - return false; -#endif -} - static inline bool skb_at_tc_ingress(const struct sk_buff *skb) { #ifdef CONFIG_NET_CLS_ACT diff --git a/net/Kconfig b/net/Kconfig index 2eeb0e55f7c9..df8d8c9bd021 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -52,6 +52,9 @@ config NET_INGRESS config NET_EGRESS bool +config NET_REDIRECT + bool + config SKB_EXTENSIONS bool diff --git a/net/core/dev.c b/net/core/dev.c index 402a986659cf..500bba8874b0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4516,7 +4516,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, /* Reinjected packets coming from act_mirred or similar should * not get XDP generic processing. */ - if (skb_is_tc_redirected(skb)) + if (skb_is_redirected(skb)) return XDP_PASS; /* XDP packets must be linear and must have sufficient headroom @@ -5063,7 +5063,7 @@ skip_taps: goto out; } #endif - skb_reset_tc(skb); + skb_reset_redirect(skb); skip_classify: if (pfmemalloc && !skb_pfmemalloc_protocol(skb)) goto drop; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index acc849df60b5..d0641bba6b81 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3362,7 +3362,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) /* skb was 'freed' by stack, so clean few * bits and reuse it */ - skb_reset_tc(skb); + skb_reset_redirect(skb); } while (--burst > 0); goto out; /* Skips xmit_mode M_START_XMIT */ } else if (pkt_dev->xmit_mode == M_QUEUE_XMIT) { diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c index 74f050ba6bad..3087e23297db 100644 --- a/net/netfilter/nft_fwd_netdev.c +++ b/net/netfilter/nft_fwd_netdev.c @@ -28,9 +28,8 @@ static void nft_fwd_netdev_eval(const struct nft_expr *expr, struct nft_fwd_netdev *priv = nft_expr_priv(expr); int oif = regs->data[priv->sreg_dev]; - /* These are used by ifb only. */ - pkt->skb->tc_redirected = 1; - pkt->skb->tc_from_ingress = 1; + /* This is used by ifb only. */ + skb_set_redirected(pkt->skb, true); nf_fwd_netdev_egress(pkt, oif); regs->verdict.code = NF_STOLEN; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 1ad300e6dbc0..83dd82fc9f40 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -284,10 +284,8 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a, /* mirror is always swallowed */ if (is_redirect) { - skb2->tc_redirected = 1; - skb2->tc_from_ingress = skb2->tc_at_ingress; - if (skb2->tc_from_ingress) - skb2->tstamp = 0; + skb_set_redirected(skb2, skb2->tc_at_ingress); + /* let's the caller reinsert the packet, if possible */ if (use_reinsert) { res->ingress = want_ingress;